diff --git a/mlc.py b/mlc.py
new file mode 100644
--- /dev/null
+++ b/mlc.py
@@ -0,0 +1,95 @@
+"""Load the text files under ./READ into the MLC.atc MongoDB collection.
+
+Every line is split into sentences and one document is stored per word:
+
+    word    -- the word itself (lower-cased, letters only)
+    connect -- every word of the sentence the word occurs in
+    fwd     -- the word directly BEFORE it ("" at the sentence start)
+    back    -- the word directly AFTER it ("" at the sentence end)
+
+The field names are kept as they are because mlcs.py and mlct.py query
+"back" to find the word that follows.
+"""
+from __future__ import print_function
+
+import json
+import os
+import pprint
+
+from pymongo import MongoClient
+
+client = MongoClient('localhost', 27017)
+db = client.MLC
+atc = db.atc
+
+SENTENCE_END = (".", "?", "!")
+
+
+def letters_only(token):
+    """Return *token* with every non-alphabetic character removed."""
+    return "".join(ch for ch in token if ch.isalpha())
+
+
+def split_sentences(line):
+    """Split *line* into sentences, each a list of lower-case words.
+
+    A sentence is closed by a token containing '.', '?' or '!' and by the
+    end of the line.  Tokens without a single letter are skipped: an empty
+    word would be stored as "fwd"/"back" of its neighbours, and mlcs.py
+    treats an empty "back" as the end of a sentence.
+    """
+    sentences = []
+    current = []
+    for token in line.lower().split():
+        word = letters_only(token)
+        if word:
+            current.append(word)
+        if current and any(mark in token for mark in SENTENCE_END):
+            sentences.append(current)
+            current = []
+    if current:
+        sentences.append(current)
+    return sentences
+
+
+def sentence_documents(words):
+    """Return one document per word of the sentence *words*."""
+    last = len(words) - 1
+    return [
+        {
+            "word": word,
+            "connect": words,
+            "fwd": words[pos - 1] if pos > 0 else "",
+            "back": words[pos + 1] if pos < last else "",
+        }
+        for pos, word in enumerate(words)
+    ]
+
+
+def load_file(path, collection):
+    """Insert the word documents of every line of *path* into *collection*."""
+    # Iterating the file keeps the last word intact even when the final
+    # line has no trailing newline.
+    with open(path) as rf:
+        for line in rf:
+            for words in split_sentences(line):
+                collection.insert_many(sentence_documents(words))
+
+
+def main():
+    """Walk ./READ (relative to the working directory) and load each file."""
+    wdir = os.path.join(os.getcwd(), "READ")
+    try:
+        for root, dirs, files in os.walk(wdir):
+            print("WORKING")
+            for mfile in files:
+                path = os.path.join(root, mfile)
+                print("OPEN : ", path)
+                load_file(path, atc)
+    finally:
+        client.close()
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlcs.py b/mlcs.py
new file mode 100644
--- /dev/null
+++ b/mlcs.py
@@ -0,0 +1,74 @@
+"""Interactive sentence babbler built on the MLC.atc word collection.
+
+Starting from a word typed by the user, repeatedly look up the words that
+followed the current word in stored sentences and print a randomly chosen
+frequent follower, until the end of a sentence is reached.
+"""
+from __future__ import print_function
+
+from collections import Counter
+import random
+
+from pymongo import MongoClient
+
+try:
+    read_line = raw_input  # Python 2
+except NameError:
+    read_line = input  # Python 3
+
+client = MongoClient('localhost', 27017)
+db = client.MLC
+atc = db.atc
+
+CONTEXT_WORDS = 4  # the context is cut back to the first word beyond this
+FETCH_LIMIT = 5000
+
+
+def next_word(word, context, wherefind="back"):
+    """Pick a follower of *word* from sentences containing all of *context*.
+
+    The followers are ranked by frequency and one of the entries at index
+    0 .. len // 5 of that ranking is drawn at random.  Returns "" when the
+    chosen entry marks the end of a sentence or when nothing matches.
+    """
+    cursor = atc.find(
+        {"word": word, "connect": {"$all": context}},
+        {wherefind: 1},
+    ).limit(FETCH_LIMIT)
+    ranked = Counter(item[wherefind] for item in cursor).most_common()
+    if not ranked:
+        # Unknown word or context: stop instead of indexing an empty list.
+        return ""
+    return ranked[random.randint(0, len(ranked) // 5)][0]
+
+
+def babble(first_word):
+    """Print a chain of words that starts with *first_word*."""
+    findword = first_word
+    cntword = [first_word]
+    tmpext = 0
+    while findword != "":
+        if tmpext > CONTEXT_WORDS:
+            # NOTE(review): presumably the reset keeps the "$all" filter
+            # from becoming too restrictive -- confirm the intent.
+            cntword = [first_word]
+            tmpext = 0
+        cntword.append(findword)
+        print(findword, end=" ")
+        findword = next_word(findword, cntword)
+        tmpext += 1
+
+
+def main():
+    """Prompt for first words until the input ends (Ctrl-D / Ctrl-C)."""
+    try:
+        while True:
+            babble(read_line("\nTHE FIRST WORD: "))
+    except (EOFError, KeyboardInterrupt):
+        print()
+    finally:
+        client.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlct.py b/mlct.py
new file mode 100644
--- /dev/null
+++ b/mlct.py
@@ -0,0 +1,115 @@
+"""Explore which word chains tend to follow a target word in MLC.atc."""
+from collections import Counter
+import pdb
+
+import numpy as np
+from pymongo import MongoClient
+
+client = MongoClient('localhost', 27017)
+db = client.MLC
+atc = db.atc
+tnk = db.tnk
+
+
+def list_sort_hw(result, wherefind):
+    """Count the values of field *wherefind* over the documents in *result*.
+
+    Returns a list of (value, count) pairs, most frequent first.
+    """
+    return Counter(item[wherefind] for item in result).most_common()
+
+
+def create_tclist(result):
+    """Return the "connect" word list of every document in *result*."""
+    return [item["connect"] for item in result]
+
+
+def _positions(words, word):
+    """Return every index at which *word* occurs in *words*."""
+    return [pos for pos, item in enumerate(words) if item == word]
+
+
+def lstndx(tclist, cntw):
+    """Measure how tightly the words of *cntw* cluster in the sentences.
+
+    For every sentence of *tclist* that contains all words of *cntw*, and
+    for every occurrence of cntw[0] in it, the nearest later position of
+    each other word is collected; the standard deviation of those positions
+    (together with the occurrence itself) is the spread of that occurrence.
+    A sentence contributes its smallest spread.
+
+    Returns the mean over all contributing sentences, or -1 when no
+    sentence contributes.
+    """
+    wanted = set(cntw)
+    vndx = []
+    for cnti in tclist:
+        if not wanted.issubset(cnti):
+            continue
+        vcndx = []
+        for anchor in _positions(cnti, cntw[0]):
+            # Collected per occurrence, so that positions from other
+            # occurrences and sentences do not leak into this spread.
+            lndx = []
+            for other in cntw[1:]:
+                later = [pos for pos in _positions(cnti, other) if pos > anchor]
+                if later:
+                    lndx.append(min(later))
+            if lndx:
+                lndx.append(anchor)
+                vcndx.append(np.std(np.array(lndx), axis=0))
+        if vcndx:
+            vndx.append(min(vcndx))
+    if not vndx:
+        return -1
+    return np.mean(np.array(vndx))
+
+
+def deeper(m_target, f_cntw, m_sentv, depth):
+    """Print the word chains that follow *m_target* and cluster tightly.
+
+    m_target -- the word whose followers are examined
+    f_cntw   -- the chain found so far; only sentences containing all of
+                its words are considered
+    m_sentv  -- minimum number of times a follower must occur
+    depth    -- how many more words may be appended to the chain
+
+    Returns 0 when no sentence matches the chain, otherwise None.
+    """
+    tlist = atc.find(
+        {"word": m_target, "connect": {"$all": f_cntw}},
+        {"connect": 1, "back": 1},
+    ).limit(100000)
+    fsort = list_sort_hw(tlist, "back")
+    if not fsort:
+        return 0
+    tclist = None
+    for follower, count in fsort:
+        if count < m_sentv:
+            # fsort is ordered by count, so no later entry can qualify.
+            break
+        if follower == f_cntw[-1]:
+            continue
+        m_cntw = list(f_cntw) + [follower]
+        if tclist is None:
+            # Depends on m_target only, so it is fetched once and reused.
+            rtclist = atc.find({"word": m_target}, {"connect": 1}).limit(1000)
+            tclist = create_tclist(rtclist)
+        dcvt = lstndx(tclist, m_cntw)
+        if 0 < dcvt < 10:
+            print(m_cntw)
+            print(dcvt)
+            if depth > 1:
+                deeper(m_target, m_cntw, m_sentv, depth - 1)
+
+
+target = "you"
+sentv = 12
+dptv = 2
+
+
+if __name__ == "__main__":
+    try:
+        deeper(target, [target], sentv, dptv)
+    finally:
+        client.close()