Add.
This commit is contained in:
parent
303219e96f
commit
879e5be3d1
66
mlc.py
Normal file
66
mlc.py
Normal file
@ -0,0 +1,66 @@
|
||||
from pymongo import MongoClient
|
||||
import json
|
||||
import os
|
||||
import pprint
|
||||
# Ingest script: walk ./READ, split every text line into words and store one
# MongoDB document per word in MLC.atc.
client = MongoClient('localhost', 27017)
db = client.MLC
atc = db.atc

wdir = os.path.join(os.getcwd(), "READ")


def _tokenize(line):
    """Return the cleaned, lower-cased words of one raw text line.

    The line is split on single spaces, each token is lower-cased and
    reduced to its alphabetic characters. Tokens that end up empty
    (repeated spaces, pure digits/punctuation, blank lines) are dropped so
    that no document with an empty ``word`` is stored.
    """
    # Strip only the line terminator: slicing the last character off
    # unconditionally would eat a letter on an unterminated final line.
    text = line.replace("\r", "").rstrip("\n")
    words = []
    for token in text.split(" "):
        # Filter the token itself, not its repr(), so escape sequences of
        # non-printable/non-ASCII characters cannot leak letters into it.
        word = "".join(ch for ch in token.lower() if ch.isalpha())
        if word:
            words.append(word)
    return words


def _link_documents(words):
    """Build one document per word of a line.

    Each document holds the word itself, the full word list of the line
    under "connect", the preceding word under "fwd" and the following word
    under "back"; "" marks a missing neighbour at either end of the line.
    """
    last = len(words) - 1
    return [
        {
            "word": word,
            "connect": words,
            "fwd": words[pos - 1] if pos > 0 else "",
            "back": words[pos + 1] if pos < last else "",
        }
        for pos, word in enumerate(words)
    ]


try:
    for root, dirs, files in os.walk(wdir):
        print("WORKING")
        for mfile in files:
            path = os.path.join(root, mfile)
            print("OPEN : ", path)
            # The context manager closes the file even if an insert fails.
            with open(path) as rf:
                for line in rf:
                    docs = _link_documents(_tokenize(line))
                    # insert_many rejects an empty list, so skip lines that
                    # produced no words.
                    if docs:
                        atc.insert_many(docs)
finally:
    client.close()

print("Done.")
|
40
mlcs.py
Normal file
40
mlcs.py
Normal file
@ -0,0 +1,40 @@
|
||||
from pymongo import MongoClient
|
||||
import random
|
||||
|
||||
# Interactive generator (Python 2: relies on raw_input and on the trailing
# comma after print to stay on one output line).
# Starting from a word typed by the user, repeatedly look up documents for the
# current word in MLC.atc and hop to one of the most frequent "back" words
# until no successor is available.
client = MongoClient('localhost', 27017)
db = client.MLC
atc = db.atc

wherefind = "back"   # document field that holds the following word
exact = 4            # hops allowed before the context list is reset

try:
    while 1:
        usrcmd = str(raw_input("\nTHE FIRST WORD: "))
        findword = usrcmd
        cntword = [usrcmd]
        tmpext = 0
        while findword != "":
            if tmpext > exact:
                # Context grew past the limit: start over from the seed word.
                cntword = [usrcmd]
                tmpext = 0
                print(""),
            cntword.append(findword)

            print(findword),
            result = atc.find({"word": findword, "connect": {"$all": cntword}}, {wherefind: 1}).limit(5000)

            # Single-pass frequency count of the candidate successors.
            counts = {}
            for item in result:
                candidate = item[wherefind]
                counts[candidate] = counts.get(candidate, 0) + 1

            # No matching document: indexing the empty ranking would raise
            # IndexError, so end this chain and prompt again.
            if not counts:
                break

            fwordsort = sorted(counts.items(), key=lambda x: x[1], reverse=True)
            # Pick randomly among the top fifth; floor division keeps the
            # bound an int regardless of division semantics.
            choose = random.randint(0, len(fwordsort) // 5)
            findword = fwordsort[choose][0]
            tmpext += 1
except (EOFError, KeyboardInterrupt):
    # Ctrl-D / Ctrl-C is the only way out of the prompt loop; leave quietly.
    pass
finally:
    client.close()
|
119
mlct.py
Normal file
119
mlct.py
Normal file
@ -0,0 +1,119 @@
|
||||
from pymongo import MongoClient
|
||||
import numpy as np
|
||||
import pdb
|
||||
# Module-wide MongoDB handles: the local server, its "MLC" database and the
# "atc" and "tnk" collections inside it.
client = MongoClient(host='localhost', port=27017)
db = client["MLC"]
atc = db["atc"]
tnk = db["tnk"]
|
||||
|
||||
def list_sort_hw(result, wherefind):
    """Rank the values of one field by how often they occur.

    Args:
        result: iterable of mappings (e.g. a query cursor); it is consumed
            exactly once.
        wherefind: key whose value is read from every item.

    Returns:
        A list of ``(value, count)`` tuples sorted by descending count.
        The sort is stable, so on interpreters with insertion-ordered
        dicts (Python 3.7+) equal counts keep first-seen order.

    Raises:
        KeyError: if an item lacks the ``wherefind`` key.
    """
    # Count in one pass instead of calling list.count() per distinct value.
    counts = {}
    for item in result:
        value = item[wherefind]
        counts[value] = counts.get(value, 0) + 1
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
|
||||
|
||||
def create_tclist(result):
    """Return the "connect" value of every item in *result*, in order."""
    return [entry["connect"] for entry in result]
|
||||
|
||||
# lstndx(tclist, cntw): positional-spread score for the items of `cntw`.
#   tclist -- iterable of lists; each one is scanned with count()/index().
#   cntw   -- list of items; cntw[0] is the anchor, cntw[1:] the other items.
# Only lists that contain every item of `cntw` are examined. Returns
# np.mean() over the values collected in `vndx`, or -1 when nothing was
# collected.
# NOTE(review): indentation was lost in this copy of the file, so the exact
# nesting of the loops and of the `vcndx` / `lndx` handling below cannot be
# confirmed from here -- verify against the real source before relying on
# these notes.
def lstndx(tclist, cntw):
|
||||
# Positions gathered for the np.std() call below.
# NOTE(review): created once here and never cleared in the visible code, so it
# keeps growing across anchors and across lists -- confirm this is intended.
lndx = []
|
||||
# One value per np.std() result kept; averaged at the end.
vndx = []
|
||||
for cnti in tclist:
|
||||
# Skip lists that do not contain every item of cntw.
if set(cntw).issubset(set(cnti)) == True:
|
||||
#print(cnti)
|
||||
#pdb.set_trace()
|
||||
# tndx: every index at which the anchor cntw[0] occurs in cnti.
tndx = []
|
||||
fnum = cnti.count(cntw[0])
|
||||
tndx.append(cnti.index(cntw[0]))
|
||||
exist = 1;
|
||||
# Keep searching past the last hit until index() fails.
while exist:
|
||||
try:
|
||||
tndx.append(cnti.index(cntw[0],tndx[-1]+1))
|
||||
# NOTE(review): list.index signals "not found" with ValueError; this bare
# except also hides any other error.
except :
|
||||
exist = 0;
|
||||
|
||||
# For each anchor occurrence i and each other item k ...
for i in range(fnum):
|
||||
for k in range(1,len(cntw)):
|
||||
# nears: positive offsets from anchor i to occurrences of cntw[k].
nears = []
|
||||
ndx = cnti.index(cntw[k])
|
||||
lmndx = ndx
|
||||
# Only occurrences located after the anchor are recorded.
if ndx - tndx[i] > 0:
|
||||
nears.append(ndx - tndx[i])
|
||||
exist = 1;
|
||||
# Same scan-until-failure pattern for the remaining occurrences of cntw[k].
while exist:
|
||||
try:
|
||||
ndx = cnti.index(cntw[k],lmndx+1)
|
||||
lmndx = ndx;
|
||||
if ndx - tndx[i] > 0:
|
||||
nears.append(ndx - tndx[i])
|
||||
# NOTE(review): bare except again; ValueError is the expected exit.
except:
|
||||
exist = 0;
|
||||
|
||||
# Smallest offset + anchor index = absolute index of the nearest later
# occurrence of cntw[k].
if len(nears):
|
||||
mndx = min(nears)+tndx[i];
|
||||
lndx.append(mndx)
|
||||
vcndx = []
|
||||
# Add the anchor index itself, then take the standard deviation of all
# positions gathered in lndx.
if len(lndx):
|
||||
lndx.append(tndx[i])
|
||||
acndx = np.array(lndx)
|
||||
vcndx.append(np.std(acndx,axis=0))
|
||||
if len(vcndx):
|
||||
vndx.append(min(vcndx))
|
||||
# Final score: mean of the collected values, or -1 if there were none.
if len(vndx):
|
||||
vrndx = np.array(vndx)
|
||||
rndx = np.mean(vrndx)
|
||||
else :
|
||||
rndx = -1;
|
||||
return rndx
|
||||
|
||||
def deeper(m_target, f_cntw, m_sentv, depth):
    """Recursively extend a word context and print the combinations that pass.

    Queries ``atc`` for documents whose "word" is ``m_target`` and whose
    "connect" list contains every word of ``f_cntw``, ranks their "back"
    values with ``list_sort_hw`` and, for every sufficiently frequent
    value, scores the extended context with ``lstndx``. Contexts scoring
    strictly between 0 and 10 are printed together with their score and,
    while ``depth`` is greater than 1, explored one level further.

    Args:
        m_target: word whose documents are examined.
        f_cntw: non-empty list of context words; it is not modified.
        m_sentv: minimum occurrence count a "back" value must reach.
        depth: remaining recursion levels.

    Returns:
        0 when the initial query yields no "back" values, otherwise None.

    NOTE(review): the nesting of this function was reconstructed from a
    copy without indentation; in particular the recursion is assumed to
    run only for contexts that pass the score filter -- confirm against
    the original file.
    """
    tlist = atc.find(
        {"word": m_target, "connect": {"$all": f_cntw}},
        {"connect": 1, "back": 1},
    ).limit(100000)
    fsort = list_sort_hw(tlist, "back")
    if not fsort:
        return 0

    # The per-target "connect" lists do not depend on the candidate, so they
    # are fetched at most once, and only if some candidate needs them.
    tclist = None
    for alyw in fsort:
        # fsort is ordered by descending count: once one entry falls below
        # the threshold, all remaining ones do too. The threshold is the
        # m_sentv argument, not the module-level `sentv`.
        if alyw[1] < m_sentv:
            break
        # Do not extend the context with the word it already ends with.
        if alyw[0] == f_cntw[-1]:
            continue
        m_cntw = list(f_cntw)
        m_cntw.append(alyw[0])
        if tclist is None:
            rtclist = atc.find({"word": m_target}, {"connect": 1}).limit(1000)
            tclist = create_tclist(rtclist)
        dcvt = lstndx(tclist, m_cntw)
        if 0 < dcvt < 10:
            print(m_cntw)
            print(dcvt)
            if depth > 1:
                deeper(m_target, m_cntw, m_sentv, depth - 1)
    return None
|
||||
|
||||
# Driver: explore contexts around `target`, keeping only "back" words seen at
# least `sentv` times and recursing `dptv` levels deep.
target = "you"
sentv = 12
dptv = 2

# Not used by the code visible in this file; kept in case other code reads it.
fword = {}

try:
    deeper(target, [target], sentv, dptv)
finally:
    # Release the connection even when the exploration raises.
    client.close()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user