This commit is contained in:
Saturneric 2020-09-01 00:34:20 +08:00
parent 303219e96f
commit 879e5be3d1
3 changed files with 225 additions and 0 deletions

66
mlc.py Normal file
View File

@ -0,0 +1,66 @@
from pymongo import MongoClient
import json
import os
import pprint
# Import script: walk ./READ, split every text file into sentences and store
# one MongoDB document per word in MLC.atc.
#
# NOTE(review): the original indentation was lost, so the nesting below is a
# reconstruction: a sentence is flushed to the database as soon as a token
# containing '.', '?' or '!' is seen, and words left over at the end of a line
# without such a terminator are discarded (the word buffer is reset per line).

# A token containing any of these characters closes the current sentence.
_SENTENCE_END = (".", "?", "!")


def _clean_word(token):
    """Return *token* lower-cased with all non-alphabetic characters removed.

    Works on the token itself rather than on the repr of an ``enumerate``
    tuple, so escape sequences and quote characters can no longer leak into
    the stored word, and the result is always a plain string (``filter`` on a
    string returns an iterator on Python 3, which cannot be stored).
    """
    return "".join(ch for ch in token.lower() if ch.isalpha())


def _split_sentences(line):
    """Yield each terminated sentence of *line* as a list of cleaned words.

    Tokens are separated by any whitespace.  Tokens without any letter
    (bare punctuation, numbers) contribute no word, because an empty string
    is reserved as the "no neighbour" marker in the stored documents; they
    can still terminate a sentence.
    """
    words = []
    for token in line.split():
        word = _clean_word(token)
        if word:
            words.append(word)
        if any(mark in token for mark in _SENTENCE_END):
            if words:
                yield words
            words = []


def _word_docs(sentence):
    """Build one document per word of *sentence*.

    Each document holds the word, the whole sentence ("connect"), the
    previous word ("fwd") and the next word ("back"); the neighbours are ""
    at the sentence boundaries.
    """
    last = len(sentence) - 1
    docs = []
    for pos, word in enumerate(sentence):
        docs.append({
            "word": word,
            "connect": sentence,
            "fwd": sentence[pos - 1] if pos > 0 else "",
            "back": sentence[pos + 1] if pos < last else "",
        })
    return docs


client = MongoClient('localhost', 27017)
db = client.MLC
atc = db.atc
wdir = os.path.join(os.getcwd(), "READ")
try:
    for root, dirs, files in os.walk(wdir):
        print("WORKING")
        for mfile in files:
            path = os.path.join(root, mfile)
            print("OPEN : ", path)
            # 'with' guarantees the file is closed even if an insert fails.
            with open(path) as rf:
                for line in rf:
                    for sentence in _split_sentences(line.replace("\r", "")):
                        # One round trip per sentence instead of one per word.
                        atc.insert_many(_word_docs(sentence))
finally:
    # Release the connection even when reading or inserting raises.
    client.close()
print("Done.")

40
mlcs.py Normal file
View File

@ -0,0 +1,40 @@
from pymongo import MongoClient
import random
# Interactive generator: ask for a first word, then repeatedly pick one of the
# most frequent following words ("back") stored in MLC.atc and print the chain.
#
# NOTE(review): the original indentation was lost; the nesting below is a
# reconstruction of the two nested loops.
import sys  # script-local import: words are written without a trailing newline

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

client = MongoClient('localhost', 27017)
db = client.MLC
atc = db.atc

wherefind = "back"  # document field that holds the following word
exact = 4           # context words accumulated before the context is reset

try:
    while True:
        try:
            usrcmd = str(read_line("\nTHE FIRST WORD: "))
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session cleanly.
            break
        findword = usrcmd
        cntword = [usrcmd]
        tmpext = 0
        # A following word of "" marks the end of a stored sentence.
        while findword != "":
            if tmpext > exact:
                # Context grew too specific: restart it from the first word
                # and continue the output on a new line.
                cntword = [usrcmd]
                tmpext = 0
                sys.stdout.write("\n")
            cntword.append(findword)
            sys.stdout.write(findword + " ")
            sys.stdout.flush()
            result = atc.find(
                {"word": findword, "connect": {"$all": cntword}},
                {wherefind: 1},
            ).limit(5000)
            # Count each candidate in a single pass over the cursor.
            counts = {}
            for item in result:
                nxt = item[wherefind]
                counts[nxt] = counts.get(nxt, 0) + 1
            if not counts:
                # No stored sentence matches the context: stop this chain
                # instead of indexing into an empty candidate list.
                break
            fwordsort = sorted(counts.items(), key=lambda x: x[1], reverse=True)
            # Pick among the top fifth of candidates; '//' keeps the bound an
            # integer on Python 3 as well.
            choose = random.randint(0, len(fwordsort) // 5)
            findword = fwordsort[choose][0]
            tmpext += 1
finally:
    client.close()

119
mlct.py Normal file
View File

@ -0,0 +1,119 @@
from pymongo import MongoClient
import numpy as np
import pdb
# Connect to the local MongoDB server and bind the MLC collections used below.
client = MongoClient("localhost", 27017)
db = client["MLC"]
atc = db["atc"]  # per-word documents queried by deeper()
tnk = db["tnk"]  # not referenced by the code visible in this script
def list_sort_hw(result,wherefind):
    """Count the values of field *wherefind* and rank them by frequency.

    Args:
        result: iterable of mappings (e.g. a query cursor); it is consumed
            exactly once.
        wherefind: key whose value is read from every item.

    Returns:
        A list of ``(value, count)`` tuples sorted by descending count.
        Values with equal counts keep the order in which they were first
        seen, so the ranking is reproducible between runs.
    """
    # Single pass instead of list.count() per distinct value (quadratic).
    counts = {}
    for item in result:
        value = item[wherefind]
        counts[value] = counts.get(value, 0) + 1
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
def create_tclist(result):
    """Return the "connect" value of every item in *result*, in order."""
    return [entry["connect"] for entry in result]
def lstndx(tclist, cntw):
    """Score how tightly the words of *cntw* cluster inside the lists of *tclist*.

    Only lists that contain every word of *cntw* are considered.  For each
    occurrence of ``cntw[0]`` the index of the nearest following occurrence
    of every other word is collected, and the standard deviation of the
    collected indices is recorded.

    Args:
        tclist: iterable of word lists.
        cntw: list of words; ``cntw[0]`` is the anchor word.

    Returns:
        The mean of the recorded standard deviations, or -1 when nothing
        was recorded.

    NOTE(review): the original indentation was lost; the nesting of the
    standard-deviation step (from ``vcndx = []`` onwards) is a best-guess
    reconstruction -- confirm against the original file.
    """
    # NOTE(review): lndx is created once here and only ever appended to, so
    # indices from earlier lists/occurrences stay in it -- confirm intended.
    lndx = []
    vndx = []
    for cnti in tclist:
        # The subset test guarantees the un-guarded index() calls below succeed.
        if set(cntw).issubset(set(cnti)) == True:
            tndx = []
            fnum = cnti.count(cntw[0])
            # Collect the index of every occurrence of the anchor word.
            tndx.append(cnti.index(cntw[0]))
            exist = 1;
            while exist:
                try:
                    tndx.append(cnti.index(cntw[0],tndx[-1]+1))
                except :
                    # index() found no further occurrence: stop scanning.
                    exist = 0;
            for i in range(fnum):
                for k in range(1,len(cntw)):
                    # Positive offsets from anchor occurrence i to each
                    # occurrence of cntw[k].
                    nears = []
                    ndx = cnti.index(cntw[k])
                    lmndx = ndx
                    if ndx - tndx[i] > 0:
                        nears.append(ndx - tndx[i])
                    exist = 1;
                    while exist:
                        try:
                            ndx = cnti.index(cntw[k],lmndx+1)
                            lmndx = ndx;
                            if ndx - tndx[i] > 0:
                                nears.append(ndx - tndx[i])
                        except:
                            # No more occurrences of cntw[k].
                            exist = 0;
                    if len(nears):
                        # Absolute index of the nearest following occurrence.
                        mndx = min(nears)+tndx[i];
                        lndx.append(mndx)
                vcndx = []
                if len(lndx):
                    # Include the anchor index, then measure the spread.
                    lndx.append(tndx[i])
                    acndx = np.array(lndx)
                    vcndx.append(np.std(acndx,axis=0))
                if len(vcndx):
                    vndx.append(min(vcndx))
    if len(vndx):
        vrndx = np.array(vndx)
        rndx = np.mean(vrndx)
    else :
        # Sentinel: no spread could be recorded.
        rndx = -1;
    return rndx
def deeper(m_target,f_cntw,m_sentv,depth):
    """Print word chains that frequently and closely follow *m_target*.

    Looks up the documents for *m_target* whose "connect" list contains all
    words of *f_cntw*, ranks their "back" words by frequency and, for every
    word seen at least *m_sentv* times, extends the chain by that word.  A
    chain is printed together with its lstndx() score when the score lies
    strictly between 0 and 10, and is then extended recursively while
    *depth* is greater than 1.

    Args:
        m_target: word whose documents are queried.
        f_cntw: current chain of context words; not modified.
        m_sentv: minimum frequency a following word needs to be considered.
        depth: remaining number of chain extensions.

    Returns:
        0 when no following word was found, otherwise None.

    NOTE(review): the original indentation was lost; attaching the final
    ``else: return 0`` to the "no candidates" check and nesting the
    recursion under the score check are best-guess reconstructions.
    """
    tlist = atc.find(
        {"word": m_target, "connect": {"$all": f_cntw}},
        {"connect": 1, "back": 1},
    ).limit(100000)
    fsort = list_sort_hw(tlist, "back")
    if not fsort:
        return 0

    # The sentence lists depend only on m_target, so they are fetched at most
    # once per call instead of once per candidate word.
    tclist = None
    for word, count in fsort:
        # fsort is ordered by descending count: nothing further qualifies.
        # Uses the m_sentv parameter rather than the module-level 'sentv'.
        if count < m_sentv:
            break
        # Skip a candidate that merely repeats the last word of the chain.
        if word == f_cntw[-1]:
            continue
        m_cntw = list(f_cntw) + [word]
        if tclist is None:
            rtclist = atc.find({"word": m_target}, {"connect": 1}).limit(1000)
            tclist = create_tclist(rtclist)
        dcvt = lstndx(tclist, m_cntw)
        if 0 < dcvt < 10:
            print(m_cntw)
            print(dcvt)
            if depth > 1:
                deeper(m_target, m_cntw, m_sentv, depth - 1)
# Driver: explore the chains that start at `target`.
target = "you"  # word the exploration starts from
sentv = 12      # minimum frequency passed to deeper() as m_sentv
dptv = 2        # recursion depth passed to deeper()
fword = {}      # not used by the code visible in this script
try:
    deeper(target, [target], sentv, dptv)
finally:
    # Close the connection even if the exploration raises.
    client.close()