# -*- coding: utf-8 -*-
|
|
from tool.engine_crawling_thread import *
|
|
from time import sleep
|
|
from model.models import *
|
|
from random import choice
|
|
from datetime import datetime
|
|
# Load every NeuSrc row up front and report how long the query took.
start = datetime.now()
# Earlier variant, kept for reference:
# standard0s = Standard2.query.filter(Standard0.id>=353055).all()
standard0s = NeuSrc.query.all()
print("Time to query:", datetime.now()-start)

# Reference rows used by unit(); only their `.value` attribute is read there.
vowel0s = Vowel0.query.all()
consonants = Consonant.query.all()

# Shared accumulator: unit() appends every matching word here, and the end of
# the script de-duplicates it.
ls_rs = []


def unit(standard0):
    """Record the vowel-less, digit-free words found in one record.

    The record's ``value`` has its newlines removed, is lower-cased and is
    split on single spaces.  A word is kept when it contains none of the
    ``vowel0s`` values, at least one of the ``consonants`` values, and no
    ASCII digit.  Each kept word is printed, appended as one line to
    ``acronym.txt`` and appended to the module-level ``ls_rs`` list.

    Args:
        standard0: object exposing a string ``value`` attribute
            (presumably a database row -- confirm against the caller).

    Returns:
        None.  Results are delivered through the side effects above.
    """
    digits = '0123456789'

    # NOTE(review): newlines are dropped rather than replaced by a space, so
    # the last word of one line is glued to the first word of the next.
    # Kept as in the original -- confirm this is intended.
    words = standard0.value.replace(u'\n', '').lower().split(' ')

    for word in words:
        # Consecutive spaces produce empty strings; ignore them.
        if not word:
            continue

        # Only "is there any match" matters, so stop at the first hit
        # instead of counting all of them.
        if any(v.value in word for v in vowel0s):
            continue
        if any(ch in digits for ch in word):
            continue
        if not any(c.value in word for c in consonants):
            continue

        print(word)
        # Explicit UTF-8 so non-ASCII words do not fail under a platform
        # default encoding that cannot represent them.
        with open('acronym.txt', 'a', newline='', encoding='utf-8') as f:
            f.write(word + '\n')
        ls_rs.append(word)


# Sequential alternative, kept for reference:
# for i in standard0s:
#     unit(i)

# Hand every record to unit() through the project helper, requesting
# 5 threads.  The helper is imported from tool.engine_crawling_thread; its
# scheduling behaviour is not visible in this file.
engine_crawling_thread(standard0s, unit=unit, num_threads=5)

# Collapse duplicates (set() does not preserve order) and report how many
# distinct words were found.
ls_rs = list(set(ls_rs))
print(len(ls_rs))