97 lines
3.4 KiB
Python
97 lines
3.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
from tool.engine_crawling_thread import *
|
|
from time import sleep
|
|
from model.models import *
|
|
from random import choice
|
|
from datetime import datetime
|
|
# Load every StandardVal row up front and report how long the query took.
start = datetime.now()
# Earlier variant of the query, kept for reference:
# standard0s = Standard2.query.filter(Standard0.id>=353055).all()
standard0s = StandardVal.query.all()
query_elapsed = datetime.now() - start
print("Time to query:", query_elapsed)

# Vowel rows are fetched once at module load; unit() reads this list on
# every call.
vowels = Vowel.query.all()
|
|
def _telex_d(stem):
    """Rewrite 'đ' in *stem* when the stem starts with it.

    One of two spellings is picked at random: 'đ' -> 'dd' in place, or
    'đ' -> 'd' in place with an extra 'd' appended to the end of the stem.
    Stems that do not start with 'đ' (including the empty string) are
    returned unchanged.
    """
    if not stem.startswith('đ'):
        return stem
    if choice([0, 1]) == 0:
        return stem.replace('đ', 'dd')
    return stem.replace('đ', 'd') + 'd'


def _telex_vowel(stem):
    """Rewrite *stem* using the first entry of ``vowels`` found in it.

    One of the entry's three (replacement, suffix) pairs is picked at random:
    the vowel is replaced by ``raw_telexN`` and ``raw_telexN_2`` is appended.
    Only the first matching entry is applied; with no match the stem is
    returned unchanged.
    """
    for v in vowels:
        if v.value in stem:
            variant = choice([1, 2, 3])
            if variant == 1:
                return stem.replace(v.value, v.raw_telex1) + v.raw_telex1_2
            if variant == 2:
                return stem.replace(v.value, v.raw_telex2) + v.raw_telex2_2
            return stem.replace(v.value, v.raw_telex3) + v.raw_telex3_2
    return stem


def _telex_word(word):
    """Convert a single space-separated token; empty tokens pass through."""
    if word == '':
        return word
    hyphenated = word.endswith('-')
    # Strip the trailing hyphen BEFORE rewriting 'đ'. Doing it afterwards
    # removed the freshly appended 'd' instead of the hyphen, and the hyphen
    # was then added a second time (e.g. 'đi-' -> 'di--').
    stem = word[:-1] if hyphenated else word
    converted = _telex_vowel(_telex_d(stem))
    if hyphenated:
        converted = converted + '-'
    return converted


def unit(standard0):
    """Build a randomised raw-Telex rendering of one row and store it.

    Args:
        standard0: object exposing ``id`` and a string ``value``.

    Newlines are removed from ``value``, the text is split on single spaces,
    each token is converted independently, and the tokens are re-joined with
    single spaces and terminated by one newline. Empty tokens (from runs of
    spaces) are preserved so the spacing of the input is kept.

    Side effects: prints the row id and value, then adds a ``RawTelexVal``
    row to ``db.session`` and commits it. Returns ``None``.
    """
    print(standard0.id, ':', standard0.value)
    words = standard0.value.replace(u'\n', '').split(' ')
    text = ' '.join(_telex_word(word) for word in words) + '\n'
    raw_telex = RawTelexVal(standard_id=standard0.id, raw_telex=text)
    db.session.add(raw_telex)
    db.session.commit()
|
|
|
|
# Sequential alternative, kept for reference:
# for i in standard0s:
#     unit(i)

# Hand all rows and the unit() callback to the project's
# engine_crawling_thread helper with num_threads=5 (presumably it fans the
# rows out across that many worker threads; confirm against the helper).
engine_crawling_thread(
    standard0s,
    unit=unit,
    num_threads=5,
)
|