spell_correction_n_gram/sandbox.py
2019-12-20 16:29:35 +07:00

32 lines
869 B
Python

from tools.check_number_english import is_number
from nltk.tokenize import word_tokenize
# Load the dictionary checkpoint. Nothing in the visible part of this script
# reads the text afterwards; the read is kept so that a missing checkpoint
# file still fails right here. It is bound to `dictionary_text` instead of
# `dict` so the builtin `dict` type is not shadowed for the rest of the module.
with open('./checkpoint/new_dictionary.txt') as f:
    dictionary_text = f.read()

# Raw lines (line terminators included) of the two files compared below.
with open('result1.txt', 'r') as f:
    lines0 = f.readlines()
with open('rl1.txt', 'r') as f:
    lines1 = f.readlines()

# Print both line counts so a length mismatch between the files is visible.
print(len(lines0), len(lines1))
# for line in lines0:
# print(line.replace('-', ''))
# with open('result1.txt', 'a+') as f:
# f.write(line.replace('-', ''))
# Walk result1.txt and rl1.txt in lockstep: print each combined pair with
# numeric tokens removed, and count the pairs whose text is identical.
count = 0
# zip() pairs the lines positionally and stops at the shorter file, so a
# shorter rl1.txt no longer raises IndexError part-way through the loop.
for line, reference in zip(lines0, lines1):
    # Strip only the line terminator. Slicing with [:-1] also cut the last
    # real character off a final line that has no trailing newline, which
    # corrupted both the printed item and the equality check.
    line_text = line.rstrip('\n')
    reference_text = reference.rstrip('\n')

    item = reference_text + ', ' + line_text
    ls_word = word_tokenize(item)
    # Keep only the tokens is_number() rejects.
    # NOTE(review): assumes is_number() returns a bool - confirm.
    item = ' '.join(word for word in ls_word if not is_number(word))

    if reference_text == line_text:
        count = count + 1
    # NOTE(review): the item is printed for every pair, not only the matching
    # ones - confirm this is the intended nesting.
    print(item)
    # with open('labels_log2.txt', 'a+') as f:
    #     f.write(item + '\n')
print(count)
# Ad-hoc probe: print what is_number() returns for a lone comma.
a = ','
comma_check = is_number(a)
print(comma_check)