# spell_correction_n_gram/sandbox.py

from tools.check_number_english import is_number
from nltk.tokenize import word_tokenize
# Read the whole dictionary file into memory as one string.
# The variable was previously named `dict`, which shadowed the builtin type
# for the rest of the module; nothing else in this script reads it.
# NOTE(review): all three files are opened with the platform default
# encoding -- consider passing `encoding=` explicitly once the files'
# encoding is confirmed.
with open('./checkpoint/new_dictionary.txt') as f:
    dictionary_text = f.read()

# Load the two text files that are compared line by line further down.
# readlines() keeps each line's trailing newline.
with open('result1.txt', 'r') as f:
    lines0 = f.readlines()
with open('rl1.txt', 'r') as f:
    lines1 = f.readlines()

# Show how many lines each file holds. The loop below pairs lines by position
# and stops at the end of the shorter file, so a mismatch here means the
# surplus lines of the longer file are not compared.
print(len(lines0), len(lines1))

# NOTE: a one-off step that appended every line of lines0, with '-' removed,
# to result1.txt used to live here; it is disabled.

count = 0  # number of positions where both files hold identical text
for line, other in zip(lines0, lines1):
    # Strip only the line terminator. Slicing with [:-1] also chopped the
    # last real character off a final line that has no trailing newline,
    # which corrupted the output and could make equal lines compare unequal.
    text0 = line.rstrip('\n')
    text1 = other.rstrip('\n')

    # Build "<rl1 line>, <result1 line>", tokenize it, and drop every token
    # that is_number flags.
    # NOTE(review): assumes is_number returns a bool -- confirm.
    tokens = word_tokenize(text1 + ', ' + text0)
    item = ' '.join(token for token in tokens if not is_number(token))

    if text1 == text0:
        count += 1
    print(item)
    # To keep a log of the printed pairs, re-enable:
    # with open('labels_log2.txt', 'a+') as f:
    #     f.write(item + '\n')

# Total number of identical line pairs.
print(count)

# Sanity check: print what is_number reports for a lone comma.
a = ','
comma_result = is_number(a)
print(comma_result)