-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAlign.py
95 lines (85 loc) · 2.48 KB
/
Align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from metric import Correct_Rate, Accuracy, Align
from jiwer import wer, cer
import pandas as pd
# Align ref_our, human_our, ref_human
# This section scores the 'Predict' column against the 'Transcript' column of
# the CSV, one row at a time, and prints two corpus-level ratios.
tone = ['_1', '_2', '_3', '_4', '_5a', '_5b', '_6a', '_6b']
test = pd.read_csv("notone_NCCF.csv")

# Running totals over every row of the CSV.
del_sub_count = 0        # summed first return value of Correct_Rate
ins_del_sub_count = 0    # summed first return value of Accuracy
number_phoneme = 0       # summed sentence length as reported by Accuracy
del_All = []             # every item of Correct_Rate's third return value

for row in range(len(test)):
    transcript = test['Transcript'][row]
    prediction = test['Predict'][row]

    # Each helper receives freshly split token lists, so neither call can
    # observe changes the other one might make to its arguments.
    errors, _, deleted = Correct_Rate(transcript.split(" "),
                                      prediction.split(" "))
    del_All.extend(deleted)
    del_sub_count += errors

    errors, sentence_length = Accuracy(transcript.split(" "),
                                       prediction.split(" "))
    ins_del_sub_count += errors
    number_phoneme += sentence_length

# Presumably the "correct rate" (deletions + substitutions only) followed by
# the "accuracy" (insertions counted as well) -- inferred from the counter
# names; confirm against the metric module.
print((number_phoneme - del_sub_count) / number_phoneme)
print((number_phoneme - ins_del_sub_count) / number_phoneme)
# Write a per-utterance alignment report to ./ref_human_detail, comparing the
# 'Canonical' column (written as "ref") with the 'Transcript' column (written
# as "hyp"), then print the corpus-wide operation totals.
#
# For every row four lines are written, each prefixed with the row's 'Path':
#   <path> ref   <aligned canonical tokens>
#   <path> hyp   <aligned transcript tokens>
#   <path> op    <one of C / S / I / D per aligned position>
#   <path> #csid <correct> <substitutions> <insertions> <deletions>
cor_cnt = 0
sub_cnt = 0
ins_cnt = 0
del_cnt = 0

# NOTE: the file is opened in append mode, so re-running the script adds a
# second copy of every record instead of replacing the earlier report.
# The `with` block guarantees the handle is flushed and closed, even if a row
# fails part-way through.
with open("./ref_human_detail", 'a', encoding='utf-8') as f:
    for row in range(len(test)):
        path = str(test['Path'][row])
        seq1, seq2 = Align(test['Canonical'][row].split(" "),
                           test['Transcript'][row].split(" "))

        hyp_tokens = []
        ops = []
        # Walk the aligned pair position by position. Indexing seq2 (rather
        # than zipping) keeps the failure loud if Align ever returns a
        # shorter second sequence.
        for pos, ref_tok in enumerate(seq1):
            hyp_tok = seq2[pos]
            hyp_tokens.append(hyp_tok)
            if ref_tok != "<eps>" and hyp_tok == "<eps>":
                ops.append("D")    # reference token with no counterpart
            elif ref_tok == "<eps>" and hyp_tok != "<eps>":
                ops.append("I")    # extra token in the hypothesis
            elif ref_tok != hyp_tok:
                # Both sides are real tokens here (the gap cases were
                # handled above), so a mismatch is a substitution.
                ops.append("S")
            else:
                # Equal tokens, including the "<eps>" vs "<eps>" case.
                ops.append("C")

        cor = ops.count("C")
        sub = ops.count("S")
        ins = ops.count("I")
        dell = ops.count("D")
        cor_cnt += cor
        sub_cnt += sub
        ins_cnt += ins
        del_cnt += dell

        # Every token is followed by a single space (so each field ends with
        # a trailing space), matching the existing report format exactly.
        REF = "".join(tok + " " for tok in seq1)
        HYP = "".join(tok + " " for tok in hyp_tokens)
        OP = "".join(op + " " for op in ops)

        f.write(f"{path} ref {REF}\n")
        f.write(f"{path} hyp {HYP}\n")
        f.write(f"{path} op {OP}\n")
        f.write(f"{path} #csid {cor} {sub} {ins} {dell}\n")

print(cor_cnt)
print(sub_cnt)
print(ins_cnt)
print(del_cnt)