# Script entry point: prune a range of word lattices and print summary
# statistics (lattice word error rate, density, node and arc counts).
#
# NOTE(review): the bare integer lines (10, 11, 12, 19, ...) look like line
# numbers left over from an extraction of the original script, and the
# statements that belong between them appear to be missing -- confirm
# against the original file. Names such as `command`, `r`, `c`, `sentcount`,
# `wer`, `density`, `nodecount` and `edgecount` are bound in lines that are
# not visible here.
10
if __name__ == '__main__':
11
# Exactly ten positional arguments are required (argv[0] plus ten values).
if len(sys.argv) != 11:
12
# Report the expected argument order on stderr.
sys.stderr.write("Usage: %s ABEAM NBEAM LMWEIGHT LMFILE DENLATDIR PRUNED_DENLATDIR FILELIST TRANSFILE FILECOUNT FILEOFFSET\n" % (sys.argv[0]))
19
# Echo `command` (built in lines not shown); the explicit "\n" adds a blank
# line after it because the print statement already appends a newline.
print "%s\n" % command
21
# Unpack the ten arguments in the order given by the usage message.
# NOTE(review): every value is still a string here; no conversion of
# abeam / nbeam / lw to numbers is visible before they are passed to the
# lattice methods below -- confirm.
abeam, nbeam, lw, lmfile, denlatdir, pruned_denlatdir, ctlfile, transfile, filecount, fileoffset = sys.argv[1:]
26
# Half-open range [start, end) of entries to process.
start = int(fileoffset)
27
end = int(fileoffset) + int(filecount)
30
# Load the language model from lmfile.
lm = sphinxbase.NGramModel(lmfile)
33
# Open the control file (FILELIST argument); how it is read is not shown.
f = open(ctlfile, 'r')
37
# read transcription file
38
# NOTE(review): `f` is rebound here; no close of the control-file handle is
# visible in this excerpt -- confirm it is closed beforehand.
f = open(transfile, 'r')
47
# prune lattices one by one
48
for i in range(start, end):
52
# Make sure the reference word list `r` starts with '<s>' and ends with
# '</s>'.
# NOTE(review): r[0] / r[-1] raise IndexError if `r` is empty -- confirm the
# transcription can never be empty.
if r[0] != '<s>': r.insert(0, '<s>')
53
if r[-1] != '</s>': r.append('</s>')
54
# Drop every word that lattice.is_filler() reports as a filler. Under
# Python 2 (this script uses print statements) filter() returns a list, so
# len(r) below is valid.
r = filter(lambda x: not lattice.is_filler(x), r)
56
print "process sent: %s" % c
59
print "\t load lattice ..."
60
# The input lattice is read from <denlatdir>/<c>.lat.gz.
dag = lattice.Dag(os.path.join(denlatdir, c + ".lat.gz"))
62
dag.remove_unreachable()
65
# Score the edges using the language model and the LM weight.
# (presumably unigram LM scores, going by the method name -- confirm)
dag.edges_unigram_score(lm,lw)
69
print "\t edge pruning ..."
70
# Edge pruning in both directions with the same beam, followed by a
# clean-up of anything that became unreachable.
dag.forward_edge_prune(abeam)
71
dag.backward_edge_prune(abeam)
72
dag.remove_unreachable()
75
print "\t node pruning ..."
76
# Node pruning with its own beam, again followed by a clean-up.
dag.post_node_prune(nbeam)
77
dag.remove_unreachable()
80
# Error of the pruned lattice against the reference `r`; `bt` is not used
# in the lines visible here.
err, bt = dag.minimum_error(r)
83
print "\t saving pruned lattice ...\n"
84
# The pruned lattice is written to <pruned_denlatdir>/<c>.lat.gz.
dag.dag2sphinx(os.path.join(pruned_denlatdir, c + ".lat.gz"))
87
# Accumulate statistics; error and density are normalised by the length of
# the filtered reference `r`.
nodecount += dag.n_nodes()
88
edgecount += dag.n_edges()
89
wer += float(err) / len(r)
90
density += float(dag.n_edges())/len(r)
92
# Final report: each accumulated total is averaged over `sentcount`.
# NOTE(review): these divisions raise ZeroDivisionError if sentcount is 0 --
# confirm how sentcount is maintained in the lines not shown.
print "Average Lattice Word Error Rate: %.2f%%" % (wer / sentcount * 100)
93
print "Average Lattice Density: %.2f" % (float(density) / sentcount)
94
print "Average Number of Node: %.2f" % (float(nodecount) / sentcount)
95
print "Average Number of Arc: %.2f" % (float(edgecount) / sentcount)