forked from sequitur-g2p/sequitur-g2p
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEditDistance.cc
126 lines (111 loc) · 3.56 KB
/
EditDistance.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
* $Id: EditDistance.cc 1667 2007-06-02 14:32:35Z max $
*
* Copyright (c) 2004-2005 RWTH Aachen University
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License Version 2 (June
* 1991) as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you will find it at
* http://www.gnu.org/licenses/gpl.html, or write to the Free Software
* Foundation, Inc., 51 Franlin Street, Fifth Floor, Boston, MA 02110,
* USA.
*
* Should a provision of no. 9 and 10 of the GNU General Public License
* be invalid or become invalid, a valid provision is deemed to have been
* agreed upon which comes closest to what the parties intended
* commercially. In any case guarantee/warranty shall be limited to gross
* negligent actions or intended actions or fraudulent concealment.
*/
#include "Assertions.hh"
#include <vector>
struct Hyp {
int cost;
int pre_i, pre_j;
Hyp() {};
Hyp(int _cost, int _pre_i, int _pre_j) : cost(_cost), pre_i(_pre_i), pre_j(_pre_j) {}
};
PyObject *python_align(PyObject *self, PyObject *args) {
struct SubstitutionCost {
int operator() (PyObject *a, PyObject *b) {
return (PyObject_Compare(a, b)) ? 1 : 0;
}
};
SubstitutionCost sub_cost;
PyObject *a = 0, *b = 0;
if (!PyArg_ParseTuple(args, "OO", &a, &b)) return NULL;
if (!PySequence_Check(a)) return NULL;
if (!PySequence_Check(b)) return NULL;
int len_a = PyObject_Length(a);
int len_b = PyObject_Length(b);
std::vector< std::vector<Hyp> > D(len_a + 1, std::vector<Hyp>(len_b + 1));
int c;
D[0][0] = Hyp(0, 0, 0);
for (int j = 1 ; j <= len_b ; ++j) {
c = D[0][j-1].cost;
c += 1; // del_cost(b[j-1])
D[0][j] = Hyp(c, 0, j-1);
}
for (int i = 1 ; i <= len_a ; ++i) {
PyObject *ai = PySequence_GetItem(a, i-1);
c = D[i-1][0].cost;
c += 1; // ins_cost(ai);
D[i][0] = Hyp(c, i-1, 0);
for (int j = 1 ; j <= len_b ; ++j) {
PyObject *bj = PySequence_GetItem(b, j-1);
c = D[i][j-1].cost;
c += 1; // del_cost(bj)
if (true)
D[i][j] = Hyp(c, i, j-1);
c = D[i-1][j].cost;
c += 1; // ins_cost(ai])
if (c < D[i][j].cost)
D[i][j] = Hyp(c, i-1, j);
c = D[i-1][j-1].cost;
c += sub_cost(ai, bj);
if (c < D[i][j].cost)
D[i][j] = Hyp(c, i-1, j-1);
Py_DECREF(bj);
}
Py_DECREF(ai);
}
// traceback
PyObject *alignment = PyList_New(0);
int i = len_a;
int j = len_b;
while (i > 0 || j > 0) {
Hyp &h(D[i][j]);
// alignment.append((a[pi:i], b[pj:j]))
PyObject *p = 0;
if (h.pre_i == i-1 && h.pre_j == j) {
p = Py_BuildValue("(N,O)",
PySequence_GetItem(a, h.pre_i),
Py_None);
} else if (h.pre_i == i && h.pre_j == j-1) {
p = Py_BuildValue("(O,N)",
Py_None,
PySequence_GetItem(b, h.pre_j));
} else if (h.pre_i == i-1 && h.pre_j == j-1) {
p = Py_BuildValue("(N,N)",
PySequence_GetItem(a, h.pre_i),
PySequence_GetItem(b, h.pre_j));
} else {
defect();
return NULL;
}
PyList_Append(alignment, p); Py_DECREF(p);
i = h.pre_i;
j = h.pre_j;
}
PyList_Reverse(alignment);
PyObject *result = Py_BuildValue("Oi", alignment, D[len_a][len_b].cost);
Py_DECREF(alignment);
return result;
}