3
# This file is part of dxdiff.
5
# dxdiff is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation, either version 3 of the License, or
8
# (at your option) any later version.
10
# dxdiff is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with Diamond. If not, see <http://www.gnu.org/licenses/>.
18
Find the LCS (Longest Common Subsequence). Uses [http://www.xmailserver.org/diff2.pdf]
21
from utils import irange
25
return [(xy, xy) for xy in irange(V[0][0])]
29
if k == -D or (k != D and V[D][k - 1] < V[D][k + 1]):
40
return __path(V, D - 1, k) + [(x + d, y + d) for d in irange(tx - x)]
42
def __eq(a, b): return a == b
44
def path(a, b, eq = __eq):
46
Finds the path through the match grid of sequence a and b,
47
using the function eq to determine equality.
55
if mn == 0: # two empty sequences
62
for k in irange(-D, D, 2):
63
if k == -D or (k != D and V[k - 1] < V[k + 1]):
69
while x < m and y < n and eq(a[x], b[y]):
77
return __path(Vd, D, k)
81
raise Exception("lcs should not reach here")
85
Given an edit script path returns the longest common subseqence.
90
for i in range(1, len(path)):
93
dx, dy = x - px, y - py
94
if dx == 1 and dy == 1:
95
result.append((px, py))
101
Returns an edit script for a given match grid path.
102
The edit script transforms sequence A of the match grid
103
into sequence B via deletions ("D", index) and inserations
104
("I", A index, B value).
108
for i in range(len(path) - 1):
111
dx, dy = nx - x, ny - y
112
if dx == 1 and dy == 1:
115
patch.append(("D", x))
117
patch.append(("I", x, b[y]))
123
Given a sequence and a patch from the ses function transforms a into b
152
class __Test_lcs(unittest.TestCase):
154
self.assertEqual(path("", ""), [(0, 0)])
155
self.assertEqual(path("", "a"), [(0, 0), (0, 1)])
156
self.assertEqual(path("a", ""), [(0, 0), (1, 0)])
158
def test_single(self):
159
self.assertEqual(path("a", "a"), [(0, 0), (1, 1)])
160
self.assertEqual(path("a", "b"), [(0, 0), (1, 0), (1, 1)])
162
def test_short(self):
163
self.assertEqual(path("ab", "ab"), [(0, 0), (1, 1), (2, 2)])
164
self.assertEqual(path("ab", "ac"), [(0, 0), (1, 1), (2, 1), (2, 2)])
165
self.assertEqual(path("abcabba", "cbabac"), [(0, 0), (1, 0), (2, 0), (3, 1), (3, 2), (4, 3), (5, 4), (6, 4), (7, 5), (7, 6)])
166
self.assertEqual(path("hello", "help me"), [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (5, 3), (5, 4), (5, 5), (5, 6), (5, 7)])
169
self.assertEqual(path("hello", "night night"),
170
[(0, 0), (0, 1), (0, 2), (0, 3), (1, 4), (2, 4),
171
(3, 4), (4, 4), (5, 4), (5, 5), (5, 6), (5, 7),
172
(5, 8), (5, 9), (5, 10), (5, 11)])
174
self.assertEqual(path([
176
"document has stayed the",
177
"same from version to",
178
"version. It shouldn't",
179
"be shown if it doesn't",
180
"change. Otherwise, that",
181
"would not be helping to",
182
"compress the size of the",
185
"This paragraph contains",
186
"text that is outdated.",
187
"It will be deleted in the",
190
"It is important to spell",
191
"check this dokument. On",
193
"misspelled word isn't",
194
"the end of the world.",
195
"Nothing in the rest of",
196
"this paragraph needs to",
197
"be changed. Things can",
200
"This is an important",
202
"therefore be located at",
203
"the beginning of this",
207
"document has stayed the",
208
"same from version to",
209
"version. It shouldn't",
210
"be shown if it doesn't",
211
"change. Otherwise, that",
212
"would not be helping to",
213
"compress anything.",
215
"It is important to spell",
216
"check this document. On",
218
"misspelled word isn't",
219
"the end of the world.",
220
"Nothing in the rest of",
221
"this paragraph needs to",
222
"be changed. Things can",
223
"be added after it.",
225
"This paragraph contains",
226
"important new additions",
228
]), [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4),
229
(0, 5), (0, 6), (1, 7), (2, 8), (3, 9),
230
(4, 10), (5, 11), (6, 12), (7, 13), (8, 13),
231
(9, 13), (9, 14), (10, 15), (11, 15), (12, 15),
232
(13, 15), (14, 15), (15, 15), (16, 16), (17, 16),
233
(17, 17), (18, 18), (19, 19), (20, 20), (21, 21),
234
(22, 22), (23, 23), (24, 24), (24, 25), (24, 26),
237
class __Test_diff(unittest.TestCase):
241
self.assertEqual(ses(p, ""), [])
243
def test_delete(self):
245
self.assertEqual(ses(p, ""), [("D", 0)])
248
self.assertEqual(ses(p, ""), [("D", 0), ("D", 1), ("D", 2), ("D", 3)])
250
p = path("abcd", "cd")
251
self.assertEqual(ses(p, "cd"), [("D", 0), ("D", 1)])
253
p = path("abcd", "ab")
254
self.assertEqual(ses(p, "ab"), [("D", 2), ("D", 3)])
256
p = path("abcd", "bc")
257
self.assertEqual(ses(p, "bc"), [("D", 0), ("D", 3)])
259
def test_insert(self):
261
self.assertEqual(ses(p, "a"), [("I", 0, "a")])
264
self.assertEqual(ses(p, "abcd"), [("I", 0, "a"), ("I", 0, "b"), ("I", 0, "c"), ("I", 0, "d")])
266
def test_delins(self):
267
p = path("abcd", "abef")
268
self.assertEqual(ses(p, "abef"), [("D", 2), ("D", 3), ("I", 4, "e"), ("I", 4, "f")])
270
class __Test_patch(unittest.TestCase):
271
def do_patch(self, a, b):
272
self.assertEqual(patch(ses(path(a, b), b), a), b)
274
def test_patch(self):
275
self.do_patch("", "hello")
276
self.do_patch("hello", "")
277
self.do_patch("hello", "hello")
278
self.do_patch("hello", "night night")
279
self.do_patch("hello", "help me")
280
self.do_patch("test bob", "kill bob")
281
self.do_patch("test bill", "test jane")
283
if __name__ == "__main__":