~widelands-dev/widelands-website/django_staticfiles

173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1
#!/usr/bin/python2.4
2
3
# Taken from google because they do not provide a setup.py file. Thanks anyway
4
438.1.6 by franku
run the script
5
"""Diff Match and Patch.
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
6
7
Copyright 2006 Google Inc.
8
http://code.google.com/p/google-diff-match-patch/
9
10
Licensed under the Apache License, Version 2.0 (the "License");
11
you may not use this file except in compliance with the License.
12
You may obtain a copy of the License at
13
14
  http://www.apache.org/licenses/LICENSE-2.0
15
16
Unless required by applicable law or agreed to in writing, software
17
distributed under the License is distributed on an "AS IS" BASIS,
18
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
See the License for the specific language governing permissions and
20
limitations under the License.
438.1.6 by franku
run the script
21
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
22
"""
23
24
"""Functions for diff, match and patch.
25
26
Computes the difference between two texts to create a patch.
27
Applies the patch onto another text, allowing for errors.
28
"""
29
30
__author__ = 'fraser@google.com (Neil Fraser)'
31
32
import math
33
import time
34
import urllib
35
import re
36
438.1.6 by franku
run the script
37
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
38
class diff_match_patch:
438.1.6 by franku
run the script
39
    """Class containing the diff, match and patch methods.
40
41
    Also contains the behaviour settings.
42
43
    """
44
45
    def __init__(self):
46
        """Inits a diff_match_patch object with default settings.
47
48
        Redefine these in your program to override the defaults.
49
50
        """
51
52
        # Number of seconds to map a diff before giving up (0 for infinity).
53
        self.Diff_Timeout = 1.0
54
        # Cost of an empty edit operation in terms of edit characters.
55
        self.Diff_EditCost = 4
56
        # The size beyond which the double-ended diff activates.
57
        # Double-ending is twice as fast, but less accurate.
58
        self.Diff_DualThreshold = 32
59
        # At what point is no match declared (0.0 = perfection, 1.0 = very
60
        # loose).
61
        self.Match_Threshold = 0.5
62
        # How far to search for a match (0 = exact location, 1000+ = broad match).
63
        # A match this many characters away from the expected location will add
64
        # 1.0 to the score (0.0 is a perfect match).
65
        self.Match_Distance = 1000
66
        # When deleting a large block of text (over ~64 characters), how close does
67
        # the contents have to match the expected contents. (0.0 = perfection,
68
        # 1.0 = very loose).  Note that Match_Threshold controls how closely the
69
        # end points of a delete need to match.
70
        self.Patch_DeleteThreshold = 0.5
71
        # Chunk size for context length.
72
        self.Patch_Margin = 4
73
74
        # How many bits in a number?
75
        # Python has no maximum, thus to disable patch splitting set to 0.
76
        # However to avoid long patches in certain pathological cases, use 32.
77
        # Multiple short patches (using native ints) are much faster than long
78
        # ones.
79
        self.Match_MaxBits = 32
80
81
    #  DIFF FUNCTIONS
82
83
    # The data structure representing a diff is an array of tuples:
84
    # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
85
    # which means: delete "Hello", add "Goodbye" and keep " world."
86
    DIFF_DELETE = -1
87
    DIFF_INSERT = 1
88
    DIFF_EQUAL = 0
89
90
    def diff_main(self, text1, text2, checklines=True):
91
        """Find the differences between two texts.  Simplifies the problem by
92
        stripping any common prefix or suffix off the texts before diffing.
93
94
        Args:
95
          text1: Old string to be diffed.
96
          text2: New string to be diffed.
97
          checklines: Optional speedup flag.  If present and false, then don't run
98
            a line-level diff first to identify the changed areas.
99
            Defaults to true, which does a faster, slightly less optimal diff.
100
101
        Returns:
102
          Array of changes.
103
104
        """
105
106
        # Check for equality (speedup)
107
        if text1 == text2:
108
            return [(self.DIFF_EQUAL, text1)]
109
110
        # Trim off common prefix (speedup)
111
        commonlength = self.diff_commonPrefix(text1, text2)
112
        commonprefix = text1[:commonlength]
113
        text1 = text1[commonlength:]
114
        text2 = text2[commonlength:]
115
116
        # Trim off common suffix (speedup)
117
        commonlength = self.diff_commonSuffix(text1, text2)
118
        if commonlength == 0:
119
            commonsuffix = ''
120
        else:
121
            commonsuffix = text1[-commonlength:]
122
            text1 = text1[:-commonlength]
123
            text2 = text2[:-commonlength]
124
125
        # Compute the diff on the middle block
126
        diffs = self.diff_compute(text1, text2, checklines)
127
128
        # Restore the prefix and suffix
129
        if commonprefix:
130
            diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
131
        if commonsuffix:
132
            diffs.append((self.DIFF_EQUAL, commonsuffix))
133
        self.diff_cleanupMerge(diffs)
134
        return diffs
135
136
    def diff_compute(self, text1, text2, checklines):
137
        """Find the differences between two texts.  Assumes that the texts do
138
        not have any common prefix or suffix.
139
140
        Args:
141
          text1: Old string to be diffed.
142
          text2: New string to be diffed.
143
          checklines: Speedup flag.  If false, then don't run a line-level diff
144
            first to identify the changed areas.
145
            If true, then run a faster, slightly less optimal diff.
146
147
        Returns:
148
          Array of changes.
149
150
        """
151
        if not text1:
152
            # Just add some text (speedup)
153
            return [(self.DIFF_INSERT, text2)]
154
155
        if not text2:
156
            # Just delete some text (speedup)
157
            return [(self.DIFF_DELETE, text1)]
158
159
        if len(text1) > len(text2):
160
            (longtext, shorttext) = (text1, text2)
161
        else:
162
            (shorttext, longtext) = (text1, text2)
163
        i = longtext.find(shorttext)
164
        if i != -1:
165
            # Shorter text is inside the longer text (speedup)
166
            diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
167
                     (self.DIFF_INSERT, longtext[i + len(shorttext):])]
168
            # Swap insertions for deletions if diff is reversed.
169
            if len(text1) > len(text2):
170
                diffs[0] = (self.DIFF_DELETE, diffs[0][1])
171
                diffs[2] = (self.DIFF_DELETE, diffs[2][1])
172
            return diffs
173
        longtext = shorttext = None  # Garbage collect.
174
175
        # Check to see if the problem can be split in two.
176
        hm = self.diff_halfMatch(text1, text2)
177
        if hm:
178
            # A half-match was found, sort out the return data.
179
            (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
180
            # Send both pairs off for separate processing.
181
            diffs_a = self.diff_main(text1_a, text2_a, checklines)
182
            diffs_b = self.diff_main(text1_b, text2_b, checklines)
183
            # Merge the results.
184
            return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
185
186
        # Perform a real diff.
187
        if checklines and (len(text1) < 100 or len(text2) < 100):
188
            checklines = False  # Too trivial for the overhead.
189
        if checklines:
190
            # Scan the text on a line-by-line basis first.
191
            (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
192
193
        diffs = self.diff_map(text1, text2)
194
        if not diffs:  # No acceptable result.
195
            diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
196
        if checklines:
197
            # Convert the diff back to original text.
198
            self.diff_charsToLines(diffs, linearray)
199
            # Eliminate freak matches (e.g. blank lines)
200
            self.diff_cleanupSemantic(diffs)
201
202
            # Rediff any replacement blocks, this time character-by-character.
203
            # Add a dummy entry at the end.
204
            diffs.append((self.DIFF_EQUAL, ''))
205
            pointer = 0
206
            count_delete = 0
207
            count_insert = 0
208
            text_delete = ''
209
            text_insert = ''
210
            while pointer < len(diffs):
211
                if diffs[pointer][0] == self.DIFF_INSERT:
212
                    count_insert += 1
213
                    text_insert += diffs[pointer][1]
214
                elif diffs[pointer][0] == self.DIFF_DELETE:
215
                    count_delete += 1
216
                    text_delete += diffs[pointer][1]
217
                elif diffs[pointer][0] == self.DIFF_EQUAL:
218
                    # Upon reaching an equality, check for prior redundancies.
219
                    if count_delete >= 1 and count_insert >= 1:
220
                        # Delete the offending records and add the merged ones.
221
                        a = self.diff_main(text_delete, text_insert, False)
222
                        diffs[pointer - count_delete -
223
                              count_insert: pointer] = a
224
                        pointer = pointer - count_delete - \
225
                            count_insert + len(a)
226
                    count_insert = 0
227
                    count_delete = 0
228
                    text_delete = ''
229
                    text_insert = ''
230
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
231
                pointer += 1
438.1.6 by franku
run the script
232
233
            diffs.pop()  # Remove the dummy entry at the end.
234
        return diffs
235
236
    def diff_linesToChars(self, text1, text2):
237
        """Split two texts into an array of strings.  Reduce the texts to a
238
        string of hashes where each Unicode character represents one line.
239
240
        Args:
241
          text1: First string.
242
          text2: Second string.
243
244
        Returns:
245
          Three element tuple, containing the encoded text1, the encoded text2 and
246
          the array of unique strings.  The zeroth element of the array of unique
247
          strings is intentionally blank.
248
249
        """
250
        lineArray = []  # e.g. lineArray[4] == "Hello\n"
251
        lineHash = {}   # e.g. lineHash["Hello\n"] == 4
252
253
        # "\x00" is a valid character, but various debuggers don't like it.
254
        # So we'll insert a junk entry to avoid generating a null character.
255
        lineArray.append('')
256
257
        def diff_linesToCharsMunge(text):
258
            """Split a text into an array of strings.  Reduce the texts to a
259
            string of hashes where each Unicode character represents one line.
260
            Modifies linearray and linehash through being a closure.
261
262
            Args:
263
              text: String to encode.
264
265
            Returns:
266
              Encoded string.
267
268
            """
269
            chars = []
270
            # Walk the text, pulling out a substring for each line.
271
            # text.split('\n') would would temporarily double our memory footprint.
272
            # Modifying text would create many large strings to garbage
273
            # collect.
274
            lineStart = 0
275
            lineEnd = -1
276
            while lineEnd < len(text) - 1:
277
                lineEnd = text.find('\n', lineStart)
278
                if lineEnd == -1:
279
                    lineEnd = len(text) - 1
280
                line = text[lineStart:lineEnd + 1]
281
                lineStart = lineEnd + 1
282
283
                if line in lineHash:
284
                    chars.append(unichr(lineHash[line]))
285
                else:
286
                    lineArray.append(line)
287
                    lineHash[line] = len(lineArray) - 1
288
                    chars.append(unichr(len(lineArray) - 1))
289
            return ''.join(chars)
290
291
        chars1 = diff_linesToCharsMunge(text1)
292
        chars2 = diff_linesToCharsMunge(text2)
293
        return (chars1, chars2, lineArray)
294
295
    def diff_charsToLines(self, diffs, lineArray):
296
        """Rehydrate the text in a diff from a string of line hashes to real
297
        lines of text.
298
299
        Args:
300
          diffs: Array of diff tuples.
301
          lineArray: Array of unique strings.
302
303
        """
304
        for x in xrange(len(diffs)):
305
            text = []
306
            for char in diffs[x][1]:
307
                text.append(lineArray[ord(char)])
308
            diffs[x] = (diffs[x][0], ''.join(text))
309
310
    def diff_map(self, text1, text2):
311
        """Explore the intersection points between the two texts.
312
313
        Args:
314
          text1: Old string to be diffed.
315
          text2: New string to be diffed.
316
317
        Returns:
318
          Array of diff tuples or None if no diff available.
319
320
        """
321
322
        # Unlike in most languages, Python counts time in seconds.
323
        s_end = time.time() + self.Diff_Timeout  # Don't run for too long.
324
        # Cache the text lengths to prevent multiple calls.
325
        text1_length = len(text1)
326
        text2_length = len(text2)
327
        max_d = text1_length + text2_length - 1
328
        doubleEnd = self.Diff_DualThreshold * 2 < max_d
329
        v_map1 = []
330
        v_map2 = []
331
        v1 = {}
332
        v2 = {}
333
        v1[1] = 0
334
        v2[1] = 0
335
        footsteps = {}
336
        done = False
337
        # If the total number of characters is odd, then the front path will
338
        # collide with the reverse path.
339
        front = (text1_length + text2_length) % 2
340
        for d in xrange(max_d):
341
            # Bail out if timeout reached.
342
            if self.Diff_Timeout > 0 and time.time() > s_end:
343
                return None
344
345
            # Walk the front path one step.
346
            v_map1.append({})
347
            for k in xrange(-d, d + 1, 2):
348
                if k == -d or k != d and v1[k - 1] < v1[k + 1]:
349
                    x = v1[k + 1]
350
                else:
351
                    x = v1[k - 1] + 1
352
                y = x - k
353
                if doubleEnd:
354
                    footstep = (x, y)
355
                    if front and footstep in footsteps:
356
                        done = True
357
                    if not front:
358
                        footsteps[footstep] = d
359
360
                while (not done and x < text1_length and y < text2_length and
361
                       text1[x] == text2[y]):
362
                    x += 1
363
                    y += 1
364
                    if doubleEnd:
365
                        footstep = (x, y)
366
                        if front and footstep in footsteps:
367
                            done = True
368
                        if not front:
369
                            footsteps[footstep] = d
370
371
                v1[k] = x
372
                v_map1[d][(x, y)] = True
373
                if x == text1_length and y == text2_length:
374
                    # Reached the end in single-path mode.
375
                    return self.diff_path1(v_map1, text1, text2)
376
                elif done:
377
                    # Front path ran over reverse path.
378
                    v_map2 = v_map2[:footsteps[footstep] + 1]
379
                    a = self.diff_path1(v_map1, text1[:x], text2[:y])
380
                    b = self.diff_path2(v_map2, text1[x:], text2[y:])
381
                    return a + b
382
383
            if doubleEnd:
384
                # Walk the reverse path one step.
385
                v_map2.append({})
386
                for k in xrange(-d, d + 1, 2):
387
                    if k == -d or k != d and v2[k - 1] < v2[k + 1]:
388
                        x = v2[k + 1]
389
                    else:
390
                        x = v2[k - 1] + 1
391
                    y = x - k
392
                    footstep = (text1_length - x, text2_length - y)
393
                    if not front and footstep in footsteps:
394
                        done = True
395
                    if front:
396
                        footsteps[footstep] = d
397
                    while (not done and x < text1_length and y < text2_length and
398
                           text1[-x - 1] == text2[-y - 1]):
399
                        x += 1
400
                        y += 1
401
                        footstep = (text1_length - x, text2_length - y)
402
                        if not front and footstep in footsteps:
403
                            done = True
404
                        if front:
405
                            footsteps[footstep] = d
406
407
                    v2[k] = x
408
                    v_map2[d][(x, y)] = True
409
                    if done:
410
                        # Reverse path ran over front path.
411
                        v_map1 = v_map1[:footsteps[footstep] + 1]
412
                        a = self.diff_path1(v_map1, text1[:text1_length - x],
413
                                            text2[:text2_length - y])
414
                        b = self.diff_path2(v_map2, text1[text1_length - x:],
415
                                            text2[text2_length - y:])
416
                        return a + b
417
418
        # Number of diffs equals number of characters, no commonality at all.
419
        return None
420
421
    def diff_path1(self, v_map, text1, text2):
422
        """Work from the middle back to the start to determine the path.
423
424
        Args:
425
          v_map: Array of paths.
426
          text1: Old string fragment to be diffed.
427
          text2: New string fragment to be diffed.
428
429
        Returns:
430
          Array of diff tuples.
431
432
        """
433
        path = []
434
        x = len(text1)
435
        y = len(text2)
436
        last_op = None
437
        for d in xrange(len(v_map) - 2, -1, -1):
438
            while True:
439
                if (x - 1, y) in v_map[d]:
440
                    x -= 1
441
                    if last_op == self.DIFF_DELETE:
442
                        path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
443
                    else:
444
                        path[:0] = [(self.DIFF_DELETE, text1[x])]
445
                    last_op = self.DIFF_DELETE
446
                    break
447
                elif (x, y - 1) in v_map[d]:
448
                    y -= 1
449
                    if last_op == self.DIFF_INSERT:
450
                        path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
451
                    else:
452
                        path[:0] = [(self.DIFF_INSERT, text2[y])]
453
                    last_op = self.DIFF_INSERT
454
                    break
455
                else:
456
                    x -= 1
457
                    y -= 1
458
                    assert text1[x] == text2[y], ('No diagonal.  ' +
459
                                                  "Can't happen. (diff_path1)")
460
                    if last_op == self.DIFF_EQUAL:
461
                        path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
462
                    else:
463
                        path[:0] = [(self.DIFF_EQUAL, text1[x])]
464
                    last_op = self.DIFF_EQUAL
465
        return path
466
467
    def diff_path2(self, v_map, text1, text2):
468
        """Work from the middle back to the end to determine the path.
469
470
        Args:
471
          v_map: Array of paths.
472
          text1: Old string fragment to be diffed.
473
          text2: New string fragment to be diffed.
474
475
        Returns:
476
          Array of diff tuples.
477
478
        """
479
        path = []
480
        x = len(text1)
481
        y = len(text2)
482
        last_op = None
483
        for d in xrange(len(v_map) - 2, -1, -1):
484
            while True:
485
                if (x - 1, y) in v_map[d]:
486
                    x -= 1
487
                    if last_op == self.DIFF_DELETE:
488
                        path[-1] = (self.DIFF_DELETE, path[-1]
489
                                    [1] + text1[-x - 1])
490
                    else:
491
                        path.append((self.DIFF_DELETE, text1[-x - 1]))
492
                    last_op = self.DIFF_DELETE
493
                    break
494
                elif (x, y - 1) in v_map[d]:
495
                    y -= 1
496
                    if last_op == self.DIFF_INSERT:
497
                        path[-1] = (self.DIFF_INSERT, path[-1]
498
                                    [1] + text2[-y - 1])
499
                    else:
500
                        path.append((self.DIFF_INSERT, text2[-y - 1]))
501
                    last_op = self.DIFF_INSERT
502
                    break
503
                else:
504
                    x -= 1
505
                    y -= 1
506
                    assert text1[-x - 1] == text2[-y - 1], ('No diagonal.  ' +
507
                                                            "Can't happen. (diff_path2)")
508
                    if last_op == self.DIFF_EQUAL:
509
                        path[-1] = (self.DIFF_EQUAL, path[-1]
510
                                    [1] + text1[-x - 1])
511
                    else:
512
                        path.append((self.DIFF_EQUAL, text1[-x - 1]))
513
                    last_op = self.DIFF_EQUAL
514
        return path
515
516
    def diff_commonPrefix(self, text1, text2):
517
        """Determine the common prefix of two strings.
518
519
        Args:
520
          text1: First string.
521
          text2: Second string.
522
523
        Returns:
524
          The number of characters common to the start of each string.
525
526
        """
527
        # Quick check for common null cases.
528
        if not text1 or not text2 or text1[0] != text2[0]:
529
            return 0
530
        # Binary search.
531
        # Performance analysis: http://neil.fraser.name/news/2007/10/09/
532
        pointermin = 0
533
        pointermax = min(len(text1), len(text2))
534
        pointermid = pointermax
535
        pointerstart = 0
536
        while pointermin < pointermid:
537
            if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
538
                pointermin = pointermid
539
                pointerstart = pointermin
540
            else:
541
                pointermax = pointermid
542
            pointermid = int((pointermax - pointermin) / 2 + pointermin)
543
        return pointermid
544
545
    def diff_commonSuffix(self, text1, text2):
546
        """Determine the common suffix of two strings.
547
548
        Args:
549
          text1: First string.
550
          text2: Second string.
551
552
        Returns:
553
          The number of characters common to the end of each string.
554
555
        """
556
        # Quick check for common null cases.
557
        if not text1 or not text2 or text1[-1] != text2[-1]:
558
            return 0
559
        # Binary search.
560
        # Performance analysis: http://neil.fraser.name/news/2007/10/09/
561
        pointermin = 0
562
        pointermax = min(len(text1), len(text2))
563
        pointermid = pointermax
564
        pointerend = 0
565
        while pointermin < pointermid:
566
            if (text1[-pointermid:len(text1) - pointerend] ==
567
                    text2[-pointermid:len(text2) - pointerend]):
568
                pointermin = pointermid
569
                pointerend = pointermin
570
            else:
571
                pointermax = pointermid
572
            pointermid = int((pointermax - pointermin) / 2 + pointermin)
573
        return pointermid
574
575
    def diff_halfMatch(self, text1, text2):
576
        """Do the two texts share a substring which is at least half the length
577
        of the longer text?
578
579
        Args:
580
          text1: First string.
581
          text2: Second string.
582
583
        Returns:
584
          Five element Array, containing the prefix of text1, the suffix of text1,
585
          the prefix of text2, the suffix of text2 and the common middle.  Or None
586
          if there was no match.
587
588
        """
589
        if len(text1) > len(text2):
590
            (longtext, shorttext) = (text1, text2)
591
        else:
592
            (shorttext, longtext) = (text1, text2)
593
        if len(longtext) < 10 or len(shorttext) < 1:
594
            return None  # Pointless.
595
596
        def diff_halfMatchI(longtext, shorttext, i):
597
            """Does a substring of shorttext exist within longtext such that
598
            the substring is at least half the length of longtext? Closure, but
599
            does not reference any external variables.
600
601
            Args:
602
              longtext: Longer string.
603
              shorttext: Shorter string.
604
              i: Start index of quarter length substring within longtext.
605
606
            Returns:
607
              Five element Array, containing the prefix of longtext, the suffix of
608
              longtext, the prefix of shorttext, the suffix of shorttext and the
609
              common middle.  Or None if there was no match.
610
611
            """
612
            seed = longtext[i:i + len(longtext) / 4]
613
            best_common = ''
614
            j = shorttext.find(seed)
615
            while j != -1:
616
                prefixLength = self.diff_commonPrefix(
617
                    longtext[i:], shorttext[j:])
618
                suffixLength = self.diff_commonSuffix(
619
                    longtext[:i], shorttext[:j])
620
                if len(best_common) < suffixLength + prefixLength:
621
                    best_common = (shorttext[j - suffixLength:j] +
622
                                   shorttext[j:j + prefixLength])
623
                    best_longtext_a = longtext[:i - suffixLength]
624
                    best_longtext_b = longtext[i + prefixLength:]
625
                    best_shorttext_a = shorttext[:j - suffixLength]
626
                    best_shorttext_b = shorttext[j + prefixLength:]
627
                j = shorttext.find(seed, j + 1)
628
629
            if len(best_common) >= len(longtext) / 2:
630
                return (best_longtext_a, best_longtext_b,
631
                        best_shorttext_a, best_shorttext_b, best_common)
632
            else:
633
                return None
634
635
        # First check if the second quarter is the seed for a half-match.
636
        hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
637
        # Check again based on the third quarter.
638
        hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
639
        if not hm1 and not hm2:
640
            return None
641
        elif not hm2:
642
            hm = hm1
643
        elif not hm1:
644
            hm = hm2
645
        else:
646
            # Both matched.  Select the longest.
647
            if len(hm1[4]) > len(hm2[4]):
648
                hm = hm1
649
            else:
650
                hm = hm2
651
652
        # A half-match was found, sort out the return data.
653
        if len(text1) > len(text2):
654
            (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
655
        else:
656
            (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
657
        return (text1_a, text1_b, text2_a, text2_b, mid_common)
658
659
    def diff_cleanupSemantic(self, diffs):
660
        """Reduce the number of edits by eliminating semantically trivial
661
        equalities.
662
663
        Args:
664
          diffs: Array of diff tuples.
665
666
        """
667
        changes = False
668
        equalities = []  # Stack of indices where equalities are found.
669
        lastequality = None  # Always equal to equalities[-1][1]
670
        pointer = 0  # Index of current position.
671
        # Number of chars that changed prior to the equality.
672
        length_changes1 = 0
673
        length_changes2 = 0  # Number of chars that changed after the equality.
674
        while pointer < len(diffs):
675
            if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
676
                equalities.append(pointer)
677
                length_changes1 = length_changes2
678
                length_changes2 = 0
679
                lastequality = diffs[pointer][1]
680
            else:  # an insertion or deletion
681
                length_changes2 += len(diffs[pointer][1])
682
                if (lastequality != None and (len(lastequality) <= length_changes1) and
683
                        (len(lastequality) <= length_changes2)):
684
                    # Duplicate record
685
                    diffs.insert(equalities[-1],
686
                                 (self.DIFF_DELETE, lastequality))
687
                    # Change second copy to insert.
688
                    diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
689
                                                 diffs[equalities[-1] + 1][1])
690
                    # Throw away the equality we just deleted.
691
                    equalities.pop()
692
                    # Throw away the previous equality (it needs to be
693
                    # reevaluated).
694
                    if len(equalities) != 0:
695
                        equalities.pop()
696
                    if len(equalities):
697
                        pointer = equalities[-1]
698
                    else:
699
                        pointer = -1
700
                    length_changes1 = 0  # Reset the counters.
701
                    length_changes2 = 0
702
                    lastequality = None
703
                    changes = True
704
            pointer += 1
705
706
        if changes:
707
            self.diff_cleanupMerge(diffs)
708
709
        self.diff_cleanupSemanticLossless(diffs)
710
711
    def diff_cleanupSemanticLossless(self, diffs):
712
        """Look for single edits surrounded on both sides by equalities
713
        which can be shifted sideways to align the edit to a word boundary.
714
        e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
715
716
        Args:
717
          diffs: Array of diff tuples.
718
        """
719
720
        def diff_cleanupSemanticScore(one, two):
721
            """Given two strings, compute a score representing whether the
722
            internal boundary falls on logical boundaries. Scores range from 5
723
            (best) to 0 (worst). Closure, but does not reference any external
724
            variables.
725
726
            Args:
727
              one: First string.
728
              two: Second string.
729
730
            Returns:
731
              The score.
732
733
            """
734
            if not one or not two:
735
                # Edges are the best.
736
                return 5
737
738
            # Each port of this function behaves slightly differently due to
739
            # subtle differences in each language's definition of things like
740
            # 'whitespace'.  Since this function's purpose is largely cosmetic,
741
            # the choice has been made to use each language's native features
742
            # rather than force total conformity.
743
            score = 0
744
            # One point for non-alphanumeric.
745
            if not one[-1].isalnum() or not two[0].isalnum():
746
                score += 1
747
                # Two points for whitespace.
748
                if one[-1].isspace() or two[0].isspace():
749
                    score += 1
750
                    # Three points for line breaks.
751
                    if (one[-1] == '\r' or one[-1] == '\n' or
752
                            two[0] == '\r' or two[0] == '\n'):
753
                        score += 1
754
                        # Four points for blank lines.
755
                        if (re.search('\\n\\r?\\n$', one) or
756
                                re.match('^\\r?\\n\\r?\\n', two)):
757
                            score += 1
758
            return score
759
760
        pointer = 1
761
        # Intentionally ignore the first and last element (don't need
762
        # checking).
763
        while pointer < len(diffs) - 1:
764
            if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
765
                    diffs[pointer + 1][0] == self.DIFF_EQUAL):
766
                # This is a single edit surrounded by equalities.
767
                equality1 = diffs[pointer - 1][1]
768
                edit = diffs[pointer][1]
769
                equality2 = diffs[pointer + 1][1]
770
771
                # First, shift the edit as far left as possible.
772
                commonOffset = self.diff_commonSuffix(equality1, edit)
773
                if commonOffset:
774
                    commonString = edit[-commonOffset:]
775
                    equality1 = equality1[:-commonOffset]
776
                    edit = commonString + edit[:-commonOffset]
777
                    equality2 = commonString + equality2
778
779
                # Second, step character by character right, looking for the
780
                # best fit.
781
                bestEquality1 = equality1
782
                bestEdit = edit
783
                bestEquality2 = equality2
784
                bestScore = (diff_cleanupSemanticScore(equality1, edit) +
785
                             diff_cleanupSemanticScore(edit, equality2))
786
                while edit and equality2 and edit[0] == equality2[0]:
787
                    equality1 += edit[0]
788
                    edit = edit[1:] + equality2[0]
789
                    equality2 = equality2[1:]
790
                    score = (diff_cleanupSemanticScore(equality1, edit) +
791
                             diff_cleanupSemanticScore(edit, equality2))
792
                    # The >= encourages trailing rather than leading whitespace
793
                    # on edits.
794
                    if score >= bestScore:
795
                        bestScore = score
796
                        bestEquality1 = equality1
797
                        bestEdit = edit
798
                        bestEquality2 = equality2
799
800
                if diffs[pointer - 1][1] != bestEquality1:
801
                    # We have an improvement, save it back to the diff.
802
                    if bestEquality1:
803
                        diffs[pointer - 1] = (diffs[pointer - 1]
804
                                              [0], bestEquality1)
805
                    else:
806
                        del diffs[pointer - 1]
807
                        pointer -= 1
808
                    diffs[pointer] = (diffs[pointer][0], bestEdit)
809
                    if bestEquality2:
810
                        diffs[pointer + 1] = (diffs[pointer + 1]
811
                                              [0], bestEquality2)
812
                    else:
813
                        del diffs[pointer + 1]
814
                        pointer -= 1
815
            pointer += 1
816
817
    def diff_cleanupEfficiency(self, diffs):
818
        """Reduce the number of edits by eliminating operationally trivial
819
        equalities.
820
821
        Args:
822
          diffs: Array of diff tuples.
823
824
        """
825
        changes = False
826
        equalities = []  # Stack of indices where equalities are found.
827
        lastequality = ''  # Always equal to equalities[-1][1]
828
        pointer = 0  # Index of current position.
829
        # Is there an insertion operation before the last equality.
830
        pre_ins = False
831
        # Is there a deletion operation before the last equality.
832
        pre_del = False
833
        # Is there an insertion operation after the last equality.
834
        post_ins = False
835
        # Is there a deletion operation after the last equality.
836
        post_del = False
837
        while pointer < len(diffs):
838
            if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
839
                if (len(diffs[pointer][1]) < self.Diff_EditCost and
840
                        (post_ins or post_del)):
841
                    # Candidate found.
842
                    equalities.append(pointer)
843
                    pre_ins = post_ins
844
                    pre_del = post_del
845
                    lastequality = diffs[pointer][1]
846
                else:
847
                    # Not a candidate, and can never become one.
848
                    equalities = []
849
                    lastequality = ''
850
851
                post_ins = post_del = False
852
            else:  # an insertion or deletion
853
                if diffs[pointer][0] == self.DIFF_DELETE:
854
                    post_del = True
855
                else:
856
                    post_ins = True
857
858
                # Five types to be split:
859
                # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
860
                # <ins>A</ins>X<ins>C</ins><del>D</del>
861
                # <ins>A</ins><del>B</del>X<ins>C</ins>
862
                # <ins>A</del>X<ins>C</ins><del>D</del>
863
                # <ins>A</ins><del>B</del>X<del>C</del>
864
865
                if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
866
                                     ((len(lastequality) < self.Diff_EditCost / 2) and
867
                                      (pre_ins + pre_del + post_ins + post_del) == 3)):
868
                    # Duplicate record
869
                    diffs.insert(equalities[-1],
870
                                 (self.DIFF_DELETE, lastequality))
871
                    # Change second copy to insert.
872
                    diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
873
                                                 diffs[equalities[-1] + 1][1])
874
                    equalities.pop()  # Throw away the equality we just deleted
875
                    lastequality = ''
876
                    if pre_ins and pre_del:
877
                        # No changes made which could affect previous entry,
878
                        # keep going.
879
                        post_ins = post_del = True
880
                        equalities = []
881
                    else:
882
                        if len(equalities):
883
                            equalities.pop()  # Throw away the previous equality
884
                        if len(equalities):
885
                            pointer = equalities[-1]
886
                        else:
887
                            pointer = -1
888
                        post_ins = post_del = False
889
                    changes = True
890
            pointer += 1
891
892
        if changes:
893
            self.diff_cleanupMerge(diffs)
894
895
    def diff_cleanupMerge(self, diffs):
896
        """Reorder and merge like edit sections.  Merge equalities. Any edit
897
        section can move as long as it doesn't cross an equality.
898
899
        Args:
900
          diffs: Array of diff tuples.
901
902
        """
903
        diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
904
        pointer = 0
905
        count_delete = 0
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
906
        count_insert = 0
907
        text_delete = ''
908
        text_insert = ''
438.1.6 by franku
run the script
909
        while pointer < len(diffs):
910
            if diffs[pointer][0] == self.DIFF_INSERT:
911
                count_insert += 1
912
                text_insert += diffs[pointer][1]
913
                pointer += 1
914
            elif diffs[pointer][0] == self.DIFF_DELETE:
915
                count_delete += 1
916
                text_delete += diffs[pointer][1]
917
                pointer += 1
918
            elif diffs[pointer][0] == self.DIFF_EQUAL:
919
                # Upon reaching an equality, check for prior redundancies.
920
                if count_delete != 0 or count_insert != 0:
921
                    if count_delete != 0 and count_insert != 0:
922
                        # Factor out any common prefixies.
923
                        commonlength = self.diff_commonPrefix(
924
                            text_insert, text_delete)
925
                        if commonlength != 0:
926
                            x = pointer - count_delete - count_insert - 1
927
                            if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
928
                                diffs[x] = (diffs[x][0], diffs[x][1] +
929
                                            text_insert[:commonlength])
930
                            else:
931
                                diffs.insert(
932
                                    0, (self.DIFF_EQUAL, text_insert[:commonlength]))
933
                                pointer += 1
934
                            text_insert = text_insert[commonlength:]
935
                            text_delete = text_delete[commonlength:]
936
                        # Factor out any common suffixies.
937
                        commonlength = self.diff_commonSuffix(
938
                            text_insert, text_delete)
939
                        if commonlength != 0:
940
                            diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
941
                                              diffs[pointer][1])
942
                            text_insert = text_insert[:-commonlength]
943
                            text_delete = text_delete[:-commonlength]
944
                    # Delete the offending records and add the merged ones.
945
                    if count_delete == 0:
946
                        diffs[pointer - count_insert: pointer] = [
947
                            (self.DIFF_INSERT, text_insert)]
948
                    elif count_insert == 0:
949
                        diffs[pointer - count_delete: pointer] = [
950
                            (self.DIFF_DELETE, text_delete)]
951
                    else:
952
                        diffs[pointer - count_delete - count_insert: pointer] = [
953
                            (self.DIFF_DELETE, text_delete),
954
                            (self.DIFF_INSERT, text_insert)]
955
                    pointer = pointer - count_delete - count_insert + 1
956
                    if count_delete != 0:
957
                        pointer += 1
958
                    if count_insert != 0:
959
                        pointer += 1
960
                elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
961
                    # Merge this equality with the previous one.
962
                    diffs[pointer - 1] = (diffs[pointer - 1][0],
963
                                          diffs[pointer - 1][1] + diffs[pointer][1])
964
                    del diffs[pointer]
965
                else:
966
                    pointer += 1
967
968
                count_insert = 0
969
                count_delete = 0
970
                text_delete = ''
971
                text_insert = ''
972
973
        if diffs[-1][1] == '':
974
            diffs.pop()  # Remove the dummy entry at the end.
975
976
        # Second pass: look for single edits surrounded on both sides by equalities
977
        # which can be shifted sideways to eliminate an equality.
978
        # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
979
        changes = False
980
        pointer = 1
981
        # Intentionally ignore the first and last element (don't need
982
        # checking).
983
        while pointer < len(diffs) - 1:
984
            if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
985
                    diffs[pointer + 1][0] == self.DIFF_EQUAL):
986
                # This is a single edit surrounded by equalities.
987
                if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
988
                    # Shift the edit over the previous equality.
989
                    diffs[pointer] = (diffs[pointer][0],
990
                                      diffs[pointer - 1][1] +
991
                                      diffs[pointer][1][:-len(diffs[pointer - 1][1])])
992
                    diffs[pointer + 1] = (diffs[pointer + 1][0],
993
                                          diffs[pointer - 1][1] + diffs[pointer + 1][1])
994
                    del diffs[pointer - 1]
995
                    changes = True
996
                elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
997
                    # Shift the edit over the next equality.
998
                    diffs[pointer - 1] = (diffs[pointer - 1][0],
999
                                          diffs[pointer - 1][1] + diffs[pointer + 1][1])
1000
                    diffs[pointer] = (diffs[pointer][0],
1001
                                      diffs[pointer][1][len(diffs[pointer + 1][1]):] +
1002
                                      diffs[pointer + 1][1])
1003
                    del diffs[pointer + 1]
1004
                    changes = True
1005
            pointer += 1
1006
1007
        # If shifts were made, the diff needs reordering and another shift
1008
        # sweep.
1009
        if changes:
1010
            self.diff_cleanupMerge(diffs)
1011
1012
    def diff_xIndex(self, diffs, loc):
1013
        """loc is a location in text1, compute and return the equivalent location
1014
        in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
1015
1016
        Args:
1017
          diffs: Array of diff tuples.
1018
          loc: Location within text1.
1019
1020
        Returns:
1021
          Location within text2.
1022
        """
1023
        chars1 = 0
1024
        chars2 = 0
1025
        last_chars1 = 0
1026
        last_chars2 = 0
1027
        for x in xrange(len(diffs)):
1028
            (op, text) = diffs[x]
1029
            if op != self.DIFF_INSERT:  # Equality or deletion.
1030
                chars1 += len(text)
1031
            if op != self.DIFF_DELETE:  # Equality or insertion.
1032
                chars2 += len(text)
1033
            if chars1 > loc:  # Overshot the location.
1034
                break
1035
            last_chars1 = chars1
1036
            last_chars2 = chars2
1037
1038
        if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
1039
            # The location was deleted.
1040
            return last_chars2
1041
        # Add the remaining len(character).
1042
        return last_chars2 + (loc - last_chars1)
1043
1044
    def diff_prettyHtml(self, diffs):
1045
        """Convert a diff array into a pretty HTML report.
1046
1047
        Args:
1048
          diffs: Array of diff tuples.
1049
1050
        Returns:
1051
          HTML representation.
1052
1053
        """
1054
        html = []
1055
        i = 0
1056
        for (op, data) in diffs:
1057
            text = (data.replace('&', '&amp;').replace('<', '&lt;')
1058
                    .replace('>', '&gt;').replace('\n', '&para;<BR>'))
1059
            if op == self.DIFF_INSERT:
1060
                html.append("<SPAN STYLE=\"color: #00FF00;\" TITLE=\"i=%i\">%s</SPAN>"
1061
                            % (i, text))
1062
            elif op == self.DIFF_DELETE:
1063
                html.append("<SPAN STYLE=\"color: #FF0000;\" TITLE=\"i=%i\">%s</SPAN>"
1064
                            % (i, text))
1065
            elif op == self.DIFF_EQUAL:
1066
                html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1067
            if op != self.DIFF_DELETE:
1068
                i += len(data)
1069
        return ''.join(html)
1070
1071
    def diff_text1(self, diffs):
1072
        """Compute and return the source text (all equalities and deletions).
1073
1074
        Args:
1075
          diffs: Array of diff tuples.
1076
1077
        Returns:
1078
          Source text.
1079
1080
        """
1081
        text = []
1082
        for (op, data) in diffs:
1083
            if op != self.DIFF_INSERT:
1084
                text.append(data)
1085
        return ''.join(text)
1086
1087
    def diff_text2(self, diffs):
1088
        """Compute and return the destination text (all equalities and
1089
        insertions).
1090
1091
        Args:
1092
          diffs: Array of diff tuples.
1093
1094
        Returns:
1095
          Destination text.
1096
1097
        """
1098
        text = []
1099
        for (op, data) in diffs:
1100
            if op != self.DIFF_DELETE:
1101
                text.append(data)
1102
        return ''.join(text)
1103
1104
    def diff_levenshtein(self, diffs):
1105
        """Compute the Levenshtein distance; the number of inserted, deleted or
1106
        substituted characters.
1107
1108
        Args:
1109
          diffs: Array of diff tuples.
1110
1111
        Returns:
1112
          Number of changes.
1113
1114
        """
1115
        levenshtein = 0
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1116
        insertions = 0
1117
        deletions = 0
438.1.6 by franku
run the script
1118
        for (op, data) in diffs:
1119
            if op == self.DIFF_INSERT:
1120
                insertions += len(data)
1121
            elif op == self.DIFF_DELETE:
1122
                deletions += len(data)
1123
            elif op == self.DIFF_EQUAL:
1124
                # A deletion and an insertion is one substitution.
1125
                levenshtein += max(insertions, deletions)
1126
                insertions = 0
1127
                deletions = 0
1128
        levenshtein += max(insertions, deletions)
1129
        return levenshtein
1130
1131
    def diff_toDelta(self, diffs):
1132
        """Crush the diff into an encoded string which describes the operations
1133
        required to transform text1 into text2.
1134
        E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1135
        Operations are tab-separated.  Inserted text is escaped using %xx notation.
1136
1137
        Args:
1138
          diffs: Array of diff tuples.
1139
1140
        Returns:
1141
          Delta text.
1142
        """
1143
        text = []
1144
        for (op, data) in diffs:
1145
            if op == self.DIFF_INSERT:
1146
                # High ascii will raise UnicodeDecodeError.  Use Unicode
1147
                # instead.
1148
                data = data.encode('utf-8')
1149
                text.append('+' + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1150
            elif op == self.DIFF_DELETE:
1151
                text.append('-%d' % len(data))
1152
            elif op == self.DIFF_EQUAL:
1153
                text.append('=%d' % len(data))
1154
        return '\t'.join(text)
1155
1156
    def diff_fromDelta(self, text1, delta):
1157
        """Given the original text1, and an encoded string which describes the
1158
        operations required to transform text1 into text2, compute the full
1159
        diff.
1160
1161
        Args:
1162
          text1: Source string for the diff.
1163
          delta: Delta text.
1164
1165
        Returns:
1166
          Array of diff tuples.
1167
1168
        Raises:
1169
          ValueError: If invalid input.
1170
1171
        """
1172
        if type(delta) == unicode:
1173
            # Deltas should be composed of a subset of ascii chars, Unicode not
1174
            # required.  If this encode raises UnicodeEncodeError, delta is
1175
            # invalid.
1176
            delta = delta.encode('ascii')
1177
        diffs = []
1178
        pointer = 0  # Cursor in text1
1179
        tokens = delta.split('\t')
1180
        for token in tokens:
1181
            if token == '':
1182
                # Blank tokens are ok (from a trailing \t).
1183
                continue
1184
            # Each token begins with a one character parameter which specifies the
1185
            # operation of this token (delete, insert, equality).
1186
            param = token[1:]
1187
            if token[0] == '+':
1188
                param = urllib.unquote(param).decode('utf-8')
1189
                diffs.append((self.DIFF_INSERT, param))
1190
            elif token[0] == '-' or token[0] == '=':
1191
                try:
1192
                    n = int(param)
1193
                except ValueError:
1194
                    raise ValueError, 'Invalid number in diff_fromDelta: ' + param
1195
                if n < 0:
1196
                    raise ValueError, 'Negative number in diff_fromDelta: ' + param
1197
                text = text1[pointer: pointer + n]
1198
                pointer += n
1199
                if token[0] == '=':
1200
                    diffs.append((self.DIFF_EQUAL, text))
1201
                else:
1202
                    diffs.append((self.DIFF_DELETE, text))
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1203
            else:
438.1.6 by franku
run the script
1204
                # Anything else is an error.
1205
                raise ValueError, ('Invalid diff operation in diff_fromDelta: ' +
1206
                                   token[0])
1207
        if pointer != len(text1):
1208
            raise ValueError, (
1209
                'Delta length (%d) does not equal source text length (%d).' %
1210
                (pointer, len(text1)))
1211
        return diffs
1212
1213
    #  MATCH FUNCTIONS
1214
1215
    def match_main(self, text, pattern, loc):
1216
        """Locate the best instance of 'pattern' in 'text' near 'loc'.
1217
1218
        Args:
1219
          text: The text to search.
1220
          pattern: The pattern to search for.
1221
          loc: The location to search around.
1222
1223
        Returns:
1224
          Best match index or -1.
1225
1226
        """
1227
        loc = max(0, min(loc, len(text)))
1228
        if text == pattern:
1229
            # Shortcut (potentially not guaranteed by the algorithm)
1230
            return 0
1231
        elif not text:
1232
            # Nothing to match.
1233
            return -1
1234
        elif text[loc:loc + len(pattern)] == pattern:
1235
            # Perfect match at the perfect spot!  (Includes case of null
1236
            # pattern)
1237
            return loc
1238
        else:
1239
            # Do a fuzzy compare.
1240
            match = self.match_bitap(text, pattern, loc)
1241
            return match
1242
1243
    def match_bitap(self, text, pattern, loc):
1244
        """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1245
        Bitap algorithm.
1246
1247
        Args:
1248
          text: The text to search.
1249
          pattern: The pattern to search for.
1250
          loc: The location to search around.
1251
1252
        Returns:
1253
          Best match index or -1.
1254
1255
        """
1256
        # Python doesn't have a maxint limit, so ignore this check.
1257
        # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
1258
        #  raise ValueError("Pattern too long for this application.")
1259
1260
        # Initialise the alphabet.
1261
        s = self.match_alphabet(pattern)
1262
1263
        def match_bitapScore(e, x):
1264
            """Compute and return the score for a match with e errors and x
1265
            location. Accesses loc and pattern through being a closure.
1266
1267
            Args:
1268
              e: Number of errors in match.
1269
              x: Location of match.
1270
1271
            Returns:
1272
              Overall score for match (0.0 = good, 1.0 = bad).
1273
1274
            """
1275
            accuracy = float(e) / len(pattern)
1276
            proximity = abs(loc - x)
1277
            if not self.Match_Distance:
1278
                # Dodge divide by zero error.
1279
                return proximity and 1.0 or accuracy
1280
            return accuracy + (proximity / float(self.Match_Distance))
1281
1282
        # Highest score beyond which we give up.
1283
        score_threshold = self.Match_Threshold
1284
        # Is there a nearby exact match? (speedup)
1285
        best_loc = text.find(pattern, loc)
1286
        if best_loc != -1:
1287
            score_threshold = min(match_bitapScore(
1288
                0, best_loc), score_threshold)
1289
            # What about in the other direction? (speedup)
1290
            best_loc = text.rfind(pattern, loc + len(pattern))
1291
            if best_loc != -1:
1292
                score_threshold = min(match_bitapScore(
1293
                    0, best_loc), score_threshold)
1294
1295
        # Initialise the bit arrays.
1296
        matchmask = 1 << (len(pattern) - 1)
1297
        best_loc = -1
1298
1299
        bin_max = len(pattern) + len(text)
1300
        # Empty initialization added to appease pychecker.
1301
        last_rd = None
1302
        for d in xrange(len(pattern)):
1303
            # Scan for the best match each iteration allows for one more error.
1304
            # Run a binary search to determine how far from 'loc' we can stray at
1305
            # this error level.
1306
            bin_min = 0
1307
            bin_mid = bin_max
1308
            while bin_min < bin_mid:
1309
                if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1310
                    bin_min = bin_mid
1311
                else:
1312
                    bin_max = bin_mid
1313
                bin_mid = (bin_max - bin_min) / 2 + bin_min
1314
1315
            # Use the result from this iteration as the maximum for the next.
1316
            bin_max = bin_mid
1317
            start = max(1, loc - bin_mid + 1)
1318
            finish = min(loc + bin_mid, len(text)) + len(pattern)
1319
1320
            rd = range(finish + 1)
1321
            rd.append((1 << d) - 1)
1322
            for j in xrange(finish, start - 1, -1):
1323
                if len(text) <= j - 1:
1324
                    # Out of range.
1325
                    charMatch = 0
1326
                else:
1327
                    charMatch = s.get(text[j - 1], 0)
1328
                if d == 0:  # First pass: exact match.
1329
                    rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1330
                else:  # Subsequent passes: fuzzy match.
1331
                    rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
1332
                        ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1333
                if rd[j] & matchmask:
1334
                    score = match_bitapScore(d, j - 1)
1335
                    # This match will almost certainly be better than any existing match.
1336
                    # But check anyway.
1337
                    if score <= score_threshold:
1338
                        # Told you so.
1339
                        score_threshold = score
1340
                        best_loc = j - 1
1341
                        if best_loc > loc:
1342
                            # When passing loc, don't exceed our current
1343
                            # distance from loc.
1344
                            start = max(1, 2 * loc - best_loc)
1345
                        else:
1346
                            # Already passed loc, downhill from here on in.
1347
                            break
1348
            # No hope for a (better) match at greater error levels.
1349
            if match_bitapScore(d + 1, loc) > score_threshold:
1350
                break
1351
            last_rd = rd
1352
        return best_loc
1353
1354
    def match_alphabet(self, pattern):
1355
        """Initialise the alphabet for the Bitap algorithm.
1356
1357
        Args:
1358
          pattern: The text to encode.
1359
1360
        Returns:
1361
          Hash of character locations.
1362
1363
        """
1364
        s = {}
1365
        for char in pattern:
1366
            s[char] = 0
1367
        for i in xrange(len(pattern)):
1368
            s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1369
        return s
1370
1371
    #  PATCH FUNCTIONS
1372
1373
    def patch_addContext(self, patch, text):
1374
        """Increase the context until it is unique, but don't let the pattern
1375
        expand beyond Match_MaxBits.
1376
1377
        Args:
1378
          patch: The patch to grow.
1379
          text: Source text.
1380
1381
        """
1382
        if len(text) == 0:
1383
            return
1384
        pattern = text[patch.start2: patch.start2 + patch.length1]
1385
        padding = 0
1386
1387
        # Look for the first and last matches of pattern in text.  If two different
1388
        # matches are found, increase the pattern length.
1389
        while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1390
                                                              0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1391
                                                              self.Patch_Margin)):
1392
            padding += self.Patch_Margin
1393
            pattern = text[max(0, patch.start2 - padding):
1394
                           patch.start2 + patch.length1 + padding]
1395
        # Add one chunk for good luck.
1396
        padding += self.Patch_Margin
1397
1398
        # Add the prefix.
1399
        prefix = text[max(0, patch.start2 - padding): patch.start2]
1400
        if prefix:
1401
            patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1402
        # Add the suffix.
1403
        suffix = text[patch.start2 + patch.length1:
1404
                      patch.start2 + patch.length1 + padding]
1405
        if suffix:
1406
            patch.diffs.append((self.DIFF_EQUAL, suffix))
1407
1408
        # Roll back the start points.
1409
        patch.start1 -= len(prefix)
1410
        patch.start2 -= len(prefix)
1411
        # Extend lengths.
1412
        patch.length1 += len(prefix) + len(suffix)
1413
        patch.length2 += len(prefix) + len(suffix)
1414
1415
    def patch_make(self, a, b=None, c=None):
1416
        """Compute a list of patches to turn text1 into text2.
1417
        Use diffs if provided, otherwise compute it ourselves.
1418
        There are four ways to call this function, depending on what data is
1419
        available to the caller:
1420
        Method 1:
1421
        a = text1, b = text2
1422
        Method 2:
1423
        a = diffs
1424
        Method 3 (optimal):
1425
        a = text1, b = diffs
1426
        Method 4 (deprecated, use method 3):
1427
        a = text1, b = text2, c = diffs
1428
1429
        Args:
1430
          a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1431
              text2 (method 2).
1432
          b: text2 (methods 1,4) or Array of diff tuples for text1 to
1433
              text2 (method 3) or undefined (method 2).
1434
          c: Array of diff tuples for text1 to text2 (method 4) or
1435
              undefined (methods 1,2,3).
1436
1437
        Returns:
1438
          Array of patch objects.
1439
        """
1440
        text1 = None
1441
        diffs = None
1442
        # Note that texts may arrive as 'str' or 'unicode'.
1443
        if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
1444
            # Method 1: text1, text2
1445
            # Compute diffs from text1 and text2.
1446
            text1 = a
1447
            diffs = self.diff_main(text1, b, True)
1448
            if len(diffs) > 2:
1449
                self.diff_cleanupSemantic(diffs)
1450
                self.diff_cleanupEfficiency(diffs)
1451
        elif isinstance(a, list) and b is None and c is None:
1452
            # Method 2: diffs
1453
            # Compute text1 from diffs.
1454
            diffs = a
1455
            text1 = self.diff_text1(diffs)
1456
        elif isinstance(a, basestring) and isinstance(b, list) and c is None:
1457
            # Method 3: text1, diffs
1458
            text1 = a
1459
            diffs = b
1460
        elif (isinstance(a, basestring) and isinstance(b, basestring) and
1461
              isinstance(c, list)):
1462
            # Method 4: text1, text2, diffs
1463
            # text2 is not used.
1464
            text1 = a
1465
            diffs = c
1466
        else:
1467
            raise ValueError('Unknown call format to patch_make.')
1468
1469
        if not diffs:
1470
            return []  # Get rid of the None case.
1471
        patches = []
1472
        patch = patch_obj()
1473
        char_count1 = 0  # Number of characters into the text1 string.
1474
        char_count2 = 0  # Number of characters into the text2 string.
1475
        # Recreate the patches to determine context info.
1476
        prepatch_text = text1
1477
        postpatch_text = text1
1478
        for x in xrange(len(diffs)):
1479
            (diff_type, diff_text) = diffs[x]
1480
            if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1481
                # A new patch starts here.
1482
                patch.start1 = char_count1
1483
                patch.start2 = char_count2
1484
            if diff_type == self.DIFF_INSERT:
1485
                # Insertion
1486
                patch.diffs.append(diffs[x])
1487
                patch.length2 += len(diff_text)
1488
                postpatch_text = (postpatch_text[:char_count2] + diff_text +
1489
                                  postpatch_text[char_count2:])
1490
            elif diff_type == self.DIFF_DELETE:
1491
                # Deletion.
1492
                patch.length1 += len(diff_text)
1493
                patch.diffs.append(diffs[x])
1494
                postpatch_text = (postpatch_text[:char_count2] +
1495
                                  postpatch_text[char_count2 + len(diff_text):])
1496
            elif (diff_type == self.DIFF_EQUAL and
1497
                  len(diff_text) <= 2 * self.Patch_Margin and
1498
                  len(patch.diffs) != 0 and len(diffs) != x + 1):
1499
                # Small equality inside a patch.
1500
                patch.diffs.append(diffs[x])
1501
                patch.length1 += len(diff_text)
1502
                patch.length2 += len(diff_text)
1503
1504
            if (diff_type == self.DIFF_EQUAL and
1505
                    len(diff_text) >= 2 * self.Patch_Margin):
1506
                # Time for a new patch.
1507
                if len(patch.diffs) != 0:
1508
                    self.patch_addContext(patch, prepatch_text)
1509
                    patches.append(patch)
1510
                    patch = patch_obj()
1511
                    # Unlike Unidiff, our patch lists have a rolling context.
1512
                    # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1513
                    # Update prepatch text & pos to reflect the application of the
1514
                    # just completed patch.
1515
                    prepatch_text = postpatch_text
1516
                    char_count1 = char_count2
1517
1518
            # Update the current character count.
1519
            if diff_type != self.DIFF_INSERT:
1520
                char_count1 += len(diff_text)
1521
            if diff_type != self.DIFF_DELETE:
1522
                char_count2 += len(diff_text)
1523
1524
        # Pick up the leftover patch if not empty.
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1525
        if len(patch.diffs) != 0:
438.1.6 by franku
run the script
1526
            self.patch_addContext(patch, prepatch_text)
1527
            patches.append(patch)
1528
        return patches
1529
1530
    def patch_deepCopy(self, patches):
1531
        """Given an array of patches, return another array that is identical.
1532
1533
        Args:
1534
          patches: Array of patch objects.
1535
1536
        Returns:
1537
          Array of patch objects.
1538
1539
        """
1540
        patchesCopy = []
1541
        for patch in patches:
1542
            patchCopy = patch_obj()
1543
            # No need to deep copy the tuples since they are immutable.
1544
            patchCopy.diffs = patch.diffs[:]
1545
            patchCopy.start1 = patch.start1
1546
            patchCopy.start2 = patch.start2
1547
            patchCopy.length1 = patch.length1
1548
            patchCopy.length2 = patch.length2
1549
            patchesCopy.append(patchCopy)
1550
        return patchesCopy
1551
1552
    def patch_apply(self, patches, text):
1553
        """Merge a set of patches onto the text.  Return a patched text, as
1554
        well as a list of true/false values indicating which patches were
1555
        applied.
1556
1557
        Args:
1558
          patches: Array of patch objects.
1559
          text: Old text.
1560
1561
        Returns:
1562
          Two element Array, containing the new text and an array of boolean values.
1563
1564
        """
1565
        if not patches:
1566
            return (text, [])
1567
1568
        # Deep copy the patches so that no changes are made to originals.
1569
        patches = self.patch_deepCopy(patches)
1570
1571
        nullPadding = self.patch_addPadding(patches)
1572
        text = nullPadding + text + nullPadding
1573
        self.patch_splitMax(patches)
1574
1575
        # delta keeps track of the offset between the expected and actual location
1576
        # of the previous patch.  If there are patches expected at positions 10 and
1577
        # 20, but the first patch was found at 12, delta is 2 and the second patch
1578
        # has an effective expected position of 22.
1579
        delta = 0
1580
        results = []
1581
        for patch in patches:
1582
            expected_loc = patch.start2 + delta
1583
            text1 = self.diff_text1(patch.diffs)
1584
            end_loc = -1
1585
            if len(text1) > self.Match_MaxBits:
1586
                # patch_splitMax will only provide an oversized pattern in the case of
1587
                # a monster delete.
1588
                start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1589
                                            expected_loc)
1590
                if start_loc != -1:
1591
                    end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1592
                                              expected_loc + len(text1) - self.Match_MaxBits)
1593
                    if end_loc == -1 or start_loc >= end_loc:
1594
                        # Can't find valid trailing context.  Drop this patch.
1595
                        start_loc = -1
1596
            else:
1597
                start_loc = self.match_main(text, text1, expected_loc)
1598
            if start_loc == -1:
1599
                # No match found.  :(
1600
                results.append(False)
1601
                # Subtract the delta for this failed patch from subsequent
1602
                # patches.
1603
                delta -= patch.length2 - patch.length1
1604
            else:
1605
                # Found a match.  :)
1606
                results.append(True)
1607
                delta = start_loc - expected_loc
1608
                if end_loc == -1:
1609
                    text2 = text[start_loc: start_loc + len(text1)]
1610
                else:
1611
                    text2 = text[start_loc: end_loc + self.Match_MaxBits]
1612
                if text1 == text2:
1613
                    # Perfect match, just shove the replacement text in.
1614
                    text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1615
                            text[start_loc + len(text1):])
1616
                else:
1617
                    # Imperfect match.
1618
                    # Run a diff to get a framework of equivalent indices.
1619
                    diffs = self.diff_main(text1, text2, False)
1620
                    if (len(text1) > self.Match_MaxBits and
1621
                            self.diff_levenshtein(diffs) / float(len(text1)) >
1622
                            self.Patch_DeleteThreshold):
1623
                        # The end points match, but the content is unacceptably
1624
                        # bad.
1625
                        results[-1] = False
1626
                    else:
1627
                        self.diff_cleanupSemanticLossless(diffs)
1628
                        index1 = 0
1629
                        for (op, data) in patch.diffs:
1630
                            if op != self.DIFF_EQUAL:
1631
                                index2 = self.diff_xIndex(diffs, index1)
1632
                            if op == self.DIFF_INSERT:  # Insertion
1633
                                text = text[:start_loc + index2] + data + text[start_loc +
1634
                                                                               index2:]
1635
                            elif op == self.DIFF_DELETE:  # Deletion
1636
                                text = text[:start_loc + index2] + text[start_loc +
1637
                                                                        self.diff_xIndex(diffs, index1 + len(data)):]
1638
                            if op != self.DIFF_DELETE:
1639
                                index1 += len(data)
1640
        # Strip the padding off.
1641
        text = text[len(nullPadding):-len(nullPadding)]
1642
        return (text, results)
1643
1644
    def patch_addPadding(self, patches):
1645
        """Add some padding on text start and end so that edges can match
1646
        something.  Intended to be called only from within patch_apply.
1647
1648
        Args:
1649
          patches: Array of patch objects.
1650
1651
        Returns:
1652
          The padding string added to each side.
1653
1654
        """
1655
        paddingLength = self.Patch_Margin
1656
        nullPadding = ''
1657
        for x in xrange(1, paddingLength + 1):
1658
            nullPadding += chr(x)
1659
1660
        # Bump all the patches forward.
1661
        for patch in patches:
1662
            patch.start1 += paddingLength
1663
            patch.start2 += paddingLength
1664
1665
        # Add some padding on start of first diff.
1666
        patch = patches[0]
1667
        diffs = patch.diffs
1668
        if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1669
            # Add nullPadding equality.
1670
            diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1671
            patch.start1 -= paddingLength  # Should be 0.
1672
            patch.start2 -= paddingLength  # Should be 0.
1673
            patch.length1 += paddingLength
1674
            patch.length2 += paddingLength
1675
        elif paddingLength > len(diffs[0][1]):
1676
            # Grow first equality.
1677
            extraLength = paddingLength - len(diffs[0][1])
1678
            newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1679
            diffs[0] = (diffs[0][0], newText)
1680
            patch.start1 -= extraLength
1681
            patch.start2 -= extraLength
1682
            patch.length1 += extraLength
1683
            patch.length2 += extraLength
1684
1685
        # Add some padding on end of last diff.
1686
        patch = patches[-1]
1687
        diffs = patch.diffs
1688
        if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1689
            # Add nullPadding equality.
1690
            diffs.append((self.DIFF_EQUAL, nullPadding))
1691
            patch.length1 += paddingLength
1692
            patch.length2 += paddingLength
1693
        elif paddingLength > len(diffs[-1][1]):
1694
            # Grow last equality.
1695
            extraLength = paddingLength - len(diffs[-1][1])
1696
            newText = diffs[-1][1] + nullPadding[:extraLength]
1697
            diffs[-1] = (diffs[-1][0], newText)
1698
            patch.length1 += extraLength
1699
            patch.length2 += extraLength
1700
1701
        return nullPadding
1702
1703
    def patch_splitMax(self, patches):
1704
        """Look through the patches and break up any which are longer than the
1705
        maximum limit of the match algorithm.
1706
1707
        Args:
1708
          patches: Array of patch objects.
1709
1710
        """
1711
        if self.Match_MaxBits == 0:
1712
            return
1713
        for x in xrange(len(patches)):
1714
            if patches[x].length1 > self.Match_MaxBits:
1715
                bigpatch = patches[x]
1716
                # Remove the big old patch.
1717
                del patches[x]
1718
                x -= 1
1719
                patch_size = self.Match_MaxBits
1720
                start1 = bigpatch.start1
1721
                start2 = bigpatch.start2
1722
                precontext = ''
1723
                while len(bigpatch.diffs) != 0:
1724
                    # Create one of several smaller patches.
1725
                    patch = patch_obj()
1726
                    empty = True
1727
                    patch.start1 = start1 - len(precontext)
1728
                    patch.start2 = start2 - len(precontext)
1729
                    if precontext:
1730
                        patch.length1 = patch.length2 = len(precontext)
1731
                        patch.diffs.append((self.DIFF_EQUAL, precontext))
1732
1733
                    while (len(bigpatch.diffs) != 0 and
1734
                           patch.length1 < patch_size - self.Patch_Margin):
1735
                        (diff_type, diff_text) = bigpatch.diffs[0]
1736
                        if diff_type == self.DIFF_INSERT:
1737
                            # Insertions are harmless.
1738
                            patch.length2 += len(diff_text)
1739
                            start2 += len(diff_text)
1740
                            patch.diffs.append(bigpatch.diffs.pop(0))
1741
                            empty = False
1742
                        elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1743
                              patch.diffs[0][0] == self.DIFF_EQUAL and
1744
                              len(diff_text) > 2 * patch_size):
1745
                            # This is a large deletion.  Let it pass in one
1746
                            # chunk.
1747
                            patch.length1 += len(diff_text)
1748
                            start1 += len(diff_text)
1749
                            empty = False
1750
                            patch.diffs.append((diff_type, diff_text))
1751
                            del bigpatch.diffs[0]
1752
                        else:
1753
                            # Deletion or equality.  Only take as much as we
1754
                            # can stomach.
1755
                            diff_text = diff_text[:patch_size - patch.length1 -
1756
                                                  self.Patch_Margin]
1757
                            patch.length1 += len(diff_text)
1758
                            start1 += len(diff_text)
1759
                            if diff_type == self.DIFF_EQUAL:
1760
                                patch.length2 += len(diff_text)
1761
                                start2 += len(diff_text)
1762
                            else:
1763
                                empty = False
1764
1765
                            patch.diffs.append((diff_type, diff_text))
1766
                            if diff_text == bigpatch.diffs[0][1]:
1767
                                del bigpatch.diffs[0]
1768
                            else:
1769
                                bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1770
                                                     bigpatch.diffs[0][1][len(diff_text):])
1771
1772
                    # Compute the head context for the next patch.
1773
                    precontext = self.diff_text2(patch.diffs)
1774
                    precontext = precontext[-self.Patch_Margin:]
1775
                    # Append the end context for this patch.
1776
                    postcontext = self.diff_text1(bigpatch.diffs)[
1777
                        :self.Patch_Margin]
1778
                    if postcontext:
1779
                        patch.length1 += len(postcontext)
1780
                        patch.length2 += len(postcontext)
1781
                        if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1782
                            patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1783
                                               postcontext)
1784
                        else:
1785
                            patch.diffs.append((self.DIFF_EQUAL, postcontext))
1786
1787
                    if not empty:
1788
                        x += 1
1789
                        patches.insert(x, patch)
1790
1791
    def patch_toText(self, patches):
1792
        """Take a list of patches and return a textual representation.
1793
1794
        Args:
1795
          patches: Array of patch objects.
1796
1797
        Returns:
1798
          Text representation of patches.
1799
1800
        """
1801
        text = []
1802
        for patch in patches:
1803
            text.append(str(patch))
1804
        return ''.join(text)
1805
1806
    def patch_fromText(self, textline):
1807
        """Parse a textual representation of patches and return a list of patch
1808
        objects.
1809
1810
        Args:
1811
          textline: Text representation of patches.
1812
1813
        Returns:
1814
          Array of patch objects.
1815
1816
        Raises:
1817
          ValueError: If invalid input.
1818
1819
        """
1820
        if type(textline) == unicode:
1821
            # Patches should be composed of a subset of ascii chars, Unicode not
1822
            # required.  If this encode raises UnicodeEncodeError, patch is
1823
            # invalid.
1824
            textline = textline.encode('ascii')
1825
        patches = []
1826
        if not textline:
1827
            return patches
1828
        text = textline.split('\n')
1829
        while len(text) != 0:
1830
            m = re.match('^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$', text[0])
1831
            if not m:
1832
                raise ValueError, 'Invalid patch string: ' + text[0]
1833
            patch = patch_obj()
1834
            patches.append(patch)
1835
            patch.start1 = int(m.group(1))
1836
            if m.group(2) == '':
1837
                patch.start1 -= 1
1838
                patch.length1 = 1
1839
            elif m.group(2) == '0':
1840
                patch.length1 = 0
1841
            else:
1842
                patch.start1 -= 1
1843
                patch.length1 = int(m.group(2))
1844
1845
            patch.start2 = int(m.group(3))
1846
            if m.group(4) == '':
1847
                patch.start2 -= 1
1848
                patch.length2 = 1
1849
            elif m.group(4) == '0':
1850
                patch.length2 = 0
1851
            else:
1852
                patch.start2 -= 1
1853
                patch.length2 = int(m.group(4))
1854
1855
            del text[0]
1856
1857
            while len(text) != 0:
1858
                if text[0]:
1859
                    sign = text[0][0]
1860
                else:
1861
                    sign = ''
1862
                line = urllib.unquote(text[0][1:])
1863
                line = line.decode('utf-8')
1864
                if sign == '+':
1865
                    # Insertion.
1866
                    patch.diffs.append((self.DIFF_INSERT, line))
1867
                elif sign == '-':
1868
                    # Deletion.
1869
                    patch.diffs.append((self.DIFF_DELETE, line))
1870
                elif sign == ' ':
1871
                    # Minor equality.
1872
                    patch.diffs.append((self.DIFF_EQUAL, line))
1873
                elif sign == '@':
1874
                    # Start of next patch.
1875
                    break
1876
                elif sign == '':
1877
                    # Blank line?  Whatever.
1878
                    pass
1879
                else:
1880
                    # WTF?
1881
                    raise ValueError, "Invalid patch mode: '%s'\n%s" % (sign, line)
1882
                del text[0]
1883
        return patches
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1884
1885
1886
class patch_obj:
438.1.6 by franku
run the script
1887
    """Class representing one patch operation."""
1888
1889
    def __init__(self):
1890
        """Initializes with an empty list of diffs."""
1891
        self.diffs = []
1892
        self.start1 = None
1893
        self.start2 = None
1894
        self.length1 = 0
1895
        self.length2 = 0
1896
1897
    def __str__(self):
1898
        """Emmulate GNU diff's format.
1899
        Header: @@ -382,8 +481,9 @@
1900
        Indicies are printed as 1-based, not 0-based.
1901
1902
        Returns:
1903
          The GNU diff string.
1904
        """
1905
        if self.length1 == 0:
1906
            coords1 = str(self.start1) + ',0'
1907
        elif self.length1 == 1:
1908
            coords1 = str(self.start1 + 1)
1909
        else:
1910
            coords1 = str(self.start1 + 1) + ',' + str(self.length1)
1911
        if self.length2 == 0:
1912
            coords2 = str(self.start2) + ',0'
1913
        elif self.length2 == 1:
1914
            coords2 = str(self.start2 + 1)
1915
        else:
1916
            coords2 = str(self.start2 + 1) + ',' + str(self.length2)
1917
        text = ['@@ -', coords1, ' +', coords2, ' @@\n']
1918
        # Escape the body of the patch with %xx notation.
1919
        for (op, data) in self.diffs:
1920
            if op == diff_match_patch.DIFF_INSERT:
1921
                text.append('+')
1922
            elif op == diff_match_patch.DIFF_DELETE:
1923
                text.append('-')
1924
            elif op == diff_match_patch.DIFF_EQUAL:
1925
                text.append(' ')
1926
            # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
1927
            data = data.encode('utf-8')
1928
            text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + '\n')
1929
        return ''.join(text)