# Taken from google because they do not provide a setup.py file. Thanks anyway

"""Diff Match and Patch

Copyright 2006 Google Inc.
http://code.google.com/p/google-diff-match-patch/

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

"""Functions for diff, match and patch.

Computes the difference between two texts to create a patch.
Applies the patch onto another text, allowing for errors.
"""
29
__author__ = 'fraser@google.com (Neil Fraser)'
36
class diff_match_patch:
37
"""Class containing the diff, match and patch methods.
39
Also contains the behaviour settings.
43
"""Inits a diff_match_patch object with default settings.
44
Redefine these in your program to override the defaults.
47
# Number of seconds to map a diff before giving up (0 for infinity).
48
self.Diff_Timeout = 1.0
49
# Cost of an empty edit operation in terms of edit characters.
50
self.Diff_EditCost = 4
51
# The size beyond which the double-ended diff activates.
52
# Double-ending is twice as fast, but less accurate.
53
self.Diff_DualThreshold = 32
54
# At what point is no match declared (0.0 = perfection, 1.0 = very loose).
55
self.Match_Threshold = 0.5
56
# How far to search for a match (0 = exact location, 1000+ = broad match).
57
# A match this many characters away from the expected location will add
58
# 1.0 to the score (0.0 is a perfect match).
59
self.Match_Distance = 1000
60
# When deleting a large block of text (over ~64 characters), how close does
61
# the contents have to match the expected contents. (0.0 = perfection,
62
# 1.0 = very loose). Note that Match_Threshold controls how closely the
63
# end points of a delete need to match.
64
self.Patch_DeleteThreshold = 0.5
65
# Chunk size for context length.
68
# How many bits in a number?
69
# Python has no maximum, thus to disable patch splitting set to 0.
70
# However to avoid long patches in certain pathological cases, use 32.
71
# Multiple short patches (using native ints) are much faster than long ones.
72
self.Match_MaxBits = 32
76
# The data structure representing a diff is an array of tuples:
77
# [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
78
# which means: delete "Hello", add "Goodbye" and keep " world."
83
def diff_main(self, text1, text2, checklines=True):
84
"""Find the differences between two texts. Simplifies the problem by
85
stripping any common prefix or suffix off the texts before diffing.
88
text1: Old string to be diffed.
89
text2: New string to be diffed.
90
checklines: Optional speedup flag. If present and false, then don't run
91
a line-level diff first to identify the changed areas.
92
Defaults to true, which does a faster, slightly less optimal diff.
98
# Check for equality (speedup)
100
return [(self.DIFF_EQUAL, text1)]
102
# Trim off common prefix (speedup)
103
commonlength = self.diff_commonPrefix(text1, text2)
104
commonprefix = text1[:commonlength]
105
text1 = text1[commonlength:]
106
text2 = text2[commonlength:]
108
# Trim off common suffix (speedup)
109
commonlength = self.diff_commonSuffix(text1, text2)
110
if commonlength == 0:
113
commonsuffix = text1[-commonlength:]
114
text1 = text1[:-commonlength]
115
text2 = text2[:-commonlength]
117
# Compute the diff on the middle block
118
diffs = self.diff_compute(text1, text2, checklines)
120
# Restore the prefix and suffix
122
diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
124
diffs.append((self.DIFF_EQUAL, commonsuffix))
125
self.diff_cleanupMerge(diffs)
128
def diff_compute(self, text1, text2, checklines):
129
"""Find the differences between two texts. Assumes that the texts do not
130
have any common prefix or suffix.
133
text1: Old string to be diffed.
134
text2: New string to be diffed.
135
checklines: Speedup flag. If false, then don't run a line-level diff
136
first to identify the changed areas.
137
If true, then run a faster, slightly less optimal diff.
143
# Just add some text (speedup)
144
return [(self.DIFF_INSERT, text2)]
147
# Just delete some text (speedup)
148
return [(self.DIFF_DELETE, text1)]
150
if len(text1) > len(text2):
151
(longtext, shorttext) = (text1, text2)
153
(shorttext, longtext) = (text1, text2)
154
i = longtext.find(shorttext)
156
# Shorter text is inside the longer text (speedup)
157
diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
158
(self.DIFF_INSERT, longtext[i + len(shorttext):])]
159
# Swap insertions for deletions if diff is reversed.
160
if len(text1) > len(text2):
161
diffs[0] = (self.DIFF_DELETE, diffs[0][1])
162
diffs[2] = (self.DIFF_DELETE, diffs[2][1])
164
longtext = shorttext = None # Garbage collect.
166
# Check to see if the problem can be split in two.
167
hm = self.diff_halfMatch(text1, text2)
169
# A half-match was found, sort out the return data.
170
(text1_a, text1_b, text2_a, text2_b, mid_common) = hm
171
# Send both pairs off for separate processing.
172
diffs_a = self.diff_main(text1_a, text2_a, checklines)
173
diffs_b = self.diff_main(text1_b, text2_b, checklines)
175
return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
177
# Perform a real diff.
178
if checklines and (len(text1) < 100 or len(text2) < 100):
179
checklines = False # Too trivial for the overhead.
181
# Scan the text on a line-by-line basis first.
182
(text1, text2, linearray) = self.diff_linesToChars(text1, text2)
184
diffs = self.diff_map(text1, text2)
185
if not diffs: # No acceptable result.
186
diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
188
# Convert the diff back to original text.
189
self.diff_charsToLines(diffs, linearray)
190
# Eliminate freak matches (e.g. blank lines)
191
self.diff_cleanupSemantic(diffs)
193
# Rediff any replacement blocks, this time character-by-character.
194
# Add a dummy entry at the end.
195
diffs.append((self.DIFF_EQUAL, ''))
201
while pointer < len(diffs):
202
if diffs[pointer][0] == self.DIFF_INSERT:
204
text_insert += diffs[pointer][1]
205
elif diffs[pointer][0] == self.DIFF_DELETE:
207
text_delete += diffs[pointer][1]
208
elif diffs[pointer][0] == self.DIFF_EQUAL:
209
# Upon reaching an equality, check for prior redundancies.
210
if count_delete >= 1 and count_insert >= 1:
211
# Delete the offending records and add the merged ones.
212
a = self.diff_main(text_delete, text_insert, False)
213
diffs[pointer - count_delete - count_insert : pointer] = a
214
pointer = pointer - count_delete - count_insert + len(a)
222
diffs.pop() # Remove the dummy entry at the end.
225
def diff_linesToChars(self, text1, text2):
226
"""Split two texts into an array of strings. Reduce the texts to a string
227
of hashes where each Unicode character represents one line.
231
text2: Second string.
234
Three element tuple, containing the encoded text1, the encoded text2 and
235
the array of unique strings. The zeroth element of the array of unique
236
strings is intentionally blank.
238
lineArray = [] # e.g. lineArray[4] == "Hello\n"
239
lineHash = {} # e.g. lineHash["Hello\n"] == 4
241
# "\x00" is a valid character, but various debuggers don't like it.
242
# So we'll insert a junk entry to avoid generating a null character.
245
def diff_linesToCharsMunge(text):
246
"""Split a text into an array of strings. Reduce the texts to a string
247
of hashes where each Unicode character represents one line.
248
Modifies linearray and linehash through being a closure.
251
text: String to encode.
257
# Walk the text, pulling out a substring for each line.
258
# text.split('\n') would would temporarily double our memory footprint.
259
# Modifying text would create many large strings to garbage collect.
262
while lineEnd < len(text) - 1:
263
lineEnd = text.find('\n', lineStart)
265
lineEnd = len(text) - 1
266
line = text[lineStart:lineEnd + 1]
267
lineStart = lineEnd + 1
270
chars.append(unichr(lineHash[line]))
272
lineArray.append(line)
273
lineHash[line] = len(lineArray) - 1
274
chars.append(unichr(len(lineArray) - 1))
275
return "".join(chars)
277
chars1 = diff_linesToCharsMunge(text1)
278
chars2 = diff_linesToCharsMunge(text2)
279
return (chars1, chars2, lineArray)
281
def diff_charsToLines(self, diffs, lineArray):
282
"""Rehydrate the text in a diff from a string of line hashes to real lines
286
diffs: Array of diff tuples.
287
lineArray: Array of unique strings.
289
for x in xrange(len(diffs)):
291
for char in diffs[x][1]:
292
text.append(lineArray[ord(char)])
293
diffs[x] = (diffs[x][0], "".join(text))
295
def diff_map(self, text1, text2):
296
"""Explore the intersection points between the two texts.
299
text1: Old string to be diffed.
300
text2: New string to be diffed.
303
Array of diff tuples or None if no diff available.
306
# Unlike in most languages, Python counts time in seconds.
307
s_end = time.time() + self.Diff_Timeout # Don't run for too long.
308
# Cache the text lengths to prevent multiple calls.
309
text1_length = len(text1)
310
text2_length = len(text2)
311
max_d = text1_length + text2_length - 1
312
doubleEnd = self.Diff_DualThreshold * 2 < max_d
321
# If the total number of characters is odd, then the front path will
322
# collide with the reverse path.
323
front = (text1_length + text2_length) % 2
324
for d in xrange(max_d):
325
# Bail out if timeout reached.
326
if self.Diff_Timeout > 0 and time.time() > s_end:
329
# Walk the front path one step.
331
for k in xrange(-d, d + 1, 2):
332
if k == -d or k != d and v1[k - 1] < v1[k + 1]:
339
if front and footstep in footsteps:
342
footsteps[footstep] = d
344
while (not done and x < text1_length and y < text2_length and
345
text1[x] == text2[y]):
350
if front and footstep in footsteps:
353
footsteps[footstep] = d
356
v_map1[d][(x, y)] = True
357
if x == text1_length and y == text2_length:
358
# Reached the end in single-path mode.
359
return self.diff_path1(v_map1, text1, text2)
361
# Front path ran over reverse path.
362
v_map2 = v_map2[:footsteps[footstep] + 1]
363
a = self.diff_path1(v_map1, text1[:x], text2[:y])
364
b = self.diff_path2(v_map2, text1[x:], text2[y:])
368
# Walk the reverse path one step.
370
for k in xrange(-d, d + 1, 2):
371
if k == -d or k != d and v2[k - 1] < v2[k + 1]:
376
footstep = (text1_length - x, text2_length - y)
377
if not front and footstep in footsteps:
380
footsteps[footstep] = d
381
while (not done and x < text1_length and y < text2_length and
382
text1[-x - 1] == text2[-y - 1]):
385
footstep = (text1_length - x, text2_length - y)
386
if not front and footstep in footsteps:
389
footsteps[footstep] = d
392
v_map2[d][(x, y)] = True
394
# Reverse path ran over front path.
395
v_map1 = v_map1[:footsteps[footstep] + 1]
396
a = self.diff_path1(v_map1, text1[:text1_length - x],
397
text2[:text2_length - y])
398
b = self.diff_path2(v_map2, text1[text1_length - x:],
399
text2[text2_length - y:])
402
# Number of diffs equals number of characters, no commonality at all.
405
def diff_path1(self, v_map, text1, text2):
406
"""Work from the middle back to the start to determine the path.
409
v_map: Array of paths.
410
text1: Old string fragment to be diffed.
411
text2: New string fragment to be diffed.
414
Array of diff tuples.
420
for d in xrange(len(v_map) - 2, -1, -1):
422
if (x - 1, y) in v_map[d]:
424
if last_op == self.DIFF_DELETE:
425
path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
427
path[:0] = [(self.DIFF_DELETE, text1[x])]
428
last_op = self.DIFF_DELETE
430
elif (x, y - 1) in v_map[d]:
432
if last_op == self.DIFF_INSERT:
433
path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
435
path[:0] = [(self.DIFF_INSERT, text2[y])]
436
last_op = self.DIFF_INSERT
441
assert text1[x] == text2[y], ("No diagonal. " +
442
"Can't happen. (diff_path1)")
443
if last_op == self.DIFF_EQUAL:
444
path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
446
path[:0] = [(self.DIFF_EQUAL, text1[x])]
447
last_op = self.DIFF_EQUAL
450
def diff_path2(self, v_map, text1, text2):
451
"""Work from the middle back to the end to determine the path.
454
v_map: Array of paths.
455
text1: Old string fragment to be diffed.
456
text2: New string fragment to be diffed.
459
Array of diff tuples.
465
for d in xrange(len(v_map) - 2, -1, -1):
467
if (x - 1, y) in v_map[d]:
469
if last_op == self.DIFF_DELETE:
470
path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
472
path.append((self.DIFF_DELETE, text1[-x - 1]))
473
last_op = self.DIFF_DELETE
475
elif (x, y - 1) in v_map[d]:
477
if last_op == self.DIFF_INSERT:
478
path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
480
path.append((self.DIFF_INSERT, text2[-y - 1]))
481
last_op = self.DIFF_INSERT
486
assert text1[-x - 1] == text2[-y - 1], ("No diagonal. " +
487
"Can't happen. (diff_path2)")
488
if last_op == self.DIFF_EQUAL:
489
path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
491
path.append((self.DIFF_EQUAL, text1[-x - 1]))
492
last_op = self.DIFF_EQUAL
495
def diff_commonPrefix(self, text1, text2):
496
"""Determine the common prefix of two strings.
500
text2: Second string.
503
The number of characters common to the start of each string.
505
# Quick check for common null cases.
506
if not text1 or not text2 or text1[0] != text2[0]:
509
# Performance analysis: http://neil.fraser.name/news/2007/10/09/
511
pointermax = min(len(text1), len(text2))
512
pointermid = pointermax
514
while pointermin < pointermid:
515
if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
516
pointermin = pointermid
517
pointerstart = pointermin
519
pointermax = pointermid
520
pointermid = int((pointermax - pointermin) / 2 + pointermin)
523
def diff_commonSuffix(self, text1, text2):
524
"""Determine the common suffix of two strings.
528
text2: Second string.
531
The number of characters common to the end of each string.
533
# Quick check for common null cases.
534
if not text1 or not text2 or text1[-1] != text2[-1]:
537
# Performance analysis: http://neil.fraser.name/news/2007/10/09/
539
pointermax = min(len(text1), len(text2))
540
pointermid = pointermax
542
while pointermin < pointermid:
543
if (text1[-pointermid:len(text1) - pointerend] ==
544
text2[-pointermid:len(text2) - pointerend]):
545
pointermin = pointermid
546
pointerend = pointermin
548
pointermax = pointermid
549
pointermid = int((pointermax - pointermin) / 2 + pointermin)
552
def diff_halfMatch(self, text1, text2):
553
"""Do the two texts share a substring which is at least half the length of
558
text2: Second string.
561
Five element Array, containing the prefix of text1, the suffix of text1,
562
the prefix of text2, the suffix of text2 and the common middle. Or None
563
if there was no match.
565
if len(text1) > len(text2):
566
(longtext, shorttext) = (text1, text2)
568
(shorttext, longtext) = (text1, text2)
569
if len(longtext) < 10 or len(shorttext) < 1:
570
return None # Pointless.
572
def diff_halfMatchI(longtext, shorttext, i):
573
"""Does a substring of shorttext exist within longtext such that the
574
substring is at least half the length of longtext?
575
Closure, but does not reference any external variables.
578
longtext: Longer string.
579
shorttext: Shorter string.
580
i: Start index of quarter length substring within longtext.
583
Five element Array, containing the prefix of longtext, the suffix of
584
longtext, the prefix of shorttext, the suffix of shorttext and the
585
common middle. Or None if there was no match.
587
seed = longtext[i:i + len(longtext) / 4]
589
j = shorttext.find(seed)
591
prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
592
suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
593
if len(best_common) < suffixLength + prefixLength:
594
best_common = (shorttext[j - suffixLength:j] +
595
shorttext[j:j + prefixLength])
596
best_longtext_a = longtext[:i - suffixLength]
597
best_longtext_b = longtext[i + prefixLength:]
598
best_shorttext_a = shorttext[:j - suffixLength]
599
best_shorttext_b = shorttext[j + prefixLength:]
600
j = shorttext.find(seed, j + 1)
602
if len(best_common) >= len(longtext) / 2:
603
return (best_longtext_a, best_longtext_b,
604
best_shorttext_a, best_shorttext_b, best_common)
608
# First check if the second quarter is the seed for a half-match.
609
hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
610
# Check again based on the third quarter.
611
hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
612
if not hm1 and not hm2:
619
# Both matched. Select the longest.
620
if len(hm1[4]) > len(hm2[4]):
625
# A half-match was found, sort out the return data.
626
if len(text1) > len(text2):
627
(text1_a, text1_b, text2_a, text2_b, mid_common) = hm
629
(text2_a, text2_b, text1_a, text1_b, mid_common) = hm
630
return (text1_a, text1_b, text2_a, text2_b, mid_common)
632
def diff_cleanupSemantic(self, diffs):
633
"""Reduce the number of edits by eliminating semantically trivial
637
diffs: Array of diff tuples.
640
equalities = [] # Stack of indices where equalities are found.
641
lastequality = None # Always equal to equalities[-1][1]
642
pointer = 0 # Index of current position.
643
length_changes1 = 0 # Number of chars that changed prior to the equality.
644
length_changes2 = 0 # Number of chars that changed after the equality.
645
while pointer < len(diffs):
646
if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
647
equalities.append(pointer)
648
length_changes1 = length_changes2
650
lastequality = diffs[pointer][1]
651
else: # an insertion or deletion
652
length_changes2 += len(diffs[pointer][1])
653
if (lastequality != None and (len(lastequality) <= length_changes1) and
654
(len(lastequality) <= length_changes2)):
656
diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
657
# Change second copy to insert.
658
diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
659
diffs[equalities[-1] + 1][1])
660
# Throw away the equality we just deleted.
662
# Throw away the previous equality (it needs to be reevaluated).
663
if len(equalities) != 0:
666
pointer = equalities[-1]
669
length_changes1 = 0 # Reset the counters.
676
self.diff_cleanupMerge(diffs)
678
self.diff_cleanupSemanticLossless(diffs)
680
def diff_cleanupSemanticLossless(self, diffs):
681
"""Look for single edits surrounded on both sides by equalities
682
which can be shifted sideways to align the edit to a word boundary.
683
e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
686
diffs: Array of diff tuples.
689
def diff_cleanupSemanticScore(one, two):
690
"""Given two strings, compute a score representing whether the
691
internal boundary falls on logical boundaries.
692
Scores range from 5 (best) to 0 (worst).
693
Closure, but does not reference any external variables.
702
if not one or not two:
703
# Edges are the best.
706
# Each port of this function behaves slightly differently due to
707
# subtle differences in each language's definition of things like
708
# 'whitespace'. Since this function's purpose is largely cosmetic,
709
# the choice has been made to use each language's native features
710
# rather than force total conformity.
712
# One point for non-alphanumeric.
713
if not one[-1].isalnum() or not two[0].isalnum():
715
# Two points for whitespace.
716
if one[-1].isspace() or two[0].isspace():
718
# Three points for line breaks.
719
if (one[-1] == "\r" or one[-1] == "\n" or
720
two[0] == "\r" or two[0] == "\n"):
722
# Four points for blank lines.
723
if (re.search("\\n\\r?\\n$", one) or
724
re.match("^\\r?\\n\\r?\\n", two)):
729
# Intentionally ignore the first and last element (don't need checking).
730
while pointer < len(diffs) - 1:
731
if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
732
diffs[pointer + 1][0] == self.DIFF_EQUAL):
733
# This is a single edit surrounded by equalities.
734
equality1 = diffs[pointer - 1][1]
735
edit = diffs[pointer][1]
736
equality2 = diffs[pointer + 1][1]
738
# First, shift the edit as far left as possible.
739
commonOffset = self.diff_commonSuffix(equality1, edit)
741
commonString = edit[-commonOffset:]
742
equality1 = equality1[:-commonOffset]
743
edit = commonString + edit[:-commonOffset]
744
equality2 = commonString + equality2
746
# Second, step character by character right, looking for the best fit.
747
bestEquality1 = equality1
749
bestEquality2 = equality2
750
bestScore = (diff_cleanupSemanticScore(equality1, edit) +
751
diff_cleanupSemanticScore(edit, equality2))
752
while edit and equality2 and edit[0] == equality2[0]:
754
edit = edit[1:] + equality2[0]
755
equality2 = equality2[1:]
756
score = (diff_cleanupSemanticScore(equality1, edit) +
757
diff_cleanupSemanticScore(edit, equality2))
758
# The >= encourages trailing rather than leading whitespace on edits.
759
if score >= bestScore:
761
bestEquality1 = equality1
763
bestEquality2 = equality2
765
if diffs[pointer - 1][1] != bestEquality1:
766
# We have an improvement, save it back to the diff.
768
diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
770
del diffs[pointer - 1]
772
diffs[pointer] = (diffs[pointer][0], bestEdit)
774
diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
776
del diffs[pointer + 1]
780
def diff_cleanupEfficiency(self, diffs):
781
"""Reduce the number of edits by eliminating operationally trivial
785
diffs: Array of diff tuples.
788
equalities = [] # Stack of indices where equalities are found.
789
lastequality = '' # Always equal to equalities[-1][1]
790
pointer = 0 # Index of current position.
791
pre_ins = False # Is there an insertion operation before the last equality.
792
pre_del = False # Is there a deletion operation before the last equality.
793
post_ins = False # Is there an insertion operation after the last equality.
794
post_del = False # Is there a deletion operation after the last equality.
795
while pointer < len(diffs):
796
if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
797
if (len(diffs[pointer][1]) < self.Diff_EditCost and
798
(post_ins or post_del)):
800
equalities.append(pointer)
803
lastequality = diffs[pointer][1]
805
# Not a candidate, and can never become one.
809
post_ins = post_del = False
810
else: # an insertion or deletion
811
if diffs[pointer][0] == self.DIFF_DELETE:
816
# Five types to be split:
817
# <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
818
# <ins>A</ins>X<ins>C</ins><del>D</del>
819
# <ins>A</ins><del>B</del>X<ins>C</ins>
820
# <ins>A</del>X<ins>C</ins><del>D</del>
821
# <ins>A</ins><del>B</del>X<del>C</del>
823
if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
824
((len(lastequality) < self.Diff_EditCost / 2) and
825
(pre_ins + pre_del + post_ins + post_del) == 3)):
827
diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
828
# Change second copy to insert.
829
diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
830
diffs[equalities[-1] + 1][1])
831
equalities.pop() # Throw away the equality we just deleted
833
if pre_ins and pre_del:
834
# No changes made which could affect previous entry, keep going.
835
post_ins = post_del = True
839
equalities.pop() # Throw away the previous equality
841
pointer = equalities[-1]
844
post_ins = post_del = False
849
self.diff_cleanupMerge(diffs)
851
def diff_cleanupMerge(self, diffs):
852
"""Reorder and merge like edit sections. Merge equalities.
853
Any edit section can move as long as it doesn't cross an equality.
856
diffs: Array of diff tuples.
858
diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
864
while pointer < len(diffs):
865
if diffs[pointer][0] == self.DIFF_INSERT:
867
text_insert += diffs[pointer][1]
869
elif diffs[pointer][0] == self.DIFF_DELETE:
871
text_delete += diffs[pointer][1]
873
elif diffs[pointer][0] == self.DIFF_EQUAL:
874
# Upon reaching an equality, check for prior redundancies.
875
if count_delete != 0 or count_insert != 0:
876
if count_delete != 0 and count_insert != 0:
877
# Factor out any common prefixies.
878
commonlength = self.diff_commonPrefix(text_insert, text_delete)
879
if commonlength != 0:
880
x = pointer - count_delete - count_insert - 1
881
if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
882
diffs[x] = (diffs[x][0], diffs[x][1] +
883
text_insert[:commonlength])
885
diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
887
text_insert = text_insert[commonlength:]
888
text_delete = text_delete[commonlength:]
889
# Factor out any common suffixies.
890
commonlength = self.diff_commonSuffix(text_insert, text_delete)
891
if commonlength != 0:
892
diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
894
text_insert = text_insert[:-commonlength]
895
text_delete = text_delete[:-commonlength]
896
# Delete the offending records and add the merged ones.
897
if count_delete == 0:
898
diffs[pointer - count_insert : pointer] = [
899
(self.DIFF_INSERT, text_insert)]
900
elif count_insert == 0:
901
diffs[pointer - count_delete : pointer] = [
902
(self.DIFF_DELETE, text_delete)]
904
diffs[pointer - count_delete - count_insert : pointer] = [
905
(self.DIFF_DELETE, text_delete),
906
(self.DIFF_INSERT, text_insert)]
907
pointer = pointer - count_delete - count_insert + 1
908
if count_delete != 0:
910
if count_insert != 0:
912
elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
913
# Merge this equality with the previous one.
914
diffs[pointer - 1] = (diffs[pointer - 1][0],
915
diffs[pointer - 1][1] + diffs[pointer][1])
925
if diffs[-1][1] == '':
926
diffs.pop() # Remove the dummy entry at the end.
928
# Second pass: look for single edits surrounded on both sides by equalities
929
# which can be shifted sideways to eliminate an equality.
930
# e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
933
# Intentionally ignore the first and last element (don't need checking).
934
while pointer < len(diffs) - 1:
935
if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
936
diffs[pointer + 1][0] == self.DIFF_EQUAL):
937
# This is a single edit surrounded by equalities.
938
if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
939
# Shift the edit over the previous equality.
940
diffs[pointer] = (diffs[pointer][0],
941
diffs[pointer - 1][1] +
942
diffs[pointer][1][:-len(diffs[pointer - 1][1])])
943
diffs[pointer + 1] = (diffs[pointer + 1][0],
944
diffs[pointer - 1][1] + diffs[pointer + 1][1])
945
del diffs[pointer - 1]
947
elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
948
# Shift the edit over the next equality.
949
diffs[pointer - 1] = (diffs[pointer - 1][0],
950
diffs[pointer - 1][1] + diffs[pointer + 1][1])
951
diffs[pointer] = (diffs[pointer][0],
952
diffs[pointer][1][len(diffs[pointer + 1][1]):] +
953
diffs[pointer + 1][1])
954
del diffs[pointer + 1]
958
# If shifts were made, the diff needs reordering and another shift sweep.
960
self.diff_cleanupMerge(diffs)
962
def diff_xIndex(self, diffs, loc):
963
"""loc is a location in text1, compute and return the equivalent location
964
in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
967
diffs: Array of diff tuples.
968
loc: Location within text1.
971
Location within text2.
977
for x in xrange(len(diffs)):
978
(op, text) = diffs[x]
979
if op != self.DIFF_INSERT: # Equality or deletion.
981
if op != self.DIFF_DELETE: # Equality or insertion.
983
if chars1 > loc: # Overshot the location.
988
if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
989
# The location was deleted.
991
# Add the remaining len(character).
992
return last_chars2 + (loc - last_chars1)
994
def diff_prettyHtml(self, diffs):
995
"""Convert a diff array into a pretty HTML report.
998
diffs: Array of diff tuples.
1001
HTML representation.
1005
for (op, data) in diffs:
1006
text = (data.replace("&", "&").replace("<", "<")
1007
.replace(">", ">").replace("\n", "¶<BR>"))
1008
if op == self.DIFF_INSERT:
1009
html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
1011
elif op == self.DIFF_DELETE:
1012
html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
1014
elif op == self.DIFF_EQUAL:
1015
html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1016
if op != self.DIFF_DELETE:
1018
return "".join(html)
1020
def diff_text1(self, diffs):
1021
"""Compute and return the source text (all equalities and deletions).
1024
diffs: Array of diff tuples.
1030
for (op, data) in diffs:
1031
if op != self.DIFF_INSERT:
1033
return "".join(text)
1035
def diff_text2(self, diffs):
1036
"""Compute and return the destination text (all equalities and insertions).
1039
diffs: Array of diff tuples.
1045
for (op, data) in diffs:
1046
if op != self.DIFF_DELETE:
1048
return "".join(text)
1050
def diff_levenshtein(self, diffs):
1051
"""Compute the Levenshtein distance; the number of inserted, deleted or
1052
substituted characters.
1055
diffs: Array of diff tuples.
1063
for (op, data) in diffs:
1064
if op == self.DIFF_INSERT:
1065
insertions += len(data)
1066
elif op == self.DIFF_DELETE:
1067
deletions += len(data)
1068
elif op == self.DIFF_EQUAL:
1069
# A deletion and an insertion is one substitution.
1070
levenshtein += max(insertions, deletions)
1073
levenshtein += max(insertions, deletions)
1076
def diff_toDelta(self, diffs):
1077
"""Crush the diff into an encoded string which describes the operations
1078
required to transform text1 into text2.
1079
E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1080
Operations are tab-separated. Inserted text is escaped using %xx notation.
1083
diffs: Array of diff tuples.
1089
for (op, data) in diffs:
1090
if op == self.DIFF_INSERT:
1091
# High ascii will raise UnicodeDecodeError. Use Unicode instead.
1092
data = data.encode("utf-8")
1093
text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1094
elif op == self.DIFF_DELETE:
1095
text.append("-%d" % len(data))
1096
elif op == self.DIFF_EQUAL:
1097
text.append("=%d" % len(data))
1098
return "\t".join(text)
1100
def diff_fromDelta(self, text1, delta):
1101
"""Given the original text1, and an encoded string which describes the
1102
operations required to transform text1 into text2, compute the full diff.
1105
text1: Source string for the diff.
1109
Array of diff tuples.
1112
ValueError: If invalid input.
1114
if type(delta) == unicode:
1115
# Deltas should be composed of a subset of ascii chars, Unicode not
1116
# required. If this encode raises UnicodeEncodeError, delta is invalid.
1117
delta = delta.encode("ascii")
1119
pointer = 0 # Cursor in text1
1120
tokens = delta.split("\t")
1121
for token in tokens:
1123
# Blank tokens are ok (from a trailing \t).
1125
# Each token begins with a one character parameter which specifies the
1126
# operation of this token (delete, insert, equality).
1129
param = urllib.unquote(param).decode("utf-8")
1130
diffs.append((self.DIFF_INSERT, param))
1131
elif token[0] == "-" or token[0] == "=":
1135
raise ValueError, "Invalid number in diff_fromDelta: " + param
1137
raise ValueError, "Negative number in diff_fromDelta: " + param
1138
text = text1[pointer : pointer + n]
1141
diffs.append((self.DIFF_EQUAL, text))
1143
diffs.append((self.DIFF_DELETE, text))
1145
# Anything else is an error.
1146
raise ValueError, ("Invalid diff operation in diff_fromDelta: " +
1148
if pointer != len(text1):
1150
"Delta length (%d) does not equal source text length (%d)." %
1151
(pointer, len(text1)))
1156
def match_main(self, text, pattern, loc):
1157
"""Locate the best instance of 'pattern' in 'text' near 'loc'.
1160
text: The text to search.
1161
pattern: The pattern to search for.
1162
loc: The location to search around.
1165
Best match index or -1.
1167
loc = max(0, min(loc, len(text)))
1169
# Shortcut (potentially not guaranteed by the algorithm)
1174
elif text[loc:loc + len(pattern)] == pattern:
1175
# Perfect match at the perfect spot! (Includes case of null pattern)
1178
# Do a fuzzy compare.
1179
match = self.match_bitap(text, pattern, loc)
1182
def match_bitap(self, text, pattern, loc):
  """Locate the best instance of 'pattern' in 'text' near 'loc' using the
  Bitap algorithm.

  Args:
    text: The text to search.
    pattern: The pattern to search for.
    loc: The location to search around.

  Returns:
    Best match index or -1.
  """
  # Python doesn't have a maxint limit, so ignore this check.
  #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
  #  raise ValueError("Pattern too long for this application.")

  # Initialise the alphabet.
  s = self.match_alphabet(pattern)

  def match_bitapScore(e, x):
    """Compute and return the score for a match with e errors and x location.
    Accesses loc and pattern through being a closure.

    Args:
      e: Number of errors in match.
      x: Location of match.

    Returns:
      Overall score for match (0.0 = good, 1.0 = bad).
    """
    accuracy = float(e) / len(pattern)
    proximity = abs(loc - x)
    if not self.Match_Distance:
      # Dodge divide by zero error.
      return proximity and 1.0 or accuracy
    return accuracy + (proximity / float(self.Match_Distance))

  # Highest score beyond which we give up.
  score_threshold = self.Match_Threshold
  # Is there a nearby exact match? (speedup)
  best_loc = text.find(pattern, loc)
  if best_loc != -1:
    score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
  # What about in the other direction? (speedup)
  best_loc = text.rfind(pattern, loc + len(pattern))
  if best_loc != -1:
    score_threshold = min(match_bitapScore(0, best_loc), score_threshold)

  # Initialise the bit arrays.
  matchmask = 1 << (len(pattern) - 1)
  best_loc = -1

  bin_max = len(pattern) + len(text)
  # Empty initialization added to appease pychecker.
  last_rd = None
  for d in xrange(len(pattern)):
    # Scan for the best match each iteration allows for one more error.
    # Run a binary search to determine how far from 'loc' we can stray at
    # this error level.
    bin_min = 0
    bin_mid = bin_max
    while bin_min < bin_mid:
      if match_bitapScore(d, loc + bin_mid) <= score_threshold:
        bin_min = bin_mid
      else:
        bin_max = bin_mid
      bin_mid = (bin_max - bin_min) / 2 + bin_min
    # Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    start = max(1, loc - bin_mid + 1)
    finish = min(loc + bin_mid, len(text)) + len(pattern)

    rd = range(finish + 1)
    rd.append((1 << d) - 1)
    for j in xrange(finish, start - 1, -1):
      if len(text) <= j - 1:
        # Out of range.
        charMatch = 0
      else:
        charMatch = s.get(text[j - 1], 0)
      if d == 0:  # First pass: exact match.
        rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
      else:  # Subsequent passes: fuzzy match.
        rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
            ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
      if rd[j] & matchmask:
        score = match_bitapScore(d, j - 1)
        # This match will almost certainly be better than any existing match.
        # But check anyway.
        if score <= score_threshold:
          # Told you so.
          score_threshold = score
          best_loc = j - 1
          if best_loc > loc:
            # When passing loc, don't exceed our current distance from loc.
            start = max(1, 2 * loc - best_loc)
          else:
            # Already passed loc, downhill from here on in.
            break
    # No hope for a (better) match at greater error levels.
    if match_bitapScore(d + 1, loc) > score_threshold:
      break
    last_rd = rd
  return best_loc
1288
def match_alphabet(self, pattern):
  """Initialise the alphabet for the Bitap algorithm.

  Each character maps to a bitmask with a 1 bit at every position where
  that character occurs in the pattern (MSB = first pattern char).

  Args:
    pattern: The text to encode.

  Returns:
    Hash of character locations.
  """
  s = {}
  for char in pattern:
    s[char] = 0
  for i in xrange(len(pattern)):
    s[pattern[i]] |= 1 << (len(pattern) - i - 1)
  return s
1306
def patch_addContext(self, patch, text):
  """Increase the context until it is unique,
  but don't let the pattern expand beyond Match_MaxBits.

  Args:
    patch: The patch to grow.
    text: Source text.
  """
  if len(text) == 0:
    # Nothing to draw context from.
    return
  pattern = text[patch.start2 : patch.start2 + patch.length1]
  padding = 0

  # Look for the first and last matches of pattern in text.  If two different
  # matches are found, increase the pattern length.
  while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
      0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
      self.Patch_Margin)):
    padding += self.Patch_Margin
    pattern = text[max(0, patch.start2 - padding) :
                   patch.start2 + patch.length1 + padding]
  # Add one chunk for good luck.
  padding += self.Patch_Margin

  # Add the prefix.
  prefix = text[max(0, patch.start2 - padding) : patch.start2]
  if prefix:
    patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
  # Add the suffix.
  suffix = text[patch.start2 + patch.length1 :
                patch.start2 + patch.length1 + padding]
  if suffix:
    patch.diffs.append((self.DIFF_EQUAL, suffix))

  # Roll back the start points.
  patch.start1 -= len(prefix)
  patch.start2 -= len(prefix)
  # Extend lengths.
  patch.length1 += len(prefix) + len(suffix)
  patch.length2 += len(prefix) + len(suffix)
1347
def patch_make(self, a, b=None, c=None):
  """Compute a list of patches to turn text1 into text2.
  Use diffs if provided, otherwise compute it ourselves.
  There are four ways to call this function, depending on what data is
  available to the caller:
  Method 1:
  a = text1, b = text2
  Method 2:
  a = diffs
  Method 3 (optimal):
  a = text1, b = diffs
  Method 4 (deprecated, use method 3):
  a = text1, b = text2, c = diffs

  Args:
    a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
        text2 (method 2).
    b: text2 (methods 1,4) or Array of diff tuples for text1 to
        text2 (method 3) or undefined (method 2).
    c: Array of diff tuples for text1 to text2 (method 4) or
        undefined (methods 1,2,3).

  Returns:
    Array of patch objects.

  Raises:
    ValueError: If the arguments do not match any of the four call formats.
  """
  text1 = None
  diffs = None
  # Note that texts may arrive as 'str' or 'unicode'.
  if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
    # Method 1: text1, text2
    # Compute diffs from text1 and text2.
    text1 = a
    diffs = self.diff_main(text1, b, True)
    if len(diffs) > 2:
      self.diff_cleanupSemantic(diffs)
      self.diff_cleanupEfficiency(diffs)
  elif isinstance(a, list) and b is None and c is None:
    # Method 2: diffs
    # Compute text1 from diffs.
    diffs = a
    text1 = self.diff_text1(diffs)
  elif isinstance(a, basestring) and isinstance(b, list) and c is None:
    # Method 3: text1, diffs
    text1 = a
    diffs = b
  elif (isinstance(a, basestring) and isinstance(b, basestring) and
        isinstance(c, list)):
    # Method 4: text1, text2, diffs
    # text2 is not used.
    text1 = a
    diffs = c
  else:
    raise ValueError("Unknown call format to patch_make.")

  if not diffs:
    return []  # Get rid of the None case.
  patches = []
  patch = patch_obj()
  char_count1 = 0  # Number of characters into the text1 string.
  char_count2 = 0  # Number of characters into the text2 string.
  prepatch_text = text1  # Recreate the patches to determine context info.
  postpatch_text = text1
  for x in xrange(len(diffs)):
    (diff_type, diff_text) = diffs[x]
    if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
      # A new patch starts here.
      patch.start1 = char_count1
      patch.start2 = char_count2
    if diff_type == self.DIFF_INSERT:
      # Insertion.
      patch.diffs.append(diffs[x])
      patch.length2 += len(diff_text)
      postpatch_text = (postpatch_text[:char_count2] + diff_text +
                        postpatch_text[char_count2:])
    elif diff_type == self.DIFF_DELETE:
      # Deletion.
      patch.length1 += len(diff_text)
      patch.diffs.append(diffs[x])
      postpatch_text = (postpatch_text[:char_count2] +
                        postpatch_text[char_count2 + len(diff_text):])
    elif (diff_type == self.DIFF_EQUAL and
          len(diff_text) <= 2 * self.Patch_Margin and
          len(patch.diffs) != 0 and len(diffs) != x + 1):
      # Small equality inside a patch.
      patch.diffs.append(diffs[x])
      patch.length1 += len(diff_text)
      patch.length2 += len(diff_text)

    if (diff_type == self.DIFF_EQUAL and
        len(diff_text) >= 2 * self.Patch_Margin):
      # Time for a new patch.
      if len(patch.diffs) != 0:
        self.patch_addContext(patch, prepatch_text)
        patches.append(patch)
        patch = patch_obj()
        # Unlike Unidiff, our patch lists have a rolling context.
        # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
        # Update prepatch text & pos to reflect the application of the
        # just completed patch.
        prepatch_text = postpatch_text
        char_count1 = char_count2

    # Update the current character count.
    if diff_type != self.DIFF_INSERT:
      char_count1 += len(diff_text)
    if diff_type != self.DIFF_DELETE:
      char_count2 += len(diff_text)

  # Pick up the leftover patch if not empty.
  if len(patch.diffs) != 0:
    self.patch_addContext(patch, prepatch_text)
    patches.append(patch)

  return patches
1461
def patch_deepCopy(self, patches):
  """Given an array of patches, return another array that is identical.

  Args:
    patches: Array of patch objects.

  Returns:
    Array of patch objects.
  """
  patchesCopy = []
  for patch in patches:
    patchCopy = patch_obj()
    # No need to deep copy the tuples since they are immutable.
    patchCopy.diffs = patch.diffs[:]
    patchCopy.start1 = patch.start1
    patchCopy.start2 = patch.start2
    patchCopy.length1 = patch.length1
    patchCopy.length2 = patch.length2
    patchesCopy.append(patchCopy)
  return patchesCopy
1482
def patch_apply(self, patches, text):
  """Merge a set of patches onto the text.  Return a patched text, as well
  as a list of true/false values indicating which patches were applied.

  Args:
    patches: Array of patch objects.
    text: Old text.

  Returns:
    Two element Array, containing the new text and an array of boolean values.
  """
  if not patches:
    return (text, [])

  # Deep copy the patches so that no changes are made to originals.
  patches = self.patch_deepCopy(patches)

  nullPadding = self.patch_addPadding(patches)
  text = nullPadding + text + nullPadding
  self.patch_splitMax(patches)

  # delta keeps track of the offset between the expected and actual location
  # of the previous patch.  If there are patches expected at positions 10 and
  # 20, but the first patch was found at 12, delta is 2 and the second patch
  # has an effective expected position of 22.
  delta = 0
  results = []
  for patch in patches:
    expected_loc = patch.start2 + delta
    text1 = self.diff_text1(patch.diffs)
    end_loc = -1
    if len(text1) > self.Match_MaxBits:
      # patch_splitMax will only provide an oversized pattern in the case of
      # a monster delete.
      start_loc = self.match_main(text, text1[:self.Match_MaxBits],
                                  expected_loc)
      if start_loc != -1:
        end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
            expected_loc + len(text1) - self.Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc:
          # Can't find valid trailing context.  Drop this patch.
          start_loc = -1
    else:
      start_loc = self.match_main(text, text1, expected_loc)
    if start_loc == -1:
      # No match found.  :(
      results.append(False)
      # Subtract the delta for this failed patch from subsequent patches.
      delta -= patch.length2 - patch.length1
    else:
      # Found a match.  :)
      results.append(True)
      delta = start_loc - expected_loc
      if end_loc == -1:
        text2 = text[start_loc : start_loc + len(text1)]
      else:
        text2 = text[start_loc : end_loc + self.Match_MaxBits]
      if text1 == text2:
        # Perfect match, just shove the replacement text in.
        text = (text[:start_loc] + self.diff_text2(patch.diffs) +
                text[start_loc + len(text1):])
      else:
        # Imperfect match.
        # Run a diff to get a framework of equivalent indices.
        diffs = self.diff_main(text1, text2, False)
        if (len(text1) > self.Match_MaxBits and
            self.diff_levenshtein(diffs) / float(len(text1)) >
            self.Patch_DeleteThreshold):
          # The end points match, but the content is unacceptably bad.
          results[-1] = False
        else:
          self.diff_cleanupSemanticLossless(diffs)
          index1 = 0
          for (op, data) in patch.diffs:
            if op != self.DIFF_EQUAL:
              index2 = self.diff_xIndex(diffs, index1)
            if op == self.DIFF_INSERT:  # Insertion
              text = text[:start_loc + index2] + data + text[start_loc +
                                                             index2:]
            elif op == self.DIFF_DELETE:  # Deletion
              text = text[:start_loc + index2] + text[start_loc +
                  self.diff_xIndex(diffs, index1 + len(data)):]
            if op != self.DIFF_DELETE:
              index1 += len(data)
  # Strip the padding off.
  text = text[len(nullPadding):-len(nullPadding)]
  return (text, results)
1570
def patch_addPadding(self, patches):
  """Add some padding on text start and end so that edges can match
  something.  Intended to be called only from within patch_apply.

  Args:
    patches: Array of patch objects.

  Returns:
    The padding string added to each side.
  """
  paddingLength = self.Patch_Margin
  nullPadding = ""
  # Padding is the control characters \x01..\x04, which cannot collide
  # with real text.
  for x in xrange(1, paddingLength + 1):
    nullPadding += chr(x)

  # Bump all the patches forward.
  for patch in patches:
    patch.start1 += paddingLength
    patch.start2 += paddingLength

  # Add some padding on start of first diff.
  patch = patches[0]
  diffs = patch.diffs
  if not diffs or diffs[0][0] != self.DIFF_EQUAL:
    # Add nullPadding equality.
    diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
    patch.start1 -= paddingLength  # Should be 0.
    patch.start2 -= paddingLength  # Should be 0.
    patch.length1 += paddingLength
    patch.length2 += paddingLength
  elif paddingLength > len(diffs[0][1]):
    # Grow first equality.
    extraLength = paddingLength - len(diffs[0][1])
    newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
    diffs[0] = (diffs[0][0], newText)
    patch.start1 -= extraLength
    patch.start2 -= extraLength
    patch.length1 += extraLength
    patch.length2 += extraLength

  # Add some padding on end of last diff.
  patch = patches[-1]
  diffs = patch.diffs
  if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
    # Add nullPadding equality.
    diffs.append((self.DIFF_EQUAL, nullPadding))
    patch.length1 += paddingLength
    patch.length2 += paddingLength
  elif paddingLength > len(diffs[-1][1]):
    # Grow last equality.
    extraLength = paddingLength - len(diffs[-1][1])
    newText = diffs[-1][1] + nullPadding[:extraLength]
    diffs[-1] = (diffs[-1][0], newText)
    patch.length1 += extraLength
    patch.length2 += extraLength

  return nullPadding
1628
def patch_splitMax(self, patches):
  """Look through the patches and break up any which are longer than the
  maximum limit of the match algorithm.

  Args:
    patches: Array of patch objects.
  """
  if self.Match_MaxBits == 0:
    # Python has arbitrary-precision ints, so no splitting is required.
    return
  for x in xrange(len(patches)):
    if patches[x].length1 > self.Match_MaxBits:
      bigpatch = patches[x]
      # Remove the big old patch.
      del patches[x]
      x -= 1
      patch_size = self.Match_MaxBits
      start1 = bigpatch.start1
      start2 = bigpatch.start2
      precontext = ''
      while len(bigpatch.diffs) != 0:
        # Create one of several smaller patches.
        patch = patch_obj()
        empty = True
        patch.start1 = start1 - len(precontext)
        patch.start2 = start2 - len(precontext)
        if precontext:
          patch.length1 = patch.length2 = len(precontext)
          patch.diffs.append((self.DIFF_EQUAL, precontext))

        while (len(bigpatch.diffs) != 0 and
               patch.length1 < patch_size - self.Patch_Margin):
          (diff_type, diff_text) = bigpatch.diffs[0]
          if diff_type == self.DIFF_INSERT:
            # Insertions are harmless.
            patch.length2 += len(diff_text)
            start2 += len(diff_text)
            patch.diffs.append(bigpatch.diffs.pop(0))
            empty = False
          elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
              patch.diffs[0][0] == self.DIFF_EQUAL and
              len(diff_text) > 2 * patch_size):
            # This is a large deletion.  Let it pass in one chunk.
            patch.length1 += len(diff_text)
            start1 += len(diff_text)
            empty = False
            patch.diffs.append((diff_type, diff_text))
            del bigpatch.diffs[0]
          else:
            # Deletion or equality.  Only take as much as we can stomach.
            diff_text = diff_text[:patch_size - patch.length1 -
                                  self.Patch_Margin]
            patch.length1 += len(diff_text)
            start1 += len(diff_text)
            if diff_type == self.DIFF_EQUAL:
              patch.length2 += len(diff_text)
              start2 += len(diff_text)
            else:
              empty = False

            patch.diffs.append((diff_type, diff_text))
            if diff_text == bigpatch.diffs[0][1]:
              del bigpatch.diffs[0]
            else:
              bigpatch.diffs[0] = (bigpatch.diffs[0][0],
                                   bigpatch.diffs[0][1][len(diff_text):])

        # Compute the head context for the next patch.
        precontext = self.diff_text2(patch.diffs)
        precontext = precontext[-self.Patch_Margin:]
        # Append the end context for this patch.
        postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
        if postcontext:
          patch.length1 += len(postcontext)
          patch.length2 += len(postcontext)
          if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
            patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
                               postcontext)
          else:
            patch.diffs.append((self.DIFF_EQUAL, postcontext))

        if not empty:
          x += 1
          patches.insert(x, patch)
1712
def patch_toText(self, patches):
  """Take a list of patches and return a textual representation.

  Args:
    patches: Array of patch objects.

  Returns:
    Text representation of patches.
  """
  text = []
  for patch in patches:
    text.append(str(patch))
  return "".join(text)
1726
def patch_fromText(self, textline):
1727
"""Parse a textual representation of patches and return a list of patch
1731
textline: Text representation of patches.
1734
Array of patch objects.
1737
ValueError: If invalid input.
1739
if type(textline) == unicode:
1740
# Patches should be composed of a subset of ascii chars, Unicode not
1741
# required. If this encode raises UnicodeEncodeError, patch is invalid.
1742
textline = textline.encode("ascii")
1746
text = textline.split('\n')
1747
while len(text) != 0:
1748
m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1750
raise ValueError, "Invalid patch string: " + text[0]
1752
patches.append(patch)
1753
patch.start1 = int(m.group(1))
1754
if m.group(2) == '':
1757
elif m.group(2) == '0':
1761
patch.length1 = int(m.group(2))
1763
patch.start2 = int(m.group(3))
1764
if m.group(4) == '':
1767
elif m.group(4) == '0':
1771
patch.length2 = int(m.group(4))
1775
while len(text) != 0:
1780
line = urllib.unquote(text[0][1:])
1781
line = line.decode("utf-8")
1784
patch.diffs.append((self.DIFF_INSERT, line))
1787
patch.diffs.append((self.DIFF_DELETE, line))
1790
patch.diffs.append((self.DIFF_EQUAL, line))
1792
# Start of next patch.
1795
# Blank line? Whatever.
1799
raise ValueError, "Invalid patch mode: '%s'\n%s" % (sign, line)
class patch_obj:
  """Class representing one patch operation.
  """

  def __init__(self):
    """Initializes with an empty list of diffs.
    """
    self.diffs = []
    self.start1 = None
    self.start2 = None
    self.length1 = 0
    self.length2 = 0

  def __str__(self):
    """Emmulate GNU diff's format.
    Header: @@ -382,8 +481,9 @@
    Indicies are printed as 1-based, not 0-based.

    Returns:
      The GNU diff string.
    """
    if self.length1 == 0:
      coords1 = str(self.start1) + ",0"
    elif self.length1 == 1:
      coords1 = str(self.start1 + 1)
    else:
      coords1 = str(self.start1 + 1) + "," + str(self.length1)
    if self.length2 == 0:
      coords2 = str(self.start2) + ",0"
    elif self.length2 == 1:
      coords2 = str(self.start2 + 1)
    else:
      coords2 = str(self.start2 + 1) + "," + str(self.length2)
    text = ["@@ -", coords1, " +", coords2, " @@\n"]
    # Escape the body of the patch with %xx notation.
    for (op, data) in self.diffs:
      if op == diff_match_patch.DIFF_INSERT:
        text.append("+")
      elif op == diff_match_patch.DIFF_DELETE:
        text.append("-")
      elif op == diff_match_patch.DIFF_EQUAL:
        text.append(" ")
      # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
      data = data.encode("utf-8")
      text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
    return "".join(text)