3
# Taken from google because they do not provide a setup.py file. Thanks anyway
5
"""Diff Match and Patch.
7
Copyright 2006 Google Inc.
8
http://code.google.com/p/google-diff-match-patch/
10
Licensed under the Apache License, Version 2.0 (the "License");
11
you may not use this file except in compliance with the License.
12
You may obtain a copy of the License at
14
http://www.apache.org/licenses/LICENSE-2.0
16
Unless required by applicable law or agreed to in writing, software
17
distributed under the License is distributed on an "AS IS" BASIS,
18
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
See the License for the specific language governing permissions and
20
limitations under the License.
24
"""Functions for diff, match and patch.
26
Computes the difference between two texts to create a patch.
27
Applies the patch onto another text, allowing for errors.
30
__author__ = 'fraser@google.com (Neil Fraser)'
38
class diff_match_patch:
39
"""Class containing the diff, match and patch methods.
41
Also contains the behaviour settings.
46
"""Inits a diff_match_patch object with default settings.
48
Redefine these in your program to override the defaults.
52
# Number of seconds to map a diff before giving up (0 for infinity).
53
self.Diff_Timeout = 1.0
54
# Cost of an empty edit operation in terms of edit characters.
55
self.Diff_EditCost = 4
56
# The size beyond which the double-ended diff activates.
57
# Double-ending is twice as fast, but less accurate.
58
self.Diff_DualThreshold = 32
59
# At what point is no match declared (0.0 = perfection, 1.0 = very
61
self.Match_Threshold = 0.5
62
# How far to search for a match (0 = exact location, 1000+ = broad match).
63
# A match this many characters away from the expected location will add
64
# 1.0 to the score (0.0 is a perfect match).
65
self.Match_Distance = 1000
66
# When deleting a large block of text (over ~64 characters), how close does
67
# the contents have to match the expected contents. (0.0 = perfection,
68
# 1.0 = very loose). Note that Match_Threshold controls how closely the
69
# end points of a delete need to match.
70
self.Patch_DeleteThreshold = 0.5
71
# Chunk size for context length.
74
# How many bits in a number?
75
# Python has no maximum, thus to disable patch splitting set to 0.
76
# However to avoid long patches in certain pathological cases, use 32.
77
# Multiple short patches (using native ints) are much faster than long
79
self.Match_MaxBits = 32
83
# The data structure representing a diff is an array of tuples:
84
# [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
85
# which means: delete "Hello", add "Goodbye" and keep " world."
90
def diff_main(self, text1, text2, checklines=True):
91
"""Find the differences between two texts. Simplifies the problem by
92
stripping any common prefix or suffix off the texts before diffing.
95
text1: Old string to be diffed.
96
text2: New string to be diffed.
97
checklines: Optional speedup flag. If present and false, then don't run
98
a line-level diff first to identify the changed areas.
99
Defaults to true, which does a faster, slightly less optimal diff.
106
# Check for equality (speedup)
108
return [(self.DIFF_EQUAL, text1)]
110
# Trim off common prefix (speedup)
111
commonlength = self.diff_commonPrefix(text1, text2)
112
commonprefix = text1[:commonlength]
113
text1 = text1[commonlength:]
114
text2 = text2[commonlength:]
116
# Trim off common suffix (speedup)
117
commonlength = self.diff_commonSuffix(text1, text2)
118
if commonlength == 0:
121
commonsuffix = text1[-commonlength:]
122
text1 = text1[:-commonlength]
123
text2 = text2[:-commonlength]
125
# Compute the diff on the middle block
126
diffs = self.diff_compute(text1, text2, checklines)
128
# Restore the prefix and suffix
130
diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
132
diffs.append((self.DIFF_EQUAL, commonsuffix))
133
self.diff_cleanupMerge(diffs)
136
def diff_compute(self, text1, text2, checklines):
137
"""Find the differences between two texts. Assumes that the texts do
138
not have any common prefix or suffix.
141
text1: Old string to be diffed.
142
text2: New string to be diffed.
143
checklines: Speedup flag. If false, then don't run a line-level diff
144
first to identify the changed areas.
145
If true, then run a faster, slightly less optimal diff.
152
# Just add some text (speedup)
153
return [(self.DIFF_INSERT, text2)]
156
# Just delete some text (speedup)
157
return [(self.DIFF_DELETE, text1)]
159
if len(text1) > len(text2):
160
(longtext, shorttext) = (text1, text2)
162
(shorttext, longtext) = (text1, text2)
163
i = longtext.find(shorttext)
165
# Shorter text is inside the longer text (speedup)
166
diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
167
(self.DIFF_INSERT, longtext[i + len(shorttext):])]
168
# Swap insertions for deletions if diff is reversed.
169
if len(text1) > len(text2):
170
diffs[0] = (self.DIFF_DELETE, diffs[0][1])
171
diffs[2] = (self.DIFF_DELETE, diffs[2][1])
173
longtext = shorttext = None # Garbage collect.
175
# Check to see if the problem can be split in two.
176
hm = self.diff_halfMatch(text1, text2)
178
# A half-match was found, sort out the return data.
179
(text1_a, text1_b, text2_a, text2_b, mid_common) = hm
180
# Send both pairs off for separate processing.
181
diffs_a = self.diff_main(text1_a, text2_a, checklines)
182
diffs_b = self.diff_main(text1_b, text2_b, checklines)
184
return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
186
# Perform a real diff.
187
if checklines and (len(text1) < 100 or len(text2) < 100):
188
checklines = False # Too trivial for the overhead.
190
# Scan the text on a line-by-line basis first.
191
(text1, text2, linearray) = self.diff_linesToChars(text1, text2)
193
diffs = self.diff_map(text1, text2)
194
if not diffs: # No acceptable result.
195
diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
197
# Convert the diff back to original text.
198
self.diff_charsToLines(diffs, linearray)
199
# Eliminate freak matches (e.g. blank lines)
200
self.diff_cleanupSemantic(diffs)
202
# Rediff any replacement blocks, this time character-by-character.
203
# Add a dummy entry at the end.
204
diffs.append((self.DIFF_EQUAL, ''))
210
while pointer < len(diffs):
211
if diffs[pointer][0] == self.DIFF_INSERT:
213
text_insert += diffs[pointer][1]
214
elif diffs[pointer][0] == self.DIFF_DELETE:
216
text_delete += diffs[pointer][1]
217
elif diffs[pointer][0] == self.DIFF_EQUAL:
218
# Upon reaching an equality, check for prior redundancies.
219
if count_delete >= 1 and count_insert >= 1:
220
# Delete the offending records and add the merged ones.
221
a = self.diff_main(text_delete, text_insert, False)
222
diffs[pointer - count_delete -
223
count_insert: pointer] = a
224
pointer = pointer - count_delete - \
225
count_insert + len(a)
233
diffs.pop() # Remove the dummy entry at the end.
236
def diff_linesToChars(self, text1, text2):
237
"""Split two texts into an array of strings. Reduce the texts to a
238
string of hashes where each Unicode character represents one line.
242
text2: Second string.
245
Three element tuple, containing the encoded text1, the encoded text2 and
246
the array of unique strings. The zeroth element of the array of unique
247
strings is intentionally blank.
250
lineArray = [] # e.g. lineArray[4] == "Hello\n"
251
lineHash = {} # e.g. lineHash["Hello\n"] == 4
253
# "\x00" is a valid character, but various debuggers don't like it.
254
# So we'll insert a junk entry to avoid generating a null character.
257
def diff_linesToCharsMunge(text):
258
"""Split a text into an array of strings. Reduce the texts to a
259
string of hashes where each Unicode character represents one line.
260
Modifies linearray and linehash through being a closure.
263
text: String to encode.
270
# Walk the text, pulling out a substring for each line.
271
# text.split('\n') would would temporarily double our memory footprint.
272
# Modifying text would create many large strings to garbage
276
while lineEnd < len(text) - 1:
277
lineEnd = text.find('\n', lineStart)
279
lineEnd = len(text) - 1
280
line = text[lineStart:lineEnd + 1]
281
lineStart = lineEnd + 1
284
chars.append(unichr(lineHash[line]))
286
lineArray.append(line)
287
lineHash[line] = len(lineArray) - 1
288
chars.append(unichr(len(lineArray) - 1))
289
return ''.join(chars)
291
chars1 = diff_linesToCharsMunge(text1)
292
chars2 = diff_linesToCharsMunge(text2)
293
return (chars1, chars2, lineArray)
295
def diff_charsToLines(self, diffs, lineArray):
296
"""Rehydrate the text in a diff from a string of line hashes to real
300
diffs: Array of diff tuples.
301
lineArray: Array of unique strings.
304
for x in xrange(len(diffs)):
306
for char in diffs[x][1]:
307
text.append(lineArray[ord(char)])
308
diffs[x] = (diffs[x][0], ''.join(text))
310
def diff_map(self, text1, text2):
311
"""Explore the intersection points between the two texts.
314
text1: Old string to be diffed.
315
text2: New string to be diffed.
318
Array of diff tuples or None if no diff available.
322
# Unlike in most languages, Python counts time in seconds.
323
s_end = time.time() + self.Diff_Timeout # Don't run for too long.
324
# Cache the text lengths to prevent multiple calls.
325
text1_length = len(text1)
326
text2_length = len(text2)
327
max_d = text1_length + text2_length - 1
328
doubleEnd = self.Diff_DualThreshold * 2 < max_d
337
# If the total number of characters is odd, then the front path will
338
# collide with the reverse path.
339
front = (text1_length + text2_length) % 2
340
for d in xrange(max_d):
341
# Bail out if timeout reached.
342
if self.Diff_Timeout > 0 and time.time() > s_end:
345
# Walk the front path one step.
347
for k in xrange(-d, d + 1, 2):
348
if k == -d or k != d and v1[k - 1] < v1[k + 1]:
355
if front and footstep in footsteps:
358
footsteps[footstep] = d
360
while (not done and x < text1_length and y < text2_length and
361
text1[x] == text2[y]):
366
if front and footstep in footsteps:
369
footsteps[footstep] = d
372
v_map1[d][(x, y)] = True
373
if x == text1_length and y == text2_length:
374
# Reached the end in single-path mode.
375
return self.diff_path1(v_map1, text1, text2)
377
# Front path ran over reverse path.
378
v_map2 = v_map2[:footsteps[footstep] + 1]
379
a = self.diff_path1(v_map1, text1[:x], text2[:y])
380
b = self.diff_path2(v_map2, text1[x:], text2[y:])
384
# Walk the reverse path one step.
386
for k in xrange(-d, d + 1, 2):
387
if k == -d or k != d and v2[k - 1] < v2[k + 1]:
392
footstep = (text1_length - x, text2_length - y)
393
if not front and footstep in footsteps:
396
footsteps[footstep] = d
397
while (not done and x < text1_length and y < text2_length and
398
text1[-x - 1] == text2[-y - 1]):
401
footstep = (text1_length - x, text2_length - y)
402
if not front and footstep in footsteps:
405
footsteps[footstep] = d
408
v_map2[d][(x, y)] = True
410
# Reverse path ran over front path.
411
v_map1 = v_map1[:footsteps[footstep] + 1]
412
a = self.diff_path1(v_map1, text1[:text1_length - x],
413
text2[:text2_length - y])
414
b = self.diff_path2(v_map2, text1[text1_length - x:],
415
text2[text2_length - y:])
418
# Number of diffs equals number of characters, no commonality at all.
421
def diff_path1(self, v_map, text1, text2):
422
"""Work from the middle back to the start to determine the path.
425
v_map: Array of paths.
426
text1: Old string fragment to be diffed.
427
text2: New string fragment to be diffed.
430
Array of diff tuples.
437
for d in xrange(len(v_map) - 2, -1, -1):
439
if (x - 1, y) in v_map[d]:
441
if last_op == self.DIFF_DELETE:
442
path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
444
path[:0] = [(self.DIFF_DELETE, text1[x])]
445
last_op = self.DIFF_DELETE
447
elif (x, y - 1) in v_map[d]:
449
if last_op == self.DIFF_INSERT:
450
path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
452
path[:0] = [(self.DIFF_INSERT, text2[y])]
453
last_op = self.DIFF_INSERT
458
assert text1[x] == text2[y], ('No diagonal. ' +
459
"Can't happen. (diff_path1)")
460
if last_op == self.DIFF_EQUAL:
461
path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
463
path[:0] = [(self.DIFF_EQUAL, text1[x])]
464
last_op = self.DIFF_EQUAL
467
def diff_path2(self, v_map, text1, text2):
468
"""Work from the middle back to the end to determine the path.
471
v_map: Array of paths.
472
text1: Old string fragment to be diffed.
473
text2: New string fragment to be diffed.
476
Array of diff tuples.
483
for d in xrange(len(v_map) - 2, -1, -1):
485
if (x - 1, y) in v_map[d]:
487
if last_op == self.DIFF_DELETE:
488
path[-1] = (self.DIFF_DELETE, path[-1]
491
path.append((self.DIFF_DELETE, text1[-x - 1]))
492
last_op = self.DIFF_DELETE
494
elif (x, y - 1) in v_map[d]:
496
if last_op == self.DIFF_INSERT:
497
path[-1] = (self.DIFF_INSERT, path[-1]
500
path.append((self.DIFF_INSERT, text2[-y - 1]))
501
last_op = self.DIFF_INSERT
506
assert text1[-x - 1] == text2[-y - 1], ('No diagonal. ' +
507
"Can't happen. (diff_path2)")
508
if last_op == self.DIFF_EQUAL:
509
path[-1] = (self.DIFF_EQUAL, path[-1]
512
path.append((self.DIFF_EQUAL, text1[-x - 1]))
513
last_op = self.DIFF_EQUAL
516
def diff_commonPrefix(self, text1, text2):
517
"""Determine the common prefix of two strings.
521
text2: Second string.
524
The number of characters common to the start of each string.
527
# Quick check for common null cases.
528
if not text1 or not text2 or text1[0] != text2[0]:
531
# Performance analysis: http://neil.fraser.name/news/2007/10/09/
533
pointermax = min(len(text1), len(text2))
534
pointermid = pointermax
536
while pointermin < pointermid:
537
if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
538
pointermin = pointermid
539
pointerstart = pointermin
541
pointermax = pointermid
542
pointermid = int((pointermax - pointermin) / 2 + pointermin)
545
def diff_commonSuffix(self, text1, text2):
546
"""Determine the common suffix of two strings.
550
text2: Second string.
553
The number of characters common to the end of each string.
556
# Quick check for common null cases.
557
if not text1 or not text2 or text1[-1] != text2[-1]:
560
# Performance analysis: http://neil.fraser.name/news/2007/10/09/
562
pointermax = min(len(text1), len(text2))
563
pointermid = pointermax
565
while pointermin < pointermid:
566
if (text1[-pointermid:len(text1) - pointerend] ==
567
text2[-pointermid:len(text2) - pointerend]):
568
pointermin = pointermid
569
pointerend = pointermin
571
pointermax = pointermid
572
pointermid = int((pointermax - pointermin) / 2 + pointermin)
575
def diff_halfMatch(self, text1, text2):
576
"""Do the two texts share a substring which is at least half the length
581
text2: Second string.
584
Five element Array, containing the prefix of text1, the suffix of text1,
585
the prefix of text2, the suffix of text2 and the common middle. Or None
586
if there was no match.
589
if len(text1) > len(text2):
590
(longtext, shorttext) = (text1, text2)
592
(shorttext, longtext) = (text1, text2)
593
if len(longtext) < 10 or len(shorttext) < 1:
594
return None # Pointless.
596
def diff_halfMatchI(longtext, shorttext, i):
597
"""Does a substring of shorttext exist within longtext such that
598
the substring is at least half the length of longtext? Closure, but
599
does not reference any external variables.
602
longtext: Longer string.
603
shorttext: Shorter string.
604
i: Start index of quarter length substring within longtext.
607
Five element Array, containing the prefix of longtext, the suffix of
608
longtext, the prefix of shorttext, the suffix of shorttext and the
609
common middle. Or None if there was no match.
612
seed = longtext[i:i + len(longtext) / 4]
614
j = shorttext.find(seed)
616
prefixLength = self.diff_commonPrefix(
617
longtext[i:], shorttext[j:])
618
suffixLength = self.diff_commonSuffix(
619
longtext[:i], shorttext[:j])
620
if len(best_common) < suffixLength + prefixLength:
621
best_common = (shorttext[j - suffixLength:j] +
622
shorttext[j:j + prefixLength])
623
best_longtext_a = longtext[:i - suffixLength]
624
best_longtext_b = longtext[i + prefixLength:]
625
best_shorttext_a = shorttext[:j - suffixLength]
626
best_shorttext_b = shorttext[j + prefixLength:]
627
j = shorttext.find(seed, j + 1)
629
if len(best_common) >= len(longtext) / 2:
630
return (best_longtext_a, best_longtext_b,
631
best_shorttext_a, best_shorttext_b, best_common)
635
# First check if the second quarter is the seed for a half-match.
636
hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
637
# Check again based on the third quarter.
638
hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
639
if not hm1 and not hm2:
646
# Both matched. Select the longest.
647
if len(hm1[4]) > len(hm2[4]):
652
# A half-match was found, sort out the return data.
653
if len(text1) > len(text2):
654
(text1_a, text1_b, text2_a, text2_b, mid_common) = hm
656
(text2_a, text2_b, text1_a, text1_b, mid_common) = hm
657
return (text1_a, text1_b, text2_a, text2_b, mid_common)
659
def diff_cleanupSemantic(self, diffs):
660
"""Reduce the number of edits by eliminating semantically trivial
664
diffs: Array of diff tuples.
668
equalities = [] # Stack of indices where equalities are found.
669
lastequality = None # Always equal to equalities[-1][1]
670
pointer = 0 # Index of current position.
671
# Number of chars that changed prior to the equality.
673
length_changes2 = 0 # Number of chars that changed after the equality.
674
while pointer < len(diffs):
675
if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
676
equalities.append(pointer)
677
length_changes1 = length_changes2
679
lastequality = diffs[pointer][1]
680
else: # an insertion or deletion
681
length_changes2 += len(diffs[pointer][1])
682
if (lastequality != None and (len(lastequality) <= length_changes1) and
683
(len(lastequality) <= length_changes2)):
685
diffs.insert(equalities[-1],
686
(self.DIFF_DELETE, lastequality))
687
# Change second copy to insert.
688
diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
689
diffs[equalities[-1] + 1][1])
690
# Throw away the equality we just deleted.
692
# Throw away the previous equality (it needs to be
694
if len(equalities) != 0:
697
pointer = equalities[-1]
700
length_changes1 = 0 # Reset the counters.
707
self.diff_cleanupMerge(diffs)
709
self.diff_cleanupSemanticLossless(diffs)
711
def diff_cleanupSemanticLossless(self, diffs):
712
"""Look for single edits surrounded on both sides by equalities
713
which can be shifted sideways to align the edit to a word boundary.
714
e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
717
diffs: Array of diff tuples.
720
def diff_cleanupSemanticScore(one, two):
721
"""Given two strings, compute a score representing whether the
722
internal boundary falls on logical boundaries. Scores range from 5
723
(best) to 0 (worst). Closure, but does not reference any external
734
if not one or not two:
735
# Edges are the best.
738
# Each port of this function behaves slightly differently due to
739
# subtle differences in each language's definition of things like
740
# 'whitespace'. Since this function's purpose is largely cosmetic,
741
# the choice has been made to use each language's native features
742
# rather than force total conformity.
744
# One point for non-alphanumeric.
745
if not one[-1].isalnum() or not two[0].isalnum():
747
# Two points for whitespace.
748
if one[-1].isspace() or two[0].isspace():
750
# Three points for line breaks.
751
if (one[-1] == '\r' or one[-1] == '\n' or
752
two[0] == '\r' or two[0] == '\n'):
754
# Four points for blank lines.
755
if (re.search('\\n\\r?\\n$', one) or
756
re.match('^\\r?\\n\\r?\\n', two)):
761
# Intentionally ignore the first and last element (don't need
763
while pointer < len(diffs) - 1:
764
if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
765
diffs[pointer + 1][0] == self.DIFF_EQUAL):
766
# This is a single edit surrounded by equalities.
767
equality1 = diffs[pointer - 1][1]
768
edit = diffs[pointer][1]
769
equality2 = diffs[pointer + 1][1]
771
# First, shift the edit as far left as possible.
772
commonOffset = self.diff_commonSuffix(equality1, edit)
774
commonString = edit[-commonOffset:]
775
equality1 = equality1[:-commonOffset]
776
edit = commonString + edit[:-commonOffset]
777
equality2 = commonString + equality2
779
# Second, step character by character right, looking for the
781
bestEquality1 = equality1
783
bestEquality2 = equality2
784
bestScore = (diff_cleanupSemanticScore(equality1, edit) +
785
diff_cleanupSemanticScore(edit, equality2))
786
while edit and equality2 and edit[0] == equality2[0]:
788
edit = edit[1:] + equality2[0]
789
equality2 = equality2[1:]
790
score = (diff_cleanupSemanticScore(equality1, edit) +
791
diff_cleanupSemanticScore(edit, equality2))
792
# The >= encourages trailing rather than leading whitespace
794
if score >= bestScore:
796
bestEquality1 = equality1
798
bestEquality2 = equality2
800
if diffs[pointer - 1][1] != bestEquality1:
801
# We have an improvement, save it back to the diff.
803
diffs[pointer - 1] = (diffs[pointer - 1]
806
del diffs[pointer - 1]
808
diffs[pointer] = (diffs[pointer][0], bestEdit)
810
diffs[pointer + 1] = (diffs[pointer + 1]
813
del diffs[pointer + 1]
817
def diff_cleanupEfficiency(self, diffs):
818
"""Reduce the number of edits by eliminating operationally trivial
822
diffs: Array of diff tuples.
826
equalities = [] # Stack of indices where equalities are found.
827
lastequality = '' # Always equal to equalities[-1][1]
828
pointer = 0 # Index of current position.
829
# Is there an insertion operation before the last equality.
831
# Is there a deletion operation before the last equality.
833
# Is there an insertion operation after the last equality.
835
# Is there a deletion operation after the last equality.
837
while pointer < len(diffs):
838
if diffs[pointer][0] == self.DIFF_EQUAL: # equality found
839
if (len(diffs[pointer][1]) < self.Diff_EditCost and
840
(post_ins or post_del)):
842
equalities.append(pointer)
845
lastequality = diffs[pointer][1]
847
# Not a candidate, and can never become one.
851
post_ins = post_del = False
852
else: # an insertion or deletion
853
if diffs[pointer][0] == self.DIFF_DELETE:
858
# Five types to be split:
859
# <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
860
# <ins>A</ins>X<ins>C</ins><del>D</del>
861
# <ins>A</ins><del>B</del>X<ins>C</ins>
862
# <ins>A</del>X<ins>C</ins><del>D</del>
863
# <ins>A</ins><del>B</del>X<del>C</del>
865
if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
866
((len(lastequality) < self.Diff_EditCost / 2) and
867
(pre_ins + pre_del + post_ins + post_del) == 3)):
869
diffs.insert(equalities[-1],
870
(self.DIFF_DELETE, lastequality))
871
# Change second copy to insert.
872
diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
873
diffs[equalities[-1] + 1][1])
874
equalities.pop() # Throw away the equality we just deleted
876
if pre_ins and pre_del:
877
# No changes made which could affect previous entry,
879
post_ins = post_del = True
883
equalities.pop() # Throw away the previous equality
885
pointer = equalities[-1]
888
post_ins = post_del = False
893
self.diff_cleanupMerge(diffs)
895
def diff_cleanupMerge(self, diffs):
896
"""Reorder and merge like edit sections. Merge equalities. Any edit
897
section can move as long as it doesn't cross an equality.
900
diffs: Array of diff tuples.
903
diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end.
909
while pointer < len(diffs):
910
if diffs[pointer][0] == self.DIFF_INSERT:
912
text_insert += diffs[pointer][1]
914
elif diffs[pointer][0] == self.DIFF_DELETE:
916
text_delete += diffs[pointer][1]
918
elif diffs[pointer][0] == self.DIFF_EQUAL:
919
# Upon reaching an equality, check for prior redundancies.
920
if count_delete != 0 or count_insert != 0:
921
if count_delete != 0 and count_insert != 0:
922
# Factor out any common prefixies.
923
commonlength = self.diff_commonPrefix(
924
text_insert, text_delete)
925
if commonlength != 0:
926
x = pointer - count_delete - count_insert - 1
927
if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
928
diffs[x] = (diffs[x][0], diffs[x][1] +
929
text_insert[:commonlength])
932
0, (self.DIFF_EQUAL, text_insert[:commonlength]))
934
text_insert = text_insert[commonlength:]
935
text_delete = text_delete[commonlength:]
936
# Factor out any common suffixies.
937
commonlength = self.diff_commonSuffix(
938
text_insert, text_delete)
939
if commonlength != 0:
940
diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
942
text_insert = text_insert[:-commonlength]
943
text_delete = text_delete[:-commonlength]
944
# Delete the offending records and add the merged ones.
945
if count_delete == 0:
946
diffs[pointer - count_insert: pointer] = [
947
(self.DIFF_INSERT, text_insert)]
948
elif count_insert == 0:
949
diffs[pointer - count_delete: pointer] = [
950
(self.DIFF_DELETE, text_delete)]
952
diffs[pointer - count_delete - count_insert: pointer] = [
953
(self.DIFF_DELETE, text_delete),
954
(self.DIFF_INSERT, text_insert)]
955
pointer = pointer - count_delete - count_insert + 1
956
if count_delete != 0:
958
if count_insert != 0:
960
elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
961
# Merge this equality with the previous one.
962
diffs[pointer - 1] = (diffs[pointer - 1][0],
963
diffs[pointer - 1][1] + diffs[pointer][1])
973
if diffs[-1][1] == '':
974
diffs.pop() # Remove the dummy entry at the end.
976
# Second pass: look for single edits surrounded on both sides by equalities
977
# which can be shifted sideways to eliminate an equality.
978
# e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
981
# Intentionally ignore the first and last element (don't need
983
while pointer < len(diffs) - 1:
984
if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
985
diffs[pointer + 1][0] == self.DIFF_EQUAL):
986
# This is a single edit surrounded by equalities.
987
if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
988
# Shift the edit over the previous equality.
989
diffs[pointer] = (diffs[pointer][0],
990
diffs[pointer - 1][1] +
991
diffs[pointer][1][:-len(diffs[pointer - 1][1])])
992
diffs[pointer + 1] = (diffs[pointer + 1][0],
993
diffs[pointer - 1][1] + diffs[pointer + 1][1])
994
del diffs[pointer - 1]
996
elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
997
# Shift the edit over the next equality.
998
diffs[pointer - 1] = (diffs[pointer - 1][0],
999
diffs[pointer - 1][1] + diffs[pointer + 1][1])
1000
diffs[pointer] = (diffs[pointer][0],
1001
diffs[pointer][1][len(diffs[pointer + 1][1]):] +
1002
diffs[pointer + 1][1])
1003
del diffs[pointer + 1]
1007
# If shifts were made, the diff needs reordering and another shift
1010
self.diff_cleanupMerge(diffs)
1012
def diff_xIndex(self, diffs, loc):
1013
"""loc is a location in text1, compute and return the equivalent location
1014
in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8
1017
diffs: Array of diff tuples.
1018
loc: Location within text1.
1021
Location within text2.
1027
for x in xrange(len(diffs)):
1028
(op, text) = diffs[x]
1029
if op != self.DIFF_INSERT: # Equality or deletion.
1031
if op != self.DIFF_DELETE: # Equality or insertion.
1033
if chars1 > loc: # Overshot the location.
1035
last_chars1 = chars1
1036
last_chars2 = chars2
1038
if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
1039
# The location was deleted.
1041
# Add the remaining len(character).
1042
return last_chars2 + (loc - last_chars1)
1044
def diff_prettyHtml(self, diffs):
1045
"""Convert a diff array into a pretty HTML report.
1048
diffs: Array of diff tuples.
1051
HTML representation.
1056
for (op, data) in diffs:
1057
text = (data.replace('&', '&').replace('<', '<')
1058
.replace('>', '>').replace('\n', '¶<BR>'))
1059
if op == self.DIFF_INSERT:
1060
html.append("<SPAN STYLE=\"color: #00FF00;\" TITLE=\"i=%i\">%s</SPAN>"
1062
elif op == self.DIFF_DELETE:
1063
html.append("<SPAN STYLE=\"color: #FF0000;\" TITLE=\"i=%i\">%s</SPAN>"
1065
elif op == self.DIFF_EQUAL:
1066
html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
1067
if op != self.DIFF_DELETE:
1069
return ''.join(html)
1071
def diff_text1(self, diffs):
1072
"""Compute and return the source text (all equalities and deletions).
1075
diffs: Array of diff tuples.
1082
for (op, data) in diffs:
1083
if op != self.DIFF_INSERT:
1085
return ''.join(text)
1087
def diff_text2(self, diffs):
1088
"""Compute and return the destination text (all equalities and
1092
diffs: Array of diff tuples.
1099
for (op, data) in diffs:
1100
if op != self.DIFF_DELETE:
1102
return ''.join(text)
1104
def diff_levenshtein(self, diffs):
1105
"""Compute the Levenshtein distance; the number of inserted, deleted or
1106
substituted characters.
1109
diffs: Array of diff tuples.
1118
for (op, data) in diffs:
1119
if op == self.DIFF_INSERT:
1120
insertions += len(data)
1121
elif op == self.DIFF_DELETE:
1122
deletions += len(data)
1123
elif op == self.DIFF_EQUAL:
1124
# A deletion and an insertion is one substitution.
1125
levenshtein += max(insertions, deletions)
1128
levenshtein += max(insertions, deletions)
1131
def diff_toDelta(self, diffs):
1132
"""Crush the diff into an encoded string which describes the operations
1133
required to transform text1 into text2.
1134
E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
1135
Operations are tab-separated. Inserted text is escaped using %xx notation.
1138
diffs: Array of diff tuples.
1144
for (op, data) in diffs:
1145
if op == self.DIFF_INSERT:
1146
# High ascii will raise UnicodeDecodeError. Use Unicode
1148
data = data.encode('utf-8')
1149
text.append('+' + urllib.quote(data, "!~*'();/?:@&=+$,# "))
1150
elif op == self.DIFF_DELETE:
1151
text.append('-%d' % len(data))
1152
elif op == self.DIFF_EQUAL:
1153
text.append('=%d' % len(data))
1154
return '\t'.join(text)
1156
def diff_fromDelta(self, text1, delta):
1157
"""Given the original text1, and an encoded string which describes the
1158
operations required to transform text1 into text2, compute the full
1162
text1: Source string for the diff.
1166
Array of diff tuples.
1169
ValueError: If invalid input.
1172
if type(delta) == unicode:
1173
# Deltas should be composed of a subset of ascii chars, Unicode not
1174
# required. If this encode raises UnicodeEncodeError, delta is
1176
delta = delta.encode('ascii')
1178
pointer = 0 # Cursor in text1
1179
tokens = delta.split('\t')
1180
for token in tokens:
1182
# Blank tokens are ok (from a trailing \t).
1184
# Each token begins with a one character parameter which specifies the
1185
# operation of this token (delete, insert, equality).
1188
param = urllib.unquote(param).decode('utf-8')
1189
diffs.append((self.DIFF_INSERT, param))
1190
elif token[0] == '-' or token[0] == '=':
1194
raise ValueError, 'Invalid number in diff_fromDelta: ' + param
1196
raise ValueError, 'Negative number in diff_fromDelta: ' + param
1197
text = text1[pointer: pointer + n]
1200
diffs.append((self.DIFF_EQUAL, text))
1202
diffs.append((self.DIFF_DELETE, text))
1204
# Anything else is an error.
1205
raise ValueError, ('Invalid diff operation in diff_fromDelta: ' +
1207
if pointer != len(text1):
1209
'Delta length (%d) does not equal source text length (%d).' %
1210
(pointer, len(text1)))
1215
def match_main(self, text, pattern, loc):
1216
"""Locate the best instance of 'pattern' in 'text' near 'loc'.
1219
text: The text to search.
1220
pattern: The pattern to search for.
1221
loc: The location to search around.
1224
Best match index or -1.
1227
loc = max(0, min(loc, len(text)))
1229
# Shortcut (potentially not guaranteed by the algorithm)
1234
elif text[loc:loc + len(pattern)] == pattern:
1235
# Perfect match at the perfect spot! (Includes case of null
1239
# Do a fuzzy compare.
1240
match = self.match_bitap(text, pattern, loc)
1243
def match_bitap(self, text, pattern, loc):
    """Locate the best instance of 'pattern' in 'text' near 'loc' using the
    Bitap algorithm.

    Args:
        text: The text to search.
        pattern: The pattern to search for.
        loc: The location to search around.

    Returns:
        Best match index or -1.
    """
    # Python doesn't have a maxint limit, so ignore this check.
    # if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
    #   raise ValueError("Pattern too long for this application.")

    # Initialise the alphabet.
    s = self.match_alphabet(pattern)

    def match_bitapScore(e, x):
        """Compute and return the score for a match with e errors and x
        location.  Accesses loc and pattern through being a closure.

        Args:
            e: Number of errors in match.
            x: Location of match.

        Returns:
            Overall score for match (0.0 = good, 1.0 = bad).
        """
        accuracy = float(e) / len(pattern)
        proximity = abs(loc - x)
        if not self.Match_Distance:
            # Dodge divide by zero error.
            return proximity and 1.0 or accuracy
        return accuracy + (proximity / float(self.Match_Distance))

    # Highest score beyond which we give up.
    score_threshold = self.Match_Threshold
    # Is there a nearby exact match? (speedup)
    best_loc = text.find(pattern, loc)
    if best_loc != -1:
        score_threshold = min(match_bitapScore(
            0, best_loc), score_threshold)
    # What about in the other direction? (speedup)
    best_loc = text.rfind(pattern, loc + len(pattern))
    if best_loc != -1:
        score_threshold = min(match_bitapScore(
            0, best_loc), score_threshold)

    # Initialise the bit arrays.
    matchmask = 1 << (len(pattern) - 1)
    best_loc = -1

    bin_max = len(pattern) + len(text)
    # Empty initialization added to appease pychecker.
    last_rd = None
    for d in xrange(len(pattern)):
        # Scan for the best match each iteration allows for one more error.
        # Run a binary search to determine how far from 'loc' we can stray at
        # this error level.
        bin_min = 0
        bin_mid = bin_max
        while bin_min < bin_mid:
            if match_bitapScore(d, loc + bin_mid) <= score_threshold:
                bin_min = bin_mid
            else:
                bin_max = bin_mid
            # Integer halving (// matches Py2 int division on these ints).
            bin_mid = (bin_max - bin_min) // 2 + bin_min
        # Use the result from this iteration as the maximum for the next.
        bin_max = bin_mid
        start = max(1, loc - bin_mid + 1)
        finish = min(loc + bin_mid, len(text)) + len(pattern)

        # Py2: range() returns a mutable list, which rd must be.
        rd = range(finish + 1)
        rd.append((1 << d) - 1)
        for j in xrange(finish, start - 1, -1):
            if len(text) <= j - 1:
                # Out of range.
                charMatch = 0
            else:
                charMatch = s.get(text[j - 1], 0)
            if d == 0:  # First pass: exact match.
                rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
            else:  # Subsequent passes: fuzzy match.
                rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
                    ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
            if rd[j] & matchmask:
                score = match_bitapScore(d, j - 1)
                # This match will almost certainly be better than any existing
                # match.  But check anyway.
                if score <= score_threshold:
                    # Told you so.
                    score_threshold = score
                    best_loc = j - 1
                    if best_loc > loc:
                        # When passing loc, don't exceed our current
                        # distance from loc.
                        start = max(1, 2 * loc - best_loc)
                    else:
                        # Already passed loc, downhill from here on in.
                        break
        # No hope for a (better) match at greater error levels.
        if match_bitapScore(d + 1, loc) > score_threshold:
            break
        last_rd = rd
    return best_loc
def match_alphabet(self, pattern):
    """Initialise the alphabet for the Bitap algorithm.

    Builds, for each character in pattern, a bitmask with a 1 at every
    position where that character occurs (bit 0 = last character).

    Args:
        pattern: The text to encode.

    Returns:
        Hash of character locations.
    """
    s = {}
    for i, char in enumerate(pattern):
        s[char] = s.get(char, 0) | (1 << (len(pattern) - i - 1))
    return s
def patch_addContext(self, patch, text):
    """Increase the context until it is unique, but don't let the pattern
    expand beyond Match_MaxBits.

    Args:
        patch: The patch to grow.
        text: Source text.
    """
    pattern = text[patch.start2: patch.start2 + patch.length1]
    padding = 0

    # Look for the first and last matches of pattern in text.  If two different
    # matches are found, increase the pattern length.
    while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
            0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
            self.Patch_Margin)):
        padding += self.Patch_Margin
        pattern = text[max(0, patch.start2 - padding):
                       patch.start2 + patch.length1 + padding]
    # Add one chunk for good luck.
    padding += self.Patch_Margin

    # Add the prefix.
    prefix = text[max(0, patch.start2 - padding): patch.start2]
    if prefix:
        patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
    # Add the suffix.
    suffix = text[patch.start2 + patch.length1:
                  patch.start2 + patch.length1 + padding]
    if suffix:
        patch.diffs.append((self.DIFF_EQUAL, suffix))

    # Roll back the start points.
    patch.start1 -= len(prefix)
    patch.start2 -= len(prefix)
    # Extend lengths.
    patch.length1 += len(prefix) + len(suffix)
    patch.length2 += len(prefix) + len(suffix)
def patch_make(self, a, b=None, c=None):
    """Compute a list of patches to turn text1 into text2.
    Use diffs if provided, otherwise compute it ourselves.
    There are four ways to call this function, depending on what data is
    available to the caller:
    Method 1:
    a = text1, b = text2
    Method 2:
    a = diffs
    Method 3 (optimal):
    a = text1, b = diffs
    Method 4 (deprecated, use method 3):
    a = text1, b = text2, c = diffs

    Args:
        a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
            text2 (method 2).
        b: text2 (methods 1,4) or Array of diff tuples for text1 to
            text2 (method 3) or undefined (method 2).
        c: Array of diff tuples for text1 to text2 (method 4) or
            undefined (methods 1,2,3).

    Returns:
        Array of patch objects.
    """
    text1 = None
    diffs = None
    # Note that texts may arrive as 'str' or 'unicode'.
    if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
        # Method 1: text1, text2
        # Compute diffs from text1 and text2.
        text1 = a
        diffs = self.diff_main(text1, b, True)
        if len(diffs) > 2:
            self.diff_cleanupSemantic(diffs)
            self.diff_cleanupEfficiency(diffs)
    elif isinstance(a, list) and b is None and c is None:
        # Method 2: diffs
        # Compute text1 from diffs.
        diffs = a
        text1 = self.diff_text1(diffs)
    elif isinstance(a, basestring) and isinstance(b, list) and c is None:
        # Method 3: text1, diffs
        text1 = a
        diffs = b
    elif (isinstance(a, basestring) and isinstance(b, basestring) and
          isinstance(c, list)):
        # Method 4: text1, text2, diffs
        # text2 is not used.
        text1 = a
        diffs = c
    else:
        raise ValueError('Unknown call format to patch_make.')

    if not diffs:
        return []  # Get rid of the None case.
    patches = []
    patch = patch_obj()
    char_count1 = 0  # Number of characters into the text1 string.
    char_count2 = 0  # Number of characters into the text2 string.
    # Recreate the patches to determine context info.
    prepatch_text = text1
    postpatch_text = text1
    for x in xrange(len(diffs)):
        (diff_type, diff_text) = diffs[x]
        if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
            # A new patch starts here.
            patch.start1 = char_count1
            patch.start2 = char_count2
        if diff_type == self.DIFF_INSERT:
            # Insertion.
            patch.diffs.append(diffs[x])
            patch.length2 += len(diff_text)
            postpatch_text = (postpatch_text[:char_count2] + diff_text +
                              postpatch_text[char_count2:])
        elif diff_type == self.DIFF_DELETE:
            # Deletion.
            patch.length1 += len(diff_text)
            patch.diffs.append(diffs[x])
            postpatch_text = (postpatch_text[:char_count2] +
                              postpatch_text[char_count2 + len(diff_text):])
        elif (diff_type == self.DIFF_EQUAL and
              len(diff_text) <= 2 * self.Patch_Margin and
              len(patch.diffs) != 0 and len(diffs) != x + 1):
            # Small equality inside a patch.
            patch.diffs.append(diffs[x])
            patch.length1 += len(diff_text)
            patch.length2 += len(diff_text)

        if (diff_type == self.DIFF_EQUAL and
                len(diff_text) >= 2 * self.Patch_Margin):
            # Time for a new patch.
            if len(patch.diffs) != 0:
                self.patch_addContext(patch, prepatch_text)
                patches.append(patch)
                patch = patch_obj()
                # Unlike Unidiff, our patch lists have a rolling context.
                # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
                # Update prepatch text & pos to reflect the application of the
                # just completed patch.
                prepatch_text = postpatch_text
                char_count1 = char_count2

        # Update the current character count.
        if diff_type != self.DIFF_INSERT:
            char_count1 += len(diff_text)
        if diff_type != self.DIFF_DELETE:
            char_count2 += len(diff_text)

    # Pick up the leftover patch if not empty.
    if len(patch.diffs) != 0:
        self.patch_addContext(patch, prepatch_text)
        patches.append(patch)

    return patches
def patch_deepCopy(self, patches):
    """Given an array of patches, return another array that is identical.

    Args:
        patches: Array of patch objects.

    Returns:
        Array of patch objects.
    """
    patchesCopy = []
    for patch in patches:
        patchCopy = patch_obj()
        # No need to deep copy the tuples since they are immutable.
        patchCopy.diffs = patch.diffs[:]
        patchCopy.start1 = patch.start1
        patchCopy.start2 = patch.start2
        patchCopy.length1 = patch.length1
        patchCopy.length2 = patch.length2
        patchesCopy.append(patchCopy)
    return patchesCopy
def patch_apply(self, patches, text):
    """Merge a set of patches onto the text.  Return a patched text, as
    well as a list of true/false values indicating which patches were
    applied.

    Args:
        patches: Array of patch objects.
        text: Old text.

    Returns:
        Two element Array, containing the new text and an array of boolean values.
    """
    if not patches:
        return (text, [])

    # Deep copy the patches so that no changes are made to originals.
    patches = self.patch_deepCopy(patches)

    nullPadding = self.patch_addPadding(patches)
    text = nullPadding + text + nullPadding
    self.patch_splitMax(patches)

    # delta keeps track of the offset between the expected and actual location
    # of the previous patch.  If there are patches expected at positions 10 and
    # 20, but the first patch was found at 12, delta is 2 and the second patch
    # has an effective expected position of 22.
    delta = 0
    results = []
    for patch in patches:
        expected_loc = patch.start2 + delta
        text1 = self.diff_text1(patch.diffs)
        end_loc = -1
        if len(text1) > self.Match_MaxBits:
            # patch_splitMax will only provide an oversized pattern in the case
            # of a monster delete.
            start_loc = self.match_main(text, text1[:self.Match_MaxBits],
                                        expected_loc)
            if start_loc != -1:
                end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
                                          expected_loc + len(text1) - self.Match_MaxBits)
                if end_loc == -1 or start_loc >= end_loc:
                    # Can't find valid trailing context.  Drop this patch.
                    start_loc = -1
        else:
            start_loc = self.match_main(text, text1, expected_loc)
        if start_loc == -1:
            # No match found.  :(
            results.append(False)
            # Subtract the delta for this failed patch from subsequent
            # patches.
            delta -= patch.length2 - patch.length1
        else:
            # Found a match.  :)
            results.append(True)
            delta = start_loc - expected_loc
            if end_loc == -1:
                text2 = text[start_loc: start_loc + len(text1)]
            else:
                text2 = text[start_loc: end_loc + self.Match_MaxBits]
            if text1 == text2:
                # Perfect match, just shove the replacement text in.
                text = (text[:start_loc] + self.diff_text2(patch.diffs) +
                        text[start_loc + len(text1):])
            else:
                # Imperfect match.
                # Run a diff to get a framework of equivalent indices.
                diffs = self.diff_main(text1, text2, False)
                if (len(text1) > self.Match_MaxBits and
                        self.diff_levenshtein(diffs) / float(len(text1)) >
                        self.Patch_DeleteThreshold):
                    # The end points match, but the content is unacceptably
                    # bad.
                    results[-1] = False
                else:
                    self.diff_cleanupSemanticLossless(diffs)
                    index1 = 0
                    for (op, data) in patch.diffs:
                        if op != self.DIFF_EQUAL:
                            index2 = self.diff_xIndex(diffs, index1)
                        if op == self.DIFF_INSERT:  # Insertion
                            text = text[:start_loc + index2] + data + text[start_loc +
                                                                           index2:]
                        elif op == self.DIFF_DELETE:  # Deletion
                            text = text[:start_loc + index2] + text[start_loc +
                                self.diff_xIndex(diffs, index1 + len(data)):]
                        if op != self.DIFF_DELETE:
                            index1 += len(data)
    # Strip the padding off.
    text = text[len(nullPadding):-len(nullPadding)]
    return (text, results)
def patch_addPadding(self, patches):
    """Add some padding on text start and end so that edges can match
    something.  Intended to be called only from within patch_apply.

    Args:
        patches: Array of patch objects.

    Returns:
        The padding string added to each side.
    """
    paddingLength = self.Patch_Margin
    # Padding is made of the control characters chr(1)..chr(paddingLength),
    # which are assumed not to appear in normal text.
    nullPadding = ''
    for x in xrange(1, paddingLength + 1):
        nullPadding += chr(x)

    # Bump all the patches forward.
    for patch in patches:
        patch.start1 += paddingLength
        patch.start2 += paddingLength

    # Add some padding on start of first diff.
    patch = patches[0]
    diffs = patch.diffs
    if not diffs or diffs[0][0] != self.DIFF_EQUAL:
        # Add nullPadding equality.
        diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
        patch.start1 -= paddingLength  # Should be 0.
        patch.start2 -= paddingLength  # Should be 0.
        patch.length1 += paddingLength
        patch.length2 += paddingLength
    elif paddingLength > len(diffs[0][1]):
        # Grow first equality.
        extraLength = paddingLength - len(diffs[0][1])
        newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
        diffs[0] = (diffs[0][0], newText)
        patch.start1 -= extraLength
        patch.start2 -= extraLength
        patch.length1 += extraLength
        patch.length2 += extraLength

    # Add some padding on end of last diff.
    patch = patches[-1]
    diffs = patch.diffs
    if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
        # Add nullPadding equality.
        diffs.append((self.DIFF_EQUAL, nullPadding))
        patch.length1 += paddingLength
        patch.length2 += paddingLength
    elif paddingLength > len(diffs[-1][1]):
        # Grow last equality.
        extraLength = paddingLength - len(diffs[-1][1])
        newText = diffs[-1][1] + nullPadding[:extraLength]
        diffs[-1] = (diffs[-1][0], newText)
        patch.length1 += extraLength
        patch.length2 += extraLength

    return nullPadding
def patch_splitMax(self, patches):
    """Look through the patches and break up any which are longer than the
    maximum limit of the match algorithm.

    Args:
        patches: Array of patch objects.
    """
    if self.Match_MaxBits == 0:
        return
    # NOTE(review): mutating x and patches inside a for-xrange loop is the
    # historical upstream idiom; the loop variable is reset each iteration.
    # It works because replacement patches are all under Match_MaxBits, but
    # verify against the reference implementation before restructuring.
    for x in xrange(len(patches)):
        if patches[x].length1 > self.Match_MaxBits:
            bigpatch = patches[x]
            # Remove the big old patch.
            del patches[x]
            x -= 1
            patch_size = self.Match_MaxBits
            start1 = bigpatch.start1
            start2 = bigpatch.start2
            precontext = ''
            while len(bigpatch.diffs) != 0:
                # Create one of several smaller patches.
                patch = patch_obj()
                empty = True
                patch.start1 = start1 - len(precontext)
                patch.start2 = start2 - len(precontext)
                if precontext:
                    patch.length1 = patch.length2 = len(precontext)
                    patch.diffs.append((self.DIFF_EQUAL, precontext))

                while (len(bigpatch.diffs) != 0 and
                       patch.length1 < patch_size - self.Patch_Margin):
                    (diff_type, diff_text) = bigpatch.diffs[0]
                    if diff_type == self.DIFF_INSERT:
                        # Insertions are harmless.
                        patch.length2 += len(diff_text)
                        start2 += len(diff_text)
                        patch.diffs.append(bigpatch.diffs.pop(0))
                        empty = False
                    elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
                          patch.diffs[0][0] == self.DIFF_EQUAL and
                          len(diff_text) > 2 * patch_size):
                        # This is a large deletion.  Let it pass in one
                        # chunk.
                        patch.length1 += len(diff_text)
                        start1 += len(diff_text)
                        empty = False
                        patch.diffs.append((diff_type, diff_text))
                        del bigpatch.diffs[0]
                    else:
                        # Deletion or equality.  Only take as much as we
                        # can stomach.
                        diff_text = diff_text[:patch_size - patch.length1 -
                                              self.Patch_Margin]
                        patch.length1 += len(diff_text)
                        start1 += len(diff_text)
                        if diff_type == self.DIFF_EQUAL:
                            patch.length2 += len(diff_text)
                            start2 += len(diff_text)
                        else:
                            empty = False

                        patch.diffs.append((diff_type, diff_text))
                        if diff_text == bigpatch.diffs[0][1]:
                            del bigpatch.diffs[0]
                        else:
                            bigpatch.diffs[0] = (bigpatch.diffs[0][0],
                                                 bigpatch.diffs[0][1][len(diff_text):])

                # Compute the head context for the next patch.
                precontext = self.diff_text2(patch.diffs)
                precontext = precontext[-self.Patch_Margin:]
                # Append the end context for this patch.
                postcontext = self.diff_text1(bigpatch.diffs)[
                    :self.Patch_Margin]
                if postcontext:
                    patch.length1 += len(postcontext)
                    patch.length2 += len(postcontext)
                    if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
                        patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
                                           postcontext)
                    else:
                        patch.diffs.append((self.DIFF_EQUAL, postcontext))

                if not empty:
                    x += 1
                    patches.insert(x, patch)
def patch_toText(self, patches):
    """Take a list of patches and return a textual representation.

    Args:
        patches: Array of patch objects.

    Returns:
        Text representation of patches.
    """
    # Each patch object renders itself via str(); concatenate them all.
    text = []
    for patch in patches:
        text.append(str(patch))
    return ''.join(text)
def patch_fromText(self, textline):
    """Parse a textual representation of patches and return a list of patch
    objects.

    Args:
        textline: Text representation of patches.

    Returns:
        Array of patch objects.

    Raises:
        ValueError: If invalid input.
    """
    if type(textline) == unicode:
        # Patches should be composed of a subset of ascii chars, Unicode not
        # required.  If this encode raises UnicodeEncodeError, patch is
        # invalid.
        textline = textline.encode('ascii')
    patches = []
    if not textline:
        return patches
    text = textline.split('\n')
    while len(text) != 0:
        m = re.match(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$', text[0])
        if not m:
            raise ValueError('Invalid patch string: ' + text[0])
        patch = patch_obj()
        patches.append(patch)
        patch.start1 = int(m.group(1))
        if m.group(2) == '':
            # Implicit length of 1; header index is 1-based.
            patch.start1 -= 1
            patch.length1 = 1
        elif m.group(2) == '0':
            patch.length1 = 0
        else:
            patch.start1 -= 1
            patch.length1 = int(m.group(2))

        patch.start2 = int(m.group(3))
        if m.group(4) == '':
            patch.start2 -= 1
            patch.length2 = 1
        elif m.group(4) == '0':
            patch.length2 = 0
        else:
            patch.start2 -= 1
            patch.length2 = int(m.group(4))

        del text[0]

        while len(text) != 0:
            if text[0]:
                sign = text[0][0]
            else:
                sign = ''
            line = urllib.unquote(text[0][1:])
            line = line.decode('utf-8')
            if sign == '+':
                # Insertion.
                patch.diffs.append((self.DIFF_INSERT, line))
            elif sign == '-':
                # Deletion.
                patch.diffs.append((self.DIFF_DELETE, line))
            elif sign == ' ':
                # Minor equality.
                patch.diffs.append((self.DIFF_EQUAL, line))
            elif sign == '@':
                # Start of next patch.
                break
            elif sign == '':
                # Blank line?  Whatever.
                pass
            else:
                # WTF?
                raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
            del text[0]
    return patches
1887
"""Class representing one patch operation."""
1890
"""Initializes with an empty list of diffs."""
1898
"""Emmulate GNU diff's format.
1899
Header: @@ -382,8 +481,9 @@
1900
Indicies are printed as 1-based, not 0-based.
1903
The GNU diff string.
1905
if self.length1 == 0:
1906
coords1 = str(self.start1) + ',0'
1907
elif self.length1 == 1:
1908
coords1 = str(self.start1 + 1)
1910
coords1 = str(self.start1 + 1) + ',' + str(self.length1)
1911
if self.length2 == 0:
1912
coords2 = str(self.start2) + ',0'
1913
elif self.length2 == 1:
1914
coords2 = str(self.start2 + 1)
1916
coords2 = str(self.start2 + 1) + ',' + str(self.length2)
1917
text = ['@@ -', coords1, ' +', coords2, ' @@\n']
1918
# Escape the body of the patch with %xx notation.
1919
for (op, data) in self.diffs:
1920
if op == diff_match_patch.DIFF_INSERT:
1922
elif op == diff_match_patch.DIFF_DELETE:
1924
elif op == diff_match_patch.DIFF_EQUAL:
1926
# High ascii will raise UnicodeDecodeError. Use Unicode instead.
1927
data = data.encode('utf-8')
1928
text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + '\n')
1929
return ''.join(text)