~widelands-dev/widelands-website/trunk

532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1
#!/usr/bin/python3
2
3
# Taken from https://github.com/google/diff-match-patch/blob/master/python3/diff_match_patch.py
4
# Applied class attribute in diff_prettyHtml() to get variable coloring
5
6
"""Diff Match and Patch
7
Copyright 2018 The diff-match-patch Authors.
8
https://github.com/google/diff-match-patch
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
9
10
Licensed under the Apache License, Version 2.0 (the "License");
11
you may not use this file except in compliance with the License.
12
You may obtain a copy of the License at
13
14
  http://www.apache.org/licenses/LICENSE-2.0
15
16
Unless required by applicable law or agreed to in writing, software
17
distributed under the License is distributed on an "AS IS" BASIS,
18
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
See the License for the specific language governing permissions and
20
limitations under the License.
21
"""
22
23
"""Functions for diff, match and patch.
24
25
Computes the difference between two texts to create a patch.
26
Applies the patch onto another text, allowing for errors.
27
"""
28
29
__author__ = 'fraser@google.com (Neil Fraser)'
30
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
31
import re
32
import sys
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
33
import time
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
34
import urllib.parse
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
35
438.1.6 by franku
run the script
36
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
37
class diff_match_patch:
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
38
  """Class containing the diff, match and patch methods.
39
40
  Also contains the behaviour settings.
41
  """
42
43
  def __init__(self):
44
    """Inits a diff_match_patch object with default settings.
45
    Redefine these in your program to override the defaults.
46
    """
47
48
    # Number of seconds to map a diff before giving up (0 for infinity).
49
    self.Diff_Timeout = 1.0
50
    # Cost of an empty edit operation in terms of edit characters.
51
    self.Diff_EditCost = 4
52
    # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
53
    self.Match_Threshold = 0.5
54
    # How far to search for a match (0 = exact location, 1000+ = broad match).
55
    # A match this many characters away from the expected location will add
56
    # 1.0 to the score (0.0 is a perfect match).
57
    self.Match_Distance = 1000
58
    # When deleting a large block of text (over ~64 characters), how close do
59
    # the contents have to be to match the expected contents. (0.0 = perfection,
60
    # 1.0 = very loose).  Note that Match_Threshold controls how closely the
61
    # end points of a delete need to match.
62
    self.Patch_DeleteThreshold = 0.5
63
    # Chunk size for context length.
64
    self.Patch_Margin = 4
65
66
    # The number of bits in an int.
67
    # Python has no maximum, thus to disable patch splitting set to 0.
68
    # However to avoid long patches in certain pathological cases, use 32.
69
    # Multiple short patches (using native ints) are much faster than long ones.
70
    self.Match_MaxBits = 32
71
72
  #  DIFF FUNCTIONS
73
74
  # The data structure representing a diff is an array of tuples:
75
  # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
76
  # which means: delete "Hello", add "Goodbye" and keep " world."
77
  DIFF_DELETE = -1
78
  DIFF_INSERT = 1
79
  DIFF_EQUAL = 0
80
81
  def diff_main(self, text1, text2, checklines=True, deadline=None):
82
    """Find the differences between two texts.  Simplifies the problem by
83
      stripping any common prefix or suffix off the texts before diffing.
84
85
    Args:
86
      text1: Old string to be diffed.
87
      text2: New string to be diffed.
88
      checklines: Optional speedup flag.  If present and false, then don't run
89
        a line-level diff first to identify the changed areas.
90
        Defaults to true, which does a faster, slightly less optimal diff.
91
      deadline: Optional time when the diff should be complete by.  Used
92
        internally for recursive calls.  Users should set DiffTimeout instead.
93
94
    Returns:
95
      Array of changes.
96
    """
97
    # Set a deadline by which time the diff must be complete.
98
    if deadline == None:
99
      # Unlike in most languages, Python counts time in seconds.
100
      if self.Diff_Timeout <= 0:
101
        deadline = sys.maxsize
102
      else:
103
        deadline = time.time() + self.Diff_Timeout
104
105
    # Check for null inputs.
106
    if text1 == None or text2 == None:
107
      raise ValueError("Null inputs. (diff_main)")
108
109
    # Check for equality (speedup).
110
    if text1 == text2:
111
      if text1:
112
        return [(self.DIFF_EQUAL, text1)]
113
      return []
114
115
    # Trim off common prefix (speedup).
116
    commonlength = self.diff_commonPrefix(text1, text2)
117
    commonprefix = text1[:commonlength]
118
    text1 = text1[commonlength:]
119
    text2 = text2[commonlength:]
120
121
    # Trim off common suffix (speedup).
122
    commonlength = self.diff_commonSuffix(text1, text2)
123
    if commonlength == 0:
124
      commonsuffix = ''
125
    else:
126
      commonsuffix = text1[-commonlength:]
127
      text1 = text1[:-commonlength]
128
      text2 = text2[:-commonlength]
129
130
    # Compute the diff on the middle block.
131
    diffs = self.diff_compute(text1, text2, checklines, deadline)
132
133
    # Restore the prefix and suffix.
134
    if commonprefix:
135
      diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
136
    if commonsuffix:
137
      diffs.append((self.DIFF_EQUAL, commonsuffix))
138
    self.diff_cleanupMerge(diffs)
139
    return diffs
140
141
  def diff_compute(self, text1, text2, checklines, deadline):
142
    """Find the differences between two texts.  Assumes that the texts do not
143
      have any common prefix or suffix.
144
145
    Args:
146
      text1: Old string to be diffed.
147
      text2: New string to be diffed.
148
      checklines: Speedup flag.  If false, then don't run a line-level diff
149
        first to identify the changed areas.
150
        If true, then run a faster, slightly less optimal diff.
151
      deadline: Time when the diff should be complete by.
152
153
    Returns:
154
      Array of changes.
155
    """
156
    if not text1:
157
      # Just add some text (speedup).
158
      return [(self.DIFF_INSERT, text2)]
159
160
    if not text2:
161
      # Just delete some text (speedup).
162
      return [(self.DIFF_DELETE, text1)]
163
164
    if len(text1) > len(text2):
165
      (longtext, shorttext) = (text1, text2)
166
    else:
167
      (shorttext, longtext) = (text1, text2)
168
    i = longtext.find(shorttext)
169
    if i != -1:
170
      # Shorter text is inside the longer text (speedup).
171
      diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
172
               (self.DIFF_INSERT, longtext[i + len(shorttext):])]
173
      # Swap insertions for deletions if diff is reversed.
174
      if len(text1) > len(text2):
175
        diffs[0] = (self.DIFF_DELETE, diffs[0][1])
176
        diffs[2] = (self.DIFF_DELETE, diffs[2][1])
177
      return diffs
178
179
    if len(shorttext) == 1:
180
      # Single character string.
181
      # After the previous speedup, the character can't be an equality.
182
      return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
183
184
    # Check to see if the problem can be split in two.
185
    hm = self.diff_halfMatch(text1, text2)
186
    if hm:
187
      # A half-match was found, sort out the return data.
188
      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
189
      # Send both pairs off for separate processing.
190
      diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline)
191
      diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline)
192
      # Merge the results.
193
      return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
194
195
    if checklines and len(text1) > 100 and len(text2) > 100:
196
      return self.diff_lineMode(text1, text2, deadline)
197
198
    return self.diff_bisect(text1, text2, deadline)
199
200
  def diff_lineMode(self, text1, text2, deadline):
201
    """Do a quick line-level diff on both strings, then rediff the parts for
202
      greater accuracy.
203
      This speedup can produce non-minimal diffs.
204
205
    Args:
206
      text1: Old string to be diffed.
207
      text2: New string to be diffed.
208
      deadline: Time when the diff should be complete by.
209
210
    Returns:
211
      Array of changes.
212
    """
213
214
    # Scan the text on a line-by-line basis first.
215
    (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
216
217
    diffs = self.diff_main(text1, text2, False, deadline)
218
219
    # Convert the diff back to original text.
220
    self.diff_charsToLines(diffs, linearray)
221
    # Eliminate freak matches (e.g. blank lines)
222
    self.diff_cleanupSemantic(diffs)
223
224
    # Rediff any replacement blocks, this time character-by-character.
225
    # Add a dummy entry at the end.
226
    diffs.append((self.DIFF_EQUAL, ''))
227
    pointer = 0
228
    count_delete = 0
229
    count_insert = 0
230
    text_delete = ''
231
    text_insert = ''
232
    while pointer < len(diffs):
233
      if diffs[pointer][0] == self.DIFF_INSERT:
234
        count_insert += 1
235
        text_insert += diffs[pointer][1]
236
      elif diffs[pointer][0] == self.DIFF_DELETE:
237
        count_delete += 1
238
        text_delete += diffs[pointer][1]
239
      elif diffs[pointer][0] == self.DIFF_EQUAL:
240
        # Upon reaching an equality, check for prior redundancies.
241
        if count_delete >= 1 and count_insert >= 1:
242
          # Delete the offending records and add the merged ones.
243
          subDiff = self.diff_main(text_delete, text_insert, False, deadline)
244
          diffs[pointer - count_delete - count_insert : pointer] = subDiff
245
          pointer = pointer - count_delete - count_insert + len(subDiff)
246
        count_insert = 0
247
        count_delete = 0
248
        text_delete = ''
249
        text_insert = ''
250
251
      pointer += 1
252
253
    diffs.pop()  # Remove the dummy entry at the end.
254
255
    return diffs
256
257
  def diff_bisect(self, text1, text2, deadline):
258
    """Find the 'middle snake' of a diff, split the problem in two
259
      and return the recursively constructed diff.
260
      See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
261
262
    Args:
263
      text1: Old string to be diffed.
264
      text2: New string to be diffed.
265
      deadline: Time at which to bail if not yet complete.
266
267
    Returns:
268
      Array of diff tuples.
269
    """
270
271
    # Cache the text lengths to prevent multiple calls.
272
    text1_length = len(text1)
273
    text2_length = len(text2)
274
    max_d = (text1_length + text2_length + 1) // 2
275
    v_offset = max_d
276
    v_length = 2 * max_d
277
    v1 = [-1] * v_length
278
    v1[v_offset + 1] = 0
279
    v2 = v1[:]
280
    delta = text1_length - text2_length
281
    # If the total number of characters is odd, then the front path will
282
    # collide with the reverse path.
283
    front = (delta % 2 != 0)
284
    # Offsets for start and end of k loop.
285
    # Prevents mapping of space beyond the grid.
286
    k1start = 0
287
    k1end = 0
288
    k2start = 0
289
    k2end = 0
290
    for d in range(max_d):
291
      # Bail out if deadline is reached.
292
      if time.time() > deadline:
293
        break
294
295
      # Walk the front path one step.
296
      for k1 in range(-d + k1start, d + 1 - k1end, 2):
297
        k1_offset = v_offset + k1
298
        if k1 == -d or (k1 != d and
299
            v1[k1_offset - 1] < v1[k1_offset + 1]):
300
          x1 = v1[k1_offset + 1]
301
        else:
302
          x1 = v1[k1_offset - 1] + 1
303
        y1 = x1 - k1
304
        while (x1 < text1_length and y1 < text2_length and
305
               text1[x1] == text2[y1]):
306
          x1 += 1
307
          y1 += 1
308
        v1[k1_offset] = x1
309
        if x1 > text1_length:
310
          # Ran off the right of the graph.
311
          k1end += 2
312
        elif y1 > text2_length:
313
          # Ran off the bottom of the graph.
314
          k1start += 2
315
        elif front:
316
          k2_offset = v_offset + delta - k1
317
          if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1:
318
            # Mirror x2 onto top-left coordinate system.
319
            x2 = text1_length - v2[k2_offset]
320
            if x1 >= x2:
321
              # Overlap detected.
322
              return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
323
324
      # Walk the reverse path one step.
325
      for k2 in range(-d + k2start, d + 1 - k2end, 2):
326
        k2_offset = v_offset + k2
327
        if k2 == -d or (k2 != d and
328
            v2[k2_offset - 1] < v2[k2_offset + 1]):
329
          x2 = v2[k2_offset + 1]
330
        else:
331
          x2 = v2[k2_offset - 1] + 1
332
        y2 = x2 - k2
333
        while (x2 < text1_length and y2 < text2_length and
334
               text1[-x2 - 1] == text2[-y2 - 1]):
335
          x2 += 1
336
          y2 += 1
337
        v2[k2_offset] = x2
338
        if x2 > text1_length:
339
          # Ran off the left of the graph.
340
          k2end += 2
341
        elif y2 > text2_length:
342
          # Ran off the top of the graph.
343
          k2start += 2
344
        elif not front:
345
          k1_offset = v_offset + delta - k2
346
          if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1:
347
            x1 = v1[k1_offset]
348
            y1 = v_offset + x1 - k1_offset
349
            # Mirror x2 onto top-left coordinate system.
350
            x2 = text1_length - x2
351
            if x1 >= x2:
352
              # Overlap detected.
353
              return self.diff_bisectSplit(text1, text2, x1, y1, deadline)
354
355
    # Diff took too long and hit the deadline or
356
    # number of diffs equals number of characters, no commonality at all.
357
    return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
358
359
  def diff_bisectSplit(self, text1, text2, x, y, deadline):
360
    """Given the location of the 'middle snake', split the diff in two parts
361
    and recurse.
362
363
    Args:
364
      text1: Old string to be diffed.
365
      text2: New string to be diffed.
366
      x: Index of split point in text1.
367
      y: Index of split point in text2.
368
      deadline: Time at which to bail if not yet complete.
369
370
    Returns:
371
      Array of diff tuples.
372
    """
373
    text1a = text1[:x]
374
    text2a = text2[:y]
375
    text1b = text1[x:]
376
    text2b = text2[y:]
377
378
    # Compute both diffs serially.
379
    diffs = self.diff_main(text1a, text2a, False, deadline)
380
    diffsb = self.diff_main(text1b, text2b, False, deadline)
381
382
    return diffs + diffsb
383
384
  def diff_linesToChars(self, text1, text2):
385
    """Split two texts into an array of strings.  Reduce the texts to a string
386
    of hashes where each Unicode character represents one line.
387
388
    Args:
389
      text1: First string.
390
      text2: Second string.
391
392
    Returns:
393
      Three element tuple, containing the encoded text1, the encoded text2 and
394
      the array of unique strings.  The zeroth element of the array of unique
395
      strings is intentionally blank.
396
    """
397
    lineArray = []  # e.g. lineArray[4] == "Hello\n"
398
    lineHash = {}   # e.g. lineHash["Hello\n"] == 4
399
400
    # "\x00" is a valid character, but various debuggers don't like it.
401
    # So we'll insert a junk entry to avoid generating a null character.
402
    lineArray.append('')
403
404
    def diff_linesToCharsMunge(text):
405
      """Split a text into an array of strings.  Reduce the texts to a string
406
      of hashes where each Unicode character represents one line.
407
      Modifies linearray and linehash through being a closure.
408
409
      Args:
410
        text: String to encode.
411
412
      Returns:
413
        Encoded string.
414
      """
415
      chars = []
416
      # Walk the text, pulling out a substring for each line.
417
      # text.split('\n') would would temporarily double our memory footprint.
418
      # Modifying text would create many large strings to garbage collect.
419
      lineStart = 0
420
      lineEnd = -1
421
      while lineEnd < len(text) - 1:
422
        lineEnd = text.find('\n', lineStart)
423
        if lineEnd == -1:
424
          lineEnd = len(text) - 1
425
        line = text[lineStart:lineEnd + 1]
426
427
        if line in lineHash:
428
          chars.append(chr(lineHash[line]))
429
        else:
430
          if len(lineArray) == maxLines:
431
            # Bail out at 1114111 because chr(1114112) throws.
432
            line = text[lineStart:]
433
            lineEnd = len(text)
434
          lineArray.append(line)
435
          lineHash[line] = len(lineArray) - 1
436
          chars.append(chr(len(lineArray) - 1))
437
        lineStart = lineEnd + 1
438
      return "".join(chars)
439
440
    # Allocate 2/3rds of the space for text1, the rest for text2.
441
    maxLines = 666666
442
    chars1 = diff_linesToCharsMunge(text1)
443
    maxLines = 1114111
444
    chars2 = diff_linesToCharsMunge(text2)
445
    return (chars1, chars2, lineArray)
446
447
  def diff_charsToLines(self, diffs, lineArray):
448
    """Rehydrate the text in a diff from a string of line hashes to real lines
449
    of text.
450
451
    Args:
452
      diffs: Array of diff tuples.
453
      lineArray: Array of unique strings.
454
    """
455
    for i in range(len(diffs)):
456
      text = []
457
      for char in diffs[i][1]:
458
        text.append(lineArray[ord(char)])
459
      diffs[i] = (diffs[i][0], "".join(text))
460
461
  def diff_commonPrefix(self, text1, text2):
462
    """Determine the common prefix of two strings.
463
464
    Args:
465
      text1: First string.
466
      text2: Second string.
467
468
    Returns:
469
      The number of characters common to the start of each string.
470
    """
471
    # Quick check for common null cases.
472
    if not text1 or not text2 or text1[0] != text2[0]:
473
      return 0
474
    # Binary search.
475
    # Performance analysis: https://neil.fraser.name/news/2007/10/09/
476
    pointermin = 0
477
    pointermax = min(len(text1), len(text2))
478
    pointermid = pointermax
479
    pointerstart = 0
480
    while pointermin < pointermid:
481
      if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
482
        pointermin = pointermid
483
        pointerstart = pointermin
484
      else:
485
        pointermax = pointermid
486
      pointermid = (pointermax - pointermin) // 2 + pointermin
487
    return pointermid
488
489
  def diff_commonSuffix(self, text1, text2):
490
    """Determine the common suffix of two strings.
491
492
    Args:
493
      text1: First string.
494
      text2: Second string.
495
496
    Returns:
497
      The number of characters common to the end of each string.
498
    """
499
    # Quick check for common null cases.
500
    if not text1 or not text2 or text1[-1] != text2[-1]:
501
      return 0
502
    # Binary search.
503
    # Performance analysis: https://neil.fraser.name/news/2007/10/09/
504
    pointermin = 0
505
    pointermax = min(len(text1), len(text2))
506
    pointermid = pointermax
507
    pointerend = 0
508
    while pointermin < pointermid:
509
      if (text1[-pointermid:len(text1) - pointerend] ==
510
          text2[-pointermid:len(text2) - pointerend]):
511
        pointermin = pointermid
512
        pointerend = pointermin
513
      else:
514
        pointermax = pointermid
515
      pointermid = (pointermax - pointermin) // 2 + pointermin
516
    return pointermid
517
518
  def diff_commonOverlap(self, text1, text2):
519
    """Determine if the suffix of one string is the prefix of another.
520
521
    Args:
522
      text1 First string.
523
      text2 Second string.
524
525
    Returns:
526
      The number of characters common to the end of the first
527
      string and the start of the second string.
528
    """
529
    # Cache the text lengths to prevent multiple calls.
530
    text1_length = len(text1)
531
    text2_length = len(text2)
532
    # Eliminate the null case.
533
    if text1_length == 0 or text2_length == 0:
534
      return 0
535
    # Truncate the longer string.
536
    if text1_length > text2_length:
537
      text1 = text1[-text2_length:]
538
    elif text1_length < text2_length:
539
      text2 = text2[:text1_length]
540
    text_length = min(text1_length, text2_length)
541
    # Quick check for the worst case.
542
    if text1 == text2:
543
      return text_length
544
545
    # Start by looking for a single character match
546
    # and increase length until no match is found.
547
    # Performance analysis: https://neil.fraser.name/news/2010/11/04/
548
    best = 0
549
    length = 1
550
    while True:
551
      pattern = text1[-length:]
552
      found = text2.find(pattern)
553
      if found == -1:
554
        return best
555
      length += found
556
      if found == 0 or text1[-length:] == text2[:length]:
557
        best = length
558
        length += 1
559
560
  def diff_halfMatch(self, text1, text2):
561
    """Do the two texts share a substring which is at least half the length of
562
    the longer text?
563
    This speedup can produce non-minimal diffs.
564
565
    Args:
566
      text1: First string.
567
      text2: Second string.
568
569
    Returns:
570
      Five element Array, containing the prefix of text1, the suffix of text1,
571
      the prefix of text2, the suffix of text2 and the common middle.  Or None
572
      if there was no match.
573
    """
574
    if self.Diff_Timeout <= 0:
575
      # Don't risk returning a non-optimal diff if we have unlimited time.
576
      return None
577
    if len(text1) > len(text2):
578
      (longtext, shorttext) = (text1, text2)
579
    else:
580
      (shorttext, longtext) = (text1, text2)
581
    if len(longtext) < 4 or len(shorttext) * 2 < len(longtext):
582
      return None  # Pointless.
583
584
    def diff_halfMatchI(longtext, shorttext, i):
585
      """Does a substring of shorttext exist within longtext such that the
586
      substring is at least half the length of longtext?
587
      Closure, but does not reference any external variables.
588
589
      Args:
590
        longtext: Longer string.
591
        shorttext: Shorter string.
592
        i: Start index of quarter length substring within longtext.
593
594
      Returns:
595
        Five element Array, containing the prefix of longtext, the suffix of
596
        longtext, the prefix of shorttext, the suffix of shorttext and the
597
        common middle.  Or None if there was no match.
598
      """
599
      seed = longtext[i:i + len(longtext) // 4]
600
      best_common = ''
601
      j = shorttext.find(seed)
602
      while j != -1:
603
        prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
604
        suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
605
        if len(best_common) < suffixLength + prefixLength:
606
          best_common = (shorttext[j - suffixLength:j] +
607
              shorttext[j:j + prefixLength])
608
          best_longtext_a = longtext[:i - suffixLength]
609
          best_longtext_b = longtext[i + prefixLength:]
610
          best_shorttext_a = shorttext[:j - suffixLength]
611
          best_shorttext_b = shorttext[j + prefixLength:]
612
        j = shorttext.find(seed, j + 1)
613
614
      if len(best_common) * 2 >= len(longtext):
615
        return (best_longtext_a, best_longtext_b,
616
                best_shorttext_a, best_shorttext_b, best_common)
617
      else:
438.1.6 by franku
run the script
618
        return None
619
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
620
    # First check if the second quarter is the seed for a half-match.
621
    hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4)
622
    # Check again based on the third quarter.
623
    hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2)
624
    if not hm1 and not hm2:
625
      return None
626
    elif not hm2:
627
      hm = hm1
628
    elif not hm1:
629
      hm = hm2
630
    else:
631
      # Both matched.  Select the longest.
632
      if len(hm1[4]) > len(hm2[4]):
633
        hm = hm1
634
      else:
635
        hm = hm2
636
637
    # A half-match was found, sort out the return data.
638
    if len(text1) > len(text2):
639
      (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
640
    else:
641
      (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
642
    return (text1_a, text1_b, text2_a, text2_b, mid_common)
643
644
  def diff_cleanupSemantic(self, diffs):
645
    """Reduce the number of edits by eliminating semantically trivial
646
    equalities.
647
648
    Args:
649
      diffs: Array of diff tuples.
650
    """
651
    changes = False
652
    equalities = []  # Stack of indices where equalities are found.
653
    lastEquality = None  # Always equal to diffs[equalities[-1]][1]
654
    pointer = 0  # Index of current position.
655
    # Number of chars that changed prior to the equality.
656
    length_insertions1, length_deletions1 = 0, 0
657
    # Number of chars that changed after the equality.
658
    length_insertions2, length_deletions2 = 0, 0
659
    while pointer < len(diffs):
660
      if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
661
        equalities.append(pointer)
662
        length_insertions1, length_insertions2 = length_insertions2, 0
663
        length_deletions1, length_deletions2 = length_deletions2, 0
664
        lastEquality = diffs[pointer][1]
665
      else:  # An insertion or deletion.
666
        if diffs[pointer][0] == self.DIFF_INSERT:
667
          length_insertions2 += len(diffs[pointer][1])
668
        else:
669
          length_deletions2 += len(diffs[pointer][1])
670
        # Eliminate an equality that is smaller or equal to the edits on both
671
        # sides of it.
672
        if (lastEquality and (len(lastEquality) <=
673
            max(length_insertions1, length_deletions1)) and
674
            (len(lastEquality) <= max(length_insertions2, length_deletions2))):
675
          # Duplicate record.
676
          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
677
          # Change second copy to insert.
678
          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
679
              diffs[equalities[-1] + 1][1])
680
          # Throw away the equality we just deleted.
681
          equalities.pop()
682
          # Throw away the previous equality (it needs to be reevaluated).
683
          if len(equalities):
684
            equalities.pop()
685
          if len(equalities):
686
            pointer = equalities[-1]
687
          else:
688
            pointer = -1
689
          # Reset the counters.
690
          length_insertions1, length_deletions1 = 0, 0
691
          length_insertions2, length_deletions2 = 0, 0
692
          lastEquality = None
693
          changes = True
694
      pointer += 1
695
696
    # Normalize the diff.
697
    if changes:
698
      self.diff_cleanupMerge(diffs)
699
    self.diff_cleanupSemanticLossless(diffs)
700
701
    # Find any overlaps between deletions and insertions.
702
    # e.g: <del>abcxxx</del><ins>xxxdef</ins>
703
    #   -> <del>abc</del>xxx<ins>def</ins>
704
    # e.g: <del>xxxabc</del><ins>defxxx</ins>
705
    #   -> <ins>def</ins>xxx<del>abc</del>
706
    # Only extract an overlap if it is as big as the edit ahead or behind it.
707
    pointer = 1
708
    while pointer < len(diffs):
709
      if (diffs[pointer - 1][0] == self.DIFF_DELETE and
710
          diffs[pointer][0] == self.DIFF_INSERT):
711
        deletion = diffs[pointer - 1][1]
712
        insertion = diffs[pointer][1]
713
        overlap_length1 = self.diff_commonOverlap(deletion, insertion)
714
        overlap_length2 = self.diff_commonOverlap(insertion, deletion)
715
        if overlap_length1 >= overlap_length2:
716
          if (overlap_length1 >= len(deletion) / 2.0 or
717
              overlap_length1 >= len(insertion) / 2.0):
718
            # Overlap found.  Insert an equality and trim the surrounding edits.
719
            diffs.insert(pointer, (self.DIFF_EQUAL,
720
                                   insertion[:overlap_length1]))
721
            diffs[pointer - 1] = (self.DIFF_DELETE,
722
                                  deletion[:len(deletion) - overlap_length1])
723
            diffs[pointer + 1] = (self.DIFF_INSERT,
724
                                  insertion[overlap_length1:])
725
            pointer += 1
726
        else:
727
          if (overlap_length2 >= len(deletion) / 2.0 or
728
              overlap_length2 >= len(insertion) / 2.0):
729
            # Reverse overlap found.
730
            # Insert an equality and swap and trim the surrounding edits.
731
            diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2]))
732
            diffs[pointer - 1] = (self.DIFF_INSERT,
733
                                  insertion[:len(insertion) - overlap_length2])
734
            diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:])
735
            pointer += 1
736
        pointer += 1
737
      pointer += 1
738
739
  def diff_cleanupSemanticLossless(self, diffs):
740
    """Look for single edits surrounded on both sides by equalities
741
    which can be shifted sideways to align the edit to a word boundary.
742
    e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
743
744
    Args:
745
      diffs: Array of diff tuples.
746
    """
747
748
    def diff_cleanupSemanticScore(one, two):
749
      """Given two strings, compute a score representing whether the
750
      internal boundary falls on logical boundaries.
751
      Scores range from 6 (best) to 0 (worst).
752
      Closure, but does not reference any external variables.
753
754
      Args:
755
        one: First string.
756
        two: Second string.
757
758
      Returns:
759
        The score.
760
      """
761
      if not one or not two:
762
        # Edges are the best.
763
        return 6
764
765
      # Each port of this function behaves slightly differently due to
766
      # subtle differences in each language's definition of things like
767
      # 'whitespace'.  Since this function's purpose is largely cosmetic,
768
      # the choice has been made to use each language's native features
769
      # rather than force total conformity.
770
      char1 = one[-1]
771
      char2 = two[0]
772
      nonAlphaNumeric1 = not char1.isalnum()
773
      nonAlphaNumeric2 = not char2.isalnum()
774
      whitespace1 = nonAlphaNumeric1 and char1.isspace()
775
      whitespace2 = nonAlphaNumeric2 and char2.isspace()
776
      lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n")
777
      lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n")
778
      blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one)
779
      blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two)
780
781
      if blankLine1 or blankLine2:
782
        # Five points for blank lines.
783
        return 5
784
      elif lineBreak1 or lineBreak2:
785
        # Four points for line breaks.
786
        return 4
787
      elif nonAlphaNumeric1 and not whitespace1 and whitespace2:
788
        # Three points for end of sentences.
789
        return 3
790
      elif whitespace1 or whitespace2:
791
        # Two points for whitespace.
792
        return 2
793
      elif nonAlphaNumeric1 or nonAlphaNumeric2:
794
        # One point for non-alphanumeric.
795
        return 1
796
      return 0
797
798
    pointer = 1
799
    # Intentionally ignore the first and last element (don't need checking).
800
    while pointer < len(diffs) - 1:
801
      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
802
          diffs[pointer + 1][0] == self.DIFF_EQUAL):
803
        # This is a single edit surrounded by equalities.
804
        equality1 = diffs[pointer - 1][1]
805
        edit = diffs[pointer][1]
806
        equality2 = diffs[pointer + 1][1]
807
808
        # First, shift the edit as far left as possible.
809
        commonOffset = self.diff_commonSuffix(equality1, edit)
810
        if commonOffset:
811
          commonString = edit[-commonOffset:]
812
          equality1 = equality1[:-commonOffset]
813
          edit = commonString + edit[:-commonOffset]
814
          equality2 = commonString + equality2
815
816
        # Second, step character by character right, looking for the best fit.
817
        bestEquality1 = equality1
818
        bestEdit = edit
819
        bestEquality2 = equality2
820
        bestScore = (diff_cleanupSemanticScore(equality1, edit) +
821
            diff_cleanupSemanticScore(edit, equality2))
822
        while edit and equality2 and edit[0] == equality2[0]:
823
          equality1 += edit[0]
824
          edit = edit[1:] + equality2[0]
825
          equality2 = equality2[1:]
826
          score = (diff_cleanupSemanticScore(equality1, edit) +
827
              diff_cleanupSemanticScore(edit, equality2))
828
          # The >= encourages trailing rather than leading whitespace on edits.
829
          if score >= bestScore:
830
            bestScore = score
831
            bestEquality1 = equality1
832
            bestEdit = edit
833
            bestEquality2 = equality2
834
835
        if diffs[pointer - 1][1] != bestEquality1:
836
          # We have an improvement, save it back to the diff.
837
          if bestEquality1:
838
            diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
839
          else:
840
            del diffs[pointer - 1]
841
            pointer -= 1
842
          diffs[pointer] = (diffs[pointer][0], bestEdit)
843
          if bestEquality2:
844
            diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
845
          else:
846
            del diffs[pointer + 1]
847
            pointer -= 1
848
      pointer += 1
849
850
  # Define some regex patterns for matching boundaries.
851
  BLANKLINEEND = re.compile(r"\n\r?\n$")
852
  BLANKLINESTART = re.compile(r"^\r?\n\r?\n")
853
854
  def diff_cleanupEfficiency(self, diffs):
855
    """Reduce the number of edits by eliminating operationally trivial
856
    equalities.
857
858
    Args:
859
      diffs: Array of diff tuples.
860
    """
861
    changes = False
862
    equalities = []  # Stack of indices where equalities are found.
863
    lastEquality = None  # Always equal to diffs[equalities[-1]][1]
864
    pointer = 0  # Index of current position.
865
    pre_ins = False  # Is there an insertion operation before the last equality.
866
    pre_del = False  # Is there a deletion operation before the last equality.
867
    post_ins = False  # Is there an insertion operation after the last equality.
868
    post_del = False  # Is there a deletion operation after the last equality.
869
    while pointer < len(diffs):
870
      if diffs[pointer][0] == self.DIFF_EQUAL:  # Equality found.
871
        if (len(diffs[pointer][1]) < self.Diff_EditCost and
872
            (post_ins or post_del)):
873
          # Candidate found.
874
          equalities.append(pointer)
875
          pre_ins = post_ins
876
          pre_del = post_del
877
          lastEquality = diffs[pointer][1]
878
        else:
879
          # Not a candidate, and can never become one.
880
          equalities = []
881
          lastEquality = None
882
883
        post_ins = post_del = False
884
      else:  # An insertion or deletion.
885
        if diffs[pointer][0] == self.DIFF_DELETE:
886
          post_del = True
887
        else:
888
          post_ins = True
889
890
        # Five types to be split:
891
        # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
892
        # <ins>A</ins>X<ins>C</ins><del>D</del>
893
        # <ins>A</ins><del>B</del>X<ins>C</ins>
894
        # <ins>A</del>X<ins>C</ins><del>D</del>
895
        # <ins>A</ins><del>B</del>X<del>C</del>
896
897
        if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or
898
                             ((len(lastEquality) < self.Diff_EditCost / 2) and
899
                              (pre_ins + pre_del + post_ins + post_del) == 3)):
900
          # Duplicate record.
901
          diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality))
902
          # Change second copy to insert.
903
          diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
904
              diffs[equalities[-1] + 1][1])
905
          equalities.pop()  # Throw away the equality we just deleted.
906
          lastEquality = None
907
          if pre_ins and pre_del:
908
            # No changes made which could affect previous entry, keep going.
909
            post_ins = post_del = True
910
            equalities = []
911
          else:
912
            if len(equalities):
913
              equalities.pop()  # Throw away the previous equality.
914
            if len(equalities):
915
              pointer = equalities[-1]
916
            else:
917
              pointer = -1
918
            post_ins = post_del = False
919
          changes = True
920
      pointer += 1
921
922
    if changes:
923
      self.diff_cleanupMerge(diffs)
924
925
  def diff_cleanupMerge(self, diffs):
926
    """Reorder and merge like edit sections.  Merge equalities.
927
    Any edit section can move as long as it doesn't cross an equality.
928
929
    Args:
930
      diffs: Array of diff tuples.
931
    """
932
    diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
933
    pointer = 0
934
    count_delete = 0
935
    count_insert = 0
936
    text_delete = ''
937
    text_insert = ''
938
    while pointer < len(diffs):
939
      if diffs[pointer][0] == self.DIFF_INSERT:
940
        count_insert += 1
941
        text_insert += diffs[pointer][1]
942
        pointer += 1
943
      elif diffs[pointer][0] == self.DIFF_DELETE:
944
        count_delete += 1
945
        text_delete += diffs[pointer][1]
946
        pointer += 1
947
      elif diffs[pointer][0] == self.DIFF_EQUAL:
948
        # Upon reaching an equality, check for prior redundancies.
949
        if count_delete + count_insert > 1:
950
          if count_delete != 0 and count_insert != 0:
951
            # Factor out any common prefixies.
952
            commonlength = self.diff_commonPrefix(text_insert, text_delete)
953
            if commonlength != 0:
954
              x = pointer - count_delete - count_insert - 1
955
              if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
956
                diffs[x] = (diffs[x][0], diffs[x][1] +
957
                            text_insert[:commonlength])
958
              else:
959
                diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
960
                pointer += 1
961
              text_insert = text_insert[commonlength:]
962
              text_delete = text_delete[commonlength:]
963
            # Factor out any common suffixies.
964
            commonlength = self.diff_commonSuffix(text_insert, text_delete)
965
            if commonlength != 0:
966
              diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
967
                  diffs[pointer][1])
968
              text_insert = text_insert[:-commonlength]
969
              text_delete = text_delete[:-commonlength]
970
          # Delete the offending records and add the merged ones.
971
          new_ops = []
972
          if len(text_delete) != 0:
973
            new_ops.append((self.DIFF_DELETE, text_delete))
974
          if len(text_insert) != 0:
975
            new_ops.append((self.DIFF_INSERT, text_insert))
976
          pointer -= count_delete + count_insert
977
          diffs[pointer : pointer + count_delete + count_insert] = new_ops
978
          pointer += len(new_ops) + 1
979
        elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
980
          # Merge this equality with the previous one.
981
          diffs[pointer - 1] = (diffs[pointer - 1][0],
982
                                diffs[pointer - 1][1] + diffs[pointer][1])
983
          del diffs[pointer]
984
        else:
985
          pointer += 1
986
987
        count_insert = 0
438.1.6 by franku
run the script
988
        count_delete = 0
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
989
        text_delete = ''
990
        text_insert = ''
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
991
992
    if diffs[-1][1] == '':
993
      diffs.pop()  # Remove the dummy entry at the end.
994
995
    # Second pass: look for single edits surrounded on both sides by equalities
996
    # which can be shifted sideways to eliminate an equality.
997
    # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
998
    changes = False
999
    pointer = 1
1000
    # Intentionally ignore the first and last element (don't need checking).
1001
    while pointer < len(diffs) - 1:
1002
      if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
1003
          diffs[pointer + 1][0] == self.DIFF_EQUAL):
1004
        # This is a single edit surrounded by equalities.
1005
        if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
1006
          # Shift the edit over the previous equality.
1007
          if diffs[pointer - 1][1] != "":
1008
            diffs[pointer] = (diffs[pointer][0],
1009
                diffs[pointer - 1][1] +
1010
                diffs[pointer][1][:-len(diffs[pointer - 1][1])])
1011
            diffs[pointer + 1] = (diffs[pointer + 1][0],
1012
                                  diffs[pointer - 1][1] + diffs[pointer + 1][1])
1013
          del diffs[pointer - 1]
1014
          changes = True
1015
        elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
1016
          # Shift the edit over the next equality.
1017
          diffs[pointer - 1] = (diffs[pointer - 1][0],
1018
                                diffs[pointer - 1][1] + diffs[pointer + 1][1])
1019
          diffs[pointer] = (diffs[pointer][0],
1020
              diffs[pointer][1][len(diffs[pointer + 1][1]):] +
1021
              diffs[pointer + 1][1])
1022
          del diffs[pointer + 1]
1023
          changes = True
1024
      pointer += 1
1025
1026
    # If shifts were made, the diff needs reordering and another shift sweep.
1027
    if changes:
1028
      self.diff_cleanupMerge(diffs)
1029
1030
  def diff_xIndex(self, diffs, loc):
1031
    """loc is a location in text1, compute and return the equivalent location
1032
    in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
1033
1034
    Args:
1035
      diffs: Array of diff tuples.
1036
      loc: Location within text1.
1037
1038
    Returns:
1039
      Location within text2.
1040
    """
1041
    chars1 = 0
1042
    chars2 = 0
1043
    last_chars1 = 0
1044
    last_chars2 = 0
1045
    for x in range(len(diffs)):
1046
      (op, text) = diffs[x]
1047
      if op != self.DIFF_INSERT:  # Equality or deletion.
1048
        chars1 += len(text)
1049
      if op != self.DIFF_DELETE:  # Equality or insertion.
1050
        chars2 += len(text)
1051
      if chars1 > loc:  # Overshot the location.
1052
        break
1053
      last_chars1 = chars1
1054
      last_chars2 = chars2
1055
1056
    if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
1057
      # The location was deleted.
1058
      return last_chars2
1059
    # Add the remaining len(character).
1060
    return last_chars2 + (loc - last_chars1)
1061
1062
  def diff_prettyHtml(self, diffs):
1063
    """Convert a diff array into a pretty HTML report.
1064
1065
    Args:
1066
      diffs: Array of diff tuples.
1067
1068
    Returns:
1069
      HTML representation.
1070
    """
1071
    html = []
1072
    for (op, data) in diffs:
1073
      text = (data.replace("&", "&amp;").replace("<", "&lt;")
1074
                 .replace(">", "&gt;").replace("\n", "&para;<br>"))
1075
      if op == self.DIFF_INSERT:
532.1.5 by franku
applied coloring for diff_match_patch
1076
        html.append("<ins class=\"inserted\">%s</ins>" % text)
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1077
      elif op == self.DIFF_DELETE:
532.1.5 by franku
applied coloring for diff_match_patch
1078
        html.append("<del class=\"removed\">%s</del>" % text)
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1079
      elif op == self.DIFF_EQUAL:
1080
        html.append("<span>%s</span>" % text)
1081
    return "".join(html)
1082
1083
  def diff_text1(self, diffs):
1084
    """Compute and return the source text (all equalities and deletions).
1085
1086
    Args:
1087
      diffs: Array of diff tuples.
1088
1089
    Returns:
1090
      Source text.
1091
    """
1092
    text = []
1093
    for (op, data) in diffs:
1094
      if op != self.DIFF_INSERT:
1095
        text.append(data)
1096
    return "".join(text)
1097
1098
  def diff_text2(self, diffs):
1099
    """Compute and return the destination text (all equalities and insertions).
1100
1101
    Args:
1102
      diffs: Array of diff tuples.
1103
1104
    Returns:
1105
      Destination text.
1106
    """
1107
    text = []
1108
    for (op, data) in diffs:
1109
      if op != self.DIFF_DELETE:
1110
        text.append(data)
1111
    return "".join(text)
1112
1113
  def diff_levenshtein(self, diffs):
1114
    """Compute the Levenshtein distance; the number of inserted, deleted or
1115
    substituted characters.
1116
1117
    Args:
1118
      diffs: Array of diff tuples.
1119
1120
    Returns:
1121
      Number of changes.
1122
    """
1123
    levenshtein = 0
1124
    insertions = 0
1125
    deletions = 0
1126
    for (op, data) in diffs:
1127
      if op == self.DIFF_INSERT:
1128
        insertions += len(data)
1129
      elif op == self.DIFF_DELETE:
1130
        deletions += len(data)
1131
      elif op == self.DIFF_EQUAL:
1132
        # A deletion and an insertion is one substitution.
1133
        levenshtein += max(insertions, deletions)
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1134
        insertions = 0
1135
        deletions = 0
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1136
    levenshtein += max(insertions, deletions)
1137
    return levenshtein
1138
1139
  def diff_toDelta(self, diffs):
1140
    """Crush the diff into an encoded string which describes the operations
1141
    required to transform text1 into text2.
1142
    E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1143
    Operations are tab-separated.  Inserted text is escaped using %xx notation.
1144
1145
    Args:
1146
      diffs: Array of diff tuples.
1147
1148
    Returns:
1149
      Delta text.
1150
    """
1151
    text = []
1152
    for (op, data) in diffs:
1153
      if op == self.DIFF_INSERT:
1154
        # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
1155
        data = data.encode("utf-8")
1156
        text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
1157
      elif op == self.DIFF_DELETE:
1158
        text.append("-%d" % len(data))
1159
      elif op == self.DIFF_EQUAL:
1160
        text.append("=%d" % len(data))
1161
    return "\t".join(text)
1162
1163
  def diff_fromDelta(self, text1, delta):
1164
    """Given the original text1, and an encoded string which describes the
1165
    operations required to transform text1 into text2, compute the full diff.
1166
1167
    Args:
1168
      text1: Source string for the diff.
1169
      delta: Delta text.
1170
1171
    Returns:
1172
      Array of diff tuples.
1173
1174
    Raises:
1175
      ValueError: If invalid input.
1176
    """
1177
    diffs = []
1178
    pointer = 0  # Cursor in text1
1179
    tokens = delta.split("\t")
1180
    for token in tokens:
1181
      if token == "":
1182
        # Blank tokens are ok (from a trailing \t).
1183
        continue
1184
      # Each token begins with a one character parameter which specifies the
1185
      # operation of this token (delete, insert, equality).
1186
      param = token[1:]
1187
      if token[0] == "+":
1188
        param = urllib.parse.unquote(param)
1189
        diffs.append((self.DIFF_INSERT, param))
1190
      elif token[0] == "-" or token[0] == "=":
1191
        try:
1192
          n = int(param)
1193
        except ValueError:
1194
          raise ValueError("Invalid number in diff_fromDelta: " + param)
1195
        if n < 0:
1196
          raise ValueError("Negative number in diff_fromDelta: " + param)
1197
        text = text1[pointer : pointer + n]
1198
        pointer += n
1199
        if token[0] == "=":
1200
          diffs.append((self.DIFF_EQUAL, text))
1201
        else:
1202
          diffs.append((self.DIFF_DELETE, text))
1203
      else:
1204
        # Anything else is an error.
1205
        raise ValueError("Invalid diff operation in diff_fromDelta: " +
1206
            token[0])
1207
    if pointer != len(text1):
1208
      raise ValueError(
1209
          "Delta length (%d) does not equal source text length (%d)." %
1210
         (pointer, len(text1)))
1211
    return diffs
1212
1213
  #  MATCH FUNCTIONS
1214
1215
  def match_main(self, text, pattern, loc):
1216
    """Locate the best instance of 'pattern' in 'text' near 'loc'.
1217
1218
    Args:
1219
      text: The text to search.
1220
      pattern: The pattern to search for.
1221
      loc: The location to search around.
1222
1223
    Returns:
1224
      Best match index or -1.
1225
    """
1226
    # Check for null inputs.
1227
    if text == None or pattern == None:
1228
      raise ValueError("Null inputs. (match_main)")
1229
1230
    loc = max(0, min(loc, len(text)))
1231
    if text == pattern:
1232
      # Shortcut (potentially not guaranteed by the algorithm)
1233
      return 0
1234
    elif not text:
1235
      # Nothing to match.
1236
      return -1
1237
    elif text[loc:loc + len(pattern)] == pattern:
1238
      # Perfect match at the perfect spot!  (Includes case of null pattern)
1239
      return loc
1240
    else:
1241
      # Do a fuzzy compare.
1242
      match = self.match_bitap(text, pattern, loc)
1243
      return match
1244
1245
  def match_bitap(self, text, pattern, loc):
1246
    """Locate the best instance of 'pattern' in 'text' near 'loc' using the
1247
    Bitap algorithm.
1248
1249
    Args:
1250
      text: The text to search.
1251
      pattern: The pattern to search for.
1252
      loc: The location to search around.
1253
1254
    Returns:
1255
      Best match index or -1.
1256
    """
1257
    # Python doesn't have a maxint limit, so ignore this check.
1258
    #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
1259
    #  raise ValueError("Pattern too long for this application.")
1260
1261
    # Initialise the alphabet.
1262
    s = self.match_alphabet(pattern)
1263
1264
    def match_bitapScore(e, x):
1265
      """Compute and return the score for a match with e errors and x location.
1266
      Accesses loc and pattern through being a closure.
1267
1268
      Args:
1269
        e: Number of errors in match.
1270
        x: Location of match.
1271
1272
      Returns:
1273
        Overall score for match (0.0 = good, 1.0 = bad).
1274
      """
1275
      accuracy = float(e) / len(pattern)
1276
      proximity = abs(loc - x)
1277
      if not self.Match_Distance:
1278
        # Dodge divide by zero error.
1279
        return proximity and 1.0 or accuracy
1280
      return accuracy + (proximity / float(self.Match_Distance))
1281
1282
    # Highest score beyond which we give up.
1283
    score_threshold = self.Match_Threshold
1284
    # Is there a nearby exact match? (speedup)
1285
    best_loc = text.find(pattern, loc)
1286
    if best_loc != -1:
1287
      score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1288
      # What about in the other direction? (speedup)
1289
      best_loc = text.rfind(pattern, loc + len(pattern))
1290
      if best_loc != -1:
1291
        score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
1292
1293
    # Initialise the bit arrays.
1294
    matchmask = 1 << (len(pattern) - 1)
1295
    best_loc = -1
1296
1297
    bin_max = len(pattern) + len(text)
1298
    # Empty initialization added to appease pychecker.
1299
    last_rd = None
1300
    for d in range(len(pattern)):
1301
      # Scan for the best match each iteration allows for one more error.
1302
      # Run a binary search to determine how far from 'loc' we can stray at
1303
      # this error level.
1304
      bin_min = 0
1305
      bin_mid = bin_max
1306
      while bin_min < bin_mid:
1307
        if match_bitapScore(d, loc + bin_mid) <= score_threshold:
1308
          bin_min = bin_mid
1309
        else:
1310
          bin_max = bin_mid
1311
        bin_mid = (bin_max - bin_min) // 2 + bin_min
1312
1313
      # Use the result from this iteration as the maximum for the next.
1314
      bin_max = bin_mid
1315
      start = max(1, loc - bin_mid + 1)
1316
      finish = min(loc + bin_mid, len(text)) + len(pattern)
1317
1318
      rd = [0] * (finish + 2)
1319
      rd[finish + 1] = (1 << d) - 1
1320
      for j in range(finish, start - 1, -1):
1321
        if len(text) <= j - 1:
1322
          # Out of range.
1323
          charMatch = 0
1324
        else:
1325
          charMatch = s.get(text[j - 1], 0)
1326
        if d == 0:  # First pass: exact match.
1327
          rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
1328
        else:  # Subsequent passes: fuzzy match.
1329
          rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | (
1330
              ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
1331
        if rd[j] & matchmask:
1332
          score = match_bitapScore(d, j - 1)
1333
          # This match will almost certainly be better than any existing match.
1334
          # But check anyway.
1335
          if score <= score_threshold:
1336
            # Told you so.
1337
            score_threshold = score
1338
            best_loc = j - 1
1339
            if best_loc > loc:
1340
              # When passing loc, don't exceed our current distance from loc.
1341
              start = max(1, 2 * loc - best_loc)
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1342
            else:
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1343
              # Already passed loc, downhill from here on in.
1344
              break
1345
      # No hope for a (better) match at greater error levels.
1346
      if match_bitapScore(d + 1, loc) > score_threshold:
1347
        break
1348
      last_rd = rd
1349
    return best_loc
1350
1351
  def match_alphabet(self, pattern):
1352
    """Initialise the alphabet for the Bitap algorithm.
1353
1354
    Args:
1355
      pattern: The text to encode.
1356
1357
    Returns:
1358
      Hash of character locations.
1359
    """
1360
    s = {}
1361
    for char in pattern:
1362
      s[char] = 0
1363
    for i in range(len(pattern)):
1364
      s[pattern[i]] |= 1 << (len(pattern) - i - 1)
1365
    return s
1366
1367
  #  PATCH FUNCTIONS
1368
1369
  def patch_addContext(self, patch, text):
1370
    """Increase the context until it is unique,
1371
    but don't let the pattern expand beyond Match_MaxBits.
1372
1373
    Args:
1374
      patch: The patch to grow.
1375
      text: Source text.
1376
    """
1377
    if len(text) == 0:
1378
      return
1379
    pattern = text[patch.start2 : patch.start2 + patch.length1]
1380
    padding = 0
1381
1382
    # Look for the first and last matches of pattern in text.  If two different
1383
    # matches are found, increase the pattern length.
1384
    while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
1385
        0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
1386
        self.Patch_Margin)):
1387
      padding += self.Patch_Margin
1388
      pattern = text[max(0, patch.start2 - padding) :
1389
                     patch.start2 + patch.length1 + padding]
1390
    # Add one chunk for good luck.
1391
    padding += self.Patch_Margin
1392
1393
    # Add the prefix.
1394
    prefix = text[max(0, patch.start2 - padding) : patch.start2]
1395
    if prefix:
1396
      patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
1397
    # Add the suffix.
1398
    suffix = text[patch.start2 + patch.length1 :
1399
                  patch.start2 + patch.length1 + padding]
1400
    if suffix:
1401
      patch.diffs.append((self.DIFF_EQUAL, suffix))
1402
1403
    # Roll back the start points.
1404
    patch.start1 -= len(prefix)
1405
    patch.start2 -= len(prefix)
1406
    # Extend lengths.
1407
    patch.length1 += len(prefix) + len(suffix)
1408
    patch.length2 += len(prefix) + len(suffix)
1409
1410
  def patch_make(self, a, b=None, c=None):
1411
    """Compute a list of patches to turn text1 into text2.
1412
    Use diffs if provided, otherwise compute it ourselves.
1413
    There are four ways to call this function, depending on what data is
1414
    available to the caller:
1415
    Method 1:
1416
    a = text1, b = text2
1417
    Method 2:
1418
    a = diffs
1419
    Method 3 (optimal):
1420
    a = text1, b = diffs
1421
    Method 4 (deprecated, use method 3):
1422
    a = text1, b = text2, c = diffs
1423
1424
    Args:
1425
      a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
1426
          text2 (method 2).
1427
      b: text2 (methods 1,4) or Array of diff tuples for text1 to
1428
          text2 (method 3) or undefined (method 2).
1429
      c: Array of diff tuples for text1 to text2 (method 4) or
1430
          undefined (methods 1,2,3).
1431
1432
    Returns:
1433
      Array of Patch objects.
1434
    """
1435
    text1 = None
1436
    diffs = None
1437
    if isinstance(a, str) and isinstance(b, str) and c is None:
1438
      # Method 1: text1, text2
1439
      # Compute diffs from text1 and text2.
1440
      text1 = a
1441
      diffs = self.diff_main(text1, b, True)
1442
      if len(diffs) > 2:
1443
        self.diff_cleanupSemantic(diffs)
1444
        self.diff_cleanupEfficiency(diffs)
1445
    elif isinstance(a, list) and b is None and c is None:
1446
      # Method 2: diffs
1447
      # Compute text1 from diffs.
1448
      diffs = a
1449
      text1 = self.diff_text1(diffs)
1450
    elif isinstance(a, str) and isinstance(b, list) and c is None:
1451
      # Method 3: text1, diffs
1452
      text1 = a
1453
      diffs = b
1454
    elif (isinstance(a, str) and isinstance(b, str) and
1455
          isinstance(c, list)):
1456
      # Method 4: text1, text2, diffs
1457
      # text2 is not used.
1458
      text1 = a
1459
      diffs = c
1460
    else:
1461
      raise ValueError("Unknown call format to patch_make.")
1462
1463
    if not diffs:
1464
      return []  # Get rid of the None case.
1465
    patches = []
1466
    patch = patch_obj()
1467
    char_count1 = 0  # Number of characters into the text1 string.
1468
    char_count2 = 0  # Number of characters into the text2 string.
1469
    prepatch_text = text1  # Recreate the patches to determine context info.
1470
    postpatch_text = text1
1471
    for x in range(len(diffs)):
1472
      (diff_type, diff_text) = diffs[x]
1473
      if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
1474
        # A new patch starts here.
1475
        patch.start1 = char_count1
1476
        patch.start2 = char_count2
1477
      if diff_type == self.DIFF_INSERT:
1478
        # Insertion
1479
        patch.diffs.append(diffs[x])
1480
        patch.length2 += len(diff_text)
1481
        postpatch_text = (postpatch_text[:char_count2] + diff_text +
1482
                          postpatch_text[char_count2:])
1483
      elif diff_type == self.DIFF_DELETE:
1484
        # Deletion.
1485
        patch.length1 += len(diff_text)
1486
        patch.diffs.append(diffs[x])
1487
        postpatch_text = (postpatch_text[:char_count2] +
1488
                          postpatch_text[char_count2 + len(diff_text):])
1489
      elif (diff_type == self.DIFF_EQUAL and
1490
            len(diff_text) <= 2 * self.Patch_Margin and
1491
            len(patch.diffs) != 0 and len(diffs) != x + 1):
1492
        # Small equality inside a patch.
1493
        patch.diffs.append(diffs[x])
1494
        patch.length1 += len(diff_text)
1495
        patch.length2 += len(diff_text)
1496
1497
      if (diff_type == self.DIFF_EQUAL and
1498
          len(diff_text) >= 2 * self.Patch_Margin):
1499
        # Time for a new patch.
1500
        if len(patch.diffs) != 0:
1501
          self.patch_addContext(patch, prepatch_text)
1502
          patches.append(patch)
1503
          patch = patch_obj()
1504
          # Unlike Unidiff, our patch lists have a rolling context.
1505
          # https://github.com/google/diff-match-patch/wiki/Unidiff
1506
          # Update prepatch text & pos to reflect the application of the
1507
          # just completed patch.
1508
          prepatch_text = postpatch_text
1509
          char_count1 = char_count2
1510
1511
      # Update the current character count.
1512
      if diff_type != self.DIFF_INSERT:
1513
        char_count1 += len(diff_text)
1514
      if diff_type != self.DIFF_DELETE:
1515
        char_count2 += len(diff_text)
1516
1517
    # Pick up the leftover patch if not empty.
1518
    if len(patch.diffs) != 0:
1519
      self.patch_addContext(patch, prepatch_text)
1520
      patches.append(patch)
1521
    return patches
1522
1523
  def patch_deepCopy(self, patches):
1524
    """Given an array of patches, return another array that is identical.
1525
1526
    Args:
1527
      patches: Array of Patch objects.
1528
1529
    Returns:
1530
      Array of Patch objects.
1531
    """
1532
    patchesCopy = []
1533
    for patch in patches:
1534
      patchCopy = patch_obj()
1535
      # No need to deep copy the tuples since they are immutable.
1536
      patchCopy.diffs = patch.diffs[:]
1537
      patchCopy.start1 = patch.start1
1538
      patchCopy.start2 = patch.start2
1539
      patchCopy.length1 = patch.length1
1540
      patchCopy.length2 = patch.length2
1541
      patchesCopy.append(patchCopy)
1542
    return patchesCopy
1543
1544
  def patch_apply(self, patches, text):
1545
    """Merge a set of patches onto the text.  Return a patched text, as well
1546
    as a list of true/false values indicating which patches were applied.
1547
1548
    Args:
1549
      patches: Array of Patch objects.
1550
      text: Old text.
1551
1552
    Returns:
1553
      Two element Array, containing the new text and an array of boolean values.
1554
    """
1555
    if not patches:
1556
      return (text, [])
1557
1558
    # Deep copy the patches so that no changes are made to originals.
1559
    patches = self.patch_deepCopy(patches)
1560
1561
    nullPadding = self.patch_addPadding(patches)
1562
    text = nullPadding + text + nullPadding
1563
    self.patch_splitMax(patches)
1564
1565
    # delta keeps track of the offset between the expected and actual location
1566
    # of the previous patch.  If there are patches expected at positions 10 and
1567
    # 20, but the first patch was found at 12, delta is 2 and the second patch
1568
    # has an effective expected position of 22.
1569
    delta = 0
1570
    results = []
1571
    for patch in patches:
1572
      expected_loc = patch.start2 + delta
1573
      text1 = self.diff_text1(patch.diffs)
1574
      end_loc = -1
1575
      if len(text1) > self.Match_MaxBits:
1576
        # patch_splitMax will only provide an oversized pattern in the case of
1577
        # a monster delete.
1578
        start_loc = self.match_main(text, text1[:self.Match_MaxBits],
1579
                                    expected_loc)
1580
        if start_loc != -1:
1581
          end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
1582
              expected_loc + len(text1) - self.Match_MaxBits)
1583
          if end_loc == -1 or start_loc >= end_loc:
1584
            # Can't find valid trailing context.  Drop this patch.
1585
            start_loc = -1
1586
      else:
1587
        start_loc = self.match_main(text, text1, expected_loc)
1588
      if start_loc == -1:
1589
        # No match found.  :(
1590
        results.append(False)
1591
        # Subtract the delta for this failed patch from subsequent patches.
1592
        delta -= patch.length2 - patch.length1
1593
      else:
1594
        # Found a match.  :)
1595
        results.append(True)
1596
        delta = start_loc - expected_loc
1597
        if end_loc == -1:
1598
          text2 = text[start_loc : start_loc + len(text1)]
1599
        else:
1600
          text2 = text[start_loc : end_loc + self.Match_MaxBits]
1601
        if text1 == text2:
1602
          # Perfect match, just shove the replacement text in.
1603
          text = (text[:start_loc] + self.diff_text2(patch.diffs) +
1604
                      text[start_loc + len(text1):])
1605
        else:
1606
          # Imperfect match.
1607
          # Run a diff to get a framework of equivalent indices.
1608
          diffs = self.diff_main(text1, text2, False)
1609
          if (len(text1) > self.Match_MaxBits and
1610
              self.diff_levenshtein(diffs) / float(len(text1)) >
1611
              self.Patch_DeleteThreshold):
1612
            # The end points match, but the content is unacceptably bad.
1613
            results[-1] = False
1614
          else:
1615
            self.diff_cleanupSemanticLossless(diffs)
1616
            index1 = 0
1617
            for (op, data) in patch.diffs:
1618
              if op != self.DIFF_EQUAL:
1619
                index2 = self.diff_xIndex(diffs, index1)
1620
              if op == self.DIFF_INSERT:  # Insertion
1621
                text = text[:start_loc + index2] + data + text[start_loc +
1622
                                                               index2:]
1623
              elif op == self.DIFF_DELETE:  # Deletion
1624
                text = text[:start_loc + index2] + text[start_loc +
1625
                    self.diff_xIndex(diffs, index1 + len(data)):]
1626
              if op != self.DIFF_DELETE:
1627
                index1 += len(data)
1628
    # Strip the padding off.
1629
    text = text[len(nullPadding):-len(nullPadding)]
1630
    return (text, results)
1631
1632
  def patch_addPadding(self, patches):
1633
    """Add some padding on text start and end so that edges can match
1634
    something.  Intended to be called only from within patch_apply.
1635
1636
    Args:
1637
      patches: Array of Patch objects.
1638
1639
    Returns:
1640
      The padding string added to each side.
1641
    """
1642
    paddingLength = self.Patch_Margin
1643
    nullPadding = ""
1644
    for x in range(1, paddingLength + 1):
1645
      nullPadding += chr(x)
1646
1647
    # Bump all the patches forward.
1648
    for patch in patches:
1649
      patch.start1 += paddingLength
1650
      patch.start2 += paddingLength
1651
1652
    # Add some padding on start of first diff.
1653
    patch = patches[0]
1654
    diffs = patch.diffs
1655
    if not diffs or diffs[0][0] != self.DIFF_EQUAL:
1656
      # Add nullPadding equality.
1657
      diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
1658
      patch.start1 -= paddingLength  # Should be 0.
1659
      patch.start2 -= paddingLength  # Should be 0.
1660
      patch.length1 += paddingLength
1661
      patch.length2 += paddingLength
1662
    elif paddingLength > len(diffs[0][1]):
1663
      # Grow first equality.
1664
      extraLength = paddingLength - len(diffs[0][1])
1665
      newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
1666
      diffs[0] = (diffs[0][0], newText)
1667
      patch.start1 -= extraLength
1668
      patch.start2 -= extraLength
1669
      patch.length1 += extraLength
1670
      patch.length2 += extraLength
1671
1672
    # Add some padding on end of last diff.
1673
    patch = patches[-1]
1674
    diffs = patch.diffs
1675
    if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
1676
      # Add nullPadding equality.
1677
      diffs.append((self.DIFF_EQUAL, nullPadding))
1678
      patch.length1 += paddingLength
1679
      patch.length2 += paddingLength
1680
    elif paddingLength > len(diffs[-1][1]):
1681
      # Grow last equality.
1682
      extraLength = paddingLength - len(diffs[-1][1])
1683
      newText = diffs[-1][1] + nullPadding[:extraLength]
1684
      diffs[-1] = (diffs[-1][0], newText)
1685
      patch.length1 += extraLength
1686
      patch.length2 += extraLength
1687
1688
    return nullPadding
1689
1690
  def patch_splitMax(self, patches):
1691
    """Look through the patches and break up any which are longer than the
1692
    maximum limit of the match algorithm.
1693
    Intended to be called only from within patch_apply.
1694
1695
    Args:
1696
      patches: Array of Patch objects.
1697
    """
1698
    patch_size = self.Match_MaxBits
1699
    if patch_size == 0:
1700
      # Python has the option of not splitting strings due to its ability
1701
      # to handle integers of arbitrary precision.
1702
      return
1703
    for x in range(len(patches)):
1704
      if patches[x].length1 <= patch_size:
1705
        continue
1706
      bigpatch = patches[x]
1707
      # Remove the big old patch.
1708
      del patches[x]
1709
      x -= 1
1710
      start1 = bigpatch.start1
1711
      start2 = bigpatch.start2
1712
      precontext = ''
1713
      while len(bigpatch.diffs) != 0:
1714
        # Create one of several smaller patches.
438.1.6 by franku
run the script
1715
        patch = patch_obj()
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1716
        empty = True
1717
        patch.start1 = start1 - len(precontext)
1718
        patch.start2 = start2 - len(precontext)
1719
        if precontext:
1720
          patch.length1 = patch.length2 = len(precontext)
1721
          patch.diffs.append((self.DIFF_EQUAL, precontext))
1722
1723
        while (len(bigpatch.diffs) != 0 and
1724
               patch.length1 < patch_size - self.Patch_Margin):
1725
          (diff_type, diff_text) = bigpatch.diffs[0]
1726
          if diff_type == self.DIFF_INSERT:
1727
            # Insertions are harmless.
1728
            patch.length2 += len(diff_text)
1729
            start2 += len(diff_text)
1730
            patch.diffs.append(bigpatch.diffs.pop(0))
1731
            empty = False
1732
          elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
1733
              patch.diffs[0][0] == self.DIFF_EQUAL and
1734
              len(diff_text) > 2 * patch_size):
1735
            # This is a large deletion.  Let it pass in one chunk.
1736
            patch.length1 += len(diff_text)
1737
            start1 += len(diff_text)
1738
            empty = False
1739
            patch.diffs.append((diff_type, diff_text))
1740
            del bigpatch.diffs[0]
1741
          else:
1742
            # Deletion or equality.  Only take as much as we can stomach.
1743
            diff_text = diff_text[:patch_size - patch.length1 -
1744
                                  self.Patch_Margin]
1745
            patch.length1 += len(diff_text)
1746
            start1 += len(diff_text)
1747
            if diff_type == self.DIFF_EQUAL:
1748
              patch.length2 += len(diff_text)
1749
              start2 += len(diff_text)
1750
            else:
1751
              empty = False
1752
1753
            patch.diffs.append((diff_type, diff_text))
1754
            if diff_text == bigpatch.diffs[0][1]:
1755
              del bigpatch.diffs[0]
1756
            else:
1757
              bigpatch.diffs[0] = (bigpatch.diffs[0][0],
1758
                                   bigpatch.diffs[0][1][len(diff_text):])
1759
1760
        # Compute the head context for the next patch.
1761
        precontext = self.diff_text2(patch.diffs)
1762
        precontext = precontext[-self.Patch_Margin:]
1763
        # Append the end context for this patch.
1764
        postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
1765
        if postcontext:
1766
          patch.length1 += len(postcontext)
1767
          patch.length2 += len(postcontext)
1768
          if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
1769
            patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
1770
                               postcontext)
1771
          else:
1772
            patch.diffs.append((self.DIFF_EQUAL, postcontext))
1773
1774
        if not empty:
1775
          x += 1
1776
          patches.insert(x, patch)
1777
1778
  def patch_toText(self, patches):
1779
    """Take a list of patches and return a textual representation.
1780
1781
    Args:
1782
      patches: Array of Patch objects.
1783
1784
    Returns:
1785
      Text representation of patches.
1786
    """
1787
    text = []
1788
    for patch in patches:
1789
      text.append(str(patch))
1790
    return "".join(text)
1791
1792
  def patch_fromText(self, textline):
1793
    """Parse a textual representation of patches and return a list of patch
1794
    objects.
1795
1796
    Args:
1797
      textline: Text representation of patches.
1798
1799
    Returns:
1800
      Array of Patch objects.
1801
1802
    Raises:
1803
      ValueError: If invalid input.
1804
    """
1805
    patches = []
1806
    if not textline:
1807
      return patches
1808
    text = textline.split('\n')
1809
    while len(text) != 0:
1810
      m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
1811
      if not m:
1812
        raise ValueError("Invalid patch string: " + text[0])
1813
      patch = patch_obj()
1814
      patches.append(patch)
1815
      patch.start1 = int(m.group(1))
1816
      if m.group(2) == '':
1817
        patch.start1 -= 1
1818
        patch.length1 = 1
1819
      elif m.group(2) == '0':
1820
        patch.length1 = 0
1821
      else:
1822
        patch.start1 -= 1
1823
        patch.length1 = int(m.group(2))
1824
1825
      patch.start2 = int(m.group(3))
1826
      if m.group(4) == '':
1827
        patch.start2 -= 1
1828
        patch.length2 = 1
1829
      elif m.group(4) == '0':
1830
        patch.length2 = 0
1831
      else:
1832
        patch.start2 -= 1
1833
        patch.length2 = int(m.group(4))
1834
1835
      del text[0]
1836
1837
      while len(text) != 0:
1838
        if text[0]:
1839
          sign = text[0][0]
1840
        else:
1841
          sign = ''
1842
        line = urllib.parse.unquote(text[0][1:])
1843
        if sign == '+':
1844
          # Insertion.
1845
          patch.diffs.append((self.DIFF_INSERT, line))
1846
        elif sign == '-':
1847
          # Deletion.
1848
          patch.diffs.append((self.DIFF_DELETE, line))
1849
        elif sign == ' ':
1850
          # Minor equality.
1851
          patch.diffs.append((self.DIFF_EQUAL, line))
1852
        elif sign == '@':
1853
          # Start of next patch.
1854
          break
1855
        elif sign == '':
1856
          # Blank line?  Whatever.
1857
          pass
1858
        else:
1859
          # WTF?
1860
          raise ValueError("Invalid patch mode: '%s'\n%s" % (sign, line))
1861
        del text[0]
1862
    return patches
173.2.2 by Holger Rapp
Added diff_match_patch from google, because they do not provide a setup.py
1863
1864
1865
class patch_obj:
532.1.4 by franku
fixed import of local_urls, fixed wl_markdown, replaced diff_match_patch.py with a python3 compatible version
1866
  """Class representing one patch operation.
1867
  """
1868
1869
  def __init__(self):
1870
    """Initializes with an empty list of diffs.
1871
    """
1872
    self.diffs = []
1873
    self.start1 = None
1874
    self.start2 = None
1875
    self.length1 = 0
1876
    self.length2 = 0
1877
1878
  def __str__(self):
1879
    """Emulate GNU diff's format.
1880
    Header: @@ -382,8 +481,9 @@
1881
    Indices are printed as 1-based, not 0-based.
1882
1883
    Returns:
1884
      The GNU diff string.
1885
    """
1886
    if self.length1 == 0:
1887
      coords1 = str(self.start1) + ",0"
1888
    elif self.length1 == 1:
1889
      coords1 = str(self.start1 + 1)
1890
    else:
1891
      coords1 = str(self.start1 + 1) + "," + str(self.length1)
1892
    if self.length2 == 0:
1893
      coords2 = str(self.start2) + ",0"
1894
    elif self.length2 == 1:
1895
      coords2 = str(self.start2 + 1)
1896
    else:
1897
      coords2 = str(self.start2 + 1) + "," + str(self.length2)
1898
    text = ["@@ -", coords1, " +", coords2, " @@\n"]
1899
    # Escape the body of the patch with %xx notation.
1900
    for (op, data) in self.diffs:
1901
      if op == diff_match_patch.DIFF_INSERT:
1902
        text.append("+")
1903
      elif op == diff_match_patch.DIFF_DELETE:
1904
        text.append("-")
1905
      elif op == diff_match_patch.DIFF_EQUAL:
1906
        text.append(" ")
1907
      # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
1908
      data = data.encode("utf-8")
1909
      text.append(urllib.parse.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
1910
    return "".join(text)