2
* Copyright (C) 2008, Johannes E. Schindelin <johannes.schindelin@gmx.de>
3
* Copyright (C) 2009, Gil Ran <gilrun@gmail.com>
7
* Redistribution and use in source and binary forms, with or
8
* without modification, are permitted provided that the following
11
* - Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
14
* - Redistributions in binary form must reproduce the above
15
* copyright notice, this list of conditions and the following
16
* disclaimer in the documentation and/or other materials provided
17
* with the distribution.
19
* - Neither the name of the Git Development Community nor the
20
* names of its contributors may be used to endorse or promote
21
* products derived from this software without specific prior
24
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
25
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
26
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
32
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
34
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
using System.Collections.Generic;
41
using ICSharpCode.SharpDevelop.Editor;
43
namespace ICSharpCode.AvalonEdit.AddIn.MyersDiff
46
/// Diff algorithm, based on "An O(ND) Difference Algorithm and its
47
/// Variations", by Eugene Myers.
49
/// The basic idea is to put the line numbers of text A as columns ("x") and the
50
/// lines of text B as rows ("y"). Now you try to find the shortest "edit path"
51
/// from the upper left corner to the lower right corner, where you can
52
/// always go horizontally or vertically, but diagonally from (x,y) to
53
/// (x+1,y+1) only if line x in text A is identical to line y in text B.
55
/// Myers' fundamental concept is the "furthest reaching D-path on diagonal k":
56
/// a D-path is an edit path starting at the upper left corner and containing
57
/// exactly D non-diagonal elements ("differences"). The furthest reaching
58
/// D-path on diagonal k is the one that contains the most (diagonal) elements
59
/// which ends on diagonal k (where k = y - x).
63
/// H E L L O W O R L D
69
/// Since every D-path has exactly D horizontal or vertical elements, it can
70
/// only end on the diagonals -D, -D+2, ..., D-2, D.
72
/// Since every furthest reaching D-path contains at least one furthest
73
/// reaching (D-1)-path (except for D=0), we can construct them recursively.
75
/// Since we are really interested in the shortest edit path, we can start
76
/// looking for a 0-path, then a 1-path, and so on, until we find a path that
77
/// ends in the lower right corner.
79
/// To save space, we do not need to store all paths (which has quadratic space
80
/// requirements), but generate the D-paths simultaneously from both sides.
81
/// When the ends meet, we will have found "the middle" of the path. From the
82
/// end points of that diagonal part, we can generate the rest recursively.
84
/// This only requires linear space.
86
/// The overall (runtime) complexity is
88
/// O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...)
89
/// = O(N * D^2 * 5 / 4) = O(N * D^2),
91
/// (With each step, we have to find the middle parts of twice as many regions
92
/// as before, but the regions (as well as the D) are halved.)
94
/// So the overall runtime complexity stays the same with linear space,
95
/// albeit with a larger constant factor.
97
public class MyersDiff
100
/// The list of edits found during the last call to <see cref="calculateEdits()"/>
102
protected List<Edit> edits;
105
/// The first text to be compared. Referred to as "Text A" in the comments
107
protected ISequence a;
110
/// The second text to be compared. Referred to as "Text B" in the comments
112
protected ISequence b;
115
/// The only constructor
117
/// <param name="a">the text A which should be compared</param>
118
/// <param name="b">the text B which should be compared</param>
119
public MyersDiff(ISequence a, ISequence b)
123
middle = new MiddleEdit(a, b);
127
/// <returns>the list of edits found during the last call to {@link #calculateEdits()}</returns>
128
public List<Edit> GetEdits()
133
// TODO: use ThreadLocal for future multi-threaded operations
137
/// Entrypoint into the algorithm this class is all about. This method triggers that the
138
/// differences between A and B are calculated in form of a list of edits.
140
protected void CalculateEdits()
142
edits = new List<Edit>();
144
middle.Initialize(0, a.Size(), 0, b.Size());
145
if (middle.beginA >= middle.endA &&
146
middle.beginB >= middle.endB)
149
CalculateEdits(middle.beginA, middle.endA,
150
middle.beginB, middle.endB);
154
/// Calculates the differences between a given part of A against another given part of B
156
/// <param name="beginA">start of the part of A which should be compared (0<=beginA<sizeof(A))</param>
157
/// <param name="endA">end of the part of A which should be compared (beginA<=endA<sizeof(A))</param>
158
/// <param name="beginB">start of the part of B which should be compared (0<=beginB<sizeof(B))</param>
159
/// <param name="endB">end of the part of B which should be compared (beginB<=endB<sizeof(B))</param>
160
protected void CalculateEdits(int beginA, int endA,
161
int beginB, int endB)
163
Edit edit = middle.Calculate(beginA, endA, beginB, endB);
165
if (beginA < edit.BeginA || beginB < edit.BeginB)
167
int k = edit.BeginB - edit.BeginA;
168
int x = middle.backward.Snake(k, edit.BeginA);
169
CalculateEdits(beginA, x, beginB, k + x);
172
if (edit.EditType != ChangeType.None)
177
if (endA > edit.EndA || endB > edit.EndB)
179
int k = edit.EndB - edit.EndA;
180
int x = middle.forward.Snake(k, edit.EndA);
181
CalculateEdits(x, endA, k + x, endB);
186
/// A class to help bisecting the sequences a and b to find minimal
189
/// As the arrays are reused for space efficiency, you will need one
190
/// instance per thread.
192
/// The entry function is the calculate() method.
196
private readonly ISequence _a;
197
private readonly ISequence _b;
199
public MiddleEdit(ISequence a, ISequence b)
203
forward = new ForwardEditPaths(this);
204
backward = new BackwardEditPaths(this);
207
public void Initialize(int beginA, int endA, int beginB, int endB)
209
this.beginA = beginA; this.endA = endA;
210
this.beginB = beginB; this.endB = endB;
212
// strip common parts on either end
213
int k = beginB - beginA;
214
this.beginA = forward.Snake(k, beginA);
215
this.beginB = k + this.beginA;
218
this.endA = backward.Snake(k, endA);
219
this.endB = k + this.endA;
223
/// This function calculates the "middle" Edit of the shortest
224
/// edit path between the given subsequences of a and b.
226
/// Once a forward path and a backward path meet, we found the
227
/// middle part. From the last snake end point on both of them,
228
/// we construct the Edit.
230
/// It is assumed that there is at least one edit in the range.
232
// TODO: measure speed impact when this is synchronized
233
public Edit Calculate(int beginA, int endA, int beginB, int endB)
235
if (beginA == endA || beginB == endB)
236
return new Edit(beginA, endA, beginB, endB);
237
this.beginA = beginA; this.endA = endA;
238
this.beginB = beginB; this.endB = endB;
241
* Following the conventions in Myers' paper, "k" is
242
* the difference between the index into "b" and the
245
int minK = beginB - endA;
246
int maxK = endB - beginA;
248
forward.Initialize(beginB - beginA, beginA, minK, maxK);
249
backward.Initialize(endB - endA, endA, minK, maxK);
251
for (int d = 1; ; d++)
252
if (forward.Calculate(d) ||
253
backward.Calculate(d))
260
* For each d, we need to hold the d-paths for the diagonals
261
* k = -d, -d + 2, ..., d - 2, d. These are stored in the
262
* forward (and backward) array.
264
* As we allow subsequences, too, this needs some refinement:
265
* the forward paths start on the diagonal forwardK =
266
* beginB - beginA, and backward paths start on the diagonal
267
* backwardK = endB - endA.
269
* So, we need to hold the forward d-paths for the diagonals
270
* k = forwardK - d, forwardK - d + 2, ..., forwardK + d and
271
* the analogue for the backward d-paths. This means that
272
* we can turn (k, d) into the forward array index using this
275
* i = (d + k - forwardK) / 2
277
* There is a further complication: the edit paths should not
278
* leave the specified subsequences, so k is bounded by
279
* minK = beginB - endA and maxK = endB - beginA. However,
280
* (k - forwardK) _must_ be odd whenever d is odd, and it
281
* _must_ be even when d is even.
283
* The values in the "forward" and "backward" arrays are
284
* positions ("x") in the sequence a, to get the corresponding
285
* positions ("y") in the sequence b, you have to calculate
286
* the appropriate k and then y:
288
* k = forwardK - d + i * 2
291
* (substitute backwardK for forwardK if you want to get the
292
* y position for an entry in the "backward" array.
294
public EditPaths forward;
295
public EditPaths backward;
297
/* Some variables which are shared between methods */
302
protected Edit _edit;
304
internal abstract class EditPaths
306
protected readonly MiddleEdit _middleEdit;
307
private List<int> x = new List<int>();
308
private List<long> _snake = new List<long>();
312
int prevBeginK, prevEndK;
313
/* if we hit one end early, no need to look further */
314
protected int minK, maxK; // TODO: better explanation
316
protected EditPaths(MiddleEdit middleEdit)
318
_middleEdit = middleEdit;
321
int GetIndex(int d, int k)
324
if (((d + k - middleK) % 2) == 1)
325
throw new InvalidOperationException("odd: " + d + " + " + k + " - " + middleK);
326
return (d + k - middleK) / 2;
329
public int GetX(int d, int k)
332
if (k < beginK || k > endK)
333
throw new InvalidOperationException("k " + k + " not in " + beginK + " - " + endK);
334
return x[GetIndex(d, k)];
337
public long GetSnake(int d, int k)
340
if (k < beginK || k > endK)
341
throw new InvalidOperationException("k " + k + " not in " + beginK + " - " + endK);
342
return _snake[GetIndex(d, k)];
345
private int ForceKIntoRange(int k)
347
/* if k is odd, so must be the result */
349
return minK + ((k ^ minK) & 1);
351
return maxK - ((k ^ maxK) & 1);
355
public void Initialize(int k, int x, int minK, int maxK)
359
beginK = endK = middleK = k;
363
_snake.Add(NewSnake(k, x));
366
public abstract int Snake(int k, int x);
367
protected abstract int GetLeft(int x);
368
protected abstract int GetRight(int x);
369
protected abstract bool IsBetter(int left, int right);
370
protected abstract void AdjustMinMaxK(int k, int x);
371
protected abstract bool Meets(int d, int k, int x, long snake);
373
long NewSnake(int k, int x)
376
long ret = ((long)x) << 32;
380
int Snake2x(long snake)
382
return (int)((ulong)snake >> 32);
385
int Snake2y(long snake)
387
return (int)(((ulong)snake << 32) >> 32);
390
protected bool MakeEdit(long snake1, long snake2)
392
int x1 = Snake2x(snake1), x2 = Snake2x(snake2);
393
int y1 = Snake2y(snake1), y2 = Snake2y(snake2);
396
* Check for incompatible partial edit paths:
397
* when there are ambiguities, we might have
398
* hit incompatible (i.e. non-overlapping)
399
* forward/backward paths.
401
* In that case, just pretend that we have
402
* an empty edit at the end of one snake; this
403
* will force a decision which path to take
404
* in the next recursion step.
406
if (x1 > x2 || y1 > y2)
411
_middleEdit._edit = new Edit(x1, x2, y1, y2);
415
public bool Calculate(int d)
419
beginK = ForceKIntoRange(middleK - d);
420
endK = ForceKIntoRange(middleK + d);
421
// TODO: handle i more efficiently
422
// TODO: walk snake(k, getX(d, k)) only once per (d, k)
423
// TODO: move end points out of the loop to avoid conditionals inside the loop
424
// go backwards so that we can avoid temp vars
425
for (int k = endK; k >= beginK; k -= 2)
427
int left = -1, right = -1;
428
long leftSnake = -1L, rightSnake = -1L;
429
// TODO: refactor into its own function
433
i = GetIndex(d - 1, k - 1);
435
int end = Snake(k - 1, left);
436
leftSnake = left != end ?
437
NewSnake(k - 1, end) :
440
if (Meets(d, k - 1, end, leftSnake))
446
i = GetIndex(d - 1, k + 1);
448
int end = Snake(k + 1, right);
449
rightSnake = right != end ?
450
NewSnake(k + 1, end) :
453
if (Meets(d, k + 1, end, rightSnake))
455
right = GetRight(end);
461
IsBetter(left, right)))
464
newSnakeTmp = leftSnake;
469
newSnakeTmp = rightSnake;
472
if (Meets(d, k, newX, newSnakeTmp))
474
AdjustMinMaxK(k, newX);
477
_snake.Set(i, newSnakeTmp);
483
class ForwardEditPaths : EditPaths
485
public ForwardEditPaths(MiddleEdit middleEdit)
490
public override int Snake(int k, int x)
492
for (; x < _middleEdit.endA && k + x < _middleEdit.endB; x++)
493
if (!_middleEdit._a.Equals(x, _middleEdit._b, k + x))
498
protected override int GetLeft(int x)
503
protected override int GetRight(int x)
508
protected override bool IsBetter(int left, int right)
513
protected override void AdjustMinMaxK(int k, int x)
515
if (x >= _middleEdit.endA || k + x >= _middleEdit.endB)
517
if (k > _middleEdit.backward.middleK)
524
protected override bool Meets(int d, int k, int x, long snake)
526
if (k < _middleEdit.backward.beginK || k > _middleEdit.backward.endK)
528
// TODO: move out of loop
529
if (((d - 1 + k - _middleEdit.backward.middleK) % 2) == 1)
531
if (x < _middleEdit.backward.GetX(d - 1, k))
533
MakeEdit(snake, _middleEdit.backward.GetSnake(d - 1, k));
538
class BackwardEditPaths : EditPaths
540
public BackwardEditPaths(MiddleEdit middleEdit)
545
public override int Snake(int k, int x)
547
for (; x > _middleEdit.beginA && k + x > _middleEdit.beginB; x--)
548
if (!_middleEdit._a.Equals(x - 1, _middleEdit._b, k + x - 1))
553
protected override int GetLeft(int x)
558
protected override int GetRight(int x)
563
protected override bool IsBetter(int left, int right)
568
protected override void AdjustMinMaxK(int k, int x)
570
if (x <= _middleEdit.beginA || k + x <= _middleEdit.beginB)
572
if (k > _middleEdit.forward.middleK)
579
protected override bool Meets(int d, int k, int x, long snake)
581
if (k < _middleEdit.forward.beginK || k > _middleEdit.forward.endK)
583
// TODO: move out of loop
584
if (((d + k - _middleEdit.forward.middleK) % 2) == 1)
586
if (x > _middleEdit.forward.GetX(d, k))
588
MakeEdit(_middleEdit.forward.GetSnake(d, k), snake);