2
This code is derived from jgit (http://eclipse.org/jgit).
3
Copyright owners are documented in jgit's IP log.
5
This program and the accompanying materials are made available
6
under the terms of the Eclipse Distribution License v1.0 which
7
accompanies this distribution, is reproduced below, and is
8
available at http://www.eclipse.org/org/documents/edl-v10.php
12
Redistribution and use in source and binary forms, with or
13
without modification, are permitted provided that the following
16
- Redistributions of source code must retain the above copyright
17
notice, this list of conditions and the following disclaimer.
19
- Redistributions in binary form must reproduce the above
20
copyright notice, this list of conditions and the following
21
disclaimer in the documentation and/or other materials provided
22
with the distribution.
24
- Neither the name of the Eclipse Foundation, Inc. nor the
25
names of its contributors may be used to endorse or promote
26
products derived from this software without specific prior
29
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53
/// Diff algorithm, based on "An O(ND) Difference Algorithm and its
54
/// Variations", by Eugene Myers.
57
/// Diff algorithm, based on "An O(ND) Difference Algorithm and its
58
/// Variations", by Eugene Myers.
59
/// The basic idea is to put the line numbers of text A as columns ("x") and the
60
/// lines of text B as rows ("y"). Now you try to find the shortest "edit path"
61
/// from the upper left corner to the lower right corner, where you can
62
/// always go horizontally or vertically, but diagonally from (x,y) to
63
/// (x+1,y+1) only if line x in text A is identical to line y in text B.
64
/// Myers' fundamental concept is the "furthest reaching D-path on diagonal k":
65
/// a D-path is an edit path starting at the upper left corner and containing
66
/// exactly D non-diagonal elements ("differences"). The furthest reaching
67
/// D-path on diagonal k is the one that contains the most (diagonal) elements
68
/// which ends on diagonal k (where k = y - x).
70
/// H E L L O W O R L D
75
/// Since every D-path has exactly D horizontal or vertical elements, it can
76
/// only end on the diagonals -D, -D+2, ..., D-2, D.
77
/// Since every furthest reaching D-path contains at least one furthest
78
/// reaching (D-1)-path (except for D=0), we can construct them recursively.
79
/// Since we are really interested in the shortest edit path, we can start
80
/// looking for a 0-path, then a 1-path, and so on, until we find a path that
81
/// ends in the lower right corner.
82
/// To save space, we do not need to store all paths (which has quadratic space
83
/// requirements), but generate the D-paths simultaneously from both sides.
84
/// When the ends meet, we will have found "the middle" of the path. From the
85
/// end points of that diagonal part, we can generate the rest recursively.
86
/// This only requires linear space.
87
/// The overall (runtime) complexity is
88
/// O(N * D^2 + 2 * N/2 * (D/2)^2 + 4 * N/4 * (D/4)^2 + ...)
89
/// = O(N * D^2 * 5 / 4) = O(N * D^2),
90
/// (With each step, we have to find the middle parts of twice as many regions
91
/// as before, but the regions (as well as the D) are halved.)
92
/// So the overall runtime complexity stays the same with linear space,
93
/// albeit with a larger constant factor.
96
public class MyersDiff<S> where S:Sequence
98
private sealed class _LowLevelDiffAlgorithm_110 : LowLevelDiffAlgorithm
100
public _LowLevelDiffAlgorithm_110()
104
public override void DiffNonCommon<S>(EditList edits, HashedSequenceComparator<S>
105
cmp, HashedSequence<S> a, HashedSequence<S> b, Edit region)
107
new NGit.Diff.MyersDiff<S>(edits, cmp, a, b, region);
111
/// <summary>Singleton instance of MyersDiff.</summary>
112
/// <remarks>Singleton instance of MyersDiff.</remarks>
113
public static readonly DiffAlgorithm INSTANCE = new _LowLevelDiffAlgorithm_110();
116
/// The list of edits found during the last call to
117
/// <see cref="MyersDiff{S}.CalculateEdits(Edit)">MyersDiff<S>.CalculateEdits(Edit)
120
protected internal EditList edits;
122
/// <summary>Comparison function for sequences.</summary>
123
/// <remarks>Comparison function for sequences.</remarks>
124
protected internal HashedSequenceComparator<S> cmp;
126
/// <summary>The first text to be compared.</summary>
127
/// <remarks>The first text to be compared. Referred to as "Text A" in the comments</remarks>
128
protected internal HashedSequence<S> a;
130
/// <summary>The second text to be compared.</summary>
131
/// <remarks>The second text to be compared. Referred to as "Text B" in the comments</remarks>
132
protected internal HashedSequence<S> b;
134
private MyersDiff(EditList edits, HashedSequenceComparator<S> cmp, HashedSequence
135
<S> a, HashedSequence<S> b, Edit region)
137
middle = new MyersDiff<S>.MiddleEdit(this);
142
CalculateEdits(region);
145
internal MyersDiff<S>.MiddleEdit middle;
147
// TODO: use ThreadLocal for future multi-threaded operations
148
/// <summary>Entrypoint into the algorithm this class is all about.</summary>
150
/// Entrypoint into the algorithm this class is all about. This method triggers that the
151
/// differences between A and B are calculated in form of a list of edits.
153
/// <param name="r">portion of the sequences to examine.</param>
154
private void CalculateEdits(Edit r)
156
middle.Initialize(r.beginA, r.endA, r.beginB, r.endB);
157
if (middle.beginA >= middle.endA && middle.beginB >= middle.endB)
161
CalculateEdits(middle.beginA, middle.endA, middle.beginB, middle.endB);
164
/// <summary>Calculates the differences between a given part of A against another given part of B
166
/// <param name="beginA">start of the part of A which should be compared (0<=beginA<sizeof(A))
168
/// <param name="endA">end of the part of A which should be compared (beginA<=endA<sizeof(A))
170
/// <param name="beginB">start of the part of B which should be compared (0<=beginB<sizeof(B))
172
/// <param name="endB">end of the part of B which should be compared (beginB<=endB<sizeof(B))
174
protected internal virtual void CalculateEdits(int beginA, int endA, int beginB,
177
Edit edit = middle.Calculate(beginA, endA, beginB, endB);
178
if (beginA < edit.beginA || beginB < edit.beginB)
180
int k = edit.beginB - edit.beginA;
181
int x = middle.backward.Snake(k, edit.beginA);
182
CalculateEdits(beginA, x, beginB, k + x);
184
if (edit.GetType() != Edit.Type.EMPTY)
186
edits.Add(edits.Count, edit);
189
if (endA > edit.endA || endB > edit.endB)
191
int k = edit.endB - edit.endA;
192
int x = middle.forward.Snake(k, edit.endA);
193
CalculateEdits(x, endA, k + x, endB);
198
/// A class to help bisecting the sequences a and b to find minimal
202
/// A class to help bisecting the sequences a and b to find minimal
204
/// As the arrays are reused for space efficiency, you will need one
205
/// instance per thread.
206
/// The entry function is the calculate() method.
208
internal class MiddleEdit
210
internal virtual void Initialize(int beginA, int endA, int beginB, int endB)
212
this.beginA = beginA;
214
this.beginB = beginB;
216
// strip common parts on either end
217
int k = beginB - beginA;
218
this.beginA = this.forward.Snake(k, beginA);
219
this.beginB = k + this.beginA;
221
this.endA = this.backward.Snake(k, endA);
222
this.endB = k + this.endA;
225
// TODO: measure speed impact when this is synchronized
226
internal virtual Edit Calculate(int beginA, int endA, int beginB, int endB)
228
if (beginA == endA || beginB == endB)
230
return new Edit(beginA, endA, beginB, endB);
232
this.beginA = beginA;
234
this.beginB = beginB;
236
int minK = beginB - endA;
237
int maxK = endB - beginA;
238
this.forward.Initialize(beginB - beginA, beginA, minK, maxK);
239
this.backward.Initialize(endB - endA, endA, minK, maxK);
240
for (int d = 1; ; d++)
242
if (this.forward.Calculate(d) || this.backward.Calculate(d))
249
internal MyersDiff<S>.MiddleEdit.EditPaths forward;
251
internal MyersDiff<S>.MiddleEdit.EditPaths backward;
253
protected internal int beginA;
255
protected internal int endA;
257
protected internal int beginB;
259
protected internal int endB;
261
protected internal Edit edit;
263
internal abstract class EditPaths
265
private IntList x = new IntList();
267
private LongList snake = new LongList();
273
internal int middleK;
275
internal int prevBeginK;
277
internal int prevEndK;
283
// TODO: better explanation
284
internal int GetIndex(int d, int k)
287
if (((d + k - this.middleK) % 2) != 0)
289
throw new RuntimeException(MessageFormat.Format(JGitText.Get().unexpectedOddResult
290
, d, k, this.middleK));
292
return (d + k - this.middleK) / 2;
295
internal int GetX(int d, int k)
298
if (k < this.beginK || k > this.endK)
300
throw new RuntimeException(MessageFormat.Format(JGitText.Get().kNotInRange, k, this
301
.beginK, this.endK));
303
return this.x.Get(this.GetIndex(d, k));
306
internal long GetSnake(int d, int k)
309
if (k < this.beginK || k > this.endK)
311
throw new RuntimeException(MessageFormat.Format(JGitText.Get().kNotInRange, k, this
312
.beginK, this.endK));
314
return this.snake.Get(this.GetIndex(d, k));
317
private int ForceKIntoRange(int k)
321
return this.minK + ((k ^ this.minK) & 1);
327
return this.maxK - ((k ^ this.maxK) & 1);
333
internal virtual void Initialize(int k, int x, int minK, int maxK)
337
this.beginK = this.endK = this.middleK = k;
341
this.snake.Add(this.NewSnake(k, x));
344
internal abstract int Snake(int k, int x);
346
internal abstract int GetLeft(int x);
348
internal abstract int GetRight(int x);
350
internal abstract bool IsBetter(int left, int right);
352
internal abstract void AdjustMinMaxK(int k, int x);
354
internal abstract bool Meets(int d, int k, int x, long snake);
356
internal long NewSnake(int k, int x)
359
long ret = ((long)x) << 32;
363
internal int Snake2x(long snake)
365
return (int)((long)(((ulong)snake) >> 32));
368
internal int Snake2y(long snake)
373
internal bool MakeEdit(long snake1, long snake2)
375
int x1 = this.Snake2x(snake1);
376
int x2 = this.Snake2x(snake2);
377
int y1 = this.Snake2y(snake1);
378
int y2 = this.Snake2y(snake2);
379
if (x1 > x2 || y1 > y2)
384
this._enclosing.edit = new Edit(x1, x2, y1, y2);
388
internal virtual bool Calculate(int d)
390
this.prevBeginK = this.beginK;
391
this.prevEndK = this.endK;
392
this.beginK = this.ForceKIntoRange(this.middleK - d);
393
this.endK = this.ForceKIntoRange(this.middleK + d);
394
// TODO: handle i more efficiently
395
// TODO: walk snake(k, getX(d, k)) only once per (d, k)
396
// TODO: move end points out of the loop to avoid conditionals inside the loop
397
// go backwards so that we can avoid temp vars
398
for (int k = this.endK; k >= this.beginK; k -= 2)
402
long leftSnake = -1L;
403
long rightSnake = -1L;
404
// TODO: refactor into its own function
405
if (k > this.prevBeginK)
407
int i = this.GetIndex(d - 1, k - 1);
408
left = this.x.Get(i);
409
int end = this.Snake(k - 1, left);
410
leftSnake = left != end ? this.NewSnake(k - 1, end) : this.snake.Get(i);
411
if (this.Meets(d, k - 1, end, leftSnake))
415
left = this.GetLeft(end);
417
if (k < this.prevEndK)
419
int i = this.GetIndex(d - 1, k + 1);
420
right = this.x.Get(i);
421
int end = this.Snake(k + 1, right);
422
rightSnake = right != end ? this.NewSnake(k + 1, end) : this.snake.Get(i);
423
if (this.Meets(d, k + 1, end, rightSnake))
427
right = this.GetRight(end);
431
if (k >= this.prevEndK || (k > this.prevBeginK && this.IsBetter(left, right)))
434
newSnake = leftSnake;
439
newSnake = rightSnake;
441
if (this.Meets(d, k, newX, newSnake))
445
this.AdjustMinMaxK(k, newX);
446
int i_1 = this.GetIndex(d, k);
447
this.x.Set(i_1, newX);
448
this.snake.Set(i_1, newSnake);
453
internal EditPaths(MiddleEdit _enclosing)
455
this._enclosing = _enclosing;
458
private readonly MiddleEdit _enclosing;
461
internal class ForwardEditPaths : MyersDiff<S>.MiddleEdit.EditPaths
463
internal sealed override int Snake(int k, int x)
465
for (; x < this._enclosing.endA && k + x < this._enclosing.endB; x++)
467
if (!this._enclosing._enclosing.cmp.Equals(this._enclosing._enclosing.a, x, this.
468
_enclosing._enclosing.b, k + x))
476
internal sealed override int GetLeft(int x)
481
internal sealed override int GetRight(int x)
486
internal sealed override bool IsBetter(int left, int right)
491
internal sealed override void AdjustMinMaxK(int k, int x)
493
if (x >= this._enclosing.endA || k + x >= this._enclosing.endB)
495
if (k > this._enclosing.backward.middleK)
506
internal sealed override bool Meets(int d, int k, int x, long snake)
508
if (k < this._enclosing.backward.beginK || k > this._enclosing.backward.endK)
512
// TODO: move out of loop
513
if (((d - 1 + k - this._enclosing.backward.middleK) % 2) != 0)
517
if (x < this._enclosing.backward.GetX(d - 1, k))
521
this.MakeEdit(snake, this._enclosing.backward.GetSnake(d - 1, k));
525
internal ForwardEditPaths(MiddleEdit _enclosing) : base(_enclosing)
527
this._enclosing = _enclosing;
530
private readonly MiddleEdit _enclosing;
533
internal class BackwardEditPaths : MyersDiff<S>.MiddleEdit.EditPaths
535
internal sealed override int Snake(int k, int x)
537
for (; x > this._enclosing.beginA && k + x > this._enclosing.beginB; x--)
539
if (!this._enclosing._enclosing.cmp.Equals(this._enclosing._enclosing.a, x - 1, this
540
._enclosing._enclosing.b, k + x - 1))
548
internal sealed override int GetLeft(int x)
553
internal sealed override int GetRight(int x)
558
internal sealed override bool IsBetter(int left, int right)
563
internal sealed override void AdjustMinMaxK(int k, int x)
565
if (x <= this._enclosing.beginA || k + x <= this._enclosing.beginB)
567
if (k > this._enclosing.forward.middleK)
578
internal sealed override bool Meets(int d, int k, int x, long snake)
580
if (k < this._enclosing.forward.beginK || k > this._enclosing.forward.endK)
584
// TODO: move out of loop
585
if (((d + k - this._enclosing.forward.middleK) % 2) != 0)
589
if (x > this._enclosing.forward.GetX(d, k))
593
this.MakeEdit(this._enclosing.forward.GetSnake(d, k), snake);
597
internal BackwardEditPaths(MiddleEdit _enclosing) : base(_enclosing)
599
this._enclosing = _enclosing;
602
private readonly MiddleEdit _enclosing;
605
public MiddleEdit(MyersDiff<S> _enclosing)
607
this._enclosing = _enclosing;
608
forward = new MyersDiff<S>.MiddleEdit.ForwardEditPaths(this);
609
backward = new MyersDiff<S>.MiddleEdit.BackwardEditPaths(this);
612
private readonly MyersDiff<S> _enclosing;
615
/// <param name="args">two filenames specifying the contents to be diffed</param>
616
public static void Main(string[] args)
618
if (args.Length != 2)
620
System.Console.Error.WriteLine(JGitText.Get().need2Arguments);
621
System.Environment.Exit(1);
625
RawText a = new RawText(new FilePath(args[0]));
626
RawText b = new RawText(new FilePath(args[1]));
627
EditList r = INSTANCE.Diff(RawTextComparator.DEFAULT, a, b);
628
System.Console.Out.WriteLine(r.ToString());
632
Sharpen.Runtime.PrintStackTrace(e);