3
* Based on Tye McQueen's Algorithm::Diff Perl module version 1.19_01
4
* Converted to C# by Joshua Tauberer <tauberer@for.net>
6
* The Perl module's copyright notice:
7
* Parts Copyright (c) 2000-2004 Ned Konz. All rights reserved.
8
* Parts by Tye McQueen.
10
* The Perl module's readme has a ridiculously long list of
11
* thanks for all of the previous authors, who are:
12
* Mario Wolczko (author of SmallTalk code the module is based on)
19
* The Perl module was released under the Perl Artistic License,
20
* and I leave my additions in the public domain, so I leave
21
* it up to you to figure out what you need to do if you want
22
* to distribute this file in some form.
26
using System.Collections;
27
using System.Collections.Generic;
30
namespace MonoDevelop.Components.Diff {
31
public interface IDiff : IEnumerable {
36
public abstract class Hunk {
39
public abstract int ChangedLists { get; }
41
public abstract bool Same { get; }
42
public abstract bool Conflict { get; }
44
public abstract bool IsSame(int index);
46
public abstract Range Original();
47
public abstract Range Changes(int index);
49
public int MaxLines() {
50
int m = Original().Count;
51
for (int i = 0; i < ChangedLists; i++)
52
if (Changes(i).Count > m)
58
public class Diff : IDiff {
59
internal IList left, right;
61
IEqualityComparer hashcoder;
63
public IList Left { get { return left; } }
64
public IList Right { get { return right; } }
69
public Trio(Trio a, int b, int c) {
76
public class Hunk : MonoDevelop.Components.Diff.Hunk {
78
int s1start, s1end, s2start, s2end;
81
internal Hunk(IList left, IList right, int s1start, int s1end, int s2start, int s2end, bool same) {
84
this.s1start = s1start;
86
this.s2start = s2start;
91
internal void SetLists(IList left, IList right) {
96
public override int ChangedLists { get { return 1; } }
98
public override bool Same { get { return same; } }
100
public override bool Conflict { get { return false; } }
102
public override bool IsSame(int index) {
103
if (index != 0) throw new ArgumentException();
107
private Range get(int seq) {
108
int start = (seq==1?s1start:s2start);
109
int end = (seq==1?s1end:s2end);
110
IList list = (seq==1?left:right);
111
if (end < start) return new Range(list, start, 0);
112
return new Range(list, start, end-start+1);
115
public Range Left { get { return get(1); } }
116
public Range Right { get { return get(2); } }
118
public override Range Original() { return Left; }
119
public override Range Changes(int index) {
120
if (index != 0) throw new ArgumentException();
124
public override int GetHashCode() {
125
return unchecked(s1start + s1end + s2start + s2end);
128
public override bool Equals(object o) {
131
s1start == h.s1start &&
132
s1start == h.s1end &&
133
s1start == h.s2start &&
134
s1start == h.s2end &&
138
public override string ToString() {
139
if (left == null || right == null)
140
return base.ToString();
144
public string DiffString() {
145
if (left == null || right == null)
146
throw new InvalidOperationException("This hunk is based on a patch which does not have the compared data.");
148
StringBuilder ret = new StringBuilder();
151
foreach (object item in Left) {
153
ret.Append(item.ToString());
154
ret.Append(Environment.NewLine);
157
foreach (object item in Left) {
159
ret.Append(item.ToString());
160
ret.Append(Environment.NewLine);
162
foreach (object item in Right) {
164
ret.Append(item.ToString());
165
ret.Append(Environment.NewLine);
169
return ret.ToString();
172
internal Hunk Crop(int shiftstart, int shiftend) {
173
return new Diff.Hunk(left, right, Left.Start+shiftstart, Left.End-shiftend, Right.Start+shiftstart, Right.End-shiftend, same);
176
internal Hunk Reverse() {
177
return new Diff.Hunk(right, left, Right.Start, Right.End, Left.Start, Left.End, same);
181
public Diff(IList left, IList right, IComparer comparer, IEqualityComparer hashcoder) {
184
this.comparer = comparer;
185
this.hashcoder = hashcoder;
189
public Diff(string leftFile, string rightFile, bool caseSensitive, bool compareWhitespace)
190
: this(UnifiedDiff.LoadFileLines(leftFile), UnifiedDiff.LoadFileLines(rightFile), caseSensitive, compareWhitespace) {
193
public Diff(string[] left, string[] right, bool caseSensitive, bool compareWhitespace)
195
StripWhitespace(left, !compareWhitespace),
196
StripWhitespace(right, !compareWhitespace),
197
caseSensitive ? (IComparer)Comparer.Default : (IComparer)CaseInsensitiveComparer.Default,
198
caseSensitive ? null : StringComparer.InvariantCultureIgnoreCase
202
////////////////////////////////////////////////////////////
205
private static string[] StripWhitespace(string[] lines, bool strip) {
206
if (!strip) return lines;
207
string[] ret = new string[lines.Length];
208
for (int i = 0; i < lines.Length; i++) {
209
StringBuilder sb = new StringBuilder();
210
foreach (char c in lines[i])
211
if (!char.IsWhiteSpace(c))
213
ret[i] = sb.ToString();
218
////////////////////////////////////////////////////////////
220
IEnumerator IEnumerable.GetEnumerator() {
222
throw new InvalidOperationException("No comparison has been performed.");
223
return new Enumerator(this);
226
public override string ToString() {
227
System.IO.StringWriter w = new System.IO.StringWriter();
228
UnifiedDiff.WriteUnifiedDiff(this, w);
232
public Patch CreatePatch() {
234
foreach (Hunk hunk in this)
236
ctr += hunk.Right.Count;
238
object[] rightData = new object[ctr];
240
ArrayList hunks = new ArrayList();
242
foreach (Hunk hunk in this) {
244
hunks.Add(new Patch.Hunk(rightData, hunk.Left.Start, hunk.Left.Count, 0, 0, true));
246
hunks.Add(new Patch.Hunk(rightData, hunk.Left.Start, hunk.Left.Count, ctr, hunk.Right.Count, false));
247
for (int i = 0; i < hunk.Right.Count; i++)
248
rightData[ctr++] = hunk.Right[i];
253
return new Patch((Patch.Hunk[])hunks.ToArray(typeof(Hunk)));
257
# McIlroy-Hunt diff algorithm
258
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
259
# by Ned Konz, perl@bike-nomad.com
260
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
262
# Create a hash that maps each element of $aCollection to the set of
263
# positions it occupies in $aCollection, restricted to the elements
264
# within the range of indexes specified by $start and $end.
265
# The fourth parameter is a subroutine reference that will be called to
266
# generate a string to use as a key.
267
# Additional parameters, if any, will be passed to this subroutine.
269
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
272
Hashtable _withPositionsOfInInterval(IList aCollection, int start, int end) {
273
Hashtable d = new Hashtable(hashcoder);
274
for (int index = start; index <= end; index++) {
275
object element = aCollection[index];
276
if (d.ContainsKey(element)) {
277
List<int> list = (List<int>)d[element];
280
List<int> list = new List<int>();
285
foreach (List<int> list in d.Values)
291
# Find the place at which aValue would normally be inserted into the
292
# array. If that place is already occupied by aValue, do nothing, and
293
# return undef. If the place does not exist (i.e., it is off the end of
294
# the array), add it to the end, otherwise replace the element at that
295
# point with aValue. It is assumed that the array's values are numeric.
296
# This is where the bulk (75%) of the time is spent in this module, so
297
# try to make it fast!
299
// NOTE: Instead of returning undef, it returns -1.
300
int _replaceNextLargerWith(List<int> array, int value, int high) {
302
high = array.Count-1;
305
if (high == -1 || value > (int)array[array.Count-1]) {
307
return array.Count-1;
310
// binary search for insertion point...
313
while (low <= high) {
314
index = (high + low) / 2;
316
found = (int)array[index];
320
else if (value > found)
326
// # now insertion point is in $low.
327
array[low] = value; // overwrite next larger
332
# This method computes the longest common subsequence in $a and $b.
334
# Result is array or ref, whose contents is such that
335
# $a->[ $i ] == $b->[ $result[ $i ] ]
336
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
338
# An additional argument may be passed; this is a hash or key generating
339
# function that should return a string that uniquely identifies the given
340
# element. It should be the case that if the key is the same, the elements
341
# will compare the same. If this parameter is undef or missing, the key
342
# will be the element as a string.
344
# By default, comparisons will use "eq" and elements will be turned into keys
345
# using the default stringizing operator '""'.
347
# Additional parameters, if any, will be passed to the key generation
351
bool compare(object a, object b) {
352
if (comparer == null) return a.Equals(b);
353
return comparer.Compare(a, b) == 0;
356
bool IsPrepared(out Hashtable bMatches) {
361
List<int> _longestCommonSubsequence(IList a, IList b) {
363
int aFinish = a.Count-1;
364
List<int> matchVector = new List<int>();
367
// initialize matchVector to length of a
368
for (int i = 0; i < a.Count; i++)
371
if (!IsPrepared(out bMatches)) {
373
int bFinish = b.Count-1;
375
// First we prune off any common elements at the beginning
376
while (aStart <= aFinish && bStart <= bFinish && compare(a[aStart], b[bStart]))
377
matchVector[aStart++] = bStart++;
380
while (aStart <= aFinish && bStart <= bFinish && compare(a[aFinish], b[bFinish]))
381
matchVector[aFinish--] = bFinish--;
383
// Now compute the equivalence classes of positions of elements
385
_withPositionsOfInInterval(b, bStart, bFinish);
388
List<int> thresh = new List<int>();
389
List<Trio> links = new List<Trio>();
391
for (int i = aStart; i <= aFinish; i++) {
392
List<int> aimatches = (List<int>)bMatches[a[i]];
393
if (aimatches != null) {
395
for (int ji = 0; ji < aimatches.Count; ji++) {
396
int j = aimatches[ji];
397
// # optimization: most of the time this will be true
398
if (k>0 && (int)thresh[k] > j && (int)thresh[k-1] < j)
401
k = _replaceNextLargerWith(thresh, j, k);
403
// oddly, it's faster to always test this (CPU cache?).
405
Trio t = new Trio( (Trio)( k>0 ? links[k-1] : null ), i, j );
406
if (k == links.Count)
415
if (thresh.Count > 0) {
416
for (Trio link = (Trio)links[thresh.Count-1]; link != null; link = link.a)
417
matchVector[link.b] = link.c;
423
/*void prepare(IList list) {
424
prepared = _withPositionsOfInInterval(list, 0, list.Count-1);
428
void LCSidx(IList a, IList b, out List<int> am, out List<int> bm) {
429
List<int> match = _longestCommonSubsequence(a, b);
430
am = new List<int>();
431
for (int i = 0; i < match.Count; i++)
432
if ((int)match[i] != -1)
434
bm = new List<int>();
435
for (int vi = 0; vi < am.Count; vi++)
436
bm.Add(match[am[vi]]);
439
List<int> compact_diff(IList a, IList b) {
440
List<int> am, bm, cdiff;
441
LCSidx(a, b, out am, out bm);
442
cdiff = new List<int>();
447
while(am.Count > 0 && ai == (int)am[0] && bi == (int)bm[0]) {
456
if (am.Count == 0) break;
463
if (ai < a.Count || bi < b.Count) {
473
List<int> cdif = null;
476
cdif = compact_diff(left, right);
478
if (0 == (int)cdif[2] && 0 == (int)cdif[3]) {
484
_End = (1+cdif.Count)/2;
487
private class Enumerator : IEnumerator {
491
public Enumerator(Diff diff) {
496
public object Current { get { _ChkPos(); return gethunk(); } }
498
public bool MoveNext() { return next(); }
500
public void Reset() { reset(0); }
503
if (_Pos == 0) throw new InvalidOperationException("Position is reset.");
506
void reset(int pos) {
507
if (pos < 0 || diff._End <= pos) pos = -1;
525
a1 = (int)diff.cdif[off1-2];
526
a2 = (int)diff.cdif[off1] - 1;
527
b1 = (int)diff.cdif[off2-2];
528
b2 = (int)diff.cdif[off2] - 1;
531
return new Hunk(diff.left, diff.right, a1, a2, b1, b2, s);
536
if (diff._Same != ((1 & _Pos) != 0))
543
public class Range : IList {
547
static ArrayList EmptyList = new ArrayList();
549
public Range(IList list, int start, int count) {
555
public int Start { get { return start; } }
557
public int Count { get { return count; } }
559
public int End { get { return start+count-1; } }
561
private void Check() {
562
if (count > 0 && list == null)
563
throw new InvalidOperationException("This range does not refer to a list with data.");
566
public object this[int index] {
569
if (index < 0 || index >= count)
570
throw new ArgumentException("index");
571
return list[index+start];
575
// IEnumerable Functions
577
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
578
if (count == 0 && list == null) return EmptyList.GetEnumerator();
580
return new Enumer(this);
583
private class Enumer : IEnumerator {
586
public Enumer(Range list) { this.list = list; }
587
public void Reset() { index = -1; }
588
public bool MoveNext() {
590
return index < list.Count;
592
public object Current { get { return list[index]; } }
595
// ICollection Functions
597
void ICollection.CopyTo(Array array, int index) {
599
for (int i = 0; i < Count; i++)
600
array.SetValue(this[i], i + index);
602
object ICollection.SyncRoot {
605
bool ICollection.IsSynchronized {
606
get { return false; }
611
bool IList.IsFixedSize { get { return true; } }
613
bool IList.IsReadOnly { get { return true; } }
615
object IList.this[int index] { get { throw new InvalidOperationException(); } set { throw new InvalidOperationException(); } }
617
int IList.Add(object obj) { throw new InvalidOperationException(); }
619
void IList.Clear() { throw new InvalidOperationException(); }
621
void IList.Insert(int index, object obj) { throw new InvalidOperationException(); }
623
void IList.Remove(object obj) { throw new InvalidOperationException(); }
625
void IList.RemoveAt(int index) { throw new InvalidOperationException(); }
627
public bool Contains(object obj) {
628
return IndexOf(obj) != -1;
631
public int IndexOf(object obj) {
632
for (int i = 0; i < Count; i++)
633
if (obj.Equals(this[i]))