2
This code is derived from jgit (http://eclipse.org/jgit).
3
Copyright owners are documented in jgit's IP log.
5
This program and the accompanying materials are made available
6
under the terms of the Eclipse Distribution License v1.0 which
7
accompanies this distribution, is reproduced below, and is
8
available at http://www.eclipse.org/org/documents/edl-v10.php
12
Redistribution and use in source and binary forms, with or
13
without modification, are permitted provided that the following
16
- Redistributions of source code must retain the above copyright
17
notice, this list of conditions and the following disclaimer.
19
- Redistributions in binary form must reproduce the above
20
copyright notice, this list of conditions and the following
21
disclaimer in the documentation and/or other materials provided
22
with the distribution.
24
- Neither the name of the Eclipse Foundation, Inc. nor the
25
names of its contributors may be used to endorse or promote
26
products derived from this software without specific prior
29
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
using System.Collections.Generic;
56
/// <summary>Patch header describing an action for a single file path.</summary>
57
/// <remarks>Patch header describing an action for a single file path.</remarks>
58
public class FileHeader : DiffEntry
60
// ASCII-encoded line prefixes recognised in a patch file header. The header
// parsing methods of this class test each header line against these prefixes
// with RawParseUtils.Match and, on a match, skip <prefix>.Length bytes to
// reach the value that follows the keyword.
private static readonly byte[] OLD_MODE = Constants.EncodeASCII("old mode ");
62
private static readonly byte[] NEW_MODE = Constants.EncodeASCII("new mode ");
64
internal static readonly byte[] DELETED_FILE_MODE = Constants.EncodeASCII("deleted file mode "
67
internal static readonly byte[] NEW_FILE_MODE = Constants.EncodeASCII("new file mode "
70
private static readonly byte[] COPY_FROM = Constants.EncodeASCII("copy from ");
72
private static readonly byte[] COPY_TO = Constants.EncodeASCII("copy to ");
74
private static readonly byte[] RENAME_OLD = Constants.EncodeASCII("rename old ");
76
private static readonly byte[] RENAME_NEW = Constants.EncodeASCII("rename new ");
78
private static readonly byte[] RENAME_FROM = Constants.EncodeASCII("rename from "
81
private static readonly byte[] RENAME_TO = Constants.EncodeASCII("rename to ");
83
private static readonly byte[] SIMILARITY_INDEX = Constants.EncodeASCII("similarity index "
86
private static readonly byte[] DISSIMILARITY_INDEX = Constants.EncodeASCII("dissimilarity index "
89
internal static readonly byte[] INDEX = Constants.EncodeASCII("index ");
91
// Prefixes of the old-name and new-name lines that precede the first hunk.
internal static readonly byte[] OLD_NAME = Constants.EncodeASCII("--- ");
93
internal static readonly byte[] NEW_NAME = Constants.EncodeASCII("+++ ");
95
/// <summary>Type of patch used by this file.</summary>
96
/// <remarks>Type of patch used by this file.</remarks>
104
/// <summary>Buffer holding the patch data for this file.</summary>
105
/// <remarks>Buffer holding the patch data for this file.</remarks>
106
internal readonly byte[] buf;
110
/// <see cref="buf">buf</see>
111
/// to the "diff ..." line.
113
// Offset within buf where this file's script starts; set from the internal
// constructor's offset argument.
internal readonly int startOffset;
116
/// Position 1 past the end of this file within
117
/// <see cref="buf">buf</see>
120
// Not readonly: the public constructor assigns it after construction starts.
internal int endOffset;
122
/// <summary>Type of patch used to modify this file</summary>
123
internal FileHeader.PatchType patchType;
125
/// <summary>The hunks of this file</summary>
126
// Declared without an initializer; AddHunk is the only visible place that
// assigns a list to it, so it may still be null when no hunk was added.
private IList<HunkHeader> hunks;
130
/// <see cref="patchType">patchType</see>
132
/// <see cref="PatchType.GIT_BINARY">PatchType.GIT_BINARY</see>
135
// Returned by GetForwardBinaryHunk (documented there as the new-image
// delta/literal of a GIT_BINARY patch).
internal BinaryHunk forwardBinaryHunk;
139
/// <see cref="patchType">patchType</see>
141
/// <see cref="PatchType.GIT_BINARY">PatchType.GIT_BINARY</see>
144
// Returned by GetReverseBinaryHunk (documented there as the old-image
// delta/literal of a GIT_BINARY patch).
internal BinaryHunk reverseBinaryHunk;
146
/// <summary>Constructs a new FileHeader</summary>
147
/// <param name="headerLines">buffer holding the diff header for this file</param>
148
/// <param name="edits">the edits for this file</param>
149
/// <param name="type">the type of patch used to modify this file</param>
150
public FileHeader(byte[] headerLines, EditList edits, FileHeader.PatchType type) :
153
// The supplied buffer contains only this file's header, so the script ends
// where the buffer ends.
endOffset = headerLines.Length;
154
// Parse the file names starting right after the DIFF_GIT prefix, then the
// remaining header lines from wherever the name line ended.
int ptr = ParseGitFileName(NGit.Patch.Patch.DIFF_GIT.Length, headerLines.Length);
155
ParseGitHeaders(ptr, headerLines.Length);
156
this.patchType = type;
157
// Wrap the caller's edits in a single hunk that belongs to this header.
AddHunk(new HunkHeader(this, edits));
160
// Creates a header for the file script that begins at the given offset.
// The change type and patch type assigned here are only starting values;
// the header parsing methods overwrite changeType when they recognise
// add/delete/copy/rename lines.
internal FileHeader(byte[] b, int offset)
163
startOffset = offset;
164
changeType = DiffEntry.ChangeType.MODIFY;
165
// unless otherwise designated
166
patchType = FileHeader.PatchType.UNIFIED;
169
internal virtual int GetParentCount()
174
/// <returns>the byte array holding this file's patch script.</returns>
175
public virtual byte[] GetBuffer()
181
/// offset the start of this file's script in
182
/// <see cref="GetBuffer()">GetBuffer()</see>
185
public virtual int GetStartOffset()
190
/// <returns>offset one past the end of the file script.</returns>
191
public virtual int GetEndOffset()
196
/// <summary>Render this file's patch script as text without charset hints.</summary>
/// <remarks>
/// Forwards to <see cref="GetScriptText(Encoding, Encoding)"/> with a
/// <c>null</c> hint for both the old and the new side of the patch.
/// </remarks>
/// <returns>the patch script, as a Unicode string.</returns>
public virtual string GetScriptText()
{
    Encoding noHint = null;
    return GetScriptText(noHint, noHint);
}
211
/// <summary>Render this file's patch script as text using charset hints.</summary>
/// <remarks>
/// Packs the two hints into an array (old side first, new side second) and
/// forwards it to the array-based overload.
/// </remarks>
/// <param name="oldCharset">hint character set to decode the old lines with.</param>
/// <param name="newCharset">hint character set to decode the new lines with.</param>
/// <returns>the patch script, as a Unicode string.</returns>
public virtual string GetScriptText(Encoding oldCharset, Encoding newCharset)
{
    Encoding[] hints = new Encoding[2];
    hints[0] = oldCharset;
    hints[1] = newCharset;
    return GetScriptText(hints);
}
221
// Converts buf[startOffset..endOffset) to text. Three paths are visible:
// (1) no hunks: the bytes are extracted directly as a binary string;
// (2) TrySimpleConversion accepts the guesses: the whole range is decoded
//     with a single character set;
// (3) otherwise the header lines are extracted as binary strings and the
//     hunk bodies are rebuilt from per-file decoded lines.
// charsetGuess may be null; when supplied it must contain
// GetParentCount() + 1 entries or an ArgumentException is thrown.
internal virtual string GetScriptText(Encoding[] charsetGuess)
223
if (GetHunks().IsEmpty())
225
// If we have no hunks then we can safely assume the entire
226
// patch is a binary style patch, or a meta-data only style
227
// patch. Either way the encoding of the headers should be
228
// strictly 7-bit US-ASCII and the body is either 7-bit ASCII
229
// (due to the base 85 encoding used for a BinaryHunk) or is
230
// arbitrary noise we have chosen to ignore and not understand
231
// (e.g. the message "Binary files ... differ").
233
return RawParseUtils.ExtractBinaryString(buf, startOffset, endOffset);
235
// One guess is expected per parent plus one more; reject any other count.
if (charsetGuess != null && charsetGuess.Length != GetParentCount() + 1)
237
throw new ArgumentException(MessageFormat.Format(JGitText.Get().expectedCharacterEncodingGuesses
238
, (GetParentCount() + 1)));
240
if (TrySimpleConversion(charsetGuess))
242
// Single-charset path: use the first guess when guesses were supplied.
Encoding cs = charsetGuess != null ? charsetGuess[0] : null;
245
// NOTE(review): the condition guarding this assignment is not visible in
// this view; presumably it applies only when cs is null - confirm.
cs = Constants.CHARSET;
249
return RawParseUtils.DecodeNoFallback(cs, buf, startOffset, endOffset);
251
catch (CharacterCodingException)
255
// Try the much slower, more-memory intensive version which
256
// can handle a character set conversion patch.
257
StringBuilder r = new StringBuilder(endOffset - startOffset);
258
// Always treat the headers as US-ASCII; Git file names are encoded
259
// in a C style escape if any character has the high-bit set.
261
// The header region ends where the first hunk begins.
int hdrEnd = GetHunks()[0].GetStartOffset();
262
for (int ptr = startOffset; ptr < hdrEnd; )
264
// Clamp each line end to hdrEnd so no hunk bytes are copied here.
int eol = Math.Min(hdrEnd, RawParseUtils.NextLF(buf, ptr));
265
r.Append(RawParseUtils.ExtractBinaryString(buf, ptr, eol));
268
// Decode each file's lines separately, then let every hunk append its text
// to r using the decoded files and a per-file offsets array.
string[] files = ExtractFileLines(charsetGuess);
269
int[] offsets = new int[files.Length];
270
foreach (HunkHeader h in GetHunks())
272
h.ExtractFileLines(r, files, offsets);
277
// Used by GetScriptText to decide whether the whole script can be decoded
// with one character set.
// NOTE(review): the return statements are not visible in this view;
// presumably the result is true when charsetGuess is null or when every entry
// equals the first one, and false otherwise - confirm.
private static bool TrySimpleConversion(Encoding[] charsetGuess)
279
if (charsetGuess == null)
283
// Compare every later guess against the first one.
for (int i = 1; i < charsetGuess.Length; i++)
285
if (charsetGuess[i] != charsetGuess[0])
293
// Builds one decoded string per file image (GetParentCount() + 1 of them):
// every hunk writes its lines into per-file temporary buffers, and each
// buffer is then decoded with the matching entry of csGuess.
// An IOException is rethrown as a RuntimeException carrying the
// cannotConvertScriptToText message.
private string[] ExtractFileLines(Encoding[] csGuess)
295
TemporaryBuffer[] tmp = new TemporaryBuffer[GetParentCount() + 1];
298
for (int i = 0; i < tmp.Length; i++)
300
tmp[i] = new TemporaryBuffer.LocalFile();
302
foreach (HunkHeader h in GetHunks())
304
h.ExtractFileLines(tmp);
306
string[] r = new string[tmp.Length];
307
for (int i_1 = 0; i_1 < tmp.Length; i_1++)
309
Encoding cs = csGuess != null ? csGuess[i_1] : null;
312
// NOTE(review): the condition guarding this assignment is not visible in
// this view; presumably it applies only when cs is null - confirm.
cs = Constants.CHARSET;
314
r[i_1] = RawParseUtils.Decode(cs, tmp[i_1].ToByteArray());
318
catch (IOException ioe)
320
throw new RuntimeException(JGitText.Get().cannotConvertScriptToText, ioe);
324
// NOTE(review): the loop body is not visible here; presumably it releases
// each temporary buffer - confirm.
foreach (TemporaryBuffer b in tmp)
334
/// <returns>style of patch used to modify this file</returns>
335
public virtual FileHeader.PatchType GetPatchType()
340
/// <summary>Report whether this patch changes anything besides file content.</summary>
/// <remarks>
/// Any change type other than MODIFY counts as a metadata change; for a
/// MODIFY the answer depends on whether the new mode differs from the old mode.
/// </remarks>
/// <returns>true if this patch modifies metadata about a file</returns>
public virtual bool HasMetaDataChanges()
{
    if (changeType != DiffEntry.ChangeType.MODIFY)
    {
        return true;
    }
    return newMode != oldMode;
}
346
/// <returns>hunks altering this file; in order of appearance in patch</returns>
347
public virtual IList<HunkHeader> GetHunks()
351
// Fallback result: an empty list instead of null.
// NOTE(review): the condition selecting this return is not visible in this
// view; presumably it is taken while no hunk has been added - confirm.
return Sharpen.Collections.EmptyList<HunkHeader>();
356
// Registers a hunk with this file header. The hunk must report this header
// as its owner, otherwise an ArgumentException is thrown.
internal virtual void AddHunk(HunkHeader h)
358
if (h.GetFileHeader() != this)
360
throw new ArgumentException(JGitText.Get().hunkBelongsToAnotherFile);
364
// NOTE(review): the guard around this allocation and the statement that
// stores h are not visible in this view; presumably the list is created
// only on first use - confirm.
hunks = new AList<HunkHeader>();
369
/// <summary>Create a hunk header that belongs to this file header.</summary>
/// <param name="offset">value forwarded to the HunkHeader constructor.</param>
/// <returns>a new HunkHeader built from this header and <paramref name="offset"/>.</returns>
internal virtual HunkHeader NewHunkHeader(int offset)
{
    HunkHeader created = new HunkHeader(this, offset);
    return created;
}
376
/// <summary>Get the forward binary hunk stored on this header.</summary>
/// <returns>
/// for a
/// <see cref="PatchType.GIT_BINARY">PatchType.GIT_BINARY</see>
/// patch, the new-image delta/literal
/// </returns>
public virtual BinaryHunk GetForwardBinaryHunk()
{
    return this.forwardBinaryHunk;
}
386
/// <summary>Get the reverse binary hunk stored on this header.</summary>
/// <returns>
/// for a
/// <see cref="PatchType.GIT_BINARY">PatchType.GIT_BINARY</see>
/// patch, the old-image delta/literal
/// </returns>
public virtual BinaryHunk GetReverseBinaryHunk()
{
    return this.reverseBinaryHunk;
}
394
/// <summary>Collect the edits of every hunk of this file into one list.</summary>
/// <remarks>
/// The edits of each hunk are appended in the order the hunks are returned
/// by <see cref="GetHunks()"/>. A header without hunks yields an empty list.
/// </remarks>
/// <returns>a list describing the content edits performed on this file.</returns>
public virtual EditList ToEditList()
{
    EditList r = new EditList();
    // Iterate GetHunks() rather than the raw field: the field has no
    // initializer and is only assigned by AddHunk, so enumerating it directly
    // would throw NullReferenceException for a header with no hunks.
    foreach (HunkHeader hunk in GetHunks())
    {
        Sharpen.Collections.AddAll(r, hunk.ToEditList());
    }
    return r;
}
405
/// <summary>Parse a "diff --git" or "diff --cc" line.</summary>
406
/// <remarks>Parse a "diff --git" or "diff --cc" line.</remarks>
407
/// <param name="ptr">first character after the "diff --git " or "diff --cc " part.</param>
408
/// <param name="end">one past the last position to parse.</param>
409
/// <returns>first character after the LF at the end of the line; -1 on error.</returns>
410
internal virtual int ParseGitFileName(int ptr, int end)
412
int eol = RawParseUtils.NextLF(buf, ptr);
418
// buffer[ptr..eol] looks like "a/foo b/foo\n". After the first
419
// A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There
420
// is only one way to split the line such that text to the left
421
// of the space matches the text to the right, excluding the part
422
// before the first slash.
424
// aStart: start of the old name with its leading "x/" component skipped
// (assumes NextLF(buf, p, c) returns the position just past the next c,
// which matches how "sp - 1" is used as an end bound below - confirm).
int aStart = RawParseUtils.NextLF(buf, ptr, '/');
431
// sp: position just past a candidate separating space.
int sp = RawParseUtils.NextLF(buf, ptr, ' ');
434
// We can't split the header, it isn't valid.
435
// This may be OK if this is a rename patch.
439
// bStart: start of the new name with its leading "x/" component skipped.
int bStart = RawParseUtils.NextLF(buf, sp, '/');
444
// If buffer[aStart..sp - 1] = buffer[bStart..eol - 1]
445
// we have a valid split.
447
if (Eq(aStart, sp - 1, bStart, eol - 1))
451
// We're a double quoted name. The region better end
452
// in a double quote too, and we need to decode the
453
// characters before reading the name.
455
if (buf[sp - 2] != '"')
459
// Quoted form: dequote first, then drop the leading path component.
oldPath = QuotedString.GIT_PATH.Dequote(buf, bol, sp - 1);
460
oldPath = P1(oldPath);
464
// Unquoted form: decode the bytes of the old name directly.
oldPath = RawParseUtils.Decode(Constants.CHARSET, buf, aStart, sp - 1);
469
// This split wasn't correct. Move past the space and try
470
// another split as the space must be part of the file name.
477
// Parses the extended header lines that follow the "diff --git" line.
// Each line starting at ptr is tested against the known keyword prefixes;
// a match updates the corresponding path, mode, change type, score or
// object id of this header. Scanning stops at the first hunk header.
// NOTE(review): the loop construct, the branch chaining and the return
// value are not visible in this view.
internal virtual int ParseGitHeaders(int ptr, int end)
481
int eol = RawParseUtils.NextLF(buf, ptr);
482
if (IsHunkHdr(buf, ptr, eol) >= 1)
484
// First hunk header; break out and parse them later.
489
if (RawParseUtils.Match(buf, ptr, OLD_NAME) >= 0)
491
ParseOldName(ptr, eol);
495
if (RawParseUtils.Match(buf, ptr, NEW_NAME) >= 0)
497
ParseNewName(ptr, eol);
501
if (RawParseUtils.Match(buf, ptr, OLD_MODE) >= 0)
503
oldMode = ParseFileMode(ptr + OLD_MODE.Length, eol);
507
if (RawParseUtils.Match(buf, ptr, NEW_MODE) >= 0)
509
newMode = ParseFileMode(ptr + NEW_MODE.Length, eol);
513
if (RawParseUtils.Match(buf, ptr, DELETED_FILE_MODE) >= 0)
515
// A deletion records the old mode and marks the new side as missing.
oldMode = ParseFileMode(ptr + DELETED_FILE_MODE.Length, eol);
516
newMode = FileMode.MISSING;
517
changeType = DiffEntry.ChangeType.DELETE;
521
if (RawParseUtils.Match(buf, ptr, NEW_FILE_MODE) >= 0)
523
ParseNewFileMode(ptr, eol);
527
if (RawParseUtils.Match(buf, ptr, COPY_FROM) >= 0)
529
oldPath = ParseName(oldPath, ptr + COPY_FROM.Length, eol);
530
changeType = DiffEntry.ChangeType.COPY;
534
if (RawParseUtils.Match(buf, ptr, COPY_TO) >= 0)
536
newPath = ParseName(newPath, ptr + COPY_TO.Length, eol);
537
changeType = DiffEntry.ChangeType.COPY;
541
// "rename old"/"rename new" are handled exactly like "rename from"/"rename to".
if (RawParseUtils.Match(buf, ptr, RENAME_OLD) >= 0)
543
oldPath = ParseName(oldPath, ptr + RENAME_OLD.Length, eol);
544
changeType = DiffEntry.ChangeType.RENAME;
548
if (RawParseUtils.Match(buf, ptr, RENAME_NEW) >= 0)
550
newPath = ParseName(newPath, ptr + RENAME_NEW.Length, eol);
551
changeType = DiffEntry.ChangeType.RENAME;
555
if (RawParseUtils.Match(buf, ptr, RENAME_FROM) >= 0)
557
oldPath = ParseName(oldPath, ptr + RENAME_FROM.Length, eol);
558
changeType = DiffEntry.ChangeType.RENAME;
562
if (RawParseUtils.Match(buf, ptr, RENAME_TO) >= 0)
564
newPath = ParseName(newPath, ptr + RENAME_TO.Length, eol);
565
changeType = DiffEntry.ChangeType.RENAME;
569
if (RawParseUtils.Match(buf, ptr, SIMILARITY_INDEX) >= 0)
571
score = RawParseUtils.ParseBase10(buf, ptr + SIMILARITY_INDEX.Length, null);
575
if (RawParseUtils.Match(buf, ptr, DISSIMILARITY_INDEX) >= 0)
577
// NOTE(review): the dissimilarity value is stored in the same score field
// as the similarity value above, without inversion - confirm this is intended.
score = RawParseUtils.ParseBase10(buf, ptr + DISSIMILARITY_INDEX.Length, null);
581
if (RawParseUtils.Match(buf, ptr, INDEX) >= 0)
583
ParseIndexLine(ptr + INDEX.Length, eol);
587
// Probably an empty patch (stat dirty).
610
/// <summary>Parse an old-name ("--- ") header line into the old path.</summary>
/// <remarks>
/// The name is read after the OLD_NAME prefix and its leading path
/// component is stripped. If the result is DEV_NULL the change type
/// becomes ADD.
/// </remarks>
/// <param name="ptr">position of the first byte of the line.</param>
/// <param name="eol">end position of the line, as passed on to ParseName.</param>
internal virtual void ParseOldName(int ptr, int eol)
{
    int nameStart = ptr + OLD_NAME.Length;
    string parsed = ParseName(oldPath, nameStart, eol);
    oldPath = P1(parsed);
    if (oldPath == DEV_NULL)
    {
        changeType = DiffEntry.ChangeType.ADD;
    }
}
619
/// <summary>Parse a new-name ("+++ ") header line into the new path.</summary>
/// <remarks>
/// The name is read after the NEW_NAME prefix and its leading path
/// component is stripped. If the result is DEV_NULL the change type
/// becomes DELETE.
/// </remarks>
/// <param name="ptr">position of the first byte of the line.</param>
/// <param name="eol">end position of the line, as passed on to ParseName.</param>
internal virtual void ParseNewName(int ptr, int eol)
{
    int nameStart = ptr + NEW_NAME.Length;
    string parsed = ParseName(newPath, nameStart, eol);
    newPath = P1(parsed);
    if (newPath == DEV_NULL)
    {
        changeType = DiffEntry.ChangeType.DELETE;
    }
}
628
/// <summary>Parse a "new file mode " header line.</summary>
/// <remarks>
/// Marks the old side as missing, reads the new mode from the text after
/// the NEW_FILE_MODE prefix, and sets the change type to ADD.
/// </remarks>
/// <param name="ptr">position of the first byte of the line.</param>
/// <param name="eol">end position of the line, as passed on to ParseFileMode.</param>
internal virtual void ParseNewFileMode(int ptr, int eol)
{
    oldMode = FileMode.MISSING;
    int modeStart = ptr + NEW_FILE_MODE.Length;
    newMode = ParseFileMode(modeStart, eol);
    changeType = DiffEntry.ChangeType.ADD;
}
635
// Parses the header of a traditional (non "diff --git") patch: only the
// old-name and new-name lines are recognised, and scanning stops at the
// first hunk header.
// NOTE(review): the loop construct and the return value are not visible in
// this view.
internal virtual int ParseTraditionalHeaders(int ptr, int end)
639
int eol = RawParseUtils.NextLF(buf, ptr);
640
if (IsHunkHdr(buf, ptr, eol) >= 1)
642
// First hunk header; break out and parse them later.
647
if (RawParseUtils.Match(buf, ptr, OLD_NAME) >= 0)
649
ParseOldName(ptr, eol);
653
if (RawParseUtils.Match(buf, ptr, NEW_NAME) >= 0)
655
ParseNewName(ptr, eol);
659
// Possibly an empty patch.
669
// Reads a file name from buf starting at ptr. Two forms are handled: a
// quoted name that is dequoted with QuotedString.GIT_PATH, and a plain name
// that is decoded with Constants.CHARSET up to an optional tab.
// NOTE(review): the branch conditions, the use of the expect argument and
// the return statements are not visible in this view.
private string ParseName(string expect, int ptr, int end)
678
// New style GNU diff format
680
r = QuotedString.GIT_PATH.Dequote(buf, ptr, end - 1);
684
// Older style GNU diff format, an optional tab ends the name.
687
// Walk tab backwards until the byte before it is a tab, or until it reaches ptr.
while (ptr < tab && buf[tab - 1] != '\t')
695
r = RawParseUtils.Decode(Constants.CHARSET, buf, ptr, tab - 1);
697
// NOTE(review): the statement under this test is not visible; presumably
// it substitutes the shared DEV_NULL instance - confirm.
if (r.Equals(DEV_NULL))
704
/// <summary>Strip the leading path component from a name.</summary>
/// <remarks>
/// Everything up to and including the first '/' is removed. The name is
/// returned unchanged when it has no '/' or when the '/' is its first
/// character.
/// </remarks>
/// <param name="r">the name to shorten.</param>
/// <returns>the text after the first '/', or the name itself.</returns>
private static string P1(string r)
{
    int slash = r.IndexOf('/');
    if (slash > 0)
    {
        return Sharpen.Runtime.Substring(r, slash + 1);
    }
    return r;
}
710
// Reads the digits in buf from ptr up to (but excluding) position end - 1
// and converts the accumulated value to a FileMode via FileMode.FromBits.
// NOTE(review): the declaration of tmp and any per-digit scaling are not
// visible in this view; presumably the digits are combined as an octal
// number - confirm.
internal virtual FileMode ParseFileMode(int ptr, int end)
713
while (ptr < end - 1)
716
// Convert the ASCII digit to its numeric value and add it in.
tmp += buf[ptr++] - '0';
718
return FileMode.FromBits(tmp);
721
// Parses the value part of an "index " header line into oldId, newId and,
// when a mode follows, oldMode/newMode.
internal virtual void ParseIndexLine(int ptr, int end)
723
// "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1
724
// can be unique abbreviations
726
// dot2 and mode are used below as "one past the found character", which
// assumes NextLF(buf, p, c) returns the position after the next c - confirm.
int dot2 = RawParseUtils.NextLF(buf, ptr, '.');
727
int mode = RawParseUtils.NextLF(buf, dot2, ' ');
728
oldId = AbbreviatedObjectId.FromString(buf, ptr, dot2 - 1);
729
// dot2 + 1 skips the second '.' of the ".." separator.
newId = AbbreviatedObjectId.FromString(buf, dot2 + 1, mode - 1);
732
// The single mode on the line applies to both sides.
// NOTE(review): the condition guarding this assignment is not visible.
newMode = oldMode = ParseFileMode(mode, end);
736
// Compares two regions of buf, [aPtr..aEnd) and [bPtr..bEnd), byte by byte.
// NOTE(review): the loop construct and the return statements are not visible
// in this view; presumably regions of different length or with any differing
// byte are unequal - confirm.
private bool Eq(int aPtr, int aEnd, int bPtr, int bEnd)
738
if (aEnd - aPtr != bEnd - bPtr)
744
if (buf[aPtr++] != buf[bPtr++])
752
/// <summary>Determine if this is a patch hunk header.</summary>
753
/// <remarks>Determine if this is a patch hunk header.</remarks>
754
/// <param name="buf">the buffer to scan</param>
755
/// <param name="start">first position in the buffer to evaluate</param>
756
/// <param name="end">
757
/// last position to consider; usually the end of the buffer (
758
/// <code>buf.length</code>) or the first position on the next
759
/// line. This is only used to avoid very long runs of '@' from
760
/// killing the scan loop.
763
/// the number of "ancestor revisions" in the hunk header. A
764
/// traditional two-way diff ("@@ -...") returns 1; a combined diff
765
/// for a 3 way-merge returns 3. If this is not a hunk header, 0 is
766
/// returned instead.
768
internal static int IsHunkHdr(byte[] buf, int start, int end)
771
// Skip the leading run of '@' characters, never reading past end.
while (ptr < end && buf[ptr] == '@')
779
// The run must be followed by a space and then a '-'.
if (ptr == end || buf[ptr++] != ' ')
783
if (ptr == end || buf[ptr++] != '-')
787
// ptr is now past the '@' run, the space and the '-'. Assuming ptr began
// at start, the result is (number of '@') - 1, e.g. 1 for "@@ -".
return (ptr - 3) - start;