2
This code is derived from jgit (http://eclipse.org/jgit).
3
Copyright owners are documented in jgit's IP log.
5
This program and the accompanying materials are made available
6
under the terms of the Eclipse Distribution License v1.0 which
7
accompanies this distribution, is reproduced below, and is
8
available at http://www.eclipse.org/org/documents/edl-v10.php
12
Redistribution and use in source and binary forms, with or
13
without modification, are permitted provided that the following
16
- Redistributions of source code must retain the above copyright
17
notice, this list of conditions and the following disclaimer.
19
- Redistributions in binary form must reproduce the above
20
copyright notice, this list of conditions and the following
21
disclaimer in the documentation and/or other materials provided
22
with the distribution.
24
- Neither the name of the Eclipse Foundation, Inc. nor the
25
names of its contributors may be used to endorse or promote
26
products derived from this software without specific prior
29
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
30
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
31
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
32
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
34
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
37
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
38
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
39
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
41
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50
/// <summary>A Sequence supporting UNIX formatted text in byte[] format.</summary>
52
/// A Sequence supporting UNIX formatted text in byte[] format.
54
/// Elements of the sequence are the lines of the file, as delimited by the UNIX
55
/// newline character ('\n'). The file content is treated as 8 bit binary text,
56
/// with no assumptions or requirements on character encoding.
58
/// Note that the first line of the file is element 0, as defined by the Sequence
59
/// interface API. Traditionally in a text editor or a patch file the first line is
60
/// line number 1. Callers may need to subtract 1 prior to invoking methods if
61
/// they are converting from "line number" to "element index".
63
public class RawText : Sequence
65
/// <summary>A RawText of length 0.</summary>
66
public static readonly NGit.Diff.RawText EMPTY_TEXT = new NGit.Diff.RawText(new byte
70
/// Number of bytes to check for heuristics in
71
/// <see cref="IsBinary(byte[])">IsBinary(byte[])</see>
74
private const int FIRST_FEW_BYTES = 8000;
76
/// <summary>The file content for this sequence.</summary>
77
/// <remarks>The file content for this sequence.</remarks>
78
protected internal readonly byte[] content;
81
/// Map of line number to starting position within
82
/// <see cref="content">content</see>
85
protected internal readonly IntList lines;
87
/// <summary>Create a new sequence from an existing content byte array.</summary>
89
/// Create a new sequence from an existing content byte array.
91
/// The entire array (indexes 0 through length-1) is used as the content.
93
/// <param name="input">
94
/// the content array. The array is never modified, so passing
95
/// through cached arrays is safe.
97
public RawText(byte[] input)
100
lines = RawParseUtils.LineMap(content, 0, content.Length);
103
/// <summary>Create a new sequence from a file.</summary>
105
/// Create a new sequence from a file.
107
/// The entire file contents are used.
109
/// <param name="file">the text file.</param>
110
/// <exception cref="System.IO.IOException">if an error occurs while reading the file
112
public RawText(FilePath file) : this(IOUtil.ReadFully(file))
116
/// <returns>total number of items in the sequence.</returns>
117
public override int Size()
119
// The line map is always 2 entries larger than the number of lines in
120
// the file. Index 0 is padded out/unused. The last index is the total
121
// length of the buffer, and acts as a sentinel.
123
return lines.Size() - 2;
126
/// <summary>Write a specific line to the output stream, without its trailing LF.</summary>
128
/// Write a specific line to the output stream, without its trailing LF.
130
/// The specified line is copied as-is, with no character encoding
131
/// translation performed.
133
/// If the specified line ends with an LF ('\n'), the LF is <b>not</b>
134
/// copied. It is up to the caller to write the LF, if desired, between
137
/// <param name="out">stream to copy the line data onto.</param>
139
/// index of the line to extract. Note this is 0-based, so line
140
/// number 1 is actually index 0.
142
/// <exception cref="System.IO.IOException">the stream write operation failed.</exception>
143
public virtual void WriteLine(OutputStream @out, int i)
145
int start = GetStart(i);
147
if (content[end - 1] == '\n')
151
@out.Write(content, start, end - start);
154
/// <summary>Determine if the file ends with a LF ('\n').</summary>
155
/// <remarks>Determine if the file ends with a LF ('\n').</remarks>
156
/// <returns>true if the last line is missing its trailing LF; false if it ends with an LF.</returns>
157
public virtual bool IsMissingNewlineAtEnd()
159
int end = lines.Get(lines.Size() - 1);
164
return content[end - 1] != '\n';
167
/// <summary>Get the text for a single line.</summary>
168
/// <remarks>Get the text for a single line.</remarks>
170
/// index of the line to extract. Note this is 0-based, so line
171
/// number 1 is actually index 0.
173
/// <returns>the text for the line, without a trailing LF.</returns>
174
public virtual string GetString(int i)
176
return GetString(i, i + 1, true);
179
/// <summary>Get the text for a region of lines.</summary>
180
/// <remarks>Get the text for a region of lines.</remarks>
181
/// <param name="begin">
182
/// index of the first line to extract. Note this is 0-based, so
183
/// line number 1 is actually index 0.
185
/// <param name="end">index of one past the last line to extract.</param>
186
/// <param name="dropLF">
187
/// if true the trailing LF ('\n') of the last returned line is
188
/// dropped, if present.
191
/// the text for lines
192
/// <code>[begin, end)</code>
195
public virtual string GetString(int begin, int end, bool dropLF)
201
int s = GetStart(begin);
202
int e = GetEnd(end - 1);
203
if (dropLF && content[e - 1] == '\n')
210
/// <summary>Decode a region of the text into a String.</summary>
212
/// Decode a region of the text into a String.
213
/// The default implementation of this method tries to guess the character
214
/// set by considering UTF-8, the platform default, and falling back on
215
/// ISO-8859-1 if neither of those can correctly decode the region given.
217
/// <param name="start">first byte of the content to decode.</param>
218
/// <param name="end">one past the last byte of the content to decode.</param>
221
/// <code>[start, end)</code>
222
/// decoded as a String.
224
protected internal virtual string Decode(int start, int end)
226
return RawParseUtils.Decode(content, start, end);
229
/// <summary>Look up the starting position of a line within <code>content</code>.</summary>
/// <param name="i">0-based index of the line.</param>
/// <returns>the entry stored in the line map at position <code>i + 1</code>.</returns>
private int GetStart(int i)
231
// Offset by one because index 0 of the line map is padding; line i is stored at i + 1.
return lines.Get(i + 1);
234
/// <summary>Look up the position just past the end of a line within <code>content</code>.</summary>
/// <param name="i">0-based index of the line.</param>
/// <returns>the entry stored in the line map at position <code>i + 2</code>.</returns>
private int GetEnd(int i)
236
// One entry beyond GetStart(i): the start of the following line, or for the last
// line the trailing sentinel entry holding the total buffer length.
return lines.Get(i + 2);
240
/// Determine heuristically whether a byte array represents binary (as
241
/// opposed to text) content.
244
/// Determine heuristically whether a byte array represents binary (as
245
/// opposed to text) content.
247
/// <param name="raw">the raw file content.</param>
248
/// <returns>true if raw is likely to be a binary file, false otherwise</returns>
249
public static bool IsBinary(byte[] raw)
251
return IsBinary(raw, raw.Length);
255
/// Determine heuristically whether the bytes contained in a stream
256
/// represent binary (as opposed to text) content.
259
/// Determine heuristically whether the bytes contained in a stream
260
/// represent binary (as opposed to text) content.
261
/// Note: Do not further use this stream after having called this method! The
262
/// stream may not be fully read and will be left at an unknown position
263
/// after consuming an unknown number of bytes. The caller is responsible for
264
/// closing the stream.
266
/// <param name="raw">input stream containing the raw file content.</param>
267
/// <returns>true if raw is likely to be a binary file, false otherwise</returns>
268
/// <exception cref="System.IO.IOException">if input stream could not be read</exception>
269
public static bool IsBinary(InputStream raw)
271
byte[] buffer = new byte[FIRST_FEW_BYTES];
273
while (cnt < buffer.Length)
275
int n = raw.Read(buffer, cnt, buffer.Length - cnt);
282
return IsBinary(buffer, cnt);
286
/// Determine heuristically whether a byte array represents binary (as
287
/// opposed to text) content.
290
/// Determine heuristically whether a byte array represents binary (as
291
/// opposed to text) content.
293
/// <param name="raw">the raw file content.</param>
294
/// <param name="length">
295
/// number of bytes in
297
/// to evaluate. This should be
298
/// <code>raw.length</code>
301
/// was over-allocated by
304
/// <returns>true if raw is likely to be a binary file, false otherwise</returns>
305
public static bool IsBinary(byte[] raw, int length)
307
// Same heuristic as C Git
308
if (length > FIRST_FEW_BYTES)
310
length = FIRST_FEW_BYTES;
312
for (int ptr = 0; ptr < length; ptr++)
314
if (raw[ptr] == '\0')