DELTA_TYPES = (OFS_DELTA, REF_DELTA)


def take_msb_bytes(read, crc32=None):
    """Read bytes marked with most significant bit.

    :param read: Read function
    :param crc32: If not None, a starting CRC32 value to update with each
        byte read; the updated CRC32 is returned alongside the bytes.
    """
    ret = []
    while len(ret) == 0 or ret[-1] & 0x80:
        b = read(1)
        if crc32 is not None:
            crc32 = binascii.crc32(b, crc32)
        ret.append(ord(b))
    return ret, crc32
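
# Editor's sketch (not part of the original module): take_msb_bytes keeps
# reading single bytes while the most significant bit (0x80) is set, so the
# two-byte run below stops at 0x2e and leaves the rest of the stream intact.
def _example_take_msb_bytes():
    from cStringIO import StringIO
    buf = StringIO('\x91\x2eREST')
    bytes, crc32 = take_msb_bytes(buf.read, crc32=0)
    assert bytes == [0x91, 0x2e]
    assert buf.read() == 'REST'  # only the MSB-delimited run was consumed
    return bytes, crc32
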

class UnpackedObject(object):
    """Class encapsulating an object unpacked from a pack file.

    These objects should only be created from within unpack_object. Most
    members start out as empty and are filled in at various points by
    read_zlib_chunks, unpack_object, DeltaChainIterator, etc.

    End users of this object should take care that the function they're getting
    this object from is guaranteed to set the members they need.
    """

    __slots__ = [
      'offset',         # Offset in its pack.
      '_sha',           # Cached binary SHA.
      'obj_type_num',   # Type of this object.
      'obj_chunks',     # Decompressed and delta-resolved chunks.
      'pack_type_num',  # Type of this object in the pack (may be a delta).
      'delta_base',     # Delta base offset or SHA.
      'comp_chunks',    # Compressed object chunks.
      'decomp_chunks',  # Decompressed object chunks.
      'decomp_len',     # Decompressed length of this object.
      'crc32',          # CRC32.
      ]

    # TODO(dborowitz): read_zlib_chunks and unpack_object could very well be
    # methods of this object.
    def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
        self.offset = None
        self._sha = None
        self.pack_type_num = pack_type_num
        self.delta_base = delta_base
        self.comp_chunks = None
        self.decomp_chunks = []
        self.decomp_len = decomp_len
        self.crc32 = crc32

        if pack_type_num in DELTA_TYPES:
            self.obj_type_num = None
            self.obj_chunks = None
        else:
            self.obj_type_num = pack_type_num
            self.obj_chunks = self.decomp_chunks
            self.delta_base = delta_base

    def sha(self):
        """Return the binary SHA of this object."""
        if self._sha is None:
            self._sha = obj_sha(self.obj_type_num, self.obj_chunks)
        return self._sha

    def sha_file(self):
        """Return a ShaFile from this object."""
        return ShaFile.from_raw_chunks(self.obj_type_num, self.obj_chunks)

    # Only provided for backwards compatibility with code that expects either
    # chunks or a delta tuple.
    def _obj(self):
        """Return the decompressed chunks, or (delta base, delta chunks)."""
        if self.pack_type_num in DELTA_TYPES:
            return (self.delta_base, self.decomp_chunks)
        else:
            return self.decomp_chunks

    def __eq__(self, other):
        if not isinstance(other, UnpackedObject):
            return False
        for slot in self.__slots__:
            if getattr(self, slot) != getattr(other, slot):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        data = ['%s=%r' % (s, getattr(self, s)) for s in self.__slots__]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(data))
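
# Editor's sketch: for non-delta types, UnpackedObject aliases obj_chunks to
# decomp_chunks, so chunks appended during decompression immediately become
# the resolved object contents. Type number 3 is a git blob.
def _example_unpacked_object_aliasing():
    unpacked = UnpackedObject(3, None, 4, None)
    unpacked.decomp_chunks.append('spam')
    assert unpacked.obj_chunks is unpacked.decomp_chunks
    return unpacked._obj()  # ['spam'], since blobs are not deltas
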

def read_zlib_chunks(read_some, unpacked, include_comp=False,
                     buffer_size=_ZLIB_BUFSIZE):
    """Read zlib data from a buffer.

    This function requires that the buffer have additional data following the
    compressed data, which is guaranteed to be the case for git pack files.

    :param read_some: Read function that returns at least one byte, but may
        return less than the requested size.
    :param unpacked: An UnpackedObject to write result data to. If its crc32
        attr is not None, the CRC32 of the compressed bytes will be computed
        using this starting CRC32.
        After this function, will have the following attrs set:
        * comp_chunks    (if include_comp is True)
        * decomp_chunks
        * decomp_len
        * crc32
    :param include_comp: If True, include compressed data in the result.
    :param buffer_size: Size of the read buffer.
    :return: Leftover unused data from the decompression.
    :raise zlib.error: if a decompression error occurred.
    """
    if unpacked.decomp_len <= -1:
        raise ValueError('non-negative zlib data stream size expected')
    decomp_obj = zlib.decompressobj()

    comp_chunks = []
    decomp_chunks = unpacked.decomp_chunks
    decomp_len = 0
    crc32 = unpacked.crc32

    while True:
        add = read_some(buffer_size)
        if not add:
            raise zlib.error('EOF before end of zlib stream')
        comp_chunks.append(add)
        decomp = decomp_obj.decompress(add)
        decomp_len += len(decomp)
        decomp_chunks.append(decomp)
        unused = decomp_obj.unused_data
        if unused:
            left = len(unused)
            if crc32 is not None:
                crc32 = binascii.crc32(add[:-left], crc32)
            if include_comp:
                comp_chunks[-1] = add[:-left]
            break
        elif crc32 is not None:
            crc32 = binascii.crc32(add, crc32)
    if crc32 is not None:
        crc32 &= 0xffffffff

    if decomp_len != unpacked.decomp_len:
        raise zlib.error('decompressed data does not match expected size')

    unpacked.crc32 = crc32
    if include_comp:
        unpacked.comp_chunks = comp_chunks
    return unused
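
# Editor's sketch: read_zlib_chunks needs data after the compressed stream
# (as in a real pack), and hands that trailing data back as 'unused'.
def _example_read_zlib_chunks():
    from cStringIO import StringIO
    data = 'spam eggs'
    buf = StringIO(zlib.compress(data) + 'TRAILING PACK DATA')
    unpacked = UnpackedObject(3, None, len(data), None)  # 3 = blob
    unused = read_zlib_chunks(buf.read, unpacked)
    assert ''.join(unpacked.decomp_chunks) == data
    return unused  # 'TRAILING PACK DATA', read in the same buffer pass
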

def iter_sha1(iter):
    """Return the hexdigest of the SHA1 over a set of names.

    :param iter: Iterator over string objects
    :return: 40-byte hex sha1 digest
    """
    sha1 = make_sha()
    for name in iter:
        sha1.update(name)
    return sha1.hexdigest()


def chunks_length(chunks):
    return sum(imap(len, chunks))

def unpack_object(read_all, read_some=None, compute_crc32=False,
                  include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):
    """Unpack a Git object.

    :param read_all: Read function that blocks until the number of requested
        bytes are read.
    :param read_some: Read function that returns at least one byte, but may not
        return the number of bytes requested.
    :param compute_crc32: If True, compute the CRC32 of the compressed data. If
        False, the returned CRC32 will be None.
    :param include_comp: If True, include compressed data in the result.
    :param zlib_bufsize: An optional buffer size for zlib operations.
    :return: A tuple of (unpacked, unused), where unused is the unused data
        leftover from decompression, and unpacked is an UnpackedObject with
        the following attrs set:

        * obj_chunks     (for non-delta types)
        * pack_type_num
        * delta_base     (for delta types)
        * comp_chunks    (if include_comp is True)
        * decomp_chunks
        * decomp_len
        * crc32          (if compute_crc32 is True)
    """
    if read_some is None:
        read_some = read_all
    if compute_crc32:
        crc32 = 0
    else:
        crc32 = None

    bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
    type_num = (bytes[0] >> 4) & 0x07
    size = bytes[0] & 0x0f
    for i, byte in enumerate(bytes[1:]):
        size += (byte & 0x7f) << ((i * 7) + 4)

    raw_base = len(bytes)
    if type_num == OFS_DELTA:
        bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
        raw_base += len(bytes)
        assert not (bytes[-1] & 0x80)
        delta_base_offset = bytes[0] & 0x7f
        for byte in bytes[1:]:
            delta_base_offset += 1
            delta_base_offset <<= 7
            delta_base_offset += (byte & 0x7f)
        delta_base = delta_base_offset
    elif type_num == REF_DELTA:
        delta_base = read_all(20)
        if compute_crc32:
            crc32 = binascii.crc32(delta_base, crc32)
        raw_base += 20
    else:
        delta_base = None

    unpacked = UnpackedObject(type_num, delta_base, size, crc32)
    unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
                              include_comp=include_comp)
    return unpacked, unused
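
# Editor's sketch: hand-build a single packed blob record and unpack it. The
# header byte 0x39 encodes type 3 (blob) in bits 4-6 and size 9 in the low
# nibble; a trailing byte is required by read_zlib_chunks.
def _example_unpack_object():
    from cStringIO import StringIO
    data = 'spam eggs'
    buf = StringIO(chr((3 << 4) | len(data)) + zlib.compress(data) + '\x00')
    unpacked, unused = unpack_object(buf.read, compute_crc32=True)
    assert unpacked.pack_type_num == 3
    assert ''.join(unpacked.obj_chunks) == data
    return unpacked.crc32, unused
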

def _compute_object_size((num, obj)):
    """Compute the size of a unresolved object for use with LRUSizeCache."""
    if num in DELTA_TYPES:
        return chunks_length(obj[1])
    return chunks_length(obj)

    def __len__(self):
        return self._num_objects

    def read_objects(self, compute_crc32=False):
        """Read the objects in this pack file.

        :param compute_crc32: If True, compute the CRC32 of the compressed
            data. If False, the returned CRC32 will be None.
        :return: Iterator over UnpackedObjects with the following members set:
            offset
            obj_type_num
            obj_chunks (for non-delta types)
            delta_base (for delta types)
            decomp_chunks
            decomp_len
            crc32 (if compute_crc32 is True)
        :raise AssertionError: if there is an error in the pack format.
        :raise ChecksumMismatch: if the checksum of the pack contents does not
            match the checksum in the pack trailer.
        :raise zlib.error: if an error occurred during zlib decompression.
        :raise IOError: if an error occurred writing to the output file.
        """
        pack_version, self._num_objects = read_pack_header(self.read)
        if pack_version is None:
            return

        for i in xrange(self._num_objects):
            offset = self.offset
            unpacked, unused = unpack_object(
                self.read, read_some=self.recv, compute_crc32=compute_crc32,
                zlib_bufsize=self._zlib_bufsize)
            unpacked.offset = offset

            # prepend any unused data to current read buffer
            buf = StringIO()
            buf.write(unused)
            buf.write(self._rbuf.read())
            buf.seek(0)
            self._rbuf = buf

            yield unpacked

        if self._buf_len() < 20:
            # If the read buffer is full, then the last read() got the whole
            # trailer off the wire. If not, it means there is still some of the
            # trailer to read. We need to read() all 20 bytes; N come from the
            # read buffer and (20 - N) come from the wire.
            self.read(20)

        pack_sha = ''.join(self._trailer)
        if pack_sha != self.sha.digest():
            raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())
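
# Editor's sketch (assumed names): summarize the records arriving on a pack
# stream. PackStreamReader is the enclosing class of read_objects above;
# read_all/read_some would typically be a ReceivableProtocol's read and recv.
def _example_stream_record_summary(read_all, read_some):
    reader = PackStreamReader(read_all, read_some=read_some)
    return [(unpacked.offset, unpacked.pack_type_num, unpacked.decomp_len)
            for unpacked in reader.read_objects(compute_crc32=False)]
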

class PackStreamCopier(PackStreamReader):
    """Class to verify a pack stream as it is being read.

    The pack is read from a ReceivableProtocol using read() or recv() as
    appropriate and written out to the given file-like object.
    """

    def __init__(self, read_all, read_some, outfile, delta_iter=None):
        """Initialize the copier.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        :param outfile: File-like object to write output through.
        :param delta_iter: Optional DeltaChainIterator to record deltas as we
            read them.
        """
        super(PackStreamCopier, self).__init__(read_all, read_some=read_some)
        self.outfile = outfile
        self._delta_iter = delta_iter

    def _read(self, read, size):
        """Read data from the read callback and write it to the file."""
        data = super(PackStreamCopier, self)._read(read, size)
        self.outfile.write(data)
        return data

    def verify(self):
        """Verify a pack stream and write it to the output file.

        See PackStreamReader.iterobjects for a list of exceptions this may
        throw.
        """
        if self._delta_iter:
            for unpacked in self.read_objects():
                self._delta_iter.record(unpacked)
        else:
            for _ in self.read_objects():
                pass
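
# Editor's sketch: copy a pack off the wire to disk while verifying its
# trailer checksum. 'proto' is assumed to be a dulwich ReceivableProtocol
# (anything exposing a blocking read() and a socket-style recv() works).
def _example_copy_pack(proto, path):
    outfile = open(path, 'wb')
    try:
        PackStreamCopier(proto.read, proto.recv, outfile).verify()
    finally:
        outfile.close()
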

def obj_sha(type, chunks):
    """Compute the SHA for a numeric type and object chunks."""
    sha = make_sha()
    sha.update(object_header(type, chunks_length(chunks)))
    for chunk in chunks:
        sha.update(chunk)
    return sha.digest()

        self._offset_cache[offset] = type, chunks
        return type, chunks

    def iterobjects(self, progress=None, compute_crc32=True):
        self._file.seek(self._header_size)
        for i in xrange(1, self._num_objects + 1):
            offset = self._file.tell()
            unpacked, unused = unpack_object(
                self._file.read, compute_crc32=compute_crc32)
            if progress is not None:
                progress(i, self._num_objects)
            yield (offset, unpacked.pack_type_num, unpacked._obj(),
                   unpacked.crc32)
            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
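
    # Editor's sketch of iterobjects usage, e.g.:
    #
    #     counts = {}
    #     for offset, type_num, obj, crc32 in pack_data.iterobjects():
    #         counts[type_num] = counts.get(type_num, 0) + 1
    #
    # obj is a (delta base, delta chunks) pair for delta types and a plain
    # chunk list otherwise.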

    def _iter_unpacked(self):
        # TODO(dborowitz): Merge this with iterobjects, if we can change its
        # return type.
        self._file.seek(self._header_size)
        for _ in xrange(self._num_objects):
            offset = self._file.tell()
            unpacked, unused = unpack_object(
                self._file.read, compute_crc32=False)
            unpacked.offset = offset
            yield unpacked
            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.

    def iterentries(self, progress=None):
        """Yield entries summarizing the contents of this pack.

        :param progress: Progress function, called with current and total
            object count.
        :return: iterator of tuples with (sha, offset, crc32)
        """
        num_objects = self._num_objects
        for i, result in enumerate(PackIndexer.for_pack_data(self)):
            if progress is not None:
                progress(i, num_objects)
            yield result

    def get_object_at(self, offset):
        """Given an offset in to the packfile return the object that is there.

        Using the associated index the location of an object can be looked up,
        and then the packfile can be asked directly for that object using this
        function.
        """
        if offset in self._offset_cache:
            return self._offset_cache[offset]
        assert isinstance(offset, long) or isinstance(offset, int), \
            'offset was %r' % offset
        assert offset >= self._header_size
        self._file.seek(offset)
        unpacked, _ = unpack_object(self._file.read)
        return (unpacked.pack_type_num, unpacked._obj())

class DeltaChainIterator(object):
    """Abstract iterator over pack data based on delta chains.

    Each object in the pack is guaranteed to be inflated exactly once,
    regardless of how many objects reference it as a delta base. As a result,
    memory usage is proportional to the length of the longest delta chain.

    Subclasses can override _result to define the result type of the iterator.
    By default, results are UnpackedObjects with the following members set:

    * offset
    * obj_type_num
    * obj_chunks
    * pack_type_num
    * delta_base     (for delta types)
    * comp_chunks    (if _include_comp is True)
    * decomp_chunks
    * decomp_len
    * crc32          (if _compute_crc32 is True)
    """

    _compute_crc32 = False
    _include_comp = False

    def __init__(self, file_obj, resolve_ext_ref=None):
        self._file = file_obj
        self._resolve_ext_ref = resolve_ext_ref
        self._pending_ofs = defaultdict(list)
        self._pending_ref = defaultdict(list)
        self._full_ofs = []
        self._ext_refs = []

    @classmethod
    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
        walker.set_pack_data(pack_data)
        for unpacked in pack_data._iter_unpacked():
            walker.record(unpacked)
        return walker

    def record(self, unpacked):
        type_num = unpacked.pack_type_num
        offset = unpacked.offset
        if type_num == OFS_DELTA:
            base_offset = offset - unpacked.delta_base
            self._pending_ofs[base_offset].append(offset)
        elif type_num == REF_DELTA:
            self._pending_ref[unpacked.delta_base].append(offset)
        else:
            self._full_ofs.append((offset, type_num))

    def set_pack_data(self, pack_data):
        self._file = pack_data._file

    def _walk_all_chains(self):
        for offset, type_num in self._full_ofs:
            for result in self._follow_chain(offset, type_num, None):
                yield result
        for result in self._walk_ref_chains():
            yield result
        assert not self._pending_ofs

    def _ensure_no_pending(self):
        if self._pending_ref:
            raise KeyError([sha_to_hex(s) for s in self._pending_ref])

    def _walk_ref_chains(self):
        if not self._resolve_ext_ref:
            self._ensure_no_pending()
            return

        for base_sha, pending in sorted(self._pending_ref.iteritems()):
            try:
                type_num, chunks = self._resolve_ext_ref(base_sha)
            except KeyError:
                # Not an external ref, but may depend on one. Either it will
                # get popped via a _follow_chain call, or we will raise an
                # error below.
                continue
            self._ext_refs.append(base_sha)
            self._pending_ref.pop(base_sha)
            for new_offset in pending:
                for result in self._follow_chain(new_offset, type_num, chunks):
                    yield result

        self._ensure_no_pending()

    def _result(self, unpacked):
        return unpacked

    def _resolve_object(self, offset, obj_type_num, base_chunks):
        self._file.seek(offset)
        unpacked, _ = unpack_object(
            self._file.read, include_comp=self._include_comp,
            compute_crc32=self._compute_crc32)
        unpacked.offset = offset
        if base_chunks is None:
            assert unpacked.pack_type_num == obj_type_num
        else:
            assert unpacked.pack_type_num in DELTA_TYPES
            unpacked.obj_type_num = obj_type_num
            unpacked.obj_chunks = apply_delta(base_chunks,
                                              unpacked.decomp_chunks)
        return unpacked

    def _follow_chain(self, offset, obj_type_num, base_chunks):
        # Unlike PackData.get_object_at, there is no need to cache offsets as
        # this approach by design inflates each object exactly once.
        unpacked = self._resolve_object(offset, obj_type_num, base_chunks)
        yield self._result(unpacked)

        pending = chain(self._pending_ofs.pop(unpacked.offset, []),
                        self._pending_ref.pop(unpacked.sha(), []))
        for new_offset in pending:
            for new_result in self._follow_chain(
                new_offset, unpacked.obj_type_num, unpacked.obj_chunks):
                yield new_result

    def __iter__(self):
        return self._walk_all_chains()

    def ext_refs(self):
        return self._ext_refs

class PackIndexer(DeltaChainIterator):
    """Delta chain iterator that yields index entries."""

    _compute_crc32 = True

    def _result(self, unpacked):
        return unpacked.sha(), unpacked.offset, unpacked.crc32


class PackInflater(DeltaChainIterator):
    """Delta chain iterator that yields ShaFile objects."""

    def _result(self, unpacked):
        return unpacked.sha_file()
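
# Editor's sketch: the two subclasses above are the typical entry points.
# PackInflater walks every delta chain in a PackData and yields full ShaFile
# objects; 'id' is the object's hex SHA.
def _example_inflate_all(pack_data):
    return [sha_file.id for sha_file in PackInflater.for_pack_data(pack_data)]
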

class SHA1Reader(object):
    """Wrapper around a file-like object that remembers the SHA1 of its data."""

def pack_object_header(type_num, delta_base, size):
    """Create a pack object header for the given object info.

    :param type_num: Numeric type of the object.
    :param delta_base: Delta base offset or ref, or None for whole objects.
    :param size: Uncompressed object size.
    :return: A header for a packed object.
    """
    header = ''
    c = (type_num << 4) | (size & 15)
    size >>= 4
    while size:
        header += (chr(c | 0x80))
        c = size & 0x7f
        size >>= 7
    header += chr(c)
    if type_num == OFS_DELTA:
        ret = [delta_base & 0x7f]
        delta_base >>= 7
        while delta_base:
            delta_base -= 1
            ret.insert(0, 0x80 | (delta_base & 0x7f))
            delta_base >>= 7
        header += ''.join([chr(x) for x in ret])
    elif type_num == REF_DELTA:
        assert len(delta_base) == 20
        header += delta_base
    return header


def write_pack_object(f, type, object):
    """Write pack object to a file.

    :param f: File to write to
    :param type: Numeric type of the object
    :param object: Object to write
    :return: CRC32 of the data written to f
    """
    if type in DELTA_TYPES:
        delta_base, object = object
    else:
        delta_base = None
    header = pack_object_header(type, delta_base, len(object))
    comp_data = zlib.compress(object)
    crc32 = 0
    for data in (header, comp_data):
        f.write(data)
        crc32 = binascii.crc32(data, crc32)
    return crc32 & 0xffffffff
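
# Editor's sketch: a blob of size 9 packs into the single header byte
# 0x39 == (3 << 4) | 9, with the continuation bit (0x80) clear because the
# size fits in the four bits left over by the type.
def _example_header_for_small_blob():
    header = pack_object_header(3, None, 9)
    assert header == '\x39'
    return header
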

def write_pack(filename, objects, num_objects=None):
    """Write a new pack data file.

    :param filename: Path to the new pack file (without .pack extension)
    :param objects: Iterable of (object, path) tuples to write.
        Should provide __len__
    :param num_objects: Number of objects to write (deprecated; unused)
    :return: Tuple with checksum of pack file and index file
    """
    if num_objects is not None:
        warnings.warn('num_objects argument to write_pack is deprecated',
                      DeprecationWarning)
    f = GitFile(filename + '.pack', 'wb')
    try:
        entries, data_sum = write_pack_objects(f, objects,
            num_objects=num_objects)
    finally:
        f.close()
    entries = [(k, v[0], v[1]) for (k, v) in entries.iteritems()]
    entries.sort()
    f = GitFile(filename + '.idx', 'wb')
    try:
        return data_sum, write_pack_index_v2(f, entries, data_sum)
    finally:
        f.close()
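
# Editor's sketch: write a one-blob pack; write_pack appends the .pack and
# .idx extensions itself. Assumes dulwich.objects.Blob is importable.
def _example_write_pack(path):
    from dulwich.objects import Blob
    blob = Blob.from_string('spam')
    return write_pack(path, [(blob, None)])  # (pack checksum, index checksum)
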

def write_pack_header(f, num_objects):
    """Write a pack header for the given number of objects."""
    f.write('PACK')                          # Pack header
    f.write(struct.pack('>L', 2))            # Pack version
    f.write(struct.pack('>L', num_objects))  # Number of objects in pack

def deltify_pack_objects(objects, window=10):
    """Generate deltas for pack objects.

    :param objects: Objects to deltify
    :param window: Window size
    :return: Iterator over type_num, object id, delta_base, content
        delta_base is None for full text entries
    """
    # Build a list of objects ordered by the magic Linus heuristic
    # This helps us find good objects to diff against us
    magic = []
    for obj, path in objects:
        magic.append((obj.type_num, path, -obj.raw_length(), obj))
    magic.sort()

    possible_bases = deque()

    for type_num, path, neg_length, o in magic:
        raw = o.as_raw_string()
        winner = raw
        winner_base = None
        for base in possible_bases:
            if base.type_num != type_num:
                continue
            delta = create_delta(base.as_raw_string(), raw)
            if len(delta) < len(winner):
                winner_base = base.sha().digest()
                winner = delta
        yield type_num, o.sha().digest(), winner_base, winner
        possible_bases.appendleft(o)
        while len(possible_bases) > window:
            possible_bases.pop()
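
# Editor's sketch: deltify two similar blobs. With the ordering heuristic
# above, the second record should come out as a delta whose delta_base is
# the first blob's binary SHA. Assumes dulwich.objects.Blob.
def _example_deltify_two_blobs():
    from dulwich.objects import Blob
    a = Blob.from_string('x' * 100)
    b = Blob.from_string('x' * 100 + 'y')
    return list(deltify_pack_objects([(a, 'a'), (b, 'b')]))
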

def write_pack_objects(f, objects, window=10, num_objects=None):
    """Write a new pack data file.

    :param f: File to write to
    :param objects: Iterable of (object, path) tuples to write.
        Should provide __len__
    :param window: Sliding window size for searching for deltas; currently
        unimplemented
    :param num_objects: Number of objects (do not use, deprecated)
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    if num_objects is None:
        num_objects = len(objects)
    # FIXME: pack_contents = deltify_pack_objects(objects, window)
    pack_contents = (
        (o.type_num, o.sha().digest(), None, o.as_raw_string())
        for (o, path) in objects)
    return write_pack_data(f, num_objects, pack_contents)

def write_pack_data(f, num_records, records):
    """Write a new pack data file.

    :param f: File to write to
    :param num_records: Number of records
    :param records: Iterator over type_num, object_id, delta_base, raw
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    # Write the pack
    entries = {}
    f = SHA1Writer(f)
    write_pack_header(f, num_records)
    for type_num, object_id, delta_base, raw in records:
        offset = f.offset()
        if delta_base is not None:
            try:
                base_offset, base_crc32 = entries[delta_base]
            except KeyError:
                type_num = REF_DELTA
                raw = (delta_base, raw)
            else:
                type_num = OFS_DELTA
                # OFS_DELTA stores how far back the base is, not its absolute
                # offset (cf. record() above, which adds it back).
                raw = (offset - base_offset, raw)
        crc32 = write_pack_object(f, type_num, raw)
        entries[object_id] = (offset, crc32)
    return entries, f.write_sha()
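
# Editor's sketch: records are plain (type_num, object_id, delta_base, raw)
# tuples, so a pack of full texts can be fed to write_pack_data directly,
# bypassing deltify_pack_objects. Assumes dulwich.objects.Blob.
def _example_write_full_text_pack(f):
    from dulwich.objects import Blob
    blobs = [Blob.from_string('spam'), Blob.from_string('eggs')]
    records = [(b.type_num, b.sha().digest(), None, b.as_raw_string())
               for b in blobs]
    return write_pack_data(f, len(records), records)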