~jelmer/dulwich/lp-pqm : revision 427

35

except ImportError:

36

from dulwich._compat import defaultdict

37

38

import binascii

38

39

from cStringIO import (

39

40

StringIO,

40

41

)

60

61

except ImportError:

61

62

from dulwich._compat import unpack_from

62

63

import sys

64

import warnings

63

65

import zlib

64

66

65

67

from dulwich.errors import (

72

74

)

73

75

from dulwich._compat import (

74

76

make_sha,

77

SEEK_CUR,

75

78

SEEK_END,

76

79

)

77

80

from dulwich.objects import (

91

94

DELTA_TYPES = (OFS_DELTA, REF_DELTA)

92

95

93

96

94

def take_msb_bytes(read, crc32=None):
    """Read bytes marked with most significant bit.

    Bytes are read one at a time for as long as the most recently read byte
    has its most significant bit (0x80) set. The first byte without that bit
    terminates the sequence and is included in the result.

    :param read: Read function
    :param crc32: Optional starting CRC32. If not None, it is updated with
        every byte that is read.
    :return: Tuple of (list of byte values as ints, crc32). The crc32 is None
        if no starting CRC32 was passed in.
    :raise AssertionError: if the read function returns no data before the
        terminating byte is seen.
    """
    ret = []
    while len(ret) == 0 or ret[-1] & 0x80:
        b = read(1)
        if not b:
            # Without this check a truncated stream surfaces as an obscure
            # "ord() expected a character" TypeError.
            raise AssertionError(
                'unexpected end of data while reading MSB-terminated bytes')
        if crc32 is not None:
            crc32 = binascii.crc32(b, crc32)
        ret.append(ord(b))
    return ret, crc32

109

110

111

class UnpackedObject(object):
    """Holder for a single object as it is read out of a pack file.

    Instances are meant to be constructed only by unpack_object. Most
    attributes begin empty and are populated at later stages by
    read_zlib_chunks, unpack_object, DeltaChainIterator, etc.

    Anyone consuming an instance has to make sure that the function it came
    from guarantees the attributes they intend to read have been set.
    """

    __slots__ = [
        'offset',         # Offset in its pack.
        '_sha',           # Cached binary SHA.
        'obj_type_num',   # Type of this object.
        'obj_chunks',     # Decompressed and delta-resolved chunks.
        'pack_type_num',  # Type of this object in the pack (may be a delta).
        'delta_base',     # Delta base offset or SHA.
        'comp_chunks',    # Compressed object chunks.
        'decomp_chunks',  # Decompressed object chunks.
        'decomp_len',     # Decompressed length of this object.
        'crc32',          # CRC32.
        ]

    # TODO(dborowitz): read_zlib_chunks and unpack_object could very well be
    # methods of this object.
    def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
        """Create an unpacked object with no data read yet.

        :param pack_type_num: Type of the object as recorded in the pack.
        :param delta_base: Delta base offset or SHA.
        :param decomp_len: Decompressed length of the object.
        :param crc32: CRC32 value to start out with.
        """
        self.pack_type_num = pack_type_num
        self.delta_base = delta_base
        self.decomp_len = decomp_len
        self.crc32 = crc32
        self.offset = None
        self._sha = None
        self.comp_chunks = None
        self.decomp_chunks = []

        if pack_type_num in DELTA_TYPES:
            # The real type and content of a delta are not known yet.
            self.obj_type_num = None
            self.obj_chunks = None
            return

        # Whole object: the decompressed chunks are the object itself, so
        # both attributes share one list.
        self.obj_type_num = pack_type_num
        self.obj_chunks = self.decomp_chunks

    def sha(self):
        """Return the binary SHA of this object."""
        cached = self._sha
        if cached is None:
            cached = obj_sha(self.obj_type_num, self.obj_chunks)
            self._sha = cached
        return cached

    def sha_file(self):
        """Return a ShaFile from this object."""
        return ShaFile.from_raw_chunks(self.obj_type_num, self.obj_chunks)

    # Only provided for backwards compatibility with code that expects either
    # chunks or a delta tuple.
    def _obj(self):
        """Return the decompressed chunks, or (delta base, delta chunks)."""
        if self.pack_type_num not in DELTA_TYPES:
            return self.decomp_chunks
        return (self.delta_base, self.decomp_chunks)

    def __eq__(self, other):
        """Compare slot by slot; other types never compare equal."""
        if isinstance(other, UnpackedObject):
            for name in self.__slots__:
                if getattr(self, name) != getattr(other, name):
                    return False
            return True
        return False

    def __ne__(self, other):
        if self == other:
            return False
        return True

    def __repr__(self):
        fields = []
        for name in self.__slots__:
            fields.append('%s=%r' % (name, getattr(self, name)))
        return '%s(%s)' % (self.__class__.__name__, ', '.join(fields))

188

189

190

_ZLIB_BUFSIZE = 4096


def read_zlib_chunks(read_some, unpacked, include_comp=False,
                     buffer_size=_ZLIB_BUFSIZE):
    """Read zlib data from a buffer.

    This function requires that the buffer have additional data following the
    compressed data, which is guaranteed to be the case for git pack files.

    :param read_some: Read function that returns at least one byte, but may
        return less than the requested size.
    :param unpacked: An UnpackedObject to write result data to. If its crc32
        attr is not None, the CRC32 of the compressed bytes will be computed
        using this starting CRC32. Its decomp_len attr must hold the expected
        decompressed size; it is checked, not modified.
        After this function, will have the following attrs set:
        * comp_chunks (if include_comp is True)
        * decomp_chunks (appended to in place)
        * crc32
    :param include_comp: If True, include compressed data in the result.
    :param buffer_size: Size of the read buffer.
    :return: Leftover unused data from the decompression.
    :raise ValueError: if unpacked.decomp_len is negative.
    :raise zlib.error: if a decompression error occurred, the input ended
        before the zlib stream did, or the decompressed size does not match
        unpacked.decomp_len.
    """
    if unpacked.decomp_len <= -1:
        raise ValueError('non-negative zlib data stream size expected')
    decomp_obj = zlib.decompressobj()

    comp_chunks = []
    decomp_chunks = unpacked.decomp_chunks
    decomp_len = 0
    crc32 = unpacked.crc32

    while True:
        add = read_some(buffer_size)
        if not add:
            raise zlib.error('EOF before end of zlib stream')
        decomp = decomp_obj.decompress(add)
        decomp_len += len(decomp)
        decomp_chunks.append(decomp)
        unused = decomp_obj.unused_data
        if unused:
            # The stream ended inside this chunk; the tail belongs to
            # whatever follows and must not be counted as compressed data.
            add = add[:-len(unused)]
        if crc32 is not None:
            crc32 = binascii.crc32(add, crc32)
        if include_comp:
            # Only hold on to the compressed bytes when the caller wants
            # them, so large objects are not buffered twice for nothing.
            comp_chunks.append(add)
        if unused:
            break
    if crc32 is not None:
        crc32 &= 0xffffffff

    if decomp_len != unpacked.decomp_len:
        raise zlib.error('decompressed data does not match expected size')

    unpacked.crc32 = crc32
    if include_comp:
        unpacked.comp_chunks = comp_chunks
    return unused

137

252

138

253

139

254

def iter_sha1(iter):

190

305

"""

191

306

contents, size = _load_file_contents(f)

192

307

if contents[:4] == '\377tOc':

193

version = struct.unpack(">L", contents[4:8])[0]

308

version = struct.unpack('>L', contents[4:8])[0]

194

309

if version == 2:

195

310

return PackIndex2(path, file=f, contents=contents,

196

311

size=size)

197

312

else:

198

raise KeyError("Unknown pack index format %d" % version)

313

raise KeyError('Unknown pack index format %d' % version)

199

314

else:

200

315

return PackIndex1(path, file=f, contents=contents, size=size)

201

316

410

525

ret = []

411

526

for i in range(0x100):

412

527

fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]

413

ret.append(struct.unpack(">L", fanout_entry)[0])

528

ret.append(struct.unpack('>L', fanout_entry)[0])

414

529

return ret

415

530

416

531

def check(self):

468

583

self._fan_out_table = self._read_fan_out_table(0)

469

584

470

585

def _unpack_entry(self, i):

471

(offset, name) = unpack_from(">L20s", self._contents,

586

(offset, name) = unpack_from('>L20s', self._contents,

472

587

(0x100 * 4) + (i * 24))

473

588

return (name, offset, None)

474

589

478

593

479

594

def _unpack_offset(self, i):

480

595

offset = (0x100 * 4) + (i * 24)

481

return unpack_from(">L", self._contents, offset)[0]

596

return unpack_from('>L', self._contents, offset)[0]

482

597

483

598

def _unpack_crc32_checksum(self, i):

484

599

# Not stored in v1 index files

490

605

491

606

def __init__(self, filename, file=None, contents=None, size=None):

492

607

super(PackIndex2, self).__init__(filename, file, contents, size)

493

assert self._contents[:4] == '\377tOc', "Not a v2 pack index file"

494

(self.version, ) = unpack_from(">L", self._contents, 4)

495

assert self.version == 2, "Version was %d" % self.version

608

assert self._contents[:4] == '\377tOc', 'Not a v2 pack index file'

609

(self.version, ) = unpack_from('>L', self._contents, 4)

610

assert self.version == 2, 'Version was %d' % self.version

496

611

self._fan_out_table = self._read_fan_out_table(8)

497

612

self._name_table_offset = 8 + 0x100 * 4

498

613

self._crc32_table_offset = self._name_table_offset + 20 * len(self)

509

624

510

625

def _unpack_offset(self, i):

511

626

offset = self._pack_offset_table_offset + i * 4

512

return unpack_from(">L", self._contents, offset)[0]

627

return unpack_from('>L', self._contents, offset)[0]

513

628

514

629

def _unpack_crc32_checksum(self, i):

515

return unpack_from(">L", self._contents,

630

return unpack_from('>L', self._contents,

516

631

self._crc32_table_offset + i * 4)[0]

517

632

518

633

520

635

"""Read the header of a pack file.

521

636

522

637

:param read: Read function

523

:return: Tuple with pack version and number of objects

638

:return: Tuple of (pack version, number of objects). If no data is available

639

to read, returns (None, None).

524

640

"""

525

641

header = read(12)

526

assert header[:4] == "PACK"

527

(version,) = unpack_from(">L", header, 4)

528

assert version in (2, 3), "Version was %d" % version

529

(num_objects,) = unpack_from(">L", header, 8)

642

if not header:

643

return None, None

644

assert header[:4] == 'PACK'

645

(version,) = unpack_from('>L', header, 4)

646

assert version in (2, 3), 'Version was %d' % version

647

(num_objects,) = unpack_from('>L', header, 8)

530

648

return (version, num_objects)

531

649

532

650

534

652

return sum(imap(len, chunks))

535

653

536

654

537

def unpack_object(read_all, read_some=None):

655

def unpack_object(read_all, read_some=None, compute_crc32=False,

656

include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):

538

657

"""Unpack a Git object.

539

658

540

659

:param read_all: Read function that blocks until the number of requested

541

660

bytes are read.

542

661

:param read_some: Read function that returns at least one byte, but may not

543

662

return the number of bytes requested.

544

:return: tuple with type, uncompressed data, compressed size and tail data.

663

:param compute_crc32: If True, compute the CRC32 of the compressed data. If

664

False, the returned CRC32 will be None.

665

:param include_comp: If True, include compressed data in the result.

666

:param zlib_bufsize: An optional buffer size for zlib operations.

667

:return: A tuple of (unpacked, unused), where unused is the unused data

668

leftover from decompression, and unpacked in an UnpackedObject with

669

the following attrs set:

670

671

* obj_chunks (for non-delta types)

672

* pack_type_num

673

* delta_base (for delta types)

674

* comp_chunks (if include_comp is True)

675

* decomp_chunks

676

* decomp_len

677

* crc32 (if compute_crc32 is True)

545

678

"""

546

679

if read_some is None:

547

680

read_some = read_all

548

bytes = take_msb_bytes(read_all)

549

type = (bytes[0] >> 4) & 0x07

681

if compute_crc32:

682

crc32 = 0

683

else:

684

crc32 = None

685

686

bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)

687

type_num = (bytes[0] >> 4) & 0x07

550

688

size = bytes[0] & 0x0f

551

689

for i, byte in enumerate(bytes[1:]):

552

690

size += (byte & 0x7f) << ((i * 7) + 4)

691

553

692

raw_base = len(bytes)

554

if type == OFS_DELTA:

555

bytes = take_msb_bytes(read_all)

693

if type_num == OFS_DELTA:

694

bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)

556

695

raw_base += len(bytes)

557

696

assert not (bytes[-1] & 0x80)

558

697

delta_base_offset = bytes[0] & 0x7f

560

699

delta_base_offset += 1

561

700

delta_base_offset <<= 7

562

701

delta_base_offset += (byte & 0x7f)

563

uncomp, comp_len, unused = read_zlib_chunks(read_some, size)

564

assert size == chunks_length(uncomp)

565

return type, (delta_base_offset, uncomp), comp_len+raw_base, unused

566

elif type == REF_DELTA:

567

basename = read_all(20)

702

delta_base = delta_base_offset

703

elif type_num == REF_DELTA:

704

delta_base = read_all(20)

705

if compute_crc32:

706

crc32 = binascii.crc32(delta_base, crc32)

568

707

raw_base += 20

569

uncomp, comp_len, unused = read_zlib_chunks(read_some, size)

570

assert size == chunks_length(uncomp)

571

return type, (basename, uncomp), comp_len+raw_base, unused

572

708

else:

573

uncomp, comp_len, unused = read_zlib_chunks(read_some, size)

574

assert chunks_length(uncomp) == size

575

return type, uncomp, comp_len+raw_base, unused

709

delta_base = None

710

711

unpacked = UnpackedObject(type_num, delta_base, size, crc32)

712

unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,

713

include_comp=include_comp)

714

return unpacked, unused

576

715

577

716

578

717

def _compute_object_size((num, obj)):

589

728

appropriate.

590

729

"""

591

730

592

def __init__(self, read_all, read_some=None):

731

def __init__(self, read_all, read_some=None, zlib_bufsize=_ZLIB_BUFSIZE):

593

732

self.read_all = read_all

594

733

if read_some is None:

595

734

self.read_some = read_all

600

739

self._rbuf = StringIO()

601

740

# trailer is a deque to avoid memory allocation on small reads

602

741

self._trailer = deque()

742

self._zlib_bufsize = zlib_bufsize

603

743

604

744

def _read(self, read, size):

605

745

"""Read up to size bytes using the given callback.

606

746

607

747

As a side effect, update the verifier's hash (excluding the last 20

608

bytes read) and write through to the output file.

748

bytes read).

609

749

610

750

:param read: The read callback to read from.

611

751

:param size: The maximum number of bytes to read; the particular

665

805

def __len__(self):

666

806

return self._num_objects

667

807

668

def read_objects(self):

808

def read_objects(self, compute_crc32=False):

669

809

"""Read the objects in this pack file.

670

810

671

:raise AssertionError: if there is an error in the pack format.

811

:param compute_crc32: If True, compute the CRC32 of the compressed

812

data. If False, the returned CRC32 will be None.

813

:return: Iterator over UnpackedObjects with the following members set:

814

offset

815

obj_type_num

816

obj_chunks (for non-delta types)

817

delta_base (for delta types)

818

decomp_chunks

819

decomp_len

820

crc32 (if compute_crc32 is True)

672

821

:raise ChecksumMismatch: if the checksum of the pack contents does not

673

822

match the checksum in the pack trailer.

674

823

:raise zlib.error: if an error occurred during zlib decompression.

675

824

:raise IOError: if an error occurred writing to the output file.

676

825

"""

677

826

pack_version, self._num_objects = read_pack_header(self.read)

827

if pack_version is None:

828

return

829

678

830

for i in xrange(self._num_objects):

679

type, uncomp, comp_len, unused = unpack_object(self.read, self.recv)

680

yield type, uncomp, comp_len

831

offset = self.offset

832

unpacked, unused = unpack_object(

833

self.read, read_some=self.recv, compute_crc32=compute_crc32,

834

zlib_bufsize=self._zlib_bufsize)

835

unpacked.offset = offset

681

836

682

837

# prepend any unused data to current read buffer

683

838

buf = StringIO()

686

841

buf.seek(0)

687

842

self._rbuf = buf

688

843

689

pack_sha = sha_to_hex(''.join([c for c in self._trailer]))

690

calculated_sha = self.sha.hexdigest()

691

if pack_sha != calculated_sha:

692

raise ChecksumMismatch(pack_sha, calculated_sha)

693

694

695

class PackObjectIterator(object):

696

697

def __init__(self, pack, progress=None):

698

self.i = 0

699

self.offset = pack._header_size

700

self.num = len(pack)

701

self.map = pack._file

702

self._progress = progress

703

704

def __iter__(self):

705

return self

706

707

def __len__(self):

708

return self.num

709

710

def next(self):

711

if self.i == self.num:

712

raise StopIteration

713

self.map.seek(self.offset)

714

(type, obj, total_size, unused) = unpack_object(self.map.read)

715

self.map.seek(self.offset)

716

crc32 = zlib.crc32(self.map.read(total_size)) & 0xffffffff

717

ret = (self.offset, type, obj, crc32)

718

self.offset += total_size

719

if self._progress is not None:

720

self._progress(self.i, self.num)

721

self.i+=1

722

return ret

844

yield unpacked

845

846

if self._buf_len() < 20:

847

# If the read buffer is full, then the last read() got the whole

848

# trailer off the wire. If not, it means there is still some of the

849

# trailer to read. We need to read() all 20 bytes; N come from the

850

# read buffer and (20 - N) come from the wire.

851

self.read(20)

852

853

pack_sha = ''.join(self._trailer)

854

if pack_sha != self.sha.digest():

855

raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())

856

857

858

class PackStreamCopier(PackStreamReader):
    """Pack stream reader that copies everything it reads to a file.

    The pack is read from a ReceivableProtocol using read() or recv() as
    appropriate and written out to the given file-like object.
    """

    def __init__(self, read_all, read_some, outfile, delta_iter=None):
        """Initialize the copier.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        :param outfile: File-like object to write output through.
        :param delta_iter: Optional DeltaChainIterator to record deltas as we
            read them.
        """
        super(PackStreamCopier, self).__init__(read_all, read_some=read_some)
        self._delta_iter = delta_iter
        self.outfile = outfile

    def _read(self, read, size):
        """Read data from the read callback and write it to the file."""
        chunk = super(PackStreamCopier, self)._read(read, size)
        self.outfile.write(chunk)
        return chunk

    def verify(self):
        """Verify a pack stream and write it to the output file.

        The stream is consumed through read_objects, so this may throw
        whatever that method throws. If a delta iterator was given, every
        object read is recorded on it.
        """
        delta_iter = self._delta_iter
        if not delta_iter:
            # Nothing to record; just drain the stream so it gets copied
            # and checked.
            for _ in self.read_objects():
                pass
            return
        for unpacked in self.read_objects():
            delta_iter.record(unpacked)

898

723

899

724

900

def obj_sha(type, chunks):

725

901

"""Compute the SHA for a numeric type and object chunks."""

730

906

return sha.digest()

731

907

732

908

909

def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1<<16):
    """Hash a portion of a file into a new SHA.

    :param f: A file-like object to read from that supports seek().
    :param start_ofs: The offset in the file to start reading at.
    :param end_ofs: The offset in the file to end reading at, relative to the
        end of the file.
    :param buffer_size: A buffer size for reading.
    :return: A new SHA object updated with data read from the file.
    :raise AssertionError: if the requested range has a negative length, or
        the file ends before the whole range could be read.
    """
    sha = make_sha()
    f.seek(0, SEEK_END)
    length = f.tell()
    todo = length + end_ofs - start_ofs
    if todo < 0:
        # A negative count never reaches zero below, so the loop would
        # spin forever.
        raise AssertionError(
            'Attempt to hash a negative range: start_ofs %d, end_ofs %d, '
            'file length %d' % (start_ofs, end_ofs, length))
    f.seek(start_ofs)
    while todo:
        data = f.read(min(todo, buffer_size))
        if not data:
            # EOF before the range was exhausted (e.g. a positive end_ofs);
            # bail out rather than loop forever on empty reads.
            raise AssertionError(
                'Unexpected end of file with %d bytes left to hash' % todo)
        sha.update(data)
        todo -= len(data)
    return sha

928

929

733

930

class PackData(object):

734

931

"""The data contained in a packfile.

735

932

794

991

return self._size

795

992

self._size = os.path.getsize(self._filename)

796

993

if self._size < self._header_size:

797

errmsg = ("%s is too small for a packfile (%d < %d)" %

994

errmsg = ('%s is too small for a packfile (%d < %d)' %

798

995

(self._filename, self._size, self._header_size))

799

996

raise AssertionError(errmsg)

800

997

return self._size

808

1005

809

1006

:return: 20-byte binary SHA1 digest

810

1007

"""

811

s = make_sha()

812

self._file.seek(0)

813

todo = self._get_size() - 20

814

while todo > 0:

815

x = self._file.read(min(todo, 1<<16))

816

s.update(x)

817

todo -= len(x)

818

return s.digest()

1008

return compute_file_sha(self._file, end_ofs=-20).digest()

819

1009

820

1010

def get_ref(self, sha):

821

1011

"""Get the object for a ref SHA, only looking in this pack."""

861

1051

self._offset_cache[offset] = type, chunks

862

1052

return type, chunks

863

1053

864

def iterobjects(self, progress=None, compute_crc32=True):
    """Iterate over the objects in this pack in the order they are stored.

    :param progress: Optional callback, called as progress(i, total) after
        each object has been unpacked, with i counting from 1.
    :param compute_crc32: Passed through to unpack_object.
    :return: Iterator of (offset, pack type number, object, crc32) tuples,
        where object is the value of UnpackedObject._obj().
    """
    self._file.seek(self._header_size)
    for index in xrange(1, self._num_objects + 1):
        start = self._file.tell()
        unpacked, leftover = unpack_object(
            self._file.read, compute_crc32=compute_crc32)
        if progress is not None:
            progress(index, self._num_objects)
        entry = (start, unpacked.pack_type_num, unpacked._obj(),
                 unpacked.crc32)
        yield entry
        # Back up over unused data.
        self._file.seek(-len(leftover), SEEK_CUR)

1065

1066

def _iter_unpacked(self):
    """Iterate over the UnpackedObjects of this pack, in file order.

    Each yielded object has its offset attribute set; no CRC32 is computed.
    """
    # TODO(dborowitz): Merge this with iterobjects, if we can change its
    # return type.
    self._file.seek(self._header_size)
    remaining = self._num_objects
    for _ in xrange(remaining):
        start = self._file.tell()
        unpacked, leftover = unpack_object(
            self._file.read, compute_crc32=False)
        unpacked.offset = start
        yield unpacked
        # Back up over unused data.
        self._file.seek(-len(leftover), SEEK_CUR)

866

1077

867

1078

def iterentries(self, progress=None):

868

1079

"""Yield entries summarizing the contents of this pack.

871

1082

object count.

872

1083

:return: iterator of tuples with (sha, offset, crc32)

873

1084

"""

874

for offset, type, obj, crc32 in self.iterobjects(progress=progress):

875

assert isinstance(offset, int)

876

assert isinstance(type, int)

877

assert isinstance(obj, list) or isinstance(obj, tuple)

878

type, obj = self.resolve_object(offset, type, obj)

879

yield obj_sha(type, obj), offset, crc32

1085

num_objects = self._num_objects

1086

for i, result in enumerate(PackIndexer.for_pack_data(self)):

1087

if progress is not None:

1088

progress(i, num_objects)

1089

yield result

880

1090

881

1091

def sorted_entries(self, progress=None):

882

1092

"""Return entries in this pack, sorted by SHA.

930

1140

elif version == 2:

931

1141

return self.create_index_v2(filename, progress)

932

1142

else:

933

raise ValueError("unknown index format %d" % version)

1143

raise ValueError('unknown index format %d' % version)

934

1144

935

1145

def get_stored_checksum(self):

936

1146

"""Return the expected checksum stored in this pack."""

937

self._file.seek(self._get_size()-20)

1147

self._file.seek(-20, SEEK_END)

938

1148

return self._file.read(20)

939

1149

940

1150

def check(self):

954

1164

if offset in self._offset_cache:

955

1165

return self._offset_cache[offset]

956

1166

assert isinstance(offset, long) or isinstance(offset, int),\

957

"offset was %r" % offset

1167

'offset was %r' % offset

958

1168

assert offset >= self._header_size

959

1169

self._file.seek(offset)

960

return unpack_object(self._file.read)[:2]

961

962

963

class ThinPackData(PackData):

964

"""PackData for thin packs, which require an ObjectStore for resolving."""

965

966

def __init__(self, resolve_ext_ref, *args, **kwargs):

967

super(ThinPackData, self).__init__(*args, **kwargs)

968

self.resolve_ext_ref = resolve_ext_ref

1170

unpacked, _ = unpack_object(self._file.read)

1171

return (unpacked.pack_type_num, unpacked._obj())

1172

1173

1174

class DeltaChainIterator(object):
    """Abstract iterator over pack data based on delta chains.

    Each object in the pack is guaranteed to be inflated exactly once,
    regardless of how many objects reference it as a delta base. As a result,
    memory usage is proportional to the length of the longest delta chain.

    Subclasses can override _result to define the result type of the iterator.
    By default, results are UnpackedObjects with the following members set:

    * offset
    * obj_type_num
    * obj_chunks
    * pack_type_num
    * delta_base (for delta types)
    * comp_chunks (if _include_comp is True)
    * decomp_chunks
    * decomp_len
    * crc32 (if _compute_crc32 is True)
    """

    _compute_crc32 = False
    _include_comp = False

    def __init__(self, file_obj, resolve_ext_ref=None):
        """Create an iterator with nothing recorded yet.

        :param file_obj: Seekable file object the pack objects are read from.
        :param resolve_ext_ref: Optional function called with a base SHA and
            expected to return (type_num, chunks), raising KeyError if the
            SHA cannot be resolved.
        """
        self._file = file_obj
        self._resolve_ext_ref = resolve_ext_ref
        # Offsets of deltas waiting on a base, keyed by base offset / SHA.
        self._pending_ofs = defaultdict(list)
        self._pending_ref = defaultdict(list)
        # (offset, type_num) of every non-delta object recorded.
        self._full_ofs = []
        self._shas = {}
        # Base SHAs that were resolved through resolve_ext_ref.
        self._ext_refs = []

    @classmethod
    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
        """Build an iterator with every object of pack_data recorded."""
        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
        walker.set_pack_data(pack_data)
        for unpacked in pack_data._iter_unpacked():
            walker.record(unpacked)
        return walker

    def record(self, unpacked):
        """Register an unpacked object so it is visited during iteration.

        Deltas are queued under their base (offset or SHA); whole objects
        are remembered as chain starting points.
        """
        type_num = unpacked.pack_type_num
        offset = unpacked.offset
        if type_num == OFS_DELTA:
            base_offset = offset - unpacked.delta_base
            self._pending_ofs[base_offset].append(offset)
        elif type_num == REF_DELTA:
            self._pending_ref[unpacked.delta_base].append(offset)
        else:
            self._full_ofs.append((offset, type_num))

    def set_pack_data(self, pack_data):
        """Read objects from the file underlying pack_data from now on."""
        self._file = pack_data._file

    def _walk_all_chains(self):
        """Yield results for every chain rooted at a whole object, then for
        chains rooted at external refs."""
        for offset, type_num in self._full_ofs:
            for result in self._follow_chain(offset, type_num, None):
                yield result
        for result in self._walk_ref_chains():
            yield result
        assert not self._pending_ofs

    def _ensure_no_pending(self):
        """Raise KeyError listing the hex SHAs of unresolved ref bases."""
        if self._pending_ref:
            raise KeyError([sha_to_hex(s) for s in self._pending_ref])

    def _walk_ref_chains(self):
        """Yield results for chains whose base must be resolved externally.

        :raise KeyError: if ref deltas remain whose base was never found.
        """
        if not self._resolve_ext_ref:
            self._ensure_no_pending()
            return

        # items() takes a snapshot, so the dict can be modified in the loop.
        for base_sha, pending in sorted(self._pending_ref.items()):
            if base_sha not in self._pending_ref:
                # Already consumed by a _follow_chain call made for an
                # earlier base. Resolving it again would walk the same
                # objects twice and the pop() below would raise KeyError.
                continue
            try:
                type_num, chunks = self._resolve_ext_ref(base_sha)
            except KeyError:
                # Not an external ref, but may depend on one. Either it will
                # get popped via a _follow_chain call, or we will raise an
                # error below.
                continue
            self._ext_refs.append(base_sha)
            self._pending_ref.pop(base_sha)
            for new_offset in pending:
                for result in self._follow_chain(new_offset, type_num,
                                                 chunks):
                    yield result

        self._ensure_no_pending()

    def _result(self, unpacked):
        """Convert a resolved UnpackedObject into an iteration result."""
        return unpacked

    def _resolve_object(self, offset, obj_type_num, base_chunks):
        """Unpack the object at offset, applying it to base_chunks if given.

        :param offset: Offset of the object in the file.
        :param obj_type_num: Type of the resolved object.
        :param base_chunks: Chunks of the delta base, or None when the object
            at offset is expected to be a whole object.
        :return: The UnpackedObject, with offset set and, for deltas,
            obj_type_num and obj_chunks filled in.
        """
        self._file.seek(offset)
        unpacked, _ = unpack_object(
            self._file.read, include_comp=self._include_comp,
            compute_crc32=self._compute_crc32)
        unpacked.offset = offset
        if base_chunks is None:
            assert unpacked.pack_type_num == obj_type_num
        else:
            assert unpacked.pack_type_num in DELTA_TYPES
            unpacked.obj_type_num = obj_type_num
            unpacked.obj_chunks = apply_delta(base_chunks,
                                              unpacked.decomp_chunks)
        return unpacked

    def _follow_chain(self, offset, obj_type_num, base_chunks):
        """Yield the result for the object at offset and, depth first, for
        every recorded delta that is based on it.

        Implemented with an explicit stack rather than recursion so that
        long delta chains cannot exhaust the interpreter's recursion limit.
        """
        # Unlike PackData.get_object_at, there is no need to cache offsets as
        # this approach by design inflates each object exactly once.
        todo = [(offset, obj_type_num, base_chunks)]
        while todo:
            offset, obj_type_num, base_chunks = todo.pop()
            unpacked = self._resolve_object(offset, obj_type_num, base_chunks)
            yield self._result(unpacked)

            children = list(chain(
                self._pending_ofs.pop(unpacked.offset, []),
                self._pending_ref.pop(unpacked.sha(), [])))
            # Push in reverse so children are popped, and therefore yielded,
            # in the order they were recorded.
            children.reverse()
            for new_offset in children:
                todo.append((new_offset, unpacked.obj_type_num,
                             unpacked.obj_chunks))

    def __iter__(self):
        return self._walk_all_chains()

    def ext_refs(self):
        """Return the base SHAs that were resolved externally so far."""
        return self._ext_refs

1298

1299

1300

class PackIndexer(DeltaChainIterator):
    """Delta chain iterator whose results are pack index entries.

    CRC32 computation is switched on so that every entry carries one.
    """

    _compute_crc32 = True

    def _result(self, unpacked):
        """Return the (sha, offset, crc32) entry for an unpacked object."""
        entry = (unpacked.sha(), unpacked.offset, unpacked.crc32)
        return entry

1307

1308

1309

class PackInflater(DeltaChainIterator):
    """Delta chain iterator whose results are ShaFile objects."""

    def _result(self, unpacked):
        """Return the ShaFile built from a resolved unpacked object."""
        sha_file = unpacked.sha_file()
        return sha_file

1036

1314

1037

1315

1038

1316

class SHA1Reader(object):

1040

1318

1041

1319

def __init__(self, f):

1042

1320

self.f = f

1043

self.sha1 = make_sha("")

1321

self.sha1 = make_sha('')

1044

1322

1045

1323

def read(self, num=None):

1046

1324

data = self.f.read(num)

1064

1342

1065

1343

def __init__(self, f):

1066

1344

self.f = f

1067

self.sha1 = make_sha("")

1345

self.length = 0

1346

self.sha1 = make_sha('')

1068

1347

1069

1348

def write(self, data):

1070

1349

self.sha1.update(data)

1071

1350

self.f.write(data)

1351

self.length += len(data)

1072

1352

1073

1353

def write_sha(self):

1074

1354

sha = self.sha1.digest()

1075

1355

assert len(sha) == 20

1076

1356

self.f.write(sha)

1357

self.length += len(sha)

1077

1358

return sha

1078

1359

1079

1360

def close(self):

1081

1362

self.f.close()

1082

1363

return sha

1083

1364

1365

def offset(self):

1366

return self.length

1367

1084

1368

def tell(self):

1085

1369

return self.f.tell()

1086

1370

1087

1371

1088

def write_pack_object(f, type, object):

1372

def pack_object_header(type_num, delta_base, size):
    """Create a pack object header for the given object info.

    :param type_num: Numeric type of the object.
    :param delta_base: Delta base offset or ref, or None for whole objects.
    :param size: Uncompressed object size.
    :return: A header for a packed object.
    """
    parts = []

    # First byte: type in bits 4-6, low four size bits; every following byte
    # carries seven more size bits.  The high bit marks "more bytes follow".
    current = (type_num << 4) | (size & 15)
    remaining = size >> 4
    while remaining:
        parts.append(chr(current | 0x80))
        current = remaining & 0x7f
        remaining >>= 7
    parts.append(chr(current))

    if type_num == OFS_DELTA:
        # Offset bytes are produced least significant group first and then
        # reversed; every byte but the last one emitted has the high bit set.
        offset_bytes = [chr(delta_base & 0x7f)]
        delta_base >>= 7
        while delta_base:
            delta_base -= 1
            offset_bytes.append(chr(0x80 | (delta_base & 0x7f)))
            delta_base >>= 7
        offset_bytes.reverse()
        parts.extend(offset_bytes)
    elif type_num == REF_DELTA:
        assert len(delta_base) == 20
        parts.append(delta_base)

    return ''.join(parts)

1400

1401

1402

def write_pack_object(f, type, object, sha=None):

1089

1403

"""Write pack object to a file.

1090

1404

1091

1405

:param f: File to write to

1093

1407

:param object: Object to write

1094

1408

:return: Tuple with offset at which the object was written, and crc32

1095

1409

"""

1096

offset = f.tell()

1097

packed_data_hdr = ""

1098

if type == OFS_DELTA:

1099

(delta_base_offset, object) = object

1100

elif type == REF_DELTA:

1101

(basename, object) = object

1102

size = len(object)

1103

c = (type << 4) | (size & 15)

1104

size >>= 4

1105

while size:

1106

packed_data_hdr += (chr(c | 0x80))

1107

c = size & 0x7f

1108

size >>= 7

1109

packed_data_hdr += chr(c)

1110

if type == OFS_DELTA:

1111

ret = [delta_base_offset & 0x7f]

1112

delta_base_offset >>= 7

1113

while delta_base_offset:

1114

delta_base_offset -= 1

1115

ret.insert(0, 0x80 | (delta_base_offset & 0x7f))

1116

delta_base_offset >>= 7

1117

packed_data_hdr += "".join([chr(x) for x in ret])

1118

elif type == REF_DELTA:

1119

assert len(basename) == 20

1120

packed_data_hdr += basename

1121

packed_data = packed_data_hdr + zlib.compress(object)

1122

f.write(packed_data)

1123

return (offset, (zlib.crc32(packed_data) & 0xffffffff))

1124

1125

1126

def write_pack(filename, objects, num_objects):

1410

if type in DELTA_TYPES:

1411

delta_base, object = object

1412

else:

1413

delta_base = None

1414

header = pack_object_header(type, delta_base, len(object))

1415

comp_data = zlib.compress(object)

1416

crc32 = 0

1417

for data in (header, comp_data):

1418

f.write(data)

1419

if sha is not None:

1420

sha.update(data)

1421

crc32 = binascii.crc32(data, crc32)

1422

return crc32 & 0xffffffff

1423

1424

1425

def write_pack(filename, objects, num_objects=None):

1127

1426

"""Write a new pack data file.

1128

1427

1129

1428

:param filename: Path to the new pack file (without .pack extension)

1130

:param objects: Iterable over (object, path) tuples to write

1131

:param num_objects: Number of objects to write

1429

:param objects: Iterable of (object, path) tuples to write.

1430

Should provide __len__

1132

1431

:return: Tuple with checksum of pack file and index file

1133

1432

"""

1134

f = GitFile(filename + ".pack", 'wb')

1433

if num_objects is not None:

1434

warnings.warn('num_objects argument to write_pack is deprecated',

1435

DeprecationWarning)

1436

f = GitFile(filename + '.pack', 'wb')

1135

1437

try:

1136

entries, data_sum = write_pack_data(f, objects, num_objects)

1438

entries, data_sum = write_pack_objects(f, objects,

1439

num_objects=num_objects)

1137

1440

finally:

1138

1441

f.close()

1442

entries = [(k, v[0], v[1]) for (k, v) in entries.iteritems()]

1139

1443

entries.sort()

1140

f = GitFile(filename + ".idx", 'wb')

1444

f = GitFile(filename + '.idx', 'wb')

1141

1445

try:

1142

1446

return data_sum, write_pack_index_v2(f, entries, data_sum)

1143

1447

finally:

1151

1455

f.write(struct.pack('>L', num_objects)) # Number of objects in pack

1152

1456

1153

1457

1154

def write_pack_data(f, objects, num_objects, window=10):

1155

"""Write a new pack data file.

1458

def deltify_pack_objects(objects, window=10):

1459

"""Generate deltas for pack objects.

1156

1460

1157

:param f: File to write to

1158

:param objects: Iterable over (object, path) tuples to write

1159

:param num_objects: Number of objects to write

1160

:param window: Sliding window size for searching for deltas; currently

1161

unimplemented

1162

:return: List with (name, offset, crc32 checksum) entries, pack checksum

1461

:param objects: Objects to deltify

1462

:param window: Window size

1463

:return: Iterator over type_num, object id, delta_base, content

1464

delta_base is None for full text entries

1163

1465

"""

1164

recency = list(objects)

1165

# FIXME: Somehow limit delta depth

1166

# FIXME: Make thin-pack optional (its not used when cloning a pack)

1167

1466

# Build a list of objects ordered by the magic Linus heuristic

1168

1467

# This helps us find good objects to diff against us

1169

1468

magic = []

1170

for obj, path in recency:

1171

magic.append( (obj.type_num, path, 1, -obj.raw_length(), obj) )

1469

for obj, path in objects:

1470

magic.append((obj.type_num, path, -obj.raw_length(), obj))

1172

1471

magic.sort()

1173

# Build a map of objects and their index in magic - so we can find

1174

# preceeding objects to diff against

1175

offs = {}

1176

for i in range(len(magic)):

1177

offs[magic[i][4]] = i

1178

# Write the pack

1179

entries = []

1180

f = SHA1Writer(f)

1181

write_pack_header(f, num_objects)

1182

for o, path in recency:

1183

sha1 = o.sha().digest()

1184

orig_t = o.type_num

1472

1473

possible_bases = deque()

1474

1475

for type_num, path, neg_length, o in magic:

1185

1476

raw = o.as_raw_string()

1186

1477

winner = raw

1187

t = orig_t

1188

#for i in range(offs[o]-window, window):

1189

# if i < 0 or i >= len(offs): continue

1190

# b = magic[i][4]

1191

# if b.type_num != orig_t: continue

1192

# base = b.as_raw_string()

1193

# delta = create_delta(base, raw)

1194

# if len(delta) < len(winner):

1195

# winner = delta

1196

# t = 6 if magic[i][2] == 1 else 7

1197

offset, crc32 = write_pack_object(f, t, winner)

1198

entries.append((sha1, offset, crc32))

1478

winner_base = None

1479

for base in possible_bases:

1480

if base.type_num != type_num:

1481

continue

1482

delta = create_delta(base.as_raw_string(), raw)

1483

if len(delta) < len(winner):

1484

winner_base = base.sha().digest()

1485

winner = delta

1486

yield type_num, o.sha().digest(), winner_base, winner

1487

possible_bases.appendleft(o)

1488

while len(possible_bases) > window:

1489

possible_bases.pop()

1490

1491

1492

def write_pack_objects(f, objects, window=10, num_objects=None):
    """Write a new pack data file.

    :param f: File to write to
    :param objects: Iterable of (object, path) tuples to write.
        Should provide __len__
    :param window: Sliding window size for searching for deltas; currently
        unimplemented
    :param num_objects: Number of objects (do not use, deprecated)
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    count = num_objects
    if count is None:
        count = len(objects)
    # FIXME: pack_contents = deltify_pack_objects(objects, window)
    # Until deltification is enabled every object is written as full text,
    # i.e. with a delta base of None.
    records = (
        (obj.type_num, obj.sha().digest(), None, obj.as_raw_string())
        for (obj, _path) in objects)
    return write_pack_data(f, count, records)

1510

1511

1512

def write_pack_data(f, num_records, records):
    """Write a new pack data file.

    :param f: File to write to
    :param num_records: Number of records
    :param records: Iterator over type_num, object_id, delta_base, raw
    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    # Write the pack
    entries = {}
    f = SHA1Writer(f)
    write_pack_header(f, num_records)
    for type_num, object_id, delta_base, raw in records:
        # Position at which this record's header starts.
        offset = f.offset()
        if delta_base is not None:
            try:
                base_offset, base_crc32 = entries[delta_base]
            except KeyError:
                # Base has not been written to this pack: refer to it by id.
                type_num = REF_DELTA
                raw = (delta_base, raw)
            else:
                # An offset delta stores the distance back from this entry
                # to its base, not the base's absolute position in the pack.
                type_num = OFS_DELTA
                raw = (offset - base_offset, raw)
        crc32 = write_pack_object(f, type_num, raw)
        entries[object_id] = (offset, crc32)
    return entries, f.write_sha()

1200

1538

1201

1539

1214

1552

fan_out_table[ord(name[0])] += 1

1215

1553

# Fan-out table

1216

1554

for i in range(0x100):

1217

f.write(struct.pack(">L", fan_out_table[i]))

1555

f.write(struct.pack('>L', fan_out_table[i]))

1218

1556

fan_out_table[i+1] += fan_out_table[i]

1219

1557

for (name, offset, entry_checksum) in entries:

1220

f.write(struct.pack(">L20s", offset, name))

1558

f.write(struct.pack('>L20s', offset, name))

1221

1559

assert len(pack_checksum) == 20

1222

1560

f.write(pack_checksum)

1223

1561

return f.write_sha()

1231

1569

"""

1232

1570

assert isinstance(base_buf, str)

1233

1571

assert isinstance(target_buf, str)

1234

out_buf = ""

1572

out_buf = ''

1235

1573

# write delta header

1236

1574

def encode_size(size):

1237

ret = ""

1575

ret = ''

1238

1576

c = size & 0x7f

1239

1577

size >>= 7

1240

1578

while size:

1249

1587

seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)

1250

1588

for opcode, i1, i2, j1, j2 in seq.get_opcodes():

1251

1589

# Git patch opcodes don't care about deletes!

1252

#if opcode == "replace" or opcode == "delete":

1590

#if opcode == 'replace' or opcode == 'delete':

1253

1591

# pass

1254

if opcode == "equal":

1592

if opcode == 'equal':

1255

1593

# If they are equal, unpacker will use data from base_buf

1256

1594

# Write out an opcode that says what range to use

1257

scratch = ""

1595

scratch = ''

1258

1596

op = 0x80

1259

1597

o = i1

1260

1598

for i in range(4):

1268

1606

op |= 1 << (4+i)

1269

1607

out_buf += chr(op)

1270

1608

out_buf += scratch

1271

if opcode == "replace" or opcode == "insert":

1609

if opcode == 'replace' or opcode == 'insert':

1272

1610

# If we are replacing a range or adding one, then we just

1273

1611

# output it to the stream (prefixed by its size)

1274

1612

s = j2 - j1

1290

1628

:param delta: Delta instructions

1291

1629

"""

1292

1630

if type(src_buf) != str:

1293

src_buf = "".join(src_buf)

1631

src_buf = ''.join(src_buf)

1294

1632

if type(delta) != str:

1295

delta = "".join(delta)

1633

delta = ''.join(delta)

1296

1634

out = []

1297

1635

index = 0

1298

1636

delta_length = len(delta)

1309

1647

return size, index

1310

1648

src_size, index = get_delta_header_size(delta, index)

1311

1649

dest_size, index = get_delta_header_size(delta, index)

1312

assert src_size == len(src_buf), "%d vs %d" % (src_size, len(src_buf))

1650

assert src_size == len(src_buf), '%d vs %d' % (src_size, len(src_buf))

1313

1651

while index < delta_length:

1314

1652

cmd = ord(delta[index])

1315

1653

index += 1

1337

1675

out.append(delta[index:index+cmd])

1338

1676

index += cmd

1339

1677

else:

1340

raise ApplyDeltaError("Invalid opcode 0")

1678

raise ApplyDeltaError('Invalid opcode 0')

1341

1679

1342

1680

if index != delta_length:

1343

raise ApplyDeltaError("delta not empty: %r" % delta[index:])

1681

raise ApplyDeltaError('delta not empty: %r' % delta[index:])

1344

1682

1345

1683

if dest_size != chunks_length(out):

1346

raise ApplyDeltaError("dest size incorrect")

1684

raise ApplyDeltaError('dest size incorrect')

1347

1685

1348

1686

return out

1349

1687

1359

1697

"""

1360

1698

f = SHA1Writer(f)

1361

1699

f.write('\377tOc') # Magic!

1362

f.write(struct.pack(">L", 2))

1700

f.write(struct.pack('>L', 2))

1363

1701

fan_out_table = defaultdict(lambda: 0)

1364

1702

for (name, offset, entry_checksum) in entries:

1365

1703

fan_out_table[ord(name[0])] += 1

1366

1704

# Fan-out table

1367

1705

for i in range(0x100):

1368

f.write(struct.pack(">L", fan_out_table[i]))

1706

f.write(struct.pack('>L', fan_out_table[i]))

1369

1707

fan_out_table[i+1] += fan_out_table[i]

1370

1708

for (name, offset, entry_checksum) in entries:

1371

1709

f.write(name)

1372

1710

for (name, offset, entry_checksum) in entries:

1373

f.write(struct.pack(">L", entry_checksum))

1711

f.write(struct.pack('>L', entry_checksum))

1374

1712

for (name, offset, entry_checksum) in entries:

1375

1713

# FIXME: handle if MSBit is set in offset

1376

f.write(struct.pack(">L", offset))

1714

f.write(struct.pack('>L', offset))

1377

1715

# FIXME: handle table for pack files > 8 Gb

1378

1716

assert len(pack_checksum) == 20

1379

1717

f.write(pack_checksum)

1387

1725

self._basename = basename

1388

1726

self._data = None

1389

1727

self._idx = None

1390

self._idx_path = self._basename + ".idx"

1391

self._data_path = self._basename + ".pack"

1728

self._idx_path = self._basename + '.idx'

1729

self._data_path = self._basename + '.pack'

1392

1730

self._data_load = lambda: PackData(self._data_path)

1393

1731

self._idx_load = lambda: load_pack_index(self._idx_path)

1394

1732

1396

1734

def from_lazy_objects(self, data_fn, idx_fn):

1397

1735

"""Create a new pack object from callables to load pack data and

1398

1736

index objects."""

1399

ret = Pack("")

1737

ret = Pack('')

1400

1738

ret._data_load = data_fn

1401

1739

ret._idx_load = idx_fn

1402

1740

return ret

1404

1742

@classmethod

1405

1743

def from_objects(self, data, idx):

1406

1744

"""Create a new pack object from pack data and index objects."""

1407

ret = Pack("")

1745

ret = Pack('')

1408

1746

ret._data_load = lambda: data

1409

1747

ret._idx_load = lambda: idx

1410

1748

return ret

1419

1757

if self._data is None:

1420

1758

self._data = self._data_load()

1421

1759

self._data.pack = self

1422

assert len(self.index) == len(self._data)

1423

idx_stored_checksum = self.index.get_pack_checksum()

1424

data_stored_checksum = self._data.get_stored_checksum()

1425

if idx_stored_checksum != data_stored_checksum:

1426

raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),

1427

sha_to_hex(data_stored_checksum))

1760

self.check_length_and_checksum()

1428

1761

return self._data

1429

1762

1430

1763

@property

1450

1783

return len(self.index)

1451

1784

1452

1785

def __repr__(self):

1453

return "%s(%r)" % (self.__class__.__name__, self._basename)

1786

return '%s(%r)' % (self.__class__.__name__, self._basename)

1454

1787

1455

1788

def __iter__(self):

1456

1789

"""Iterate over all the sha1s of the objects in this pack."""

1457

1790

return iter(self.index)

1458

1791

1792

def check_length_and_checksum(self):
    """Sanity check the length and checksum of the pack index and data.

    :raise AssertionError: if index and data differ in length.
    :raise ChecksumMismatch: if the pack checksum recorded in the index
        differs from the checksum stored in the data.
    """
    index, data = self.index, self.data
    assert len(index) == len(data)
    idx_stored_checksum = index.get_pack_checksum()
    data_stored_checksum = data.get_stored_checksum()
    if idx_stored_checksum == data_stored_checksum:
        return
    raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
                           sha_to_hex(data_stored_checksum))

1800

1459

1801

def check(self):

1460

1802

"""Check the integrity of this pack.

1461

1803

1484

1826

if type(offset) is long:

1485

1827

offset = int(offset)

1486

1828

type_num, chunks = self.data.resolve_object(offset, obj_type, obj)

1487

return type_num, "".join(chunks)

1829

return type_num, ''.join(chunks)

1488

1830

1489

1831

def __getitem__(self, sha1):

1490

1832

"""Retrieve the specified SHA1."""

1493

1835

1494

1836

def iterobjects(self):

1495

1837

"""Iterate over the objects in this pack."""

1496

for offset, type, obj, crc32 in self.data.iterobjects():

1497

assert isinstance(offset, int)

1498

yield ShaFile.from_raw_chunks(

1499

*self.data.resolve_object(offset, type, obj))

1838

return iter(PackInflater.for_pack_data(self.data))

1839

1840

def pack_tuples(self):
    """Provide an iterable for use with write_pack_objects.

    :return: Object that can iterate over (object, path) tuples
        and provides __len__
    """
    class PackTupleIterable(object):

        def __init__(self, pack):
            self.pack = pack

        def __len__(self):
            return len(self.pack)

        def __iter__(self):
            # The path slot of every tuple is None.
            pack_objects = self.pack.iterobjects()
            return ((obj, None) for obj in pack_objects)

    tuples = PackTupleIterable(self)
    return tuples

1500

1858

1501

1859

def keep(self, msg=None):

1502

1860

"""Add a .keep file for the pack, preventing git from garbage collecting it.