~jameinel/+junk/bzr-index2-stuff

« back to all changes in this revision

Viewing changes to chunk_writer.py

  • Committer: Robert Collins
  • Date: 2008-07-01 22:44:10 UTC
  • mfrom: (7.1.4 index2)
  • Revision ID: robertc@robertcollins.net-20080701224410-2lbqoqc2dc5v3iey
Merge jam's higher compression patches, fixing tests.

Show diffs side-by-side

added added

removed removed

Lines of Context:
37
37
        """Create a ChunkWriter to write chunk_size chunks."""
38
38
        self.chunk_size = chunk_size
39
39
        self.compressor = zlib.compressobj()
 
40
        self.bytes_in = []
40
41
        self.bytes_list = []
41
 
        self.position = 0
42
 
        self.seen_bytes = 0
 
42
        self.compressed = None
43
43
        self.unused_bytes = None
44
44
 
45
45
    def finish(self):
48
48
        This returns the final compressed chunk, and either None, or the
49
49
        bytes that did not fit in the chunk.
50
50
        """
 
51
        self.bytes_in = None # Free the data cached so far, we don't need it
51
52
        self.bytes_list.append(self.compressor.flush(Z_FINISH))
52
 
        self.position += len(self.bytes_list[-1])
53
 
        nulls_needed = self.chunk_size - self.position % self.chunk_size
 
53
        total_len = sum(len(b) for b in self.bytes_list)
 
54
        nulls_needed = self.chunk_size - total_len % self.chunk_size
54
55
        if nulls_needed:
55
56
            self.bytes_list.append("\x00" * nulls_needed)
56
57
        return self.bytes_list, self.unused_bytes
57
58
 
 
59
    def _recompress_all_bytes_in(self, extra_bytes=None):
 
60
        compressor = zlib.compressobj()
 
61
        bytes_out = []
 
62
        for accepted_bytes in self.bytes_in:
 
63
            out = compressor.compress(accepted_bytes)
 
64
            if out:
 
65
                bytes_out.append(out)
 
66
        if extra_bytes:
 
67
            out = compressor.compress(extra_bytes)
 
68
            if out:
 
69
                bytes_out.append(out)
 
70
            out = compressor.flush(Z_SYNC_FLUSH)
 
71
            if out:
 
72
                bytes_out.append(out)
 
73
        return bytes_out, compressor
 
74
 
58
75
    def write(self, bytes):
59
76
        """Write some bytes to the chunk.
60
77
 
61
78
        If the bytes fit, False is returned. Otherwise True is returned
62
79
        and the bytes have not been added to the chunk.
63
80
        """
64
 
        # Reject content if it's likely to fail to fit. The 10 constant is to
65
 
        # allow room for the zlib END_STREAM record in the Z_FINISH flush call.
66
 
        if (self.seen_bytes > self.chunk_size and
67
 
            self.position + 10 + len(bytes) > self.chunk_size):
68
 
            self.unused_bytes = bytes
69
 
            return True
70
 
        self.bytes_list.append(self.compressor.compress(bytes))
71
 
        self.position += len(self.bytes_list[-1])
72
 
        self.seen_bytes += len(bytes)
73
 
        # If we are at the end of what we know will fit, flush.
74
 
        if self.seen_bytes > self.chunk_size:
75
 
            # Note: we could strip the \x00\x00\xff\xff and reinsert it in the
76
 
            # reader - see rfc1979. syncing on every call imposes an increase in
77
 
            # compressed size. e.g. 3661 vs 4050 bytes for 40 200 byte rows.
78
 
            self.bytes_list.append(self.compressor.flush(Z_SYNC_FLUSH))
79
 
            self.position += len(self.bytes_list[-1])
 
81
        # Add these bytes using Z_SYNC_FLUSH, if it puts us over budget, we
 
82
        # will try packing everything tighter, if that still fails, then we
 
83
        # will reject this request.
 
84
        out = self.compressor.compress(bytes)
 
85
        if out:
 
86
            self.bytes_list.append(out)
 
87
        out = self.compressor.flush(Z_SYNC_FLUSH)
 
88
        if out:
 
89
            self.bytes_list.append(out)
 
90
        total_len = sum(len(b) for b in self.bytes_list)
 
91
        # Give us some extra room for a final Z_FINISH call.
 
92
        if total_len + 10 > self.chunk_size:
 
93
            # We are over budget, try to squeeze this in without any
 
94
            # Z_SYNC_FLUSH calls
 
95
            bytes_out, compressor = self._recompress_all_bytes_in(bytes)
 
96
            this_len = sum(len(b) for b in bytes_out)
 
97
            if this_len + 10 > self.chunk_size:
 
98
                # No way we can add anymore, we need to re-pack because our
 
99
                # compressor is now out of sync
 
100
                bytes_out, compressor = self._recompress_all_bytes_in()
 
101
                self.compressor = compressor
 
102
                self.bytes_list = bytes_out
 
103
                self.unused_bytes = bytes
 
104
                return True
 
105
            else:
 
106
                # This fits when we pack it tighter, so use the new packing
 
107
                self.compressor = compressor
 
108
                self.bytes_in.append(bytes)
 
109
                self.bytes_list = bytes_out
 
110
        else:
 
111
            # It fit, so mark it added
 
112
            self.bytes_in.append(bytes)
80
113
        return False
81
114