~al-maisan/ubuntu/karmic/pristine-tar/gendelta-fix

1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
1
#!/usr/bin/perl
2
3
=head1 NAME
4
5
pristine-gz - regenerate pristine gz files
6
7
=head1 SYNOPSIS
8
9
B<pristine-gz> [-vdk] gengz delta file
2 by Joey Hess
* Allow the delta file to be read or written from stdio.
10
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
11
B<pristine-gz> [-vdk] gendelta file.gz delta
12
13
=head1 DESCRIPTION
14
15
This is a complement to the pristine-tar(1) command. Normally you don't
16
need to run it by hand, since pristine-tar calls it as necessary to handle
17
.tar.gz files.
18
19
pristine-gz gendelta takes the specified gz file, and generates a
20
small binary delta file that can later be used by pristine-gz gengz
21
to recreate the original file.
22
23
pristine-gz gengz takes the specified delta file, and compresses
24
the specified input file (which must be identical to the contents
25
of the original gz file). The resulting gz file will be identical to the
26
original gz file.
27
28
The approach used to regenerate the original gz file is to figure out how
29
it was produced -- what compression level was used, whether it was built
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
30
with GNU gzip(1) or with a library or BSD version, whether the --rsyncable
31
option was used, etc, and to reproduce this build environment when
32
regenerating the gz.
33
34
This approach will work for about 99.5% of cases. One example of a case it
35
cannot currently support is a gz file that has been produced by appending
36
together multiple gz files.
37
38
For the few where it doesn't work, a binary diff will be included in the
39
delta between the closest regneratable gz file and the original. In
40
the worst case, the diff will include the entire content of the original
41
gz file, resulting in a larger than usual delta. If the delta is much
42
larger than usual, pristine-gz will print a warning.
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
43
2 by Joey Hess
* Allow the delta file to be read or written from stdio.
44
If the delta filename is "-", pristine-gz reads or writes it to stdio.
45
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
46
=head1 OPTIONS
47
48
=over 4
49
50
=item -v
51
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
52
=item --verbose
53
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
54
Verbose mode, show each command that is run.
55
56
=item -d
57
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
58
=item --debug
59
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
60
Debug mode.
61
62
=item -k
63
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
64
=item --keep
65
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
66
Don't clean up the temporary directory on exit.
67
3.1.3 by Joey Hess
* Fix POD issues. Closes: #484165
68
=back
69
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
70
=head1 AUTHOR
71
72
Joey Hess <joeyh@debian.org>,
73
Faidon Liambotis <paravoid@debian.org>
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
74
Josh Triplett <josh@joshtriplett.org>
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
75
76
Licensed under the GPL, version 2.
77
78
=cut
79
80
use warnings;
81
use strict;
82
use File::Temp;
83
use Getopt::Long;
84
use File::Basename qw/basename/;
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
85
use IPC::Open2;
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
86
87
use constant GZIP_DEBUG		 => 1;
88
89
# magic identification
90
use constant GZIP_ID1		 => 0x1F;
91
use constant GZIP_ID2		 => 0x8B;
92
93
# compression methods, 0x00-0x07 are reserved
94
use constant GZIP_METHOD_DEFLATE => 0x08;
95
96
# flags
97
use constant {
98
	GZIP_FLAG_FTEXT		 => 0,
99
	GZIP_FLAG_FHCRC		 => 1,
100
	GZIP_FLAG_FEXTRA	 => 2,
101
	GZIP_FLAG_FNAME		 => 3,
102
	GZIP_FLAG_FCOMMENT	 => 4,
103
	# the rest are reserved
104
};
105
# compression level
106
use constant {
107
	GZIP_COMPRESSION_NORMAL	 => 0,
108
	GZIP_COMPRESSION_BEST	 => 2,
109
	GZIP_COMPRESSION_FAST	 => 4,
110
};
111
# operating systems
112
use constant {
113
	GZIP_OS_MSDOS		 => 0,
114
	GZIP_OS_AMIGA		 => 1,
115
	GZIP_OS_VMS		 => 2,
116
	GZIP_OS_UNIX		 => 3,
117
	GZIP_OS_VMCMS		 => 4,
118
	GZIP_OS_ATARI		 => 5,
119
	GZIP_OS_HPFS		 => 6,
120
	GZIP_OS_MACINTOSH	 => 7,
121
	GZIP_OS_ZSYSTEM		 => 8,
122
	GZIP_OS_CPM		 => 9,
123
	GZIP_OS_TOPS		 => 10,
124
	GZIP_OS_NTFS		 => 11,
125
	GZIP_OS_QDOS		 => 12,
126
	GZIP_OS_RISCOS		 => 13,
127
	GZIP_OS_UNKNOWN		 => 255,
128
};
129
130
my $verbose=0;
131
my $debug=0;
132
my $keep=0;
133
134
sub usage {
135
	print STDERR "Usage: pristine-gz [-vdk] gengz delta file\n";
136
	print STDERR "       pristine-gz [-vdk] gendelta file.gz delta\n";
137
}
138
139
sub debug {
3.1.1 by Joey Hess
Man page typo fix. Closes: #475698
140
	print STDERR "debug: @_\n" if $debug;
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
141
}
142
143
sub vprint {
3.1.1 by Joey Hess
Man page typo fix. Closes: #475698
144
	print STDERR "pristine-gz: @_\n" if $verbose;
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
145
}
146
147
sub doit {
148
	vprint(@_);
149
	if (system(@_) != 0) {
150
		die "command failed: @_\n";
151
	}
152
}
153
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
154
sub doit_redir {
155
	no warnings 'once';
156
	my ($in, $out, @args) = @_;
157
	vprint(@args, "<", $in, ">", $out);
158
	open INFILE, "<", $in or die("Could not open '$in' for reading: $!\n");
159
	open OUTFILE, ">", $out or die("Could not open '$out' for reading: $!\n");
160
	my $pid = open2(">&OUTFILE", "<&INFILE", @args);
161
	waitpid $pid, 0;
162
}
163
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
164
sub tempdir {
165
	return File::Temp::tempdir("pristine-gz.XXXXXXXXXX",
166
		TMPDIR => 1, CLEANUP => !$keep);
167
}
168
169
sub readgzip {
170
	my $filename = shift;
171
	my $chars;
172
173
	open(GZIP, "< $filename")
174
		or die("Could not open '$filename' for reading: $!\n");
175
176
	if (read(GZIP, $chars, 10) != 10) {
177
		die("Unable to read from input\n");
178
	}
179
180
	my ($id1, $id2, $method, $flags, $timestamp, $level, $os, $name)
181
		= (unpack("CCCb8VCC", $chars), '');
182
183
	if ($id1 != GZIP_ID1 || $id2 != GZIP_ID2 || $method != GZIP_METHOD_DEFLATE) {
184
		die("This is not a valid GZip archive.\n");
185
	}
186
	my @flags = split(//, $flags);
187
	
188
	if ($flags[GZIP_FLAG_FNAME]) {
189
		# read a null-terminated string
190
		$name .= $chars
191
			while (read(GZIP, $chars, 1) == 1 && ord($chars) != 0);
192
	}
193
	close(GZIP);
194
195
	return (\@flags, $timestamp, $level, $os, $name);
196
}
197
198
sub predictgzipargs {
199
	my ($flags, $timestamp, $level) = @_;
200
	my @flags = @$flags;
201
202
	my @args;
203
	unless ($flags[GZIP_FLAG_FNAME]) {
204
		push @args, '-n';
205
		push @args, '-M' if $timestamp;
206
	}
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
207
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
208
	if ($level == GZIP_COMPRESSION_BEST) {
209
		push @args, '-9'
210
	} elsif ($level == GZIP_COMPRESSION_FAST) {
211
		push @args, '-1'
212
	}
213
214
	return @args;
215
}
216
217
sub comparefiles {
218
	my ($old, $new) = (shift, shift);
219
	system('cmp', '-s', $old, $new);
220
221
	if ($? == -1 || $? & 127) {
222
		die("Failed to execute cmp: $!\n");
223
	}
224
225
	return $? >> 8;
226
}
227
228
sub reproducegz {
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
229
	my ($orig, $tempdir, $tempin) = @_;
230
	my $tempout="$tempdir/test.gz";
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
231
	doit_redir($orig, $tempin, "gzip", "-dc");
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
232
233
	# read fields from gzip headers
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
234
	my ($flags, $timestamp, $level, $os, $name) = readgzip($orig);
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
235
	debug("flags: [".join(", ", @$flags).
236
		"] timestamp: $timestamp level: $level os: $os name: $name");
237
238
	# try to guess the gzip arguments that are needed by the header
239
	# information
240
	my @args = predictgzipargs($flags, $timestamp, $level);
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
241
	my @extraargs = ("-F", $name, "-T", $timestamp);
242
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
243
	my @try;
244
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
245
	if ($os == GZIP_OS_UNIX) {
246
		# for 98% of the cases the simple heuristic above works
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
247
		# and it was produced by gnu gzip.
248
		push @try, ['--gnu', @args];
249
		push @try, ['--gnu', @args, '--rsyncable'];
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
250
	}
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
251
252
	if ($name =~ /\//) {
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
253
		push @args, "--original-name", $name;
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
254
		@extraargs = ("-T", $timestamp);
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
255
		$name = basename($name);
256
	}
257
258
	# set the Operating System flag to the one found in the original
259
	# archive
260
	push @args, ("--osflag", $os) if $os != GZIP_OS_UNIX;
261
262
	# many of the .gz out there are created using the BSD version of
263
	# gzip which is using the zlib library; try with our version of
264
	# bsd-gzip with added support for the undocumented GNU gzip options
265
	# -m and -M
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
266
	push @try, [@args];
4.1.2 by Joey Hess, Josh Triplett, Joey Hess
[ Josh Triplett ]
267
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
268
	# apparently, there is an old version of bsd-gzip (or a similar tool
269
	# based on zlib) that creates gz using maximum compression (-9) but
270
	# does not indicate so in the headers. surprisingly, there are many
271
	# .gz out there.
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
272
	push @try, [@args, '--quirk', 'buggy-bsd'];
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
273
274
	# Windows' NTFS gzip implementation; quirk is really really evil
275
	# it should be the last test: it can result in a corrupted archive!
276
	if ($os == GZIP_OS_NTFS) {
277
		pop @args; pop @args; # ntfs quirk implies NTFS osflag
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
278
		push @try, [@args, '--quirk', 'ntfs'];
279
	}
280
281
	my $origsize=(stat($orig))[7];
282
	my ($bestvariant, $bestsize);
283
284
	foreach my $variant (@try) {
285
		doit_redir($tempin, $tempout, 'zgz', @$variant, @extraargs, '-c');
286
		if (!comparefiles($orig, $tempout)) {
287
			# success
288
			return $name, $timestamp, undef, @$variant;
289
		}
290
		else {
291
			# generate a binary delta and see if this is the
292
			# best variant so far
293
			my $ret=system("xdelta delta -0 --pristine $tempout $orig $tempdir/tmpdelta 2>/dev/null") >> 8;
294
			# xdelta exits 1 on success
295
			if ($ret == 1) {
296
				my $size=(stat("$tempdir/tmpdelta"))[7];
297
				if (! defined $bestsize || $size < $bestsize) {
298
					$bestvariant = $variant;
299
					$bestsize=$size;
300
					rename("$tempdir/tmpdelta", "$tempdir/bestdelta") || die "rename: $!";
301
				}
302
			}
303
		}
304
	}
305
306
	# Nothing worked perfectly, so use the delta that was generated for
307
	# the best variant
308
	my $percentover=100 - int (($origsize-$bestsize)/$origsize*100);
309
	debug("Using delta to best variant, bloating $percentover%: @$bestvariant");
310
	if ($percentover > 10) {
311
		print STDERR "warning: pristine-gz cannot reproduce build of $orig; ";
312
		if ($percentover >= 100) {
313
			print STDERR "storing entire file in delta!\n";
314
		}
315
		else {
316
			print STDERR "storing $percentover% size diff in delta\n";
317
		}
318
		print STDERR "(Please consider filing a bug report so the delta size can be improved.)\n";
319
	}
320
	return $name, $timestamp, "$tempdir/bestdelta", @$bestvariant;
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
321
}
322
323
sub gengz {
324
	my $delta=shift;
325
	my $file=shift;
326
327
	my $tempdir=tempdir();
2 by Joey Hess
* Allow the delta file to be read or written from stdio.
328
329
	if ($delta eq "-") {
330
		$delta="$tempdir/in";
331
		open (OUT, ">", $delta) || die "$delta: $!";
332
		while (<STDIN>) {
333
			print OUT $_;
334
		}
335
		close OUT;
336
	}
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
337
	
338
	doit("tar", "xf", File::Spec->rel2abs($delta), "-C", $tempdir);
339
	if (! -e "$tempdir/type") {
340
		die "failed to gengz delta $delta\n";
341
	}
342
343
	open (IN, "$tempdir/version") || die "delta lacks version number ($!)";
344
	my $version=<IN>;
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
345
	if ($version >= 4) {
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
346
		die "delta is version $version, not supported\n";
347
	}
348
	close IN;
349
	if (open (IN, "$tempdir/type")) {
350
		my $type=<IN>;
351
		chomp $type;
352
		if ($type ne "gz") {
353
			die "delta is for a $type, not a gz\n";
354
		}
355
		close IN;
356
	}
357
358
	
359
	open (IN, "$tempdir/params") || die "delta lacks params file ($!)";
360
	my $params=<IN>;
361
	chomp $params;
362
	my @params=split(' ', $params);
363
	while (@params) {
364
		$_=shift @params;
365
		next if /^(--gnu|--rsyncable|-[nmM1-9])$/;
366
		if (/^(--original-name|--quirk|--osflag)$/) {
367
			shift @params;
3 by Joey Hess
* If a tarball contains files all in one subdirectory, and the source
368
			next;
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
369
		}
370
		die "paranoia check failed on params file from delta ($params)";
371
	}
372
	@params=split(' ', $params);
373
	close IN;
374
	open (IN, "$tempdir/filename") || die "delta lacks filename file ($!)";
375
	my $filename=<IN>;
376
	chomp $filename;
377
	$filename=~s/^.*\///; # basename isn't strong enough
378
	close IN;
379
	open (IN, "$tempdir/timestamp") || die "delta lacks timestamp file ($!)";
380
	my $timestamp=<IN>;
381
	chomp $timestamp;
382
	close IN;
383
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
384
	my @zgz=("zgz", @params, "-T", $timestamp);
385
	if (! grep { $_ eq "--original-name" } @params) {
386
		push @zgz, "-F", "$filename";
387
	}
388
	push @zgz, "-c";
389
390
	if (-e "$tempdir/delta") {
391
		my $tfile="$tempdir/".basename($file).".gz";
392
		doit_redir($file, $tfile, @zgz);
393
		doit("xdelta", "patch", "--pristine", "$tempdir/delta", $tfile, "$file.gz");
394
	}
395
	else {
396
		doit_redir("$file", "$file.gz", @zgz);
397
	}
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
398
}
399
400
sub gendelta {
401
	my $gzfile=shift;
402
	my $delta=shift;
403
404
	my $tempdir=tempdir();
2 by Joey Hess
* Allow the delta file to be read or written from stdio.
405
	
406
	my $stdout=0;
407
	if ($delta eq "-") {
408
		$stdout=1;
409
		$delta="$tempdir/out";
410
	}
411
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
412
	my @files=qw(version type params filename timestamp);
413
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
414
	my ($filename, $timestamp, $xdelta, @params)=
415
		reproducegz($gzfile, $tempdir, "$tempdir/test");
416
	
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
417
	open(OUT, ">", "$tempdir/version") || die "$!";
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
418
	print OUT (defined $xdelta ? "3.0" : "2.0")."\n";
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
419
	close OUT;
420
	open(OUT, ">", "$tempdir/type") || die "$!";
421
	print OUT "gz\n";
422
	close OUT;
423
	open(OUT, ">", "$tempdir/params") || die "$!";
424
	print OUT "@params\n";
425
	close OUT;
426
	open(OUT, ">", "$tempdir/filename") || die "$!";
427
	print OUT basename($filename)."\n";
428
	close OUT;
429
	open(OUT, ">", "$tempdir/timestamp") || die "$!";
430
	print OUT "$timestamp\n";
431
	close OUT;
6 by Joey Hess
* pristine-gz: Fall back to storing a binary delta, in the rare
432
	if (defined $xdelta) {
433
		rename($xdelta, "$tempdir/delta") || die "rename: $!";
434
		push @files, "delta";
435
	}
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
436
437
	doit("tar", "czf", $delta, "-C", $tempdir, @files);
2 by Joey Hess
* Allow the delta file to be read or written from stdio.
438
439
	if ($stdout) {
440
		doit("cat", $delta);
441
	}
1 by Joey Hess
* pristine-tar sometimes got confused about tarballs that did not unpack
442
}
443
444
Getopt::Long::Configure("bundling");
445
if (! GetOptions(
446
	"v|verbose!" => \$verbose,
447
	"d|debug!" => \$debug,
448
	"k|keep!" => \$keep,
449
   ) || @ARGV != 3) {
450
	usage();
451
	exit 1;
452
}
453
454
my $command=shift;
455
if ($command eq 'gengz') {
456
	gengz(@ARGV);
457
}
458
elsif ($command eq 'gendelta') {
459
	gendelta(@ARGV);
460
}
461
else {
462
	print STDERR "Unknown subcommand \"$command\"\n";
463
	usage();
464
	exit 1;
465
}