447
447
/* opt_str = guess_gzip_options(filename_str)
448
448
* For the given (gzip) file, try to guess the options that were used with gzip
449
* to create it. Returns the option string for gzip, or NULL */
450
* Returns a malloced string containing the options for gzip, or NULL */
450
451
static const char *const try_opts[] =
451
452
{ "--best", "", "--rsync", "--rsync --best", NULL };
452
453
#define SAMPLE 1024
454
const char *guess_gzip_options(const char *f) {
455
char *guess_gzip_options(const char *f) {
455
456
char orig[SAMPLE];
456
457
{ /* Read sample of the header of the compressed file */
457
458
FILE *s = fopen(f, "r");
468
469
char *enc_f = encode_filename(f);
473
int has_mtime = zhead_has_mtime(orig);
474
int has_fname = zhead_has_fname(orig);
476
if (has_mtime && !has_fname) {
477
fprintf(stderr, "can't recompress, stream has mtime but no fname\n");
480
else if (has_fname && !has_mtime) {
481
fprintf(stderr, "can't recompress, stream has fname but no mtime\n");
485
has_mtime_fname = has_fname; /* which = has_mtime */
470
489
/* For each likely set of options, try recompressing the content with
471
490
* those options */
472
491
for (i = 0; (o = try_opts[i]) != NULL; i++) {
474
snprintf(cmd, sizeof(cmd), "zcat %s | gzip -n %s 2> /dev/null",
493
{ /* Compose command line */
495
snprintf(cmd, sizeof(cmd), "zcat %s | gzip -n %s 2> /dev/null",
477
{ /* Read the recompressed content */
478
FILE *p = popen(cmd, "r");
481
500
fprintf(stderr, "running %s to determine gzip options\n",
486
else if (!read_sample_and_close(p, SAMPLE, samp)) {
508
if (p) { /* Read the recompressed content */
510
if (!read_sample_and_close(p, SAMPLE, samp)) {
511
; /* Read error - just fail this one and let the loop
491
515
/* We have the compressed version with these options.
492
516
* Compare with the original */
493
517
const char *a = skip_zhead(orig);
671
712
read_stream_write_blocksums(instream, tf);
673
714
{ /* Decide how long a rsum hash and checksum hash per block we need for this file */
677
((log(len) + log(blocksize)) / log(2) - 8.6) / seq_matches) / 8;
715
seq_matches = len > blocksize ? 2 : 1;
716
rsum_len = ceil(((log(len) + log(blocksize)) / log(2) - 8.6) / seq_matches / 8);
679
718
/* min and max lengths of rsums to store */
680
719
if (rsum_len > 4) rsum_len = 4;
681
720
if (rsum_len < 2) rsum_len = 2;
683
722
/* Now the checksum length; min of two calculations */
688
log(1 + len / blocksize)) / log(2)) / seq_matches) / 8;
724
(20 + (log(len) + log(1 + len / blocksize)) / log(2))
690
727
int checksum_len2 =
691
728
(7.9 + (20 + log(1 + len / blocksize) / log(2))) / 8;
836
873
if (do_recompress) /* Write Recompress header if wanted */
837
874
fprintf(fout, "Recompress: %s %s\n", zhead, gzopts);
839
878
/* If we have a zmap, write it, header first and then the map itself */
840
879
if (zmapentries) {