~ubuntu-branches/ubuntu/lucid/wget/lucid

Committer: Bazaar Package Importer
Author(s): Noèl Köthe
Date: 2005-06-26 16:46:25 UTC
mfrom: (1.1.1 upstream) (2.1.1 sarge)
Revision ID: james.westby@ubuntu.com-20050626164625-jjcde8hyztx7xq7o

Tags: 1.10-2

http://bugs.debian.org/314728

http://bugs.debian.org/163243

http://bugs.debian.org/313883

* wget-fix_error--save-headers patch from upstream
  (closes: Bug#314728)
* don't pattern-match server redirects patch from upstream
  (closes: Bug#163243)
* correct de.po typos
  (closes: Bug#313883)
* wget-E_html_behind_file_counting fix problem with adding the
  numbers after the html extension
* updated Standards-Version: to 3.6.2

files added:
ChangeLog-branches/1.8_branch.ChangeLog

ChangeLog-branches/1.9_branch.ChangeLog

debian/patches

debian/patches/00list

debian/patches/00template

debian/patches/wget-E_html_behind_file_counting

debian/patches/wget-de.po-spelling-correction

debian/patches/wget-doc-remove-usr-local-in-sample.wgetrc

debian/patches/wget-doc-remove-usr-local-in-wget.texi

debian/patches/wget-dont_pattern_match_server_redirects

debian/patches/wget-fix_error--save-headers

debian/patches/wget-fr.po-spelling-correction

debian/patches/wget-passive_ftp-default

doc/ChangeLog-branches/1.8_branch.ChangeLog

doc/ChangeLog-branches/1.9_branch.ChangeLog

doc/fdl.texi

po/en_GB.po

po/eo.po

po/eu.po

po/fi.po

po/ga.po

po/sr.po

po/vi.po

src/ChangeLog-branches/1.9_branch.ChangeLog

src/config-post.h

src/http-ntlm.c

src/http-ntlm.h

src/log.h

src/openssl.c

src/ptimer.c

src/ptimer.h

src/ssl.h

src/xmalloc.c

src/xmalloc.h

windows/ChangeLog

windows/Makefile.src.mingw

windows/Makefile.top.mingw

windows/config.h.mingw

files removed:
MACHINES

doc/wget.info-1

doc/wget.info-2

doc/wget.info-3

doc/wget.info-4

po/bg.gmo

po/ca.gmo

po/cs.gmo

po/da.gmo

po/de.gmo

po/el.gmo

po/es.gmo

po/et.gmo

po/fr.gmo

po/gl.gmo

po/he.gmo

po/hr.gmo

po/hu.gmo

po/it.gmo

po/ja.gmo

po/nl.gmo

po/no.gmo

po/pl.gmo

po/pt_BR.gmo

po/ro.gmo

po/ru.gmo

po/sk.gmo

po/sl.gmo

po/sv.gmo

po/tr.gmo

po/uk.gmo

po/wget.pot

po/zh_CN.gmo

po/zh_TW.gmo

src/gen_sslfunc.c

src/gen_sslfunc.h

src/headers.c

src/headers.h

src/rbuf.c

src/rbuf.h

files modified:
AUTHORS

ChangeLog

INSTALL

MAILING-LIST

Makefile.cvs

Makefile.in

NEWS

README

TODO

aclocal.m4

config.guess

config.sub

configure

configure.bat

configure.bat.in

configure.in

debian/changelog

debian/control

debian/rules

doc/ChangeLog

doc/Makefile.in

doc/sample.wgetrc

doc/sample.wgetrc.munged_for_texi_inclusion

doc/texi2pod.pl.in

doc/version.texi

doc/wget.info

doc/wget.texi

libtool.m4

ltmain.sh

po/Makefile.in.in

po/POTFILES.in

po/bg.po

po/ca.po

po/cs.po

po/da.po

po/de.po

po/el.po

po/es.po

po/et.po

po/fr.po

po/gl.po

po/he.po

po/hr.po

po/hu.po

po/it.po

po/ja.po

po/nl.po

po/no.po

po/pl.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sk.po

po/sl.po

po/sv.po

po/tr.po

po/uk.po

po/zh_CN.po

po/zh_TW.po

src/ChangeLog

src/Makefile.in

src/alloca.c

src/cmpt.c

src/config.h.in

src/connect.c

src/connect.h

src/convert.c

src/convert.h

src/cookies.c

src/cookies.h

src/ftp-basic.c

src/ftp-ls.c

src/ftp-opie.c

src/ftp.c

src/ftp.h

src/gen-md5.c

src/gen-md5.h

src/getopt.h

src/gnu-md5.h

src/hash.c

src/hash.h

src/host.c

src/host.h

src/html-parse.c

src/html-url.c

src/http.c

src/init.c

src/init.h

src/log.c

src/main.c

src/mswindows.c

src/mswindows.h

src/netrc.c

src/options.h

src/progress.c

src/progress.h

src/recur.c

src/recur.h

src/res.c

src/retr.c

src/retr.h

src/snprintf.c

src/sysdep.h

src/url.c

src/utils.c

src/utils.h

src/version.c

src/wget.h

util/dist-wget

windows/Makefile.doc

windows/Makefile.src

windows/Makefile.src.bor

windows/Makefile.top

windows/Makefile.top.bor

windows/Makefile.watcom

windows/README

windows/config.h.bor

windows/config.h.ms

windows/wget.dep

Show diffs side-by-side

added added

removed removed

src/convert.c

/* Conversion of links to local files.

This file is part of GNU Wget.

#include "recur.h"

#include "utils.h"

#include "hash.h"

#include "ptimer.h"

static struct hash_table *dl_file_url_map;

struct hash_table *dl_url_file_map;

/* List of HTML files downloaded in this Wget run, used for link

conversion after Wget is done. The list and the set contain the

same information, except the list maintains the order. Perhaps I

should get rid of the list, it's there for historical reasons. */

static slist *downloaded_html_list;

/* Set of HTML files downloaded in this Wget run, used for link

conversion after Wget is done. */

struct hash_table *downloaded_html_set;

static void convert_links PARAMS ((const char *, struct urlpos *));

void

convert_all_links (void)

{

slist *html;

long msecs;

int i;

double secs;

int file_count = 0;

struct wget_timer *timer = wtimer_new ();

/* Destructively reverse downloaded_html_files to get it in the right order.

recursive_retrieve() used slist_prepend() consistently. */

downloaded_html_list = slist_nreverse (downloaded_html_list);

for (html = downloaded_html_list; html; html = html->next)

struct ptimer *timer = ptimer_new ();

int cnt;

char **file_array;

cnt = 0;

if (downloaded_html_set)

cnt = hash_table_count (downloaded_html_set);

if (cnt == 0)

return;

file_array = alloca_array (char *, cnt);

string_set_to_array (downloaded_html_set, file_array);

for (i = 0; i < cnt; i++)

{

100

struct urlpos *urls, *cur_url;

101

char *url;

char *file = html->string;

102

char *file = file_array[i];

103

104

/* Determine the URL of the HTML file. get_urls_html will need

100

105

it. */

166

171

free_urlpos (urls);

167

172

}

168

173

169

msecs = wtimer_elapsed (timer);

170

wtimer_delete (timer);

171

logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),

172

file_count, (double)msecs / 1000);

174

secs = ptimer_measure (timer) / 1000;

175

ptimer_destroy (timer);

176

logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"),

177

file_count, secs < 10 ? 3 : 1, secs);

173

178

}

174

179

175

180

static void write_backup_file PARAMS ((const char *, downloaded_file_t));

201

206

any URL needs to be converted in the first place. If not, just

202

207

leave the file alone. */

203

208

int dry_count = 0;

204

struct urlpos *dry = links;

209

struct urlpos *dry;

205

210

for (dry = links; dry; dry = dry->next)

206

211

if (dry->convert != CO_NOCONVERT)

207

212

++dry_count;

327

332

logprintf (LOG_VERBOSE, "%d-%d\n", to_file_count, to_url_count);

328

333

}

329

334

330

/* Construct and return a malloced copy of the relative link from two

331

pieces of information: local name S1 of the referring file and

332

local name S2 of the referred file.

333

334

So, if S1 is "jagor.srce.hr/index.html" and S2 is

335

"jagor.srce.hr/images/news.gif", the function will return

336

"images/news.gif".

337

338

Alternately, if S1 is "fly.cc.fer.hr/ioccc/index.html", and S2 is

339

"fly.cc.fer.hr/images/fly.gif", the function will return

340

"../images/fly.gif".

341

342

Caveats: S1 should not begin with `/', unless S2 also begins with

343

'/'. S1 should not contain things like ".." and such --

344

construct_relative ("fly/ioccc/../index.html",

345

"fly/images/fly.gif") will fail. (A workaround is to call

346

something like path_simplify() on S1). */

335

/* Construct and return a link that points from BASEFILE to LINKFILE.

336

Both files should be local file names, BASEFILE of the referring

337

file, and LINKFILE of the referred file.

338

339

Examples:

340

341

cr("foo", "bar") -> "bar"

342

cr("A/foo", "A/bar") -> "bar"

343

cr("A/foo", "A/B/bar") -> "B/bar"

344

cr("A/X/foo", "A/Y/bar") -> "../Y/bar"

345

cr("X/", "Y/bar") -> "../Y/bar" (trailing slash does matter in BASE)

346

347

Both files should be absolute or relative, otherwise strange

348

results might ensue. The function makes no special efforts to

349

handle "." and ".." in links, so make sure they're not there

350

(e.g. using path_simplify). */

351

347

352

static char *

348

construct_relative (const char *s1, const char *s2)

353

construct_relative (const char *basefile, const char *linkfile)

349

354

{

350

int i, cnt, sepdirs1;

351

char *res;

352

353

if (*s2 == '/')

354

return xstrdup (s2);

355

/* S1 should *not* be absolute, if S2 wasn't. */

356

assert (*s1 != '/');

357

i = cnt = 0;

358

/* Skip the directories common to both strings. */

359

while (1)

360

{

361

while (s1[i] && s2[i]

362

&& (s1[i] == s2[i])

363

&& (s1[i] != '/')

364

&& (s2[i] != '/'))

365

++i;

366

if (s1[i] == '/' && s2[i] == '/')

367

cnt = ++i;

368

else

369

break;

370

}

371

for (sepdirs1 = 0; s1[i]; i++)

372

if (s1[i] == '/')

373

++sepdirs1;

374

/* Now, construct the file as of:

375

- ../ repeated sepdirs1 time

376

- all the non-mutual directories of S2. */

377

res = (char *)xmalloc (3 * sepdirs1 + strlen (s2 + cnt) + 1);

378

for (i = 0; i < sepdirs1; i++)

379

memcpy (res + 3 * i, "../", 3);

380

strcpy (res + 3 * i, s2 + cnt);

381

return res;

355

char *link;

356

int basedirs;

357

const char *b, *l;

358

int i, start;

359

360

/* First, skip the initial directory components common to both

361

files. */

362

start = 0;

363

for (b = basefile, l = linkfile; *b == *l && *b != '\0'; ++b, ++l)

364

{

365

if (*b == '/')

366

start = (b - basefile) + 1;

367

}

368

basefile += start;

369

linkfile += start;

370

371

/* With common directories out of the way, the situation we have is

372

as follows:

373

b - b1/b2/[...]/bfile

374

l - l1/l2/[...]/lfile

375

376

The link we're constructing needs to be:

377

lnk - ../../l1/l2/[...]/lfile

378

379

Where the number of ".."'s equals the number of bN directory

380

components in B. */

381

382

/* Count the directory components in B. */

383

basedirs = 0;

384

for (b = basefile; *b; b++)

385

{

386

if (*b == '/')

387

++basedirs;

388

}

389

390

/* Construct LINK as explained above. */

391

link = (char *)xmalloc (3 * basedirs + strlen (linkfile) + 1);

392

for (i = 0; i < basedirs; i++)

393

memcpy (link + 3 * i, "../", 3);

394

strcpy (link + 3 * i, linkfile);

395

return link;

382

396

}

383

397

398

/* Used by write_backup_file to remember which files have been

399

written. */

400

static struct hash_table *converted_files;

401

384

402

static void

385

403

write_backup_file (const char *file, downloaded_file_t downloaded_file_return)

386

404

{

390

408

clobber .orig files sitting around from previous invocations. */

391

409

392

410

/* Construct the backup filename as the original name plus ".orig". */

393

size_t filename_len = strlen(file);

411

size_t filename_len = strlen (file);

394

412

char* filename_plus_orig_suffix;

395

boolean already_wrote_backup_file = FALSE;

396

slist* converted_file_ptr;

397

static slist* converted_files = NULL;

398

413

399

414

if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)

400

415

{

406

421

".html", so we need to compare vs. the original URL plus

407

422

".orig", not the original URL plus ".html.orig". */

408

423

filename_plus_orig_suffix = alloca (filename_len + 1);

409

strcpy(filename_plus_orig_suffix, file);

410

strcpy((filename_plus_orig_suffix + filename_len) - 4, "orig");

424

strcpy (filename_plus_orig_suffix, file);

425

strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig");

411

426

}

412

427

else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */

413

428

{

414

429

/* Append ".orig" to the name. */

415

filename_plus_orig_suffix = alloca (filename_len + sizeof(".orig"));

416

strcpy(filename_plus_orig_suffix, file);

417

strcpy(filename_plus_orig_suffix + filename_len, ".orig");

430

filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));

431

strcpy (filename_plus_orig_suffix, file);

432

strcpy (filename_plus_orig_suffix + filename_len, ".orig");

418

433

}

419

434

435

if (!converted_files)

436

converted_files = make_string_hash_table (0);

437

420

438

/* We can get called twice on the same URL thanks to the

421

439

convert_all_links() call in main(). If we write the .orig file

422

440

each time in such a case, it'll end up containing the first-pass

423

441

conversion, not the original file. So, see if we've already been

424

442

called on this file. */

425

converted_file_ptr = converted_files;

426

while (converted_file_ptr != NULL)

427

if (strcmp(converted_file_ptr->string, file) == 0)

428

{

429

already_wrote_backup_file = TRUE;

430

break;

431

}

432

else

433

converted_file_ptr = converted_file_ptr->next;

434

435

if (!already_wrote_backup_file)

443

if (!string_set_contains (converted_files, file))

436

444

{

437

445

/* Rename <file> to <file>.orig before former gets written over. */

438

if (rename(file, filename_plus_orig_suffix) != 0)

446

if (rename (file, filename_plus_orig_suffix) != 0)

439

447

logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),

440

448

file, filename_plus_orig_suffix, strerror (errno));

441

449

456

464

list.

457

465

-- Hrvoje Niksic <hniksic@xemacs.org>

458

466

459

converted_file_ptr = xmalloc(sizeof(*converted_file_ptr));

460

converted_file_ptr->string = xstrdup(file); /* die on out-of-mem. */

461

converted_file_ptr->next = converted_files;

462

converted_files = converted_file_ptr;

467

string_set_add (converted_files, file);

463

468

}

464

469

}

465

470

566

571

"index.html%3Ffoo=bar" would break local browsing, as the latter

567

572

isn't even recognized as an HTML file! However, converting

568

573

"index.html?foo=bar.html" to "index.html%3Ffoo=bar.html" should be

569

safe for both local and HTTP-served browsing. */

574

safe for both local and HTTP-served browsing.

575

576

We always quote "#" as "%23" and "%" as "%25" because those

577

characters have special meanings in URLs. */

570

578

571

579

static char *

572

580

local_quote_string (const char *file)

573

581

{

574

const char *file_sans_qmark;

575

int qm;

582

const char *from;

583

char *newname, *to;

576

584

577

if (!opt.html_extension)

585

char *any = strpbrk (file, "?#%");

586

if (!any)

578

587

return html_quote_string (file);

579

588

580

qm = count_char (file, '?');

581

582

if (qm)

583

{

584

const char *from = file;

585

char *to, *newname;

586

587

/* qm * 2 because we replace each question mark with "%3F",

588

i.e. replace one char with three, hence two more. */

589

int fsqlen = strlen (file) + qm * 2;

590

591

to = newname = (char *)alloca (fsqlen + 1);

592

for (; *from; from++)

593

{

594

if (*from != '?')

595

*to++ = *from;

596

else

597

{

598

*to++ = '%';

599

*to++ = '3';

600

*to++ = 'F';

601

}

602

}

603

assert (to - newname == fsqlen);

604

*to = '\0';

605

606

file_sans_qmark = newname;

607

}

608

else

609

file_sans_qmark = file;

610

611

return html_quote_string (file_sans_qmark);

589

/* Allocate space assuming the worst-case scenario, each character

590

having to be quoted. */

591

to = newname = (char *)alloca (3 * strlen (file) + 1);

592

for (from = file; *from; from++)

593

switch (*from)

594

{

595

case '%':

596

*to++ = '%';

597

*to++ = '2';

598

*to++ = '5';

599

break;

600

case '#':

601

*to++ = '%';

602

*to++ = '2';

603

*to++ = '3';

604

break;

605

case '?':

606

if (opt.html_extension)

607

{

608

*to++ = '%';

609

*to++ = '3';

610

*to++ = 'F';

611

break;

612

}

613

/* fallthrough */

614

default:

615

*to++ = *from;

616

}

617

*to = '\0';

618

619

return html_quote_string (newname);

612

620

}

613

621

614

622

/* Book-keeping code for dl_file_url_map, dl_url_file_map,

826

834

{

827

835

if (!downloaded_html_set)

828

836

downloaded_html_set = make_string_hash_table (0);

829

else if (hash_table_contains (downloaded_html_set, file))

830

return;

831

832

/* The set and the list should use the same copy of FILE, but the

833

slist interface insists on strduping the string it gets. Oh

834

well. */

835

837

string_set_add (downloaded_html_set, file);

836

downloaded_html_list = slist_prepend (downloaded_html_list, file);

837

838

}

838

839

/* Cleanup the data structures associated with recursive retrieving

840

(the variables above). */

840

static void downloaded_files_free PARAMS ((void));

841

842

/* Cleanup the data structures associated with this file. */

843

841

844

void

842

845

convert_cleanup (void)

843

846

{

855

858

}

856

859

if (downloaded_html_set)

857

860

string_set_free (downloaded_html_set);

858

slist_free (downloaded_html_list);

859

downloaded_html_list = NULL;

861

downloaded_files_free ();

862

if (converted_files)

863

string_set_free (converted_files);

860

864

}

861

865

862

866

/* Book-keeping code for downloaded files that enables extension

947

951

return 0;

948

952

}

949

953

950

void

954

static void

951

955

downloaded_files_free (void)

952

956

{

953

957

if (downloaded_files_hash)

957

961

downloaded_files_hash = NULL;

958

962

}

959

963

}

964

965

/* The function returns the pointer to the malloc-ed quoted version of

966

string s. It will recognize and quote numeric and special graphic

967

entities, as per RFC1866:

968

969

`&' -> `&amp;'

970

`<' -> `&lt;'

971

`>' -> `&gt;'

972

`"' -> `&quot;'

973

SP -> `&#32;'

974

975

No other entities are recognized or replaced. */

976

char *

977

html_quote_string (const char *s)

978

{

979

const char *b = s;

980

char *p, *res;

981

int i;

982

983

/* Pass through the string, and count the new size. */

984

for (i = 0; *s; s++, i++)

985

{

986

if (*s == '&')

987

i += 4; /* `amp;' */

988

else if (*s == '<' || *s == '>')

989

i += 3; /* `lt;' and `gt;' */

990

else if (*s == '\"')

991

i += 5; /* `quot;' */

992

else if (*s == ' ')

993

i += 4; /* #32; */

994

}

995

res = (char *)xmalloc (i + 1);

996

s = b;

997

for (p = res; *s; s++)

998

{

999

switch (*s)

1000

{

1001

case '&':

1002

*p++ = '&';

1003

*p++ = 'a';

1004

*p++ = 'm';

1005

*p++ = 'p';

1006

*p++ = ';';

1007

break;

1008

case '<': case '>':

1009

*p++ = '&';

1010

*p++ = (*s == '<' ? 'l' : 'g');

1011

*p++ = 't';

1012

*p++ = ';';

1013

break;

1014

case '\"':

1015

*p++ = '&';

1016

*p++ = 'q';

1017

*p++ = 'u';

1018

*p++ = 'o';

1019

*p++ = 't';

1020

*p++ = ';';

1021

break;

1022

case ' ':

1023

*p++ = '&';

1024

*p++ = '#';

1025

*p++ = '3';

1026

*p++ = '2';

1027

*p++ = ';';

1028

break;

1029

default:

1030

*p++ = *s;

1031

}

1032

}

1033

*p = '\0';

1034

return res;

1035

}

Older »