~ubuntu-branches/ubuntu/dapper/wget/dapper-updates

« back to all changes in this revision

Viewing changes to src/utils.c

Committer: Bazaar Package Importer
Author(s): Noèl Köthe
Date: 2004-02-13 20:26:44 UTC
Revision ID: james.westby@ubuntu.com-20040213202644-skxj93qs15sskqfy

Tags: upstream-1.9.1

Import upstream version 1.9.1

files added:

AUTHORS

COPYING

ChangeLog

ChangeLog-branches

ChangeLog-branches/1.6_branch.ChangeLog

ChangeLog.README

INSTALL

MACHINES

MAILING-LIST

Makefile.cvs

Makefile.in

NEWS

PATCHES

README

README.cvs

TODO

aclocal.m4

config.guess

config.sub

configure

configure.bat

configure.bat.in

configure.in

doc/ChangeLog

doc/ChangeLog-branches

doc/ChangeLog-branches/1.6_branch.ChangeLog

doc/Makefile.in

doc/ansi2knr.1

doc/sample.wgetrc

doc/sample.wgetrc.munged_for_texi_inclusion

doc/texi2pod.pl.in

doc/texinfo.tex

doc/version.texi

doc/wget.info

doc/wget.info-1

doc/wget.info-2

doc/wget.info-3

doc/wget.info-4

doc/wget.texi

install-sh

libtool.m4

ltmain.sh

mkinstalldirs

po/Makefile.in.in

po/POTFILES.in

po/bg.gmo

po/bg.po

po/ca.gmo

po/ca.po

po/cs.gmo

po/cs.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/el.gmo

po/el.po

po/es.gmo

po/es.po

po/et.gmo

po/et.po

po/fr.gmo

po/fr.po

po/gl.gmo

po/gl.po

po/he.gmo

po/he.po

po/hr.gmo

po/hr.po

po/hu.gmo

po/hu.po

po/it.gmo

po/it.po

po/ja.gmo

po/ja.po

po/nl.gmo

po/nl.po

po/no.gmo

po/no.po

po/pl.gmo

po/pl.po

po/pt_BR.gmo

po/pt_BR.po

po/ro.gmo

po/ro.po

po/ru.gmo

po/ru.po

po/sk.gmo

po/sk.po

po/sl.gmo

po/sl.po

po/sv.gmo

po/sv.po

po/tr.gmo

po/tr.po

po/uk.gmo

po/uk.po

po/wget.pot

po/zh_CN.gmo

po/zh_CN.po

po/zh_TW.gmo

po/zh_TW.po

src/ChangeLog

src/ChangeLog-branches

src/ChangeLog-branches/1.6_branch.ChangeLog

src/ChangeLog-branches/1.8_branch.ChangeLog

src/Makefile.in

src/alloca.c

src/ansi2knr.c

src/cmpt.c

src/config.h.in

src/connect.c

src/connect.h

src/convert.c

src/convert.h

src/cookies.c

src/cookies.h

src/ftp-basic.c

src/ftp-ls.c

src/ftp-opie.c

src/ftp.c

src/ftp.h

src/gen-md5.c

src/gen-md5.h

src/gen_sslfunc.c

src/gen_sslfunc.h

src/getopt.c

src/getopt.h

src/gnu-md5.c

src/gnu-md5.h

src/hash.c

src/hash.h

src/headers.c

src/headers.h

src/host.c

src/host.h

src/html-parse.c

src/html-parse.h

src/html-url.c

src/http.c

src/init.c

src/init.h

src/log.c

src/main.c

src/mswindows.c

src/mswindows.h

src/netrc.c

src/netrc.h

src/options.h

src/progress.c

src/progress.h

src/rbuf.c

src/rbuf.h

src/recur.c

src/recur.h

src/res.c

src/res.h

src/retr.c

src/retr.h

src/safe-ctype.c

src/safe-ctype.h

src/snprintf.c

src/sysdep.h

src/url.c

src/url.h

src/utils.c

src/utils.h

src/version.c

src/wget.h

stamp-h.in

util

util/Makefile.in

util/README

util/dist-wget

util/download-netscape.html

util/download.html

util/rmold.pl

util/wget.spec

windows

windows/Makefile.doc

windows/Makefile.in

windows/Makefile.src

windows/Makefile.src.bor

windows/Makefile.top

windows/Makefile.top.bor

windows/Makefile.watcom

windows/README

windows/config.h.bor

windows/config.h.ms

windows/wget.dep

Show diffs side-by-side

added added

removed removed

src/utils.c

/* Various functions of utilitarian nature.

Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation; either version 2 of the License, or

(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License

along with Wget; if not, write to the Free Software

Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In addition, as a special exception, the Free Software Foundation

gives permission to link the code of its release of Wget with the

OpenSSL project's "OpenSSL" library (or with modified versions of it

that use the same license as the "OpenSSL" library), and distribute

the linked executables. You must obey the GNU General Public License

in all respects for all of the code used other than "OpenSSL". If you

modify this file, you may extend this exception to your version of the

file, but you are not obligated to do so. If you do not wish to do

so, delete this exception statement from your version. */

#include <config.h>

#include <stdio.h>

#include <stdlib.h>

#ifdef HAVE_STRING_H

# include <string.h>

#else /* not HAVE_STRING_H */

# include <strings.h>

#endif /* not HAVE_STRING_H */

#include <sys/types.h>

#ifdef HAVE_UNISTD_H

# include <unistd.h>

#endif

#ifdef HAVE_MMAP

# include <sys/mman.h>

#endif

#ifdef HAVE_PWD_H

# include <pwd.h>

#endif

#include <limits.h>

#ifdef HAVE_UTIME_H

# include <utime.h>

#endif

#ifdef HAVE_SYS_UTIME_H

# include <sys/utime.h>

#endif

#include <errno.h>

#ifdef NeXT

# include <libc.h> /* for access() */

#endif

#include <fcntl.h>

#include <assert.h>

/* For TIOCGWINSZ and friends: */

#ifdef HAVE_SYS_IOCTL_H

# include <sys/ioctl.h>

#endif

#ifdef HAVE_TERMIOS_H

# include <termios.h>

#endif

/* Needed for run_with_timeout. */

#undef USE_SIGNAL_TIMEOUT

#ifdef HAVE_SIGNAL_H

# include <signal.h>

#endif

#ifdef HAVE_SETJMP_H

# include <setjmp.h>

#endif

#ifndef HAVE_SIGSETJMP

/* If sigsetjmp is a macro, configure won't pick it up. */

# ifdef sigsetjmp

# define HAVE_SIGSETJMP

# endif

#endif

#ifdef HAVE_SIGNAL

# ifdef HAVE_SIGSETJMP

# define USE_SIGNAL_TIMEOUT

# endif

# ifdef HAVE_SIGBLOCK

# define USE_SIGNAL_TIMEOUT

# endif

#endif

#include "wget.h"

#include "utils.h"

#include "hash.h"

100

101

#ifndef errno

102

extern int errno;

103

#endif

104

105

/* This section implements several wrappers around the basic
   allocation routines.  This is done for two reasons: first, so that
   the callers of these functions need not consistently check for
   errors.  If there is not enough virtual memory for running Wget,
   something is seriously wrong, and Wget exits with an appropriate
   error message.

   The second reason why these are useful is that, if DEBUG_MALLOC is
   defined, they also provide a handy (if crude) malloc debugging
   interface that checks memory leaks.  */

115

116

/* Croak the fatal memory error and bail out with non-zero exit

117

status. */

118

static void

119

memfatal (const char *what)

120

{

121

/* Make sure we don't try to store part of the log line, and thus

122

call malloc. */

123

log_set_save_context (0);

124

logprintf (LOG_ALWAYS, _("%s: %s: Not enough memory.\n"), exec_name, what);

125

exit (1);

126

}

127

128

/* These functions end with _real because they need to be

129

distinguished from the debugging functions, and from the macros.

130

Explanation follows:

131

132

If memory debugging is not turned on, wget.h defines these:

133

134

#define xmalloc xmalloc_real

135

#define xrealloc xrealloc_real

136

#define xstrdup xstrdup_real

137

#define xfree free

138

139

In case of memory debugging, the definitions are a bit more

140

complex, because we want to provide more information, *and* we want

141

to call the debugging code. (The former is the reason why xmalloc

142

and friends need to be macros in the first place.) Then it looks

143

like this:

144

145

#define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__)

146

#define xfree(a) xfree_debug (a, __FILE__, __LINE__)

147

#define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__)

148

#define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__)

149

150

Each of the *_debug function does its magic and calls the real one. */

151

152

#ifdef DEBUG_MALLOC

153

# define STATIC_IF_DEBUG static

154

#else

155

# define STATIC_IF_DEBUG

156

#endif

157

158

/* Allocate SIZE bytes and return the new block.  Never returns NULL:
   if the allocation fails, memfatal reports the error and exits.

   A request for zero bytes is rounded up to one byte.  The C standard
   lets malloc (0) return NULL even when memory is plentiful, and that
   result must not be mistaken for an out-of-memory condition.  */
STATIC_IF_DEBUG void *
xmalloc_real (size_t size)
{
  void *ptr = malloc (size ? size : 1);
  if (!ptr)
    memfatal ("malloc");
  return ptr;
}

166

167

/* Resize the block PTR to NEWSIZE bytes and return the (possibly
   moved) block.  A NULL PTR is treated as a fresh allocation.  Never
   returns NULL: if the allocation fails, memfatal reports the error
   and exits.  */
STATIC_IF_DEBUG void *
xrealloc_real (void *ptr, size_t newsize)
{
  void *newptr;

  /* A zero-byte request may legitimately yield NULL from realloc or
     malloc; ask for one byte instead so that NULL always means
     "out of memory" below.  */
  if (newsize == 0)
    newsize = 1;

  /* Not all Un*xes have the feature of realloc() that calling it with
     a NULL-pointer is the same as malloc(), but it is easy to
     simulate.  */
  if (ptr)
    newptr = realloc (ptr, newsize);
  else
    newptr = malloc (newsize);
  if (!newptr)
    memfatal ("realloc");
  return newptr;
}

183

184

/* Return a freshly allocated copy of the zero-terminated string S.
   Never returns NULL: if the allocation fails, memfatal reports the
   error and exits.  */
STATIC_IF_DEBUG char *
xstrdup_real (const char *s)
{
  char *copy;

#ifndef HAVE_STRDUP
  /* Keep the size in a size_t: storing strlen's result in an int
     would truncate the length of very long strings.  */
  size_t size = strlen (s) + 1;	/* includes the terminating \0 */
  copy = malloc (size);
  if (!copy)
    memfatal ("strdup");
  memcpy (copy, s, size);
#else  /* HAVE_STRDUP */
  copy = strdup (s);
  if (!copy)
    memfatal ("strdup");
#endif /* HAVE_STRDUP */

  return copy;
}

203

204

#ifdef DEBUG_MALLOC

205

206

/* Crude home-grown routines for debugging some malloc-related

207

problems. Featured:

208

209

* Counting the number of malloc and free invocations, and reporting

210

the "balance", i.e. how many times more malloc was called than it

211

was the case with free.

212

213

* Making malloc store its entry into a simple array and free remove

214

stuff from that array. At the end, print the pointers which have

215

not been freed, along with the source file and the line number.

216

This also has the side-effect of detecting freeing memory that

217

was never allocated.

218

219

Note that this kind of memory leak checking strongly depends on

220

every malloc() being followed by a free(), even if the program is

221

about to finish. Wget is careful to free the data structure it

222

allocated in init.c. */

223

224

/* Number of allocations and number of releases counted so far.  */
static int malloc_count;
static int free_count;

/* Fixed-size table of tracked pointers.  Each entry remembers the
   pointer together with a source file name and line number.  */
static struct {
  char *ptr;
  const char *file;
  int line;
} malloc_debug[100000];

231

232

/* Both register_ptr and unregister_ptr take O(n) operations to run,

233

which can be a real problem. It would be nice to use a hash table

234

for malloc_debug, but the functions in hash.c are not suitable

235

because they can call malloc() themselves. Maybe it would work if

236

the hash table were preallocated to a huge size, and if we set the

237

rehash threshold to 1.0. */

238

239

/* Register PTR in malloc_debug. Abort if this is not possible

240

(presumably due to the number of current allocations exceeding the

241

size of malloc_debug.) */

242

243

static void

244

register_ptr (void *ptr, const char *file, int line)

245

{

246

int i;

247

for (i = 0; i < countof (malloc_debug); i++)

248

if (malloc_debug[i].ptr == NULL)

249

{

250

malloc_debug[i].ptr = ptr;

251

malloc_debug[i].file = file;

252

malloc_debug[i].line = line;

253

return;

254

}

255

abort ();

256

}

257

258

/* Unregister PTR from malloc_debug. Abort if PTR is not present in

259

malloc_debug. (This catches calling free() with a bogus pointer.) */

260

261

static void

262

unregister_ptr (void *ptr)

263

{

264

int i;

265

for (i = 0; i < countof (malloc_debug); i++)

266

if (malloc_debug[i].ptr == ptr)

267

{

268

malloc_debug[i].ptr = NULL;

269

return;

270

}

271

abort ();

272

}

273

274

/* Print the malloc debug stats that can be gathered from the above

275

information. Currently this is the count of mallocs, frees, the

276

difference between the two, and the dump of the contents of

277

malloc_debug. The last part are the memory leaks. */

278

279

void

280

print_malloc_debug_stats (void)

281

{

282

int i;

283

printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n",

284

malloc_count, free_count, malloc_count - free_count);

285

for (i = 0; i < countof (malloc_debug); i++)

286

if (malloc_debug[i].ptr != NULL)

287

printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr,

288

malloc_debug[i].file, malloc_debug[i].line);

289

}

290

291

void *

292

xmalloc_debug (size_t size, const char *source_file, int source_line)

293

{

294

void *ptr = xmalloc_real (size);

295

++malloc_count;

296

register_ptr (ptr, source_file, source_line);

297

return ptr;

298

}

299

300

void

301

xfree_debug (void *ptr, const char *source_file, int source_line)

302

{

303

assert (ptr != NULL);

304

++free_count;

305

unregister_ptr (ptr);

306

free (ptr);

307

}

308

309

void *

310

xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line)

311

{

312

void *newptr = xrealloc_real (ptr, newsize);

313

if (!ptr)

314

{

315

++malloc_count;

316

register_ptr (newptr, source_file, source_line);

317

}

318

else if (newptr != ptr)

319

{

320

unregister_ptr (ptr);

321

register_ptr (newptr, source_file, source_line);

322

}

323

return newptr;

324

}

325

326

char *

327

xstrdup_debug (const char *s, const char *source_file, int source_line)

328

{

329

char *copy = xstrdup_real (s);

330

++malloc_count;

331

register_ptr (copy, source_file, source_line);

332

return copy;

333

}

334

335

#endif /* DEBUG_MALLOC */

336

337

/* Return a freshly allocated copy of S in which every character has
   been passed through TOLOWER.  */

char *
xstrdup_lower (const char *s)
{
  char *lowered = xstrdup (s);
  size_t i;

  for (i = 0; lowered[i] != '\0'; i++)
    lowered[i] = TOLOWER (lowered[i]);

  return lowered;
}

348

349

/* Count the occurrences of CHR in the zero-terminated STRING.  The
   terminating \0 itself is never counted.  */

int
count_char (const char *string, char chr)
{
  int hits = 0;
  size_t i;

  for (i = 0; string[i] != '\0'; i++)
    hits += (string[i] == chr);

  return hits;
}

361

362

/* Return a newly allocated, zero-terminated copy of the characters
   in the range [BEG, END): BEG points to the first character and END
   to the character after the last one.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t len = (size_t) (end - beg);
  char *copy = (char *) xmalloc (len + 1);

  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}

373

374

/* Split the comma-separated string S into its elements and return
   them as a NULL-terminated, freshly allocated vector of freshly
   allocated strings.  Whitespace directly after a comma is skipped.
   Returns NULL when S is NULL or empty.  */
char **
sepstring (const char *s)
{
  char **vec = NULL;
  const char *start;
  int n = 0;

  if (s == NULL || *s == '\0')
    return NULL;

  start = s;
  for (;;)
    {
      if (*s != ',' && *s != '\0')
	{
	  ++s;
	  continue;
	}

      /* An element ends here, either at a comma or at the end of the
	 input.  Keep room for it and for the terminating NULL.  */
      vec = (char **) xrealloc (vec, (n + 2) * sizeof (char *));
      vec[n] = strdupdelim (start, s);
      vec[++n] = NULL;

      if (*s == '\0')
	break;

      /* Step over the comma and any blanks following it.  */
      ++s;
      while (ISSPACE (*s))
	++s;
      start = s;
    }

  return vec;
}

409

410

/* Return a pointer to a static buffer holding the current local time
   formatted as "hh:mm:ss".  The buffer is overwritten by the next
   call.

   TM is NOT used as the time to format: it is handed to time(), so
   if it is non-NULL the current time in seconds is stored there.

   If the current time cannot be obtained or cannot be converted to
   local time, the empty string is returned.  */

char *
time_str (time_t *tm)
{
  static char output[15];
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start from the empty string so that every failure path below
     can simply return.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime() may return NULL; never dereference it blindly.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}

439

440

/* Return a pointer to a static buffer holding the current local date
   and time formatted as "YYYY-MM-DD hh:mm:ss".  The buffer is
   overwritten by the next call.

   TM is NOT used as the time to format: it is handed to time(), so
   if it is non-NULL the current time in seconds is stored there.

   If the current time cannot be obtained or cannot be converted to
   local time, the empty string is returned.  */

char *
datetime_str (time_t *tm)
{
  static char output[20];	/* "YYYY-MM-DD hh:mm:ss" + \0 */
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start from the empty string so that every failure path below
     can simply return.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime() may return NULL; never dereference it blindly.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
	   ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
	   ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}

462

463

/* The Windows versions of the following two functions are defined in

464

mswindows.c. */

465

466

#ifndef WINDOWS

467

void

468

fork_to_background (void)

469

{

470

pid_t pid;

471

/* Whether we arrange our own version of opt.lfilename here. */

472

int changedp = 0;

473

474

if (!opt.lfilename)

475

{

476

opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);

477

changedp = 1;

478

}

479

pid = fork ();

480

if (pid < 0)

481

{

482

/* parent, error */

483

perror ("fork");

484

exit (1);

485

}

486

else if (pid != 0)

487

{

488

/* parent, no error */

489

printf (_("Continuing in background, pid %d.\n"), (int)pid);

490

if (changedp)

491

printf (_("Output will be written to `%s'.\n"), opt.lfilename);

492

exit (0); /* #### should we use _exit()? */

493

}

494

495

/* child: give up the privileges and keep running. */

496

setsid ();

497

freopen ("/dev/null", "r", stdin);

498

freopen ("/dev/null", "w", stdout);

499

freopen ("/dev/null", "w", stderr);

500

}

501

#endif /* not WINDOWS */

502

503

/* "Touch" FILE, i.e. make its atime and mtime equal to the time

504

specified with TM. */

505

void

506

touch (const char *file, time_t tm)

507

{

508

#ifdef HAVE_STRUCT_UTIMBUF

509

struct utimbuf times;

510

times.actime = times.modtime = tm;

511

#else

512

time_t times[2];

513

times[0] = times[1] = tm;

514

#endif

515

516

if (utime (file, &times) == -1)

517

logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));

518

}

519

520

/* Checks if FILE is a symbolic link, and removes it if it is. Does

521

nothing under MS-Windows. */

522

int

523

remove_link (const char *file)

524

{

525

int err = 0;

526

struct stat st;

527

528

if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))

529

{

530

DEBUGP (("Unlinking %s (symlink).\n", file));

531

err = unlink (file);

532

if (err != 0)

533

logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),

534

file, strerror (errno));

535

}

536

return err;

537

}

538

539

/* Return non-zero if FILENAME exists, zero otherwise.

   Only a yes-or-no answer is given; no error code is reported.  Any
   failure of the underlying access() or stat() call -- for example
   an unreadable directory -- is therefore reported as "does not
   exist".  A third, "error" state would be more correct, but would
   complicate every caller.  */
int
file_exists_p (const char *filename)
{
#ifdef HAVE_ACCESS
  int status = access (filename, F_OK);
#else
  struct stat buf;
  int status = stat (filename, &buf);
#endif

  return status >= 0;
}

556

557

/* Return 1 if PATH names something other than a directory (any kind
   of file), and 0 if it names a directory.  Also returns 0 when PATH
   cannot be examined.  */
int
file_non_directory_p (const char *path)
{
  struct stat info;

  /* lstat() rather than stat(), so that a symbolic link pointing to
     a directory is judged as the link itself.  */
  if (lstat (path, &info) != 0)
    return 0;
  if (S_ISDIR (info.st_mode))
    return 0;
  return 1;
}

569

570

/* Return the size, in bytes, of the file named by FILENAME, or -1 if
   it cannot be opened or seeked into.  */
long
file_size (const char *filename)
{
  long size = -1;
  /* We use fseek rather than stat to determine the file size because
     that way we can also verify whether the file is readable.  */
  FILE *fp = fopen (filename, "rb");
  if (!fp)
    return -1;
  /* Only trust ftell() when the seek to the end actually succeeded;
     otherwise report failure as promised above.  */
  if (fseek (fp, 0, SEEK_END) == 0)
    size = ftell (fp);
  fclose (fp);
  return size;
}

587

588

/* Try the file names PREFIX.1, PREFIX.2, etc., until one that does
   not exist is found.  Return a freshly allocated copy of that
   unused file name.  */

static char *
unique_name_1 (const char *prefix)
{
  int count = 1;
  int plen = strlen (prefix);
  /* Room for PREFIX, the '.', and 24 more bytes for the number
     written by number_to_string (presumably digits plus the
     terminating \0 -- confirm against number_to_string).  */
  char *template = (char *)alloca (plen + 1 + 24);
  char *template_tail = template + plen;

  memcpy (template, prefix, plen);
  *template_tail++ = '.';

  /* Without the `do', the candidate name would be generated only once
     and the `while' would spin forever whenever that file exists.  */
  do
    number_to_string (template_tail, count++);
  while (file_exists_p (template));

  return xstrdup (template);
}

609

610

/* Return a file name, based on FILE, that does not currently exist.

   If FILE itself does not exist, FILE is the answer.  Otherwise
   FILE.1, FILE.2, ... are tried in turn and the first name that does
   not exist is returned.

   The file is not created; its absence is only checked at the time
   of the call.  Where security matters, the caller must not rely on
   the name still being unused and should open it with O_EXCL or
   something similar.

   If ALLOW_PASSTHROUGH is 0, the result is always freshly allocated.
   Otherwise FILE itself may be returned when it needs no change.  */

char *
unique_name (const char *file, int allow_passthrough)
{
  /* FILE is taken: look for a free numeric suffix.  */
  if (file_exists_p (file))
    return unique_name_1 (file);

  /* FILE is free: hand it back, copied unless the caller allows the
     original pointer to pass through.  */
  if (allow_passthrough)
    return (char *)file;
  return xstrdup (file);
}

638

639

/* Create DIRECTORY, creating any missing leading components first,
   in the manner of `mkdir -p'.

   The return value is the status of the attempt on the final
   component: 0 if it already existed or was created, otherwise
   whatever mkdir() returned.  Failures on intermediate components
   are not reported.  */
int
make_directory (const char *directory)
{
  int i;
  int ret = 0;
  char *dir;

  /* Work on a private, writable copy: components are cut off by
     temporarily storing \0 over each '/'.  */
  STRDUP_ALLOCA (dir, directory);

  /* A leading '/' is stepped over so that absolute path names can be
     created too.  */
  i = (*dir == '/') ? 1 : 0;
  for (;;)
    {
      int at_end;

      /* Advance to the end of the current component.  */
      while (dir[i] != '\0' && dir[i] != '/')
	++i;
      at_end = (dir[i] == '\0');
      dir[i] = '\0';

      /* Create this prefix unless it already exists.  Intermediate
	 creations are allowed to fail, as the initial path components
	 are not necessarily directories.  */
      ret = file_exists_p (dir) ? 0 : mkdir (dir, 0777);

      if (at_end)
	break;
      /* Restore the separator and move on to the next component.  */
      dir[i++] = '/';
    }
  return ret;
}

680

681

/* Replace the part of BASE after its last '/' with FILE and return
   the result as a freshly allocated string.  If BASE contains no
   '/', a copy of FILE is returned.

   file_merge("/foo/bar", "baz")  => "/foo/baz"
   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
   file_merge("foo", "bar")       => "bar"  */

char *
file_merge (const char *base, const char *file)
{
  const char *last_slash = (const char *)strrchr (base, '/');
  size_t dirlen, filelen;
  char *merged;

  if (last_slash == NULL)
    return xstrdup (file);

  /* Directory part of BASE, including the trailing '/'.  */
  dirlen = (size_t) (last_slash - base) + 1;
  filelen = strlen (file);

  merged = (char *)xmalloc (dirlen + filelen + 1);
  memcpy (merged, base, dirlen);
  /* filelen + 1 also copies FILE's terminating \0.  */
  memcpy (merged + dirlen, file, filelen + 1);

  return merged;
}

706

707

static int in_acclist PARAMS ((const char *const *, const char *, int));

708

709

/* Determine whether a file is acceptable to be followed, according to

710

lists of patterns to accept/reject. */

711

int

712

acceptable (const char *s)

713

{

714

int l = strlen (s);

715

716

while (l && s[l] != '/')

717

--l;

718

if (s[l] == '/')

719

s += (l + 1);

720

if (opt.accepts)

721

{

722

if (opt.rejects)

723

return (in_acclist ((const char *const *)opt.accepts, s, 1)

724

&& !in_acclist ((const char *const *)opt.rejects, s, 1));

725

else

726

return in_acclist ((const char *const *)opt.accepts, s, 1);

727

}

728

else if (opt.rejects)

729

return !in_acclist ((const char *const *)opt.rejects, s, 1);

730

return 1;

731

}

732

733

/* Return 1 if S2 begins with S1, 0 otherwise.  For example, with S1
   equal to `/something', 1 is returned only for an S2 that starts
   with `/something'.  An empty S1 is a prefix of every string.  */
int
frontcmp (const char *s1, const char *s2)
{
  /* Walk both strings while S1 has characters left and they agree.
     A \0 in S2 stops the walk too, since it cannot equal a non-\0
     character of S1.  */
  while (*s1 != '\0' && *s1 == *s2)
    {
      ++s1;
      ++s2;
    }

  /* S1 is a prefix exactly when all of it was consumed.  */
  return *s1 == '\0';
}

742

743

/* Iterate through STRLIST, and return the first element that matches

744

S, through wildcards or front comparison (as appropriate). */

745

static char *

746

proclist (char **strlist, const char *s, enum accd flags)

747

{

748

char **x;

749

750

for (x = strlist; *x; x++)

751

if (has_wildcards_p (*x))

752

{

753

if (fnmatch (*x, s, FNM_PATHNAME) == 0)

754

break;

755

}

756

else

757

{

758

char *p = *x + ((flags & ALLABS) && (**x == '/')); /* Remove '/' */

759

if (frontcmp (p, s))

760

break;

761

}

762

return *x;

763

}

764

765

/* Returns whether DIRECTORY is acceptable for download, wrt the

766

include/exclude lists.

767

768

If FLAGS is ALLABS, the leading `/' is ignored in paths; relative

769

and absolute paths may be freely intermixed. */

770

int

771

accdir (const char *directory, enum accd flags)

772

{

773

/* Remove starting '/'. */

774

if (flags & ALLABS && *directory == '/')

775

++directory;

776

if (opt.includes)

777

{

778

if (!proclist (opt.includes, directory, flags))

779

return 0;

780

}

781

if (opt.excludes)

782

{

783

if (proclist (opt.excludes, directory, flags))

784

return 0;

785

}

786

return 1;

787

}

788

789

/* Return 1 if STRING ends with TAIL, 0 otherwise.  For instance:

   match_tail ("abc", "bc", 0)  -> 1
   match_tail ("abc", "ab", 0)  -> 0
   match_tail ("abc", "abc", 0) -> 1

   If FOLD_CASE_P is non-zero, characters are compared after being
   passed through TOLOWER, which makes the comparison
   case-insensitive.  */

int
match_tail (const char *string, const char *tail, int fold_case_p)
{
  int slen = strlen (string);
  int tlen = strlen (tail);
  const char *candidate;
  int k;

  /* A tail longer than the string can never match.  */
  if (tlen > slen)
    return 0;

  /* The last TLEN characters of STRING are the only candidate.  */
  candidate = string + (slen - tlen);

  if (!fold_case_p)
    return strcmp (candidate, tail) == 0;

  for (k = 0; k < tlen; k++)
    if (TOLOWER (candidate[k]) != TOLOWER (tail[k]))
      return 0;

  /* The whole tail was compared successfully.  */
  return 1;
}

825

826

/* Return 1 if the string S matches any element of the
   NULL-terminated list ACCEPTS, 0 otherwise.

   An element containing wildcards is matched with fnmatch().  Any
   other element is matched with match_tail() (S has to end with the
   element) when BACKWARD is non-zero, and with strcmp() (S has to
   equal the element) when BACKWARD is 0.  */
static int
in_acclist (const char *const *accepts, const char *s, int backward)
{
  const char *const *entry;

  for (entry = accepts; *entry != NULL; entry++)
    {
      int hit;

      if (has_wildcards_p (*entry))
	/* fnmatch returns 0 if the pattern *does* match the
	   string.  */
	hit = (fnmatch (*entry, s, 0) == 0);
      else if (backward)
	hit = (match_tail (s, *entry, 0) != 0);
      else
	hit = (strcmp (s, *entry) == 0);

      if (hit)
	return 1;
    }

  return 0;
}

860

861

/* Return a pointer to the suffix (file extension) of STR, i.e. to
   the character after the last '.' that follows the last '/'.  The
   pointer points into STR itself.  NULL is returned when there is no
   such '.'.  Examples:

   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  const char *p = str + strlen (str);

  /* Scan backwards from the terminating \0 until a '.' or a '/' is
     met, or the beginning of the string is reached.  */
  while (p != str && *p != '/' && *p != '.')
    --p;

  if (*p == '.')
    return (char *)p + 1;
  return NULL;
}

879

880

/* Return 1 if S contains at least one globbing wildcard character
   (`*', `?', `[' or `]'), 0 otherwise.  */

int
has_wildcards_p (const char *s)
{
  return strpbrk (s, "*?[]") != NULL;
}

891

892

/* Return 1 if FNAME ends with a typical HTML suffix, 0 otherwise.
   The following suffixes, compared case-insensitively, count as
   HTML:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
int
has_html_suffix_p (const char *fname)
{
  char *ext = suffix (fname);

  if (ext == NULL)
    return 0;

  return (strcasecmp (ext, "html") == 0
	  || strcasecmp (ext, "htm") == 0
	  /* One arbitrary character followed by "html".  */
	  || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}

916

917

/* Read one line from FP and return it in freshly allocated storage,
   which the caller owns and must free when it is no longer needed.

   The length of the line is limited only by available memory.  The
   newline at the end of the line, if any, is kept, and the line is
   terminated with a zero character.

   NULL is returned when end-of-file is reached before anything is
   read, and also on a read error.  Use ferror() on FP to tell the
   two cases apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;			/* characters stored so far */
  int capacity = 82;		/* current size of the buffer */
  char *buf = (char *)xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
	break;

      used += strlen (buf + used);
      if (used == 0)
	/* Possible for example when reading from a binary file where
	   a line begins with \0.  */
	continue;

      if (buf[used - 1] == '\n')
	break;

      /* No newline yet, so the space we gave fgets() was not enough
	 for the rest of the line (or the input ended): double the
	 buffer and read on.  */
      capacity *= 2;
      buf = xrealloc (buf, capacity);
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back the memory the doubling over-allocated.  `used + 1'
     accounts for the terminating \0, which fgets() has already
     stored.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);

  return buf;
}

966

967

/* Read FILE into memory. A pointer to `struct file_memory' is

968

returned; use struct element `content' to access file contents, and

969

the element `length' to know the file length. `content' is *not*

970

zero-terminated, and you should *not* read or write beyond the [0,

971

length) range of characters.

972

973

After you are done with the file contents, call read_file_free to

974

release the memory.

975

976

Depending on the operating system and the type of file that is

977

being read, read_file() either mmap's the file into memory, or

978

reads the file into the core using read().

979

980

If file is named "-", fileno(stdin) is used for reading instead.

981

If you want to read from a real file named "-", use "./-" instead. */

982

983

struct file_memory *

984

read_file (const char *file)

985

{

986

int fd;

987

struct file_memory *fm;

988

long size;

989

int inhibit_close = 0;

990

991

/* Some magic in the finest tradition of Perl and its kin: if FILE

992

is "-", just use stdin. */

993

if (HYPHENP (file))

994

{

995

fd = fileno (stdin);

996

inhibit_close = 1;

997

/* Note that we don't inhibit mmap() in this case. If stdin is

998

redirected from a regular file, mmap() will still work. */

999

}

1000

else

1001

fd = open (file, O_RDONLY);

1002

if (fd < 0)

1003

return NULL;

1004

fm = xmalloc (sizeof (struct file_memory));

1005

1006

#ifdef HAVE_MMAP

1007

{

1008

struct stat buf;

1009

if (fstat (fd, &buf) < 0)

1010

goto mmap_lose;

1011

fm->length = buf.st_size;

1012

/* NOTE: As far as I know, the callers of this function never

1013

modify the file text. Relying on this would enable us to

1014

specify PROT_READ and MAP_SHARED for a marginal gain in

1015

efficiency, but at some cost to generality. */

1016

fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,

1017

MAP_PRIVATE, fd, 0);

1018

if (fm->content == (char *)MAP_FAILED)

1019

goto mmap_lose;

1020

if (!inhibit_close)

1021

close (fd);

1022

1023

fm->mmap_p = 1;

1024

return fm;

1025

}

1026

1027

mmap_lose:

1028

/* The most common reason why mmap() fails is that FD does not point

1029

to a plain file. However, it's also possible that mmap() doesn't

1030

work for a particular type of file. Therefore, whenever mmap()

1031

fails, we just fall back to the regular method. */

1032

#endif /* HAVE_MMAP */

1033

1034

fm->length = 0;

1035

size = 512; /* number of bytes fm->contents can

1036

hold at any given time. */

1037

fm->content = xmalloc (size);

1038

while (1)

1039

{

1040

long nread;

1041

if (fm->length > size / 2)

1042

{

1043

/* #### I'm not sure whether the whole exponential-growth

1044

thing makes sense with kernel read. On Linux at least,

1045

read() refuses to read more than 4K from a file at a

1046

single chunk anyway. But other Unixes might optimize it

1047

better, and it doesn't *hurt* anything, so I'm leaving

1048

it. */

1049

1050

/* Normally, we grow SIZE exponentially to make the number

1051

of calls to read() and realloc() logarithmic in relation

1052

to file size. However, read() can read an amount of data

1053

smaller than requested, and it would be unreasonable to

1054

double SIZE every time *something* was read. Therefore,

1055

we double SIZE only when the length exceeds half of the

1056

entire allocated size. */

1057

size <<= 1;

1058

fm->content = xrealloc (fm->content, size);

1059

}

1060

nread = read (fd, fm->content + fm->length, size - fm->length);

1061

if (nread > 0)

1062

/* Successful read. */

1063

fm->length += nread;

1064

else if (nread < 0)

1065

/* Error. */

1066

goto lose;

1067

else

1068

/* EOF */

1069

break;

1070

}

1071

if (!inhibit_close)

1072

close (fd);

1073

if (size > fm->length && fm->length != 0)

1074

/* Due to exponential growth of fm->content, the allocated region

1075

might be much larger than what is actually needed. */

1076

fm->content = xrealloc (fm->content, fm->length);

1077

fm->mmap_p = 0;

1078

return fm;

1079

1080

lose:

1081

if (!inhibit_close)

1082

close (fd);

1083

xfree (fm->content);

1084

xfree (fm);

1085

return NULL;

1086

}

1087

1088

/* Release the resources held by FM. Specifically, this calls

1089

munmap() or xfree() on fm->content, depending whether mmap or

1090

malloc/read were used to read in the file. It also frees the

1091

memory needed to hold the FM structure itself. */

1092

1093

void

1094

read_file_free (struct file_memory *fm)

1095

{

1096

#ifdef HAVE_MMAP

1097

if (fm->mmap_p)

1098

{

1099

munmap (fm->content, fm->length);

1100

}

1101

else

1102

#endif

1103

{

1104

xfree (fm->content);

1105

}

1106

xfree (fm);

1107

}

1108

1109

/* Free every string in the NULL-terminated vector VEC, then free VEC
   itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **p;

  if (vec == NULL)
    return;

  for (p = vec; *p != NULL; p++)
    xfree (*p);
  xfree (vec);
}

1122

1123

/* Append vector V2 to vector V1.  Both are NULL-terminated vectors of
   pointers.  The function frees V2 and reallocates V1 (thus you may
   not use the contents of either pointer after the call; use the
   returned vector instead).  If V1 is NULL, V2 is returned; if V2 is
   NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 is empty: nothing to append (avoids j == 0 below).  */
      xfree (v2);
      return v1;
    }
  /* Count v1. */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2. */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the terminating
     NULL.  The elements are `char *', so that is the size to use.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements, including its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}

1151

1152

/* A set of simple-minded routines to store strings in a linked list.

1153

This used to also be used for searching, but now we have hash

1154

tables for that. */

1155

1156

/* It's a shame that these simple things like linked lists and hash

1157

tables (see hash.c) need to be implemented over and over again. It

1158

would be nice to be able to use the routines from glib -- see

1159

www.gtk.org for details. However, that would make Wget depend on

1160

glib, and I want to avoid dependencies to external libraries for

1161

reasons of convenience and portability (I suspect Wget is more

1162

portable than anything ever written for Gnome). */

1163

1164

/* Append an element to the list. If the list has a huge number of

1165

elements, this can get slow because it has to find the list's

1166

ending. If you think you have to call slist_append in a loop,

1167

think about calling slist_prepend() followed by slist_nreverse(). */

1168

1169

slist *

1170

slist_append (slist *l, const char *s)

1171

{

1172

slist *newel = (slist *)xmalloc (sizeof (slist));

1173

slist *beg = l;

1174

1175

newel->string = xstrdup (s);

1176

newel->next = NULL;

1177

1178

if (!l)

1179

return newel;

1180

/* Find the last element. */

1181

while (l->next)

1182

l = l->next;

1183

l->next = newel;

1184

return beg;

1185

}

1186

1187

/* Prepend S to the list. Unlike slist_append(), this is O(1). */

1188

1189

slist *

1190

slist_prepend (slist *l, const char *s)

1191

{

1192

slist *newel = (slist *)xmalloc (sizeof (slist));

1193

newel->string = xstrdup (s);

1194

newel->next = l;

1195

return newel;

1196

}

1197

1198

/* Destructively reverse L. */

1199

1200

slist *

1201

slist_nreverse (slist *l)

1202

{

1203

slist *prev = NULL;

1204

while (l)

1205

{

1206

slist *next = l->next;

1207

l->next = prev;

1208

prev = l;

1209

l = next;

1210

}

1211

return prev;

1212

}

1213

1214

/* Is there a specific entry in the list? */

1215

int

1216

slist_contains (slist *l, const char *s)

1217

{

1218

for (; l; l = l->next)

1219

if (!strcmp (l->string, s))

1220

return 1;

1221

return 0;

1222

}

1223

1224

/* Free the whole slist. */

1225

void

1226

slist_free (slist *l)

1227

{

1228

while (l)

1229

{

1230

slist *n = l->next;

1231

xfree (l->string);

1232

xfree (l);

1233

l = n;

1234

}

1235

}

1236

1237

/* Sometimes it's useful to create "sets" of strings, i.e. special

1238

hash tables where you want to store strings as keys and merely

1239

query for their existence. Here is a set of utility routines that

1240

makes that transparent. */

1241

1242

void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Only add S when it is not already a member; that way an existing
     key never has to be freed and replaced by a fresh copy.  The key
     stored is a copy of S.

     The value is the literal "1": a clear arbitrary value that costs
     no memory, since the same literal is shared by all the key-value
     pairs in all `set' hash tables.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}

1257

1258

/* Tell whether string S is a member of the set HT.  This is merely
   another name for hash_table_contains, whose result is returned
   unchanged.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int found = hash_table_contains (ht, s);
  return found;
}

1265

1266

/* Callback handed to hash_table_map by string_set_free: free the KEY
   of one entry.  The value and the extra argument are not used.
   Always returns 0.  */
static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}

1272

1273

void

1274

string_set_free (struct hash_table *ht)

1275

{

1276

hash_table_map (ht, string_set_free_mapper, NULL);

1277

hash_table_destroy (ht);

1278

}

1279

1280

/* Callback handed to hash_table_map by free_keys_and_values: free
   both the KEY and the VALUE of one entry.  The extra argument is not
   used.  Always returns 0.  */
static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}

1287

1288

/* Another utility function: call free() on all keys and values of HT. */

1289

1290

void

1291

free_keys_and_values (struct hash_table *ht)

1292

{

1293

hash_table_map (ht, free_keys_and_values_mapper, NULL);

1294

}

1295

1296

1297

/* Engine for legible and legible_large_int; add thousand separators
   to numbers printed in strings.

   REPR is the printed number, optionally starting with `-'.  The
   result is stored in a static buffer, which is overwritten by the
   next call, and a pointer to that buffer is returned.  */

static char *
legible_1 (const char *repr)
{
  static char outbuf[48];
  char *out = outbuf;
  const char *digits = repr;
  int remaining;

  /* The sign is copied verbatim and takes no part in the grouping.  */
  if (*digits == '-')
    {
      *out++ = '-';
      ++digits;
    }

  /* Copy the characters one by one.  A separator follows a character
     whenever the number of characters still to come is a non-zero
     multiple of three.  */
  for (remaining = strlen (digits); remaining > 0; remaining--)
    {
      *out++ = *digits++;
      if (remaining > 1 && (remaining - 1) % 3 == 0)
	*out++ = ',';
    }

  /* Zero-terminate the string. */
  *out = '\0';
  return outbuf;
}

1336

1337

/* Legible -- print the long L with thousand separators and return a
   pointer to the result.  The result lives in the static buffer of
   legible_1, so it is overwritten by the next call.  */

char *
legible (long l)
{
  /* 24 bytes is what number_to_string asks for on machines with
     64-bit longs.  */
  char printed[24];

  number_to_string (printed, l);
  return legible_1 (printed);
}

1347

1348

/* Write a string representation of LARGE_INT NUMBER into the provided

1349

buffer. The buffer should be able to accept 24 characters,

1350

including the terminating zero.

1351

1352

It would be dangerous to use sprintf, because the code wouldn't

1353

work on a machine with gcc-provided long long support, but without

1354

libc support for "%lld". However, such platforms will typically

1355

not have snprintf and will use our version, which does support

1356

"%lld" where long longs are available. */

1357

1358

static void

1359

large_int_to_string (char *buffer, LARGE_INT number)

1360

{

1361

snprintf (buffer, 24, LARGE_INT_FMT, number);

1362

}

1363

1364

/* The same as legible(), but works on LARGE_INT. */

1365

1366

char *

1367

legible_large_int (LARGE_INT l)

1368

{

1369

char inbuf[48];

1370

large_int_to_string (inbuf, l);

1371

return legible_1 (inbuf);

1372

}

1373

1374

/* Count the characters needed to print the (long) integer NUMBER in
   base 10: one per digit, plus one for the `-' sign when NUMBER is
   negative.  */
int
numdigit (long number)
{
  int cnt = 1;
  unsigned long magnitude;

  if (number < 0)
    {
      /* Negate in unsigned arithmetic; `-number' would overflow for
	 the most negative long.  */
      magnitude = 0UL - (unsigned long) number;
      ++cnt;			/* the minus sign */
    }
  else
    magnitude = (unsigned long) number;

  while ((magnitude /= 10) > 0)
    ++cnt;
  return cnt;
}

1388

1389

/* A half-assed implementation of INT_MAX on machines that don't

1390

bother to define one. */

1391

#ifndef INT_MAX

1392

# define INT_MAX ((int) ~((unsigned)1 << 8 * sizeof (int) - 1))

1393

#endif

1394

1395

/* Print NUMBER to BUFFER in base 10.  This should be completely
   equivalent to `sprintf(buffer, "%ld", number)', only faster, since
   no format string has to be interpreted.

   Return the pointer to the location where the terminating zero was
   printed.  (Equivalent to calling buffer+strlen(buffer) after the
   function is done.)

   BUFFER should be big enough to accept as many bytes as you expect
   the number to take up.  On machines with 64-bit longs the maximum
   needed size is 24 bytes.  That includes the digits needed for the
   largest 64-bit number, the `-' sign in case it's negative, and the
   terminating '\0'.  */

char *
number_to_string (char *buffer, long number)
{
  /* Digits are produced least significant first, so they are staged
     here and copied out in reverse.  A byte contributes fewer than
     three decimal digits, hence the size.  */
  char digits[3 * sizeof (long) + 1];
  char *d = digits;
  char *p = buffer;
  unsigned long n;

  if (number < 0)
    {
      *p++ = '-';
      /* Negate in unsigned arithmetic: `-number' would overflow for
	 the most negative long, whereas this is well defined for
	 every value.  */
      n = 0UL - (unsigned long) number;
    }
  else
    n = (unsigned long) number;

  /* A do-while, so that zero is printed as "0".  */
  do
    *d++ = (char) ('0' + n % 10);
  while ((n /= 10) != 0);

  while (d != digits)
    *p++ = *--d;

  *p = '\0';
  return p;
}

1520

1521

/* Support for timers. */

1522

1523

#undef TIMER_WINDOWS

1524

#undef TIMER_GETTIMEOFDAY

1525

#undef TIMER_TIME

1526

1527

/* Depending on the OS and availability of gettimeofday(), one and

1528

only one of the above constants will be defined. Virtually all

1529

modern Unix systems will define TIMER_GETTIMEOFDAY; Windows will

1530

use TIMER_WINDOWS. TIMER_TIME is a catch-all method for

1531

non-Windows systems without gettimeofday.

1532

1533

#### Perhaps we should also support ftime(), which exists on old

1534

BSD 4.2-influenced systems? (It also existed under MS DOS Borland

1535

C, if memory serves me.) */

1536

1537

/* Pick the timer backend and the matching type used to store a point
   in time: Windows builds use TIMER_WINDOWS, other systems use
   TIMER_GETTIMEOFDAY when gettimeofday() is available and fall back
   to TIMER_TIME otherwise.  */
#if defined WINDOWS
# define TIMER_WINDOWS
typedef ULARGE_INTEGER wget_sys_time;
#elif defined HAVE_GETTIMEOFDAY
# define TIMER_GETTIMEOFDAY
typedef struct timeval wget_sys_time;
#else
# define TIMER_TIME
typedef time_t wget_sys_time;
#endif

1558

1559

struct wget_timer {

1560

/* The starting point in time which, subtracted from the current

1561

time, yields elapsed time. */

1562

wget_sys_time start;

1563

1564

/* The most recent elapsed time, calculated by wtimer_elapsed().

1565

Measured in milliseconds. */

1566

double elapsed_last;

1567

1568

/* Approximately, the time elapsed between the true start of the

1569

measurement and the time represented by START. */

1570

double elapsed_pre_start;

1571

};

1572

1573

/* Allocate a timer. It is not legal to do anything with a freshly

1574

allocated timer, except call wtimer_reset() or wtimer_delete(). */

1575

1576

struct wget_timer *

1577

wtimer_allocate (void)

1578

{

1579

struct wget_timer *wt =

1580

(struct wget_timer *)xmalloc (sizeof (struct wget_timer));

1581

return wt;

1582

}

1583

1584

/* Convenience constructor: allocate a timer with wtimer_allocate(),
   reset it with wtimer_reset(), and return it ready for use.  */

struct wget_timer *
wtimer_new (void)
{
  struct wget_timer *timer;

  timer = wtimer_allocate ();
  wtimer_reset (timer);
  return timer;
}

1593

1594

/* Release the memory occupied by timer WT.  WT must not be used
   afterwards.  */

void
wtimer_delete (struct wget_timer *wt)
{
  xfree (wt);
}

1602

1603

/* Store system time to WST. */

1604

1605

static void

1606

wtimer_sys_set (wget_sys_time *wst)

1607

{

1608

#ifdef TIMER_GETTIMEOFDAY

1609

gettimeofday (wst, NULL);

1610

#endif

1611

1612

#ifdef TIMER_TIME

1613

time (wst);

1614

#endif

1615

1616

#ifdef TIMER_WINDOWS

1617

/* We use GetSystemTime to get the elapsed time. MSDN warns that

1618

system clock adjustments can skew the output of GetSystemTime

1619

when used as a timer and gives preference to GetTickCount and

1620

high-resolution timers. But GetTickCount can overflow, and hires

1621

timers are typically used for profiling, not for regular time

1622

measurement. Since we handle clock skew anyway, we just use

1623

GetSystemTime. */

1624

FILETIME ft;

1625

SYSTEMTIME st;

1626

GetSystemTime (&st);

1627

1628

/* As recommended by MSDN, we convert SYSTEMTIME to FILETIME, copy

1629

FILETIME to ULARGE_INTEGER, and use regular 64-bit integer

1630

arithmetic on that. */

1631

SystemTimeToFileTime (&st, &ft);

1632

wst->HighPart = ft.dwHighDateTime;

1633

wst->LowPart = ft.dwLowDateTime;

1634

#endif

1635

}

1636

1637

/* Reset timer WT. This establishes the starting point from which

1638

wtimer_elapsed() will return the number of elapsed

1639

milliseconds. It is allowed to reset a previously used timer. */

1640

1641

void

1642

wtimer_reset (struct wget_timer *wt)

1643

{

1644

/* Set the start time to the current time. */

1645

wtimer_sys_set (&wt->start);

1646

wt->elapsed_last = 0;

1647

wt->elapsed_pre_start = 0;

1648

}

1649

1650

static double

1651

wtimer_sys_diff (wget_sys_time *wst1, wget_sys_time *wst2)

1652

{

1653

#ifdef TIMER_GETTIMEOFDAY

1654

return ((double)(wst1->tv_sec - wst2->tv_sec) * 1000

1655

+ (double)(wst1->tv_usec - wst2->tv_usec) / 1000);

1656

#endif

1657

1658

#ifdef TIMER_TIME

1659

return 1000 * (*wst1 - *wst2);

1660

#endif

1661

1662

#ifdef WINDOWS

1663

/* VC++ 6 doesn't support direct cast of uint64 to double. To work

1664

around this, we subtract, then convert to signed, then finally to

1665

double. */

1666

return (double)(signed __int64)(wst1->QuadPart - wst2->QuadPart) / 10000;

1667

#endif

1668

}

1669

1670

/* Return the number of milliseconds elapsed since the timer was last

1671

reset. It is allowed to call this function more than once to get

1672

increasingly higher elapsed values. These timers handle clock

1673

skew. */

1674

1675

double

1676

wtimer_elapsed (struct wget_timer *wt)

1677

{

1678

wget_sys_time now;

1679

double elapsed;

1680

1681

wtimer_sys_set (&now);

1682

elapsed = wt->elapsed_pre_start + wtimer_sys_diff (&now, &wt->start);

1683

1684

/* Ideally we'd just return the difference between NOW and

1685

wt->start. However, the system timer can be set back, and we

1686

could return a value smaller than when we were last called, even

1687

a negative value. Both of these would confuse the callers, which

1688

expect us to return monotonically nondecreasing values.

1689

1690

Therefore: if ELAPSED is smaller than its previous known value,

1691

we reset wt->start to the current time and effectively start

1692

measuring from this point. But since we don't want the elapsed

1693

value to start from zero, we set elapsed_pre_start to the last

1694

elapsed time and increment all future calculations by that

1695

amount. */

1696

1697

if (elapsed < wt->elapsed_last)

1698

{

1699

wt->start = now;

1700

wt->elapsed_pre_start = wt->elapsed_last;

1701

elapsed = wt->elapsed_last;

1702

}

1703

1704

wt->elapsed_last = elapsed;

1705

return elapsed;

1706

}

1707

1708

/* Return the assessed granularity of the timer implementation, in
   milliseconds.  This is used by code that tries to substitute a
   better value for timers that have returned zero.  */

double
wtimer_granularity (void)
{
#if defined TIMER_GETTIMEOFDAY
  /* Granularity of gettimeofday varies wildly between architectures.
     However, it appears that on modern machines it tends to be better
     than 1ms.  Assume 100 usecs.  (Perhaps the configure process
     could actually measure this?)  */
  return 0.1;
#elif defined TIMER_TIME
  /* time() advances in whole seconds.  */
  return 1000;
#elif defined TIMER_WINDOWS
  /* According to MSDN, GetSystemTime returns a broken-down time
     structure the smallest member of which are milliseconds.  */
  return 1;
#endif
}

1733

1734

/* This should probably be at a better place, but it doesn't really

1735

fit into html-parse.c. */

1736

1737

/* Return a malloc-ed copy of string S in which the characters that
   are special in HTML are replaced with entities, as per RFC1866:

       `&'  ->  `&amp;'
       `<'  ->  `&lt;'
       `>'  ->  `&gt;'
       `"'  ->  `&quot;'
       SP   ->  `&#32;'

   All other characters are copied unchanged.  The caller owns the
   returned string.  */
char *
html_quote_string (const char *s)
{
  const char *src;
  char *res, *out;
  int size = 0;

  /* First pass: compute the length of the quoted string.  */
  for (src = s; *src; src++)
    switch (*src)
      {
      case '&':
      case ' ':
	size += 5;		/* `&amp;' and `&#32;' */
	break;
      case '<':
      case '>':
	size += 4;		/* `&lt;' and `&gt;' */
	break;
      case '\"':
	size += 6;		/* `&quot;' */
	break;
      default:
	size += 1;
	break;
      }

  res = (char *)xmalloc (size + 1);

  /* Second pass: copy S, expanding the special characters.  */
  out = res;
  for (src = s; *src; src++)
    {
      const char *entity;

      if (*src == '&')
	entity = "&amp;";
      else if (*src == '<')
	entity = "&lt;";
      else if (*src == '>')
	entity = "&gt;";
      else if (*src == '\"')
	entity = "&quot;";
      else if (*src == ' ')
	entity = "&#32;";
      else
	{
	  *out++ = *src;
	  continue;
	}

      while (*entity)
	*out++ = *entity++;
    }
  *out = '\0';
  return res;
}

1808

1809

/* Determine the width of the terminal we're running on. If that's

1810

not possible, return 0. */

1811

1812

int

1813

determine_screen_width (void)

1814

{

1815

/* If there's a way to get the terminal size using POSIX

1816

tcgetattr(), somebody please tell me. */

1817

#ifndef TIOCGWINSZ

1818

return 0;

1819

#else /* TIOCGWINSZ */

1820

int fd;

1821

struct winsize wsz;

1822

1823

if (opt.lfilename != NULL)

1824

return 0;

1825

1826

fd = fileno (stderr);

1827

if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)

1828

return 0; /* most likely ENOTTY */

1829

1830

return wsz.ws_col;

1831

#endif /* TIOCGWINSZ */

1832

}

1833

1834

/* Return a random number between 0 and MAX-1, inclusive.

   If MAX is greater than the value of RAND_MAX+1 on the system, the
   returned value will be in the range [0, RAND_MAX].  This may be
   fixed in a future release.

   The random number generator is seeded from the current time the
   first time this function is called.

   rand() is used for portability and simplicity; random() has been
   suggested as offering better randomness, but Wget does not need it.

   DO NOT use this for cryptographic purposes.  It is only meant to be
   used in situations where quality of the random numbers returned
   doesn't really matter.  */

int
random_number (int max)
{
  static int seeded;
  double scaled;
  int raw;

  if (!seeded)
    {
      srand (time (NULL));
      seeded = 1;
    }
  raw = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  raw &= RAND_MAX;
#endif

  /* Scale RAW from [0, RAND_MAX] down to [0, MAX).  The intent is the
     same as rand() % max, but this relies on the high-order bits,
     which are more random where rand() is implemented using a simple
     congruential generator.  */
  scaled = (double)max * raw / (RAND_MAX + 1.0);
  return (int)scaled;
}

1880

1881

/* Return a random floating point number in the [0, 1) range, built
   from three calls to random_number (1000), each of which supplies
   three decimal digits.  The precision of returned numbers is
   therefore 9 digits.

   Modify this to use erand48() where available!  */

double
random_float (void)
{
  /* We can't rely on any specific value of RAND_MAX, but I'm pretty
     sure it's greater than 1000.  */
  int thousandths = random_number (1000);
  int millionths  = random_number (1000);
  int billionths  = random_number (1000);

  return (thousandths / 1000.0
	  + millionths / 1000000.0
	  + billionths / 1000000000.0);
}

1896

1897

#if 0
/* A debugging function for checking whether an MD5 library works.
   It computes the MD5 digest of the string BUF and returns it as 32
   hex digits in a static buffer.  The code is compiled out.  */

#include "gen-md5.h"

char *
debug_test_md5 (char *buf)
{
  unsigned char raw[16];
  static char res[33];
  char *out = res;
  int i;
  ALLOCA_MD5_CONTEXT (ctx);

  gen_md5_init (ctx);
  gen_md5_update ((unsigned char *)buf, strlen (buf), ctx);
  gen_md5_finish (ctx, raw);

  /* Two hex digits per digest byte, high nibble first.  */
  for (i = 0; i < 16; i++)
    {
      *out++ = XNUM_TO_digit (raw[i] >> 4);
      *out++ = XNUM_TO_digit (raw[i] & 0xf);
    }
  *out = '\0';

  return res;
}
#endif

1930

1931

/* Implementation of run_with_timeout, a generic timeout-forcing

1932

routine for systems with Unix-like signal handling. */

1933

1934

#ifdef USE_SIGNAL_TIMEOUT

1935

# ifdef HAVE_SIGSETJMP

1936

# define SETJMP(env) sigsetjmp (env, 1)

1937

1938

static sigjmp_buf run_with_timeout_env;

1939

1940

static RETSIGTYPE

1941

abort_run_with_timeout (int sig)

1942

{

1943

assert (sig == SIGALRM);

1944

siglongjmp (run_with_timeout_env, -1);

1945

}

1946

# else /* not HAVE_SIGSETJMP */

1947

# define SETJMP(env) setjmp (env)

1948

1949

static jmp_buf run_with_timeout_env;

1950

1951

static RETSIGTYPE

1952

abort_run_with_timeout (int sig)

1953

{

1954

assert (sig == SIGALRM);

1955

/* We don't have siglongjmp to preserve the set of blocked signals;

1956

if we longjumped out of the handler at this point, SIGALRM would

1957

remain blocked. We must unblock it manually. */

1958

int mask = siggetmask ();

1959

mask &= ~sigmask (SIGALRM);

1960

sigsetmask (mask);

1961

1962

/* Now it's safe to longjump. */

1963

longjmp (run_with_timeout_env, -1);

1964

}

1965

# endif /* not HAVE_SIGSETJMP */

1966

1967

/* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
   setitimer where ITIMER_REAL is defined, alarm otherwise.

   TIMEOUT should be non-zero.  If the timeout value is so small that
   it would be rounded to zero, it is rounded to the least legal value
   instead (1us for setitimer, 1s for alarm).  That ensures that
   SIGALRM will be delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  memset (&itv, 0, sizeof (itv));
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000L * (timeout - whole_secs);

  /* Ensure that we wait for at least the minimum interval.
     Specifying zero would mean "wait forever".  */
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    itv.it_value.tv_usec = 1;

  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface. */
  int secs = (int) timeout;

  /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
     because alarm(0) means "never deliver the alarm", i.e. "wait
     forever", which is not what someone who specifies a 0.5s
     timeout would expect.  */
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}

2001

2002

/* Disarm whatever timer alarm_set installed, so that no further
   SIGALRM is delivered on its behalf.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* Installing an all-zero itimerval turns the real-time timer off.  */
  struct itimerval zero_timer;

  memset (&zero_timer, 0, sizeof (zero_timer));
  setitimer (ITIMER_REAL, &zero_timer, NULL);
#else /* not ITIMER_REAL */
  /* alarm(0) cancels any pending alarm.  */
  alarm (0);
#endif /* not ITIMER_REAL */
}

2015

2016

/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT

2017

seconds. Returns non-zero if the function was interrupted with a

2018

timeout, zero otherwise.

2019

2020

This works by setting up SIGALRM to be delivered in TIMEOUT seconds

2021

using setitimer() or alarm(). The timeout is enforced by

2022

longjumping out of the SIGALRM handler. This has several

2023

advantages compared to the traditional approach of relying on

2024

signals causing system calls to exit with EINTR:

2025

2026

* The callback function is *forcibly* interrupted after the

2027

timeout expires, (almost) regardless of what it was doing and

2028

whether it was in a syscall. For example, a calculation that

2029

takes a long time is interrupted as reliably as an IO

2030

operation.

2031

2032

* It works with both SYSV and BSD signals because it doesn't

2033

depend on the default setting of SA_RESTART.

2034

2035

* It doesn't special handler setup beyond a simple call to

2036

signal(). (It does use sigsetjmp/siglongjmp, but they're

2037

optional.)

2038

2039

The only downside is that, if FUN allocates internal resources that

2040

are normally freed prior to exit from the functions, they will be

2041

lost in case of timeout. */

2042

2043

int

2044

run_with_timeout (double timeout, void (*fun) (void *), void *arg)

2045

{

2046

int saved_errno;

2047

2048

if (timeout == 0)

2049

{

2050

fun (arg);

2051

return 0;

2052

}

2053

2054

signal (SIGALRM, abort_run_with_timeout);

2055

if (SETJMP (run_with_timeout_env) != 0)

2056

{

2057

/* Longjumped out of FUN with a timeout. */

2058

signal (SIGALRM, SIG_DFL);

2059

return 1;

2060

}

2061

alarm_set (timeout);

2062

fun (arg);

2063

2064

/* Preserve errno in case alarm() or signal() modifies it. */

2065

saved_errno = errno;

2066

alarm_cancel ();

2067

signal (SIGALRM, SIG_DFL);

2068

errno = saved_errno;

2069

2070

return 0;

2071

}

2072

2073

#else /* not USE_SIGNAL_TIMEOUT */

2074

2075

#ifndef WINDOWS

2076

/* Fallback run_with_timeout for builds without signal-based timeouts:
   it just calls FUN(ARG), ignores TIMEOUT, and always returns 0.  It
   is not defined under Windows, because Windows has its own version
   of run_with_timeout that uses threads.  */

int
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  /* TIMEOUT is deliberately unused in this stub.  */
  (void) timeout;

  fun (arg);
  return 0;
}

2086

#endif /* not WINDOWS */

2087

#endif /* not USE_SIGNAL_TIMEOUT */

Older »