1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
* contributor license agreements. See the NOTICE file distributed with
3
* this work for additional information regarding copyright ownership.
4
* The ASF licenses this file to You under the Apache License, Version 2.0
5
* (the "License"); you may not use this file except in compliance with
6
* the License. You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
18
* htcacheclean.c: simple program for cleaning of
19
* the disk cache of the Apache HTTP server
21
* Contributed by Andreas Steinmetz <ast domdv.de>
27
#include "apr_strings.h"
28
#include "apr_file_io.h"
29
#include "apr_file_info.h"
30
#include "apr_pools.h"
32
#include "apr_thread_proc.h"
33
#include "apr_signal.h"
34
#include "apr_getopt.h"
37
#include "../modules/cache/mod_disk_cache.h"
46
/* define the following for debugging */
50
* Note: on Linux delays <= 2ms are busy waits without
51
* scheduling, so never use a delay <= 2ms below
54
#define NICE_DELAY 10000 /* usecs */
55
#define DELETE_NICE 10 /* be nice after this amount of delete ops */
56
#define STAT_ATTEMPTS 10 /* maximum stat attempts for a file */
57
#define STAT_DELAY 5000 /* usecs */
58
#define HEADER 1 /* headers file */
59
#define DATA 2 /* body file */
60
#define TEMP 4 /* temporary file */
61
#define HEADERDATA (HEADER|DATA)
62
#define MAXDEVIATION 3600 /* secs */
63
#define SECS_PER_MIN 60
66
#define GBYTE 1073741824
68
#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_LINK)
70
typedef struct _direntry {
71
APR_RING_ENTRY(_direntry) link;
72
int type; /* type of file/fileset: TEMP, HEADER, DATA, HEADERDATA */
73
apr_time_t htime; /* headers file modification time */
74
apr_time_t dtime; /* body file modification time */
75
apr_off_t hsize; /* headers file size */
76
apr_off_t dsize; /* body or temporary file size */
77
char *basename; /* file/fileset base name */
80
typedef struct _entry {
81
APR_RING_ENTRY(_entry) link;
82
apr_time_t expire; /* cache entry expiration time */
83
apr_time_t response_time; /* cache entry time of last response to client */
84
apr_time_t htime; /* headers file modification time */
85
apr_time_t dtime; /* body file modification time */
86
apr_off_t hsize; /* headers file size */
87
apr_off_t dsize; /* body or temporary file size */
88
char *basename; /* fileset base name */
92
static int delcount; /* file deletion count for nice mode */
93
static int interrupted; /* flag: true if SIGINT or SIGTERM occurred */
94
static int realclean; /* flag: true means user said apache is not running */
95
static int verbose; /* flag: true means print statistics */
96
static int benice; /* flag: true means nice mode is activated */
97
static int dryrun; /* flag: true means dry run, don't actually delete anything */
99
static int deldirs; /* flag: true means directories should be deleted */
100
static int baselen; /* string length of the path to the proxy directory */
101
static apr_time_t now; /* start time of this processing run */
103
static apr_file_t *errfile; /* stderr file handle */
104
static apr_off_t unsolicited; /* file size summary for deleted unsolicited files */
106
static APR_RING_ENTRY(_entry) root; /* ENTRY ring anchor */
108
/* short program name as called */
109
static const char *shortname = "htcacheclean";
113
* fake delete for debug purposes
115
#define apr_file_remove fake_file_remove
116
static void fake_file_remove(char *pathname, apr_pool_t *p)
120
/* stat and printing to simulate some deletion system load and to
121
display what would actually have happened */
122
apr_stat(&info, pathname, DIRINFO, p);
123
apr_file_printf(errfile, "would delete %s" APR_EOL_STR, pathname);
128
* called on SIGINT or SIGTERM
130
static void setterm(int unused)
133
apr_file_printf(errfile, "interrupt" APR_EOL_STR);
139
* called in out of memory condition
141
static int oom(int unused)
143
static int called = 0;
145
/* be careful to call exit() only once */
154
* print purge statistics
156
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max,
157
apr_off_t etotal, apr_off_t entries)
159
char ttype, stype, mtype, utype;
160
apr_off_t tfrag, sfrag, ufrag;
167
tfrag = ((total * 10) / KBYTE) % 10;
169
if (total >= KBYTE) {
171
tfrag = ((total * 10) / KBYTE) % 10;
176
sfrag = ((sum * 10) / KBYTE) % 10;
180
sfrag = ((sum * 10) / KBYTE) % 10;
191
apr_file_printf(errfile, "Statistics:" APR_EOL_STR);
194
ufrag = ((unsolicited * 10) / KBYTE) % 10;
195
unsolicited /= KBYTE;
196
if (unsolicited >= KBYTE) {
198
ufrag = ((unsolicited * 10) / KBYTE) % 10;
199
unsolicited /= KBYTE;
201
if (!unsolicited && !ufrag) {
204
apr_file_printf(errfile, "unsolicited size %d.%d%c" APR_EOL_STR,
205
(int)(unsolicited), (int)(ufrag), utype);
207
apr_file_printf(errfile, "size limit %d.0%c" APR_EOL_STR,
209
apr_file_printf(errfile, "total size was %d.%d%c, total size now "
210
"%d.%d%c" APR_EOL_STR,
211
(int)(total), (int)(tfrag), ttype, (int)(sum),
212
(int)(sfrag), stype);
213
apr_file_printf(errfile, "total entries was %d, total entries now %d"
214
APR_EOL_STR, (int)(etotal), (int)(entries));
218
* delete a single file
220
static void delete_file(char *path, char *basename, apr_pool_t *pool)
229
/* temp pool, otherwise lots of memory could be allocated */
230
apr_pool_create(&p, pool);
231
nextpath = apr_pstrcat(p, path, "/", basename, NULL);
232
apr_file_remove(nextpath, p);
236
if (++delcount >= DELETE_NICE) {
237
apr_sleep(NICE_DELAY);
244
* delete cache file set
246
static void delete_entry(char *path, char *basename, apr_pool_t *pool)
255
/* temp pool, otherwise lots of memory could be allocated */
256
apr_pool_create(&p, pool);
258
nextpath = apr_pstrcat(p, path, "/", basename, CACHE_HEADER_SUFFIX, NULL);
259
apr_file_remove(nextpath, p);
261
nextpath = apr_pstrcat(p, path, "/", basename, CACHE_DATA_SUFFIX, NULL);
262
apr_file_remove(nextpath, p);
268
if (delcount >= DELETE_NICE) {
269
apr_sleep(NICE_DELAY);
276
* walk the cache directory tree
278
static int process_dir(char *path, apr_pool_t *pool)
288
apr_time_t current, deviation;
289
char *nextpath, *base, *ext, *orig_basename;
290
APR_RING_ENTRY(_direntry) anchor;
294
disk_cache_info_t disk_info;
296
APR_RING_INIT(&anchor, _direntry, link);
297
apr_pool_create(&p, pool);
298
h = apr_hash_make(p);
301
deviation = MAXDEVIATION * APR_USEC_PER_SEC;
303
if (apr_dir_open(&dir, path, p) != APR_SUCCESS) {
307
while (apr_dir_read(&info, 0, dir) == APR_SUCCESS && !interrupted) {
308
if (!strcmp(info.name, ".") || !strcmp(info.name, "..")) {
311
d = apr_pcalloc(p, sizeof(DIRENTRY));
312
d->basename = apr_pstrcat(p, path, "/", info.name, NULL);
313
APR_RING_INSERT_TAIL(&anchor, d, _direntry, link);
324
for (d = APR_RING_FIRST(&anchor);
325
!interrupted && d != APR_RING_SENTINEL(&anchor, _direntry, link);
327
n = APR_RING_NEXT(d, link);
328
base = strrchr(d->basename, '/');
332
ext = strchr(base, '.');
334
/* there may be temporary files which may be gone before
335
* processing, always skip these if not in realclean mode
337
if (!ext && !realclean) {
338
if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
339
&& strlen(base) == AP_TEMPFILE_NAMELEN) {
344
/* this may look strange but apr_stat() may return errno which
345
* is system dependent and there may be transient failures,
346
* so just blindly retry for a short while
348
retries = STAT_ATTEMPTS;
349
status = APR_SUCCESS;
351
if (status != APR_SUCCESS) {
352
apr_sleep(STAT_DELAY);
354
status = apr_stat(&info, d->basename, DIRINFO, p);
355
} while (status != APR_SUCCESS && !interrupted && --retries);
357
/* what may happen here is that apache did create a file which
358
* we did detect but then does delete the file before we can
359
* get file information, so if we don't get any file information
360
* we will ignore the file in this case
362
if (status != APR_SUCCESS) {
363
if (!realclean && !interrupted) {
369
if (info.filetype == APR_DIR) {
370
/* Make a copy of the basename, as process_dir modifies it */
371
orig_basename = apr_pstrdup(pool, d->basename);
372
if (process_dir(d->basename, pool)) {
376
/* If asked to delete dirs, do so now. We don't care if it fails.
377
* If it fails, it likely means there was something else there.
379
if (deldirs && !dryrun) {
380
apr_dir_remove(orig_basename, pool);
385
if (info.filetype != APR_REG) {
390
if (!strncasecmp(base, AP_TEMPFILE_BASE, AP_TEMPFILE_BASELEN)
391
&& strlen(base) == AP_TEMPFILE_NAMELEN) {
394
d->dsize = info.size;
395
apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
400
if (!strcasecmp(ext, CACHE_HEADER_SUFFIX)) {
403
/* if a user manually creates a '.header' file */
404
if (d->basename[0] == '\0') {
407
t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
412
d->htime = info.mtime;
413
d->hsize = info.size;
414
apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
418
if (!strcasecmp(ext, CACHE_DATA_SUFFIX)) {
421
/* if a user manually creates a '.data' file */
422
if (d->basename[0] == '\0') {
425
t = apr_hash_get(h, d->basename, APR_HASH_KEY_STRING);
430
d->dtime = info.mtime;
431
d->dsize = info.size;
432
apr_hash_set(h, d->basename, APR_HASH_KEY_STRING, d);
440
path[baselen] = '\0';
442
for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
446
apr_hash_this(i, NULL, NULL, &hvalue);
451
nextpath = apr_pstrcat(p, path, "/", d->basename,
452
CACHE_HEADER_SUFFIX, NULL);
453
if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
454
APR_OS_DEFAULT, p) == APR_SUCCESS) {
455
len = sizeof(format);
456
if (apr_file_read_full(fd, &format, len,
457
&len) == APR_SUCCESS) {
458
if (format == DISK_FORMAT_VERSION) {
459
apr_off_t offset = 0;
461
apr_file_seek(fd, APR_SET, &offset);
463
len = sizeof(disk_cache_info_t);
465
if (apr_file_read_full(fd, &disk_info, len,
466
&len) == APR_SUCCESS) {
468
e = apr_palloc(pool, sizeof(ENTRY));
469
APR_RING_INSERT_TAIL(&root, e, _entry, link);
470
e->expire = disk_info.expire;
471
e->response_time = disk_info.response_time;
476
e->basename = apr_palloc(pool,
477
strlen(d->basename) + 1);
478
strcpy(e->basename, d->basename);
485
else if (format == VARY_FORMAT_VERSION) {
486
/* This must be a URL that added Vary headers later,
487
* so kill the orphaned .data file
490
apr_file_remove(apr_pstrcat(p, path, "/", d->basename,
491
CACHE_DATA_SUFFIX, NULL),
500
/* we have a somehow unreadable headers file which is associated
501
* with a data file. this may be caused by apache currently
502
* rewriting the headers file. thus we may delete the file set
503
* either in realclean mode or if the headers file modification
504
* timestamp is not within a specified positive or negative offset
505
* to the current time.
507
current = apr_time_now();
508
if (realclean || d->htime < current - deviation
509
|| d->htime > current + deviation) {
510
delete_entry(path, d->basename, p);
511
unsolicited += d->hsize;
512
unsolicited += d->dsize;
516
/* single data and header files may be deleted either in realclean
517
* mode or if their modification timestamp is not within a
518
* specified positive or negative offset to the current time.
519
* this handling is necessary due to possible race conditions
520
* between apache and this process
523
current = apr_time_now();
524
nextpath = apr_pstrcat(p, path, "/", d->basename,
525
CACHE_HEADER_SUFFIX, NULL);
526
if (apr_file_open(&fd, nextpath, APR_FOPEN_READ | APR_FOPEN_BINARY,
527
APR_OS_DEFAULT, p) == APR_SUCCESS) {
528
len = sizeof(format);
529
if (apr_file_read_full(fd, &format, len,
530
&len) == APR_SUCCESS) {
531
if (format == VARY_FORMAT_VERSION) {
534
len = sizeof(expires);
536
apr_file_read_full(fd, &expires, len, &len);
540
if (expires < current) {
541
delete_entry(path, d->basename, p);
549
if (realclean || d->htime < current - deviation
550
|| d->htime > current + deviation) {
551
delete_entry(path, d->basename, p);
552
unsolicited += d->hsize;
557
current = apr_time_now();
558
if (realclean || d->dtime < current - deviation
559
|| d->dtime > current + deviation) {
560
delete_entry(path, d->basename, p);
561
unsolicited += d->dsize;
565
/* temp files may only be deleted in realclean mode which
566
* is asserted above if a tempfile is in the hash array
569
delete_file(path, d->basename, p);
570
unsolicited += d->dsize;
582
apr_sleep(NICE_DELAY);
593
* purge cache entries
595
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
597
apr_off_t sum, total, entries, etotal;
598
ENTRY *e, *n, *oldest;
603
for (e = APR_RING_FIRST(&root);
604
e != APR_RING_SENTINEL(&root, _entry, link);
605
e = APR_RING_NEXT(e, link)) {
615
printstats(total, sum, max, etotal, entries);
619
/* process all entries with a timestamp in the future, this may
620
* happen if a wrong system time is corrected
623
for (e = APR_RING_FIRST(&root);
624
e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
625
n = APR_RING_NEXT(e, link);
626
if (e->response_time > now || e->htime > now || e->dtime > now) {
627
delete_entry(path, e->basename, pool);
631
APR_RING_REMOVE(e, link);
634
printstats(total, sum, max, etotal, entries);
646
/* process all entries which are expired */
647
for (e = APR_RING_FIRST(&root);
648
e != APR_RING_SENTINEL(&root, _entry, link) && !interrupted;) {
649
n = APR_RING_NEXT(e, link);
650
if (e->expire != APR_DATE_BAD && e->expire < now) {
651
delete_entry(path, e->basename, pool);
655
APR_RING_REMOVE(e, link);
658
printstats(total, sum, max, etotal, entries);
670
/* process remaining entries oldest to newest, the check for an empty
671
* ring actually isn't necessary except when the compiler does
672
* corrupt 64bit arithmetics which happened to me once, so better safe
675
while (sum > max && !interrupted && !APR_RING_EMPTY(&root, _entry, link)) {
676
oldest = APR_RING_FIRST(&root);
678
for (e = APR_RING_NEXT(oldest, link);
679
e != APR_RING_SENTINEL(&root, _entry, link);
680
e = APR_RING_NEXT(e, link)) {
681
if (e->dtime < oldest->dtime) {
686
delete_entry(path, oldest->basename, pool);
687
sum -= oldest->hsize;
688
sum -= oldest->dsize;
690
APR_RING_REMOVE(oldest, link);
694
printstats(total, sum, max, etotal, entries);
701
#define NL APR_EOL_STR
702
static void usage(void)
704
apr_file_printf(errfile,
705
"%s -- program for cleaning the disk cache." NL
706
"Usage: %s [-Dvtrn] -pPATH -lLIMIT" NL
707
" %s [-nti] -dINTERVAL -pPATH -lLIMIT" NL
710
" -d Daemonize and repeat cache cleaning every INTERVAL minutes." NL
711
" This option is mutually exclusive with the -D, -v and -r" NL
714
" -D Do a dry run and don't delete anything. This option is mutually" NL
715
" exclusive with the -d option." NL
717
" -v Be verbose and print statistics. This option is mutually" NL
718
" exclusive with the -d option." NL
720
" -r Clean thoroughly. This assumes that the Apache web server is " NL
721
" not running. This option is mutually exclusive with the -d" NL
722
" option and implies -t." NL
724
" -n Be nice. This causes slower processing in favour of other" NL
727
" -t Delete all empty directories. By default only cache files are" NL
728
" removed, however with some configurations the large number of" NL
729
" directories created may require attention." NL
731
" -p Specify PATH as the root directory of the disk cache." NL
733
" -l Specify LIMIT as the total disk cache size limit. Attach 'K'" NL
734
" or 'M' to the number for specifying KBytes or MBytes." NL
736
" -i Be intelligent and run only when there was a modification of" NL
737
" the disk cache. This option is only possible together with the" NL
751
int main(int argc, const char * const argv[])
754
apr_time_t current, repeat, delay, previous;
756
apr_pool_t *pool, *instance;
759
int retries, isdaemon, limit_found, intelligent, dowork;
762
char *proxypath, *path;
775
previous = 0; /* avoid compiler warning */
778
if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
781
atexit(apr_terminate);
784
shortname = apr_filepath_name_get(argv[0]);
787
if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
790
apr_pool_abort_set(oom, pool);
791
apr_file_open_stderr(&errfile, pool);
792
apr_signal(SIGINT, setterm);
793
apr_signal(SIGTERM, setterm);
795
apr_getopt_init(&o, pool, argc, argv);
798
status = apr_getopt(o, "iDnvrtd:l:L:p:", &opt, &arg);
799
if (status == APR_EOF) {
802
else if (status != APR_SUCCESS) {
855
repeat = apr_atoi64(arg);
856
repeat *= SECS_PER_MIN;
857
repeat *= APR_USEC_PER_SEC;
870
rv = apr_strtoff(&max, arg, &end, 10);
871
if (rv == APR_SUCCESS) {
872
if ((*end == 'K' || *end == 'k') && !end[1]) {
875
else if ((*end == 'M' || *end == 'm') && !end[1]) {
878
else if ((*end == 'G' || *end == 'g') && !end[1]) {
881
else if (*end && /* neither empty nor [Bb] */
882
((*end != 'B' && *end != 'b') || end[1])) {
886
if (rv != APR_SUCCESS) {
887
apr_file_printf(errfile, "Invalid limit: %s"
888
APR_EOL_STR APR_EOL_STR, arg);
898
proxypath = apr_pstrdup(pool, arg);
899
if (apr_filepath_set(proxypath, pool) != APR_SUCCESS) {
907
if (o->ind != argc) {
911
if (isdaemon && (repeat <= 0 || verbose || realclean || dryrun)) {
915
if (!isdaemon && intelligent) {
919
if (!proxypath || max <= 0) {
923
if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS) {
926
baselen = strlen(path);
930
apr_file_close(errfile);
931
apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
936
apr_pool_create(&instance, pool);
938
now = apr_time_now();
939
APR_RING_INIT(&root, _entry, link);
944
switch (intelligent) {
950
retries = STAT_ATTEMPTS;
951
status = APR_SUCCESS;
954
if (status != APR_SUCCESS) {
955
apr_sleep(STAT_DELAY);
957
status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
958
} while (status != APR_SUCCESS && !interrupted && --retries);
960
if (status == APR_SUCCESS) {
961
previous = info.mtime;
968
retries = STAT_ATTEMPTS;
969
status = APR_SUCCESS;
972
if (status != APR_SUCCESS) {
973
apr_sleep(STAT_DELAY);
975
status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
976
} while (status != APR_SUCCESS && !interrupted && --retries);
978
if (status == APR_SUCCESS) {
979
if (previous != info.mtime) {
982
previous = info.mtime;
990
if (dowork && !interrupted) {
991
if (!process_dir(path, instance) && !interrupted) {
992
purge(path, instance, max);
994
else if (!isdaemon && !interrupted) {
995
apr_file_printf(errfile, "An error occurred, cache cleaning "
996
"aborted." APR_EOL_STR);
1000
if (intelligent && !interrupted) {
1001
retries = STAT_ATTEMPTS;
1002
status = APR_SUCCESS;
1004
if (status != APR_SUCCESS) {
1005
apr_sleep(STAT_DELAY);
1007
status = apr_stat(&info, path, APR_FINFO_MTIME, instance);
1008
} while (status != APR_SUCCESS && !interrupted && --retries);
1010
if (status == APR_SUCCESS) {
1011
previous = info.mtime;
1020
apr_pool_destroy(instance);
1022
current = apr_time_now();
1023
if (current < now) {
1026
else if (current - now >= repeat) {
1030
delay = now + repeat - current;
1033
/* we can't sleep the whole delay time here apiece as this is racy
1034
* with respect to interrupt delivery - think about what happens
1035
* if we have tested for an interrupt, then get scheduled
1036
* before the apr_sleep() call and while waiting for the cpu
1037
* we do get an interrupt
1040
while (delay && !interrupted) {
1041
if (delay > APR_USEC_PER_SEC) {
1042
apr_sleep(APR_USEC_PER_SEC);
1043
delay -= APR_USEC_PER_SEC;
1051
} while (isdaemon && !interrupted);
1053
if (!isdaemon && interrupted) {
1054
apr_file_printf(errfile, "Cache cleaning aborted due to user "
1055
"request." APR_EOL_STR);