2
* (C) Copyright 2008 Intel Corporation
4
* Author: Arjan van de Ven <arjan@linux.intel.com>
6
* This program is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU General Public License
8
* as published by the Free Software Foundation; version 2
17
#include <sys/types.h>
20
#include <sys/times.h>
25
#include <sys/types.h>
27
#include <sys/syscall.h>
28
#include <sys/mount.h>
29
#include <sys/signal.h>
40
# define __NR_ioprio_set 289
41
#elif defined(__x86_64__)
43
# define __NR_ioprio_set 251
45
# warn "Architecture does not support ioprio modification"
47
#define IOPRIO_WHO_PROCESS 1
48
#define IOPRIO_CLASS_IDLE 3
49
#define IOPRIO_CLASS_SHIFT 13
50
#define IOPRIO_IDLE_LOWEST (7 | (IOPRIO_CLASS_IDLE << IOPRIO_CLASS_SHIFT))
52
#define PACK_PATH "/var/lib/sreadahead"
53
#define DEBUGFS_MNT "/var/lib/sreadahead/debugfs"
54
#define PACK_FILE "/var/lib/sreadahead/pack"
56
#define MAXR 40000 /* trace file can be long */
58
#define MAXRECS 6 /* reduce nr of fragments to this amount */
61
* By default, the kernel reads ahead for 128kb. This throws off our
62
* measurements since we don't need the extra 128kb for each file.
63
* On top of that, at the accelerated boot, we would be reading another
64
* 128kb too much potentially, wasting a lot of time.
66
* By lowering the read_ahead_kb, we get more fragments (since they
67
* are not glued together by the artificial kernel readahead). So
68
* lowering this number too much doesn't actually gain much.
70
* XX kb seems to be a good balance with not too many fragments, but
71
* keeping the total size low enough to make a difference.
73
* 8-16kb seems to be a good median value, with good total size savings
74
* over anything higher. Lower sizes result in more separate blocks
75
* and only minimal total size savings.
77
#define RA_NORMAL 128 /* default read_ahead_kb size */
78
#define RA_SMALL 16 /* our tuned down value */
85
/* disk format used, when reading pack */
88
struct ra_record data[MAXRECS];
91
/* memory format used with sorting/filtering */
94
struct ra_record data[MAXRECS];
95
struct ra_struct *next;
96
struct ra_struct *prev;
100
static struct ra_struct *ra[MAXR];
101
static struct ra_disk rd[MAXR];
102
static struct ra_struct *first_ra;
103
static int racount = 0;
104
static int rdcount = 0;
105
static int fcount = 0;
106
static int rdsize = 0;
108
static unsigned int total_files = 0;
109
static unsigned int cursor = 0;
111
static int debug = 0;
114
static void readahead_set_len(int size)
119
/* changes readahead size to "size" for local block devices */
121
unmount = chdir("/sys/block");
123
if (mount("sysfs", "/sys", "sysfs", 0, NULL) != 0) {
124
perror("Unable to mount sysfs\n");
131
sprintf(ractl, "sda/queue/read_ahead_kb");
133
/* check first 4 sata discs */
134
FILE *file = fopen(ractl, "w");
136
fprintf(file, "%d", size);
139
ractl[2]++; /* a -> b, etc */
149
static void readahead_one(int index)
155
fd = open(rd[index].filename, O_RDONLY|O_NOATIME);
157
fd = open(rd[index].filename, O_RDONLY);
159
fprintf(stderr, "%s: open failed (%s)\n",
160
rd[index].filename, strerror_r(errno, buf, sizeof buf));
164
for (i = 0; i < MAXRECS; i++) {
165
if (rd[index].data[i].len)
166
readahead(fd, rd[index].data[i].offset,
167
rd[index].data[i].len);
172
static void *one_thread(void *ptr)
177
mine = __sync_fetch_and_add(&cursor, 1);
178
if (mine < total_files)
186
static void sort_ra_by_name(void)
193
for (i = 0; i < racount - 1; i++) {
196
c = strcmp(ra[i]->filename, ra[i+1]->filename);
198
struct ra_struct *tmp;
208
static void remove_dupes(void)
213
for (i = 0; i < racount - 1; i++) {
214
for (j = i + 1; j < racount; j++) {
218
if (strcmp(ra[i]->filename, ra[j]->filename) != 0) {
223
ra[j]->next->prev = ra[j]->prev;
225
ra[j]->prev->next = ra[j]->next;
232
static int smallest_gap(struct ra_record *record, int count)
237
maxgap = 1024*1024*512;
239
for (i = 0; i < count; i++, record++) {
240
if ((i + 1) < count) {
242
gap = (record + 1)->offset - record->offset - record->len;
252
static int merge_record(struct ra_record *record, int count, int to_merge)
254
record[to_merge].len = record[to_merge+1].offset
255
+ record[to_merge+1].len - record[to_merge].offset;
256
memcpy(&record[to_merge+1], &record[to_merge+2],
257
sizeof(struct ra_record) * (count-to_merge - 2));
261
static int reduce_blocks(struct ra_record *record, int count, int target)
263
while (count > target) {
265
tomerge = smallest_gap(record, count);
266
count = merge_record(record, count, tomerge);
271
static int get_blocks(struct ra_struct *r)
277
unsigned char *mincorebuf;
278
struct ra_record record[4096];
289
memset(record, 0, sizeof(record));
291
file = fopen(r->filename, "r");
297
mmapptr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
299
mincorebuf = malloc(statbuf.st_size/4096 + 1);
300
mincore(mmapptr, statbuf.st_size, mincorebuf);
309
for (i = 0; i <= statbuf.st_size; i += 4096) {
310
if (mincorebuf[i / 4096])
314
if (phase == 1 && !mincorebuf[i / 4096]) {
316
if (i > statbuf.st_size)
317
i = statbuf.st_size + 1;
318
record[rcount].offset = start;
319
record[rcount].len = i - 1 - start;
321
if (rcount >= 4000) rcount = 4000;
322
} else if (phase == 0 && mincorebuf[i / 4096]) {
329
if (i > statbuf.st_size)
330
i = statbuf.st_size + 1;
331
record[rcount].offset = start;
332
record[rcount].len = i - 1 - start;
337
munmap(mmapptr, statbuf.st_size);
340
rcount = reduce_blocks(record, rcount, MAXRECS);
342
/* some empty files slip through */
343
if (record[0].len == 0)
349
while (tc < rcount) {
350
tlen += record[tc].len;
354
rdsize += (tlen <= 0 ? 1024 : tlen);
355
printf("%s: %d fragment(s), %dkb, %3.1f%%\n",
357
(tlen <= 1024 ? 1024 : tlen) / 1024,
358
100.0 * there / (there + notthere));
361
memcpy(r->data, record, sizeof(r->data));
369
static void get_ra_blocks(void)
371
struct ra_struct *r = first_ra;
374
if (!get_blocks(r)) {
375
/* no blocks, remove from list */
377
r->next->prev = r->prev;
379
r->prev->next = r->next;
385
static void trace_start(void)
392
* at this time during boot we can guarantee that things like
393
* debugfs, sysfs are not mounted yet (at least they should not be)
394
* so we mount it temporarily to enable tracing, and umount
396
ret = mount("debugfs", DEBUGFS_MNT, "debugfs", 0, NULL);
398
perror("Unable to mount debugfs\n");
404
file = fopen("tracing/current_tracer", "w");
406
perror("Unable to select tracer\n");
409
fprintf(file, "open");
412
file = fopen("tracing/current_tracer", "r");
413
fgets(buf, 4096, file);
415
if (strcmp(buf, "open\n") != 0) {
416
perror("Unable to select open tracer\n");
420
file = fopen("tracing/tracing_enabled", "w");
422
perror("Unable to enable tracing\n");
428
file = fopen("tracing/tracing_enabled", "r");
429
fgets(buf, 4096, file);
431
if (strcmp(buf, "1\n") != 0) {
432
perror("Enabling tracing failed\n");
440
/* set this low, so we don't readahead way too much */
441
readahead_set_len(RA_SMALL);
444
static void trace_stop(int signal)
452
struct tms start_time;
453
struct tms stop_time;
460
/* return readahead size to normal */
461
readahead_set_len(RA_NORMAL);
464
* by now the init process should have mounted debugfs on a logical
465
* location like /sys/kernel/debug, but if not then we temporarily
466
* re-mount it ourselves
468
unmount = chdir("/sys/kernel/debug/tracing");
470
ret = mount("debugfs", DEBUGFS_MNT, "debugfs", 0, NULL);
472
perror("Unable to mount debugfs\n");
479
file = fopen("tracing/tracing_enabled", "w");
481
perror("Unable to disable tracing\n");
488
file = fopen("tracing/trace", "r");
490
perror("Unable to open trace file\n");
494
while (fgets(buf, 4095, file) != NULL) {
501
start = strchr(buf, '"') + 1;
505
len = strrchr(start, '"');
506
strncpy(filename, start, len - start);
508
filename[len - start] = '\0';
510
/* ignore sys, dev, proc stuff */
511
if (strncmp(filename, "/dev/", 5) == 0)
513
if (strncmp(filename, "/sys/", 5) == 0)
515
if (strncmp(filename, "/proc/", 6) == 0)
518
if (racount >= MAXR) {
519
perror("Max records exceeded!");
523
if (strlen(filename) <= MAXFL) {
524
struct ra_struct *tmp;
525
tmp = malloc(sizeof(struct ra_struct));
528
perror("Out of memory\n");
531
memset(tmp, 0, sizeof(struct ra_struct));
535
strcpy(ra[racount]->filename, filename);
537
ra[racount]->prev = ra[racount - 1];
538
ra[racount - 1]->next = ra[racount];
540
ra[racount]->number = racount;
547
printf("Trace contained %d records\n", racount);
557
* sort and filter duplicates, and get memory blocks
564
* and write out the new pack file
566
file = fopen(PACK_FILE, "w");
568
perror("Unable to open output file\n");
574
fwrite(r->filename, MAXFL, 1, file);
575
fwrite(r->data, sizeof(r->data), 1, file);
582
printf("Took %.3f seconds\n", (double)(stop_time.tms_utime -
583
start_time.tms_utime) / 1000.0f);
584
printf("Total %d files, %dkb, %d fragments\n", rdcount,
586
rdsize / 1024, fcount);
592
static void print_usage(const char *name)
594
printf("Usage: %s [OPTION...]\n", name);
595
printf(" -d, --debug Print debug output to stdout\n");
596
printf(" -h, --help Show this help message\n");
597
printf(" -v, --version Show version information and exit\n");
601
static void print_version(void)
603
printf("sreadahead version %s\n", VERSION);
604
printf("Copyright (C) 2008, 2009 Intel Corporation\n");
608
int main(int argc, char **argv)
612
pthread_t one, two, three, four;
615
static struct option opts[] = {
616
{ "debug", 0, NULL, 'd' },
617
{ "help", 0, NULL, 'h' },
618
{ "version", 0, NULL, 'v' },
624
c = getopt_long(argc, argv, "dhv", opts, &index);
635
print_usage(argv[0]);
642
file = fopen(PACK_FILE, "r");
644
/* enable tracing open calls before we fork! */
649
signal(SIGUSR1, trace_stop);
651
* "" 15 seconds should be enough for everyone to boot""
656
* abort if we don't get a signal, so we can stop
657
* the tracing and minimize the trace buffer size
659
signal(SIGUSR1, NULL);
666
total_files = fread(&rd, sizeof(struct ra_disk), MAXR, file);
669
perror("Can't open sreadahead pack file");
675
if (syscall(__NR_ioprio_set, IOPRIO_WHO_PROCESS, pid,
676
IOPRIO_IDLE_LOWEST) == -1)
677
perror("Can not set IO priority to idle class");
680
readahead_set_len(RA_SMALL);
684
pthread_create(&one, NULL, one_thread, NULL);
685
pthread_create(&two, NULL, one_thread, NULL);
686
pthread_create(&three, NULL, one_thread, NULL);
687
pthread_create(&four, NULL, one_thread, NULL);
689
pthread_join(one, NULL);
690
pthread_join(two, NULL);
691
pthread_join(three, NULL);
692
pthread_join(four, NULL);
694
readahead_set_len(RA_NORMAL);