~ubuntu-branches/ubuntu/saucy/systemd/saucy-proposed

« back to all changes in this revision

Viewing changes to .pc/v44..upstream-fixes_44-5/src/journal/journal-file.c

  • Committer: Package Import Robot
  • Author(s): Michael Biebl, Tollef Fog Heen, Michael Biebl
  • Date: 2012-10-25 21:41:23 UTC
  • mto: This revision was merged to the branch mainline in revision 10.
  • Revision ID: package-import@ubuntu.com-20121025214123-0wjk4sble84q7mki
Tags: 44-5
* Team upload.

[ Tollef Fog Heen ]
* disable killing on entering START_PRE, START, thanks to Michael
  Stapelberg for patch.  This avoids killing VMs run through libvirt
  when restarting libvirtd.  Closes: #688635.
* Avoid reloading services when shutting down, since that won't work and
  makes no sense.  Thanks to Michael Stapelberg for the patch.
  Closes: #624599.
* Try to determine which init scripts support the reload action
  heuristically.  Closes: #686115, #650382.

[ Michael Biebl ]
* Update Vcs-* fields, the Git repository is hosted on alioth now. Set the
  default branch to "debian".
* Avoid reload and (re)start requests during early boot which can lead to
  deadlocks.  Closes: #624599
* Make systemd-cgroup work even if not all cgroup mounts are available on
  startup.  Closes: #690916
* Fix typos in the systemd.path and systemd.unit man page.  Closes: #668344
* Add watch file to track new upstream releases.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
 
2
 
 
3
/***
 
4
  This file is part of systemd.
 
5
 
 
6
  Copyright 2011 Lennart Poettering
 
7
 
 
8
  systemd is free software; you can redistribute it and/or modify it
 
9
  under the terms of the GNU General Public License as published by
 
10
  the Free Software Foundation; either version 2 of the License, or
 
11
  (at your option) any later version.
 
12
 
 
13
  systemd is distributed in the hope that it will be useful, but
 
14
  WITHOUT ANY WARRANTY; without even the implied warranty of
 
15
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 
16
  General Public License for more details.
 
17
 
 
18
  You should have received a copy of the GNU General Public License
 
19
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
 
20
***/
 
21
 
 
22
#include <sys/mman.h>
 
23
#include <errno.h>
 
24
#include <sys/uio.h>
 
25
#include <unistd.h>
 
26
#include <sys/statvfs.h>
 
27
#include <fcntl.h>
 
28
#include <stddef.h>
 
29
 
 
30
#include "journal-def.h"
 
31
#include "journal-file.h"
 
32
#include "lookup3.h"
 
33
#include "compress.h"
 
34
 
 
35
/* Byte sizes of the hash tables created for a new journal file */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*16ULL)
#define DEFAULT_FIELD_HASH_TABLE_SIZE (2047ULL*16ULL)

/* Size a mapping window is grown to when less than this is requested */
#define DEFAULT_WINDOW_SIZE (128ULL << 20)                     /* 128 MiB */

/* Payloads below this size are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL << 10)                    /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL << 20)                     /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL << 30)                     /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL << 20)                  /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL << 30)                   /* 4 GiB */

/* This is the keep_free value when we can't determine the file
 * system size */
#define DEFAULT_KEEP_FREE (1ULL << 20)                         /* 1 MiB */

/* Magic bytes every journal file starts with (no trailing NUL) */
static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
 
64
 
 
65
/* Releases everything owned by a JournalFile: flags a writable file as
 * offline in its header, unmaps all windows, closes the descriptor and
 * frees the path, the compression buffer (if built with XZ support)
 * and finally the structure itself. f is invalid afterwards. */
void journal_file_close(JournalFile *f) {
        Window *w;

        assert(f);

        if (f->writable && f->header)
                f->header->state = STATE_OFFLINE;

        /* Drop every window that currently holds a mapping */
        for (w = f->windows; w < f->windows + _WINDOW_MAX; w++) {
                if (!w->ptr)
                        continue;

                munmap(w->ptr, w->size);
        }

        if (f->fd >= 0)
                close_nointr_nofail(f->fd);

        free(f->path);

#ifdef HAVE_XZ
        free(f->compress_buffer);
#endif

        free(f);
}
 
89
 
 
90
/* Writes a fresh header to offset 0 of f->fd.
 *
 * The header carries the file signature, an 8-byte aligned arena
 * offset and a newly randomized file ID. If a template is given its
 * seqnum ID and seqnum are inherited, otherwise the seqnum ID is set
 * to the new file ID.
 *
 * Returns 0 on success, the error from sd_id128_randomize(), -errno
 * if pwrite() fails, or -EIO on a short write. */
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
        Header hdr;
        ssize_t written;
        int r;

        assert(f);

        zero(hdr);
        memcpy(hdr.signature, signature, 8);
        hdr.arena_offset = htole64(ALIGN64(sizeof(hdr)));

        r = sd_id128_randomize(&hdr.file_id);
        if (r < 0)
                return r;

        if (!template)
                hdr.seqnum_id = hdr.file_id;
        else {
                hdr.seqnum_id = template->header->seqnum_id;
                hdr.seqnum = template->header->seqnum;
        }

        written = pwrite(f->fd, &hdr, sizeof(hdr), 0);
        if (written < 0)
                return -errno;

        return written == sizeof(hdr) ? 0 : -EIO;
}
 
120
 
 
121
/* Stamps the header with the current machine ID and boot ID and marks
 * the file online.
 *
 * If the boot ID already stored in the header equals the current one,
 * f->tail_entry_monotonic_valid is set before the ID is rewritten.
 *
 * Returns 0 on success or the negative error of the failing
 * sd_id128_get_machine()/sd_id128_get_boot() call. */
static int journal_file_refresh_header(JournalFile *f) {
        sd_id128_t current_boot;
        int r;

        assert(f);

        r = sd_id128_get_machine(&f->header->machine_id);
        if (r < 0)
                return r;

        r = sd_id128_get_boot(&current_boot);
        if (r < 0)
                return r;

        /* Must be compared before the stored boot ID is overwritten below */
        if (sd_id128_equal(current_boot, f->header->boot_id))
                f->tail_entry_monotonic_valid = true;

        f->header->boot_id = current_boot;
        f->header->state = STATE_ONLINE;

        /* Full memory barrier after the header stores */
        __sync_synchronize();

        return 0;
}
 
146
 
 
147
/* Sanity-checks the mapped header of an existing journal file.
 *
 * Returns 0 if the file is usable, otherwise:
 *   -EBADMSG         the signature does not match
 *   -EPROTONOSUPPORT incompatible flags are set that this build does
 *                    not understand
 *   -ENODATA         the file is smaller than the arena the header
 *                    describes
 * and, for writable files only:
 *   -EHOSTDOWN       the file belongs to a different machine ID
 *   -ESHUTDOWN       the file is in archived state
 * or the error of sd_id128_get_machine(). */
static int journal_file_verify_header(JournalFile *f) {
        assert(f);

        if (memcmp(f->header, signature, 8))
                return -EBADMSG;

#ifdef HAVE_XZ
        /* incompatible_flags is a 32-bit little-endian field (it is
         * written with htole32() when compression is enabled), so it
         * has to be decoded with le32toh(); le64toh() would scramble
         * it on big-endian hosts. */
        if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
                return -EPROTONOSUPPORT;
#else
        /* A zero test is independent of byte order */
        if (f->header->incompatible_flags != 0)
                return -EPROTONOSUPPORT;
#endif

        if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->arena_offset) + le64toh(f->header->arena_size)))
                return -ENODATA;

        if (f->writable) {
                uint8_t state;
                sd_id128_t machine_id;
                int r;

                r = sd_id128_get_machine(&machine_id);
                if (r < 0)
                        return r;

                if (!sd_id128_equal(machine_id, f->header->machine_id))
                        return -EHOSTDOWN;

                state = f->header->state;

                if (state == STATE_ONLINE)
                        log_debug("Journal file %s is already online. Assuming unclean closing. Ignoring.", f->path);
                else if (state == STATE_ARCHIVED)
                        return -ESHUTDOWN;
                else if (state != STATE_OFFLINE)
                        log_debug("Journal file %s has unknown state %u. Ignoring.", f->path, state);
        }

        return 0;
}
 
188
 
 
189
/* Makes sure the file is large enough to hold [offset, offset+size).
 *
 * The target size is page aligned and never smaller than the arena
 * offset. If the file already covers it nothing happens. Otherwise the
 * file is grown with posix_fallocate(), the cached stat data is
 * refreshed and the header's arena_size is updated.
 *
 * Returns 0 on success, -E2BIG if the new size would exceed
 * metrics.max_size or would eat into the metrics.keep_free reserve of
 * the file system, or a negative errno-style error if allocation or
 * fstat() fails. */
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
        uint64_t old_size, new_size;
        int r;

        assert(f);

        /* We assume that this file is not sparse, and we know that
         * for sure, since we always call posix_fallocate()
         * ourselves */

        old_size =
                le64toh(f->header->arena_offset) +
                le64toh(f->header->arena_size);

        new_size = PAGE_ALIGN(offset + size);
        if (new_size < le64toh(f->header->arena_offset))
                new_size = le64toh(f->header->arena_offset);

        if (new_size <= old_size)
                return 0;

        if (f->metrics.max_size > 0 &&
            new_size > f->metrics.max_size)
                return -E2BIG;

        if (new_size > f->metrics.min_size &&
            f->metrics.keep_free > 0) {
                struct statvfs svfs;

                /* A failing fstatvfs() is deliberately ignored: the
                 * free-space check is then simply skipped. */
                if (fstatvfs(f->fd, &svfs) >= 0) {
                        uint64_t available;

                        /* Widen before multiplying so the product cannot
                         * be truncated where these fields are 32 bit */
                        available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;

                        if (available >= f->metrics.keep_free)
                                available -= f->metrics.keep_free;
                        else
                                available = 0;

                        if (new_size - old_size > available)
                                return -E2BIG;
                }
        }

        /* Note that the glibc fallocate() fallback is very
           inefficient, hence we try to minimize the allocation area
           as we can. */

        /* posix_fallocate() reports failure through its return value
         * (a positive error number) and does not set errno. */
        r = posix_fallocate(f->fd, old_size, new_size - old_size);
        if (r != 0)
                return -r;

        if (fstat(f->fd, &f->last_stat) < 0)
                return -errno;

        f->header->arena_size = htole64(new_size - le64toh(f->header->arena_offset));

        return 0;
}
 
245
 
 
246
static int journal_file_map(
 
247
                JournalFile *f,
 
248
                uint64_t offset,
 
249
                uint64_t size,
 
250
                void **_window,
 
251
                uint64_t *_woffset,
 
252
                uint64_t *_wsize,
 
253
                void **ret) {
 
254
 
 
255
        uint64_t woffset, wsize;
 
256
        void *window;
 
257
 
 
258
        assert(f);
 
259
        assert(size > 0);
 
260
        assert(ret);
 
261
 
 
262
        woffset = offset & ~((uint64_t) page_size() - 1ULL);
 
263
        wsize = size + (offset - woffset);
 
264
        wsize = PAGE_ALIGN(wsize);
 
265
 
 
266
        /* Avoid SIGBUS on invalid accesses */
 
267
        if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
 
268
                return -EADDRNOTAVAIL;
 
269
 
 
270
        window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
 
271
        if (window == MAP_FAILED)
 
272
                return -errno;
 
273
 
 
274
        if (_window)
 
275
                *_window = window;
 
276
 
 
277
        if (_woffset)
 
278
                *_woffset = woffset;
 
279
 
 
280
        if (_wsize)
 
281
                *_wsize = wsize;
 
282
 
 
283
        *ret = (uint8_t*) window + (offset - woffset);
 
284
 
 
285
        return 0;
 
286
}
 
287
 
 
288
/* Makes [offset, offset+size) of the file accessible through window
 * `wt` and stores a pointer to its first byte in *ret.
 *
 * If the window already covers the range it is reused. Otherwise the
 * old mapping is dropped and a new one is created; requests smaller
 * than DEFAULT_WINDOW_SIZE are widened to that size (clipped to the
 * file) to reduce later remapping. Remapping a window invalidates
 * pointers previously handed out for that window.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if the range lies outside the
 * file, or a negative errno from munmap()/journal_file_map(). */
static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
        void *p = NULL;
        uint64_t delta;
        int r;
        Window *w;

        assert(f);
        assert(ret);
        assert(wt >= 0);
        assert(wt < _WINDOW_MAX);

        /* offset and size may originate from on-disk fields; a range
         * whose end wraps around would otherwise slip through the
         * bounds checks below. */
        if (offset + size < offset)
                return -EADDRNOTAVAIL;

        if (offset + size > (uint64_t) f->last_stat.st_size) {
                /* Hmm, out of range? Let's refresh the fstat() data
                 * first, before we trust that check. */

                if (fstat(f->fd, &f->last_stat) < 0 ||
                    offset + size > (uint64_t) f->last_stat.st_size)
                        return -EADDRNOTAVAIL;
        }

        w = f->windows + wt;

        if (_likely_(w->ptr &&
                     w->offset <= offset &&
                     w->offset + w->size >= offset + size)) {

                *ret = (uint8_t*) w->ptr + (offset - w->offset);
                return 0;
        }

        if (w->ptr) {
                if (munmap(w->ptr, w->size) < 0)
                        return -errno;

                w->ptr = NULL;
                w->size = w->offset = 0;
        }

        if (size < DEFAULT_WINDOW_SIZE) {
                /* If the default window size is larger than what was
                 * asked for, extend the mapping a bit in the hope to
                 * minimize needed remappings later on. We add half
                 * the window space before and half behind the
                 * requested mapping */

                delta = (DEFAULT_WINDOW_SIZE - size) / 2;

                if (delta > offset)
                        delta = offset;

                offset -= delta;
                size = DEFAULT_WINDOW_SIZE;
        } else
                delta = 0;

        /* Clip the (possibly widened) window to the end of the file */
        if (offset + size > (uint64_t) f->last_stat.st_size)
                size = (uint64_t) f->last_stat.st_size - offset;

        if (size == 0)
                return -EADDRNOTAVAIL;

        r = journal_file_map(f,
                             offset, size,
                             &w->ptr, &w->offset, &w->size,
                             &p);

        if (r < 0)
                return r;

        /* p points at the widened start; skip back to the requested byte */
        *ret = (uint8_t*) p + delta;
        return 0;
}
 
360
 
 
361
/* Checks the stored hash of an object against its payload.
 *
 * Only uncompressed DATA objects and FIELD objects are hashed; every
 * other object passes. Returns false if the hashes differ or if the
 * object's recorded size is too small to contain a payload at all. */
static bool verify_hash(Object *o) {
        uint64_t h1, h2, size, payload_offset;

        assert(o);

        size = le64toh(o->object.size);

        if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
                payload_offset = offsetof(Object, data.payload);

                /* Without this the subtraction below wraps around and
                 * hash64() is asked to read a gigantic range */
                if (size < payload_offset)
                        return false;

                h1 = le64toh(o->data.hash);
                h2 = hash64(o->data.payload, size - payload_offset);
        } else if (o->object.type == OBJECT_FIELD) {
                payload_offset = offsetof(Object, field.payload);

                if (size < payload_offset)
                        return false;

                h1 = le64toh(o->field.hash);
                h2 = hash64(o->field.payload, size - payload_offset);
        } else
                return true;

        return h1 == h2;
}
 
377
 
 
378
/* Maps the object located at `offset` and returns it in *ret.
 *
 * type selects the expected object type; a negative value accepts any
 * type. The object header is mapped first to learn the object's size,
 * then the whole object is mapped through the window belonging to its
 * type, and finally its hash is verified.
 *
 * Returns 0 on success, -EBADMSG if the object is malformed (size
 * smaller than a header, unexpected or out-of-range type, hash
 * mismatch), or the error from journal_file_move_to(). */
int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
        int r;
        void *t;
        Object *o;
        uint64_t s;

        assert(f);
        assert(ret);
        assert(type < _OBJECT_TYPE_MAX);

        r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
        if (r < 0)
                return r;

        o = (Object*) t;
        s = le64toh(o->object.size);

        if (s < sizeof(ObjectHeader))
                return -EBADMSG;

        if (type >= 0 && o->object.type != type)
                return -EBADMSG;

        /* The on-disk type is used as window index below. When the
         * caller accepts any type it has not been validated yet, and
         * journal_file_move_to() asserts on an out-of-range index, so
         * reject such objects as corrupt instead. */
        if (o->object.type >= _WINDOW_MAX)
                return -EBADMSG;

        if (s > sizeof(ObjectHeader)) {
                r = journal_file_move_to(f, o->object.type, offset, s, &t);
                if (r < 0)
                        return r;

                o = (Object*) t;
        }

        if (!verify_hash(o))
                return -EBADMSG;

        *ret = o;
        return 0;
}
 
415
 
 
416
/* Allocates and returns the next sequence number for f.
 *
 * The candidate is the header's seqnum plus one. If an external
 * counter is passed, the larger of the candidate and (*seqnum + 1)
 * wins and is written back to *seqnum. The result is stored in the
 * header, and also becomes first_seqnum if that is still zero. */
static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
        uint64_t next;

        assert(f);

        next = le64toh(f->header->seqnum) + 1;

        if (seqnum) {
                uint64_t external = *seqnum + 1;

                /* Keep local and external counter in lock-step at the
                 * maximum of both */
                if (external > next)
                        next = external;

                *seqnum = next;
        }

        f->header->seqnum = htole64(next);

        if (f->header->first_seqnum == 0)
                f->header->first_seqnum = htole64(next);

        return next;
}
 
441
 
 
442
/* Appends a new, zero-headered object of the given type and size
 * behind the current tail object (or at the arena start if the file
 * has no objects yet).
 *
 * The file is grown as needed, the object header is initialized with
 * type and size, and the file header's tail_object_offset and
 * n_objects are updated. *ret receives the mapped object, *offset its
 * file offset.
 *
 * Returns 0 on success or the negative error of the failing
 * lookup/allocation/mapping step. */
static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
        Object *tail, *obj;
        uint64_t pos;
        void *mapped;
        int r;

        assert(f);
        assert(size >= sizeof(ObjectHeader));
        assert(offset);
        assert(ret);

        pos = le64toh(f->header->tail_object_offset);
        if (pos != 0) {
                /* Skip over the existing tail object, 8-byte aligned */
                r = journal_file_move_to_object(f, -1, pos, &tail);
                if (r < 0)
                        return r;

                pos += ALIGN64(le64toh(tail->object.size));
        } else
                pos = le64toh(f->header->arena_offset);

        r = journal_file_allocate(f, pos, size);
        if (r < 0)
                return r;

        r = journal_file_move_to(f, type, pos, size, &mapped);
        if (r < 0)
                return r;

        obj = (Object*) mapped;

        zero(obj->object);
        obj->object.type = type;
        obj->object.size = htole64(size);

        f->header->tail_object_offset = htole64(pos);
        f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);

        *ret = obj;
        *offset = pos;

        return 0;
}
 
486
 
 
487
/* Appends a zero-filled data hash table object of
 * DEFAULT_DATA_HASH_TABLE_SIZE bytes and records the offset of its
 * item array and its size in the file header.
 *
 * Returns 0 on success or the error of journal_file_append_object(). */
static int journal_file_setup_data_hash_table(JournalFile *f) {
        const uint64_t table_size = DEFAULT_DATA_HASH_TABLE_SIZE;
        const uint64_t items_offset = offsetof(Object, hash_table.items);
        uint64_t object_offset;
        Object *table;
        int r;

        assert(f);

        r = journal_file_append_object(f,
                                       OBJECT_DATA_HASH_TABLE,
                                       items_offset + table_size,
                                       &table, &object_offset);
        if (r < 0)
                return r;

        memset(table->hash_table.items, 0, table_size);

        /* The header points at the item array, not at the object header */
        f->header->data_hash_table_offset = htole64(object_offset + items_offset);
        f->header->data_hash_table_size = htole64(table_size);

        return 0;
}
 
509
 
 
510
/* Appends a zero-filled field hash table object of
 * DEFAULT_FIELD_HASH_TABLE_SIZE bytes and records the offset of its
 * item array and its size in the file header.
 *
 * Returns 0 on success or the error of journal_file_append_object(). */
static int journal_file_setup_field_hash_table(JournalFile *f) {
        const uint64_t table_size = DEFAULT_FIELD_HASH_TABLE_SIZE;
        const uint64_t items_offset = offsetof(Object, hash_table.items);
        uint64_t object_offset;
        Object *table;
        int r;

        assert(f);

        r = journal_file_append_object(f,
                                       OBJECT_FIELD_HASH_TABLE,
                                       items_offset + table_size,
                                       &table, &object_offset);
        if (r < 0)
                return r;

        memset(table->hash_table.items, 0, table_size);

        /* The header points at the item array, not at the object header */
        f->header->field_hash_table_offset = htole64(object_offset + items_offset);
        f->header->field_hash_table_size = htole64(table_size);

        return 0;
}
 
532
 
 
533
/* Maps the data hash table described by the file header through the
 * WINDOW_DATA_HASH_TABLE window and caches the pointer in
 * f->data_hash_table.
 *
 * Returns 0 on success or the error of journal_file_move_to(). */
static int journal_file_map_data_hash_table(JournalFile *f) {
        uint64_t table_offset, table_size;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->data_hash_table_offset);
        table_size = le64toh(f->header->data_hash_table_size);

        r = journal_file_move_to(f, WINDOW_DATA_HASH_TABLE, table_offset, table_size, &mapped);
        if (r < 0)
                return r;

        f->data_hash_table = mapped;

        return 0;
}
 
553
 
 
554
/* Maps the field hash table described by the file header through the
 * WINDOW_FIELD_HASH_TABLE window and caches the pointer in
 * f->field_hash_table.
 *
 * Returns 0 on success or the error of journal_file_move_to(). */
static int journal_file_map_field_hash_table(JournalFile *f) {
        uint64_t table_offset, table_size;
        void *mapped;
        int r;

        assert(f);

        table_offset = le64toh(f->header->field_hash_table_offset);
        table_size = le64toh(f->header->field_hash_table_size);

        r = journal_file_move_to(f, WINDOW_FIELD_HASH_TABLE, table_offset, table_size, &mapped);
        if (r < 0)
                return r;

        f->field_hash_table = mapped;

        return 0;
}
 
574
 
 
575
/* Links the freshly appended DATA object `o` (located at `offset`)
 * into the data hash table bucket selected by `hash`.
 *
 * The object's chain and entry bookkeeping fields are reset, the
 * object is appended to the END of the bucket's chain and the bucket's
 * tail pointer is moved to it.
 *
 * Note that `o` may no longer be valid when this returns, since
 * looking up the previous chain element can remap the DATA window.
 *
 * Returns 0 on success, -EBADMSG if the hash table has no buckets, or
 * the error of journal_file_move_to_object(). */
static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
        uint64_t p, h, n_buckets;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_DATA);

        /* This might alter the window we are looking at */

        o->data.next_hash_offset = o->data.next_field_offset = 0;
        o->data.entry_offset = o->data.entry_array_offset = 0;
        o->data.n_entries = 0;

        /* Guard the modulo below against an empty/undersized table */
        n_buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        h = hash % n_buckets;

        /* The new object has to be chained behind the LAST element of
         * the bucket. Chaining it behind the head would overwrite the
         * head's next pointer and orphan everything that was already
         * linked after it. Fall back to the head if no tail has been
         * recorded. */
        p = le64toh(f->data_hash_table[h].tail_hash_offset);
        if (p == 0)
                p = le64toh(f->data_hash_table[h].head_hash_offset);

        if (p == 0) {
                /* Only entry in the hash table is easy */
                f->data_hash_table[h].head_hash_offset = htole64(offset);
        } else {
                /* Move back to the previous data object, to patch in
                 * pointer */

                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                o->data.next_hash_offset = htole64(offset);
        }

        f->data_hash_table[h].tail_hash_offset = htole64(offset);

        return 0;
}
 
610
 
 
611
int journal_file_find_data_object_with_hash(
 
612
                JournalFile *f,
 
613
                const void *data, uint64_t size, uint64_t hash,
 
614
                Object **ret, uint64_t *offset) {
 
615
 
 
616
        uint64_t p, osize, h;
 
617
        int r;
 
618
 
 
619
        assert(f);
 
620
        assert(data || size == 0);
 
621
 
 
622
        osize = offsetof(Object, data.payload) + size;
 
623
 
 
624
        if (f->header->data_hash_table_size == 0)
 
625
                return -EBADMSG;
 
626
 
 
627
        h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
 
628
        p = le64toh(f->data_hash_table[h].head_hash_offset);
 
629
 
 
630
        while (p > 0) {
 
631
                Object *o;
 
632
 
 
633
                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
 
634
                if (r < 0)
 
635
                        return r;
 
636
 
 
637
                if (le64toh(o->data.hash) != hash)
 
638
                        goto next;
 
639
 
 
640
                if (o->object.flags & OBJECT_COMPRESSED) {
 
641
#ifdef HAVE_XZ
 
642
                        uint64_t l, rsize;
 
643
 
 
644
                        l = le64toh(o->object.size);
 
645
                        if (l <= offsetof(Object, data.payload))
 
646
                                return -EBADMSG;
 
647
 
 
648
                        l -= offsetof(Object, data.payload);
 
649
 
 
650
                        if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
 
651
                                return -EBADMSG;
 
652
 
 
653
                        if (rsize == size &&
 
654
                            memcmp(f->compress_buffer, data, size) == 0) {
 
655
 
 
656
                                if (ret)
 
657
                                        *ret = o;
 
658
 
 
659
                                if (offset)
 
660
                                        *offset = p;
 
661
 
 
662
                                return 1;
 
663
                        }
 
664
#else
 
665
                        return -EPROTONOSUPPORT;
 
666
#endif
 
667
 
 
668
                } else if (le64toh(o->object.size) == osize &&
 
669
                           memcmp(o->data.payload, data, size) == 0) {
 
670
 
 
671
                        if (ret)
 
672
                                *ret = o;
 
673
 
 
674
                        if (offset)
 
675
                                *offset = p;
 
676
 
 
677
                        return 1;
 
678
                }
 
679
 
 
680
        next:
 
681
                p = le64toh(o->data.next_hash_offset);
 
682
        }
 
683
 
 
684
        return 0;
 
685
}
 
686
 
 
687
int journal_file_find_data_object(
 
688
                JournalFile *f,
 
689
                const void *data, uint64_t size,
 
690
                Object **ret, uint64_t *offset) {
 
691
 
 
692
        uint64_t hash;
 
693
 
 
694
        assert(f);
 
695
        assert(data || size == 0);
 
696
 
 
697
        hash = hash64(data, size);
 
698
 
 
699
        return journal_file_find_data_object_with_hash(f,
 
700
                                                       data, size, hash,
 
701
                                                       ret, offset);
 
702
}
 
703
 
 
704
static int journal_file_append_data(
 
705
                JournalFile *f,
 
706
                const void *data, uint64_t size,
 
707
                Object **ret, uint64_t *offset) {
 
708
 
 
709
        uint64_t hash, p;
 
710
        uint64_t osize;
 
711
        Object *o;
 
712
        int r;
 
713
        bool compressed = false;
 
714
 
 
715
        assert(f);
 
716
        assert(data || size == 0);
 
717
 
 
718
        hash = hash64(data, size);
 
719
 
 
720
        r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
 
721
        if (r < 0)
 
722
                return r;
 
723
        else if (r > 0) {
 
724
 
 
725
                if (ret)
 
726
                        *ret = o;
 
727
 
 
728
                if (offset)
 
729
                        *offset = p;
 
730
 
 
731
                return 0;
 
732
        }
 
733
 
 
734
        osize = offsetof(Object, data.payload) + size;
 
735
        r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
 
736
        if (r < 0)
 
737
                return r;
 
738
 
 
739
        o->data.hash = htole64(hash);
 
740
 
 
741
#ifdef HAVE_XZ
 
742
        if (f->compress &&
 
743
            size >= COMPRESSION_SIZE_THRESHOLD) {
 
744
                uint64_t rsize;
 
745
 
 
746
                compressed = compress_blob(data, size, o->data.payload, &rsize);
 
747
 
 
748
                if (compressed) {
 
749
                        o->object.size = htole64(offsetof(Object, data.payload) + rsize);
 
750
                        o->object.flags |= OBJECT_COMPRESSED;
 
751
 
 
752
                        f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
 
753
 
 
754
                        log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
 
755
                }
 
756
        }
 
757
#endif
 
758
 
 
759
        if (!compressed)
 
760
                memcpy(o->data.payload, data, size);
 
761
 
 
762
        r = journal_file_link_data(f, o, p, hash);
 
763
        if (r < 0)
 
764
                return r;
 
765
 
 
766
        /* The linking might have altered the window, so let's
 
767
         * refresh our pointer */
 
768
        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
 
769
        if (r < 0)
 
770
                return r;
 
771
 
 
772
        if (ret)
 
773
                *ret = o;
 
774
 
 
775
        if (offset)
 
776
                *offset = p;
 
777
 
 
778
        return 0;
 
779
}
 
780
 
 
781
/* Returns how many EntryItem slots an ENTRY object holds, derived from
 * the object size stored in its header. */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t items_bytes;

        assert(o);
        assert(o->object.type == OBJECT_ENTRY);

        /* Everything past the fixed part of the object is the item array */
        items_bytes = le64toh(o->object.size) - offsetof(Object, entry.items);

        return items_bytes / sizeof(EntryItem);
}
 
787
 
 
788
/* Returns how many 64 bit offset slots an ENTRY_ARRAY object holds,
 * derived from the object size stored in its header. */
static uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t items_bytes;

        assert(o);
        assert(o->object.type == OBJECT_ENTRY_ARRAY);

        /* Everything past the fixed part of the object is the slot array */
        items_bytes = le64toh(o->object.size) - offsetof(Object, entry_array.items);

        return items_bytes / sizeof(uint64_t);
}
 
794
 
 
795
/* Stores the entry offset p in slot *idx of a chain of ENTRY_ARRAY
 * objects and bumps *idx by one.
 *
 * *first is the (little endian) offset of the first array of the
 * chain, *idx the (little endian) number of slots in use. The chain is
 * walked until the array covering slot *idx is found; if the chain is
 * too short a new array is appended and hooked up, either via *first
 * (empty chain) or via the last array's next pointer.
 *
 * Returns 0 on success, or the negative result of a failing helper. */
static int link_entry_into_array(JournalFile *f,
                                 uint64_t *first,
                                 uint64_t *idx,
                                 uint64_t p) {
        uint64_t capacity = 0, prev = 0, cur, slot, total, fresh;
        Object *o;
        int r;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        total = le64toh(*idx);
        slot = total;

        for (cur = le64toh(*first); cur > 0; cur = le64toh(o->entry_array.next_entry_array_offset)) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, cur, &o);
                if (r < 0)
                        return r;

                capacity = journal_file_entry_array_n_items(o);
                if (slot < capacity) {
                        /* The slot lives in this array, fill it in */
                        o->entry_array.items[slot] = htole64(p);
                        *idx = htole64(total + 1);
                        return 0;
                }

                /* Not in here, make the slot relative to the next array */
                slot -= capacity;
                prev = cur;
        }

        /* Size the new array: twice the last array, or twice the number
         * of entries if that is larger, but never less than 4 slots */
        capacity = total > capacity ? (total + 1) * 2 : capacity * 2;
        if (capacity < 4)
                capacity = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + capacity * sizeof(uint64_t),
                                       &o, &fresh);
        if (r < 0)
                return r;

        o->entry_array.items[slot] = htole64(p);

        if (prev != 0) {
                /* Chain the new array behind the previous last one */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, prev, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(fresh);
        } else
                *first = htole64(fresh);

        *idx = htole64(total + 1);

        return 0;
}
 
858
 
 
859
/* Like link_entry_into_array(), but for lists whose very first element
 * is kept in a separate field: element 0 goes into *extra, every later
 * element n goes into slot n-1 of the array chain starting at *first.
 * *idx counts all elements (including the extra one) and is bumped by
 * one on success.
 *
 * Returns 0 on success, or the negative result of
 * link_entry_into_array(). */
static int link_entry_into_array_plus_one(JournalFile *f,
                                          uint64_t *extra,
                                          uint64_t *first,
                                          uint64_t *idx,
                                          uint64_t p) {

        assert(f);
        assert(extra);
        assert(first);
        assert(idx);
        assert(p > 0);

        if (*idx != 0) {
                uint64_t array_idx;
                int r;

                /* The array index trails the total count by one; use a
                 * copy so that the callee's increment is discarded */
                array_idx = htole64(le64toh(*idx) - 1);

                r = link_entry_into_array(f, first, &array_idx, p);
                if (r < 0)
                        return r;
        } else
                *extra = htole64(p);

        *idx = htole64(le64toh(*idx) + 1);
        return 0;
}
 
887
 
 
888
/* Registers the entry at 'offset' with the DATA object referenced by
 * item i of the entry object o, by adding the entry to that data
 * object's entry list.
 *
 * Returns -EINVAL if the item carries a zero data offset, otherwise
 * the result of the helpers. */
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
        uint64_t data_offset;
        Object *data;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);

        data_offset = le64toh(o->entry.items[i].object_offset);
        if (data_offset == 0)
                return -EINVAL;

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
        if (r < 0)
                return r;

        return link_entry_into_array_plus_one(f,
                                              &data->data.entry_offset,
                                              &data->data.entry_array_offset,
                                              &data->data.n_entries,
                                              offset);
}
 
909
 
 
910
/* Makes a freshly written ENTRY object reachable: adds it to the
 * file-wide entry array, updates the head/tail timestamps in the file
 * header and finally registers it with every data object it refers to.
 *
 * Returns 0 on success, or the negative result of a failing helper. */
static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
        uint64_t item, n_items;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);
        assert(o->object.type == OBJECT_ENTRY);

        /* Full memory barrier before the entry is hooked up */
        __sync_synchronize();

        /* Hook the entry into the global entry array */
        r = link_entry_into_array(f,
                                  &f->header->entry_array_offset,
                                  &f->header->n_entries,
                                  offset);
        if (r < 0)
                return r;

        /* The head timestamp is only set once, the tail ones follow
         * every entry */
        if (f->header->head_entry_realtime == 0)
                f->header->head_entry_realtime = o->entry.realtime;

        f->header->tail_entry_realtime = o->entry.realtime;
        f->header->tail_entry_monotonic = o->entry.monotonic;

        f->tail_entry_monotonic_valid = true;

        /* Hook the entry into the list of each of its data objects */
        n_items = journal_file_entry_n_items(o);
        for (item = 0; item < n_items; item++) {
                r = journal_file_link_entry_item(f, o, offset, item);
                if (r < 0)
                        return r;
        }

        return 0;
}
 
949
 
 
950
static int journal_file_append_entry_internal(
 
951
                JournalFile *f,
 
952
                const dual_timestamp *ts,
 
953
                uint64_t xor_hash,
 
954
                const EntryItem items[], unsigned n_items,
 
955
                uint64_t *seqnum,
 
956
                Object **ret, uint64_t *offset) {
 
957
        uint64_t np;
 
958
        uint64_t osize;
 
959
        Object *o;
 
960
        int r;
 
961
 
 
962
        assert(f);
 
963
        assert(items || n_items == 0);
 
964
        assert(ts);
 
965
 
 
966
        osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
 
967
 
 
968
        r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
 
969
        if (r < 0)
 
970
                return r;
 
971
 
 
972
        o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
 
973
        memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
 
974
        o->entry.realtime = htole64(ts->realtime);
 
975
        o->entry.monotonic = htole64(ts->monotonic);
 
976
        o->entry.xor_hash = htole64(xor_hash);
 
977
        o->entry.boot_id = f->header->boot_id;
 
978
 
 
979
        r = journal_file_link_entry(f, o, np);
 
980
        if (r < 0)
 
981
                return r;
 
982
 
 
983
        if (ret)
 
984
                *ret = o;
 
985
 
 
986
        if (offset)
 
987
                *offset = np;
 
988
 
 
989
        return 0;
 
990
}
 
991
 
 
992
/* Notifies inotify watchers that the journal file has changed.
 *
 * Writes done through mmap() do not generate IN_MODIFY events. To
 * make watchers notice anyway, the file is truncated to the size
 * recorded in f->last_stat, which leaves the contents alone but
 * does trigger IN_MODIFY. A failure is only logged. */
void journal_file_post_change(JournalFile *f) {
        assert(f);

        /* Full memory barrier before the notification is triggered */
        __sync_synchronize();

        if (ftruncate(f->fd, f->last_stat.st_size) < 0)
                log_error("Failed to truncate file to its own size: %m");
}
 
1005
 
 
1006
/* Appends one journal entry made up of the n_iovec fields in iovec.
 *
 * Each field is stored (or found) as a data object via
 * journal_file_append_data(); the resulting offsets and hashes form
 * the item list of a new entry object. If ts is NULL the current time
 * is used.
 *
 * Returns -EPERM if the file is not writable and -EINVAL if the
 * monotonic timestamp is older than that of the current tail entry.
 * Errors of journal_file_append_data() are returned right away;
 * otherwise the result of journal_file_append_entry_internal() is
 * returned after journal_file_post_change() has been called. */
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        struct dual_timestamp now;
        uint64_t xor_hash = 0;
        EntryItem *items;
        unsigned k;
        int r;

        assert(f);
        assert(iovec || n_iovec == 0);

        if (!f->writable)
                return -EPERM;

        if (!ts) {
                dual_timestamp_get(&now);
                ts = &now;
        }

        /* Refuse entries that would go backwards in monotonic time */
        if (f->tail_entry_monotonic_valid &&
            ts->monotonic < le64toh(f->header->tail_entry_monotonic))
                return -EINVAL;

        items = alloca(sizeof(EntryItem) * n_iovec);

        for (k = 0; k < n_iovec; k++) {
                const struct iovec *field = iovec + k;
                uint64_t data_offset;
                Object *data;

                r = journal_file_append_data(f, field->iov_base, field->iov_len, &data, &data_offset);
                if (r < 0)
                        return r;

                items[k].object_offset = htole64(data_offset);
                items[k].hash = data->data.hash;

                /* The entry hash is the XOR of all its field hashes */
                xor_hash ^= le64toh(data->data.hash);
        }

        r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);

        journal_file_post_change(f);

        return r;
}
 
1049
 
 
1050
static int generic_array_get(JournalFile *f,
 
1051
                             uint64_t first,
 
1052
                             uint64_t i,
 
1053
                             Object **ret, uint64_t *offset) {
 
1054
 
 
1055
        Object *o;
 
1056
        uint64_t p = 0, a;
 
1057
        int r;
 
1058
 
 
1059
        assert(f);
 
1060
 
 
1061
        a = first;
 
1062
        while (a > 0) {
 
1063
                uint64_t n;
 
1064
 
 
1065
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
 
1066
                if (r < 0)
 
1067
                        return r;
 
1068
 
 
1069
                n = journal_file_entry_array_n_items(o);
 
1070
                if (i < n) {
 
1071
                        p = le64toh(o->entry_array.items[i]);
 
1072
                        break;
 
1073
                }
 
1074
 
 
1075
                i -= n;
 
1076
                a = le64toh(o->entry_array.next_entry_array_offset);
 
1077
        }
 
1078
 
 
1079
        if (a <= 0 || p <= 0)
 
1080
                return 0;
 
1081
 
 
1082
        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
 
1083
        if (r < 0)
 
1084
                return r;
 
1085
 
 
1086
        if (ret)
 
1087
                *ret = o;
 
1088
 
 
1089
        if (offset)
 
1090
                *offset = p;
 
1091
 
 
1092
        return 1;
 
1093
}
 
1094
 
 
1095
static int generic_array_get_plus_one(JournalFile *f,
 
1096
                                      uint64_t extra,
 
1097
                                      uint64_t first,
 
1098
                                      uint64_t i,
 
1099
                                      Object **ret, uint64_t *offset) {
 
1100
 
 
1101
        Object *o;
 
1102
 
 
1103
        assert(f);
 
1104
 
 
1105
        if (i == 0) {
 
1106
                int r;
 
1107
 
 
1108
                r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
 
1109
                if (r < 0)
 
1110
                        return r;
 
1111
 
 
1112
                if (ret)
 
1113
                        *ret = o;
 
1114
 
 
1115
                if (offset)
 
1116
                        *offset = extra;
 
1117
 
 
1118
                return 1;
 
1119
        }
 
1120
 
 
1121
        return generic_array_get(f, first, i-1, ret, offset);
 
1122
}
 
1123
 
 
1124
/* Results of the test_object callbacks used when bisecting entry
 * arrays; negative values are reserved for errors. */
enum {
        TEST_FOUND = 0, /* The tested entry matches the needle */
        TEST_LEFT  = 1, /* The tested entry sorts before the needle */
        TEST_RIGHT = 2  /* The tested entry sorts after the needle */
};
 
1129
 
 
1130
static int generic_array_bisect(JournalFile *f,
 
1131
                                uint64_t first,
 
1132
                                uint64_t n,
 
1133
                                uint64_t needle,
 
1134
                                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
 
1135
                                direction_t direction,
 
1136
                                Object **ret,
 
1137
                                uint64_t *offset,
 
1138
                                uint64_t *idx) {
 
1139
 
 
1140
        uint64_t a, p, t = 0, i = 0, last_p = 0;
 
1141
        bool subtract_one = false;
 
1142
        Object *o, *array = NULL;
 
1143
        int r;
 
1144
 
 
1145
        assert(f);
 
1146
        assert(test_object);
 
1147
 
 
1148
        a = first;
 
1149
        while (a > 0) {
 
1150
                uint64_t left, right, k, lp;
 
1151
 
 
1152
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
 
1153
                if (r < 0)
 
1154
                        return r;
 
1155
 
 
1156
                k = journal_file_entry_array_n_items(array);
 
1157
                right = MIN(k, n);
 
1158
                if (right <= 0)
 
1159
                        return 0;
 
1160
 
 
1161
                i = right - 1;
 
1162
                lp = p = le64toh(array->entry_array.items[i]);
 
1163
                if (p <= 0)
 
1164
                        return -EBADMSG;
 
1165
 
 
1166
                r = test_object(f, p, needle);
 
1167
                if (r < 0)
 
1168
                        return r;
 
1169
 
 
1170
                if (r == TEST_FOUND)
 
1171
                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
 
1172
 
 
1173
                if (r == TEST_RIGHT) {
 
1174
                        left = 0;
 
1175
                        right -= 1;
 
1176
                        for (;;) {
 
1177
                                if (left == right) {
 
1178
                                        if (direction == DIRECTION_UP)
 
1179
                                                subtract_one = true;
 
1180
 
 
1181
                                        i = left;
 
1182
                                        goto found;
 
1183
                                }
 
1184
 
 
1185
                                assert(left < right);
 
1186
 
 
1187
                                i = (left + right) / 2;
 
1188
                                p = le64toh(array->entry_array.items[i]);
 
1189
                                if (p <= 0)
 
1190
                                        return -EBADMSG;
 
1191
 
 
1192
                                r = test_object(f, p, needle);
 
1193
                                if (r < 0)
 
1194
                                        return r;
 
1195
 
 
1196
                                if (r == TEST_FOUND)
 
1197
                                        r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
 
1198
 
 
1199
                                if (r == TEST_RIGHT)
 
1200
                                        right = i;
 
1201
                                else
 
1202
                                        left = i + 1;
 
1203
                        }
 
1204
                }
 
1205
 
 
1206
                if (k > n)
 
1207
                        return 0;
 
1208
 
 
1209
                last_p = lp;
 
1210
 
 
1211
                n -= k;
 
1212
                t += k;
 
1213
                a = le64toh(array->entry_array.next_entry_array_offset);
 
1214
        }
 
1215
 
 
1216
        return 0;
 
1217
 
 
1218
found:
 
1219
        if (subtract_one && t == 0 && i == 0)
 
1220
                return 0;
 
1221
 
 
1222
        if (subtract_one && i == 0)
 
1223
                p = last_p;
 
1224
        else if (subtract_one)
 
1225
                p = le64toh(array->entry_array.items[i-1]);
 
1226
        else
 
1227
                p = le64toh(array->entry_array.items[i]);
 
1228
 
 
1229
        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
 
1230
        if (r < 0)
 
1231
                return r;
 
1232
 
 
1233
        if (ret)
 
1234
                *ret = o;
 
1235
 
 
1236
        if (offset)
 
1237
                *offset = p;
 
1238
 
 
1239
        if (idx)
 
1240
                *idx = t + i - (subtract_one ? 1 : 0);
 
1241
 
 
1242
        return 1;
 
1243
}
 
1244
 
 
1245
static int generic_array_bisect_plus_one(JournalFile *f,
 
1246
                                         uint64_t extra,
 
1247
                                         uint64_t first,
 
1248
                                         uint64_t n,
 
1249
                                         uint64_t needle,
 
1250
                                         int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
 
1251
                                         direction_t direction,
 
1252
                                         Object **ret,
 
1253
                                         uint64_t *offset,
 
1254
                                         uint64_t *idx) {
 
1255
 
 
1256
        int r;
 
1257
 
 
1258
        assert(f);
 
1259
        assert(test_object);
 
1260
 
 
1261
        if (n <= 0)
 
1262
                return 0;
 
1263
 
 
1264
        /* This bisects the array in object 'first', but first checks
 
1265
         * an extra  */
 
1266
        r = test_object(f, extra, needle);
 
1267
        if (r < 0)
 
1268
                return r;
 
1269
        else if (r == TEST_FOUND) {
 
1270
                Object *o;
 
1271
 
 
1272
                r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
 
1273
                if (r < 0)
 
1274
                        return r;
 
1275
 
 
1276
                if (ret)
 
1277
                        *ret = o;
 
1278
 
 
1279
                if (offset)
 
1280
                        *offset = extra;
 
1281
 
 
1282
                if (idx)
 
1283
                        *idx = 0;
 
1284
 
 
1285
                return 1;
 
1286
        } else if (r == TEST_RIGHT)
 
1287
                return 0;
 
1288
 
 
1289
        r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
 
1290
 
 
1291
        if (r > 0)
 
1292
                (*idx) ++;
 
1293
 
 
1294
        return r;
 
1295
}
 
1296
 
 
1297
/* Bisection callback: compares the sequence number of the entry at
 * offset p with needle. Returns TEST_FOUND, TEST_LEFT (entry is lower),
 * TEST_RIGHT (entry is higher) or a negative error from
 * journal_file_move_to_object(). */
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t seqnum;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        seqnum = le64toh(o->entry.seqnum);

        if (seqnum < needle)
                return TEST_LEFT;
        if (seqnum > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
 
1315
 
 
1316
int journal_file_move_to_entry_by_seqnum(
 
1317
                JournalFile *f,
 
1318
                uint64_t seqnum,
 
1319
                direction_t direction,
 
1320
                Object **ret,
 
1321
                uint64_t *offset) {
 
1322
 
 
1323
        return generic_array_bisect(f,
 
1324
                                    le64toh(f->header->entry_array_offset),
 
1325
                                    le64toh(f->header->n_entries),
 
1326
                                    seqnum,
 
1327
                                    test_object_seqnum,
 
1328
                                    direction,
 
1329
                                    ret, offset, NULL);
 
1330
}
 
1331
 
 
1332
/* Bisection callback: compares the realtime timestamp of the entry at
 * offset p with needle. Returns TEST_FOUND, TEST_LEFT (entry is
 * earlier), TEST_RIGHT (entry is later) or a negative error from
 * journal_file_move_to_object(). */
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t realtime;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        realtime = le64toh(o->entry.realtime);

        if (realtime < needle)
                return TEST_LEFT;
        if (realtime > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
 
1350
 
 
1351
int journal_file_move_to_entry_by_realtime(
 
1352
                JournalFile *f,
 
1353
                uint64_t realtime,
 
1354
                direction_t direction,
 
1355
                Object **ret,
 
1356
                uint64_t *offset) {
 
1357
 
 
1358
        return generic_array_bisect(f,
 
1359
                                    le64toh(f->header->entry_array_offset),
 
1360
                                    le64toh(f->header->n_entries),
 
1361
                                    realtime,
 
1362
                                    test_object_realtime,
 
1363
                                    direction,
 
1364
                                    ret, offset, NULL);
 
1365
}
 
1366
 
 
1367
/* Bisection callback: compares the monotonic timestamp of the entry at
 * offset p with needle. Returns TEST_FOUND, TEST_LEFT (entry is
 * earlier), TEST_RIGHT (entry is later) or a negative error from
 * journal_file_move_to_object(). */
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t monotonic;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        monotonic = le64toh(o->entry.monotonic);

        if (monotonic < needle)
                return TEST_LEFT;
        if (monotonic > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
 
1385
 
 
1386
int journal_file_move_to_entry_by_monotonic(
 
1387
                JournalFile *f,
 
1388
                sd_id128_t boot_id,
 
1389
                uint64_t monotonic,
 
1390
                direction_t direction,
 
1391
                Object **ret,
 
1392
                uint64_t *offset) {
 
1393
 
 
1394
        char t[8+32+1] = "_BOOT_ID=";
 
1395
        Object *o;
 
1396
        int r;
 
1397
 
 
1398
        sd_id128_to_string(boot_id, t + 8);
 
1399
 
 
1400
        r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
 
1401
        if (r < 0)
 
1402
                return r;
 
1403
        else if (r == 0)
 
1404
                return -ENOENT;
 
1405
 
 
1406
        return generic_array_bisect_plus_one(f,
 
1407
                                             le64toh(o->data.entry_offset),
 
1408
                                             le64toh(o->data.entry_array_offset),
 
1409
                                             le64toh(o->data.n_entries),
 
1410
                                             monotonic,
 
1411
                                             test_object_monotonic,
 
1412
                                             direction,
 
1413
                                             ret, offset, NULL);
 
1414
}
 
1415
 
 
1416
/* Bisection callback: compares the entry offset p itself with needle,
 * without looking at the object. Returns TEST_FOUND, TEST_LEFT (p is
 * lower) or TEST_RIGHT (p is higher). */
static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
        assert(f);
        assert(p > 0);

        if (p < needle)
                return TEST_LEFT;
        if (p > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
 
1427
 
 
1428
int journal_file_next_entry(
 
1429
                JournalFile *f,
 
1430
                Object *o, uint64_t p,
 
1431
                direction_t direction,
 
1432
                Object **ret, uint64_t *offset) {
 
1433
 
 
1434
        uint64_t i, n;
 
1435
        int r;
 
1436
 
 
1437
        assert(f);
 
1438
        assert(p > 0 || !o);
 
1439
 
 
1440
        n = le64toh(f->header->n_entries);
 
1441
        if (n <= 0)
 
1442
                return 0;
 
1443
 
 
1444
        if (!o)
 
1445
                i = direction == DIRECTION_DOWN ? 0 : n - 1;
 
1446
        else {
 
1447
                if (o->object.type != OBJECT_ENTRY)
 
1448
                        return -EINVAL;
 
1449
 
 
1450
                r = generic_array_bisect(f,
 
1451
                                         le64toh(f->header->entry_array_offset),
 
1452
                                         le64toh(f->header->n_entries),
 
1453
                                         p,
 
1454
                                         test_object_offset,
 
1455
                                         DIRECTION_DOWN,
 
1456
                                         NULL, NULL,
 
1457
                                         &i);
 
1458
                if (r <= 0)
 
1459
                        return r;
 
1460
 
 
1461
                if (direction == DIRECTION_DOWN) {
 
1462
                        if (i >= n - 1)
 
1463
                                return 0;
 
1464
 
 
1465
                        i++;
 
1466
                } else {
 
1467
                        if (i <= 0)
 
1468
                                return 0;
 
1469
 
 
1470
                        i--;
 
1471
                }
 
1472
        }
 
1473
 
 
1474
        /* And jump to it */
 
1475
        return generic_array_get(f,
 
1476
                                 le64toh(f->header->entry_array_offset),
 
1477
                                 i,
 
1478
                                 ret, offset);
 
1479
}
 
1480
 
 
1481
int journal_file_skip_entry(
 
1482
                JournalFile *f,
 
1483
                Object *o, uint64_t p,
 
1484
                int64_t skip,
 
1485
                Object **ret, uint64_t *offset) {
 
1486
 
 
1487
        uint64_t i, n;
 
1488
        int r;
 
1489
 
 
1490
        assert(f);
 
1491
        assert(o);
 
1492
        assert(p > 0);
 
1493
 
 
1494
        if (o->object.type != OBJECT_ENTRY)
 
1495
                return -EINVAL;
 
1496
 
 
1497
        r = generic_array_bisect(f,
 
1498
                                 le64toh(f->header->entry_array_offset),
 
1499
                                 le64toh(f->header->n_entries),
 
1500
                                 p,
 
1501
                                 test_object_offset,
 
1502
                                 DIRECTION_DOWN,
 
1503
                                 NULL, NULL,
 
1504
                                 &i);
 
1505
        if (r <= 0)
 
1506
                return r;
 
1507
 
 
1508
        /* Calculate new index */
 
1509
        if (skip < 0) {
 
1510
                if ((uint64_t) -skip >= i)
 
1511
                        i = 0;
 
1512
                else
 
1513
                        i = i - (uint64_t) -skip;
 
1514
        } else
 
1515
                i  += (uint64_t) skip;
 
1516
 
 
1517
        n = le64toh(f->header->n_entries);
 
1518
        if (n <= 0)
 
1519
                return -EBADMSG;
 
1520
 
 
1521
        if (i >= n)
 
1522
                i = n-1;
 
1523
 
 
1524
        return generic_array_get(f,
 
1525
                                 le64toh(f->header->entry_array_offset),
 
1526
                                 i,
 
1527
                                 ret, offset);
 
1528
}
 
1529
 
 
1530
int journal_file_next_entry_for_data(
 
1531
                JournalFile *f,
 
1532
                Object *o, uint64_t p,
 
1533
                uint64_t data_offset,
 
1534
                direction_t direction,
 
1535
                Object **ret, uint64_t *offset) {
 
1536
 
 
1537
        uint64_t n, i;
 
1538
        int r;
 
1539
        Object *d;
 
1540
 
 
1541
        assert(f);
 
1542
        assert(p > 0 || !o);
 
1543
 
 
1544
        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
 
1545
        if (r < 0)
 
1546
                return r;
 
1547
 
 
1548
        n = le64toh(d->data.n_entries);
 
1549
        if (n <= 0)
 
1550
                return n;
 
1551
 
 
1552
        if (!o)
 
1553
                i = direction == DIRECTION_DOWN ? 0 : n - 1;
 
1554
        else {
 
1555
                if (o->object.type != OBJECT_ENTRY)
 
1556
                        return -EINVAL;
 
1557
 
 
1558
                r = generic_array_bisect_plus_one(f,
 
1559
                                                  le64toh(d->data.entry_offset),
 
1560
                                                  le64toh(d->data.entry_array_offset),
 
1561
                                                  le64toh(d->data.n_entries),
 
1562
                                                  p,
 
1563
                                                  test_object_offset,
 
1564
                                                  DIRECTION_DOWN,
 
1565
                                                  NULL, NULL,
 
1566
                                                  &i);
 
1567
 
 
1568
                if (r <= 0)
 
1569
                        return r;
 
1570
 
 
1571
                if (direction == DIRECTION_DOWN) {
 
1572
                        if (i >= n - 1)
 
1573
                                return 0;
 
1574
 
 
1575
                        i++;
 
1576
                } else {
 
1577
                        if (i <= 0)
 
1578
                                return 0;
 
1579
 
 
1580
                        i--;
 
1581
                }
 
1582
 
 
1583
        }
 
1584
 
 
1585
        return generic_array_get_plus_one(f,
 
1586
                                          le64toh(d->data.entry_offset),
 
1587
                                          le64toh(d->data.entry_array_offset),
 
1588
                                          i,
 
1589
                                          ret, offset);
 
1590
}
 
1591
 
 
1592
int journal_file_move_to_entry_by_seqnum_for_data(
 
1593
                JournalFile *f,
 
1594
                uint64_t data_offset,
 
1595
                uint64_t seqnum,
 
1596
                direction_t direction,
 
1597
                Object **ret, uint64_t *offset) {
 
1598
 
 
1599
        Object *d;
 
1600
        int r;
 
1601
 
 
1602
        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
 
1603
        if (r <= 0)
 
1604
                return r;
 
1605
 
 
1606
        return generic_array_bisect_plus_one(f,
 
1607
                                             le64toh(d->data.entry_offset),
 
1608
                                             le64toh(d->data.entry_array_offset),
 
1609
                                             le64toh(d->data.n_entries),
 
1610
                                             seqnum,
 
1611
                                             test_object_seqnum,
 
1612
                                             direction,
 
1613
                                             ret, offset, NULL);
 
1614
}
 
1615
 
 
1616
int journal_file_move_to_entry_by_realtime_for_data(
 
1617
                JournalFile *f,
 
1618
                uint64_t data_offset,
 
1619
                uint64_t realtime,
 
1620
                direction_t direction,
 
1621
                Object **ret, uint64_t *offset) {
 
1622
 
 
1623
        Object *d;
 
1624
        int r;
 
1625
 
 
1626
        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
 
1627
        if (r <= 0)
 
1628
                return r;
 
1629
 
 
1630
        return generic_array_bisect_plus_one(f,
 
1631
                                             le64toh(d->data.entry_offset),
 
1632
                                             le64toh(d->data.entry_array_offset),
 
1633
                                             le64toh(d->data.n_entries),
 
1634
                                             realtime,
 
1635
                                             test_object_realtime,
 
1636
                                             direction,
 
1637
                                             ret, offset, NULL);
 
1638
}
 
1639
 
 
1640
/* Debugging aid: prints a summary of the file header to stdout, then walks
 * the objects starting at the header's arena_offset and prints one line per
 * object with its type (plus seqnum/monotonic/realtime for entries) and a
 * note if the object is flagged as compressed. The walk stops after the
 * object located at the header's tail_object_offset. If an object cannot be
 * moved to, "File corrupt" is logged and the dump ends. */
void journal_file_dump(JournalFile *f) {
        char a[33], b[33], c[33];
        Object *o;
        int r;
        uint64_t p;

        assert(f);

        /* All counters are 64 bit on disk; print them as unsigned long long
         * so that nothing is truncated where long is only 32 bits wide. */
        printf("File Path: %s\n"
               "File ID: %s\n"
               "Machine ID: %s\n"
               "Boot ID: %s\n"
               "Arena size: %llu\n"
               "Objects: %llu\n"
               "Entries: %llu\n",
               f->path,
               sd_id128_to_string(f->header->file_id, a),
               sd_id128_to_string(f->header->machine_id, b),
               sd_id128_to_string(f->header->boot_id, c),
               (unsigned long long) le64toh(f->header->arena_size),
               (unsigned long long) le64toh(f->header->n_objects),
               (unsigned long long) le64toh(f->header->n_entries));

        p = le64toh(f->header->arena_offset);
        while (p != 0) {
                /* -1: accept any object type at this offset */
                r = journal_file_move_to_object(f, -1, p, &o);
                if (r < 0)
                        goto fail;

                switch (o->object.type) {

                case OBJECT_UNUSED:
                        printf("Type: OBJECT_UNUSED\n");
                        break;

                case OBJECT_DATA:
                        printf("Type: OBJECT_DATA\n");
                        break;

                case OBJECT_ENTRY:
                        printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
                               (unsigned long long) le64toh(o->entry.seqnum),
                               (unsigned long long) le64toh(o->entry.monotonic),
                               (unsigned long long) le64toh(o->entry.realtime));
                        break;

                case OBJECT_FIELD_HASH_TABLE:
                        printf("Type: OBJECT_FIELD_HASH_TABLE\n");
                        break;

                case OBJECT_DATA_HASH_TABLE:
                        printf("Type: OBJECT_DATA_HASH_TABLE\n");
                        break;

                case OBJECT_ENTRY_ARRAY:
                        printf("Type: OBJECT_ENTRY_ARRAY\n");
                        break;

                default:
                        /* Don't silently skip object types we do not know */
                        printf("Type: unknown (%u)\n", (unsigned) o->object.type);
                        break;
                }

                if (o->object.flags & OBJECT_COMPRESSED)
                        printf("Flags: COMPRESSED\n");

                /* The tail object is the last one; otherwise the next object
                 * follows at the 64 bit aligned end of this one. */
                if (p == le64toh(f->header->tail_object_offset))
                        p = 0;
                else
                        p = p + ALIGN64(le64toh(o->object.size));
        }

        return;
fail:
        log_error("File corrupt");
}
 
1712
 
 
1713
int journal_file_open(
 
1714
                const char *fname,
 
1715
                int flags,
 
1716
                mode_t mode,
 
1717
                JournalFile *template,
 
1718
                JournalFile **ret) {
 
1719
 
 
1720
        JournalFile *f;
 
1721
        int r;
 
1722
        bool newly_created = false;
 
1723
 
 
1724
        assert(fname);
 
1725
 
 
1726
        if ((flags & O_ACCMODE) != O_RDONLY &&
 
1727
            (flags & O_ACCMODE) != O_RDWR)
 
1728
                return -EINVAL;
 
1729
 
 
1730
        if (!endswith(fname, ".journal"))
 
1731
                return -EINVAL;
 
1732
 
 
1733
        f = new0(JournalFile, 1);
 
1734
        if (!f)
 
1735
                return -ENOMEM;
 
1736
 
 
1737
        f->fd = -1;
 
1738
        f->flags = flags;
 
1739
        f->mode = mode;
 
1740
        f->writable = (flags & O_ACCMODE) != O_RDONLY;
 
1741
        f->prot = prot_from_flags(flags);
 
1742
 
 
1743
        if (template) {
 
1744
                f->metrics = template->metrics;
 
1745
                f->compress = template->compress;
 
1746
        }
 
1747
 
 
1748
        f->path = strdup(fname);
 
1749
        if (!f->path) {
 
1750
                r = -ENOMEM;
 
1751
                goto fail;
 
1752
        }
 
1753
 
 
1754
        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
 
1755
        if (f->fd < 0) {
 
1756
                r = -errno;
 
1757
                goto fail;
 
1758
        }
 
1759
 
 
1760
        if (fstat(f->fd, &f->last_stat) < 0) {
 
1761
                r = -errno;
 
1762
                goto fail;
 
1763
        }
 
1764
 
 
1765
        if (f->last_stat.st_size == 0 && f->writable) {
 
1766
                newly_created = true;
 
1767
 
 
1768
                r = journal_file_init_header(f, template);
 
1769
                if (r < 0)
 
1770
                        goto fail;
 
1771
 
 
1772
                if (fstat(f->fd, &f->last_stat) < 0) {
 
1773
                        r = -errno;
 
1774
                        goto fail;
 
1775
                }
 
1776
        }
 
1777
 
 
1778
        if (f->last_stat.st_size < (off_t) sizeof(Header)) {
 
1779
                r = -EIO;
 
1780
                goto fail;
 
1781
        }
 
1782
 
 
1783
        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
 
1784
        if (f->header == MAP_FAILED) {
 
1785
                f->header = NULL;
 
1786
                r = -errno;
 
1787
                goto fail;
 
1788
        }
 
1789
 
 
1790
        if (!newly_created) {
 
1791
                r = journal_file_verify_header(f);
 
1792
                if (r < 0)
 
1793
                        goto fail;
 
1794
        }
 
1795
 
 
1796
        if (f->writable) {
 
1797
                r = journal_file_refresh_header(f);
 
1798
                if (r < 0)
 
1799
                        goto fail;
 
1800
        }
 
1801
 
 
1802
        if (newly_created) {
 
1803
 
 
1804
                r = journal_file_setup_field_hash_table(f);
 
1805
                if (r < 0)
 
1806
                        goto fail;
 
1807
 
 
1808
                r = journal_file_setup_data_hash_table(f);
 
1809
                if (r < 0)
 
1810
                        goto fail;
 
1811
        }
 
1812
 
 
1813
        r = journal_file_map_field_hash_table(f);
 
1814
        if (r < 0)
 
1815
                goto fail;
 
1816
 
 
1817
        r = journal_file_map_data_hash_table(f);
 
1818
        if (r < 0)
 
1819
                goto fail;
 
1820
 
 
1821
        if (ret)
 
1822
                *ret = f;
 
1823
 
 
1824
        return 0;
 
1825
 
 
1826
fail:
 
1827
        journal_file_close(f);
 
1828
 
 
1829
        return r;
 
1830
}
 
1831
 
 
1832
/* Rotates the journal file *f: the file on disk is renamed from
 * "<prefix>.journal" to
 * "<prefix>@<seqnum_id>-<seqnum as 16 hex digits>-<tail_entry_realtime as 16 hex digits>.journal",
 * its header state is set to STATE_ARCHIVED, and a new file is opened under
 * the original path with the old file's flags and mode, using the old file as
 * template.
 *
 * Returns -EINVAL if the file is not writable or its path does not end in
 * ".journal", -ENOMEM on allocation failure and -errno if rename() fails; in
 * these cases *f is left alone. Once the rename succeeded the old file is
 * always closed and *f is replaced by the new file (NULL if opening it
 * failed); the result of journal_file_open() is returned. */
int journal_file_rotate(JournalFile **f) {
        char *p;
        size_t l;
        JournalFile *old_file, *new_file = NULL;
        int r;

        assert(f);
        assert(*f);

        old_file = *f;

        if (!old_file->writable)
                return -EINVAL;

        if (!endswith(old_file->path, ".journal"))
                return -EINVAL;

        l = strlen(old_file->path);

        /* The ".journal" suffix (8 chars) is dropped and re-appended, hence
         * the extra room is: '@' + 32 + '-' + 16 + '-' + 16 + NUL */
        p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
        if (!p)
                return -ENOMEM;

        memcpy(p, old_file->path, l - 8);
        p[l-8] = '@';
        sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
        snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                 "-%016llx-%016llx.journal",
                 (unsigned long long) le64toh(old_file->header->seqnum),
                 (unsigned long long) le64toh(old_file->header->tail_entry_realtime));

        /* Pick up errno right away: free() is not guaranteed to leave it
         * untouched. */
        r = rename(old_file->path, p);
        if (r < 0)
                r = -errno;

        free(p);

        if (r < 0)
                return r;

        old_file->header->state = STATE_ARCHIVED;

        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, old_file, &new_file);
        journal_file_close(old_file);

        *f = new_file;
        return r;
}
 
1877
 
 
1878
int journal_file_open_reliably(
 
1879
                const char *fname,
 
1880
                int flags,
 
1881
                mode_t mode,
 
1882
                JournalFile *template,
 
1883
                JournalFile **ret) {
 
1884
 
 
1885
        int r;
 
1886
        size_t l;
 
1887
        char *p;
 
1888
 
 
1889
        r = journal_file_open(fname, flags, mode, template, ret);
 
1890
        if (r != -EBADMSG)
 
1891
                return r;
 
1892
 
 
1893
        if ((flags & O_ACCMODE) == O_RDONLY)
 
1894
                return r;
 
1895
 
 
1896
        if (!(flags & O_CREAT))
 
1897
                return r;
 
1898
 
 
1899
        /* The file is corrupted. Rotate it away and try it again (but only once) */
 
1900
 
 
1901
        l = strlen(fname);
 
1902
        if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
 
1903
                     (int) (l-8), fname,
 
1904
                     (unsigned long long) now(CLOCK_REALTIME),
 
1905
                     random_ull()) < 0)
 
1906
                return -ENOMEM;
 
1907
 
 
1908
        r = rename(fname, p);
 
1909
        free(p);
 
1910
        if (r < 0)
 
1911
                return -errno;
 
1912
 
 
1913
        log_warning("File %s corrupted, renaming and replacing.", fname);
 
1914
 
 
1915
        return journal_file_open(fname, flags, mode, template, ret);
 
1916
}
 
1917
 
 
1918
struct vacuum_info {
 
1919
        off_t usage;
 
1920
        char *filename;
 
1921
 
 
1922
        uint64_t realtime;
 
1923
        sd_id128_t seqnum_id;
 
1924
        uint64_t seqnum;
 
1925
 
 
1926
        bool have_seqnum;
 
1927
};
 
1928
 
 
1929
static int vacuum_compare(const void *_a, const void *_b) {
 
1930
        const struct vacuum_info *a, *b;
 
1931
 
 
1932
        a = _a;
 
1933
        b = _b;
 
1934
 
 
1935
        if (a->have_seqnum && b->have_seqnum &&
 
1936
            sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
 
1937
                if (a->seqnum < b->seqnum)
 
1938
                        return -1;
 
1939
                else if (a->seqnum > b->seqnum)
 
1940
                        return 1;
 
1941
                else
 
1942
                        return 0;
 
1943
        }
 
1944
 
 
1945
        if (a->realtime < b->realtime)
 
1946
                return -1;
 
1947
        else if (a->realtime > b->realtime)
 
1948
                return 1;
 
1949
        else if (a->have_seqnum && b->have_seqnum)
 
1950
                return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
 
1951
        else
 
1952
                return strcmp(a->filename, b->filename);
 
1953
}
 
1954
 
 
1955
int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
 
1956
        DIR *d;
 
1957
        int r = 0;
 
1958
        struct vacuum_info *list = NULL;
 
1959
        unsigned n_list = 0, n_allocated = 0, i;
 
1960
        uint64_t sum = 0;
 
1961
 
 
1962
        assert(directory);
 
1963
 
 
1964
        if (max_use <= 0)
 
1965
                return 0;
 
1966
 
 
1967
        d = opendir(directory);
 
1968
        if (!d)
 
1969
                return -errno;
 
1970
 
 
1971
        for (;;) {
 
1972
                int k;
 
1973
                struct dirent buf, *de;
 
1974
                size_t q;
 
1975
                struct stat st;
 
1976
                char *p;
 
1977
                unsigned long long seqnum, realtime;
 
1978
                sd_id128_t seqnum_id;
 
1979
                bool have_seqnum;
 
1980
 
 
1981
                k = readdir_r(d, &buf, &de);
 
1982
                if (k != 0) {
 
1983
                        r = -k;
 
1984
                        goto finish;
 
1985
                }
 
1986
 
 
1987
                if (!de)
 
1988
                        break;
 
1989
 
 
1990
                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
 
1991
                        continue;
 
1992
 
 
1993
                if (!S_ISREG(st.st_mode))
 
1994
                        continue;
 
1995
 
 
1996
                q = strlen(de->d_name);
 
1997
 
 
1998
                if (endswith(de->d_name, ".journal")) {
 
1999
 
 
2000
                        /* Vacuum archived files */
 
2001
 
 
2002
                        if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
 
2003
                                continue;
 
2004
 
 
2005
                        if (de->d_name[q-8-16-1] != '-' ||
 
2006
                            de->d_name[q-8-16-1-16-1] != '-' ||
 
2007
                            de->d_name[q-8-16-1-16-1-32-1] != '@')
 
2008
                                continue;
 
2009
 
 
2010
                        p = strdup(de->d_name);
 
2011
                        if (!p) {
 
2012
                                r = -ENOMEM;
 
2013
                                goto finish;
 
2014
                        }
 
2015
 
 
2016
                        de->d_name[q-8-16-1-16-1] = 0;
 
2017
                        if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
 
2018
                                free(p);
 
2019
                                continue;
 
2020
                        }
 
2021
 
 
2022
                        if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
 
2023
                                free(p);
 
2024
                                continue;
 
2025
                        }
 
2026
 
 
2027
                        have_seqnum = true;
 
2028
 
 
2029
                } else if (endswith(de->d_name, ".journal~")) {
 
2030
                        unsigned long long tmp;
 
2031
 
 
2032
                        /* Vacuum corrupted files */
 
2033
 
 
2034
                        if (q < 1 + 16 + 1 + 16 + 8 + 1)
 
2035
                                continue;
 
2036
 
 
2037
                        if (de->d_name[q-1-8-16-1] != '-' ||
 
2038
                            de->d_name[q-1-8-16-1-16-1] != '@')
 
2039
                                continue;
 
2040
 
 
2041
                        p = strdup(de->d_name);
 
2042
                        if (!p) {
 
2043
                                r = -ENOMEM;
 
2044
                                goto finish;
 
2045
                        }
 
2046
 
 
2047
                        if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
 
2048
                                free(p);
 
2049
                                continue;
 
2050
                        }
 
2051
 
 
2052
                        have_seqnum = false;
 
2053
                } else
 
2054
                        continue;
 
2055
 
 
2056
                if (n_list >= n_allocated) {
 
2057
                        struct vacuum_info *j;
 
2058
 
 
2059
                        n_allocated = MAX(n_allocated * 2U, 8U);
 
2060
                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
 
2061
                        if (!j) {
 
2062
                                free(p);
 
2063
                                r = -ENOMEM;
 
2064
                                goto finish;
 
2065
                        }
 
2066
 
 
2067
                        list = j;
 
2068
                }
 
2069
 
 
2070
                list[n_list].filename = p;
 
2071
                list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
 
2072
                list[n_list].seqnum = seqnum;
 
2073
                list[n_list].realtime = realtime;
 
2074
                list[n_list].seqnum_id = seqnum_id;
 
2075
                list[n_list].have_seqnum = have_seqnum;
 
2076
 
 
2077
                sum += list[n_list].usage;
 
2078
 
 
2079
                n_list ++;
 
2080
        }
 
2081
 
 
2082
        qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
 
2083
 
 
2084
        for(i = 0; i < n_list; i++) {
 
2085
                struct statvfs ss;
 
2086
 
 
2087
                if (fstatvfs(dirfd(d), &ss) < 0) {
 
2088
                        r = -errno;
 
2089
                        goto finish;
 
2090
                }
 
2091
 
 
2092
                if (sum <= max_use &&
 
2093
                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
 
2094
                        break;
 
2095
 
 
2096
                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
 
2097
                        log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
 
2098
                        sum -= list[i].usage;
 
2099
                } else if (errno != ENOENT)
 
2100
                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
 
2101
        }
 
2102
 
 
2103
finish:
 
2104
        for (i = 0; i < n_list; i++)
 
2105
                free(list[i].filename);
 
2106
 
 
2107
        free(list);
 
2108
 
 
2109
        if (d)
 
2110
                closedir(d);
 
2111
 
 
2112
        return r;
 
2113
}
 
2114
 
 
2115
/* Copies the entry object o, located at offset p in from, into to: every data
 * object the entry references is appended to the destination (uncompressing
 * it first if it is flagged as compressed), and finally a new entry carrying
 * the original monotonic and realtime timestamps is appended.
 *
 * Returns -EPERM if to is not writable, -EINVAL if the entry's timestamps are
 * older than the destination's tail entry, -EBADMSG if an item's hash does
 * not match its data object or uncompression fails, -E2BIG if a payload size
 * does not fit into size_t, -EPROTONOSUPPORT for compressed data when built
 * without HAVE_XZ, or any error of the helpers called. Otherwise the result
 * of journal_file_append_entry_internal() is returned; seqnum, ret and offset
 * are passed on to it. */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        uint64_t idx, n_items, xor_hash = 0;
        EntryItem *items;
        dual_timestamp ts;
        int k;

        assert(from);
        assert(to);
        assert(o);
        assert(p);

        if (!to->writable)
                return -EPERM;

        ts.monotonic = le64toh(o->entry.monotonic);
        ts.realtime = le64toh(o->entry.realtime);

        if (to->tail_entry_monotonic_valid &&
            ts.monotonic < le64toh(to->header->tail_entry_monotonic))
                return -EINVAL;

        if (ts.realtime < le64toh(to->header->tail_entry_realtime))
                return -EINVAL;

        n_items = journal_file_entry_n_items(o);
        items = alloca(sizeof(EntryItem) * n_items);

        for (idx = 0; idx < n_items; idx++) {
                uint64_t src_offset, src_hash, length, dst_offset;
                void *payload;
                Object *copied;

                /* Read what we need from the entry before o is repointed */
                src_offset = le64toh(o->entry.items[idx].object_offset);
                src_hash = o->entry.items[idx].hash;

                /* From here on o refers to the data object */
                k = journal_file_move_to_object(from, OBJECT_DATA, src_offset, &o);
                if (k < 0)
                        return k;

                if (o->data.hash != src_hash)
                        return -EBADMSG;

                length = le64toh(o->object.size) - offsetof(Object, data.payload);

                /* We hit the limit on 32bit machines */
                if ((uint64_t) (size_t) length != length)
                        return -E2BIG;

                if (!(o->object.flags & OBJECT_COMPRESSED))
                        payload = o->data.payload;
                else {
#ifdef HAVE_XZ
                        uint64_t rsize;

                        if (!uncompress_blob(o->data.payload, length, &from->compress_buffer, &from->compress_buffer_size, &rsize))
                                return -EBADMSG;

                        payload = from->compress_buffer;
                        length = rsize;
#else
                        return -EPROTONOSUPPORT;
#endif
                }

                k = journal_file_append_data(to, payload, length, &copied, &dst_offset);
                if (k < 0)
                        return k;

                xor_hash ^= le64toh(copied->data.hash);
                items[idx].object_offset = htole64(dst_offset);
                items[idx].hash = copied->data.hash;

                /* Point o back at the entry for the next iteration */
                k = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
                if (k < 0)
                        return k;
        }

        return journal_file_append_entry_internal(to, &ts, xor_hash, items, n_items, seqnum, ret, offset);
}
 
2196
 
 
2197
/* Fills in and sanitizes the size limits in m, using the file system that fd
 * lives on as the reference for defaults.
 *
 * Fields set to (uint64_t) -1 are treated as unset and get a default:
 *   max_use:   10% of the file system size, clamped to
 *              [DEFAULT_MAX_USE_LOWER, DEFAULT_MAX_USE_UPPER], or
 *              DEFAULT_MAX_USE_LOWER if the file system size is unknown
 *   max_size:  1/8 of max_use, capped at DEFAULT_MAX_SIZE_UPPER
 *   min_size:  JOURNAL_FILE_SIZE_MIN
 *   keep_free: 5% of the file system size, capped at DEFAULT_KEEP_FREE_UPPER,
 *              or DEFAULT_KEEP_FREE if the file system size is unknown
 * Explicitly configured max_use, max_size and min_size values are page
 * aligned and raised to their minimums; an explicit keep_free is left as is.
 * The resulting values are logged. */
void journal_default_metrics(JournalMetrics *m, int fd) {
        uint64_t fs_size = 0;
        struct statvfs ss;
        char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];

        assert(m);
        assert(fd >= 0);

        /* Widen both factors first, so that the product cannot overflow
         * where the statvfs fields are only 32 bits wide. */
        if (fstatvfs(fd, &ss) >= 0)
                fs_size = (uint64_t) ss.f_frsize * (uint64_t) ss.f_blocks;

        if (m->max_use == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */

                        if (m->max_use > DEFAULT_MAX_USE_UPPER)
                                m->max_use = DEFAULT_MAX_USE_UPPER;

                        if (m->max_use < DEFAULT_MAX_USE_LOWER)
                                m->max_use = DEFAULT_MAX_USE_LOWER;
                } else
                        m->max_use = DEFAULT_MAX_USE_LOWER;
        } else {
                m->max_use = PAGE_ALIGN(m->max_use);

                if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
                        m->max_use = JOURNAL_FILE_SIZE_MIN*2;
        }

        if (m->max_size == (uint64_t) -1) {
                m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */

                if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
                        m->max_size = DEFAULT_MAX_SIZE_UPPER;
        } else
                m->max_size = PAGE_ALIGN(m->max_size);

        if (m->max_size < JOURNAL_FILE_SIZE_MIN)
                m->max_size = JOURNAL_FILE_SIZE_MIN;

        /* Always leave room for at least two files of maximum size */
        if (m->max_size*2 > m->max_use)
                m->max_use = m->max_size*2;

        if (m->min_size == (uint64_t) -1)
                m->min_size = JOURNAL_FILE_SIZE_MIN;
        else {
                m->min_size = PAGE_ALIGN(m->min_size);

                if (m->min_size < JOURNAL_FILE_SIZE_MIN)
                        m->min_size = JOURNAL_FILE_SIZE_MIN;

                /* An explicit minimum wins over the maximum */
                if (m->min_size > m->max_size)
                        m->max_size = m->min_size;
        }

        if (m->keep_free == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */

                        if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
                                m->keep_free = DEFAULT_KEEP_FREE_UPPER;

                } else
                        m->keep_free = DEFAULT_KEEP_FREE;
        }

        log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
                 format_bytes(a, sizeof(a), m->max_use),
                 format_bytes(b, sizeof(b), m->max_size),
                 format_bytes(c, sizeof(c), m->min_size),
                 format_bytes(d, sizeof(d), m->keep_free));
}