1
/* fsfs-reorg.c -- prototypic tool to reorganize packed FSFS repositories
4
* ====================================================================
5
* Licensed to the Apache Software Foundation (ASF) under one
6
* or more contributor license agreements. See the NOTICE file
7
* distributed with this work for additional information
8
* regarding copyright ownership. The ASF licenses this file
9
* to you under the Apache License, Version 2.0 (the
10
* "License"); you may not use this file except in compliance
11
* with the License. You may obtain a copy of the License at
13
* http://www.apache.org/licenses/LICENSE-2.0
15
* Unless required by applicable law or agreed to in writing,
16
* software distributed under the License is distributed on an
17
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18
* KIND, either express or implied. See the License for the
19
* specific language governing permissions and limitations
21
* ====================================================================
28
#include <apr_general.h>
29
#include <apr_file_io.h>
32
#include "svn_pools.h"
36
#include "svn_dirent_uri.h"
37
#include "svn_sorts.h"
38
#include "svn_delta.h"
41
#include "private/svn_string_private.h"
42
#include "private/svn_subr_private.h"
43
#include "private/svn_dep_compat.h"
49
/* Tag naming this tool (fsfs-reorg, see the file header comment);
 * presumably prefixed to its error output -- usage is outside this view. */
#define ERROR_TAG "fsfs-reorg: "
51
/* forward declarations */
52
typedef struct noderev_t noderev_t;
53
typedef struct revision_info_t revision_info_t;
55
/* A FSFS rev file is a sequence of fragments and unused space (the latter
56
* only being inserted by this tool and not during ordinary SVN operation).
58
* This type defines the type of any fragment.
60
* Please note that the classification as "property", "dir" or "file"
61
* fragments is only to be used while determining the future placement
62
* of a representation. If the rep is shared, the same rep may be used
63
* as *any* of the 3 kinds.
67
/* the 2 number line containing changes and root node offsets */
70
/* list of all changes in a revision */
73
/* (the textual representation of) a noderev */
76
/* a property rep (including PLAIN / DELTA header) */
79
/* a directory rep (including PLAIN / DELTA header) */
82
/* a file rep (including PLAIN / DELTA header) */
86
/* A fragment. This is used to represent the final ordering, i.e. there
87
* will be an array containing elements of this type that basically put
88
* a fragment at some location in the target file.
90
typedef struct fragment_t
92
/* position in the target file */
95
/* kind of fragment */
96
enum fragment_kind_t kind;
98
/* pointer to the fragment struct; type depends on KIND */
102
/* Location info for a single revision.
104
typedef struct revision_location_t
106
/* pack file offset (manifest value), 0 for non-packed files */
109
/* offset of the changes list relative to OFFSET */
112
/* length of the changes list in bytes */
113
apr_size_t changes_len;
115
/* first offset behind the revision data in the pack file (file length
116
* for non-packed revs) */
118
} revision_location_t;
120
/* Absolute position and size of some item.
122
typedef struct location_t
124
/* absolute offset in the file */
127
/* item length in bytes */
131
/* A parsed directory entry. Note that instances of this struct may be
132
* shared between different DIRECTORY_T containers.
134
typedef struct direntry_t
136
/* (local) entry / path name */
142
/* node rev providing ID and representation(s) */
146
/* Representation of a parsed directory content.
148
typedef struct directory_t
150
/* array of pointers to DIRENTRY_T */
151
apr_array_header_t *entries;
153
/* MD5 of the textual representation. Will be set lazily as a side-effect
154
* of determining the length of this dir's textual representation. */
155
unsigned char target_md5[16];
157
/* (expanded) length of the textual representation.
158
* Determined lazily during the write process. */
162
/* A representation fragment.
164
typedef struct representation_t
166
/* location in the source file */
169
/* location in the reordered target file */
172
/* length of the PLAIN / DELTA line in the source file in bytes */
173
apr_size_t header_size;
175
/* deltification base, or NULL if there is none */
176
struct representation_t *delta_base;
178
/* revision that contains this representation
179
* (may be referenced by other revisions, though) */
180
revision_info_t *revision;
182
/* representation content parsed as a directory. This will be NULL, if
183
* *no* directory noderev uses this representation. */
186
/* the source content has a PLAIN header, so we may simply copy the
187
* source content into the target */
188
svn_boolean_t is_plain;
190
/* coloring flag used in the reordering algorithm to keep track of
191
* representations that still need to be placed. */
192
svn_boolean_t covered;
199
/* location within the source file */
202
/* location within the reorganized target file. */
205
/* predecessor node, or NULL if there is none */
206
noderev_t *predecessor;
208
/* content representation; may be NULL if there is none */
209
representation_t *text;
211
/* properties representation; may be NULL if there is none */
212
representation_t *props;
214
/* revision that this noderev belongs to */
215
revision_info_t *revision;
217
/* coloring flag used in the reordering algorithm to keep track of
218
* noderevs that still need to be placed. */
219
svn_boolean_t covered;
222
/* Represents a single revision.
223
* There will be only one instance per revision. */
224
struct revision_info_t
226
/* number of this revision */
227
svn_revnum_t revision;
229
/* position in the source file */
230
revision_location_t original;
232
/* position in the reorganized target file */
233
revision_location_t target;
235
/* noderev of the root directory */
236
noderev_t *root_noderev;
238
/* all noderev_t of this revision (ordered by source file offset),
239
* i.e. those that point back to this struct */
240
apr_array_header_t *node_revs;
242
/* all representation_t of this revision (ordered by source file offset),
243
* i.e. those that point back to this struct */
244
apr_array_header_t *representations;
247
/* Represents a packed revision file.
249
typedef struct revision_pack_t
251
/* first revision in the pack file */
254
/* revision_info_t* of all revisions in the pack file; in revision order. */
255
apr_array_header_t *info;
257
/* list of fragments to place in the target pack file; in target order. */
258
apr_array_header_t *fragments;
260
/* source pack file length */
263
/* temporary value. Equal to the number of bytes in the target pack file
264
* already allocated to fragments. */
265
apr_size_t target_offset;
268
/* Cache for revision source content. All content is stored in DATA and
269
* the HASH maps revision number to an svn_string_t instance whose data
270
* member points into DATA.
272
* Once TOTAL_SIZE exceeds LIMIT, all content will be discarded. Similarly,
273
* the hash gets cleared every 10000 insertions to keep the HASH_POOL
274
* memory usage in check.
276
typedef struct content_cache_t
278
/* pool used for HASH */
279
apr_pool_t *hash_pool;
281
/* svn_revnum_t -> svn_string_t.
282
* The strings become (potentially) invalid when adding new cache entries. */
285
/* data buffer. the first TOTAL_SIZE bytes are actually being used. */
291
/* number of bytes used in DATA */
292
apr_size_t total_size;
294
/* number of insertions since the last hash cleanup */
295
apr_size_t insert_count;
298
/* A cached directory. In contrast to directory_t, this stores the data as
299
* the plain hash that the normal FSFS will use to serialize & diff dirs.
301
typedef struct dir_cache_entry_t
303
/* revision containing the representation */
304
svn_revnum_t revision;
306
/* offset of the representation within that revision */
309
/* key-value representation of the directory entries */
313
/* Directory cache. (revision, offset) will be mapped directly into the
314
* ENTRIES array of ENTRY_COUNT buckets (many entries will be NULL).
315
* Two alternating pools will be used to allocate dir content.
317
* If the INSERT_COUNT exceeds a given limit, the pools get exchanged and
318
* the older of the two will be cleared. This is to keep dir objects valid
319
* for at least one insertion.
321
typedef struct dir_cache_t
323
/* fixed-size array of ENTRY_COUNT elements */
324
dir_cache_entry_t *entries;
326
/* currently used for entry allocations */
329
/* previously used for entry allocations */
332
/* size of ENTRIES in elements */
333
apr_size_t entry_count;
335
/* number of directory elements added. I.e. usually >> #cached dirs */
336
apr_size_t insert_count;
339
/* A cached, undeltified txdelta window.
341
typedef struct window_cache_entry_t
343
/* revision containing the window */
344
svn_revnum_t revision;
346
/* offset of the deltified window within that revision */
350
svn_stringbuf_t *window;
351
} window_cache_entry_t;
353
/* Cache for undeltified txdelta windows. (revision, offset) will be mapped
354
* directly into the ENTRIES array of ENTRY_COUNT buckets (most entries
357
* The cache will be cleared when USED exceeds CAPACITY.
359
typedef struct window_cache_t
361
/* fixed-size array of ENTRY_COUNT elements */
362
window_cache_entry_t *entries;
364
/* used to allocate windows */
367
/* size of ENTRIES in elements */
368
apr_size_t entry_count;
370
/* maximum combined size of all cached windows */
373
/* current combined size of all cached windows */
377
/* Root data structure containing all information about a given repository.
379
typedef struct fs_fs_t
381
/* repository to reorg */
384
/* revision to start at (must be 0, ATM) */
385
svn_revnum_t start_revision;
387
/* FSFS format number */
390
/* highest revision number in the repo */
391
svn_revnum_t max_revision;
393
/* first non-packed revision */
394
svn_revnum_t min_unpacked_rev;
397
int max_files_per_dir;
400
apr_array_header_t *revisions;
402
/* all packed files */
403
apr_array_header_t *packs;
405
/* empty representation.
406
* Used as a dummy base for DELTA reps without base. */
407
representation_t *null_base;
409
/* revision content cache */
410
content_cache_t *cache;
412
/* directory hash cache */
413
dir_cache_t *dir_cache;
415
/* undeltified txdelta window cache */
416
window_cache_t *window_cache;
419
/* Return the rev pack folder for revision REV in FS.
422
get_pack_folder(fs_fs_t *fs,
426
return apr_psprintf(pool, "%s/db/revs/%ld.pack",
427
fs->path, rev / fs->max_files_per_dir);
430
/* Return the path of the file containing revision REV in FS.
433
rev_or_pack_file_name(fs_fs_t *fs,
437
return fs->min_unpacked_rev > rev
438
? svn_dirent_join(get_pack_folder(fs, rev, pool), "pack", pool)
439
: apr_psprintf(pool, "%s/db/revs/%ld/%ld", fs->path,
440
rev / fs->max_files_per_dir, rev);
443
/* Open the file containing revision REV in FS and return it in *FILE.
446
open_rev_or_pack_file(apr_file_t **file,
451
return svn_io_file_open(file,
452
rev_or_pack_file_name(fs, rev, pool),
453
APR_READ | APR_BUFFERED,
458
/* Read the whole content of the file containing REV in FS and return that
462
read_rev_or_pack_file(svn_stringbuf_t **content,
467
return svn_stringbuf_from_file2(content,
468
rev_or_pack_file_name(fs, rev, pool),
472
/* Return a new content cache with the given size LIMIT. Use POOL for
473
* all cache-related allocations.
475
static content_cache_t *
476
create_content_cache(apr_pool_t *pool,
479
content_cache_t *result = apr_pcalloc(pool, sizeof(*result));
481
result->hash_pool = svn_pool_create(pool);
482
result->hash = svn_hash__make(result->hash_pool);
483
result->limit = limit;
484
result->total_size = 0;
485
result->insert_count = 0;
486
result->data = apr_palloc(pool, limit);
491
/* Return the content of revision REVISION from CACHE. Return NULL upon a
492
* cache miss. This is a cache-internal function.
494
static svn_string_t *
495
get_cached_content(content_cache_t *cache,
496
svn_revnum_t revision)
498
return apr_hash_get(cache->hash, &revision, sizeof(revision));
501
/* Take the content in DATA and store it under REVISION in CACHE.
502
* This is a cache-internal function.
505
set_cached_content(content_cache_t *cache,
506
svn_revnum_t revision,
509
svn_string_t *content;
512
/* double insertion? -> broken cache logic */
513
assert(get_cached_content(cache, revision) == NULL);
515
/* purge the cache upon overflow */
516
if (cache->total_size + data->len > cache->limit)
518
/* the hash pool grows slowly over time; clear it once in a while */
519
if (cache->insert_count > 10000)
521
svn_pool_clear(cache->hash_pool);
522
cache->hash = svn_hash__make(cache->hash_pool);
523
cache->insert_count = 0;
526
cache->hash = svn_hash__make(cache->hash_pool);
528
cache->total_size = 0;
530
/* buffer overflow / revision too large */
531
if (data->len > cache->limit)
532
SVN_ERR_MALFUNCTION_NO_RETURN();
535
/* copy data to cache and update the index (hash) */
536
content = apr_palloc(cache->hash_pool, sizeof(*content));
537
content->data = cache->data + cache->total_size;
538
content->len = data->len;
540
memcpy(cache->data + cache->total_size, data->data, data->len);
541
cache->total_size += data->len;
543
key = apr_palloc(cache->hash_pool, sizeof(*key));
546
apr_hash_set(cache->hash, key, sizeof(*key), content);
547
++cache->insert_count;
550
/* Get the file content of revision REVISION in FS and return it in *DATA.
551
* Use SCRATCH_POOL for temporary allocations.
554
get_content(svn_string_t **data,
556
svn_revnum_t revision,
557
apr_pool_t *scratch_pool)
560
revision_info_t *revision_info;
561
svn_stringbuf_t *temp;
562
apr_off_t temp_offset;
564
/* try getting the data from our cache */
565
svn_string_t *result = get_cached_content(fs->cache, revision);
572
/* not in cache. Is the revision valid at all? */
573
if (revision - fs->start_revision > fs->revisions->nelts)
574
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
575
_("Unknown revision %ld"), revision);
576
revision_info = APR_ARRAY_IDX(fs->revisions,
577
revision - fs->start_revision,
580
/* read the revision content. Assume that the file has *not* been
581
* reorg'ed, yet, i.e. all data is in one place. */
582
temp = svn_stringbuf_create_ensure( revision_info->original.end
583
- revision_info->original.offset,
585
temp->len = revision_info->original.end - revision_info->original.offset;
586
SVN_ERR(open_rev_or_pack_file(&file, fs, revision, scratch_pool));
588
temp_offset = revision_info->original.offset;
589
SVN_ERR(svn_io_file_seek(file, APR_SET, &temp_offset,
591
SVN_ERR_ASSERT(temp_offset < APR_SIZE_MAX);
592
revision_info->original.offset = (apr_size_t)temp_offset;
593
SVN_ERR(svn_io_file_read(file, temp->data, &temp->len, scratch_pool));
595
/* cache the result and return it */
596
set_cached_content(fs->cache, revision,
597
svn_stringbuf__morph_into_string(temp));
598
*data = get_cached_content(fs->cache, revision);
603
/* Return a new directory cache with ENTRY_COUNT buckets in its index.
604
* Use POOL for all cache-related allocations.
607
create_dir_cache(apr_pool_t *pool,
608
apr_size_t entry_count)
610
dir_cache_t *result = apr_pcalloc(pool, sizeof(*result));
612
result->pool1 = svn_pool_create(pool);
613
result->pool2 = svn_pool_create(pool);
614
result->entry_count = entry_count;
615
result->insert_count = 0;
616
result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count);
621
/* Return the position within FS' dir cache ENTRIES index for the given
622
* (REVISION, OFFSET) pair. This is a cache-internal function.
625
get_dir_cache_index(fs_fs_t *fs,
626
svn_revnum_t revision,
629
return (revision + offset * 0xd1f3da69) % fs->dir_cache->entry_count;
632
/* Return the currently active pool of FS' dir cache. Note that it may be
633
* cleared after *2* insertions.
636
get_cached_dir_pool(fs_fs_t *fs)
638
return fs->dir_cache->pool1;
641
/* Return the cached directory content stored in REPRESENTATION within FS.
642
* If that has not been found in cache, return NULL.
645
get_cached_dir(fs_fs_t *fs,
646
representation_t *representation)
648
svn_revnum_t revision = representation->revision->revision;
649
apr_size_t offset = representation->original.offset;
651
apr_size_t i = get_dir_cache_index(fs, revision, offset);
652
dir_cache_entry_t *entry = &fs->dir_cache->entries[i];
654
return entry->offset == offset && entry->revision == revision
659
/* Cache the directory HASH for REPRESENTATION within FS.
662
set_cached_dir(fs_fs_t *fs,
663
representation_t *representation,
666
/* select the entry to use */
667
svn_revnum_t revision = representation->revision->revision;
668
apr_size_t offset = representation->original.offset;
670
apr_size_t i = get_dir_cache_index(fs, revision, offset);
671
dir_cache_entry_t *entry = &fs->dir_cache->entries[i];
673
/* clean the cache and rotate pools at regular intervals */
674
fs->dir_cache->insert_count += apr_hash_count(hash);
675
if (fs->dir_cache->insert_count >= fs->dir_cache->entry_count * 100)
679
svn_pool_clear(fs->dir_cache->pool2);
680
memset(fs->dir_cache->entries,
682
sizeof(*fs->dir_cache->entries) * fs->dir_cache->entry_count);
683
fs->dir_cache->insert_count = 0;
685
pool = fs->dir_cache->pool2;
686
fs->dir_cache->pool2 = fs->dir_cache->pool1;
687
fs->dir_cache->pool1 = pool;
690
/* write data to cache */
692
entry->offset = offset;
693
entry->revision = revision;
696
/* Return a new txdelta window cache with ENTRY_COUNT buckets in its index
697
* and the total CAPACITY given in bytes.
698
* Use POOL for all cache-related allocations.
700
static window_cache_t *
701
create_window_cache(apr_pool_t *pool,
702
apr_size_t entry_count,
705
window_cache_t *result = apr_pcalloc(pool, sizeof(*result));
707
result->pool = svn_pool_create(pool);
708
result->entry_count = entry_count;
709
result->capacity = capacity;
711
result->entries = apr_pcalloc(pool, sizeof(*result->entries) * entry_count);
716
/* Return the position within FS' window cache ENTRIES index for the given
717
* (REVISION, OFFSET) pair. This is a cache-internal function.
720
get_window_cache_index(fs_fs_t *fs,
721
svn_revnum_t revision,
724
return (revision + offset * 0xd1f3da69) % fs->window_cache->entry_count;
727
/* Return the cached txdelta window stored in REPRESENTATION within FS.
728
* If that has not been found in cache, return NULL.
730
static svn_stringbuf_t *
731
get_cached_window(fs_fs_t *fs,
732
representation_t *representation,
735
svn_revnum_t revision = representation->revision->revision;
736
apr_size_t offset = representation->original.offset;
738
apr_size_t i = get_window_cache_index(fs, revision, offset);
739
window_cache_entry_t *entry = &fs->window_cache->entries[i];
741
return entry->offset == offset && entry->revision == revision
742
? svn_stringbuf_dup(entry->window, pool)
746
/* Cache the undeltified txdelta WINDOW for REPRESENTATION within FS.
749
set_cached_window(fs_fs_t *fs,
750
representation_t *representation,
751
svn_stringbuf_t *window)
754
svn_revnum_t revision = representation->revision->revision;
755
apr_size_t offset = representation->original.offset;
757
apr_size_t i = get_window_cache_index(fs, revision, offset);
758
window_cache_entry_t *entry = &fs->window_cache->entries[i];
760
/* if the capacity is exceeded, clear the cache */
761
fs->window_cache->used += window->len;
762
if (fs->window_cache->used >= fs->window_cache->capacity)
764
svn_pool_clear(fs->window_cache->pool);
765
memset(fs->window_cache->entries,
767
sizeof(*fs->window_cache->entries) * fs->window_cache->entry_count);
768
fs->window_cache->used = window->len;
771
/* set the entry to a copy of the window data */
772
entry->window = svn_stringbuf_dup(window, fs->window_cache->pool);
773
entry->offset = offset;
774
entry->revision = revision;
777
/* Given rev pack PATH in FS, read the manifest file and return the offsets
778
* in *MANIFEST. Use POOL for allocations.
781
read_manifest(apr_array_header_t **manifest,
786
svn_stream_t *manifest_stream;
787
apr_pool_t *iterpool;
789
/* Open the manifest file. */
790
SVN_ERR(svn_stream_open_readonly(&manifest_stream,
791
svn_dirent_join(path, "manifest", pool),
794
/* While we're here, let's just read the entire manifest file into an array,
795
so we can cache the entire thing. */
796
iterpool = svn_pool_create(pool);
797
*manifest = apr_array_make(pool, fs->max_files_per_dir, sizeof(apr_size_t));
805
svn_pool_clear(iterpool);
806
SVN_ERR(svn_stream_readline(manifest_stream, &sb, "\n", &eof, iterpool));
810
err = svn_cstring_strtoui64(&val, sb->data, 0, APR_SIZE_MAX, 10);
812
return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
813
_("Manifest offset '%s' too large"),
815
APR_ARRAY_PUSH(*manifest, apr_size_t) = (apr_size_t)val;
817
svn_pool_destroy(iterpool);
819
return svn_stream_close(manifest_stream);
822
/* Read header information for the revision stored in FILE_CONTENT at
823
* offsets START to END. Return the offsets within FILE_CONTENT for the
824
* *ROOT_NODEREV, the list of *CHANGES and its len in *CHANGES_LEN.
825
* Use POOL for temporary allocations. */
827
read_revision_header(apr_size_t *changes,
828
apr_size_t *changes_len,
829
apr_size_t *root_noderev,
830
svn_stringbuf_t *file_content,
841
/* Read in this last block, from which we will identify the last line. */
843
if (start + len > end)
846
memcpy(buf, file_content->data + end - len, len);
848
/* The last byte should be a newline. */
849
if (buf[(apr_ssize_t)len - 1] != '\n')
850
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
851
_("Revision lacks trailing newline"));
853
/* Look for the next previous newline. */
855
line = strrchr(buf, '\n');
857
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
858
_("Final line in revision file longer "
859
"than 64 characters"));
861
space = strchr(line, ' ');
863
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
864
_("Final line in revision file missing space"));
866
/* terminate the header line */
869
/* extract information */
870
SVN_ERR(svn_cstring_strtoui64(&val, line+1, 0, APR_SIZE_MAX, 10));
871
*root_noderev = (apr_size_t)val;
872
SVN_ERR(svn_cstring_strtoui64(&val, space+1, 0, APR_SIZE_MAX, 10));
873
*changes = (apr_size_t)val;
874
*changes_len = end - *changes - start - (buf + len - line) + 1;
879
/* Read the FSFS format number and sharding size from the format file at
880
* PATH and return it in *PFORMAT and *MAX_FILES_PER_DIR respectively.
881
* Use POOL for temporary allocations.
884
read_format(int *pformat, int *max_files_per_dir,
885
const char *path, apr_pool_t *pool)
892
/* open format file and read the first line */
893
err = svn_io_file_open(&file, path, APR_READ | APR_BUFFERED,
894
APR_OS_DEFAULT, pool);
895
if (err && APR_STATUS_IS_ENOENT(err->apr_err))
897
/* Treat an absent format file as format 1. Do not try to
898
create the format file on the fly, because the repository
899
might be read-only for us, or this might be a read-only
900
operation, and the spirit of FSFS is to make no changes
901
whatsoever in read-only operations. See thread starting at
902
http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=97600
904
svn_error_clear(err);
906
*max_files_per_dir = 0;
913
err = svn_io_read_length_line(file, buf, &len, pool);
914
if (err && APR_STATUS_IS_EOF(err->apr_err))
916
/* Return a more useful error message. */
917
svn_error_clear(err);
918
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
919
_("Can't read first line of format file '%s'"),
920
svn_dirent_local_style(path, pool));
924
/* Check that the first line contains only digits. */
925
SVN_ERR(svn_cstring_atoi(pformat, buf));
927
/* Set the default values for anything that can be set via an option. */
928
*max_files_per_dir = 0;
930
/* Read any options. */
934
err = svn_io_read_length_line(file, buf, &len, pool);
935
if (err && APR_STATUS_IS_EOF(err->apr_err))
937
/* No more options; that's okay. */
938
svn_error_clear(err);
943
if (strncmp(buf, "layout ", 7) == 0)
945
if (strcmp(buf+7, "linear") == 0)
947
*max_files_per_dir = 0;
951
if (strncmp(buf+7, "sharded ", 8) == 0)
953
/* Check that the argument is numeric. */
954
SVN_ERR(svn_cstring_atoi(max_files_per_dir, buf + 15));
959
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
960
_("'%s' contains invalid filesystem format option '%s'"),
961
svn_dirent_local_style(path, pool), buf);
964
return svn_io_file_close(file, pool);
967
/* Read the content of the file at PATH and return it in *RESULT.
968
* Use POOL for temporary allocations.
971
read_number(svn_revnum_t *result, const char *path, apr_pool_t *pool)
973
svn_stringbuf_t *content;
976
SVN_ERR(svn_stringbuf_from_file2(&content, path, pool));
978
content->data[content->len-1] = 0;
979
SVN_ERR(svn_cstring_strtoui64(&number, content->data, 0, LONG_MAX, 10));
980
*result = (svn_revnum_t)number;
985
/* Create *FS for the repository at PATH and read the format and size info.
986
* Use POOL for temporary allocations.
989
fs_open(fs_fs_t **fs, const char *path, apr_pool_t *pool)
991
*fs = apr_pcalloc(pool, sizeof(**fs));
992
(*fs)->path = apr_pstrdup(pool, path);
993
(*fs)->max_files_per_dir = 1000;
995
/* Read the FS format number. */
996
SVN_ERR(read_format(&(*fs)->format,
997
&(*fs)->max_files_per_dir,
998
svn_dirent_join(path, "db/format", pool),
1000
if (((*fs)->format != 4) && ((*fs)->format != 6))
1001
return svn_error_create(SVN_ERR_FS_UNSUPPORTED_FORMAT, NULL, NULL);
1003
/* read size (HEAD) info */
1004
SVN_ERR(read_number(&(*fs)->min_unpacked_rev,
1005
svn_dirent_join(path, "db/min-unpacked-rev", pool),
1007
return read_number(&(*fs)->max_revision,
1008
svn_dirent_join(path, "db/current", pool),
1012
/* Utility function that returns true if STRING->DATA matches KEY.
1014
static svn_boolean_t
1015
key_matches(svn_string_t *string, const char *key)
1017
return strcmp(string->data, key) == 0;
1020
/* Comparator used for binary search comparing the absolute file offset
1021
* of a noderev to some other offset. DATA is a *noderev_t, KEY is a pointer
1025
compare_noderev_offsets(const void *data, const void *key)
1027
apr_ssize_t diff = (*(const noderev_t *const *)data)->original.offset
1028
- *(const apr_size_t *)key;
1030
/* sizeof(int) may be < sizeof(ssize_t) */
1033
return diff > 0 ? 1 : 0;
1036
/* Get the revision and offset info from the node ID with FS. Return the
1037
* data as *REVISION_INFO and *OFFSET, respectively.
1039
* Note that we assume that the revision_info_t object ID's revision has
1040
* already been created. That can be guaranteed for standard FSFS pack
1041
* files as IDs never point to future revisions.
1043
static svn_error_t *
1044
parse_revnode_pos(revision_info_t **revision_info,
1052
/* split the ID and verify the format */
1053
const char *revision_pos = strrchr(id->data, 'r');
1054
char *offset_pos = (char *)strchr(id->data, '/');
1056
if (revision_pos == NULL || offset_pos == NULL)
1057
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
1058
_("Invalid node id '%s'"), id->data);
1060
/* extract the numbers (temp. modifying the ID)*/
1062
SVN_ERR(svn_cstring_atoi(&revision, revision_pos + 1));
1063
SVN_ERR(svn_cstring_strtoui64(&temp, offset_pos + 1, 0, APR_SIZE_MAX, 10));
1064
*offset = (apr_size_t)temp;
1067
/* validate the revision number and return the revision info */
1068
if (revision - fs->start_revision > fs->revisions->nelts)
1069
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
1070
_("Unknown revision %d"), revision);
1072
*revision_info = APR_ARRAY_IDX(fs->revisions,
1073
revision - fs->start_revision,
1076
return SVN_NO_ERROR;
1079
/* Returns in *RESULT the noderev at OFFSET relative to the revision given in
1080
* REVISION_INFO. If no such noderev has been parsed, yet, error out.
1082
* Since we require the noderev to already have been parsed, we can use
1083
* this function only to access "older", i.e. predecessor noderevs.
1085
static svn_error_t *
1086
find_noderev(noderev_t **result,
1087
revision_info_t *revision_info,
1090
int idx = svn_sort__bsearch_lower_bound(&offset,
1091
revision_info->node_revs,
1092
compare_noderev_offsets);
1093
if ((idx < 0) || (idx >= revision_info->node_revs->nelts))
1094
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
1095
_("No noderev found at offset %ld"),
1098
*result = APR_ARRAY_IDX(revision_info->node_revs, idx, noderev_t *);
1099
if ((*result)->original.offset != offset)
1100
return svn_error_createf(SVN_ERR_BAD_VERSION_FILE_FORMAT, NULL,
1101
_("No noderev found at offset %ld"),
1104
return SVN_NO_ERROR;
1107
/* In *RESULT, return the noderev given by ID in FS. The noderev must
1108
* already have been parsed and put into the FS data structures.
1110
static svn_error_t *
1111
parse_pred(noderev_t **result,
1116
revision_info_t *revision_info;
1118
SVN_ERR(parse_revnode_pos(&revision_info, &offset, fs, id));
1119
SVN_ERR(find_noderev(result, revision_info, offset));
1121
return SVN_NO_ERROR;
1124
/* Comparator used for binary search comparing the absolute file offset
1125
* of a representation to some other offset. DATA is a *representation_t,
1126
* KEY is a pointer to an apr_size_t.
1129
compare_representation_offsets(const void *data, const void *key)
1131
apr_ssize_t diff = (*(const representation_t *const *)data)->original.offset
1132
- *(const apr_size_t *)key;
1134
/* sizeof(int) may be < sizeof(ssize_t) */
1137
return diff > 0 ? 1 : 0;
1140
/* Find the revision_info_t object to the given REVISION in FS and return
1141
* it in *REVISION_INFO. For performance reasons, we skip the lookup if
1142
* the info is already provided.
1144
* In that revision, look for the representation_t object for offset OFFSET.
1145
* If it already exists, set *idx to its index in *REVISION_INFO's
1146
* representations list and return the representation object. Otherwise,
1147
* set the index to where it must be inserted and return NULL.
1149
static representation_t *
1150
find_representation(int *idx,
1152
revision_info_t **revision_info,
1156
revision_info_t *info;
1159
/* first let's find the revision */
1160
info = revision_info ? *revision_info : NULL;
1161
if (info == NULL || info->revision != revision)
1163
info = APR_ARRAY_IDX(fs->revisions,
1164
revision - fs->start_revision,
1167
*revision_info = info;
1170
/* not found -> no result */
1174
assert(revision == info->revision);
1176
/* look for the representation */
1177
*idx = svn_sort__bsearch_lower_bound(&offset,
1178
info->representations,
1179
compare_representation_offsets);
1180
if (*idx < info->representations->nelts)
1182
/* return the representation, if this is the one we were looking for */
1183
representation_t *result
1184
= APR_ARRAY_IDX(info->representations, *idx, representation_t *);
1185
if (result->original.offset == offset)
1189
/* not parsed, yet */
1193
/* Read the representation header in FILE_CONTENT at OFFSET. Return its
1194
* size in *HEADER_SIZE, set *IS_PLAIN if no deltification was used and
1195
* return the deltification base representation in *REPRESENTATION. If
1196
* there is none, set it to NULL. Use FS to look it up.
1198
* Use SCRATCH_POOL for temporary allocations.
1200
static svn_error_t *
1201
read_rep_base(representation_t **representation,
1202
apr_size_t *header_size,
1203
svn_boolean_t *is_plain,
1205
svn_stringbuf_t *file_content,
1207
apr_pool_t *scratch_pool)
1209
char *str, *last_str;
1213
/* identify representation header (1 line) */
1214
const char *buffer = file_content->data + offset;
1215
const char *line_end = strchr(buffer, '\n');
1216
*header_size = line_end - buffer + 1;
1218
/* check for PLAIN rep */
1219
if (strncmp(buffer, "PLAIN\n", *header_size) == 0)
1222
*representation = NULL;
1223
return SVN_NO_ERROR;
1226
/* check for DELTA against empty rep */
1228
if (strncmp(buffer, "DELTA\n", *header_size) == 0)
1230
/* This is a delta against the empty stream. */
1231
*representation = fs->null_base;
1232
return SVN_NO_ERROR;
1235
/* it's delta against some other rep. Duplicate the header info such
1236
* that we may modify it during parsing. */
1237
str = apr_pstrndup(scratch_pool, buffer, line_end - buffer);
1241
str = svn_cstring_tokenize(" ", &last_str);
1242
str = svn_cstring_tokenize(" ", &last_str);
1243
SVN_ERR(svn_cstring_atoi(&revision, str));
1245
str = svn_cstring_tokenize(" ", &last_str);
1246
SVN_ERR(svn_cstring_strtoui64(&temp, str, 0, APR_SIZE_MAX, 10));
1248
/* it should refer to a rep in an earlier revision. Look it up */
1249
*representation = find_representation(&idx, fs, NULL, revision, (apr_size_t)temp);
1250
return SVN_NO_ERROR;
1253
/* Parse the representation reference (text: or props:) in VALUE, look
1254
* it up in FS and return it in *REPRESENTATION. To be able to parse the
1255
* base rep, we pass the FILE_CONTENT as well.
1257
* If necessary, allocate the result in POOL; use SCRATCH_POOL for temp.
1260
static svn_error_t *
1261
parse_representation(representation_t **representation,
1263
svn_stringbuf_t *file_content,
1264
svn_string_t *value,
1265
revision_info_t *revision_info,
1267
apr_pool_t *scratch_pool)
1269
representation_t *result;
1272
apr_uint64_t offset;
1276
/* read location (revision, offset) and size */
1277
char *c = (char *)value->data;
1278
SVN_ERR(svn_cstring_atoi(&revision, svn_cstring_tokenize(" ", &c)));
1279
SVN_ERR(svn_cstring_strtoui64(&offset, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
1280
SVN_ERR(svn_cstring_strtoui64(&size, svn_cstring_tokenize(" ", &c), 0, APR_SIZE_MAX, 10));
1283
result = find_representation(&idx, fs, &revision_info, revision, (apr_size_t)offset);
1286
/* not parsed, yet (probably a rep in the same revision).
1287
* Create a new rep object and determine its base rep as well.
1289
result = apr_pcalloc(pool, sizeof(*result));
1290
result->revision = revision_info;
1291
result->original.offset = (apr_size_t)offset;
1292
result->original.size = (apr_size_t)size;
1293
SVN_ERR(read_rep_base(&result->delta_base, &result->header_size,
1294
&result->is_plain, fs, file_content,
1295
(apr_size_t)offset + revision_info->original.offset,
1298
svn_sort__array_insert(&result, revision_info->representations, idx);
1301
*representation = result;
1303
return SVN_NO_ERROR;
1306
/* Read the delta window contents of all windows in REPRESENTATION in FS.
1307
* Return the data as svn_txdelta_window_t* instances in *WINDOWS.
1308
* Use POOL for allocations.
1310
static svn_error_t *
1311
read_windows(apr_array_header_t **windows,
1313
representation_t *representation,
1316
svn_string_t *content;
1318
svn_stream_t *stream;
1319
apr_size_t offset = representation->original.offset
1320
+ representation->header_size;
1322
apr_size_t len = sizeof(version);
1324
*windows = apr_array_make(pool, 0, sizeof(svn_txdelta_window_t *));
1326
/* get the whole revision content */
1327
SVN_ERR(get_content(&content, fs, representation->revision->revision, pool));
1329
/* create a read stream and position it directly after the rep header */
1330
data.data = content->data + offset + 3;
1331
data.len = representation->original.size - 3;
1332
stream = svn_stream_from_string(&data, pool);
1333
SVN_ERR(svn_stream_read(stream, &version, &len));
1335
/* read the windows from that stream */
1338
svn_txdelta_window_t *window;
1339
svn_stream_mark_t *mark;
1342
len = sizeof(dummy);
1343
SVN_ERR(svn_stream_mark(stream, &mark, pool));
1344
SVN_ERR(svn_stream_read(stream, &dummy, &len));
1348
SVN_ERR(svn_stream_seek(stream, mark));
1349
SVN_ERR(svn_txdelta_read_svndiff_window(&window, stream, version, pool));
1350
APR_ARRAY_PUSH(*windows, svn_txdelta_window_t *) = window;
1353
return SVN_NO_ERROR;
1356
/* Read the content of the PLAIN REPRESENTATION in FS and return it in
1357
* *CONTENT. Use POOL for allocations.
1359
static svn_error_t *
1360
read_plain(svn_stringbuf_t **content,
1362
representation_t *representation,
1366
apr_size_t offset = representation->original.offset
1367
+ representation->header_size;
1369
SVN_ERR(get_content(&data, fs, representation->revision->revision, pool));
1371
/* content is stored as fulltext already */
1372
*content = svn_stringbuf_ncreate(data->data + offset,
1373
representation->original.size,
1376
return SVN_NO_ERROR;
1379
/* Get the undeltified representation that is a result of combining all
1380
* deltas from the current desired REPRESENTATION in FS with its base
1381
* representation. Store the result in *CONTENT.
1382
* Use POOL for allocations. */
1383
static svn_error_t *
1384
get_combined_window(svn_stringbuf_t **content,
1386
representation_t *representation,
1390
apr_array_header_t *windows;
1391
svn_stringbuf_t *base_content, *result;
1393
apr_pool_t *sub_pool;
1394
apr_pool_t *iter_pool;
1396
/* special case: no un-deltification necessary */
1397
if (representation->is_plain)
1398
return read_plain(content, fs, representation, pool);
1400
/* special case: data already in cache */
1401
*content = get_cached_window(fs, representation, pool);
1403
return SVN_NO_ERROR;
1405
/* read the delta windows for this representation */
1406
sub_pool = svn_pool_create(pool);
1407
iter_pool = svn_pool_create(pool);
1408
SVN_ERR(read_windows(&windows, fs, representation, sub_pool));
1410
/* fetch the / create a base content */
1411
if (representation->delta_base && representation->delta_base->revision)
1412
SVN_ERR(get_combined_window(&base_content, fs,
1413
representation->delta_base, sub_pool));
1415
base_content = svn_stringbuf_create_empty(sub_pool);
1418
result = svn_stringbuf_create_empty(pool);
1419
source = base_content->data;
1421
for (i = 0; i < windows->nelts; ++i)
1423
svn_txdelta_window_t *window
1424
= APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1425
svn_stringbuf_t *buf
1426
= svn_stringbuf_create_ensure(window->tview_len, iter_pool);
1428
buf->len = window->tview_len;
1429
svn_txdelta_apply_instructions(window, window->src_ops ? source : NULL,
1430
buf->data, &buf->len);
1432
svn_stringbuf_appendbytes(result, buf->data, buf->len);
1433
source += window->sview_len;
1435
svn_pool_clear(iter_pool);
1438
svn_pool_destroy(iter_pool);
1439
svn_pool_destroy(sub_pool);
1441
/* cache result and return it */
1442
set_cached_window(fs, representation, result);
1445
return SVN_NO_ERROR;
1448
/* forward declaration */
1449
static svn_error_t *
1450
read_noderev(noderev_t **noderev,
1452
svn_stringbuf_t *file_content,
1454
revision_info_t *revision_info,
1456
apr_pool_t *scratch_pool);
1458
/* Get the noderev at OFFSET in FILE_CONTENT in FS. The file content must
1459
* pertain to the revision given in REVISION_INFO. If the data has not
1460
* been read yet, parse it and store it in REVISION_INFO. Return the result
1463
* Use POOL for allocations and SCRATCH_POOL for temporaries.
1465
static svn_error_t *
1466
get_noderev(noderev_t **noderev,
1468
svn_stringbuf_t *file_content,
1470
revision_info_t *revision_info,
1472
apr_pool_t *scratch_pool)
1474
int idx = svn_sort__bsearch_lower_bound(&offset,
1475
revision_info->node_revs,
1476
compare_noderev_offsets);
1477
if ((idx < 0) || (idx >= revision_info->node_revs->nelts))
1478
SVN_ERR(read_noderev(noderev, fs, file_content, offset, revision_info,
1479
pool, scratch_pool));
1482
*noderev = APR_ARRAY_IDX(revision_info->node_revs, idx, noderev_t *);
1483
if ((*noderev)->original.offset != offset)
1484
SVN_ERR(read_noderev(noderev, fs, file_content, offset, revision_info,
1485
pool, scratch_pool));
1488
return SVN_NO_ERROR;
1491
/* Read the directory stored in REPRESENTATION in FS into *HASH. The result
1492
* will be allocated in FS' directory cache and it will be plain key-value
1493
* hash. Use SCRATCH_POOL for temporary allocations.
1495
static svn_error_t *
1496
read_dir(apr_hash_t **hash,
1498
representation_t *representation,
1499
apr_pool_t *scratch_pool)
1501
svn_stringbuf_t *text;
1502
apr_pool_t *text_pool;
1503
svn_stream_t *stream;
1506
/* chances are, we find the info in cache already */
1507
*hash = get_cached_dir(fs, representation);
1509
return SVN_NO_ERROR;
1511
/* create the result container */
1512
pool = get_cached_dir_pool(fs);
1513
*hash = svn_hash__make(pool);
1515
/* if this is a non-empty rep, read it and de-serialize the hash */
1516
if (representation != NULL)
1518
text_pool = svn_pool_create(scratch_pool);
1519
SVN_ERR(get_combined_window(&text, fs, representation, text_pool));
1520
stream = svn_stream_from_stringbuf(text, text_pool);
1521
SVN_ERR(svn_hash_read2(*hash, stream, SVN_HASH_TERMINATOR, pool));
1522
svn_pool_destroy(text_pool);
1525
/* cache the result */
1526
set_cached_dir(fs, representation, *hash);
1528
return SVN_NO_ERROR;
1531
/* Starting at the directory in REPRESENTATION in FILE_CONTENT, read all
1532
* DAG nodes, directories and representations linked in that tree structure.
1533
* Store them in FS and read them only once.
1535
* Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
1537
static svn_error_t *
1538
parse_dir(fs_fs_t *fs,
1539
svn_stringbuf_t *file_content,
1540
representation_t *representation,
1542
apr_pool_t *scratch_pool)
1545
apr_hash_index_t *hi;
1546
apr_pool_t *iter_pool = svn_pool_create(scratch_pool);
1547
apr_hash_t *base_dir = svn_hash__make(scratch_pool);
1549
/* special case: empty dir rep */
1550
if (representation == NULL)
1551
return SVN_NO_ERROR;
1553
/* if we have a previous representation of that dir, hash it by name */
1554
if (representation->delta_base && representation->delta_base->dir)
1556
apr_array_header_t *dir = representation->delta_base->dir->entries;
1559
for (i = 0; i < dir->nelts; ++i)
1561
direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
1562
apr_hash_set(base_dir, entry->name, entry->name_len, entry);
1566
/* read this directory */
1567
SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
1569
/* add it as an array to the representation (entries yet to be filled) */
1570
representation->dir = apr_pcalloc(pool, sizeof(*representation->dir));
1571
representation->dir->entries
1572
= apr_array_make(pool, apr_hash_count(hash), sizeof(direntry_t *));
1574
/* Translate the string dir entries into real entries. Reuse existing
1575
* objects as much as possible to keep memory consumption low.
1577
for (hi = apr_hash_first(pool, hash); hi; hi = apr_hash_next(hi))
1579
const char *name = svn__apr_hash_index_key(hi);
1580
svn_string_t *str_val = svn__apr_hash_index_val(hi);
1582
revision_info_t *revision_info;
1584
/* look for corresponding entry in previous version */
1585
apr_size_t name_len = strlen(name);
1586
direntry_t *entry = base_dir
1587
? apr_hash_get(base_dir, name, name_len)
1590
/* parse the new target revnode ID (revision, offset) */
1591
SVN_ERR(parse_revnode_pos(&revision_info, &offset, fs, str_val));
1593
/* if this is a new entry or if the content changed, create a new
1594
* instance for it. */
1596
|| !entry->node->text
1597
|| entry->node->text->revision != revision_info
1598
|| entry->node->original.offset != offset)
1600
/* create & init the new entry. Reuse the name string if possible */
1601
direntry_t *new_entry = apr_pcalloc(pool, sizeof(*entry));
1602
new_entry->name_len = name_len;
1604
new_entry->name = entry->name;
1606
new_entry->name = apr_pstrdup(pool, name);
1608
/* Link it to the content noderev. Recurse. */
1610
SVN_ERR(get_noderev(&entry->node, fs, file_content, offset,
1611
revision_info, pool, iter_pool));
1614
/* set the directory entry */
1615
APR_ARRAY_PUSH(representation->dir->entries, direntry_t *) = entry;
1616
svn_pool_clear(iter_pool);
1619
svn_pool_destroy(iter_pool);
1620
return SVN_NO_ERROR;
1623
/* Starting at the noderev at OFFSET in FILE_CONTENT, read all DAG nodes,
1624
* directories and representations linked in that tree structure. Store
1625
* them in FS and read them only once. Return the result in *NODEREV.
1627
* Use POOL for persistent allocations and SCRATCH_POOL for temporaries.
1629
static svn_error_t *
1630
read_noderev(noderev_t **noderev,
1632
svn_stringbuf_t *file_content,
1634
revision_info_t *revision_info,
1636
apr_pool_t *scratch_pool)
1638
noderev_t *result = apr_pcalloc(pool, sizeof(*result));
1640
svn_boolean_t is_dir = FALSE;
1642
scratch_pool = svn_pool_create(scratch_pool);
1644
/* parse the noderev line-by-line until we find an empty line */
1645
result->original.offset = offset;
1648
/* for this line, extract key and value. Ignore invalid values */
1652
const char *start = file_content->data + offset
1653
+ revision_info->original.offset;
1654
const char *end = strchr(start, '\n');
1656
line = svn_string_ncreate(start, end - start, scratch_pool);
1657
offset += end - start + 1;
1659
/* empty line -> end of noderev data */
1663
sep = strchr(line->data, ':');
1667
key.data = line->data;
1668
key.len = sep - key.data;
1671
if (key.len + 2 > line->len)
1674
value.data = sep + 2;
1675
value.len = line->len - (key.len + 2);
1677
/* translate (key, value) into noderev elements */
1678
if (key_matches(&key, "type"))
1679
is_dir = strcmp(value.data, "dir") == 0;
1680
else if (key_matches(&key, "pred"))
1681
SVN_ERR(parse_pred(&result->predecessor, fs, &value));
1682
else if (key_matches(&key, "text"))
1683
SVN_ERR(parse_representation(&result->text, fs, file_content,
1684
&value, revision_info,
1685
pool, scratch_pool));
1686
else if (key_matches(&key, "props"))
1687
SVN_ERR(parse_representation(&result->props, fs, file_content,
1688
&value, revision_info,
1689
pool, scratch_pool));
1692
/* link noderev to revision info */
1693
result->revision = revision_info;
1694
result->original.size = offset - result->original.offset;
1696
svn_sort__array_insert(&result,
1697
revision_info->node_revs,
1698
svn_sort__bsearch_lower_bound(&offset,
1699
revision_info->node_revs,
1700
compare_noderev_offsets));
1702
/* if this is a directory, read and process that recursively */
1704
SVN_ERR(parse_dir(fs, file_content, result->text,
1705
pool, scratch_pool));
1708
svn_pool_destroy(scratch_pool);
1711
return SVN_NO_ERROR;
1714
/* Simple utility to print a REVISION number and make it appear immediately.
1717
print_progress(svn_revnum_t revision)
1719
printf("%8ld", revision);
1723
/* Read the content of the pack file staring at revision BASE and store it
1724
* in FS. Use POOL for allocations.
1726
static svn_error_t *
1727
read_pack_file(fs_fs_t *fs,
1731
apr_array_header_t *manifest = NULL;
1732
apr_pool_t *local_pool = svn_pool_create(pool);
1733
apr_pool_t *iter_pool = svn_pool_create(local_pool);
1735
svn_stringbuf_t *file_content;
1736
revision_pack_t *revisions;
1737
const char *pack_folder = get_pack_folder(fs, base, local_pool);
1739
/* read the whole pack file into memory */
1740
SVN_ERR(read_rev_or_pack_file(&file_content, fs, base, local_pool));
1742
/* create the revision container */
1743
revisions = apr_pcalloc(pool, sizeof(*revisions));
1744
revisions->base = base;
1745
revisions->fragments = NULL;
1746
revisions->info = apr_array_make(pool,
1747
fs->max_files_per_dir,
1748
sizeof(revision_info_t*));
1749
revisions->filesize = file_content->len;
1750
APR_ARRAY_PUSH(fs->packs, revision_pack_t*) = revisions;
1752
/* parse the manifest file */
1753
SVN_ERR(read_manifest(&manifest, fs, pack_folder, local_pool));
1754
if (manifest->nelts != fs->max_files_per_dir)
1755
return svn_error_create(SVN_ERR_FS_CORRUPT, NULL, NULL);
1757
/* process each revision in the pack file */
1758
for (i = 0; i < manifest->nelts; ++i)
1760
apr_size_t root_node_offset;
1761
svn_string_t rev_content;
1763
/* create the revision info for the current rev */
1764
revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
1765
info->node_revs = apr_array_make(iter_pool, 4, sizeof(noderev_t*));
1766
info->representations = apr_array_make(iter_pool, 4, sizeof(representation_t*));
1768
info->revision = base + i;
1769
info->original.offset = APR_ARRAY_IDX(manifest, i, apr_size_t);
1770
info->original.end = i+1 < manifest->nelts
1771
? APR_ARRAY_IDX(manifest, i+1 , apr_size_t)
1772
: file_content->len;
1773
SVN_ERR(read_revision_header(&info->original.changes,
1774
&info->original.changes_len,
1777
APR_ARRAY_IDX(manifest, i , apr_size_t),
1781
/* put it into our containers */
1782
APR_ARRAY_PUSH(revisions->info, revision_info_t*) = info;
1783
APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
1785
/* cache the revision content */
1786
rev_content.data = file_content->data + info->original.offset;
1787
rev_content.len = info->original.end - info->original.offset;
1788
set_cached_content(fs->cache, info->revision, &rev_content);
1790
/* parse the revision content recursively. */
1791
SVN_ERR(read_noderev(&info->root_noderev, fs, file_content,
1792
root_node_offset, info, pool, iter_pool));
1794
/* copy dynamically grown containers from temp into result pool */
1795
info->node_revs = apr_array_copy(pool, info->node_revs);
1796
info->representations = apr_array_copy(pool, info->representations);
1799
svn_pool_clear(iter_pool);
1802
/* one more pack file processed */
1803
print_progress(base);
1804
svn_pool_destroy(local_pool);
1806
return SVN_NO_ERROR;
1809
/* Read the content of REVSION file and store it in FS.
1810
* Use POOL for allocations.
1812
static svn_error_t *
1813
read_revision_file(fs_fs_t *fs,
1814
svn_revnum_t revision,
1817
apr_size_t root_node_offset;
1818
apr_pool_t *local_pool = svn_pool_create(pool);
1819
svn_stringbuf_t *file_content;
1820
svn_string_t rev_content;
1821
revision_pack_t *revisions = apr_pcalloc(pool, sizeof(*revisions));
1822
revision_info_t *info = apr_pcalloc(pool, sizeof(*info));
1824
/* read the whole pack file into memory */
1825
SVN_ERR(read_rev_or_pack_file(&file_content, fs, revision, local_pool));
1827
/* create the revision info for the current rev */
1828
info->node_revs = apr_array_make(pool, 4, sizeof(noderev_t*));
1829
info->representations = apr_array_make(pool, 4, sizeof(representation_t*));
1831
info->revision = revision;
1832
info->original.offset = 0;
1833
info->original.end = file_content->len;
1834
SVN_ERR(read_revision_header(&info->original.changes,
1835
&info->original.changes_len,
1842
/* put it into our containers */
1843
APR_ARRAY_PUSH(fs->revisions, revision_info_t*) = info;
1845
/* create a pseudo-pack file container for just this rev to keep our
1846
* data structures as uniform as possible.
1848
revisions->base = revision;
1849
revisions->fragments = NULL;
1850
revisions->info = apr_array_make(pool, 1, sizeof(revision_info_t*));
1851
revisions->filesize = file_content->len;
1852
APR_ARRAY_PUSH(revisions->info, revision_info_t*) = info;
1853
APR_ARRAY_PUSH(fs->packs, revision_pack_t*) = revisions;
1855
/* cache the revision content */
1856
rev_content.data = file_content->data + info->original.offset;
1857
rev_content.len = info->original.end - info->original.offset;
1858
set_cached_content(fs->cache, info->revision, &rev_content);
1860
/* parse the revision content recursively. */
1861
SVN_ERR(read_noderev(&info->root_noderev, fs, file_content,
1862
root_node_offset, info,
1864
APR_ARRAY_PUSH(info->node_revs, noderev_t*) = info->root_noderev;
1866
/* show progress every 1000 revs or so */
1867
if (revision % fs->max_files_per_dir == 0)
1868
print_progress(revision);
1870
svn_pool_destroy(local_pool);
1872
return SVN_NO_ERROR;
1875
/* Read the repository at PATH beginning with revision START_REVISION and
1876
* return the result in *FS. Allocate caches with MEMSIZE bytes total
1877
* capacity. Use POOL for non-cache allocations.
1879
static svn_error_t *
1880
read_revisions(fs_fs_t **fs,
1882
svn_revnum_t start_revision,
1886
svn_revnum_t revision;
1887
apr_size_t content_cache_size;
1888
apr_size_t window_cache_size;
1889
apr_size_t dir_cache_size;
1891
/* determine cache sizes */
1895
content_cache_size = memsize * 7 / 10 > 4000 ? 4000 : memsize * 7 / 10;
1896
window_cache_size = memsize * 2 / 10 * 1024 * 1024;
1897
dir_cache_size = (memsize / 10) * 16000;
1899
/* read repo format and such */
1900
SVN_ERR(fs_open(fs, path, pool));
1902
/* create data containers and caches */
1903
(*fs)->start_revision = start_revision
1904
- (start_revision % (*fs)->max_files_per_dir);
1905
(*fs)->revisions = apr_array_make(pool,
1906
(*fs)->max_revision + 1 - (*fs)->start_revision,
1907
sizeof(revision_info_t *));
1908
(*fs)->packs = apr_array_make(pool,
1909
((*fs)->min_unpacked_rev - (*fs)->start_revision)
1910
/ (*fs)->max_files_per_dir,
1911
sizeof(revision_pack_t *));
1912
(*fs)->null_base = apr_pcalloc(pool, sizeof(*(*fs)->null_base));
1913
(*fs)->cache = create_content_cache
1914
(apr_allocator_owner_get
1915
(svn_pool_create_allocator(FALSE)),
1916
content_cache_size * 1024 * 1024);
1917
(*fs)->dir_cache = create_dir_cache
1918
(apr_allocator_owner_get
1919
(svn_pool_create_allocator(FALSE)),
1921
(*fs)->window_cache = create_window_cache
1922
(apr_allocator_owner_get
1923
(svn_pool_create_allocator(FALSE)),
1924
10000, window_cache_size);
1926
/* read all packed revs */
1927
for ( revision = start_revision
1928
; revision < (*fs)->min_unpacked_rev
1929
; revision += (*fs)->max_files_per_dir)
1930
SVN_ERR(read_pack_file(*fs, revision, pool));
1932
/* read non-packed revs */
1933
for ( ; revision <= (*fs)->max_revision; ++revision)
1934
SVN_ERR(read_revision_file(*fs, revision, pool));
1936
return SVN_NO_ERROR;
1939
/* Return the maximum number of decimal digits required to represent offsets
1940
* in the given PACK file.
1943
get_max_offset_len(const revision_pack_t *pack)
1945
/* the pack files may grow a few percent.
1946
* Fudge it up to be on safe side.
1948
apr_size_t max_future_size = pack->filesize * 2 + 10000;
1949
apr_size_t result = 0;
1951
while (max_future_size > 0)
1954
max_future_size /= 10;
1960
/* Create the fragments container in PACK and add revision header fragments
1961
* to it. Use POOL for allocations.
1963
static svn_error_t *
1964
add_revisions_pack_heads(revision_pack_t *pack,
1968
revision_info_t *info;
1969
apr_size_t offset_len = get_max_offset_len(pack);
1970
fragment_t fragment;
1972
/* allocate fragment arrays */
1974
int fragment_count = 1;
1975
for (i = 0; i < pack->info->nelts; ++i)
1977
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
1978
fragment_count += info->node_revs->nelts
1979
+ info->representations->nelts
1983
pack->target_offset = pack->info->nelts > 1 ? 64 : 0;
1984
pack->fragments = apr_array_make(pool,
1986
sizeof(fragment_t));
1988
/* put revision headers first */
1990
for (i = 0; i < pack->info->nelts - 1; ++i)
1992
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
1993
info->target.offset = pack->target_offset;
1995
fragment.data = info;
1996
fragment.kind = header_fragment;
1997
fragment.position = pack->target_offset;
1998
APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment;
2000
pack->target_offset += 2 * offset_len + 3;
2003
info = APR_ARRAY_IDX(pack->info, pack->info->nelts - 1, revision_info_t*);
2004
info->target.offset = pack->target_offset;
2006
/* followed by the changes list */
2008
for (i = 0; i < pack->info->nelts; ++i)
2010
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
2012
info->target.changes = pack->target_offset - info->target.offset;
2013
info->target.changes_len = info->original.changes_len;
2015
fragment.data = info;
2016
fragment.kind = changes_fragment;
2017
fragment.position = pack->target_offset;
2018
APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment;
2020
pack->target_offset += info->original.changes_len;
2023
return SVN_NO_ERROR;
2026
/* For the revision given by INFO in FS, return the fragment container in
2027
* *FRAGMENTS and the current placement offset in *CURRENT_POS.
2029
static svn_error_t *
2030
get_target_offset(apr_size_t **current_pos,
2031
apr_array_header_t **fragments,
2033
revision_info_t *info)
2036
revision_pack_t *pack;
2037
svn_revnum_t revision = info->revision;
2039
/* identify the pack object */
2040
if (fs->min_unpacked_rev > revision)
2042
i = (revision - fs->start_revision) / fs->max_files_per_dir;
2046
i = (fs->min_unpacked_rev - fs->start_revision) / fs->max_files_per_dir;
2047
i += revision - fs->min_unpacked_rev;
2050
/* extract the desired info from it */
2051
pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
2052
*current_pos = &pack->target_offset;
2053
*fragments = pack->fragments;
2055
return SVN_NO_ERROR;
2058
/* forward declaration */
2059
static svn_error_t *
2060
add_noderev_recursively(fs_fs_t *fs,
2064
/* Place fragments for the given REPRESENTATION of the given KIND, iff it
2065
* has not been covered, yet. Place the base reps along the deltification
2066
* chain as far as those reps have not been covered, yet. If REPRESENTATION
2067
* is a directory, recursively place its elements.
2069
* Use POOL for allocations.
2071
static svn_error_t *
2072
add_representation_recursively(fs_fs_t *fs,
2073
representation_t *representation,
2074
enum fragment_kind_t kind,
2077
apr_size_t *current_pos;
2078
apr_array_header_t *fragments;
2079
fragment_t fragment;
2081
/* place REPRESENTATION only once and only if it exists and will not
2082
* be covered later as a directory. */
2083
if ( representation == NULL
2084
|| representation->covered
2085
|| (representation->dir && kind != dir_fragment)
2086
|| representation == fs->null_base)
2087
return SVN_NO_ERROR;
2089
/* add and place a fragment for REPRESENTATION */
2090
SVN_ERR(get_target_offset(¤t_pos, &fragments,
2091
fs, representation->revision));
2092
representation->target.offset = *current_pos;
2093
representation->covered = TRUE;
2095
fragment.data = representation;
2096
fragment.kind = kind;
2097
fragment.position = *current_pos;
2098
APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
2100
/* determine the size of data to be added to the target file */
2101
if ( kind != dir_fragment
2102
&& representation->delta_base && representation->delta_base->dir)
2104
/* base rep is a dir -> would change -> need to store it as fulltext
2105
* in our target file */
2106
apr_pool_t *text_pool = svn_pool_create(pool);
2107
svn_stringbuf_t *content;
2109
SVN_ERR(get_combined_window(&content, fs, representation, text_pool));
2110
representation->target.size = content->len;
2111
*current_pos += representation->target.size + 13;
2113
svn_pool_destroy(text_pool);
2116
if ( kind == dir_fragment
2117
|| (representation->delta_base && representation->delta_base->dir))
2119
/* deltified directories may grow considerably */
2120
if (representation->original.size < 50)
2121
*current_pos += 300;
2123
*current_pos += representation->original.size * 3 + 150;
2127
/* plain / deltified content will not change but the header may
2128
* grow slightly due to larger offsets. */
2129
representation->target.size = representation->original.size;
2131
if (representation->delta_base &&
2132
(representation->delta_base != fs->null_base))
2133
*current_pos += representation->original.size + 50;
2135
*current_pos += representation->original.size + 13;
2138
/* follow the delta chain and place base revs immediately after this */
2139
if (representation->delta_base)
2140
SVN_ERR(add_representation_recursively(fs,
2141
representation->delta_base,
2145
/* finally, recurse into directories */
2146
if (representation->dir)
2149
apr_array_header_t *entries = representation->dir->entries;
2151
for (i = 0; i < entries->nelts; ++i)
2153
direntry_t *entry = APR_ARRAY_IDX(entries, i, direntry_t *);
2155
SVN_ERR(add_noderev_recursively(fs, entry->node, pool));
2159
return SVN_NO_ERROR;
2162
/* Place fragments for the given NODE in FS, iff it has not been covered,
2163
* yet. Place the reps (text, props) immediately after the node.
2165
* Use POOL for allocations.
2167
static svn_error_t *
2168
add_noderev_recursively(fs_fs_t *fs,
2172
apr_size_t *current_pos;
2173
apr_array_header_t *fragments;
2174
fragment_t fragment;
2176
/* don't add it twice */
2178
return SVN_NO_ERROR;
2180
/* add and place a fragment for NODE */
2181
SVN_ERR(get_target_offset(¤t_pos, &fragments, fs, node->revision));
2182
node->covered = TRUE;
2183
node->target.offset = *current_pos;
2185
fragment.data = node;
2186
fragment.kind = noderev_fragment;
2187
fragment.position = *current_pos;
2188
APR_ARRAY_PUSH(fragments, fragment_t) = fragment;
2190
/* size may slightly increase */
2191
*current_pos += node->original.size + 40;
2193
/* recurse into representations */
2194
if (node->text && node->text->dir)
2195
SVN_ERR(add_representation_recursively(fs, node->text, dir_fragment, pool));
2197
SVN_ERR(add_representation_recursively(fs, node->text, file_fragment, pool));
2199
SVN_ERR(add_representation_recursively(fs, node->props, property_fragment, pool));
2201
return SVN_NO_ERROR;
2204
/* Place a fragment for the last revision in PACK. Use POOL for allocations.
2206
static svn_error_t *
2207
add_revisions_pack_tail(revision_pack_t *pack,
2211
revision_info_t *info;
2212
apr_size_t offset_len = get_max_offset_len(pack);
2213
fragment_t fragment;
2215
/* put final revision header last and fix up revision lengths */
2217
info = APR_ARRAY_IDX(pack->info, pack->info->nelts-1, revision_info_t*);
2219
fragment.data = info;
2220
fragment.kind = header_fragment;
2221
fragment.position = pack->target_offset;
2222
APR_ARRAY_PUSH(pack->fragments, fragment_t) = fragment;
2224
pack->target_offset += 2 * offset_len + 3;
2226
/* end of target file reached. Store that info in all revs. */
2227
for (i = 0; i < pack->info->nelts; ++i)
2229
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
2230
info->target.end = pack->target_offset;
2233
return SVN_NO_ERROR;
2236
/* Place all fragments for all revisions / packs in FS.
2237
* Use POOL for allocations.
2239
static svn_error_t *
2240
reorder_revisions(fs_fs_t *fs,
2245
/* headers and changes */
2247
for (i = 0; i < fs->packs->nelts; ++i)
2249
revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
2250
SVN_ERR(add_revisions_pack_heads(pack, pool));
2253
/* representations & nodes */
2255
for (i = fs->revisions->nelts-1; i >= 0; --i)
2257
revision_info_t *info = APR_ARRAY_IDX(fs->revisions, i, revision_info_t*);
2258
for (k = info->node_revs->nelts - 1; k >= 0; --k)
2260
noderev_t *node = APR_ARRAY_IDX(info->node_revs, k, noderev_t*);
2261
SVN_ERR(add_noderev_recursively(fs, node, pool));
2264
if (info->revision % fs->max_files_per_dir == 0)
2265
print_progress(info->revision);
2268
/* pack file tails */
2270
for (i = 0; i < fs->packs->nelts; ++i)
2272
revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
2273
SVN_ERR(add_revisions_pack_tail(pack, pool));
2276
return SVN_NO_ERROR;
2279
/* forward declaration */
2280
static svn_error_t *
2281
get_fragment_content(svn_string_t **content,
2283
fragment_t *fragment,
2286
/* Directory content may change and with it, the deltified representations
2287
* may significantly. This function causes all directory target reps in
2288
* PACK of FS to be built and their new MD5 as well as rep sizes be updated.
2289
* We must do that before attempting to write noderevs.
2291
* Use POOL for allocations.
2293
static svn_error_t *
2294
update_noderevs(fs_fs_t *fs,
2295
revision_pack_t *pack,
2299
apr_pool_t *itempool = svn_pool_create(pool);
2301
for (i = 0; i < pack->fragments->nelts; ++i)
2303
fragment_t *fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
2304
if (fragment->kind == dir_fragment)
2306
svn_string_t *content;
2308
/* request updated rep content but ignore the result.
2309
* We are only interested in the MD5, content and rep size updates. */
2310
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
2311
svn_pool_clear(itempool);
2315
svn_pool_destroy(itempool);
2317
return SVN_NO_ERROR;
2320
/* Determine the target size of the FRAGMENT in FS and return the value
2321
* in *LENGTH. If ADD_PADDING has been set, slightly fudge the numbers
2322
* to account for changes in offset lengths etc. Use POOL for temporary
2325
static svn_error_t *
2326
get_content_length(apr_size_t *length,
2328
fragment_t *fragment,
2329
svn_boolean_t add_padding,
2332
svn_string_t *content;
2334
SVN_ERR(get_fragment_content(&content, fs, fragment, pool));
2336
switch (fragment->kind)
2339
*length = content->len + 16;
2341
case noderev_fragment:
2342
*length = content->len + 3;
2345
*length = content->len;
2349
*length = content->len;
2351
return SVN_NO_ERROR;
2354
/* Move the FRAGMENT to global file offset NEW_POSITION. Update the target
2355
* location info of the underlying object as well.
2358
move_fragment(fragment_t *fragment,
2359
apr_size_t new_position)
2361
revision_info_t *info;
2362
representation_t *representation;
2365
/* move the fragment */
2366
fragment->position = new_position;
2368
/* move the underlying object */
2369
switch (fragment->kind)
2371
case header_fragment:
2372
info = fragment->data;
2373
info->target.offset = new_position;
2376
case changes_fragment:
2377
info = fragment->data;
2378
info->target.changes = new_position - info->target.offset;
2381
case property_fragment:
2384
representation = fragment->data;
2385
representation->target.offset = new_position;
2388
case noderev_fragment:
2389
node = fragment->data;
2390
node->target.offset = new_position;
2395
/* Move the fragments in PACK's target fragment list to their final offsets.
2396
* This may require several iterations if the fudge factors turned out to
2397
* be insufficient. Use POOL for allocations.
2399
static svn_error_t *
2400
pack_revisions(fs_fs_t *fs,
2401
revision_pack_t *pack,
2405
fragment_t *fragment, *next;
2406
svn_boolean_t needed_to_expand;
2407
revision_info_t *info;
2408
apr_size_t current_pos, len, old_len;
2410
apr_pool_t *itempool = svn_pool_create(pool);
2412
/* update all directory reps. Chances are that most of the target rep
2413
* sizes are now close to accurate. */
2414
SVN_ERR(update_noderevs(fs, pack, pool));
2416
/* compression phase: pack all fragments tightly with only a very small
2417
* fudge factor. This should cause offsets to shrink, thus all the
2418
* actual fragment rate should tend to be even smaller afterwards. */
2419
current_pos = pack->info->nelts > 1 ? 64 : 0;
2420
for (i = 0; i + 1 < pack->fragments->nelts; ++i)
2422
fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
2423
SVN_ERR(get_content_length(&len, fs, fragment, TRUE, itempool));
2424
move_fragment(fragment, current_pos);
2427
svn_pool_clear(itempool);
2430
/* don't forget the final fragment (last revision's revision header) */
2431
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
2432
fragment->position = current_pos;
2434
/* expansion phase: check whether all fragments fit into their allotted
2435
* slots. Grow them geometrically if they don't fit. Retry until they
2437
* Note: there is an upper limit to which fragments can grow. So, this
2438
* loop will terminate. Often, no expansion will be necessary at all. */
2441
needed_to_expand = FALSE;
2442
current_pos = pack->info->nelts > 1 ? 64 : 0;
2444
for (i = 0; i + 1 < pack->fragments->nelts; ++i)
2446
fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
2447
next = &APR_ARRAY_IDX(pack->fragments, i + 1, fragment_t);
2448
old_len = next->position - fragment->position;
2450
SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
2454
len = (apr_size_t)(len * 1.1) + 10;
2455
needed_to_expand = TRUE;
2460
if (i == pack->info->nelts - 1)
2462
info = APR_ARRAY_IDX(pack->info, pack->info->nelts - 1, revision_info_t*);
2463
info->target.offset = current_pos;
2466
move_fragment(fragment, current_pos);
2469
svn_pool_clear(itempool);
2472
fragment = &APR_ARRAY_IDX(pack->fragments, pack->fragments->nelts-1, fragment_t);
2473
fragment->position = current_pos;
2475
/* update the revision
2476
* sizes (they all end at the end of the pack file now) */
2477
SVN_ERR(get_content_length(&len, fs, fragment, FALSE, itempool));
2480
for (i = 0; i < pack->info->nelts; ++i)
2482
info = APR_ARRAY_IDX(pack->info, i, revision_info_t*);
2483
info->target.end = current_pos;
2486
while (needed_to_expand);
2488
svn_pool_destroy(itempool);
2490
return SVN_NO_ERROR;
2493
/* Write reorg'ed target content for PACK in FS. Use POOL for allocations.
2495
static svn_error_t *
2496
write_revisions(fs_fs_t *fs,
2497
revision_pack_t *pack,
2501
fragment_t *fragment = NULL;
2502
svn_string_t *content;
2504
apr_pool_t *itempool = svn_pool_create(pool);
2505
apr_pool_t *iterpool = svn_pool_create(pool);
2508
apr_size_t current_pos = 0;
2509
svn_stringbuf_t *null_buffer = svn_stringbuf_create_empty(iterpool);
2511
/* create the target file */
2512
const char *dir = apr_psprintf(iterpool, "%s/new/%ld%s",
2513
fs->path, pack->base / fs->max_files_per_dir,
2514
pack->info->nelts > 1 ? ".pack" : "");
2515
SVN_ERR(svn_io_make_dir_recursively(dir, pool));
2516
SVN_ERR(svn_io_file_open(&file,
2517
pack->info->nelts > 1
2518
? apr_psprintf(iterpool, "%s/pack", dir)
2519
: apr_psprintf(iterpool, "%s/%ld", dir, pack->base),
2520
APR_WRITE | APR_CREATE | APR_BUFFERED,
2524
/* write all fragments */
2525
for (i = 0; i < pack->fragments->nelts; ++i)
2529
/* get fragment content to write */
2530
fragment = &APR_ARRAY_IDX(pack->fragments, i, fragment_t);
2531
SVN_ERR(get_fragment_content(&content, fs, fragment, itempool));
2532
SVN_ERR_ASSERT(fragment->position >= current_pos);
2534
/* number of bytes between this and the previous fragment */
2535
if ( fragment->kind == header_fragment
2536
&& i+1 < pack->fragments->nelts)
2537
/* special case: header fragments are aligned to the slot end */
2538
padding = APR_ARRAY_IDX(pack->fragments, i+1, fragment_t).position -
2539
content->len - current_pos;
2541
/* standard case: fragments are aligned to the slot start */
2542
padding = fragment->position - current_pos;
2544
/* write padding between fragments */
2547
while (null_buffer->len < padding)
2548
svn_stringbuf_appendbyte(null_buffer, 0);
2550
SVN_ERR(svn_io_file_write_full(file,
2555
current_pos += padding;
2558
/* write fragment content */
2559
SVN_ERR(svn_io_file_write_full(file,
2564
current_pos += content->len;
2566
svn_pool_clear(itempool);
2569
apr_file_close(file);
2571
/* write new manifest file */
2572
if (pack->info->nelts > 1)
2574
svn_stream_t *stream;
2575
SVN_ERR(svn_io_file_open(&file,
2576
apr_psprintf(iterpool, "%s/manifest", dir),
2577
APR_WRITE | APR_CREATE | APR_BUFFERED,
2580
stream = svn_stream_from_aprfile2(file, FALSE, iterpool);
2582
for (i = 0; i < pack->info->nelts; ++i)
2584
revision_info_t *info = APR_ARRAY_IDX(pack->info, i,
2586
SVN_ERR(svn_stream_printf(stream, itempool,
2587
"%" APR_SIZE_T_FMT "\n",
2588
info->target.offset));
2589
svn_pool_clear(itempool);
2594
svn_pool_destroy(itempool);
2595
svn_pool_destroy(iterpool);
2597
return SVN_NO_ERROR;
2600
/* Write reorg'ed target content for all revisions in FS. To maximize
2601
* data locality, pack and write in one go per pack file.
2602
* Use POOL for allocations.
2604
static svn_error_t *
2605
pack_and_write_revisions(fs_fs_t *fs,
2610
SVN_ERR(svn_io_make_dir_recursively(apr_psprintf(pool, "%s/new",
2614
for (i = 0; i < fs->packs->nelts; ++i)
2616
revision_pack_t *pack = APR_ARRAY_IDX(fs->packs, i, revision_pack_t*);
2617
if (pack->base % fs->max_files_per_dir == 0)
2618
print_progress(pack->base);
2620
SVN_ERR(pack_revisions(fs, pack, pool));
2621
SVN_ERR(write_revisions(fs, pack, pool));
2624
return SVN_NO_ERROR;
2627
/* For the directory REPRESENTATION in FS, construct the new (target)
2628
* serialized plaintext representation and return it in *CONTENT.
2629
* Allocate the result in POOL and temporaries in SCRATCH_POOL.
2631
static svn_error_t *
2632
get_updated_dir(svn_string_t **content,
2634
representation_t *representation,
2636
apr_pool_t *scratch_pool)
2639
apr_pool_t *hash_pool = svn_pool_create(scratch_pool);
2640
apr_array_header_t *dir = representation->dir->entries;
2642
svn_stream_t *stream;
2643
svn_stringbuf_t *result;
2645
/* get the original content */
2646
SVN_ERR(read_dir(&hash, fs, representation, scratch_pool));
2647
hash = apr_hash_copy(hash_pool, hash);
2649
/* update all entries */
2650
for (i = 0; i < dir->nelts; ++i)
2653
svn_string_t *new_val;
2656
/* find the original entry for for the current name */
2657
direntry_t *entry = APR_ARRAY_IDX(dir, i, direntry_t *);
2658
svn_string_t *str_val = apr_hash_get(hash, entry->name, entry->name_len);
2659
if (str_val == NULL)
2660
return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2661
_("Dir entry '%s' not found"), entry->name);
2663
SVN_ERR_ASSERT(str_val->len < sizeof(buffer));
2665
/* create and updated node ID */
2666
memcpy(buffer, str_val->data, str_val->len+1);
2667
pos = strchr(buffer, '/') - buffer + 1;
2668
pos += svn__ui64toa(buffer + pos, entry->node->target.offset - entry->node->revision->target.offset);
2669
new_val = svn_string_ncreate(buffer, pos, hash_pool);
2671
/* store it in the hash */
2672
apr_hash_set(hash, entry->name, entry->name_len, new_val);
2675
/* serialize the updated hash */
2676
result = svn_stringbuf_create_ensure(representation->target.size, pool);
2677
stream = svn_stream_from_stringbuf(result, hash_pool);
2678
SVN_ERR(svn_hash_write2(hash, stream, SVN_HASH_TERMINATOR, hash_pool));
2679
svn_pool_destroy(hash_pool);
2682
*content = svn_stringbuf__morph_into_string(result);
2684
return SVN_NO_ERROR;
2687
/* Calculate the delta representation for the given CONTENT and BASE.
2688
* Return the rep in *DIFF. Use POOL for allocations.
2690
static svn_error_t *
2691
diff_stringbufs(svn_stringbuf_t *diff,
2693
svn_string_t *content,
2696
svn_txdelta_window_handler_t diff_wh;
2699
svn_stream_t *stream;
2700
svn_stream_t *source = svn_stream_from_string(base, pool);
2701
svn_stream_t *target = svn_stream_from_stringbuf(diff, pool);
2703
/* Prepare to write the svndiff data. */
2704
svn_txdelta_to_svndiff3(&diff_wh,
2708
SVN_DELTA_COMPRESSION_LEVEL_DEFAULT,
2711
/* create delta stream */
2712
stream = svn_txdelta_target_push(diff_wh, diff_whb, source, pool);
2715
SVN_ERR(svn_stream_write(stream, content->data, &content->len));
2716
SVN_ERR(svn_stream_close(stream));
2718
return SVN_NO_ERROR;
2721
/* Update the noderev id value for KEY in the textual noderev representation
2722
* in NODE_REV. Take the new id from NODE. This is a no-op if the KEY
2726
update_id(svn_stringbuf_t *node_rev,
2730
char *newline_pos = 0;
2733
/* we need to update the offset only -> find its position */
2734
pos = strstr(node_rev->data, key);
2736
pos = strchr(pos, '/');
2738
newline_pos = strchr(++pos, '\n');
2740
if (pos && newline_pos)
2742
/* offset data has been found -> replace it */
2743
char temp[SVN_INT64_BUFFER_SIZE];
2744
apr_size_t len = svn__i64toa(temp, node->target.offset - node->revision->target.offset);
2745
svn_stringbuf_replace(node_rev,
2746
pos - node_rev->data, newline_pos - pos,
2751
/* Update the representation id value for KEY in the textual noderev
2752
* representation in NODE_REV. Take the offset, sizes and new MD5 from
2753
* REPRESENTATION. Use SCRATCH_POOL for allocations.
2754
* This is a no-op if the KEY cannot be found.
2757
update_text(svn_stringbuf_t *node_rev,
2759
representation_t *representation,
2760
apr_pool_t *scratch_pool)
2762
apr_size_t key_len = strlen(key);
2763
char *pos = strstr(node_rev->data, key);
2769
val_pos = pos + key_len;
2770
if (representation->dir)
2772
/* for directories, we need to write all rep info anew */
2773
char *newline_pos = strchr(val_pos, '\n');
2774
svn_checksum_t checksum;
2775
const char* temp = apr_psprintf(scratch_pool, "%ld %" APR_SIZE_T_FMT " %"
2776
APR_SIZE_T_FMT" %" APR_SIZE_T_FMT " %s",
2777
representation->revision->revision,
2778
representation->target.offset - representation->revision->target.offset,
2779
representation->target.size,
2780
representation->dir->size,
2781
svn_checksum_to_cstring(&checksum,
2784
checksum.digest = representation->dir->target_md5;
2785
checksum.kind = svn_checksum_md5;
2786
svn_stringbuf_replace(node_rev,
2787
val_pos - node_rev->data, newline_pos - val_pos,
2788
temp, strlen(temp));
2792
/* ordinary representation: replace offset and rep size only.
2793
* Content size and checksums are unchanged. */
2795
char *end_pos = strchr(val_pos, ' ');
2797
val_pos = end_pos + 1;
2798
end_pos = strchr(strchr(val_pos, ' ') + 1, ' ');
2799
temp = apr_psprintf(scratch_pool, "%" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT,
2800
representation->target.offset - representation->revision->target.offset,
2801
representation->target.size);
2803
svn_stringbuf_replace(node_rev,
2804
val_pos - node_rev->data, end_pos - val_pos,
2805
temp, strlen(temp));
2809
/* Get the target content (data block as to be written to the file) for
2810
* the given FRAGMENT in FS. Return the content in *CONTENT. Use POOL
2813
* Note that, as a side-effect, this will update the target rep. info for
2816
static svn_error_t *
2817
get_fragment_content(svn_string_t **content,
2819
fragment_t *fragment,
2822
revision_info_t *info;
2823
representation_t *representation;
2825
svn_string_t *revision_content, *base_content;
2826
svn_stringbuf_t *header, *node_rev, *text;
2827
apr_size_t header_size;
2828
svn_checksum_t *checksum = NULL;
2830
switch (fragment->kind)
2832
/* revision headers can be constructed from target position info */
2833
case header_fragment:
2834
info = fragment->data;
2835
*content = svn_string_createf(pool,
2836
"\n%" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n",
2837
info->root_noderev->target.offset - info->target.offset,
2838
info->target.changes);
2839
return SVN_NO_ERROR;
2841
/* The changes list remains untouched */
2842
case changes_fragment:
2843
info = fragment->data;
2844
SVN_ERR(get_content(&revision_content, fs, info->revision, pool));
2846
*content = svn_string_create_empty(pool);
2847
(*content)->data = revision_content->data + info->original.changes;
2848
(*content)->len = info->target.changes_len;
2849
return SVN_NO_ERROR;
2851
/* property and file reps get new headers any need to be rewritten,
2852
* iff the base rep is a directory. The actual (deltified) content
2853
* remains unchanged, though. MD5 etc. do not change. */
2854
case property_fragment:
2856
representation = fragment->data;
2857
SVN_ERR(get_content(&revision_content, fs,
2858
representation->revision->revision, pool));
2860
if (representation->delta_base)
2861
if (representation->delta_base->dir)
2863
/* if the base happens to be a directory, reconstruct the
2864
* full text and represent it as PLAIN rep. */
2865
SVN_ERR(get_combined_window(&text, fs, representation, pool));
2866
representation->target.size = text->len;
2868
svn_stringbuf_insert(text, 0, "PLAIN\n", 6);
2869
svn_stringbuf_appendcstr(text, "ENDREP\n");
2870
*content = svn_stringbuf__morph_into_string(text);
2872
return SVN_NO_ERROR;
2875
/* construct a new rep header */
2876
if (representation->delta_base == fs->null_base)
2877
header = svn_stringbuf_create("DELTA\n", pool);
2879
header = svn_stringbuf_createf(pool,
2880
"DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n",
2881
representation->delta_base->revision->revision,
2882
representation->delta_base->target.offset
2883
- representation->delta_base->revision->target.offset,
2884
representation->delta_base->target.size);
2886
header = svn_stringbuf_create("PLAIN\n", pool);
2888
/* if it exists, the actual delta base is unchanged. Hence, this
2889
* rep is unchanged even if it has been deltified. */
2890
header_size = strchr(revision_content->data +
2891
representation->original.offset, '\n') -
2892
revision_content->data -
2893
representation->original.offset + 1;
2894
svn_stringbuf_appendbytes(header,
2895
revision_content->data +
2896
representation->original.offset +
2898
representation->original.size);
2899
svn_stringbuf_appendcstr(header, "ENDREP\n");
2900
*content = svn_stringbuf__morph_into_string(header);
2901
return SVN_NO_ERROR;
2903
/* directory reps need to be rewritten (and deltified) completely.
2904
* As a side-effect, update the MD5 and target content size. */
2906
/* construct new content and update MD5 */
2907
representation = fragment->data;
2908
SVN_ERR(get_updated_dir(&revision_content, fs, representation,
2910
SVN_ERR(svn_checksum(&checksum, svn_checksum_md5,
2911
revision_content->data, revision_content->len,
2913
memcpy(representation->dir->target_md5,
2915
sizeof(representation->dir->target_md5));
2917
/* deltify against the base rep if necessary */
2918
if (representation->delta_base)
2920
if (representation->delta_base->dir == NULL)
2922
/* dummy or non-dir base rep -> self-compress only */
2923
header = svn_stringbuf_create("DELTA\n", pool);
2924
base_content = svn_string_create_empty(pool);
2928
/* deltify against base rep (which is a directory, too)*/
2929
representation_t *base_rep = representation->delta_base;
2930
header = svn_stringbuf_createf(pool,
2931
"DELTA %ld %" APR_SIZE_T_FMT " %" APR_SIZE_T_FMT "\n",
2932
base_rep->revision->revision,
2933
base_rep->target.offset - base_rep->revision->target.offset,
2934
base_rep->target.size);
2935
SVN_ERR(get_updated_dir(&base_content, fs, base_rep,
2939
/* run deltification and update target content size */
2940
header_size = header->len;
2941
SVN_ERR(diff_stringbufs(header, base_content,
2942
revision_content, pool));
2943
representation->dir->size = revision_content->len;
2944
representation->target.size = header->len - header_size;
2945
svn_stringbuf_appendcstr(header, "ENDREP\n");
2946
*content = svn_stringbuf__morph_into_string(header);
2950
/* no delta base (not even a dummy) -> PLAIN rep */
2951
representation->target.size = revision_content->len;
2952
representation->dir->size = revision_content->len;
2953
*content = svn_string_createf(pool, "PLAIN\n%sENDREP\n",
2954
revision_content->data);
2957
return SVN_NO_ERROR;
2959
/* construct the new noderev content. No side-effects.*/
2960
case noderev_fragment:
2961
/* get the original noderev as string */
2962
node = fragment->data;
2963
SVN_ERR(get_content(&revision_content, fs,
2964
node->revision->revision, pool));
2965
node_rev = svn_stringbuf_ncreate(revision_content->data +
2966
node->original.offset,
2967
node->original.size,
2970
/* update the values that may have hanged for target */
2971
update_id(node_rev, "id: ", node);
2972
update_id(node_rev, "pred: ", node->predecessor);
2973
update_text(node_rev, "text: ", node->text, pool);
2974
update_text(node_rev, "props: ", node->props, pool);
2976
*content = svn_stringbuf__morph_into_string(node_rev);
2977
return SVN_NO_ERROR;
2982
return SVN_NO_ERROR;
2985
/* In the repository at PATH, restore the original content in case we ran
2986
* this reorg tool before. Use POOL for allocations.
2988
static svn_error_t *
2989
prepare_repo(const char *path, apr_pool_t *pool)
2991
svn_node_kind_t kind;
2993
const char *old_path = svn_dirent_join(path, "db/old", pool);
2994
const char *new_path = svn_dirent_join(path, "new", pool);
2995
const char *revs_path = svn_dirent_join(path, "db/revs", pool);
2996
const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
2997
const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
2999
/* is there a backup? */
3000
SVN_ERR(svn_io_check_path(old_path, &kind, pool));
3001
if (kind == svn_node_dir)
3003
/* yes, restore the org content from it */
3004
SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
3005
SVN_ERR(svn_io_file_move(revs_path, new_path, pool));
3006
SVN_ERR(svn_io_file_move(old_path, revs_path, pool));
3007
SVN_ERR(svn_io_remove_dir2(new_path, TRUE, NULL, NULL, pool));
3010
/* same for the rep cache db */
3011
SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
3012
if (kind == svn_node_file)
3013
SVN_ERR(svn_io_file_move(old_rep_cache_path, rep_cache_path, pool));
3015
return SVN_NO_ERROR;
3018
/* In the repository at PATH, create a backup of the orig content and
3019
* replace it with the reorg'ed. Use POOL for allocations.
3021
static svn_error_t *
3022
activate_new_revs(const char *path, apr_pool_t *pool)
3024
svn_node_kind_t kind;
3026
const char *old_path = svn_dirent_join(path, "db/old", pool);
3027
const char *new_path = svn_dirent_join(path, "new", pool);
3028
const char *revs_path = svn_dirent_join(path, "db/revs", pool);
3029
const char *old_rep_cache_path = svn_dirent_join(path, "db/rep-cache.db.old", pool);
3030
const char *rep_cache_path = svn_dirent_join(path, "db/rep-cache.db", pool);
3032
/* if there is no backup, yet, move the current repo content to the backup
3033
* and place it with the new (reorg'ed) data. */
3034
SVN_ERR(svn_io_check_path(old_path, &kind, pool));
3035
if (kind == svn_node_none)
3037
SVN_ERR(svn_io_file_move(revs_path, old_path, pool));
3038
SVN_ERR(svn_io_file_move(new_path, revs_path, pool));
3041
/* same for the rep cache db */
3042
SVN_ERR(svn_io_check_path(old_rep_cache_path, &kind, pool));
3043
if (kind == svn_node_none)
3044
SVN_ERR(svn_io_file_move(rep_cache_path, old_rep_cache_path, pool));
3046
return SVN_NO_ERROR;
3049
/* Write tool usage info text to OSTREAM using PROGNAME as a prefix and
3050
* POOL for allocations.
3053
print_usage(svn_stream_t *ostream, const char *progname,
3056
svn_error_clear(svn_stream_printf(ostream, pool,
3058
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
3059
"!!! This is an experimental tool. Don't use it on production data !!!\n"
3060
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
3062
"Usage: %s <repo> <cachesize>\n"
3064
"Optimize the repository at local path <repo> staring from revision 0.\n"
3065
"Use up to <cachesize> MB of memory for caching. This does not include\n"
3066
"temporary representation of the repository structure, i.e. the actual\n"
3067
"memory will be higher and <cachesize> be the lower limit.\n",
3071
/* linear control flow */
3072
int main(int argc, const char *argv[])
3075
svn_stream_t *ostream;
3076
svn_error_t *svn_err;
3077
const char *repo_path = NULL;
3078
svn_revnum_t start_revision = 0;
3079
apr_size_t memsize = 0;
3080
apr_uint64_t temp = 0;
3084
atexit(apr_terminate);
3086
pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
3088
svn_err = svn_stream_for_stdout(&ostream, pool);
3091
svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG);
3097
print_usage(ostream, argv[0], pool);
3101
svn_err = svn_cstring_strtoui64(&temp, argv[2], 0, APR_SIZE_MAX, 10);
3104
print_usage(ostream, argv[0], pool);
3105
svn_error_clear(svn_err);
3109
memsize = (apr_size_t)temp;
3110
repo_path = argv[1];
3113
printf("\nPreparing repository\n");
3114
svn_err = prepare_repo(repo_path, pool);
3118
printf("Reading revisions\n");
3119
svn_err = read_revisions(&fs, repo_path, start_revision, memsize, pool);
3124
printf("\nReordering revision content\n");
3125
svn_err = reorder_revisions(fs, pool);
3130
printf("\nPacking and writing revisions\n");
3131
svn_err = pack_and_write_revisions(fs, pool);
3136
printf("\nSwitch to new revs\n");
3137
svn_err = activate_new_revs(repo_path, pool);
3142
svn_handle_error2(svn_err, stdout, FALSE, ERROR_TAG);