/*
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
22
#include <apr_signal.h>
24
#include "svn_cmdline.h"
25
#include "svn_dirent_uri.h"
26
#include "svn_pools.h"
27
#include "svn_repos.h"
30
#include "svn_version.h"
32
#include "../../subversion/libsvn_fs_fs/fs.h"
33
#include "../../subversion/libsvn_fs_fs/fs_fs.h"
34
/* for svn_fs_fs__id_* (used in assertions only) */
35
#include "../../subversion/libsvn_fs_fs/id.h"
37
#include "private/svn_cmdline_private.h"
39
#include "svn_private_config.h"
42
/** Help messages and version checking. **/
45
/* Version reporting.  Delegates to svn_opt_print_help4() with the program
 * name "svn-rep-sharing-stats" and returns whatever that call returns.
 * POOL is forwarded as the last argument.
 *
 * NOTE(review): the TRUE passed as third argument presumably selects the
 * "print version" mode -- confirm against the svn_opt.h declaration.
 * NOTE(review): the return-type line of this definition is not visible in
 * this listing; main() wraps the call in SVN_INT_ERR(). */
version(apr_pool_t *pool)
47
return svn_opt_print_help4(NULL, "svn-rep-sharing-stats", TRUE, FALSE, FALSE,
48
NULL, NULL, NULL, NULL, NULL, NULL, pool);
52
/* Print the one-line hint pointing the user at --help.  The message goes
 * through svn_cmdline_fprintf() and any error that call returns is
 * discarded with svn_error_clear().
 *
 * NOTE(review): the line carrying the stream and pool arguments is not
 * visible in this listing (presumably stderr, POOL) -- confirm. */
usage(apr_pool_t *pool)
54
svn_error_clear(svn_cmdline_fprintf
56
_("Type 'svn-rep-sharing-stats --help' for usage.\n")));
61
/* Print the full help text: the usage banner, then one line per entry of
 * OPTIONS, then a lone newline.
 *
 * OPTIONS is walked while options->description is non-NULL, so the array
 * is expected to end with an entry whose description is NULL.  Each entry
 * is rendered with svn_opt_format_option() and written to stdout.  All
 * printing errors are discarded with svn_error_clear().
 *
 * POOL is used for the formatted option strings and for printing.
 *
 * NOTE(review): the statement advancing OPTIONS inside the loop is not
 * visible in this listing -- confirm it exists. */
help(const apr_getopt_option_t *options, apr_pool_t *pool)
66
_("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n"
67
" Prints the reference count statistics for representations\n"
68
" in an FSFS repository.\n"
70
" At least one of the options --data/--prop/--both must be specified.\n"
72
"Valid options:\n")));
73
while (options->description)
76
svn_opt_format_option(&optstr, options, TRUE, pool);
77
svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr));
80
svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n"));
85
/* Version compatibility check.
 *
 * Builds a static checklist naming the svn_subr and svn_fs libraries with
 * their version functions, defines this program's own version with
 * SVN_VERSION_DEFINE(), and returns the (traced) result of
 * svn_ver_check_list() comparing the two.
 *
 * NOTE(review): the checklist terminator entry is not visible in this
 * listing -- confirm it is present. */
87
check_lib_versions(void)
89
static const svn_version_checklist_t checklist[] =
91
/* ### check FSFS version */
92
{ "svn_subr", svn_subr_version },
93
{ "svn_fs", svn_fs_version },
96
SVN_VERSION_DEFINE(my_version);
98
return svn_error_trace(svn_ver_check_list(&my_version, checklist));
103
/** Cancellation stuff, ### copied from subversion/svn/main.c */
105
/* A flag to see if we've been cancelled by the client or not. */
106
static volatile sig_atomic_t cancelled = FALSE;
108
/* A signal handler to support cancellation.
 *
 * Resets the disposition of SIGNUM to SIG_IGN, so further deliveries of
 * the same signal are ignored.
 *
 * NOTE(review): presumably this also sets the file-scope `cancelled' flag;
 * that statement is not visible in this listing -- confirm. */
110
signal_handler(int signum)
112
apr_signal(signum, SIG_IGN);
116
/* Our cancellation callback.
 *
 * The visible return creates an SVN_ERR_CANCELLED error with the message
 * "Caught signal".  BATON is not referenced in the visible lines; callers
 * in this file pass NULL.
 *
 * NOTE(review): the condition guarding this return (presumably a test of
 * `cancelled') and the non-error return are not visible in this listing
 * -- confirm. */
118
svn_cl__check_cancel(void *baton)
121
return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal"));
126
static svn_cancel_func_t cancel_func = svn_cl__check_cancel;
128
/* Install the process-wide signal dispositions used by this tool:
 *   - SIGINT, SIGBREAK, SIGHUP and SIGTERM are routed to signal_handler;
 *   - SIGPIPE and SIGXFSZ are set to SIG_IGN.
 *
 * NOTE(review): signals such as SIGBREAK, SIGHUP, SIGPIPE and SIGXFSZ do
 * not exist on every platform; the preprocessor guards that presumably
 * surround them are not visible in this listing -- confirm. */
static void set_up_cancellation(void)
130
/* Set up our cancellation support. */
131
apr_signal(SIGINT, signal_handler);
133
/* SIGBREAK is a Win32 specific signal generated by ctrl-break. */
134
apr_signal(SIGBREAK, signal_handler);
137
apr_signal(SIGHUP, signal_handler);
140
apr_signal(SIGTERM, signal_handler);
144
/* Disable SIGPIPE generation for the platforms that have it. */
145
apr_signal(SIGPIPE, SIG_IGN);
149
/* Disable SIGXFSZ generation for the platforms that have it, otherwise
150
* working with large files when compiled against an APR that doesn't have
151
* large file support will crash the program, which is uncool. */
152
apr_signal(SIGXFSZ, SIG_IGN);
157
/** Program-specific code. **/
159
OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID,
165
/* Gate for the experimental status of this tool.
 *
 * Tests whether the environment variable
 * SVN_REP_SHARING_STATS_IS_EXPERIMENTAL is set (any value).  The visible
 * return creates an APR_EGENERAL error explaining that the code is
 * experimental.
 *
 * NOTE(review): the statement controlled by the getenv() test is not
 * visible in this listing (presumably a success return that skips the
 * error) -- confirm. */
static svn_error_t *check_experimental(void)
167
if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL"))
170
return svn_error_create(APR_EGENERAL, NULL,
171
"This code is experimental and should not "
172
"be used on live data.");
175
/* The parts of a rep that determine whether it's being shared. */
178
svn_revnum_t revision;
182
/* What we need to know about a rep. */
185
svn_checksum_t *sha1_checksum;
186
apr_uint64_t refcount;
189
/* Increment records[rep] if both are non-NULL and REP contains a sha1.
 * Allocate keys and values in RESULT_POOL.
 */
192
/* Tally one representation REP in the hash RECORDS.
 *
 * The hash key is a zero-filled struct holding REP's revision and offset;
 * the whole struct, sizeof(*key) bytes, is used as the key.  The hash value
 * is a struct value_t carrying a sha1 checksum and a refcount.
 *
 * When an entry for the key already exists, its checksum is asserted to be
 * non-NULL and to match REP's sha1.  Otherwise a new value is allocated in
 * RESULT_POOL with a duplicate of REP's sha1.  In both cases the value is
 * then stored under the key.
 *
 * NOTE(review): the statements that update or initialise `refcount', the
 * declaration of `key', the action taken when RECORDS/REP/sha1 is NULL and
 * the final return are not visible in this listing -- confirm against the
 * upstream file. */
static svn_error_t *record(apr_hash_t *records,
193
representation_t *rep,
194
apr_pool_t *result_pool)
197
struct value_t *value;
199
/* Skip if we ignore this particular kind of reps, or if the rep doesn't
200
* exist or doesn't have the checksum we are after. (The latter case
201
* often corresponds to node_rev->kind == svn_node_dir.)
203
if (records == NULL || rep == NULL || rep->sha1_checksum == NULL)
206
/* Construct the key.
208
* Must use calloc() because apr_hash_* pay attention to padding bytes too.
210
key = apr_pcalloc(result_pool, sizeof(*key));
211
key->revision = rep->revision;
212
key->offset = rep->offset;
214
/* Update or create the value. */
215
if ((value = apr_hash_get(records, key, sizeof(*key))))
218
SVN_ERR_ASSERT(value->sha1_checksum != NULL);
219
SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum,
220
rep->sha1_checksum));
226
value = apr_palloc(result_pool, sizeof(*value));
227
value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool);
232
/* Store VALUE under KEY; both live in RESULT_POOL. */
apr_hash_set(records, key, sizeof(*key), value);
237
/* Inspect the data and/or prop reps of revision REVNUM in FS.  Store
 * reference count tallies in the passed hashes (allocated in RESULT_POOL).
 *
 * If PROP_REPS or DATA_REPS is NULL, the respective kind of reps is not
 * tallied.
 *
 * Print a progress report to STDERR unless QUIET is true.
 *
 * Use SCRATCH_POOL for temporary allocations.
 */
248
/* Walk the changed paths of one revision and feed their reps to record().
 *
 * Steps visible here:
 *   1. Print "processing r<REVNUM>" to stderr.
 *   2. Open the revision root for REVNUM and fetch its changed-paths hash.
 *   3. For every changed path: print a per-path progress line, look up the
 *      path's node-rev id in the revision root, fetch the node revision
 *      through the FSFS-private svn_fs_fs__get_node_revision(), and pass
 *      its prop_rep and data_rep to record() for PROP_REPS, DATA_REPS and
 *      (both of them) BOTH_REPS.
 *
 * Hash entries are allocated in RESULT_POOL; everything else uses
 * SCRATCH_POOL.
 *
 * NOTE(review): the REVNUM and QUIET parameters, the declaration of `path',
 * the tests on QUIET around the progress output and the statement that
 * handles deleted paths are not visible in this listing -- confirm. */
process_one_revision(svn_fs_t *fs,
251
apr_hash_t *prop_reps,
252
apr_hash_t *data_reps,
253
apr_hash_t *both_reps,
254
apr_pool_t *result_pool,
255
apr_pool_t *scratch_pool)
257
svn_fs_root_t *rev_root;
258
apr_hash_t *paths_changed;
259
apr_hash_index_t *hi;
262
SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
263
"processing r%ld\n", revnum));
265
/* Get the changed paths. */
266
SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool));
267
SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool));
270
/* ### use iterpool? */
271
for (hi = apr_hash_first(scratch_pool, paths_changed);
272
hi; hi = apr_hash_next(hi))
275
const svn_fs_path_change2_t *change;
276
const svn_fs_id_t *node_rev_id1, *node_rev_id2;
277
const svn_fs_id_t *the_id;
279
node_revision_t *node_rev;
281
/* Hash key is the changed path, hash value its change record. */
path = svn__apr_hash_index_key(hi);
282
change = svn__apr_hash_index_val(hi);
284
SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
285
"processing r%ld:%s\n", revnum, path));
287
if (change->change_kind == svn_fs_path_change_delete)
288
/* Can't ask for reps of PATH at REVNUM if the path no longer exists
289
* at that revision! */
292
/* Okay, we have two node_rev id's for this change: the txn one and
293
* the revision one. We'll use the latter. */
294
node_rev_id1 = change->node_rev_id;
295
SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool));
297
/* Sanity checks: id1 must carry a txn id, id2 a valid revision. */
SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL);
298
SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM);
300
the_id = node_rev_id2;
302
/* Get the node_rev using the chosen node_rev_id. */
303
SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool));
305
/* Maybe record the sha1's.  record() itself ignores a NULL hash or a
 * NULL rep, so all four calls are made unconditionally. */
306
SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool));
307
SVN_ERR(record(data_reps, node_rev->data_rep, result_pool));
308
SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool));
309
SVN_ERR(record(both_reps, node_rev->data_rep, result_pool));
315
/* Print REPS_REF_COUNTS (a hash as for process_one_revision())
 * to stdout, one line per entry: NAME, the refcount and the sha1,
 * separated by single spaces.  A sha1 may appear
 * more than once if not all its instances are shared.  NAME is
 * prepended to each line.
 *
 * Use SCRATCH_POOL for temporary allocations.
 */
323
/* Dump every entry of the hash REPS_REF_COUNTS to stdout.
 *
 * Each entry's value is a struct value_t; one line is printed per entry
 * in the form "<NAME> <refcount> <sha1>", where the sha1 is rendered by
 * svn_checksum_to_cstring_display().  The cancellation callback is polled
 * (with a NULL baton) before each entry, so a pending cancellation aborts
 * the listing with that callback's error.
 *
 * SCRATCH_POOL is used for hash iteration and for printing.
 *
 * NOTE(review): the statement executed when REPS_REF_COUNTS is NULL
 * (presumably an early success return) and the tail of this function are
 * not visible in this listing -- confirm. */
pretty_print(const char *name,
324
apr_hash_t *reps_ref_counts,
325
apr_pool_t *scratch_pool)
327
apr_hash_index_t *hi;
329
if (reps_ref_counts == NULL)
332
for (hi = apr_hash_first(scratch_pool, reps_ref_counts);
333
hi; hi = apr_hash_next(hi))
335
struct value_t *value;
337
SVN_ERR(cancel_func(NULL));
339
value = svn__apr_hash_index_val(hi);
340
SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n",
341
name, value->refcount,
342
svn_checksum_to_cstring_display(
343
value->sha1_checksum,
350
/* Return an error unless FS is an fsfs fs.
 *
 * Obtains FS's path with svn_fs_path(), asks svn_fs_type() for the type
 * reported for that path, and compares it with SVN_FS_TYPE_FSFS using
 * strcmp().  On mismatch an SVN_ERR_FS_UNKNOWN_FS_TYPE error is created
 * whose message names the path in local style.  An error from
 * svn_fs_type() is propagated by SVN_ERR().
 *
 * SCRATCH_POOL is used for every allocation visible here.
 *
 * NOTE(review): the last argument of the error message and the success
 * return are not visible in this listing -- confirm. */
351
static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool)
353
const char *actual, *expected, *path;
355
path = svn_fs_path(fs, scratch_pool);
357
expected = SVN_FS_TYPE_FSFS;
358
SVN_ERR(svn_fs_type(&actual, path, scratch_pool));
360
if (strcmp(actual, expected) != 0)
361
return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL,
362
"Filesystem '%s' is not of type '%s'",
363
svn_dirent_local_style(path, scratch_pool),
369
/* The core logic.  This function iterates the repository REPOS_PATH
 * and sends all the (DATA and/or PROP) reps in each revision for counting
 * by process_one_revision().  QUIET is passed to process_one_revision().
 */
373
/* Driver for the whole run.
 *
 * Steps visible here:
 *   1. Create the tally hashes (prop, data, both) in SCRATCH_POOL.
 *   2. Open the repository at REPOS_PATH, take its filesystem and require
 *      it to be FSFS via is_fs_fsfs().
 *   3. Fetch the youngest revision and call process_one_revision() for
 *      every revision from 0 through youngest inclusive.  A sub-pool is
 *      cleared at the start of each iteration and passed as the callee's
 *      scratch pool, while SCRATCH_POOL itself is passed as its result
 *      pool so the tallies outlive the loop.  Cancellation is polled once
 *      per revision.
 *   4. Destroy the iteration pool and print the three hashes under the
 *      labels "prop", "data" and "both"; pretty_print() tests each hash
 *      for NULL itself.
 *
 * NOTE(review): the PROP, DATA and QUIET parameters, the declarations of
 * `repos' and `fs', and the conditions deciding which hashes get created
 * are not visible in this listing -- confirm against the upstream file. */
static svn_error_t *process(const char *repos_path,
377
apr_pool_t *scratch_pool)
379
apr_hash_t *prop_reps = NULL;
380
apr_hash_t *data_reps = NULL;
381
apr_hash_t *both_reps = NULL;
382
svn_revnum_t rev, youngest;
383
apr_pool_t *iterpool;
388
prop_reps = apr_hash_make(scratch_pool);
390
data_reps = apr_hash_make(scratch_pool);
392
both_reps = apr_hash_make(scratch_pool);
395
SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool));
396
fs = svn_repos_fs(repos);
398
SVN_ERR(is_fs_fsfs(fs, scratch_pool));
400
SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool));
402
/* Iterate the revisions. */
403
iterpool = svn_pool_create(scratch_pool);
404
for (rev = 0; rev <= youngest; rev++)
406
svn_pool_clear(iterpool);
407
SVN_ERR(cancel_func(NULL));
408
SVN_ERR(process_one_revision(fs, rev, quiet,
409
prop_reps, data_reps, both_reps,
410
scratch_pool, iterpool));
412
svn_pool_destroy(iterpool);
415
SVN_ERR(pretty_print("prop", prop_reps, scratch_pool));
416
SVN_ERR(pretty_print("data", data_reps, scratch_pool));
417
SVN_ERR(pretty_print("both", both_reps, scratch_pool));
423
/* Program entry point.
 *
 * Steps visible here:
 *   1. Initialise the command-line layer with svn_cmdline_init().
 *   2. Create the top-level pool on its own allocator.
 *   3. Check library versions and initialise option parsing; a failure in
 *      either is reported through svn_cmdline_handle_exit_error() with the
 *      "svn-rep-sharing-stats: " prefix.
 *   4. Require the experimental opt-in via check_experimental().
 *   5. Parse options with apr_getopt_long() against the table below
 *      (--data, --prop, --both, --quiet/-q, --help/-h, --version).
 *   6. Require exactly one non-option argument and at least one of
 *      DATA/PROP to be set.
 *   7. Convert the argument to UTF-8 and to internal dirent style, install
 *      the signal handlers and run process().
 *   8. Destroy the pool and flush stdout, reporting a flush error.
 *
 * NOTE(review): many lines of this function are not visible in this
 * listing, including the declarations of `pool', `err', `os', `opt' and
 * `arg', the option loop and its switch, and every return statement other
 * than the two error exits -- confirm against the upstream file. */
main(int argc, const char *argv[])
425
const char *repos_path;
427
svn_boolean_t prop = FALSE, data = FALSE;
428
svn_boolean_t quiet = FALSE;
431
const apr_getopt_option_t options[] =
433
{"data", OPT_DATA, 0, N_("display data reps stats")},
434
{"prop", OPT_PROP, 0, N_("display prop reps stats")},
435
{"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")},
436
{"quiet", 'q', 0, N_("no progress (only errors) to stderr")},
437
{"help", 'h', 0, N_("display this help")},
438
{"version", OPT_VERSION, 0,
439
N_("show program version information")},
443
/* Initialize the app. */
444
if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS)
447
/* Create our top-level pool. Use a separate mutexless allocator,
448
* given this application is single threaded.
450
pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
452
/* Check library versions */
453
err = check_lib_versions();
455
return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
457
err = svn_cmdline__getopt_init(&os, argc, argv, pool);
459
return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
461
SVN_INT_ERR(check_experimental());
468
apr_status_t status = apr_getopt_long(os, options, &opt, &arg);
469
if (APR_STATUS_IS_EOF(status))
471
if (status != APR_SUCCESS)
481
/* It seems we don't actually rep-share props yet. */
496
SVN_INT_ERR(version(pool));
505
/* Exactly 1 non-option argument,
506
* and at least one of "--data"/"--prop"/"--both".
508
if (os->ind + 1 != argc || (!data && !prop))
514
/* Grab REPOS_PATH from argv. */
515
SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool));
516
repos_path = svn_dirent_internal_style(repos_path, pool);
518
set_up_cancellation();
521
SVN_INT_ERR(process(repos_path, prop, data, quiet, pool));
525
svn_pool_destroy(pool);
526
/* Flush stdout to make sure that the user will see any printing errors. */
527
SVN_INT_ERR(svn_cmdline_fflush(stdout));