/*
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */
#include <stdlib.h>
#include <string.h>

#include <apr_signal.h>

#include "svn_cmdline.h"
#include "svn_dirent_uri.h"
#include "svn_opt.h"
#include "svn_pools.h"
#include "svn_repos.h"
#include "svn_utf.h"
#include "svn_version.h"

#include "../../subversion/libsvn_fs_fs/fs.h"
#include "../../subversion/libsvn_fs_fs/fs_fs.h"
/* for svn_fs_fs__id_* (used in assertions only) */
#include "../../subversion/libsvn_fs_fs/id.h"

#include "svn_private_config.h"
40
/** Help messages and version checking. **/
43
version(apr_pool_t *pool)
45
return svn_opt_print_help3(NULL, "svn-rep-sharing-stats", TRUE, FALSE, NULL,
46
NULL, NULL, NULL, NULL, NULL, pool);
50
usage(apr_pool_t *pool)
52
svn_error_clear(svn_cmdline_fprintf
54
_("Type 'svn-rep-sharing-stats --help' for usage.\n")));
59
help(const apr_getopt_option_t *options, apr_pool_t *pool)
64
_("usage: svn-rep-sharing-stats [OPTIONS] REPOS_PATH\n\n"
65
" Prints the reference count statistics for representations\n"
66
" in an FSFS repository.\n"
68
" At least one of the options --data/--prop/--both must be specified.\n"
70
"Valid options:\n")));
71
while (options->description)
74
svn_opt_format_option(&optstr, options, TRUE, pool);
75
svn_error_clear(svn_cmdline_fprintf(stdout, pool, " %s\n", optstr));
78
svn_error_clear(svn_cmdline_fprintf(stdout, pool, "\n"));
83
/* Version compatibility check */
85
check_lib_versions(void)
87
static const svn_version_checklist_t checklist[] =
89
/* ### check FSFS version */
90
{ "svn_subr", svn_subr_version },
91
{ "svn_fs", svn_fs_version },
95
SVN_VERSION_DEFINE(my_version);
96
return svn_error_trace(svn_ver_check_list(&my_version, checklist));
101
/** Cancellation stuff, ### copied from subversion/svn/main.c */
103
/* A flag to see if we've been cancelled by the client or not. */
104
static volatile sig_atomic_t cancelled = FALSE;
106
/* A signal handler to support cancellation. */
108
signal_handler(int signum)
110
apr_signal(signum, SIG_IGN);
114
/* Our cancellation callback. */
116
svn_cl__check_cancel(void *baton)
119
return svn_error_create(SVN_ERR_CANCELLED, NULL, _("Caught signal"));
124
static svn_cancel_func_t cancel_func = svn_cl__check_cancel;
126
static void set_up_cancellation(void)
128
/* Set up our cancellation support. */
129
apr_signal(SIGINT, signal_handler);
131
/* SIGBREAK is a Win32 specific signal generated by ctrl-break. */
132
apr_signal(SIGBREAK, signal_handler);
135
apr_signal(SIGHUP, signal_handler);
138
apr_signal(SIGTERM, signal_handler);
142
/* Disable SIGPIPE generation for the platforms that have it. */
143
apr_signal(SIGPIPE, SIG_IGN);
147
/* Disable SIGXFSZ generation for the platforms that have it, otherwise
148
* working with large files when compiled against an APR that doesn't have
149
* large file support will crash the program, which is uncool. */
150
apr_signal(SIGXFSZ, SIG_IGN);
155
/** Program-specific code. **/
157
OPT_VERSION = SVN_OPT_FIRST_LONGOPT_ID,
163
static svn_error_t *check_experimental(void)
165
if (getenv("SVN_REP_SHARING_STATS_IS_EXPERIMENTAL"))
168
return svn_error_create(APR_EGENERAL, NULL,
169
"This code is experimental and should not "
170
"be used on live data.");
173
/* The parts of a rep that determine whether it's being shared. */
176
svn_revnum_t revision;
180
/* What we need to know about a rep. */
183
svn_checksum_t *sha1_checksum;
184
apr_uint64_t refcount;
187
/* Increment records[rep] if both are non-NULL and REP contains a sha1.
188
* Allocate keys and values in RESULT_POOL.
190
static svn_error_t *record(apr_hash_t *records,
191
representation_t *rep,
192
apr_pool_t *result_pool)
195
struct value_t *value;
197
/* Skip if we ignore this particular kind of reps, or if the rep doesn't
198
* exist or doesn't have the checksum we are after. (The latter case
199
* often corresponds to node_rev->kind == svn_node_dir.)
201
if (records == NULL || rep == NULL || rep->sha1_checksum == NULL)
204
/* Construct the key.
206
* Must use calloc() because apr_hash_* pay attention to padding bytes too.
208
key = apr_pcalloc(result_pool, sizeof(*key));
209
key->revision = rep->revision;
210
key->offset = rep->offset;
212
/* Update or create the value. */
213
if ((value = apr_hash_get(records, key, sizeof(*key))))
216
SVN_ERR_ASSERT(value->sha1_checksum != NULL);
217
SVN_ERR_ASSERT(svn_checksum_match(value->sha1_checksum,
218
rep->sha1_checksum));
224
value = apr_palloc(result_pool, sizeof(*value));
225
value->sha1_checksum = svn_checksum_dup(rep->sha1_checksum, result_pool);
230
apr_hash_set(records, key, sizeof(*key), value);
235
/* Inspect the data and/or prop reps of revision REVNUM in FS. Store
236
* reference count tallies in passed hashes (allocated in RESULT_POOL).
238
* If PROP_REPS or DATA_REPS is NULL, the respective kind of reps are not
241
* Print progress report to STDERR unless QUIET is true.
243
* Use SCRATCH_POOL for temporary allocations.
246
process_one_revision(svn_fs_t *fs,
249
apr_hash_t *prop_reps,
250
apr_hash_t *data_reps,
251
apr_hash_t *both_reps,
252
apr_pool_t *result_pool,
253
apr_pool_t *scratch_pool)
255
svn_fs_root_t *rev_root;
256
apr_hash_t *paths_changed;
257
apr_hash_index_t *hi;
260
SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
261
"processing r%ld\n", revnum));
263
/* Get the changed paths. */
264
SVN_ERR(svn_fs_revision_root(&rev_root, fs, revnum, scratch_pool));
265
SVN_ERR(svn_fs_paths_changed2(&paths_changed, rev_root, scratch_pool));
268
/* ### use iterpool? */
269
for (hi = apr_hash_first(scratch_pool, paths_changed);
270
hi; hi = apr_hash_next(hi))
273
const svn_fs_path_change2_t *change;
274
const svn_fs_id_t *node_rev_id1, *node_rev_id2;
275
const svn_fs_id_t *the_id;
277
node_revision_t *node_rev;
279
path = svn__apr_hash_index_key(hi);
280
change = svn__apr_hash_index_val(hi);
282
SVN_ERR(svn_cmdline_fprintf(stderr, scratch_pool,
283
"processing r%ld:%s\n", revnum, path));
285
if (change->change_kind == svn_fs_path_change_delete)
286
/* Can't ask for reps of PATH at REVNUM if the path no longer exists
287
* at that revision! */
290
/* Okay, we have two node_rev id's for this change: the txn one and
291
* the revision one. We'll use the latter. */
292
node_rev_id1 = change->node_rev_id;
293
SVN_ERR(svn_fs_node_id(&node_rev_id2, rev_root, path, scratch_pool));
295
SVN_ERR_ASSERT(svn_fs_fs__id_txn_id(node_rev_id1) != NULL);
296
SVN_ERR_ASSERT(svn_fs_fs__id_rev(node_rev_id2) != SVN_INVALID_REVNUM);
298
the_id = node_rev_id2;
300
/* Get the node_rev using the chosen node_rev_id. */
301
SVN_ERR(svn_fs_fs__get_node_revision(&node_rev, fs, the_id, scratch_pool));
303
/* Maybe record the sha1's. */
304
SVN_ERR(record(prop_reps, node_rev->prop_rep, result_pool));
305
SVN_ERR(record(data_reps, node_rev->data_rep, result_pool));
306
SVN_ERR(record(both_reps, node_rev->prop_rep, result_pool));
307
SVN_ERR(record(both_reps, node_rev->data_rep, result_pool));
313
/* Print REPS_REF_COUNT (a hash as for process_one_revision())
314
* to stdout in "refcount => sha1" format. A sha1 may appear
315
* more than once if not all its instances are shared. Prepend
318
* Use SCRATCH_POOL for temporary allocations.
321
pretty_print(const char *name,
322
apr_hash_t *reps_ref_counts,
323
apr_pool_t *scratch_pool)
325
apr_hash_index_t *hi;
327
if (reps_ref_counts == NULL)
330
for (hi = apr_hash_first(scratch_pool, reps_ref_counts);
331
hi; hi = apr_hash_next(hi))
333
struct value_t *value;
335
SVN_ERR(cancel_func(NULL));
337
value = svn__apr_hash_index_val(hi);
338
SVN_ERR(svn_cmdline_printf(scratch_pool, "%s %" APR_UINT64_T_FMT " %s\n",
339
name, value->refcount,
340
svn_checksum_to_cstring_display(
341
value->sha1_checksum,
348
/* Return an error unless FS is an fsfs fs. */
349
static svn_error_t *is_fs_fsfs(svn_fs_t *fs, apr_pool_t *scratch_pool)
351
const char *actual, *expected, *path;
353
path = svn_fs_path(fs, scratch_pool);
355
expected = SVN_FS_TYPE_FSFS;
356
SVN_ERR(svn_fs_type(&actual, path, scratch_pool));
358
if (strcmp(actual, expected) != 0)
359
return svn_error_createf(SVN_ERR_FS_UNKNOWN_FS_TYPE, NULL,
360
"Filesystem '%s' is not of type '%s'",
361
svn_dirent_local_style(path, scratch_pool),
367
/* The core logic. This function iterates the repository REPOS_PATH
368
* and sends all the (DATA and/or PROP) reps in each revision for counting
369
* by process_one_revision(). QUIET is passed to process_one_revision().
371
static svn_error_t *process(const char *repos_path,
375
apr_pool_t *scratch_pool)
377
apr_hash_t *prop_reps = NULL;
378
apr_hash_t *data_reps = NULL;
379
apr_hash_t *both_reps = NULL;
380
svn_revnum_t rev, youngest;
381
apr_pool_t *iterpool;
386
prop_reps = apr_hash_make(scratch_pool);
388
data_reps = apr_hash_make(scratch_pool);
390
both_reps = apr_hash_make(scratch_pool);
393
SVN_ERR(svn_repos_open2(&repos, repos_path, NULL, scratch_pool));
394
fs = svn_repos_fs(repos);
396
SVN_ERR(is_fs_fsfs(fs, scratch_pool));
398
SVN_ERR(svn_fs_youngest_rev(&youngest, fs, scratch_pool));
400
/* Iterate the revisions. */
401
iterpool = svn_pool_create(scratch_pool);
402
for (rev = 0; rev <= youngest; rev++)
404
svn_pool_clear(iterpool);
405
SVN_ERR(cancel_func(NULL));
406
SVN_ERR(process_one_revision(fs, rev, quiet,
407
prop_reps, data_reps, both_reps,
408
scratch_pool, iterpool));
410
svn_pool_destroy(iterpool);
413
SVN_ERR(pretty_print("prop", prop_reps, scratch_pool));
414
SVN_ERR(pretty_print("data", data_reps, scratch_pool));
415
SVN_ERR(pretty_print("both", both_reps, scratch_pool));
421
main(int argc, const char *argv[])
423
const char *repos_path;
424
apr_allocator_t *allocator;
426
svn_boolean_t prop = FALSE, data = FALSE;
427
svn_boolean_t quiet = FALSE;
430
const apr_getopt_option_t options[] =
432
{"data", OPT_DATA, 0, N_("display data reps stats")},
433
{"prop", OPT_PROP, 0, N_("display prop reps stats")},
434
{"both", OPT_BOTH, 0, N_("display combined (data+prop) reps stats")},
435
{"quiet", 'q', 0, N_("no progress (only errors) to stderr")},
436
{"help", 'h', 0, N_("display this help")},
437
{"version", OPT_VERSION, 0,
438
N_("show program version information")},
442
/* Initialize the app. */
443
if (svn_cmdline_init("svn-rep-sharing-stats", stderr) != EXIT_SUCCESS)
446
/* Create our top-level pool. Use a separate mutexless allocator,
447
* given this application is single threaded.
449
if (apr_allocator_create(&allocator))
452
apr_allocator_max_free_set(allocator, SVN_ALLOCATOR_RECOMMENDED_MAX_FREE);
454
pool = svn_pool_create_ex(NULL, allocator);
455
apr_allocator_owner_set(allocator, pool);
457
/* Check library versions */
458
err = check_lib_versions();
460
return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
462
err = svn_cmdline__getopt_init(&os, argc, argv, pool);
464
return svn_cmdline_handle_exit_error(err, pool, "svn-rep-sharing-stats: ");
466
SVN_INT_ERR(check_experimental());
473
apr_status_t status = apr_getopt_long(os, options, &opt, &arg);
474
if (APR_STATUS_IS_EOF(status))
476
if (status != APR_SUCCESS)
486
/* It seems we don't actually rep-share props yet. */
501
SVN_INT_ERR(version(pool));
510
/* Exactly 1 non-option argument,
511
* and at least one of "--data"/"--prop"/"--both".
513
if (os->ind + 1 != argc || (!data && !prop))
519
/* Grab REPOS_PATH from argv. */
520
SVN_INT_ERR(svn_utf_cstring_to_utf8(&repos_path, os->argv[os->ind], pool));
521
repos_path = svn_dirent_internal_style(repos_path, pool);
523
set_up_cancellation();
526
SVN_INT_ERR(process(repos_path, prop, data, quiet, pool));
530
svn_pool_destroy(pool);
531
/* Flush stdout to make sure that the user will see any printing errors. */
532
SVN_INT_ERR(svn_cmdline_fflush(stdout));