1
/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3
* ssgrep.c: Search spreadsheets for selected strings
5
* Copyright (C) 2008 Jody Goldberg
6
* Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
8
#include <gnumeric-config.h>
10
#include "libgnumeric.h"
11
#include <goffice/app/go-plugin.h>
12
#include "command-context-stderr.h"
13
#include <goffice/app/io-context.h>
14
#include <goffice/app/error-info.h>
15
#include "workbook-view.h"
18
#include "gnm-plugin.h"
25
#include "parse-util.h"
26
#include "sheet-object-cell-comment.h"
28
#include <goffice/utils/go-file.h>
29
#include <goffice/app/go-cmd-context.h>
30
#include <gsf/gsf-input-stdio.h>
31
#include <gsf/gsf-input-textline.h>
32
#include <glib/gi18n.h>
35
/*
 * Which parts of a workbook are searched.  ssgrep() forwards these to the
 * search object as its "search-strings", "search-other-values",
 * "search-expressions", "search-expression-results", "search-comments" and
 * "search-scripts" properties.
 */
static gboolean ssgrep_locus_values = TRUE;
36
static gboolean ssgrep_locus_expressions = TRUE;
37
static gboolean ssgrep_locus_results = FALSE;
38
static gboolean ssgrep_locus_comments = TRUE;
39
static gboolean ssgrep_locus_scripts = TRUE;
40
/*
 * Matching and output switches.  Each one is the storage location of a
 * command-line flag declared in ssgrep_options.
 */
static gboolean ssgrep_ignore_case = FALSE;
41
static gboolean ssgrep_match_words = FALSE;
42
static gboolean ssgrep_quiet = FALSE;
43
static gboolean ssgrep_count = FALSE;
44
/*
 * Starts at the out-of-band value 2.  main() checks for that value and, when
 * it is still present, replaces it with (N > 1) -- presumably so that file
 * names are printed by default only when several files are searched.
 */
static gboolean ssgrep_print_filenames = (gboolean)2;
45
static gboolean ssgrep_print_matching_filenames = FALSE;
46
static gboolean ssgrep_print_nonmatching_filenames = FALSE;
47
static gboolean ssgrep_print_locus = FALSE;
48
static gboolean ssgrep_print_type = FALSE;
49
/*
 * Pattern given to the search object as "search-text".  main() builds it
 * either from the lines of the pattern file or from argv[1].
 */
static char *ssgrep_pattern = NULL;
50
static gboolean ssgrep_fixed_strings = FALSE;
51
static gboolean ssgrep_recalc = FALSE;
52
static gboolean ssgrep_invert_match = FALSE;
53
/* When set, ssgrep() calls search_string_table() for the workbook. */
static gboolean ssgrep_string_table = FALSE;
55
static gboolean ssgrep_show_version = FALSE;
56
/* Shell argument naming a file of patterns, one per line; NULL if unused. */
static char *ssgrep_pattern_file = NULL;
58
static gboolean ssgrep_error = FALSE;
59
/* Set to TRUE by ssgrep(); main() returns 0 when it is TRUE and 1 otherwise. */
static gboolean ssgrep_any_matches = FALSE;
61
/*
 * Command-line option table; main() registers it with
 * g_option_context_add_main_entries().  Every entry stores its result in one
 * of the file-scope ssgrep_* variables.  The N_() wrapper only marks the
 * description for translation; it does not translate at this point.
 */
static GOptionEntry const ssgrep_options [] = {
64
0, G_OPTION_ARG_NONE, &ssgrep_count,
65
N_("Only print a count of matches per file"),
69
"string-table-count", 'C',
70
0, G_OPTION_ARG_NONE, &ssgrep_string_table,
71
N_("Search only via the string table, display a count of the references."),
77
0, G_OPTION_ARG_STRING, &ssgrep_pattern_file,
78
N_("Get patterns from a file, one per line"),
84
0, G_OPTION_ARG_NONE, &ssgrep_fixed_strings,
85
N_("Pattern is a set of fixed strings"),
91
0, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
92
N_("Print the filename for each match"),
97
"without-filename", 'h',
98
/*
 * Same variable as the entry above, but G_OPTION_FLAG_REVERSE inverts the
 * sense of the flag, so giving -h stores FALSE.
 */
G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
99
N_("Do not print the filename for each match"),
105
0, G_OPTION_ARG_NONE, &ssgrep_ignore_case,
106
N_("Ignore differences in letter case"),
111
"files-with-matches", 'l',
112
0, G_OPTION_ARG_NONE, &ssgrep_print_matching_filenames,
113
N_("Print filenames with matches"),
118
"files-without-matches", 'L',
119
0, G_OPTION_ARG_NONE, &ssgrep_print_nonmatching_filenames,
120
N_("Print filenames without matches"),
126
0, G_OPTION_ARG_NONE, &ssgrep_print_locus,
127
N_("Print the location of each match"),
133
0, G_OPTION_ARG_NONE, &ssgrep_quiet,
134
N_("Suppress all normal output"),
139
"search-results", 'R',
140
0, G_OPTION_ARG_NONE, &ssgrep_locus_results,
141
N_("Search results of expressions too"),
147
0, G_OPTION_ARG_NONE, &ssgrep_print_type,
148
N_("Print the location type of each match"),
154
0, G_OPTION_ARG_NONE, &ssgrep_invert_match,
155
N_("Search for cells that do not match"),
161
0, G_OPTION_ARG_NONE, &ssgrep_show_version,
162
N_("Display program version"),
168
0, G_OPTION_ARG_NONE, &ssgrep_match_words,
169
N_("Match only whole words"),
175
0, G_OPTION_ARG_NONE, &ssgrep_recalc,
176
N_("Recalculate all cells"),
180
/* ---------------------------------------- */
193
/*
 * Record n occurrences of the lower-cased target `clean` in state->results.
 *
 * The results table is created on first use.  When the table already exists
 * and holds an entry for `clean`, the stored count is added to n before the
 * entry is replaced, so counts accumulate across calls.  Counts live directly
 * in the value pointer (GUINT_TO_POINTER / GPOINTER_TO_UINT).
 *
 * The table is created with g_hash_table_new(), i.e. without key or value
 * destroy functions, so it does not take ownership of `clean`.
 */
add_result (StringTableSearch *state, char const *clean, unsigned int n)
197
if (NULL == state->results)
198
state->results = g_hash_table_new (g_str_hash, g_str_equal);
199
else if (NULL != (prev = g_hash_table_lookup (state->results, clean)))
200
n += GPOINTER_TO_UINT (prev);
201
g_hash_table_replace (state->results, (gpointer) clean, GUINT_TO_POINTER (n));
205
/*
 * Callback passed to gnm_string_foreach() by search_string_table().
 *
 * Lower-cases `key`, looks the result up in state->targets and reports the
 * ref_count of `str` (treated as a GnmString) through add_result().
 *
 * NOTE(review): `key` is tagged G_GNUC_UNUSED although it is read below.
 * NOTE(review): the line between the lookup and add_result() is not visible
 * in this excerpt; presumably it restricts the call to strings that matched
 * a target (orig != NULL) -- confirm.
 */
cb_check_strings (G_GNUC_UNUSED gpointer key, gpointer str, gpointer user_data)
207
StringTableSearch *state = user_data;
208
char *clean = g_utf8_strdown (key, -1);
209
char const *orig = g_hash_table_lookup (state->targets, clean);
211
add_result (state, clean, ((GnmString *)str)->ref_count);
216
/*
 * Callback run by search_string_table() for every entry of the targets table
 * (key `clean`, value `orig`).
 *
 * Looks up a function named `clean` with gnm_func_lookup() in state->wb and
 * reports its ref_count through add_result().
 *
 * NOTE(review): the line between the lookup and add_result() is not visible
 * in this excerpt; presumably it skips targets for which no function was
 * found (func == NULL) -- confirm.
 */
cb_check_func (gpointer clean, gpointer orig, gpointer user_data)
218
StringTableSearch *state = user_data;
219
GnmFunc *func = gnm_func_lookup (clean, state->wb);
221
add_result (state, clean, func->ref_count);
225
/*
 * Callback run by cb_check_module() for every entry of the targets table
 * while one module is being scanned.
 *
 * Repeatedly applies strstr() to state->lc_code, the lower-cased module
 * text, looking for `clean`, then reports a count n through add_result().
 *
 * NOTE(review): the declaration and update of n, the advance of ptr inside
 * the loop, and any guard around add_result() are not visible in this
 * excerpt; presumably n counts the occurrences found -- confirm.
 */
cb_find_target_in_module (gpointer clean, gpointer orig, gpointer user_data)
227
StringTableSearch *state = user_data;
229
char const *ptr = state->lc_code;
231
while (NULL != (ptr = strstr (ptr, clean))) {
237
add_result (state, clean, n);
241
/*
 * Callback run by search_string_table() for every entry (name, code) of the
 * table stored on the workbook under the "VBA" data key.
 *
 * Makes a lower-cased copy of `code` in state->lc_code, runs
 * cb_find_target_in_module() over every target, then frees the copy and
 * resets state->lc_code to NULL so no stale pointer is left behind.
 */
cb_check_module (gpointer name, gpointer code, gpointer user_data)
243
StringTableSearch *state = user_data;
244
state->lc_code = g_utf8_strdown (code, -1);
245
g_hash_table_foreach (state->targets, &cb_find_target_in_module, state);
246
g_free ((gpointer)state->lc_code);
247
state->lc_code = NULL;
251
/*
 * Print one entry of the results table as "<TAB>name : count".
 * `count` carries the number inside the pointer value (GPOINTER_TO_UINT).
 * search_string_table() passes this function to g_hash_table_foreach()
 * through a GHFunc cast, with NULL as user data.
 */
cb_dump_results (gpointer name, gpointer count)
253
g_print ("\t%s : %u\n", (char const *)name, GPOINTER_TO_UINT (count));
257
/*
 * String-table search of one workbook.
 *
 * wb        workbook to inspect
 * file_name name printed as the heading line when anything was found
 * targets   table of lower-cased targets (see add_target)
 *
 * Three sources are checked against the targets: the strings visited by
 * gnm_string_foreach(), functions (cb_check_func, one call per target), and
 * the module table stored on the workbook under the "VBA" data key, when
 * present.  If any result was recorded, the file name and every
 * (target, count) pair are printed and the results table is destroyed.
 *
 * NOTE(review): cb_check_func() reads state->wb; the assignment of that
 * field is not visible in this excerpt.
 */
search_string_table (Workbook *wb, char const *file_name, GHashTable *targets)
259
StringTableSearch state;
263
state.targets = targets;
264
/* add_result() creates the table lazily, so NULL means "nothing found". */
state.results = NULL;
265
gnm_string_foreach (&cb_check_strings, &state);
266
g_hash_table_foreach (targets, &cb_check_func, &state);
268
if (NULL != (modules = g_object_get_data (G_OBJECT (wb), "VBA")))
269
g_hash_table_foreach (modules, &cb_check_module, &state);
270
if (NULL != state.results) {
271
g_print ("%s\n", file_name);
272
g_hash_table_foreach (state.results, (GHFunc)&cb_dump_results, NULL);
273
g_hash_table_destroy (state.results);
278
/*
 * Search one spreadsheet and print the results.
 *
 * arg     file name as given by the user; used as the prefix in all output
 * uri     URI form of arg; this is what actually gets loaded
 * ioc     I/O context passed to the loader
 * targets lower-cased target table, forwarded to search_string_table()
 * pattern NOTE(review): not referenced in the lines visible here; the
 *         search text is taken from the global ssgrep_pattern instead
 *
 * Several lines of this function (local declarations, braces, some guards)
 * are not visible in this excerpt; the comments below describe only what
 * the visible statements do.
 */
ssgrep (const char *arg, char const *uri, IOContext *ioc, GHashTable *targets, char const *pattern)
282
GnmSearchReplace *search;
287
wbv = wb_view_new_from_uri (uri, NULL, ioc, NULL);
292
wb = wb_view_get_workbook (wbv);
294
/* Recalculation is only considered when expression results are searched. */
if (ssgrep_locus_results) {
296
workbook_recalc_all (wb);
298
workbook_recalc (wb);
301
if (ssgrep_string_table) {
302
search_string_table (wb, arg, targets);
307
/*
 * Configure a search object from the global settings.  The scope is the
 * whole workbook; "sheet" is given the workbook's first sheet.
 */
search = (GnmSearchReplace*)
308
g_object_new (GNM_SEARCH_REPLACE_TYPE,
309
"search-text", ssgrep_pattern,
311
"invert", ssgrep_invert_match,
312
"ignore-case", ssgrep_ignore_case,
313
"match-words", ssgrep_match_words,
314
"search-strings", ssgrep_locus_values,
315
"search-other-values", ssgrep_locus_values,
316
"search-expressions", ssgrep_locus_expressions,
317
"search-expression-results", ssgrep_locus_results,
318
"search-comments", ssgrep_locus_comments,
319
"search-scripts", ssgrep_locus_scripts,
320
"sheet", workbook_sheet_by_index (wb, 0),
321
"scope", GNM_SRS_WORKBOOK,
324
/* Collect the candidate cells, then keep only those that match. */
cells = gnm_search_collect_cells (search);
325
matches = gnm_search_filter_matching (search, cells);
326
has_match = (matches->len > 0);
329
/* Feeds the process exit status computed in main(). */
ssgrep_any_matches = TRUE;
333
/*
 * Output modes form one else-if chain, so only the first applicable mode
 * produces output: non-matching file names, matching file names, match
 * count, and finally one line per match.
 */
} else if (ssgrep_print_nonmatching_filenames) {
335
g_print ("%s\n", arg);
336
} else if (ssgrep_print_matching_filenames) {
338
g_print ("%s\n", arg);
339
} else if (ssgrep_count) {
340
if (ssgrep_print_filenames)
341
g_print ("%s:", arg);
342
g_print ("%u\n", matches->len);
345
for (ui = 0; ui < matches->len; ui++) {
346
const GnmSearchFilterResult *item = g_ptr_array_index (matches, ui);
348
const char *locus_type = "";
350
/* Choose the text to print and a label for where the match was found. */
switch (item->locus) {
351
case GNM_SRL_CONTENTS: {
352
GnmCell const *cell =
353
sheet_cell_get (item->ep.sheet,
356
/* Text as entered in the cell. */
txt = gnm_cell_get_entered_text (cell);
357
locus_type = _("cell");
361
case GNM_SRL_VALUE: {
362
GnmCell const *cell =
363
sheet_cell_get (item->ep.sheet,
366
/* The cell's value rendered as a string, when the cell has one. */
if (cell && cell->value)
367
txt = value_get_as_string (cell->value);
368
locus_type = _("result");
372
case GNM_SRL_COMMENT: {
373
GnmComment *comment = sheet_get_comment (item->ep.sheet, &item->ep.eval);
374
/* Duplicate the comment text (g_strdup) rather than using it in place. */
txt = g_strdup (cell_comment_text_get (comment));
375
locus_type = _("comment");
379
; /* Probably should not happen. */
382
/* Optional prefixes, each terminated by ':', precede the matched text. */
if (ssgrep_print_filenames)
383
g_print ("%s:", arg);
385
if (ssgrep_print_type)
386
g_print ("%s:", locus_type);
388
if (ssgrep_print_locus)
390
item->ep.sheet->name_quoted,
391
cellpos_as_string (&item->ep.eval));
394
g_print ("%s\n", txt);
401
/* Release the match list, the collected cells and the search object. */
gnm_search_filter_matching_free (matches);
402
gnm_search_collect_cells_free (cells);
403
g_object_unref (search);
407
/* simple stripped down hash of lower case target, only used for string table
410
// Register one search target in ssgrep_targets.
//
// A private copy of target is made and stripped of leading and trailing
// white space (g_strstrip works in place on that copy).  The copy is stored
// as the value, keyed by its lower-cased form from g_utf8_strdown().
// Both strings are newly allocated here; main() creates the table with
// g_free as key and value destroy function, so the table releases them.
add_target (GHashTable *ssgrep_targets, char const *target)
412
char *orig = g_strstrip (g_strdup (target));
413
char *clean = g_utf8_strdown (orig, -1);
414
g_hash_table_insert (ssgrep_targets, clean, orig);
418
// Program entry point: parse the options, build the search pattern and the
// target table, initialise plugins, run ssgrep() on every file argument and
// turn the overall outcome into the exit status.
//
// Many lines of this function (declarations, guards, early returns) are not
// visible in this excerpt; the comments describe the visible statements only.
main (int argc, char const **argv)
420
GHashTable *ssgrep_targets;
421
ErrorInfo *plugin_errs;
424
GOptionContext *ocontext;
425
GError *error = NULL;
427
// NOTE(review): the name suggests standard input, but file descriptor 1 is
// conventionally standard output (stdin is 0) -- confirm "fd://1" is intended.
const char *argv_stdin[] = { "fd://1", NULL };
429
/* No code before here, we need to init threads */
430
argv = gnm_pre_parse_init (argc, argv);
432
ocontext = g_option_context_new (_("PATTERN INFILE..."));
433
g_option_context_add_main_entries (ocontext, ssgrep_options, GETTEXT_PACKAGE);
434
g_option_context_add_group (ocontext, gnm_get_option_group ());
435
/* A parse failure is described by `error` and reported just below. */
g_option_context_parse (ocontext, &argc, (gchar ***)&argv, &error);
436
g_option_context_free (ocontext);
439
g_printerr (_("%s\nRun '%s --help' to see a full list of available command line options.\n"),
440
error->message, g_get_prgname ());
441
g_error_free (error);
445
if (ssgrep_show_version) {
446
g_printerr (_("version '%s'\ndatadir := '%s'\nlibdir := '%s'\n"),
447
GNM_VERSION_FULL, gnm_sys_data_dir (), gnm_sys_lib_dir ());
453
/* The table owns its keys and values: both are released with g_free. */
ssgrep_targets = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
454
/* Pattern source 1: a file holding one pattern per line. */
if (ssgrep_pattern_file) {
455
char *uri = go_shell_arg_to_uri (ssgrep_pattern_file);
457
GsfInputTextline *textline;
459
const unsigned char *line;
462
input = go_file_open (uri, &err);
466
g_printerr (_("%s: Cannot read %s: %s\n"),
467
g_get_prgname (), ssgrep_pattern_file, err->message);
472
textline = (GsfInputTextline *)gsf_input_textline_new (input);
473
/* Drop our reference; presumably textline keeps its own -- confirm. */
g_object_unref (G_OBJECT (input));
475
pat = g_string_new (NULL);
476
/* Read lines until gsf_input_textline_ascii_gets() returns NULL. */
while (NULL != (line = gsf_input_textline_ascii_gets (textline))) {
478
/*
 * Patterns are combined into a single pattern with '|' as separator.
 * The guard around this statement, if any, is not visible here.
 */
g_string_append_c (pat, '|');
480
/* With fixed strings the line is quoted via go_regexp_quote(). */
if (ssgrep_fixed_strings)
481
go_regexp_quote (pat, line);
483
g_string_append (pat, line);
485
add_target (ssgrep_targets, line);
488
/* FALSE keeps the character data; ssgrep_pattern takes it over. */
ssgrep_pattern = g_string_free (pat, FALSE);
490
g_object_unref (G_OBJECT (textline));
496
g_printerr (_("%s: Missing pattern\n"), g_get_prgname ());
500
/* Pattern source 2: the first non-option argument. */
if (ssgrep_fixed_strings) {
501
GString *pat = g_string_new (NULL);
502
go_regexp_quote (pat, argv[1]);
503
ssgrep_pattern = g_string_free (pat, FALSE);
505
ssgrep_pattern = g_strdup (argv[1]);
506
add_target (ssgrep_targets, argv[1]);
512
if (argv[i] == NULL) {
518
/* Initialise the plugin system and activate all available plugins. */
cc = cmd_context_stderr_new ();
519
gnm_plugins_init (GO_CMD_CONTEXT (cc));
520
go_plugin_db_activate_plugin_list (
521
go_plugins_get_available_plugins (), &plugin_errs);
523
/* FIXME: What do we want to do here? */
524
error_info_free (plugin_errs);
527
ioc = gnumeric_io_context_new (cc);
528
gnm_io_context_set_num_files (ioc, N);
530
/*
 * 2 is the initial sentinel value of ssgrep_print_filenames.  If it is
 * still present, file names are printed only when N > 1.
 */
if (ssgrep_print_filenames == (gboolean)2)
531
ssgrep_print_filenames = (N > 1);
533
/* Search every remaining argument; the argv array is NULL-terminated. */
for (; argv[i]; i++) {
534
const char *arg = argv[i];
535
char *uri = go_shell_arg_to_uri (arg);
536
gnm_io_context_processing_file (ioc, uri);
537
ssgrep (arg, uri, ioc, ssgrep_targets, ssgrep_pattern);
541
g_hash_table_destroy (ssgrep_targets);
543
g_object_unref (ioc);
547
gnm_pre_parse_shutdown ();
549
/* This special case matches what "man grep" says. */
550
if (ssgrep_quiet && ssgrep_any_matches)
556
/* Exit status 0 when at least one match was found, 1 otherwise. */
return ssgrep_any_matches ? 0 : 1;