~ubuntu-branches/ubuntu/quantal/gnumeric/quantal

« back to all changes in this revision

Viewing changes to src/ssgrep.c

  • Committer: Bazaar Package Importer
  • Author(s): Gauvain Pocentek
  • Date: 2009-06-07 11:10:47 UTC
  • mfrom: (1.1.19 upstream) (2.1.2 squeeze)
  • Revision ID: james.westby@ubuntu.com-20090607111047-l3rtbzfjxvmi1kx0
Tags: 1.9.8-1ubuntu1
* Merge from debian unstable, remaining changes:
  - Promoted gnumeric-doc to Recommends in gnumeric package for help to be
    installed automatically
  - gnumeric-gtk is a transitional package
  - gnumeric conflicts with gnumeric-gtk << 1.8.3-3ubuntu1
  - call intltool-update in po*
  - remove psiconv support (psiconv is in universe):
    o debian/control: remove B-D on libpsiconv-dev
    o debian/rules: don't pass --with-psiconv to ./configure

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
 
2
/*
 
3
 * ssgrep.c: Search spreadsheets for selected strings
 
4
 *
 
5
 * Copyright (C) 2008 Jody Goldberg
 
6
 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
 
7
 */
 
8
#include <gnumeric-config.h>
 
9
#include "gnumeric.h"
 
10
#include "libgnumeric.h"
 
11
#include <goffice/app/go-plugin.h>
 
12
#include "command-context-stderr.h"
 
13
#include <goffice/app/io-context.h>
 
14
#include <goffice/app/error-info.h>
 
15
#include "workbook-view.h"
 
16
#include "workbook.h"
 
17
#include "gutils.h"
 
18
#include "gnm-plugin.h"
 
19
#include "search.h"
 
20
#include "sheet.h"
 
21
#include "cell.h"
 
22
#include "value.h"
 
23
#include "str.h"
 
24
#include "func.h"
 
25
#include "parse-util.h"
 
26
#include "sheet-object-cell-comment.h"
 
27
 
 
28
#include <goffice/utils/go-file.h>
 
29
#include <goffice/app/go-cmd-context.h>
 
30
#include <gsf/gsf-input-stdio.h>
 
31
#include <gsf/gsf-input-textline.h>
 
32
#include <glib/gi18n.h>
 
33
#include <string.h>
 
34
 
 
35
static gboolean ssgrep_locus_values = TRUE;
 
36
static gboolean ssgrep_locus_expressions = TRUE;
 
37
static gboolean ssgrep_locus_results = FALSE;
 
38
static gboolean ssgrep_locus_comments = TRUE;
 
39
static gboolean ssgrep_locus_scripts = TRUE;
 
40
static gboolean ssgrep_ignore_case = FALSE;
 
41
static gboolean ssgrep_match_words = FALSE;
 
42
static gboolean ssgrep_quiet = FALSE;
 
43
static gboolean ssgrep_count = FALSE;
 
44
static gboolean ssgrep_print_filenames = (gboolean)2;
 
45
static gboolean ssgrep_print_matching_filenames = FALSE;
 
46
static gboolean ssgrep_print_nonmatching_filenames = FALSE;
 
47
static gboolean ssgrep_print_locus = FALSE;
 
48
static gboolean ssgrep_print_type = FALSE;
 
49
static char *ssgrep_pattern = NULL;
 
50
static gboolean ssgrep_fixed_strings = FALSE;
 
51
static gboolean ssgrep_recalc = FALSE;
 
52
static gboolean ssgrep_invert_match = FALSE;
 
53
static gboolean ssgrep_string_table = FALSE;
 
54
 
 
55
static gboolean ssgrep_show_version = FALSE;
 
56
static char *ssgrep_pattern_file = NULL;
 
57
 
 
58
static gboolean ssgrep_error = FALSE;
 
59
static gboolean ssgrep_any_matches = FALSE;
 
60
 
 
61
static GOptionEntry const ssgrep_options [] = {
 
62
        {
 
63
                "count", 'c',
 
64
                0, G_OPTION_ARG_NONE, &ssgrep_count,
 
65
                N_("Only print a count of matches per file"),
 
66
                NULL
 
67
        },
 
68
        {
 
69
                "string-table-count", 'C',
 
70
                0, G_OPTION_ARG_NONE, &ssgrep_string_table,
 
71
                N_("Search only via the string table, display a count of the references."),
 
72
                NULL
 
73
        },
 
74
 
 
75
        {
 
76
                "pattern-file", 'f',
 
77
                0, G_OPTION_ARG_STRING, &ssgrep_pattern_file,
 
78
                N_("Get patterns from a file, one per line"),
 
79
                N_("FILE")
 
80
        },
 
81
 
 
82
        {
 
83
                "fixed-strings", 'F',
 
84
                0, G_OPTION_ARG_NONE, &ssgrep_fixed_strings,
 
85
                N_("Pattern is a set of fixed strings"),
 
86
                NULL
 
87
        },
 
88
 
 
89
        {
 
90
                "with-filename", 'H',
 
91
                0, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
 
92
                N_("Print the filename for each match"),
 
93
                NULL
 
94
        },
 
95
 
 
96
        {
 
97
                "without-filename", 'h',
 
98
                G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &ssgrep_print_filenames,
 
99
                N_("Do not print the filename for each match"),
 
100
                NULL
 
101
        },
 
102
 
 
103
        {
 
104
                "ignore-case", 'i',
 
105
                0, G_OPTION_ARG_NONE, &ssgrep_ignore_case,
 
106
                N_("Ignore differences in letter case"),
 
107
                NULL
 
108
        },
 
109
 
 
110
        {
 
111
                "files-with-matches", 'l',
 
112
                0, G_OPTION_ARG_NONE, &ssgrep_print_matching_filenames,
 
113
                N_("Print filenames with matches"),
 
114
                NULL
 
115
        },
 
116
 
 
117
        {
 
118
                "files-without-matches", 'L',
 
119
                0, G_OPTION_ARG_NONE, &ssgrep_print_nonmatching_filenames,
 
120
                N_("Print filenames without matches"),
 
121
                NULL
 
122
        },
 
123
 
 
124
        {
 
125
                "print-locus", 'n',
 
126
                0, G_OPTION_ARG_NONE, &ssgrep_print_locus,
 
127
                N_("Print the location of each match"),
 
128
                NULL
 
129
        },
 
130
 
 
131
        {
 
132
                "quiet", 'q',
 
133
                0, G_OPTION_ARG_NONE, &ssgrep_quiet,
 
134
                N_("Suppress all normal output"),
 
135
                NULL
 
136
        },
 
137
 
 
138
        {
 
139
                "search-results", 'R',
 
140
                0, G_OPTION_ARG_NONE, &ssgrep_locus_results,
 
141
                N_("Search results of expressions too"),
 
142
                NULL
 
143
        },
 
144
 
 
145
        {
 
146
                "print-type", 'T',
 
147
                0, G_OPTION_ARG_NONE, &ssgrep_print_type,
 
148
                N_("Print the location type of each match"),
 
149
                NULL
 
150
        },
 
151
 
 
152
        {
 
153
                "invert-match", 'v',
 
154
                0, G_OPTION_ARG_NONE, &ssgrep_invert_match,
 
155
                N_("Search for cells that do not match"),
 
156
                NULL
 
157
        },
 
158
 
 
159
        {
 
160
                "version", 'V',
 
161
                0, G_OPTION_ARG_NONE, &ssgrep_show_version,
 
162
                N_("Display program version"),
 
163
                NULL
 
164
        },
 
165
 
 
166
        {
 
167
                "word-regexp", 'w',
 
168
                0, G_OPTION_ARG_NONE, &ssgrep_match_words,
 
169
                N_("Match only whole words"),
 
170
                NULL
 
171
        },
 
172
 
 
173
        {
 
174
                "recalc", 0,
 
175
                0, G_OPTION_ARG_NONE, &ssgrep_recalc,
 
176
                N_("Recalculate all cells"),
 
177
                NULL
 
178
        },
 
179
 
 
180
        /* ---------------------------------------- */
 
181
 
 
182
        { NULL }
 
183
};
 
184
 
 
185
typedef struct {
 
186
        Workbook   *wb;
 
187
        GHashTable *targets;
 
188
        GHashTable *results;
 
189
        char const *lc_code;
 
190
} StringTableSearch;
 
191
 
 
192
static void
 
193
add_result (StringTableSearch *state, char const *clean, unsigned int n)
 
194
{
 
195
        gpointer prev;
 
196
 
 
197
        if (NULL == state->results)
 
198
                state->results = g_hash_table_new (g_str_hash, g_str_equal);
 
199
        else if (NULL != (prev = g_hash_table_lookup (state->results, clean)))
 
200
                n += GPOINTER_TO_UINT (prev);
 
201
        g_hash_table_replace (state->results, (gpointer) clean, GUINT_TO_POINTER (n));
 
202
}
 
203
 
 
204
static void
 
205
cb_check_strings (G_GNUC_UNUSED gpointer key, gpointer str, gpointer user_data)
 
206
{
 
207
        StringTableSearch *state = user_data;
 
208
        char *clean = g_utf8_strdown (key, -1);
 
209
        char const *orig = g_hash_table_lookup (state->targets, clean);
 
210
        if (NULL != orig)
 
211
                add_result (state, clean, ((GnmString *)str)->ref_count);
 
212
        g_free (clean);
 
213
}
 
214
 
 
215
static void
 
216
cb_check_func (gpointer clean, gpointer orig, gpointer user_data)
 
217
{
 
218
        StringTableSearch *state = user_data;
 
219
        GnmFunc *func = gnm_func_lookup (clean, state->wb);
 
220
        if (NULL != func)
 
221
                add_result (state, clean, func->ref_count);
 
222
}
 
223
 
 
224
static void
 
225
cb_find_target_in_module (gpointer clean, gpointer orig, gpointer user_data)
 
226
{
 
227
        StringTableSearch *state = user_data;
 
228
        unsigned n = 0;
 
229
        char const *ptr = state->lc_code;
 
230
 
 
231
        while (NULL != (ptr = strstr (ptr, clean))) {
 
232
                n++;
 
233
                ptr++;
 
234
        }
 
235
 
 
236
        if (n > 0)
 
237
                add_result (state, clean, n);
 
238
}
 
239
 
 
240
static void
 
241
cb_check_module (gpointer name, gpointer code, gpointer user_data)
 
242
{
 
243
        StringTableSearch *state = user_data;
 
244
        state->lc_code = g_utf8_strdown (code, -1);
 
245
        g_hash_table_foreach (state->targets, &cb_find_target_in_module, state);
 
246
        g_free ((gpointer)state->lc_code);
 
247
        state->lc_code = NULL;
 
248
}
 
249
 
 
250
static void
 
251
cb_dump_results (gpointer name, gpointer count)
 
252
{
 
253
        g_print ("\t%s : %u\n", (char const *)name, GPOINTER_TO_UINT (count));
 
254
}
 
255
 
 
256
static void
 
257
search_string_table (Workbook *wb, char const *file_name, GHashTable *targets)
 
258
{
 
259
        StringTableSearch        state;
 
260
        GHashTable *modules;
 
261
 
 
262
        state.wb        = wb;
 
263
        state.targets   = targets;
 
264
        state.results   = NULL;
 
265
        gnm_string_foreach (&cb_check_strings, &state);
 
266
        g_hash_table_foreach (targets, &cb_check_func, &state);
 
267
 
 
268
        if (NULL != (modules = g_object_get_data (G_OBJECT (wb), "VBA")))
 
269
                g_hash_table_foreach (modules, &cb_check_module, &state);
 
270
        if (NULL != state.results) {
 
271
                g_print ("%s\n", file_name);
 
272
                g_hash_table_foreach (state.results, (GHFunc)&cb_dump_results, NULL);
 
273
                g_hash_table_destroy (state.results);
 
274
        }
 
275
}
 
276
 
 
277
static void
 
278
ssgrep (const char *arg, char const *uri, IOContext *ioc, GHashTable *targets, char const *pattern)
 
279
{
 
280
        WorkbookView *wbv;
 
281
        Workbook *wb;
 
282
        GnmSearchReplace *search;
 
283
        GPtrArray *cells;
 
284
        GPtrArray *matches;
 
285
        gboolean has_match;
 
286
 
 
287
        wbv = wb_view_new_from_uri (uri, NULL, ioc, NULL);
 
288
        if (wbv == NULL) {
 
289
                ssgrep_error = TRUE;
 
290
                return;
 
291
        }
 
292
        wb = wb_view_get_workbook (wbv);
 
293
 
 
294
        if (ssgrep_locus_results) {
 
295
                if (ssgrep_recalc)
 
296
                        workbook_recalc_all (wb);
 
297
                else
 
298
                        workbook_recalc (wb);
 
299
        }
 
300
 
 
301
        if (ssgrep_string_table) {
 
302
                search_string_table (wb, arg, targets);
 
303
                g_object_unref (wb);
 
304
                return;
 
305
        }
 
306
 
 
307
        search = (GnmSearchReplace*)
 
308
                g_object_new (GNM_SEARCH_REPLACE_TYPE,
 
309
                              "search-text", ssgrep_pattern,
 
310
                              "is-regexp", TRUE,
 
311
                              "invert", ssgrep_invert_match,
 
312
                              "ignore-case", ssgrep_ignore_case,
 
313
                              "match-words", ssgrep_match_words,
 
314
                              "search-strings", ssgrep_locus_values,
 
315
                              "search-other-values", ssgrep_locus_values,
 
316
                              "search-expressions", ssgrep_locus_expressions,
 
317
                              "search-expression-results", ssgrep_locus_results,
 
318
                              "search-comments", ssgrep_locus_comments,
 
319
                              "search-scripts", ssgrep_locus_scripts,
 
320
                              "sheet", workbook_sheet_by_index (wb, 0),
 
321
                              "scope", GNM_SRS_WORKBOOK,
 
322
                              NULL);
 
323
 
 
324
        cells = gnm_search_collect_cells (search);
 
325
        matches = gnm_search_filter_matching (search, cells);
 
326
        has_match = (matches->len > 0);
 
327
 
 
328
        if (has_match)
 
329
                ssgrep_any_matches = TRUE;
 
330
 
 
331
        if (ssgrep_quiet) {
 
332
                /* Nothing */
 
333
        } else if (ssgrep_print_nonmatching_filenames) {
 
334
                if (!has_match)
 
335
                        g_print ("%s\n", arg);
 
336
        } else if (ssgrep_print_matching_filenames) {
 
337
                if (has_match)
 
338
                        g_print ("%s\n", arg);
 
339
        } else if (ssgrep_count) {
 
340
                if (ssgrep_print_filenames)
 
341
                        g_print ("%s:", arg);
 
342
                g_print ("%u\n", matches->len);
 
343
        } else {
 
344
                unsigned ui;
 
345
                for (ui = 0; ui < matches->len; ui++) {
 
346
                        const GnmSearchFilterResult *item = g_ptr_array_index (matches, ui);
 
347
                        char *txt = NULL;
 
348
                        const char *locus_type = "";
 
349
 
 
350
                        switch (item->locus) {
 
351
                        case GNM_SRL_CONTENTS: {
 
352
                                GnmCell const *cell =
 
353
                                        sheet_cell_get (item->ep.sheet,
 
354
                                                        item->ep.eval.col,
 
355
                                                        item->ep.eval.row);
 
356
                                txt = gnm_cell_get_entered_text (cell);
 
357
                                locus_type = _("cell");
 
358
                                break;
 
359
                        }
 
360
 
 
361
                        case GNM_SRL_VALUE: {
 
362
                                GnmCell const *cell =
 
363
                                        sheet_cell_get (item->ep.sheet,
 
364
                                                        item->ep.eval.col,
 
365
                                                        item->ep.eval.row);
 
366
                                if (cell && cell->value)
 
367
                                        txt = value_get_as_string (cell->value);
 
368
                                locus_type = _("result");
 
369
                                break;
 
370
                        }
 
371
 
 
372
                        case GNM_SRL_COMMENT: {
 
373
                                GnmComment *comment = sheet_get_comment (item->ep.sheet, &item->ep.eval);
 
374
                                txt = g_strdup (cell_comment_text_get (comment));
 
375
                                locus_type = _("comment");
 
376
                                break;
 
377
                        }
 
378
                        default:
 
379
                                ; /* Probably should not happen.  */
 
380
                        }
 
381
 
 
382
                        if (ssgrep_print_filenames)
 
383
                                g_print ("%s:", arg);
 
384
 
 
385
                        if (ssgrep_print_type)
 
386
                                g_print ("%s:", locus_type);
 
387
 
 
388
                        if (ssgrep_print_locus)
 
389
                                g_print ("%s!%s:",
 
390
                                         item->ep.sheet->name_quoted,
 
391
                                         cellpos_as_string (&item->ep.eval));
 
392
 
 
393
                        if (txt) {
 
394
                                g_print ("%s\n", txt);
 
395
                                g_free (txt);
 
396
                        } else
 
397
                                g_print ("\n");
 
398
                }
 
399
        }
 
400
 
 
401
        gnm_search_filter_matching_free (matches);
 
402
        gnm_search_collect_cells_free (cells);
 
403
        g_object_unref (search);
 
404
        g_object_unref (wb);
 
405
}
 
406
 
 
407
/* simple stripped down hash of lower case target, only used for string table
 
408
 * searches */
 
409
static void
 
410
add_target (GHashTable *ssgrep_targets, char const *target)
 
411
{
 
412
        char *orig = g_strstrip (g_strdup (target));
 
413
        char *clean = g_utf8_strdown (orig, -1);
 
414
        g_hash_table_insert (ssgrep_targets, clean, orig);
 
415
}
 
416
 
 
417
int
 
418
main (int argc, char const **argv)
 
419
{
 
420
        GHashTable      *ssgrep_targets;
 
421
        ErrorInfo       *plugin_errs;
 
422
        IOContext       *ioc;
 
423
        GOCmdContext    *cc;
 
424
        GOptionContext  *ocontext;
 
425
        GError          *error = NULL;
 
426
        int              i, N;
 
427
        const char *argv_stdin[] = { "fd://1", NULL };
 
428
 
 
429
        /* No code before here, we need to init threads */
 
430
        argv = gnm_pre_parse_init (argc, argv);
 
431
 
 
432
        ocontext = g_option_context_new (_("PATTERN INFILE..."));
 
433
        g_option_context_add_main_entries (ocontext, ssgrep_options, GETTEXT_PACKAGE);
 
434
        g_option_context_add_group        (ocontext, gnm_get_option_group ());
 
435
        g_option_context_parse (ocontext, &argc, (gchar ***)&argv, &error);
 
436
        g_option_context_free (ocontext);
 
437
 
 
438
        if (error) {
 
439
                g_printerr (_("%s\nRun '%s --help' to see a full list of available command line options.\n"),
 
440
                            error->message, g_get_prgname ());
 
441
                g_error_free (error);
 
442
                return 1;
 
443
        }
 
444
 
 
445
        if (ssgrep_show_version) {
 
446
                g_printerr (_("version '%s'\ndatadir := '%s'\nlibdir := '%s'\n"),
 
447
                            GNM_VERSION_FULL, gnm_sys_data_dir (), gnm_sys_lib_dir ());
 
448
                return 0;
 
449
        }
 
450
 
 
451
        gnm_init (FALSE);
 
452
 
 
453
        ssgrep_targets = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
 
454
        if (ssgrep_pattern_file) {
 
455
                char *uri = go_shell_arg_to_uri (ssgrep_pattern_file);
 
456
                GsfInput         *input;
 
457
                GsfInputTextline *textline;
 
458
                GError           *err = NULL;
 
459
                const unsigned char *line;
 
460
                GString *pat;
 
461
 
 
462
                input = go_file_open (uri, &err);
 
463
                g_free (uri);
 
464
 
 
465
                if (!input) {
 
466
                        g_printerr (_("%s: Cannot read %s: %s\n"),
 
467
                                    g_get_prgname (), ssgrep_pattern_file, err->message);
 
468
                        g_error_free (err);
 
469
                        return 1;
 
470
                }
 
471
 
 
472
                textline = (GsfInputTextline *)gsf_input_textline_new (input);
 
473
                g_object_unref (G_OBJECT (input));
 
474
 
 
475
                pat = g_string_new (NULL);
 
476
                while (NULL != (line = gsf_input_textline_ascii_gets (textline))) {
 
477
                        if (pat->len)
 
478
                                g_string_append_c (pat, '|');
 
479
 
 
480
                        if (ssgrep_fixed_strings)
 
481
                                go_regexp_quote (pat, line);
 
482
                        else
 
483
                                g_string_append (pat, line);
 
484
 
 
485
                        add_target (ssgrep_targets, line);
 
486
                }
 
487
 
 
488
                ssgrep_pattern = g_string_free (pat, FALSE);
 
489
 
 
490
                g_object_unref (G_OBJECT (textline));
 
491
 
 
492
                i = 1;
 
493
                N = argc - i;
 
494
        } else {
 
495
                if (argc < 2) {
 
496
                        g_printerr (_("%s: Missing pattern\n"), g_get_prgname ());
 
497
                        return 1;
 
498
                }
 
499
 
 
500
                if (ssgrep_fixed_strings) {
 
501
                        GString *pat = g_string_new (NULL);
 
502
                        go_regexp_quote (pat, argv[1]);
 
503
                        ssgrep_pattern = g_string_free (pat, FALSE);
 
504
                } else
 
505
                        ssgrep_pattern = g_strdup (argv[1]);
 
506
                add_target (ssgrep_targets, argv[1]);
 
507
 
 
508
                i = 2;
 
509
                N = argc - i;
 
510
        }
 
511
 
 
512
        if (argv[i] == NULL) {
 
513
                argv = argv_stdin;
 
514
                i = 0;
 
515
                N = 1;
 
516
        }
 
517
 
 
518
        cc = cmd_context_stderr_new ();
 
519
        gnm_plugins_init (GO_CMD_CONTEXT (cc));
 
520
        go_plugin_db_activate_plugin_list (
 
521
                go_plugins_get_available_plugins (), &plugin_errs);
 
522
        if (plugin_errs) {
 
523
                /* FIXME: What do we want to do here? */
 
524
                error_info_free (plugin_errs);
 
525
        }
 
526
 
 
527
        ioc = gnumeric_io_context_new (cc);
 
528
        gnm_io_context_set_num_files (ioc, N);
 
529
 
 
530
        if (ssgrep_print_filenames == (gboolean)2)
 
531
                ssgrep_print_filenames = (N > 1);
 
532
 
 
533
        for (; argv[i]; i++) {
 
534
                const char *arg = argv[i];
 
535
                char *uri = go_shell_arg_to_uri (arg);
 
536
                gnm_io_context_processing_file (ioc, uri);
 
537
                ssgrep (arg, uri, ioc, ssgrep_targets, ssgrep_pattern);
 
538
                g_free (uri);
 
539
        }
 
540
 
 
541
        g_hash_table_destroy (ssgrep_targets);
 
542
 
 
543
        g_object_unref (ioc);
 
544
 
 
545
        g_object_unref (cc);
 
546
        gnm_shutdown ();
 
547
        gnm_pre_parse_shutdown ();
 
548
 
 
549
        /* This special case matches what "man grep" says.  */
 
550
        if (ssgrep_quiet && ssgrep_any_matches)
 
551
                return 0;
 
552
 
 
553
        if (ssgrep_error)
 
554
                return 2;
 
555
 
 
556
        return ssgrep_any_matches ? 0 : 1;
 
557
}