2
* Implements search and copy functionality for Djvu files.
3
* Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2, or (at your option)
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
#include "djvu-text-page.h"
22
#include <libdjvu/miniexp.h>
25
* djvu_text_page_selection_process:
26
* @page: #DjvuTextPage instance
27
* @p: s-expression to append
28
* @delimit: character/word/... delimiter
30
* Appends the string in @p to the page text.
32
* Returns: whether the end was not reached in this s-expression
35
djvu_text_page_selection_process (DjvuTextPage *page,
39
if (page->text || p == page->start) {
40
char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p));
43
g_strjoin (delimit & 2 ? "\n" :
44
delimit & 1 ? " " : NULL,
45
page->text, token_text,
48
page->text = new_text;
50
page->text = g_strdup (token_text);
58
* djvu_text_page_selection:
59
* @page: #DjvuTextPage instance
61
* @delimit: character/word/... delimiter
63
* Walks the tree in @p and appends the text with
64
* djvu_text_page_selection_process() for all s-expressions
65
* between the start and end fields.
67
* Returns: whether the end was not reached in this subtree
70
djvu_text_page_selection (DjvuTextPage *page,
74
g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
75
(miniexp_car (p)), FALSE);
77
if (miniexp_car (p) != page->char_symbol)
78
delimit |= miniexp_car (p) == page->word_symbol ? 1 : 2;
80
miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
81
while (deeper != miniexp_nil) {
82
miniexp_t str = miniexp_car (deeper);
83
if (miniexp_stringp (str)) {
84
if (!djvu_text_page_selection_process
88
if (!djvu_text_page_selection
93
deeper = miniexp_cdr (deeper);
99
djvu_text_page_limits_process (DjvuTextPage *page,
105
current.x1 = miniexp_to_int (miniexp_nth (1, p));
106
current.y1 = miniexp_to_int (miniexp_nth (2, p));
107
current.x2 = miniexp_to_int (miniexp_nth (3, p));
108
current.y2 = miniexp_to_int (miniexp_nth (4, p));
109
if (current.x2 >= rect->x1 && current.y1 <= rect->y2 &&
110
current.x1 <= rect->x2 && current.y2 >= rect->y1) {
111
if (page->start == miniexp_nil)
119
djvu_text_page_limits (DjvuTextPage *page,
125
g_return_if_fail (miniexp_consp (p) &&
126
miniexp_symbolp (miniexp_car (p)));
128
miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
129
while (deeper != miniexp_nil) {
130
miniexp_t str = miniexp_car (deeper);
131
if (miniexp_stringp (str))
132
djvu_text_page_limits_process (page, p, rect);
134
djvu_text_page_limits (page, str, rect);
136
deeper = miniexp_cdr (deeper);
141
djvu_text_page_copy (DjvuTextPage *page,
142
EvRectangle *rectangle)
146
page->start = miniexp_nil;
147
page->end = miniexp_nil;
148
djvu_text_page_limits (page, page->text_structure, rectangle);
149
djvu_text_page_selection (page, page->text_structure, 0);
151
/* Do not free the string */
159
* djvu_text_page_position:
160
* @page: #DjvuTextPage instance
161
* @position: index in the page text
163
* Returns the closest s-expression that contains the given position in
166
* Returns: closest s-expression
169
djvu_text_page_position (DjvuTextPage *page,
172
GArray *links = page->links;
174
int hi = links->len - 1;
177
g_return_val_if_fail (hi >= 0, miniexp_nil);
179
/* Shamelessly copied from GNU classpath */
181
mid = (low + hi) >> 1;
183
&g_array_index (links, DjvuTextLink, mid);
184
if (link->position == position)
186
else if (link->position > position)
192
return g_array_index (page->links, DjvuTextLink, mid).pair;
196
* djvu_text_page_union:
197
* @target: first rectangle and result
198
* @source: second rectangle
200
* Calculates the bounding box of two rectangles and stores the result
204
djvu_text_page_union (EvRectangle *target,
207
if (source->x1 < target->x1)
208
target->x1 = source->x1;
209
if (source->x2 > target->x2)
210
target->x2 = source->x2;
211
if (source->y1 < target->y1)
212
target->y1 = source->y1;
213
if (source->y2 > target->y2)
214
target->y2 = source->y2;
218
* djvu_text_page_sexpr_process:
219
* @page: #DjvuTextPage instance
220
* @p: s-expression to append
221
* @start: first s-expression in the selection
222
* @end: last s-expression in the selection
224
* Appends the rectangle defined by @p to the internal bounding box rectangle.
226
* Returns: whether the end was not reached in this s-expression
229
djvu_text_page_sexpr_process (DjvuTextPage *page,
234
if (page->bounding_box || p == start) {
235
EvRectangle *new_rectangle = g_new (EvRectangle, 1);
236
new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p));
237
new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p));
238
new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p));
239
new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p));
240
if (page->bounding_box) {
241
djvu_text_page_union (page->bounding_box,
243
g_free (new_rectangle);
245
page->bounding_box = new_rectangle;
253
* djvu_text_page_sexpr:
254
* @page: #DjvuTextPage instance
256
* @start: first s-expression in the selection
257
* @end: last s-expression in the selection
259
* Walks the tree in @p and extends the rectangle with
260
* djvu_text_page_sexpr_process() for all s-expressions between @start and @end.
262
* Returns: whether the end was not reached in this subtree
265
djvu_text_page_sexpr (DjvuTextPage *page,
270
g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
271
(miniexp_car (p)), FALSE);
273
miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
274
while (deeper != miniexp_nil) {
275
miniexp_t str = miniexp_car (deeper);
276
if (miniexp_stringp (str)) {
277
if (!djvu_text_page_sexpr_process
278
(page, p, start, end))
281
if (!djvu_text_page_sexpr
282
(page, str, start, end))
285
deeper = miniexp_cdr (deeper);
291
* djvu_text_page_box:
292
* @page: #DjvuTextPage instance
293
* @start: first s-expression in the selection
294
* @end: last s-expression in the selection
296
* Builds a rectangle that contains all s-expressions in the given range.
299
djvu_text_page_box (DjvuTextPage *page,
303
page->bounding_box = NULL;
304
djvu_text_page_sexpr (page, page->text_structure, start, end);
305
return page->bounding_box;
309
* djvu_text_page_append_text:
310
* @page: #DjvuTextPage instance
312
* @case_sensitive: do not ignore case
313
* @delimit: insert spaces because of higher (sentence/paragraph/...) break
315
* Appends the tree in @p to the internal text string.
318
djvu_text_page_append_text (DjvuTextPage *page,
320
gboolean case_sensitive,
325
g_return_if_fail (miniexp_consp (p) &&
326
miniexp_symbolp (miniexp_car (p)));
328
delimit |= page->char_symbol != miniexp_car (p);
330
miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
331
while (deeper != miniexp_nil) {
332
miniexp_t data = miniexp_car (deeper);
333
if (miniexp_stringp (data)) {
335
link.position = page->text == NULL ? 0 :
338
g_array_append_val (page->links, link);
340
token_text = (char *) miniexp_to_str (data);
342
token_text = g_utf8_casefold (token_text, -1);
343
if (page->text == NULL)
344
page->text = g_strdup (token_text);
347
g_strjoin (delimit ? " " : NULL,
348
page->text, token_text,
351
page->text = new_text;
356
djvu_text_page_append_text (page, data,
357
case_sensitive, delimit);
359
deeper = miniexp_cdr (deeper);
364
* djvu_text_page_search:
365
* @page: #DjvuTextPage instance
366
* @text: text to search
368
* Searches the page for the given text. The results list has to be
369
* externally freed afterwards.
372
djvu_text_page_search (DjvuTextPage *page,
375
char *haystack = page->text;
378
if (page->links->len == 0)
381
search_len = strlen (text);
382
while ((haystack = strstr (haystack, text)) != NULL) {
383
int start_p = haystack - page->text;
384
miniexp_t start = djvu_text_page_position (page, start_p);
385
int end_p = start_p + search_len - 1;
386
miniexp_t end = djvu_text_page_position (page, end_p);
387
result = djvu_text_page_box (page, start, end);
389
page->results = g_list_prepend (page->results, result);
390
haystack = haystack + search_len;
392
page->results = g_list_reverse (page->results);
397
* djvu_text_page_prepare_search:
398
* @page: #DjvuTextPage instance
399
* @case_sensitive: do not ignore case
401
* Indexes the page text and prepares the page for subsequent searches.
404
djvu_text_page_prepare_search (DjvuTextPage *page,
405
gboolean case_sensitive)
407
djvu_text_page_append_text (page, page->text_structure,
408
case_sensitive, FALSE);
412
* djvu_text_page_new:
413
* @text: S-expression of the page text
415
* Creates a new page to search.
417
* Returns: new #DjvuTextPage instance
420
djvu_text_page_new (miniexp_t text)
424
page = g_new0 (DjvuTextPage, 1);
425
page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
426
page->char_symbol = miniexp_symbol ("char");
427
page->word_symbol = miniexp_symbol ("word");
428
page->text_structure = text;
433
* djvu_text_page_free:
434
* @page: #DjvuTextPage instance
436
* Frees the given #DjvuTextPage instance.
439
djvu_text_page_free (DjvuTextPage *page)
442
g_array_free (page->links, TRUE);