2
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
3
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
5
** This program and library is free software; you can redistribute it and/or
6
** modify it under the terms of the GNU (Library) General Public License
7
** as published by the Free Software Foundation; either version 2
8
** of the License, or any later version.
10
** This program is distributed in the hope that it will be useful,
11
** but WITHOUT ANY WARRANTY; without even the implied warranty of
12
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
** GNU (Library) General Public License for more details.
15
** You should have received a copy of the GNU (Library) General Public License
16
** along with this program; if not, write to the Free Software
17
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
**--------------------------------------------------------------------
20
** Mar 27, 2001 - created moseley
36
#include "parse_conffile.h"
37
#include "bash.h" /* for locating a program */
39
static char *find_command_in_path(const char *name, const char *path_list, int *path_index);
40
static char *get_env_path_with_libexecdir( void );
45
/* prog system specific configuration parameters */
46
struct swline *progparameterslist;
51
-- init structures for this module
54
/*
 * initModule_Prog -- set up the per-handle state of the "prog" module.
 *
 * Allocates a struct MOD_Prog with emalloc() and clears its parameter
 * list, which configModule_Prog() later fills from the
 * SwishProgParameters directive.
 *
 * sw: the swish handle the module state belongs to.
 *
 * NOTE(review): the statement that stores `self` into the handle is not
 * part of this listing; the other functions in this file read the state
 * back through sw->Prog.
 */
void initModule_Prog (SWISH *sw)
56
struct MOD_Prog *self;
58
self = (struct MOD_Prog *) emalloc(sizeof(struct MOD_Prog));
61
/* start with no SwishProgParameters collected */
62
self->progparameterslist = (struct swline *) NULL;
67
/*
 * freeModule_Prog -- release the resources held by the "prog" module.
 *
 * sw: the swish handle whose sw->Prog state is being torn down.
 *
 * NOTE(review): only the head pointer of progparameterslist is passed to
 * efree() here.  progparameterslist is walked as a linked list (->next)
 * in open_external_program(), so confirm the remaining nodes and their
 * ->line strings are released somewhere.
 */
void freeModule_Prog (SWISH *sw)
69
struct MOD_Prog *self = sw->Prog;
72
/* nothing to release when no parameters were ever configured */
if ( self->progparameterslist )
73
efree( self->progparameterslist );
81
/*
 * configModule_Prog -- handle configuration directives owned by this module.
 *
 * sw: swish handle; the module state is read from sw->Prog.
 * sl: the tokenized configuration line; sl->word[0] is the directive name.
 *
 * The only directive recognised here is "SwishProgParameters" (compared
 * case-insensitively).  Its values, starting at word index 1, are handed
 * to grabCmdOptions() to be stored in progparameterslist.
 *
 * Returns 0 when the line is not a directive of this module.
 * NOTE(review): the return value for a handled directive and the exact
 * condition guarding the "requires at least one value" error are not
 * visible in this listing.
 */
int configModule_Prog (SWISH *sw, StringList *sl)
84
struct MOD_Prog *self = sw->Prog;
85
char *w0 = sl->word[0];
87
if (strcasecmp(w0, "SwishProgParameters") == 0)
91
grabCmdOptions(sl, 1, &self->progparameterslist);
94
progerr("%s: requires at least one value", w0);
99
return 0; /* not a module directive */
107
/*
 * open_external_program -- locate `prog` and start it with popen().
 *
 * sw:   swish handle; supplies the configured SwishProgParameters list
 *       through sw->Prog->progparameterslist.
 * prog: program name as given by the caller.  It is modified in place by
 *       normalize_path().
 *
 * The program is looked up with find_command_in_path() in the search
 * path returned by get_env_path_with_libexecdir().  The command string
 * is the located path followed by every configured parameter, and is
 * opened for reading in text mode (F_READ_TEXT).
 *
 * NOTE(review): the name "stdin" is special-cased, but what is done for
 * it is not visible in this listing.
 * NOTE(review): the command string is handed to popen(), which runs it
 * through the command interpreter, and the parameters are appended
 * without any quoting in the visible code.  Confirm that configuration
 * values are trusted.
 */
static FILE *open_external_program(SWISH * sw, char *prog)
113
struct swline *progparameterslist = sw->Prog->progparameterslist;
114
int path_index = 0; /* index into $PATH */
115
char *env_path = get_env_path_with_libexecdir();
117
if ( ! strcmp( prog, "stdin") )
120
normalize_path( prog ); /* flip backslashes to forward slashes */
122
full_path = find_command_in_path( (const char *)prog, env_path, &path_index );
124
progerr("Failed to find program '%s' in PATH: %s ", prog, env_path );
129
printf("External Program found: %s\n", full_path );
133
/* get total length of configuration parameters */
135
/* first pass over the list: size the buffer (one extra byte per parameter) */
while (progparameterslist)
137
total_len += strlen(progparameterslist->line) + 1; /* separate by spaces */
138
progparameterslist = progparameterslist->next;
141
/* room for the program path, all parameters with separators, and the NUL */
cmd = emalloc(total_len + strlen( full_path ) + 1);
142
strcpy(cmd, full_path);
148
make_windows_path( cmd );
153
/* second pass: rewind to the head of the list and append each parameter */
progparameterslist = sw->Prog->progparameterslist;
154
while (progparameterslist)
157
strcat(cmd, progparameterslist->line);
158
progparameterslist = progparameterslist->next;
161
fp = popen(cmd, F_READ_TEXT);
164
progerrno("Failed to spawn external program '%s': ", cmd);
170
/* To make filters work with prog, need to write the file out to a temp file */
171
/* It will be faster to do the filtering from within the "prog" program */
172
/* This may not be safe if running as a threaded app, and I'm not clear on how portable this is */
173
/* This also uses read_stream to read in the file -- so the entire file is read into memory instead of chunked to the temp file */
175
/* Notice that the data is read out in TEXT mode -- this is because it's read from the */
176
/* external program in TEXT mode. Binary files will be modified while in memory */
177
/* (under Windows) but writing back in TEXT mode should restore the file to its */
178
/* original binary format for use by the filter. Really, don't use FileFilter with -S prog */
180
/*
 * save_to_temp_file -- copy the current document from the external
 * program's stream into a temporary file so a filter can be run on it.
 *
 * sw:    swish handle, passed through to read_stream() and
 *        create_tempfile().
 * fprop: file properties of the document being indexed.  On return
 *        fprop->work_path names the temporary file and fprop->fp is NULL.
 *
 * The whole document (fprop->fsize bytes) is read into memory first and
 * then written out in one fwrite() call.  A short write is reported with
 * progerrno().
 */
static void save_to_temp_file(SWISH *sw, FileProp *fprop)
183
char *rd_buffer = NULL; /* complete file read into buffer */
185
/* remember the filter so it can be put back after the raw read */
struct FilterList *filter_save = fprop->hasfilter;
188
/* slurp entire file into memory -- yuck */
189
fprop->hasfilter = NULL; /* force reading fprop->fsize bytes */
190
rd_buffer = read_stream(sw, fprop, 0);
192
fprop->hasfilter = filter_save;
194
/* Save content to a temporary file */
195
/* the old work_path is dropped; create_tempfile() stores the new one */
efree( fprop->work_path );
196
out = create_tempfile(sw, F_WRITE_TEXT, "fltr", &fprop->work_path, 0 );
198
bytes = fwrite( rd_buffer, 1, fprop->fsize, out );
200
if ( bytes != (size_t)fprop->fsize )
201
progerrno("Failed to write temporary filter file '%s': ", fprop->work_path);
204
/* hide the fact that it's an external program */
205
fprop->fp = (FILE *) NULL;
208
/* NOTE(review): the release of rd_buffer below is commented out, so the
 * visible code never frees it -- confirm who owns the buffer. */
//***JMRUIZ efree(rd_buffer);
215
/*
 * extprog_indexpath -- run one external program and index every document
 * it emits.
 *
 * sw:   swish handle.
 * prog: name of the external program, passed to open_external_program().
 *
 * The program's output is read line by line.  Each document is announced
 * by a block of header lines terminated by a blank line:
 *
 *   Content-Length  size of the document body (parsed with strtol)
 *   Last-Mtime      modification time (parsed with strtol)
 *   No-Contents:    see the index_no_content handling below
 *   Path-Name       path recorded for the document (copied to the heap)
 *   Document-Type   parser to use, validated with strtoDocType()
 *
 * Header names are matched case-insensitively on a fixed-length prefix.
 * Any other line is reported with progwarn().  When the blank line is
 * reached a FileProp is built for the document and passed to
 * do_index_file().
 *
 * sw->truncateDocSize is forced to 0 for the duration of the run and
 * restored before the program's stream is closed with pclose().
 *
 * NOTE(review): fgets() is limited to MAXSTRLEN, so a longer header line
 * arrives split across two reads.
 * NOTE(review): strtol() is called with a NULL end pointer, so a
 * non-numeric Content-Length or Last-Mtime value silently parses as 0.
 */
static void extprog_indexpath(SWISH * sw, char *prog)
223
int index_no_content;
224
long truncate_doc_size;
229
index_no_content = 0;
232
fp = open_external_program(sw, prog);
234
ln = emalloc(MAXSTRLEN + 1);
236
/* saved here, restored after the read loop */
truncate_doc_size = sw->truncateDocSize;
237
sw->truncateDocSize = 0; /* can't truncate -- prog should make sure doc is not too large */
238
// $$$ This is no longer true with libxml push parser
240
// $$$ next time, break out the header parsing in its own function, please
242
/* loop on headers */
243
while (fgets(ln, MAXSTRLEN, fp) != NULL)
249
line = str_skip_ws(ln); /* skip leading white space */
250
end = strrchr(line, '\n'); /* replace \n with null -- better to remove trailing white space */
252
/* trim white space */
255
/* NOTE(review): *(end-1) is a plain char; <ctype.h> functions expect a
 * value representable as unsigned char -- confirm for bytes >= 0x80 */
while ( end > line && isspace( (int)*(end-1) ) )
261
if (strlen(line) == 0) /* blank line indicates body */
264
progerr("External program failed to return required headers Path-Name:");
267
progerr("External program failed to return required headers Content-Length: when processing file '%s'", real_path);
269
if ( fsize == 0 && sw->verbose >= 2)
270
progwarn("External program returned zero Content-Length when processing file'%s'", real_path);
274
/* Create the FileProp entry to describe this "file" */
276
/* This is not great -- really should make creating a fprop more generic */
277
/* this was done because file.c assumed that the "file" was on disk */
278
/* which has changed over time due to filters, http, and prog */
280
fprop = init_file_properties();
281
/* real_path is handed over as-is; work_path and orig_path get their own copies */
fprop->real_path = real_path;
282
fprop->work_path = estrdup( real_path );
283
fprop->orig_path = estrdup( real_path );
285
/* Set the doc type from the header */
288
fprop->doctype = docType;
293
/* set real_path, doctype, index_no_content, filter, stordesc */
294
init_file_prop_settings(sw, fprop);
296
fprop->fp = fp; /* stream to read from */
297
fprop->fsize = fsize; /* how much to read */
298
fprop->source_size = fsize; /* original size of input document - should be an extra header! */
299
fprop->mtime = mtime;
301
/* header can force index_no_content */
302
if (index_no_content)
303
fprop->index_no_content++;
306
/* the quick hack to make filters work is for FilterOpen
307
* to see that fprop->fp is set, read it into a buffer
308
* write it to a temporary file, then call the filter
309
* program as normally is done. But much smarter to
310
* simply filter in the prog, after all. Faster, too.
313
if (fprop->hasfilter)
315
save_to_temp_file( sw , fprop );
316
has_filter++; /* save locally, in case it gets reset somewhere else */
319
if (sw->verbose >= 3)
320
printf("%s", real_path);
321
else if (sw->verbose >= 2)
322
printf("Processing %s...\n", real_path);
325
do_index_file(sw, fprop);
327
/* remove() returns non-zero on failure; only a file made for a filter is removed */
if ( has_filter && remove( fprop->work_path ) )
328
progwarnno("Error removing temporary file '%s': ", fprop->work_path);
330
free_file_properties(fprop);
331
// efree(real_path); free_file_properties will free the paths
335
/* reset the per-document flag for the next header block */
index_no_content = 0;
340
else /* we are reading headers */
342
/* prefix match only: the first 14 characters decide */
if (strncasecmp(line, "Content-Length", 14) == 0)
344
char *x = strchr(line, ':');
346
progerr("Failed to parse Content-Length header '%s'", line);
347
/* ++x steps past the ':' located above */
fsize = strtol(++x, NULL, 10);
351
if (strncasecmp(line, "Last-Mtime", 10) == 0)
353
char *x = strchr(line, ':');
355
progerr("Failed to parse Last-Mtime header '%s'", line);
356
mtime = strtol(++x, NULL, 10);
360
/* unlike the other headers, this comparison includes the ':' */
if (strncasecmp(line, "No-Contents:", 12) == 0)
367
if (strncasecmp(line, "Path-Name", 9) == 0)
369
char *x = strchr(line, ':');
371
progerr("Failed to parse Path-Name header '%s'", line);
373
x = str_skip_ws(++x);
375
progerr("Failed to find path name in Path-Name header '%s'", line);
377
/* heap copy of the path; it must outlive the line buffer */
real_path = emalloc(strlen(x) + 1);
378
strcpy(real_path, x);
382
if (strncasecmp(line, "Document-Type", 13) == 0)
384
char *x = strchr(line, ':');
386
progerr("Failed to parse Document-Type '%s'", line);
388
x = str_skip_ws(++x);
390
progerr("Failed to documnet type in Document-Type header '%s'", line);
392
/* a zero result from strtoDocType() is treated as "unknown type" */
if ( !(docType = strtoDocType( x )) )
393
progerr("documnet type '%s' not a valid Swish-e document type in Document-Type header '%s'", x, line);
398
progwarn("Unknown header line: '%s' from program %s", line, prog);
405
/* restore the setting */
406
sw->truncateDocSize = truncate_doc_size;
409
if ( pclose(fp) == -1 )
410
progwarnno("Failed to properly close external program: ");
418
/* Don't use old method of config checking */
419
/*
 * extprog_parseconfline -- configuration-line hook for the external
 * program data source; it is listed in the ExternalProgramDataSource
 * initializer.
 *
 * NOTE(review): the body is not part of this listing.  Directive handling
 * for this module is done in configModule_Prog() -- confirm this hook is
 * only a placeholder.
 */
static int extprog_parseconfline(SWISH * sw, StringList *l)
426
/*
 * Descriptor for the external-program data source.
 *
 * NOTE(review): only one initializer member is visible in this listing;
 * the other members are not shown.
 */
struct _indexing_data_source_def ExternalProgramDataSource = {
430
/* configuration-line hook for this data source */
extprog_parseconfline
435
/*
 * find_command_in_path -- search for the program `name`.
 *
 * name:       program name as given by the caller.
 * path_list:  list of directories to search; replaced by the name itself
 *             when absolute_program(name) is true.
 * path_index: in/out position within path_list; it is advanced as
 *             elements are consumed.
 *
 * For each candidate directory the full path is built with
 * make_full_pathname() and probed with file_status().  A file that exists
 * but is not executable produces a progwarn() message.
 *
 * NOTE(review): the return statements are not part of this listing.  The
 * caller treats the result as the full path of the program -- confirm
 * ownership of the returned string.
 */
static char *find_command_in_path(const char *name, const char *path_list, int *path_index)
437
char *found = NULL, *full_path;
438
int status, name_len;
439
int absolute_path_given = 0;
440
char *abs_path = NULL;
441
name_len = strlen(name);
443
if (!absolute_program(name))
444
absolute_path_given = 0;
448
absolute_path_given = 1;
453
/* a name that does not start with '.', '/' or '~' is prefixed with "./" */
if (*name != '.' && *name != '/' && *name != '~')
455
/* 3 = strlen("./") + terminating NUL */
abs_path = (char *)xmalloc(3 + name_len);
456
strcpy(abs_path, "./");
457
strcat(abs_path, name);
461
abs_path = (char *)xmalloc(1 + name_len);
462
strcpy(abs_path, name);
465
/* from here on the "search path" is just the given name */
path_list = abs_path;
466
p = strrchr(abs_path, '/');
471
/* stop when the list is exhausted: *path_index reaches the NUL */
while (path_list && path_list[*path_index])
475
if (absolute_path_given)
477
path = savestring(path_list);
478
/* consume the whole list so the loop condition ends after this pass */
*path_index = strlen(path);
481
path = get_next_path_element(path_list, path_index);
489
char *t = tilde_expand(path);
500
if (skip_dot && *path != '/')
506
found_path_starts_with_dot = (*path == '.');
509
full_path = make_full_pathname(path, name, name_len);
512
status = file_status(full_path);
514
/* This is different from "where" because it stops at the first found file */
515
/* but where (and shells) continue to find first executable program in path */
516
if (status & FS_EXISTS)
518
if (status & FS_EXECABLE)
524
progwarn("Found '%s' in PATH but is not executable", full_path);
533
static char *get_env_path_with_libexecdir( void )
536
char *path = getenv("PATH");
537
char *execdir = get_libexec(); /* Should free */
542
pathbuf = (char *)emalloc( strlen( path ) + strlen( execdir ) + strlen( PATH_SEPARATOR ) + 1 );
544
sprintf(pathbuf, "%s%s%s", path, PATH_SEPARATOR, execdir );