1
/* File: xsbpattern.c -- XSB-side interface to match() and substitute()
3
** Contact: xsb-contact@cs.sunysb.edu
5
** Copyright (C) The Research Foundation of SUNY, 1998
7
** XSB is free software; you can redistribute it and/or modify it under the
8
** terms of the GNU Library General Public License as published by the Free
9
** Software Foundation; either version 2 of the License, or (at your option)
12
** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
13
** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14
** FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for
17
** You should have received a copy of the GNU Library General Public License
18
** along with XSB; if not, write to the Free Software Foundation,
19
** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21
** $Id: xsbpattern.c,v 1.10 2003/04/15 17:16:24 kostis Exp $
26
/*----------------------------------------------------------------------------
27
try_match__() -- find the match pattern,
28
next_match__() -- find the next match pattern,
29
do_bulk_match__() -- find the global match patterns,
30
perl_substitute__() -- substitute the string with expected pattern,
31
load_perl__() -- load the perl interpretor,
32
unload_perl__() -- release the perl interpretor object,
33
get_match_resultC__() -- get the perl pattern match C function,
34
get_bulk_match_result__() -- get Perl global pattern match results.
36
----------------------------------------------------------------------------*/
39
#include "interface.h"
40
#include "perlpattern.c" /* pattern match basic functions */
42
void build_sub_match_spec( void );
43
int is_global_pattern( char *);
44
int global_pattern_mode = FALSE;
46
extern void xsb_abort(char *, ...);
48
#define xsb_warn(warning) fprintf(stderr, "++Warning: %s\n", warning)
51
/*----------------------------------------------------------------------------
53
The pattern matching function which includes loading perl interpreter and
54
trying the perl pattern matching.
56
input: char* string, -- input text
57
char* pattern -- match pattern
58
output:if no match found, return FAILURE (0).
59
----------------------------------------------------------------------------*/
60
int try_match__( void )
62
SV *text; /* the storage for the string in embedded Perl */
63
SV *string_buff; /* the storage for the string in embedded Perl */
64
int was_match; /* number of the matches */
65
char *string = ptoc_string(1),
66
*pattern = ptoc_string(2);
68
/* first load the perl interpreter, if unloaded */
69
if (perlObjectStatus == UNLOADED) load_perl__();
72
string_buff = newSV(0);
73
sv_setpv(text, string); /* store the string in the SV */
75
was_match = match(text, pattern );
77
global_pattern_mode = is_global_pattern(pattern);
79
SvREFCNT_dec(string_buff);
86
/*----------------------------------------------------------------------------
88
The pattern match function which repeats pattern match after
89
the pattern match of the function try_match__().
90
If there is no calling of function try_match__() before, give warning!
91
output: if no match found, return FAILURE.
92
----------------------------------------------------------------------------*/
93
int next_match__( void )
95
int was_match; /* return code */
97
if ( matchPattern == NULL ) { /*didn't try_match__ before*/
98
xsb_warn("call try_match/2 first!");
101
else /*do next match*/
102
was_match = match_again( );
104
if (global_pattern_mode)
106
/* always fail, if Perl pattern is not global */
110
/*----------------------------------------------------------------------------
112
The pattern match function which includes loading perl interpreter and
113
doing the global perl pattern match, and storing the results in the global
114
array of bulkMatchList.
116
input: char* string -- input text
117
char* pattern -- match pattern
118
output: int* num_match -- the number of the matches
119
----------------------------------------------------------------------------*/
120
int do_bulk_match__( void )
122
AV *match_list; /* AV storage of matches list*/
123
SV *text; /* storage for the embedded perl cmd */
124
SV *string_buff; /* storage for the embedded perl cmd */
125
int num_match; /* the number of the matches */
128
/* first load the perl interpreter, if unloaded */
129
if (perlObjectStatus == UNLOADED) load_perl__();
132
string_buff = newSV(0);
133
sv_setpv(text, ptoc_string(1)); /*put the string into an SV */
135
/*------------------------------------------------------------------------
136
free the old match list space and allocate new space for current match list
137
-----------------------------------------------------------------------*/
138
for ( i=0; i<preBulkMatchNumber; i++ )
139
free(bulkMatchList[i]);
140
if (bulkMatchList != NULL ) free(bulkMatchList);
141
bulkMatchList = NULL;
143
/*------------------------------------------------------------------------
145
----------------------------------------------------------------------*/
146
num_match = all_matches(text, ptoc_string(2),&match_list);
148
/* allocate the space to store the matches */
149
if ( num_match != 0 ) {
150
preBulkMatchNumber = num_match; /* reset the pre bulk match number */
151
bulkMatchList = (char **)malloc(num_match*sizeof(char *));
152
if ( bulkMatchList == NULL )
153
xsb_abort("Cannot alocate memory to store the results for bulk match");
156
/*get the matches from the AV */
157
for ( i=0;i<num_match;i++ ) {
158
string_buff = av_shift(match_list);
159
bulkMatchList[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 );
160
strcpy((char *)bulkMatchList[i], SvPV(string_buff,PL_na) );
163
SvREFCNT_dec(string_buff); /* release space*/
166
ctop_int(3, num_match); /*return the number of matches*/
170
/*----------------------------------------------------------------------------
172
The pattern substitution function which includes loading perl interpreter
173
and doing the pattern substitution, then returning the replaced string.
175
input: char* string, input text
176
char* pattern, match pattern
177
output:char* string, output text
178
----------------------------------------------------------------------------*/
179
int perl_substitute__( void )
181
SV *text; /* Perl representation for the string to be
182
modified by substitution */
183
char *subst_cmd = ptoc_string(2);
186
/* first load the perl interpreter, if unloaded */
187
if (perlObjectStatus == UNLOADED) load_perl__();
190
sv_setpv(text, ptoc_string(1)); /* put the string to the SV */
192
if( !substitute(&text, subst_cmd) )
195
global_pattern_mode = is_global_pattern(subst_cmd);
197
if (substituteString != NULL ) free(substituteString);
199
substituteString = malloc(strlen(SvPV(text,PL_na))+1);
200
strcpy(substituteString,SvPV(text,PL_na));
202
SvREFCNT_dec(text); /*release space*/
204
ctop_string(3, string_find(substituteString,1)); /*return changed text*/
208
/*----------------------------------------------------------------------------
210
The function to implement the interface of C and Perl, load the perl
211
interpreter and initial the global variables. Then the Perl function
213
----------------------------------------------------------------------------*/
215
int load_perl__( void )
217
char *embedding[] = {"","-e","0"}; /* perl interpreter config params */
220
/* check if the perl interpreter is loaded already*/
221
if ( perlObjectStatus == LOADED ) return SUCCESS;
223
/*------------------------------------------------------------------------
224
initial the global variables
225
----------------------------------------------------------------------*/
226
for ( i=0; i<MAX_TOTAL_MATCH; i++ )
227
matchResults[i] = NULL;
228
preBulkMatchNumber = 0;
229
bulkMatchList = NULL;
231
substituteString = NULL;
232
build_sub_match_spec(); /*build the submatch arguments string constant*/
234
my_perl = perl_alloc();
235
perl_construct( my_perl );
236
perl_parse( my_perl, NULL, 3, embedding, (char **)NULL );
239
perlObjectStatus = LOADED;
244
/*---------------------------------------------------------------------------
246
The function to release the Perl interpreter, and deallocat the memory
247
---------------------------------------------------------------------------*/
249
int unload_perl__( void )
253
PL_perl_destruct_level = 1;
254
perl_destruct( my_perl );
255
perl_free( my_perl );
257
/*-------------------------------------------------------------------------
258
free all the space allocated for perl match functions
259
-------------------------------------------------------------------------*/
260
for ( i=0; i<preBulkMatchNumber; i++ )
261
free(bulkMatchList[i]);
262
if (bulkMatchList != NULL ) free(bulkMatchList);
263
if (matchPattern != NULL ) free(matchPattern);
264
if (substituteString != NULL ) free(substituteString);
267
perlObjectStatus = UNLOADED;
272
/*----------------------------------------------------------------------------
273
get_bulk_match_result__(order, argumentValue):
274
The function to get the values of all the matches.
275
input: is the position of the argument;
276
output: is the string of match result.
277
----------------------------------------------------------------------------*/
279
int get_bulk_match_result__( void ) {
281
if (perlObjectStatus == UNLOADED ) {
286
if ( bulkMatchList[ptoc_int(1)] == NULL )
287
return FAILURE; /*no match*/
289
int match_seq_number= ptoc_int(1);
290
int match_array_sz= ptoc_int(3);
291
if (match_seq_number < match_array_sz) {
292
/* c2p_string( bulkMatchList[match_seq_number], reg_term(2)); */
293
ctop_string(2, (char *)string_find(bulkMatchList[match_seq_number],1));
301
/*----------------------------------------------------------------------------
302
get_match_resultC__(matchCode, matchResult):
303
Get the value of the submatch string $1, $2, ... from
304
the global string array of matchResults
306
input: is the match code. Match codes correspond to Perl match variables as
319
output: the string of match result.
321
The results of the matches (the values of Perl vars) are in consecutive
322
cells of the matchResults array as follows:
332
----------------------------------------------------------------------------*/
334
int get_match_resultC__( void ) {
338
int submatch_number=ptoc_int(1);
340
/*--------------------------------------------------------------------------
341
Convert from Prolog-side convention for refering to submatches to
342
the corresponding array index numbers in match result storage.
343
--------------------------------------------------------------------------*/
344
switch (submatch_number) {
345
case MATCH: /*MATCH = -1*/
346
order = 0; /* actual position in the memory */
348
case PREMATCH: /*PREMATCH = -2*/
351
case POSTMATCH: /*POSTMATCH = -3*/
354
case LAST_PAREN_MATCH: /*LAST_PAREN_MATCH = -4*/
358
if ( submatch_number > MAX_SUB_MATCH ) {
361
"Specified submatch number %d exceeds the limit: %d\n",
362
submatch_number, MAX_SUB_MATCH);
366
else order = submatch_number+3; /* actual position in the memory */
370
if (order == -99) return(FAILURE);
372
if ( matchPattern == NULL ) { /*didn't try_match before*/
373
xsb_warn("Call try_match/2 first!");
375
} else if ( !strcmp(matchResults[order],"") || matchResults[order] == NULL )
376
return(FAILURE); /*no match found, return FAILURE */
378
c2p_string( matchResults[order], reg_term(2));
383
/*----------------------------------------------------------------------------
384
void build_sub_match_string( void )
385
This function is used to build the submatch arguments list string,
386
"($&,$`,$',$+,$1,$2,$3.....,$MAX_SUB_MATCH)"
387
here the value of MAX_SUB_MATCH is defined in the include file
388
----------------------------------------------------------------------------*/
390
void build_sub_match_spec( void ) {
392
int i,j,k; /*counter flags*/
393
int spaceSize; /*memory space size for the submatch string*/
395
/*get the size of the submatch string, the size of string $1, $2 etc., is 2,
396
when the digit is bigger than 10, the size of string $10, $11 etc., is 3,
397
so whenever the digit increases by 10, the size of the string $digit will
398
increase by 1. Following code is to calculate the size of string $1,$2...*/
402
for (i=1;i<=MAX_SUB_MATCH;i++) {
407
spaceSize += 2+j; /* the size of ",$" is 2 */
409
spaceSize+=(sizeof(FIXEDSUBMATCHSPEC)+1);
412
/*build the submatch string*/
413
subMatchSpec=(char *)malloc(spaceSize);
414
strcpy(subMatchSpec, FIXEDSUBMATCHSPEC); /*build the fixed part $&,$`,$',$+*/
416
/* add string $1, $2 etc., to the end of the string */
417
for (i=1; i<=MAX_SUB_MATCH;i++)
418
sprintf(&(subMatchSpec[strlen(subMatchSpec)]), ",$%d\0", i);
419
/*add one of $1, $2 etc., to the string each time by order*/
420
strcat(subMatchSpec, ")");
427
/* Check if the Perl pattern is global, i.e., contains the `g' modifier.
428
** This is needed so that next_match will know that it has to fail immediately,
429
** if no `g' has been specified.
431
int is_global_pattern(char *pattern) {
432
int len = strlen(pattern), i = len-1;
434
/* skip other Perl pattern modifiers and spaces */
436
( *(pattern+i) == ' ' || *(pattern+i) == '\t'
437
|| *(pattern+i) == 'o' || *(pattern+i) == 'i' ))
440
if (*(pattern+i) == 'g')