3
-- This module does metaname handling for swish-e
5
** This program and library is free software; you can redistribute it and/or
6
** modify it under the terms of the GNU (Library) General Public License
7
** as published by the Free Software Foundation; either version 2
8
** of the License, or any later version.
10
** This program is distributed in the hope that it will be useful,
11
** but WITHOUT ANY WARRANTY; without even the implied warranty of
12
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
** GNU (Library) General Public License for more details.
15
** You should have received a copy of the GNU (Library) General Public License
16
** along with this program; if not, write to the Free Software
17
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20
-- 2001-02-12 rasc minor changes, concerning the tolower problem
21
(unsigned char) problem!!!
34
#include "metanames.h"
41
int metaType; /* META_* type flags; see metanames.h for values. Multiple flags are combined with bitwise OR. */
46
/**************************************************************************
47
* List of *internal* meta names
50
* Removing any of these will prevent access for result output
51
* Removing any of the "real" meta names will also prevent storing
52
* of the data in the property file.
53
* That is, they may be commented out and then selected in the
54
* configuration file as needed.
55
* Hard to imagine not wanting the doc path!
57
***************************************************************************/
60
/* Table of the meta names and properties that are registered by default.
 * Each row pairs an AUTOPROPERTY_* name with its META_* type flags;
 * add_default_metanames() hands every row to addMetaEntry(). */
static defaultMetaNames SwishDefaultMetaNames[] = {
62
/* This is the default meta ID ( number 1 ) that plain text is stored as */
63
{ AUTOPROPERTY_DEFAULT, META_INDEX }, /* REQUIRED */
66
/* These are the "internal" meta names generated at search time; they are all required */
67
{ AUTOPROPERTY_REC_COUNT, META_PROP | META_INTERNAL | META_NUMBER },
68
{ AUTOPROPERTY_RESULT_RANK, META_PROP | META_INTERNAL | META_NUMBER },
69
{ AUTOPROPERTY_FILENUM, META_PROP | META_INTERNAL | META_NUMBER },
70
{ AUTOPROPERTY_INDEXFILE, META_PROP | META_INTERNAL | META_STRING },
72
/* These are the "real" meta names that are available by default */
73
/* These can be commented out (e.g. to save disk space) and added back in with PropertyNames */
74
{ AUTOPROPERTY_DOCPATH, META_PROP | META_STRING },
75
{ AUTOPROPERTY_TITLE, META_PROP | META_STRING | META_IGNORE_CASE },
76
{ AUTOPROPERTY_DOCSIZE, META_PROP | META_NUMBER},
77
{ AUTOPROPERTY_LASTMODIFIED, META_PROP | META_DATE},
78
// { AUTOPROPERTY_SUMMARY, META_PROP | META_STRING},
79
// { AUTOPROPERTY_STARTPOS, META_PROP | META_NUMBER}, // should be added only if LST is selected
82
/* Add the internal swish metanames to the index file structure.
 *
 * Walks every row of SwishDefaultMetaNames and registers it in
 * indexf->header through addMetaEntry(). The metaID argument is always 0,
 * so addNewMetaEntry() derives the ID from the header's metaCounter when
 * it creates an entry.
 */
83
void add_default_metanames(IndexFILE * indexf)
87
/* Element count is computed from the array itself, so adding or removing
 * rows in SwishDefaultMetaNames needs no change here. */
for (i = 0; i < (int)(sizeof(SwishDefaultMetaNames) / sizeof(SwishDefaultMetaNames[0])); i++)
88
addMetaEntry(&indexf->header, SwishDefaultMetaNames[i].metaName, SwishDefaultMetaNames[i].metaType, 0);
93
/**************************************************************************
94
* These next routines add a new property/metaname to the list
97
***************************************************************************/
101
/* Add an entry to the metaEntryArray if one doesn't already exist.
 *
 * header   - index header whose meta entry list is updated
 * metaname - name for the entry; a NULL or empty name is reported through
 *            progerr() as an internal error
 * metaType - META_* flags for the entry
 * metaID   - forwarded to addNewMetaEntry(); 0 lets it assign the ID
 *
 * The name is copied with estrdup() and lower-cased before use.
 *
 * NOTE(review): the lookup for an existing entry is commented out below,
 * so tmpEntry is still NULL when it is tested and a new entry is created
 * on every call -- confirm that duplicates are really meant to be allowed.
 */
104
struct metaEntry *addMetaEntry(INDEXDATAHEADER *header, char *metaname, int metaType, int metaID)
106
struct metaEntry *tmpEntry = NULL;
109
if (metaname == NULL || metaname[0] == '\0')
110
progerr("internal error - called addMetaEntry without a name");
113
metaWord = estrdup( metaname );
114
strtolower(metaWord);
117
/* See if there is a previous metaname with the same name */
118
// tmpEntry = metaType & META_PROP
119
// ? getPropNameByName(header, metaWord)
120
// : getMetaNameByName(header, metaWord);
123
if (!tmpEntry) /* metaName not found - Create a new one */
124
tmpEntry = addNewMetaEntry( header, metaWord, metaType, metaID);
127
/* This allows adding Numeric or Date onto an existing property. */
128
/* Probably not needed */
129
tmpEntry->metaType |= metaType;
138
/* Allocate a metaEntry for `name`.
 * The struct is obtained from emalloc() and zero-filled, metaName receives
 * its own estrdup() copy of name, and sort_len starts at
 * MAX_SORT_STRING_LEN. All other fields are left at zero. */
static struct metaEntry *create_meta_entry( char *name )
140
struct metaEntry *newEntry = (struct metaEntry *) emalloc(sizeof(struct metaEntry));
142
memset(newEntry, 0, sizeof(struct metaEntry));
143
newEntry->metaName = (char *) estrdup( name );
144
newEntry->sort_len = MAX_SORT_STRING_LEN; /* default for sorting strings */
150
/* Create a metaEntry for metaWord and append it to header->metaEntryArray.
 *
 * header   - index header that owns the array and the entry counter
 * metaWord - name for the new entry (copied by create_meta_entry())
 * metaType - stored on the new entry unchanged
 * metaID   - used as given when non-zero; 0 means header->metaCounter + 1
 *
 * Works on local copies of metaCounter and metaEntryArray and writes both
 * back to the header once the new entry has been stored.
 */
struct metaEntry *addNewMetaEntry(INDEXDATAHEADER *header, char *metaWord, int metaType, int metaID)
152
int metaCounter = header->metaCounter;
153
struct metaEntry *newEntry;
154
struct metaEntry **metaEntryArray = header->metaEntryArray;
155
newEntry = create_meta_entry( metaWord );
157
newEntry->metaType = metaType;
159
/* If metaID is 0 assign a value using metaCounter */
160
/* Loading stored metanames from an index sets the metaID explicitly */
162
newEntry->metaID = metaID ? metaID : metaCounter + 1;
165
/* Create or enlarge the array, as needed */
168
metaEntryArray = (struct metaEntry **) emalloc(sizeof(struct metaEntry *));
172
metaEntryArray = (struct metaEntry **) erealloc(metaEntryArray,(metaCounter + 1) * sizeof(struct metaEntry *));
175
/* And save it in the array */
176
metaEntryArray[metaCounter++] = newEntry;
178
/* Now update the header */
179
header->metaCounter = metaCounter;
180
header->metaEntryArray = metaEntryArray;
185
/**************************************************************************
186
* Clear in_tag flags on all metanames
187
* The flags are used for indexing
189
***************************************************************************/
192
/* Reset in_tag to 0 on every entry in header->metaEntryArray.
 * No type filter is applied: all header->metaCounter entries are touched. */
194
void ClearInMetaFlags(INDEXDATAHEADER * header)
198
for (i = 0; i < header->metaCounter; i++)
199
header->metaEntryArray[i]->in_tag = 0;
205
/**************************************************************************
206
* Initialize the property mapping array
207
* Used to get the property seek pointers from the index file
209
* THIS IS TEMPORARY until I break up the metanames and properties
211
* This just creates two arrays to map metaIDs between property index numbers.
213
***************************************************************************/
215
/* Build the two arrays that translate between metaIDs and property
 * index numbers:
 *   header->propIDX_to_metaID[propIDX] = metaID
 *   header->metaID_to_PropIDX[metaID]  = propIDX
 *
 * An entry takes part in the mapping when it is a property, has no alias
 * set, and is not internal. header->property_count is the running index
 * and ends up as the number of mapped properties; it is set to -1 when
 * there are no meta entries or no entry qualified.
 *
 * Calling this with propIDX_to_metaID already allocated is reported via
 * progerr().
 */
void init_property_list(INDEXDATAHEADER *header)
219
/* only needs to be called one time */
220
if ( header->property_count )
223
if ( header->propIDX_to_metaID )
224
progerr("Called init_property_list with non-null header->propIDX_to_metaID");
226
if ( !header->metaCounter )
228
header->property_count = -1;
233
/* NOTE(review): both arrays hold 1 + metaCounter ints, yet
 * metaID_to_PropIDX is indexed by metaID below. This assumes no metaID
 * is larger than metaCounter -- confirm for IDs loaded from an index. */
header->propIDX_to_metaID = emalloc( (1 + header->metaCounter) * sizeof( int ) );
234
header->metaID_to_PropIDX = emalloc( (1 + header->metaCounter) * sizeof( int ) );
236
for (i = 0; i < header->metaCounter; i++)
238
if (is_meta_property(header->metaEntryArray[i]) && !header->metaEntryArray[i]->alias && !is_meta_internal(header->metaEntryArray[i]) )
240
header->metaID_to_PropIDX[header->metaEntryArray[i]->metaID] = header->property_count;
241
header->propIDX_to_metaID[header->property_count++] = header->metaEntryArray[i]->metaID;
244
/* -1 marks a metaID that has no property index */
header->metaID_to_PropIDX[header->metaEntryArray[i]->metaID] = -1;
247
if ( !header->property_count )
248
header->property_count = -1;
254
/**************************************************************************
255
* These routines lookup either a property or a metaname
258
* The routines only look at either properties or metanames
260
* Note: probably could save a bit by just saying that if not META_PROP then
261
* it's a meta index entry. In other words, the type flag of zero could mean
262
* META_INDEX, otherwise it's a PROPERTY. $$$ todo...
265
***************************************************************************/
268
/** Lookup META_INDEX -- these only return meta names, not properties **/
270
/* Find the first entry for which is_meta_index() is true and whose
 * metaName equals `word`, compared case-insensitively with strcasecmp().
 * The matching entry is returned as is; its alias field is not followed. */
struct metaEntry *getMetaNameByNameNoAlias(INDEXDATAHEADER * header, char *word)
274
for (i = 0; i < header->metaCounter; i++)
275
if (is_meta_index(header->metaEntryArray[i]) && !strcasecmp(header->metaEntryArray[i]->metaName, word))
276
return header->metaEntryArray[i];
282
/* Returns the structure associated with the metaName if it exists
283
* Requests for Aliased names returns the base meta entry, not the alias meta entry.
284
* Note that on an alias it checks the *alias*'s type, so it must match.
287
/* Find the first entry for which is_meta_index() is true and whose
 * metaName equals `word` (case-insensitive). When that entry has a
 * non-zero alias field, the alias value is treated as a metaID and the
 * result of getMetaNameByID() for it is returned instead of the entry. */
struct metaEntry *getMetaNameByName(INDEXDATAHEADER * header, char *word)
291
for (i = 0; i < header->metaCounter; i++)
292
if (is_meta_index(header->metaEntryArray[i]) && !strcasecmp(header->metaEntryArray[i]->metaName, word))
293
return header->metaEntryArray[i]->alias
294
? getMetaNameByID( header, header->metaEntryArray[i]->alias )
295
: header->metaEntryArray[i];
301
/* Returns the structure associated with the metaName ID if it exists
304
/* Linear search for the first entry for which is_meta_index() is true and
 * whose metaID equals `number`; that entry is returned. */
struct metaEntry *getMetaNameByID(INDEXDATAHEADER *header, int number)
308
for (i = 0; i < header->metaCounter; i++)
310
if (is_meta_index(header->metaEntryArray[i]) && number == header->metaEntryArray[i]->metaID)
311
return header->metaEntryArray[i];
318
/** Lookup META_PROP -- these only return properties **/
320
/* Find the first entry for which is_meta_property() is true and whose
 * metaName equals `word`, compared case-insensitively with strcasecmp().
 * The matching entry is returned as is; its alias field is not followed. */
struct metaEntry *getPropNameByNameNoAlias(INDEXDATAHEADER * header, char *word)
324
for (i = 0; i < header->metaCounter; i++)
325
if (is_meta_property(header->metaEntryArray[i]) && !strcasecmp(header->metaEntryArray[i]->metaName, word))
326
return header->metaEntryArray[i];
332
/* Returns the structure associated with the metaName if it exists
333
* Requests for Aliased names returns the base meta entry, not the alias meta entry.
334
* Note that on an alias it checks the *alias*'s type, so it must match.
337
/* Find the first entry for which is_meta_property() is true and whose
 * metaName equals `word` (case-insensitive). When that entry has a
 * non-zero alias field, the alias value is treated as a metaID and the
 * result of getPropNameByID() for it is returned instead of the entry. */
struct metaEntry *getPropNameByName(INDEXDATAHEADER * header, char *word)
342
for (i = 0; i < header->metaCounter; i++)
343
if (is_meta_property(header->metaEntryArray[i]) && !strcasecmp(header->metaEntryArray[i]->metaName, word))
344
return header->metaEntryArray[i]->alias
345
? getPropNameByID( header, header->metaEntryArray[i]->alias )
346
: header->metaEntryArray[i];
352
/* Returns the structure associated with the metaName ID if it exists
355
/* Linear search for the first entry for which is_meta_property() is true
 * and whose metaID equals `number`; that entry is returned. */
struct metaEntry *getPropNameByID(INDEXDATAHEADER *header, int number)
359
for (i = 0; i < header->metaCounter; i++)
361
if (is_meta_property(header->metaEntryArray[i]) && number == header->metaEntryArray[i]->metaID)
362
return header->metaEntryArray[i];
372
/* Returns 1 when meta_entry->metaName equals `name` ignoring case
 * (strcasecmp), 0 otherwise.
 * This is really used to check which internal metaname is being requested. */
374
int is_meta_entry( struct metaEntry *meta_entry, char *name )
376
return strcasecmp( meta_entry->metaName, name ) == 0;
380
/**************************************************************************
381
* Free list of MetaEntry's
383
***************************************************************************/
387
/* Free meta entries for an index file.
 *
 * For each of the header->metaCounter entries this releases metaName and,
 * when they are set, sorted_data and extractpath_default. Afterwards the
 * pointer array itself is freed and the header is reset to an empty list
 * (metaEntryArray = NULL, metaCounter = 0).
 */
389
void freeMetaEntries( INDEXDATAHEADER *header )
393
/* Make sure there are meta names assigned */
394
if ( !header->metaCounter )
398
/* should the elements be set to NULL? */
399
for( i = 0; i < header->metaCounter; i++ )
401
struct metaEntry *meta = header->metaEntryArray[i];
403
efree( meta->metaName );
406
if ( meta->sorted_data)
407
efree( meta->sorted_data );
410
if ( meta->extractpath_default )
411
efree( meta->extractpath_default );
417
/* And free the pointer to the list */
418
efree( header->metaEntryArray);
419
header->metaEntryArray = NULL;
420
header->metaCounter = 0;
424
/**************************************************************************
425
* Check if should bump word position on this meta name
427
***************************************************************************/
430
/* Decide whether `tag` is covered by the list `tmplist`.
 *
 * Returns 0 for an empty (NULL) list and 1 when the first list entry is
 * exactly "*". Otherwise the list is walked via ->next and each entry's
 * line is compared against the lower-cased tag with strcasecmp().
 *
 * NOTE(review): tmptag is presumably a private copy of `tag`; its
 * assignment and the value returned on a match are not shown here --
 * confirm before relying on either.
 */
int isDontBumpMetaName( struct swline *tmplist, char *tag)
434
if (!tmplist) return 0;
435
if (strcmp(tmplist->line,"*")==0) return 1;
438
tmptag=strtolower(tmptag);
442
if( strcasecmp(tmptag,tmplist->line)==0 )
447
tmplist=tmplist->next;
454
/*************************************************
455
* int properties_compatible -
457
* checks to see if two properties can be compared
459
**************************************************/
460
/* Returns 1 when m1 and m2 agree on every one of the META_STRING,
 * META_NUMBER, META_DATE and META_IGNORE_CASE bits of metaType, and 0
 * otherwise. All other metaType bits are ignored by the comparison. */
int properties_compatible( struct metaEntry *m1, struct metaEntry *m2 )
462
/* Only these flags decide whether two property values can be compared */
int mask = META_STRING | META_NUMBER | META_DATE | META_IGNORE_CASE;
463
return (m1->metaType & mask ) == ( m2->metaType & mask);