1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/StdHeader.h"
8
#include "FieldCacheImpl.h"
14
FieldCacheImpl::FieldCacheImpl():
17
FieldCacheImpl::~FieldCacheImpl(){
21
/** Builds a cache key for a field that is looked up by a sort type.
 *  @param field name of the field; the pointer returned by CLStringIntern::intern is stored
 *               (and released again by ~FileEntry via unintern)
 *  @param type  sort type of the cached values (callers pass SortField constants)
 *  NOTE(review): only the first statement of the body is visible in this excerpt. */
FieldCacheImpl::FileEntry::FileEntry (const TCHAR* field, int32_t type) {
22
this->field = CLStringIntern::intern(field CL_FILELINE);
28
/** Creates one of these objects for a custom comparator.
 *  The type is fixed to SortField::CUSTOM and the comparator pointer is kept, so it
 *  takes part in hashCode() and compareTo().
 *  @param field  name of the field; the interned pointer is stored
 *  @param custom comparator source that identifies this entry; the pointer is stored as-is */
29
FieldCacheImpl::FileEntry::FileEntry (const TCHAR* field, SortComparatorSource* custom) {
30
this->field = CLStringIntern::intern(field CL_FILELINE);
31
this->type = SortField::CUSTOM;
32
this->custom = custom;
35
/** Releases the field name that the constructors obtained from CLStringIntern::intern. */
FieldCacheImpl::FileEntry::~FileEntry(){
36
CLStringIntern::unintern(field);
39
/** Computes a hash for this entry by xor-ing the hash of the field name, the hash of the
 *  custom comparator and the sort type multiplied by 7.
 *  The computation is only entered while _hashCode is still 0.
 *  NOTE(review): the lines that store and return the result are not visible in this
 *  excerpt, and neither is any NULL check in front of custom->hashCode() - confirm that
 *  entries built from (field, type) cannot reach that call with a NULL custom. */
size_t FieldCacheImpl::FileEntry::hashCode(){
40
if ( _hashCode == 0 ){
41
//todo: cache hashcode?
42
size_t ret = Misc::thashCode(field);
44
ret = ret ^ custom->hashCode();
45
ret = ret ^ (type*7); //type with a seed
50
/** Orders this entry against another one: by field, then by sort type, then by the
 *  custom comparator.
 *  - Field names are compared by pointer first; both constructors intern the name.
 *  - Custom comparators are compared by pointer value, with NULL handled separately.
 *  - The final _tcscmp(other->field, this->field) compares the field text
 *    (presumably reached only when the field pointers differ).
 *  NOTE(review): the return statements of the inner branches are not visible in this
 *  excerpt, so the sign convention of those branches cannot be stated here. */
int32_t FieldCacheImpl::FileEntry::compareTo(const FieldCacheImpl::FileEntry* other) const{
51
if ( other->field == this->field ){
52
if ( other->type == this->type ){
53
if ( other->custom == NULL ){
54
if ( this->custom == NULL )
58
}else if ( this->custom == NULL )
60
else if ( other->custom < this->custom )
62
else if ( other->custom > this->custom )
66
}else if ( other->type > this->type )
72
return _tcscmp(other->field,this->field);
75
/** Two of these are equal iff they reference the same field and type. */
76
/*bool FieldCacheImpl::FileEntry::equals (FileEntry* other) {
77
if (other->field == field && other->type == type) {
78
if (other->custom == NULL) {
81
} else if (other->custom->equals (custom)) {
87
/** Composes a hashcode based on the field and type. */
88
/*size_t FieldCacheImpl::FileEntry::hashCode() {
89
return field->hashCode() ^ type ^ (custom==NULL ? 0 : custom->hashCode());
96
/** See if an object is in the cache.
 *  Builds a temporary FileEntry(field, type) key, then takes THIS_LOCK, fetches the
 *  per-reader cache and queries it with that key. ret stays NULL when the reader has
 *  no per-reader cache yet.
 *  NOTE(review): the release of the temporary key and the return statement are not
 *  visible in this excerpt. */
97
FieldCacheAuto* FieldCacheImpl::lookup (IndexReader* reader, const TCHAR* field, int32_t type) {
98
FieldCacheAuto* ret = NULL;
99
FileEntry* entry = _CLNEW FileEntry (field, type);
101
SCOPED_LOCK_MUTEX(THIS_LOCK)
102
fieldcacheCacheReaderType* readerCache = cache.get(reader);
103
if (readerCache != NULL)
104
ret = readerCache->get (entry);
111
/** See if a custom object is in the cache.
 *  Same steps as the type-based lookup, but the temporary key is built with the
 *  FileEntry(field, comparer) constructor, so the comparator pointer is part of the key.
 *  ret stays NULL when the reader has no per-reader cache yet.
 *  NOTE(review): the release of the temporary key and the return statement are not
 *  visible in this excerpt. */
112
FieldCacheAuto* FieldCacheImpl::lookup (IndexReader* reader, const TCHAR* field, SortComparatorSource* comparer) {
113
FieldCacheAuto* ret = NULL;
114
FileEntry* entry = _CLNEW FileEntry (field, comparer);
116
SCOPED_LOCK_MUTEX(THIS_LOCK)
117
fieldcacheCacheReaderType* readerCache = cache.get(reader);
118
if (readerCache != NULL)
119
ret = readerCache->get (entry);
125
/** Callback that store() registers on a reader through addCloseCallback
 *  (presumably invoked by the reader when it is closed).
 *  Removes the reader's entry from the owning FieldCacheImpl's cache while holding that
 *  object's THIS_LOCK.
 *  @param reader         reader whose cache entry is removed
 *  @param fieldCacheImpl the FieldCacheImpl that was passed at registration, as void* */
void FieldCacheImpl::closeCallback(CL_NS(index)::IndexReader* reader, void* fieldCacheImpl){
126
FieldCacheImpl* fci = (FieldCacheImpl*)fieldCacheImpl;
127
SCOPED_LOCK_MUTEX(fci->THIS_LOCK)
128
fci->cache.remove(reader);
131
/** Put an object into the cache.
 *  The value is stored in the per-reader cache under a new FileEntry(field, type) key.
 *  On the first store for a reader, the per-reader cache is created, registered in
 *  cache, and closeCallback is registered on the reader with this object as argument.
 *  The cache is accessed under THIS_LOCK. */
132
void FieldCacheImpl::store (IndexReader* reader, const TCHAR* field, int32_t type, FieldCacheAuto* value) {
133
FileEntry* entry = _CLNEW FileEntry (field, type);
135
SCOPED_LOCK_MUTEX(THIS_LOCK)
136
fieldcacheCacheReaderType* readerCache = cache.get(reader);
137
if (readerCache == NULL) {
138
readerCache = _CLNEW fieldcacheCacheReaderType;
139
cache.put(reader,readerCache);
140
reader->addCloseCallback(closeCallback, this);
142
// The result of put() is ignored here; see the remark below about a previous value.
readerCache->put (entry, value);
143
//this is supposed to return the previous value, but it needs to be deleted!!!
147
/** Put a custom object into the cache.
 *  Same steps as the type-based store, but the key is built with the
 *  FileEntry(field, comparer) constructor, so the comparator pointer is part of the key.
 *  On the first store for a reader, the per-reader cache is created and closeCallback is
 *  registered on the reader. The cache is accessed under THIS_LOCK. */
148
void FieldCacheImpl::store (IndexReader* reader, const TCHAR* field, SortComparatorSource* comparer, FieldCacheAuto* value) {
149
FileEntry* entry = _CLNEW FileEntry (field, comparer);
151
SCOPED_LOCK_MUTEX(THIS_LOCK)
152
fieldcacheCacheReaderType* readerCache = cache.get(reader);
153
if (readerCache == NULL) {
154
readerCache = _CLNEW fieldcacheCacheReaderType;
155
cache.put(reader, readerCache);
156
reader->addCloseCallback(FieldCacheImpl::closeCallback, this);
158
// The result of put() is ignored here; see the remark below about a previous value.
readerCache->put(entry, value);
159
//this is supposed to return the previous value, but it needs to be deleted!!!
168
/** Returns the per-document integer values of a field, building them when the cache
 *  lookup for (reader, field, SortField::INT) does not already provide them.
 *  The array has reader->maxDoc() slots and is zero-filled; the text of each enumerated
 *  term is parsed as a base-10 integer and written to the slot of every document that
 *  termDocs reports for that term. The array is wrapped in a FieldCacheAuto of kind
 *  INT_ARRAY and stored in the cache.
 *  Throws CL_ERR_Runtime ("no terms in field") when the enumeration has no current term.
 *  NOTE(review): several lines of this function (the cache-hit test, loop opener,
 *  cleanup and return) are not visible in this excerpt. */
FieldCacheAuto* FieldCacheImpl::getInts (IndexReader* reader, const TCHAR* field) {
169
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
170
FieldCacheAuto* ret = lookup (reader, field, SortField::INT);
172
int32_t retLen = reader->maxDoc();
173
int32_t* retArray = _CL_NEWARRAY(int32_t,retLen);
174
memset(retArray,0,sizeof(int32_t)*retLen);
176
TermDocs* termDocs = reader->termDocs();
178
// Term with blank text in this field, handed to reader->terms()
// (presumably positions the enumeration at the first term of the field).
Term* term = _CLNEW Term (field, LUCENE_BLANK_STRING, false);
179
TermEnum* termEnum = reader->terms (term);
182
if (termEnum->term(false) == NULL) {
183
_CLTHROWA(CL_ERR_Runtime,"no terms in field"); //todo: add detailed error: + field);
186
Term* term = termEnum->term(false);
187
// Pointer comparison; the guarded statement is not visible here
// (presumably leaves the loop once a term of another field is reached).
if (term->field() != field)
191
int32_t termval = (int32_t)_tcstoi64(term->text(), &end, 10);
192
termDocs->seek (termEnum);
193
while (termDocs->next()) {
194
retArray[termDocs->doc()] = termval;
196
} while (termEnum->next());
205
FieldCacheAuto* fa = _CLNEW FieldCacheAuto(retLen,FieldCacheAuto::INT_ARRAY);
206
fa->intArray = retArray;
208
store (reader, field, SortField::INT, fa);
209
// The two unintern calls below are separated by lines not visible here
// (presumably one per exit path, balancing the intern at the top).
CLStringIntern::unintern(field);
212
CLStringIntern::unintern(field);
217
/** Returns the per-document floating point values of a field, building them when the
 *  cache lookup for (reader, field, SortField::FLOAT) does not already provide them.
 *  The array has reader->maxDoc() slots and is zero-filled with memset; the text of each
 *  enumerated term is parsed with _tcstod and written to the slot of every document that
 *  termDocs reports for that term. The array is wrapped in a FieldCacheAuto of kind
 *  FLOAT_ARRAY and stored in the cache.
 *  Throws CL_ERR_Runtime ("no terms in field ") when the enumeration has no current term.
 *  NOTE(review): several lines of this function (the cache-hit test, loop opener,
 *  cleanup and return) are not visible in this excerpt. */
FieldCacheAuto* FieldCacheImpl::getFloats (IndexReader* reader, const TCHAR* field){
218
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
219
FieldCacheAuto* ret = lookup (reader, field, SortField::FLOAT);
221
int32_t retLen = reader->maxDoc();
222
float_t* retArray = _CL_NEWARRAY(float_t,retLen);
223
memset(retArray,0,sizeof(float_t)*retLen);
225
TermDocs* termDocs = reader->termDocs();
227
// Term with blank text in this field, handed to reader->terms()
// (presumably positions the enumeration at the first term of the field).
Term* term = _CLNEW Term (field, LUCENE_BLANK_STRING, false);
228
TermEnum* termEnum = reader->terms (term);
232
if (termEnum->term(false) == NULL) {
233
_CLTHROWA(CL_ERR_Runtime,"no terms in field "); //todo: make richer error + field);
236
Term* term = termEnum->term(false);
237
// Pointer comparison; the guarded statement is not visible here
// (presumably leaves the loop once a term of another field is reached).
if (term->field() != field)
241
float_t termval = _tcstod(term->text(),&tmp);
242
termDocs->seek (termEnum);
243
while (termDocs->next()) {
244
retArray[termDocs->doc()] = termval;
246
} while (termEnum->next());
255
FieldCacheAuto* fa = _CLNEW FieldCacheAuto(retLen,FieldCacheAuto::FLOAT_ARRAY);
256
fa->floatArray = retArray;
258
store (reader, field, SortField::FLOAT, fa);
259
// The two unintern calls below are separated by lines not visible here
// (presumably one per exit path, balancing the intern at the top).
CLStringIntern::unintern(field);
262
CLStringIntern::unintern(field);
268
/** Returns the per-document string values of a field, building them when the cache
 *  lookup for (reader, field, SortField::STRING) does not already provide them.
 *  The array has reader->maxDoc()+1 pointer slots, zero-filled, with the extra last slot
 *  set to NULL. For every document that termDocs reports for a term, a fresh duplicate
 *  of the term text (STRDUP_TtoT) is written to that document's slot. The array is
 *  wrapped in a FieldCacheAuto of kind STRING_ARRAY with ownContents set, and stored.
 *  Throws CL_ERR_Runtime ("no terms in field ") when the enumeration has no current term.
 *  NOTE(review): several lines of this function (the cache-hit test, loop opener,
 *  cleanup and return) are not visible in this excerpt. */
FieldCacheAuto* FieldCacheImpl::getStrings (IndexReader* reader, const TCHAR* field){
269
//todo: this is not really used, i think?
270
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
271
FieldCacheAuto* ret = lookup (reader, field, SortField::STRING);
273
int32_t retLen = reader->maxDoc();
274
TCHAR** retArray = _CL_NEWARRAY(TCHAR*,retLen+1);
275
memset(retArray,0,sizeof(TCHAR*)*(retLen+1));
277
TermDocs* termDocs = reader->termDocs();
279
// Term with blank text in this field, handed to reader->terms()
// (presumably positions the enumeration at the first term of the field).
Term* term = _CLNEW Term (field, LUCENE_BLANK_STRING, false);
280
TermEnum* termEnum = reader->terms (term);
284
if (termEnum->term(false) == NULL) {
285
_CLTHROWA(CL_ERR_Runtime,"no terms in field "); //todo: extend to + field);
288
Term* term = termEnum->term(false);
289
// Pointer comparison; the guarded statement is not visible here
// (presumably leaves the loop once a term of another field is reached).
if (term->field() != field)
291
const TCHAR* termval = term->text();
292
termDocs->seek (termEnum);
293
while (termDocs->next()) {
294
// NOTE(review): if a document is reported for more than one term, the earlier
// duplicate in this slot is overwritten without being freed - confirm.
retArray[termDocs->doc()] = STRDUP_TtoT(termval); //todo: any better way of doing this???
296
} while (termEnum->next());
298
retArray[retLen]=NULL;
307
FieldCacheAuto* fa = _CLNEW FieldCacheAuto(retLen,FieldCacheAuto::STRING_ARRAY);
308
fa->stringArray = retArray;
309
fa->ownContents=true;
310
store (reader, field, SortField::STRING, fa);
311
// The two unintern calls below are separated by lines not visible here
// (presumably one per exit path, balancing the intern at the top).
CLStringIntern::unintern(field);
314
CLStringIntern::unintern(field);
319
/** Returns a string index for a field, building it when the cache lookup for
 *  (reader, field, STRING_INDEX) does not already provide it.
 *  Two arrays are built: retArray (reader->maxDoc() slots, zero-filled) receives, for
 *  each document, the number t of the term it was reported for; mterms
 *  (reader->maxDoc()+2 slots) receives a duplicate of the text of term number t.
 *  Both are handed to a FieldCache::StringIndex together with t, which is wrapped in a
 *  FieldCacheAuto of kind STRING_INDEX with ownContents set, and stored in the cache.
 *  Throws CL_ERR_Runtime when the enumeration has no current term
 *  ("no terms in field") or on "there are more terms than documents in field".
 *  NOTE(review): many lines of this function are not visible in this excerpt, including
 *  the cache-hit test, the statements that advance t, the conditions guarding the throw
 *  and the first branch of the final if/else, cleanup and the return. */
FieldCacheAuto* FieldCacheImpl::getStringIndex (IndexReader* reader, const TCHAR* field){
320
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
321
FieldCacheAuto* ret = lookup (reader, field, STRING_INDEX);
322
int32_t t = 0; // current term number
324
int32_t retLen = reader->maxDoc();
325
int32_t* retArray = _CL_NEWARRAY(int32_t,retLen);
326
memset(retArray,0,sizeof(int32_t)*retLen);
328
TCHAR** mterms = _CL_NEWARRAY(TCHAR*,retLen+2);
331
TermDocs* termDocs = reader->termDocs();
333
// Term with blank text in this field, handed to reader->terms()
// (presumably positions the enumeration at the first term of the field).
Term* term = _CLNEW Term (field, LUCENE_BLANK_STRING, false);
334
TermEnum* termEnum = reader->terms (term);
338
CND_PRECONDITION(t+1 <= retLen, "t out of bounds");
340
// an entry for documents that have no terms in this field
341
// should a document with no terms be at top or bottom?
342
// this puts them at the top - if it is changed, FieldDocSortedHitQueue
343
// needs to change as well.
347
if (termEnum->term(false) == NULL) {
348
_CLTHROWA(CL_ERR_Runtime,"no terms in field"); //todo: make rich message " + field);
351
Term* term = termEnum->term(false);
352
// Pointer comparison; the guarded statement is not visible here
// (presumably leaves the loop once a term of another field is reached).
if (term->field() != field)
356
// we expect that there is at most one term per document
358
_CLTHROWA(CL_ERR_Runtime,"there are more terms than documents in field"); //todo: rich error \"" + field + "\"");
359
mterms[t] = STRDUP_TtoT(term->text());
361
termDocs->seek (termEnum);
362
while (termDocs->next()) {
363
retArray[termDocs->doc()] = t;
367
} while (termEnum->next());
368
CND_PRECONDITION(t<retLen+2,"t out of bounds");
378
// if there are no terms, make the term array
379
// have a single NULL entry
380
// The oversized array is released first, then replaced by a one-slot array.
_CLDELETE_ARRAY(mterms);
381
mterms = _CL_NEWARRAY(TCHAR*,1); //todo: delete old mterms?
383
} else if (t < retLen) { //todo: check, was mterms.length
384
// if there are less terms than documents,
385
// trim off the dead array space
386
//const TCHAR** terms = _CL_NEWARRAY(TCHAR,t);
387
//System.arraycopy (mterms, 0, terms, 0, t);
390
//we simply shorten the length of the array...
394
FieldCache::StringIndex* value = _CLNEW FieldCache::StringIndex (retArray, mterms,t);
396
FieldCacheAuto* fa = _CLNEW FieldCacheAuto(retLen,FieldCacheAuto::STRING_INDEX);
397
fa->stringIndex = value;
398
fa->ownContents=true;
399
store (reader, field, STRING_INDEX, fa);
400
// The two unintern calls below are separated by lines not visible here
// (presumably one per exit path, balancing the intern at the top).
CLStringIntern::unintern(field);
403
CLStringIntern::unintern(field);
408
/** Chooses a value representation for a field by inspecting the text of one term, and
 *  returns the matching cache object.
 *  The term text is scanned twice: against the character set "0123456789 +-" (leading
 *  to getInts) and against "0123456789 Ee.+-" (leading to getFloats); getStringIndex is
 *  the remaining alternative. The chosen object is then stored a second time under
 *  (field, SortField::AUTO), so the same pointer is cached under two keys; the
 *  destructor of fieldcacheCacheReaderType skips AUTO entries when deleting values.
 *  Throws CL_ERR_Runtime with "no terms in field - cannot determine sort type" or
 *  "field does not appear to be indexed".
 *  The enumerator is closed and deleted in the _CLFINALLY clause.
 *  NOTE(review): the conditions that connect these steps (cache-hit test, flags set by
 *  the scans, the guards of the throws, the return) are not visible in this excerpt. */
FieldCacheAuto* FieldCacheImpl::getAuto (IndexReader* reader, const TCHAR* field) {
409
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
410
FieldCacheAuto* ret = lookup (reader, field, SortField::AUTO);
412
Term* term = _CLNEW Term (field, LUCENE_BLANK_STRING, false);
413
TermEnum* enumerator = reader->terms (term);
417
Term* term = enumerator->term(false);
419
_CLTHROWA(CL_ERR_Runtime,"no terms in field - cannot determine sort type"); //todo: make rich error: " + field + "
421
if (term->field() == field) {
422
const TCHAR* termtext = term->text();
423
size_t termTextLen = term->textLength();
426
// First scan: look for a character outside the integer character set.
for ( size_t i=0;i<termTextLen;i++ ){
427
if ( _tcschr(_T("0123456789 +-"),termtext[i]) == NULL ){
433
ret = getInts (reader, field);
437
int32_t searchLen = termTextLen;
438
// NOTE(review): termTextLen is a size_t; if it can be 0 here, termTextLen-1 wraps
// around - check whether an earlier guard exists in the lines not shown.
if ( termtext[termTextLen-1] == 'f' )
440
// Second scan: look for a character outside the floating point character set.
for ( int32_t i=0;i<searchLen;i++ ){
441
if ( _tcschr(_T("0123456789 Ee.+-"),termtext[i]) == NULL ){
447
ret = getFloats (reader, field);
449
ret = getStringIndex (reader, field);
454
store (reader, field, SortField::AUTO, ret);
457
_CLTHROWA (CL_ERR_Runtime,"field does not appear to be indexed"); //todo: make rich error: \"" + field + "\"
459
} _CLFINALLY( enumerator->close(); _CLDELETE(enumerator) );
462
CLStringIntern::unintern(field);
468
/** Returns the per-document Comparable values of a field for a custom comparator,
 *  building them when the cache lookup for (reader, field, comparator) does not already
 *  provide them.
 *  The array has reader->maxDoc() pointer slots, zero-filled; for each enumerated term
 *  one Comparable is obtained from comparator->getComparable(term text) and written to
 *  the slot of every document that termDocs reports for that term. The array is wrapped
 *  in a FieldCacheAuto of kind COMPARABLE_ARRAY with ownContents set, and stored.
 *  Throws CL_ERR_Runtime ("no terms in field ") when the enumeration has no current term.
 *  NOTE(review): several lines of this function (the cache-hit test, loop opener,
 *  cleanup and return) are not visible in this excerpt. */
FieldCacheAuto* FieldCacheImpl::getCustom (IndexReader* reader, const TCHAR* field, SortComparator* comparator){
469
// Interned so that the field can be compared by pointer against term->field() below
// (presumably Term field names are interned as well).
field = CLStringIntern::intern(field CL_FILELINE);
471
FieldCacheAuto* ret = lookup (reader, field, comparator);
473
int32_t retLen = reader->maxDoc();
474
Comparable** retArray = _CL_NEWARRAY(Comparable*,retLen);
475
memset(retArray,0,sizeof(Comparable*)*retLen);
477
TermDocs* termDocs = reader->termDocs();
478
// NOTE(review): unlike getInts/getFloats/getStrings/getStringIndex, no Term(field, blank)
// is passed to terms() here, so the enumeration is not started at this field - confirm
// that the first enumerated term can belong to `field`.
TermEnum* termEnum = reader->terms ();
481
if (termEnum->term(false) == NULL) {
482
_CLTHROWA(CL_ERR_Runtime,"no terms in field "); //todo: make rich error + field);
485
Term* term = termEnum->term(false);
486
// Pointer comparison; the guarded statement is not visible here
// (presumably leaves the loop once a term of another field is reached).
if (term->field() != field)
488
Comparable* termval = comparator->getComparable (term->text());
489
termDocs->seek (termEnum);
490
while (termDocs->next()) {
491
// NOTE(review): the same Comparable pointer is shared by every document of the term
// while ownContents is set below - confirm how FieldCacheAuto releases the contents.
retArray[termDocs->doc()] = termval;
493
} while (termEnum->next());
502
FieldCacheAuto* fa = _CLNEW FieldCacheAuto(retLen,FieldCacheAuto::COMPARABLE_ARRAY);
503
fa->comparableArray = retArray;
504
fa->ownContents=true;
505
// NOTE(review): the lookup above keys on the comparator, but this call uses the
// (field, type) overload of store() with SortField::CUSTOM. FileEntry::compareTo treats
// a NULL and a non-NULL custom pointer as different, so if the type-based key carries no
// comparator the stored value would not be found by the next lookup - confirm.
store (reader, field, SortField::CUSTOM, fa);
506
// The two unintern calls below are separated by lines not visible here
// (presumably one per exit path, balancing the intern at the top).
CLStringIntern::unintern(field);
509
CLStringIntern::unintern(field);
514
/** Constructs the per-reader cache map with value deletion by the container switched
 *  off (setDeleteValue(false)); the destructor of this type deletes values itself and
 *  leaves out entries of type SortField::AUTO. */
FieldCacheImpl::fieldcacheCacheReaderType::fieldcacheCacheReaderType(){
516
setDeleteValue(false);
518
FieldCacheImpl::fieldcacheCacheReaderType::~fieldcacheCacheReaderType(){
519
iterator itr = begin();
520
while ( itr != end() ){
521
FileEntry* f = itr->first;
522
if ( f->getType() != SortField::AUTO )
523
_CLDELETE( itr->second );