1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/StdHeader.h"
8
#include "FieldsWriter.h"
10
#include "CLucene/util/VoidMap.h"
11
#include "CLucene/util/Reader.h"
12
#include "CLucene/util/Misc.h"
13
#include "CLucene/store/Directory.h"
14
#include "CLucene/store/IndexOutput.h"
15
#include "CLucene/document/Document.h"
16
#include "CLucene/document/Field.h"
17
#include "FieldInfos.h"
24
//Func - Constructor. Creates the two outputs of a segment in directory d:
//       the ".fdt" output is stored in fieldsStream and the ".fdx" output
//       is stored in indexStream.
//NOTE(review): parts of this definition (the member-initializer list after the
//       ':' and the enclosing braces) are not visible in this view, so how fn
//       is stored cannot be confirmed from here.
FieldsWriter::FieldsWriter(Directory* d, const char* segment, FieldInfos* fn):
28
//Pre - d contains a valid reference to a directory
29
// segment != NULL and contains the name of the segment
30
//Post - fn contains a valid reference to a FieldInfos
32
CND_PRECONDITION(segment != NULL,"segment is NULL");
34
//Build the name of the ".fdt" output from the segment name; buf is released
//with _CLDELETE_CaARRAY as soon as the output has been created.
const char* buf = Misc::segmentname(segment,".fdt");
35
fieldsStream = d->createOutput ( buf );
36
_CLDELETE_CaARRAY( buf );
38
//Same steps for the ".fdx" output, reusing buf for the new name
buf = Misc::segmentname(segment,".fdx");
39
indexStream = d->createOutput( buf );
40
_CLDELETE_CaARRAY( buf );
42
//Verify that the directory actually returned an output for the ".fdx" file
CND_CONDITION(indexStream != NULL,"indexStream is NULL");
45
//Func - Destructor
//NOTE(review): the statements of this destructor are not visible in this view;
//       presumably it releases the streams (e.g. through close()) - confirm.
FieldsWriter::~FieldsWriter(){
48
//Post - Instance has been destroyed
53
//Closes fieldsStream and releases both stream objects (fieldsStream and
//indexStream) with _CLDELETE.
void FieldsWriter::close() {
54
//Func - Closes all streams and frees all resources
56
//Post - All streams have been closed and all resources have been freed
58
//Check if fieldsStream is valid
//NOTE(review): the guard this comment refers to is not visible in this view
61
//Close the stream before its object is deleted
fieldsStream->close();
62
_CLDELETE( fieldsStream );
65
//Check if indexStream is valid
//NOTE(review): neither the guard nor a call to indexStream->close() is visible
//in this view; confirm that indexStream is closed before it is deleted
69
_CLDELETE( indexStream );
73
void FieldsWriter::addDocument(Document* doc) {
74
//Func - Adds a document
75
//Pre - doc contains a valid reference to a Document
76
// indexStream != NULL
77
// fieldsStream != NULL
78
//Post - The document doc has been added
80
CND_PRECONDITION(indexStream != NULL,"indexStream is NULL");
81
CND_PRECONDITION(fieldsStream != NULL,"fieldsStream is NULL");
83
indexStream->writeLong(fieldsStream->getFilePointer());
85
int32_t storedCount = 0;
86
DocumentFieldEnumeration* fields = doc->fields();
87
while (fields->hasMoreElements()) {
88
Field* field = fields->nextElement();
89
if (field->isStored())
93
fieldsStream->writeVInt(storedCount);
95
fields = doc->fields();
96
while (fields->hasMoreElements()) {
97
Field* field = fields->nextElement();
98
if (field->isStored()) {
99
fieldsStream->writeVInt(fieldInfos->fieldNumber(field->name()));
102
if (field->isTokenized())
103
bits |= FieldsWriter::FIELD_IS_TOKENIZED;
104
if (field->isBinary())
105
bits |= FieldsWriter::FIELD_IS_BINARY;
106
if (field->isCompressed())
107
bits |= FieldsWriter::FIELD_IS_COMPRESSED;
109
fieldsStream->writeByte(bits);
111
if ( field->isCompressed() ){
112
_CLTHROWA(CL_ERR_Runtime, "CLucene does not directly support compressed fields. Write a compressed byte array instead");
115
//FEATURE: this problem in Java Lucene too, if using Reader, data is not stored.
116
//todo: this is a logic bug...
117
//if the field is stored, and indexed, and is using a reader, the field won't get indexed
119
//if we could write zero prefixed vints (therefore static length), then we could
120
//write a reader directly to the field indexoutput and then go back and write the data
121
//length. however this is not supported in lucene yet...
122
//if this is ever implemented, then it would make sense to also be able to combine the
123
//FieldsWriter and DocumentWriter::invertDocument process, and use a streamfilter to
124
//write the field data while the documentwrite analyses the document! how cool would
125
//that be! it would cut out all these buffers!!!
128
// compression is disabled for the current field
129
if (field->isBinary()) {
130
//todo: since we currently don't support static length vints, we have to
131
//read the entire stream into memory first.... ugly!
132
jstreams::StreamBase<char>* stream = field->streamValue();
134
//how do we make sure we read the entire index in now???
135
//todo: we need to have a max amount, and guarantee its all in or throw an error...
136
int32_t rl = stream->read(sd,10000000,0);
139
fieldsStream->writeVInt(0); //todo: could we detect this earlier and not actually write the field??
141
//todo: if this int could be written with a constant length, then
142
//the stream could be read and written a bit at a time then the length
143
//is re-written at the end.
144
fieldsStream->writeVInt(rl);
145
fieldsStream->writeBytes((uint8_t*)sd, rl);
148
}else if ( field->stringValue() == NULL ){ //we must be using readerValue
149
CND_PRECONDITION(!field->isIndexed(), "Cannot store reader if it is indexed too")
150
Reader* r = field->readerValue();
152
//read the entire string
154
int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE);
155
if ( rl > LUCENE_INT32_MAX_SHOULDBE )
156
_CLTHROWA(CL_ERR_Runtime,"Field length too long");
160
fieldsStream->writeString( rv, (int32_t)rl);
161
}else if ( field->stringValue() != NULL ){
162
fieldsStream->writeString(field->stringValue(),_tcslen(field->stringValue()));
164
_CLTHROWA(CL_ERR_Runtime, "No values are set for the field");