~ubuntu-branches/ubuntu/raring/clucene-core/raring-proposed

« back to all changes in this revision

Viewing changes to src/CLucene/index/FieldsWriter.cpp

  • Committer: Package Import Robot
  • Author(s): Fathi Boudra
  • Date: 2012-08-11 09:33:38 UTC
  • mfrom: (1.1.5)
  • Revision ID: package-import@ubuntu.com-20120811093338-fgrx41ftqew3qt6a
Tags: 2.3.3.4-1
* New upstream release (Closes: #661703).
* Convert package to multiarch.
* Drop obsolete patches:
  - 01_add_missing_include_bug505667.diff
  - 02_posixness_fix_bug530308.diff
* Add patches:
  - Fixing_ZLIB_configuration_in_shared_CMakeLists.patch
  - Fix-pkgconfig-file-by-adding-clucene-shared-library.patch
  - Install-contribs-lib.patch
  - multiarch.patch
* Update debian/compat: bump to 8.
* Update debian/control:
  - update build dependencies (add cmake, libboost-dev and libz-dev).
  - bump Standards-Version to 3.9.3.
  - rename packages due to ABI bump: libclucene0ldbl -> libclucene-core1.
  - add libclucene-contribs1 package.
* Update debian/rules:
  - rewrite to use CMake.
  - add multiarch support.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*------------------------------------------------------------------------------
2
 
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
3
 
4
 
* Distributable under the terms of either the Apache License (Version 2.0) or 
5
 
* the GNU Lesser General Public License, as specified in the COPYING file.
6
 
------------------------------------------------------------------------------*/
7
 
#include "CLucene/StdHeader.h"
8
 
#include "FieldsWriter.h"
9
 
 
10
 
#include "CLucene/util/VoidMap.h"
11
 
#include "CLucene/util/Reader.h"
12
 
#include "CLucene/util/Misc.h"
13
 
#include "CLucene/store/Directory.h"
14
 
#include "CLucene/store/IndexOutput.h"
15
 
#include "CLucene/document/Document.h"
16
 
#include "CLucene/document/Field.h"
17
 
#include "FieldInfos.h"
18
 
 
19
 
CL_NS_USE(store)
20
 
CL_NS_USE(util)
21
 
CL_NS_USE(document)
22
 
CL_NS_DEF(index)
23
 
        
24
 
FieldsWriter::FieldsWriter(Directory* d, const char* segment, FieldInfos* fn):
25
 
        fieldInfos(fn)
26
 
{
27
 
//Func - Constructor
28
 
//Pre  - d contains a valid reference to a directory
29
 
//       segment != NULL and contains the name of the segment
30
 
//Post - fn contains a valid reference toa a FieldInfos
31
 
 
32
 
        CND_PRECONDITION(segment != NULL,"segment is NULL");
33
 
 
34
 
        const char* buf = Misc::segmentname(segment,".fdt");
35
 
    fieldsStream = d->createOutput ( buf );
36
 
    _CLDELETE_CaARRAY( buf );
37
 
    
38
 
        buf = Misc::segmentname(segment,".fdx");
39
 
    indexStream = d->createOutput( buf );
40
 
    _CLDELETE_CaARRAY( buf );
41
 
      
42
 
        CND_CONDITION(indexStream != NULL,"indexStream is NULL");
43
 
}
44
 
 
45
 
FieldsWriter::~FieldsWriter(){
//Func - Destructor
//Pre  - true
//Post - close() has been invoked on this writer and the instance has been
//       destroyed

	this->close();
}
52
 
 
53
 
void FieldsWriter::close() {
54
 
//Func - Closes all streams and frees all resources
55
 
//Pre  - true
56
 
//Post - All streams have been closed all resources have been freed
57
 
 
58
 
        //Check if fieldsStream is valid
59
 
        if (fieldsStream){
60
 
                //Close fieldsStream
61
 
                fieldsStream->close();
62
 
                _CLDELETE( fieldsStream );
63
 
                }
64
 
 
65
 
        //Check if indexStream is valid
66
 
        if (indexStream){
67
 
                //Close indexStream
68
 
                indexStream->close();
69
 
                _CLDELETE( indexStream );
70
 
                }
71
 
}
72
 
 
73
 
void FieldsWriter::addDocument(Document* doc) {
74
 
//Func - Adds a document
75
 
//Pre  - doc contains a valid reference to a Document
76
 
//       indexStream != NULL
77
 
//       fieldsStream != NULL
78
 
//Post - The document doc has been added
79
 
 
80
 
        CND_PRECONDITION(indexStream != NULL,"indexStream is NULL");
81
 
        CND_PRECONDITION(fieldsStream != NULL,"fieldsStream is NULL");
82
 
 
83
 
        indexStream->writeLong(fieldsStream->getFilePointer());
84
 
 
85
 
        int32_t storedCount = 0;
86
 
        DocumentFieldEnumeration* fields = doc->fields();
87
 
        while (fields->hasMoreElements()) {
88
 
                Field* field = fields->nextElement();
89
 
                if (field->isStored())
90
 
                        storedCount++;
91
 
        }
92
 
        _CLDELETE(fields);
93
 
        fieldsStream->writeVInt(storedCount);
94
 
 
95
 
        fields = doc->fields();
96
 
        while (fields->hasMoreElements()) {
97
 
                Field* field = fields->nextElement();
98
 
                if (field->isStored()) {
99
 
                        fieldsStream->writeVInt(fieldInfos->fieldNumber(field->name()));
100
 
 
101
 
                        uint8_t bits = 0;
102
 
                        if (field->isTokenized())
103
 
                                bits |= FieldsWriter::FIELD_IS_TOKENIZED;
104
 
            if (field->isBinary())
105
 
                bits |= FieldsWriter::FIELD_IS_BINARY;
106
 
            if (field->isCompressed())
107
 
                bits |= FieldsWriter::FIELD_IS_COMPRESSED;
108
 
 
109
 
                        fieldsStream->writeByte(bits);
110
 
 
111
 
                        if ( field->isCompressed() ){
112
 
                                _CLTHROWA(CL_ERR_Runtime, "CLucene does not directly support compressed fields. Write a compressed byte array instead");
113
 
                        }else{
114
 
 
115
 
                                //FEATURE: this problem in Java Lucene too, if using Reader, data is not stored.
116
 
                                //todo: this is a logic bug...
117
 
                                //if the field is stored, and indexed, and is using a reader the field wont get indexed
118
 
                                //
119
 
                                //if we could write zero prefixed vints (therefore static length), then we could
120
 
                                //write a reader directly to the field indexoutput and then go back and write the data
121
 
                                //length. however this is not supported in lucene yet...
122
 
                                //if this is ever implemented, then it would make sense to also be able to combine the
123
 
                                //FieldsWriter and DocumentWriter::invertDocument process, and use a streamfilter to
124
 
                                //write the field data while the documentwrite analyses the document! how cool would
125
 
                                //that be! it would cut out all these buffers!!!
126
 
                                
127
 
                                
128
 
                                // compression is disabled for the current field
129
 
                                if (field->isBinary()) {
130
 
                                        //todo: since we currently don't support static length vints, we have to
131
 
                                        //read the entire stream into memory first.... ugly!
132
 
                                        jstreams::StreamBase<char>* stream = field->streamValue();
133
 
                                        const char* sd;
134
 
                                        //how do wemake sure we read the entire index in now???
135
 
                                        //todo: we need to have a max amount, and guarantee its all in or throw an error...
136
 
                                        int32_t rl = stream->read(sd,10000000,0);
137
 
 
138
 
                                        if ( rl < 0 ){
139
 
                                                fieldsStream->writeVInt(0); //todo: could we detect this earlier and not actually write the field??
140
 
                                        }else{
141
 
                                                //todo: if this int could be written with a constant length, then
142
 
                                                //the stream could be read and written a bit at a time then the length
143
 
                                                //is re-written at the end.
144
 
                                                fieldsStream->writeVInt(rl);
145
 
                                                fieldsStream->writeBytes((uint8_t*)sd, rl);
146
 
                                        }
147
 
 
148
 
                                }else if ( field->stringValue() == NULL ){ //we must be using readerValue
149
 
                                        CND_PRECONDITION(!field->isIndexed(), "Cannot store reader if it is indexed too")
150
 
                                        Reader* r = field->readerValue();
151
 
        
152
 
                                        //read the entire string
153
 
                                        const TCHAR* rv;
154
 
                                        int64_t rl = r->read(rv, LUCENE_INT32_MAX_SHOULDBE);
155
 
                                        if ( rl > LUCENE_INT32_MAX_SHOULDBE )
156
 
                                                _CLTHROWA(CL_ERR_Runtime,"Field length too long");
157
 
                                        else if ( rl < 0 )
158
 
                                                rl = 0;
159
 
 
160
 
                                        fieldsStream->writeString( rv, (int32_t)rl);
161
 
                                }else if ( field->stringValue() != NULL ){
162
 
                                        fieldsStream->writeString(field->stringValue(),_tcslen(field->stringValue()));
163
 
                                }else
164
 
                                        _CLTHROWA(CL_ERR_Runtime, "No values are set for the field");
165
 
                        }
166
 
                }
167
 
        }
168
 
        _CLDELETE(fields);
169
 
}
170
 
 
171
 
CL_NS_END