~ubuntu-branches/ubuntu/raring/clucene-core/raring-proposed

« back to all changes in this revision

Viewing changes to src/core/CLucene/index/SegmentTermPositions.cpp

  • Committer: Package Import Robot
  • Author(s): Fathi Boudra
  • Date: 2012-08-11 09:33:38 UTC
  • mfrom: (1.1.5)
  • Revision ID: package-import@ubuntu.com-20120811093338-fgrx41ftqew3qt6a
Tags: 2.3.3.4-1
* New upstream release (Closes: #661703).
* Convert package to multiarch.
* Drop obsolete patches:
  - 01_add_missing_include_bug505667.diff
  - 02_posixness_fix_bug530308.diff
* Add patches:
  - Fixing_ZLIB_configuration_in_shared_CMakeLists.patch
  - Fix-pkgconfig-file-by-adding-clucene-shared-library.patch
  - Install-contribs-lib.patch
  - multiarch.patch
* Update debian/compat: bump to 8.
* Update debian/control:
  - update build dependencies (add cmake, libboost-dev and libz-dev).
  - bump Standards-Version to 3.9.3.
  - rename packages due to ABI bump: libclucene0ldbl -> libclucene-core1.
  - add libclucene-contribs1 package.
* Update debian/rules:
  - rewrite to use CMake.
  - add multiarch support.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*------------------------------------------------------------------------------
 
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 
3
 
4
* Distributable under the terms of either the Apache License (Version 2.0) or 
 
5
* the GNU Lesser General Public License, as specified in the COPYING file.
 
6
------------------------------------------------------------------------------*/
 
7
#include "CLucene/_ApiHeader.h"
 
8
#include "_SegmentHeader.h"
 
9
 
 
10
#include "Terms.h"
 
11
 
 
12
CL_NS_USE(util)
 
13
CL_NS_DEF(index)
 
14
 
 
15
/**
 * Creates a positions enumerator for the given segment reader.
 *
 * The prox stream is left NULL on purpose: it is cloned from the parent
 * lazily, the first time lazySkip() runs.
 *
 * @param _parent segment reader handed to SegmentTermDocs; asserted to be
 *                non-NULL through CND_CONDITION.
 */
SegmentTermPositions::SegmentTermPositions(const SegmentReader* _parent):
    SegmentTermDocs(_parent),
    proxStream(NULL),
    lazySkipPointer(-1),
    lazySkipProxCount(0)
{
    CND_CONDITION(_parent != NULL, "Parent is NULL");

    // Give the remaining per-position state a defined value. seek() and
    // skipProx() reset most of it as well, but next(), nextPosition(),
    // isPayloadAvailable() and getPayloadLength() read these members and
    // nothing guarantees that a seek happened first.
    proxCount = 0;
    position = 0;
    payloadLength = 0;
    needToLoadPayload = false;
}
 
21
 
 
22
// Destructor: all cleanup is delegated to close().
SegmentTermPositions::~SegmentTermPositions()
{
    this->close();
}
 
25
 
 
26
// Returns this object viewed through its TermDocs interface.
// A named cast is used so that the compiler only accepts a genuine
// up-cast and can never silently fall back to a reinterpreting cast.
TermDocs* SegmentTermPositions::__asTermDocs(){
    return static_cast<TermDocs*>(this);
}
 
29
// Returns this object viewed through its TermPositions interface.
// A named cast is used so that the compiler only accepts a genuine
// up-cast and can never silently fall back to a reinterpreting cast.
TermPositions* SegmentTermPositions::__asTermPositions(){
    return static_cast<TermPositions*>(this);
}
 
32
 
 
33
void SegmentTermPositions::seek(const TermInfo* ti, Term* term) {
 
34
    SegmentTermDocs::seek(ti, term);
 
35
    if (ti != NULL)
 
36
        lazySkipPointer = ti->proxPointer;
 
37
    
 
38
    lazySkipProxCount = 0;
 
39
    proxCount = 0;
 
40
    payloadLength = 0;
 
41
    needToLoadPayload = false;
 
42
}
 
43
 
 
44
void SegmentTermPositions::close() {
 
45
    SegmentTermDocs::close();
 
46
    //Check if proxStream still exists
 
47
    if(proxStream){
 
48
        proxStream->close();
 
49
        _CLDELETE( proxStream );
 
50
    }
 
51
}
 
52
 
 
53
int32_t SegmentTermPositions::nextPosition() {
 
54
    // perform lazy skips if neccessary
 
55
        lazySkip();
 
56
    proxCount--;
 
57
    return position += readDeltaPosition();
 
58
}
 
59
 
 
60
int32_t SegmentTermPositions::readDeltaPosition() {
 
61
        int32_t delta = proxStream->readVInt();
 
62
        if (currentFieldStoresPayloads) {
 
63
                // if the current field stores payloads then
 
64
                // the position delta is shifted one bit to the left.
 
65
                // if the LSB is set, then we have to read the current
 
66
                // payload length
 
67
                if ((delta & 1) != 0) {
 
68
                        payloadLength = proxStream->readVInt();
 
69
                } 
 
70
                delta = (int32_t)((uint32_t)delta >> (uint32_t)1);
 
71
                needToLoadPayload = true;
 
72
        }
 
73
        return delta;
 
74
}
 
75
 
 
76
void SegmentTermPositions::skippingDoc() {
 
77
        lazySkipProxCount += _freq;
 
78
}
 
79
 
 
80
bool SegmentTermPositions::next() {
 
81
        // we remember to skip the remaining positions of the current
 
82
    // document lazily
 
83
    lazySkipProxCount += proxCount;
 
84
 
 
85
    if (SegmentTermDocs::next()) {                                // run super
 
86
        proxCount = _freq;                                // note frequency
 
87
        position = 0;                             // reset position
 
88
        return true;
 
89
    }
 
90
    return false;
 
91
}
 
92
 
 
93
int32_t SegmentTermPositions::read(int32_t* /*docs*/, int32_t* /*freqs*/, int32_t /*length*/) {
 
94
    _CLTHROWA(CL_ERR_UnsupportedOperation,"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
 
95
}
 
96
 
 
97
void SegmentTermPositions::skipProx(const int64_t proxPointer, const int32_t _payloadLength){
 
98
    // we save the pointer, we might have to skip there lazily
 
99
    lazySkipPointer = proxPointer;
 
100
    lazySkipProxCount = 0;
 
101
    proxCount = 0;
 
102
    this->payloadLength = _payloadLength;
 
103
    needToLoadPayload = false;
 
104
}
 
105
 
 
106
void SegmentTermPositions::skipPositions(const int32_t n) {
 
107
        for ( int32_t f = n; f > 0; f-- ) {             // skip unread positions
 
108
                readDeltaPosition();
 
109
                skipPayload();
 
110
        }
 
111
}
 
112
 
 
113
void SegmentTermPositions::skipPayload() {
 
114
        if (needToLoadPayload && payloadLength > 0) {
 
115
                proxStream->seek(proxStream->getFilePointer() + payloadLength);
 
116
        }
 
117
        needToLoadPayload = false;
 
118
}
 
119
 
 
120
void SegmentTermPositions::lazySkip() {
 
121
    if (proxStream == NULL) {
 
122
      // clone lazily
 
123
      proxStream = parent->proxStream->clone();
 
124
    }
 
125
    
 
126
    // we might have to skip the current payload
 
127
    // if it was not read yet
 
128
    skipPayload();
 
129
      
 
130
    if (lazySkipPointer != -1) {
 
131
      proxStream->seek(lazySkipPointer);
 
132
      lazySkipPointer = -1;
 
133
    }
 
134
     
 
135
    if (lazySkipProxCount != 0) {
 
136
      skipPositions(lazySkipProxCount);
 
137
      lazySkipProxCount = 0;
 
138
    }
 
139
}
 
140
 
 
141
int32_t SegmentTermPositions::getPayloadLength() const { return payloadLength; }
 
142
 
 
143
uint8_t* SegmentTermPositions::getPayload(uint8_t* data) {
 
144
        if (!needToLoadPayload) {
 
145
                _CLTHROWA(CL_ERR_IO, "Payload cannot be loaded more than once for the same term position.");
 
146
        }
 
147
 
 
148
        // read payloads lazily
 
149
        uint8_t* retArray;
 
150
        // TODO: Complete length logic ( possibly using ValueArray ? )
 
151
        if (data == NULL /*|| data.length - offset < payloadLength*/) {
 
152
                // the array is too small to store the payload data,
 
153
                // so we allocate a new one
 
154
                _CLDELETE_ARRAY(data);
 
155
                retArray = _CL_NEWARRAY(uint8_t, payloadLength);
 
156
        } else {
 
157
                retArray = data;
 
158
        }
 
159
        proxStream->readBytes(retArray, payloadLength);
 
160
        needToLoadPayload = false;
 
161
        return retArray;
 
162
}
 
163
bool SegmentTermPositions::isPayloadAvailable() const { return needToLoadPayload && (payloadLength > 0); }
 
164
 
 
165
CL_NS_END