~ubuntu-branches/ubuntu/raring/clucene-core/raring-proposed

« back to all changes in this revision

Viewing changes to src/core/CLucene/search/TermScorer.cpp

  • Committer: Package Import Robot
  • Author(s): Fathi Boudra
  • Date: 2012-08-11 09:33:38 UTC
  • mfrom: (1.1.5)
  • Revision ID: package-import@ubuntu.com-20120811093338-fgrx41ftqew3qt6a
Tags: 2.3.3.4-1
* New upstream release (Closes: #661703).
* Convert package to multiarch.
* Drop obsolete patches:
  - 01_add_missing_include_bug505667.diff
  - 02_posixness_fix_bug530308.diff
* Add patches:
  - Fixing_ZLIB_configuration_in_shared_CMakeLists.patch
  - Fix-pkgconfig-file-by-adding-clucene-shared-library.patch
  - Install-contribs-lib.patch
  - multiarch.patch
* Update debian/compat: bump to 8.
* Update debian/control:
  - update build dependencies (add cmake, libboost-dev and libz-dev).
  - bump Standards-Version to 3.9.3.
  - rename packages due to ABI bump: libclucene0ldbl -> libclucene-core1.
  - add libclucene-contribs1 package.
* Update debian/rules:
  - rewrite to use CMake.
  - add multiarch support.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*------------------------------------------------------------------------------
 
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 
3
 
4
* Distributable under the terms of either the Apache License (Version 2.0) or 
 
5
* the GNU Lesser General Public License, as specified in the COPYING file.
 
6
------------------------------------------------------------------------------*/
 
7
#include "CLucene/_ApiHeader.h"
 
8
#include "_TermScorer.h"
 
9
#include "SearchHeader.h"
 
10
#include "Explanation.h"
 
11
#include "CLucene/index/Term.h"
 
12
#include "CLucene/index/Terms.h"
 
13
#include "TermQuery.h"
 
14
#include "Similarity.h"
 
15
#include "Explanation.h"
 
16
 
 
17
CL_NS_USE(index)
 
18
CL_NS_DEF(search)
 
19
 
 
20
        TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td, 
 
21
                        Similarity* similarity,uint8_t* _norms):
 
22
            Scorer(similarity),
 
23
            termDocs(td),
 
24
            norms(_norms),
 
25
            weight(w),
 
26
            weightValue(w->getValue()),
 
27
            _doc(0),
 
28
            pointer(0),
 
29
            pointerMax(0)
 
30
        {
 
31
                memset(docs,0,32*sizeof(int32_t));
 
32
                memset(freqs,0,32*sizeof(int32_t));
 
33
 
 
34
                for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++)
 
35
                        scoreCache[i] = getSimilarity()->tf(i) * weightValue;
 
36
        }
 
37
 
 
38
        TermScorer::~TermScorer(){
 
39
                _CLLDELETE(termDocs);
 
40
        }
 
41
  bool TermScorer::next(){
 
42
    pointer++;
 
43
    if (pointer >= pointerMax) {
 
44
      pointerMax = termDocs->read(docs, freqs, 32);    // refill buffer
 
45
      if (pointerMax != 0) {
 
46
        pointer = 0;
 
47
      } else {
 
48
        termDocs->close();                        // close stream
 
49
        _doc = LUCENE_INT32_MAX_SHOULDBE;                 // set to sentinel value
 
50
        return false;
 
51
      }
 
52
    } 
 
53
    _doc = docs[pointer];
 
54
    return true;
 
55
  }
 
56
 
 
57
  bool TermScorer::skipTo(int32_t target) {
 
58
    // first scan in cache
 
59
    for (pointer++; pointer < pointerMax; pointer++) {
 
60
      if (docs[pointer] >= target) {
 
61
        _doc = docs[pointer];
 
62
        return true;
 
63
      }
 
64
    }
 
65
 
 
66
    // not found in cache, seek underlying stream
 
67
    bool result = termDocs->skipTo(target);
 
68
      if (result) {
 
69
         pointerMax = 1;
 
70
         pointer = 0;
 
71
         docs[pointer] = _doc = termDocs->doc();
 
72
         freqs[pointer] = termDocs->freq();
 
73
      } else {
 
74
         _doc = LUCENE_INT32_MAX_SHOULDBE;
 
75
      }
 
76
      return result;
 
77
  }
 
78
 
 
79
  Explanation* TermScorer::explain(int32_t doc) {
 
80
    TermQuery* query = (TermQuery*)weight->getQuery();
 
81
        Explanation* tfExplanation = _CLNEW Explanation();
 
82
    int32_t tf = 0;
 
83
    while (pointer < pointerMax) {
 
84
      if (docs[pointer] == doc)
 
85
        tf = freqs[pointer];
 
86
      pointer++;
 
87
    }
 
88
    if (tf == 0) {
 
89
      if (termDocs->skipTo(doc)) {
 
90
        if (termDocs->doc() == doc) {
 
91
          tf = termDocs->freq();
 
92
        }
 
93
      }
 
94
    }
 
95
    termDocs->close();
 
96
    tfExplanation->setValue(getSimilarity()->tf(tf));
 
97
 
 
98
    TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1];
 
99
        TCHAR* termToString = query->getTerm(false)->toString();
 
100
        _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf);
 
101
    _CLDELETE_LCARRAY(termToString);
 
102
    tfExplanation->setDescription(buf);
 
103
        return tfExplanation;
 
104
  }
 
105
 
 
106
  TCHAR* TermScorer::toString() { 
 
107
     TCHAR* wb = weight->toString();
 
108
     int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer("  ")") + 1
 
109
     TCHAR* ret = _CL_NEWARRAY(TCHAR,rl);
 
110
         _sntprintf(ret,rl,_T("scorer(%s)"), wb);
 
111
     _CLDELETE_LCARRAY(wb);
 
112
     return ret;
 
113
  }
 
114
 
 
115
  float_t TermScorer::score(){
 
116
         int32_t f = freqs[pointer];
 
117
    float_t raw =                                   // compute tf(f)*weight
 
118
      f < LUCENE_SCORE_CACHE_SIZE                         // check cache
 
119
      ? scoreCache[f]                             // cache hit
 
120
      : getSimilarity()->tf(f) * weightValue;        // cache miss
 
121
 
 
122
      return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field
 
123
  }
 
124
 
 
125
  int32_t TermScorer::doc() const { return _doc; }
 
126
        
 
127
CL_NS_END