7
* This source file is subject to the new BSD license that is bundled
8
* with this package in the file LICENSE.txt.
9
* It is also available through the world-wide-web at this URL:
10
* http://framework.zend.com/license/new-bsd
11
* If you did not receive a copy of the license and are unable to
12
* obtain it through the world-wide-web, please send an email
13
* to license@zend.com so we can send you a copy immediately.
16
* @package Zend_Search_Lucene
18
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
19
* @license http://framework.zend.com/license/new-bsd New BSD License
23
/** Zend_Search_Lucene_Exception */
24
require_once 'Zend/Search/Lucene/Exception.php';
26
/** Zend_Search_Lucene_Analysis_Analyzer */
27
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
29
/** Zend_Search_Lucene_Index_SegmentWriter */
30
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
35
* @package Zend_Search_Lucene
37
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
38
* @license http://framework.zend.com/license/new-bsd New BSD License
40
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
44
* Array of the Zend_Search_Lucene_Index_Term objects
45
* Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
49
protected $_termDictionary;
52
* Documents, which contain the term
61
* @param Zend_Search_Lucene_Storage_Directory $directory
64
/**
 * Object constructor.
 *
 * Forwards the directory and name unchanged to the parent segment writer,
 * then resets both in-memory term structures to empty arrays.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string $name  presumably the segment name; passed straight to the parent
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
    parent::__construct($directory, $name);

    // Term key => Zend_Search_Lucene_Index_Term object.
    $this->_termDictionary = array();

    // Term key => (document number => list of positions).
    $this->_termDocs = array();
}
74
* Adds a document to this segment.
76
* @param Zend_Search_Lucene_Document $document
77
* @throws Zend_Search_Lucene_Exception
79
/**
 * Adds a document to this segment.
 *
 * For each field of the document the method registers the field through
 * addField(), rejects fields that ask for term-vector storage, records the
 * positions of every term of indexed fields under the current
 * $this->_docCount, and collects stored fields. Afterwards one encoded norm
 * character per indexed field known to the writer is appended to
 * $this->_norms, and the collected stored fields are passed to
 * addStoredFields().
 *
 * NOTE(review): this excerpt does not show where $docNorms and $position are
 * initialised, nor the tails of the multi-line norm expressions - confirm
 * against the complete file before changing any logic here.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception when a field has storeTermVector set
 */
public function addDocument(Zend_Search_Lucene_Document $document)
81
// Fields with isStored set; handed to addStoredFields() at the very end.
$storedFields = array();
83
// Similarity implementation used for lengthNorm()/encodeNorm() below.
$similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
85
foreach ($document->getFieldNames() as $fieldName) {
86
$field = $document->getField($fieldName);
87
// Register the field with the segment writer before inspecting its flags.
$this->addField($field);
89
// Term vectors cannot be written by this writer, so fail loudly.
if ($field->storeTermVector) {
91
* @todo term vector storing support
93
throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
96
if ($field->isIndexed) {
97
// Tokenized field: run the value through the default analyzer and index each token.
if ($field->isTokenized) {
98
$analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
99
$analyzer->setInput($field->value, $field->encoding);
103
// nextToken() returns null once the token stream is exhausted.
while (($token = $analyzer->nextToken()) !== null) {
106
$term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
107
// The term key indexes both $_termDictionary and $_termDocs.
$termKey = $term->key();
109
// First occurrence of this term in the segment: create the dictionary
// entry and an empty position list for the current document.
if (!isset($this->_termDictionary[$termKey])) {
111
$this->_termDictionary[$termKey] = $term;
112
$this->_termDocs[$termKey] = array();
113
$this->_termDocs[$termKey][$this->_docCount] = array();
114
} else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
115
// Existing term, but new term entry
116
$this->_termDocs[$termKey][$this->_docCount] = array();
118
// Advance the running position by the token's increment and record it.
// NOTE(review): the initial value of $position is not visible in this excerpt.
$position += $token->getPositionIncrement();
119
$this->_termDocs[$termKey][$this->_docCount][] = $position;
122
// Norm character for this field of the current document. The remaining
// arguments of this expression are on lines not visible here.
$docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
127
// Untokenized field: the whole UTF-8 value becomes a single term.
$term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name);
128
$termKey = $term->key();
130
// Same dictionary/postings bookkeeping as in the tokenized branch.
if (!isset($this->_termDictionary[$termKey])) {
132
$this->_termDictionary[$termKey] = $term;
133
$this->_termDocs[$termKey] = array();
134
$this->_termDocs[$termKey][$this->_docCount] = array();
135
} else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
136
// Existing term, but new term entry
137
$this->_termDocs[$termKey][$this->_docCount] = array();
139
$this->_termDocs[$termKey][$this->_docCount][] = 0; // position
141
// Length norm is computed for exactly one term; the factors it is
// multiplied by continue on lines not visible in this excerpt.
$docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
147
if ($field->isStored) {
148
$storedFields[] = $field;
153
// Second pass over every field known to the writer ($this->_fields), not
// only the fields of this document, extending each field's norms string.
foreach ($this->_fields as $fieldName => $field) {
154
// NOTE(review): the body of this guard is not visible here - presumably
// it skips fields that are not indexed; confirm.
if (!$field->isIndexed) {
158
// Field has no norms string yet: create one from the zero-length norm.
// The repeat count is on a line not visible in this excerpt.
if (!isset($this->_norms[$fieldName])) {
159
$this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
163
// Append the norm computed above for this document when there is one;
// the other append uses the zero-length norm instead.
if (isset($docNorms[$fieldName])){
164
$this->_norms[$fieldName] .= $docNorms[$fieldName];
166
$this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
170
$this->addStoredFields($storedFields);
175
* Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
177
/**
 * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
 *
 * Sorts the in-memory dictionary by term key (string comparison), opens the
 * dictionary files, emits every term together with its postings through
 * addTerm(), and finally closes the dictionary files.
 */
protected function _dumpDictionary()
{
    // Terms are emitted in ascending key order.
    ksort($this->_termDictionary, SORT_STRING);

    $this->initializeDictionaryFiles();

    foreach ($this->_termDictionary as $key => $dictionaryTerm) {
        // Postings for this term: document number => list of positions.
        $postings = $this->_termDocs[$key];
        $this->addTerm($dictionaryTerm, $postings);
    }

    $this->closeDictionaryFiles();
}
192
* Close segment, write it to disk and return segment info
194
* @return Zend_Search_Lucene_Index_SegmentInfo
196
public function close()
198
if ($this->_docCount == 0) {
203
$this->_dumpDictionary();
205
$this->_generateCFS();
207
return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,