7
* This source file is subject to the new BSD license that is bundled
8
* with this package in the file LICENSE.txt.
9
* It is also available through the world-wide-web at this URL:
10
* http://framework.zend.com/license/new-bsd
11
* If you did not receive a copy of the license and are unable to
12
* obtain it through the world-wide-web, please send an email
13
* to license@zend.com so we can send you a copy immediately.
16
* @package Zend_Search_Lucene
18
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
19
* @license http://framework.zend.com/license/new-bsd New BSD License
23
/** Zend_Search_Lucene_Search_Query */
24
require_once 'Zend/Search/Lucene/Search/Query.php';
26
/** Zend_Search_Lucene_Search_Weight_Boolean */
27
require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';
32
* @package Zend_Search_Lucene
34
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
35
* @license http://framework.zend.com/license/new-bsd New BSD License
37
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
42
* Array of Zend_Search_Lucene_Search_Query
46
private $_subqueries = array();
50
* If true then subquery is required.
51
* If false then subquery is prohibited.
52
* If null then subquery is neither prohibited, nor required
54
* If array is null then all subqueries are required
58
private $_signs = array();
65
private $_resVector = null;
68
* A score factor based on the fraction of all query subqueries
69
* that a document contains.
70
* float for conjunction queries
71
* array of float for non conjunction queries
75
private $_coord = null;
79
* Class constructor. Create a new Boolean query object.
81
* if $signs array is omitted then all subqueries are required
82
* it differs from addSubquery() behavior, but should never be used
84
* @param array $subqueries Array of Zend_Search_Search_Query objects
85
* @param array $signs Array of signs. Sign is boolean|null.
88
public function __construct($subqueries = null, $signs = null)
90
if (is_array($subqueries)) {
91
$this->_subqueries = $subqueries;
94
// Check if all subqueries are required
95
if (is_array($signs)) {
96
foreach ($signs as $sign ) {
98
$this->_signs = $signs;
108
* Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
110
* The sign is specified as:
111
* TRUE - subquery is required
112
* FALSE - subquery is prohibited
113
* NULL - subquery is neither prohibited, nor required
115
* @param Zend_Search_Lucene_Search_Query $subquery
116
* @param boolean|null $sign
119
public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
120
if ($sign !== true || $this->_signs !== null) { // Skip, if all subqueries are required
121
if ($this->_signs === null) { // Check, If all previous subqueries are required
122
$this->_signs = array();
123
foreach ($this->_subqueries as $prevSubquery) {
124
$this->_signs[] = true;
127
$this->_signs[] = $sign;
130
$this->_subqueries[] = $subquery;
134
* Re-write queries into primitive queries
136
* @param Zend_Search_Lucene_Interface $index
137
* @return Zend_Search_Lucene_Search_Query
139
public function rewrite(Zend_Search_Lucene_Interface $index)
141
$query = new Zend_Search_Lucene_Search_Query_Boolean();
142
$query->setBoost($this->getBoost());
144
foreach ($this->_subqueries as $subqueryId => $subquery) {
145
$query->addSubquery($subquery->rewrite($index),
146
($this->_signs === null)? true : $this->_signs[$subqueryId]);
153
* Optimize query in the context of specified index
155
* @param Zend_Search_Lucene_Interface $index
156
* @return Zend_Search_Lucene_Search_Query
158
public function optimize(Zend_Search_Lucene_Interface $index)
160
$subqueries = array();
163
// Optimize all subqueries
164
foreach ($this->_subqueries as $id => $subquery) {
165
$subqueries[] = $subquery->optimize($index);
166
$signs[] = ($this->_signs === null)? true : $this->_signs[$id];
169
// Remove insignificant subqueries
170
foreach ($subqueries as $id => $subquery) {
171
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
172
// Insignificant subquery has to be removed anyway
173
unset($subqueries[$id]);
177
if (count($subqueries) == 0) {
178
// Boolean query doesn't has non-insignificant subqueries
179
return new Zend_Search_Lucene_Search_Query_Insignificant();
181
// Check if all non-insignificant subqueries are prohibited
182
$allProhibited = true;
183
foreach ($signs as $sign) {
184
if ($sign !== false) {
185
$allProhibited = false;
189
if ($allProhibited) {
190
return new Zend_Search_Lucene_Search_Query_Insignificant();
194
// Check for empty subqueries
195
foreach ($subqueries as $id => $subquery) {
196
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
197
if ($signs[$id] === true) {
198
// Matching is required, but is actually empty
199
return new Zend_Search_Lucene_Search_Query_Empty();
201
// Matching is optional or prohibited, but is empty
202
// Remove it from subqueries and signs list
203
unset($subqueries[$id]);
209
// Check, if reduced subqueries list is empty
210
if (count($subqueries) == 0) {
211
return new Zend_Search_Lucene_Search_Query_Empty();
214
// Check if all non-empty subqueries are prohibited
215
$allProhibited = true;
216
foreach ($signs as $sign) {
217
if ($sign !== false) {
218
$allProhibited = false;
222
if ($allProhibited) {
223
return new Zend_Search_Lucene_Search_Query_Empty();
227
// Check, if reduced subqueries list has only one entry
228
if (count($subqueries) == 1) {
229
// It's a query with only one required or optional clause
230
// (it's already checked, that it's not a prohibited clause)
232
if ($this->getBoost() == 1) {
233
return reset($subqueries);
236
$optimizedQuery = clone reset($subqueries);
237
$optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
239
return $optimizedQuery;
243
// Prepare first candidate for optimized query
244
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
245
$optimizedQuery->setBoost($this->getBoost());
250
$boostFactors = array();
252
// Try to decompose term and multi-term subqueries
253
foreach ($subqueries as $id => $subquery) {
254
if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
255
$terms[] = $subquery->getTerm();
256
$tsigns[] = $signs[$id];
257
$boostFactors[] = $subquery->getBoost();
259
// remove subquery from a subqueries list
260
unset($subqueries[$id]);
262
} else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
263
$subTerms = $subquery->getTerms();
264
$subSigns = $subquery->getSigns();
266
if ($signs[$id] === true) {
267
// It's a required multi-term subquery.
268
// Something like '... +(+term1 -term2 term3 ...) ...'
270
// Multi-term required subquery can be decomposed only if it contains
271
// required terms and doesn't contain prohibited terms:
272
// ... +(+term1 term2 ...) ... => ... +term1 term2 ...
275
$hasRequired = false;
276
$hasProhibited = false;
277
if ($subSigns === null) {
278
// All subterms are required
281
foreach ($subSigns as $sign) {
282
if ($sign === true) {
284
} else if ($sign === false) {
285
$hasProhibited = true;
290
// Continue if subquery has prohibited terms or doesn't have required terms
291
if ($hasProhibited || !$hasRequired) {
295
foreach ($subTerms as $termId => $term) {
297
$tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
298
$boostFactors[] = $subquery->getBoost();
301
// remove subquery from a subqueries list
302
unset($subqueries[$id]);
305
} else { // $signs[$id] === null || $signs[$id] === false
306
// It's an optional or prohibited multi-term subquery.
307
// Something like '... (+term1 -term2 term3 ...) ...'
309
// something like '... -(+term1 -term2 term3 ...) ...'
311
// Multi-term optional and required subqueries can be decomposed
312
// only if all terms are optional.
314
// Check if all terms are optional.
315
$onlyOptional = true;
316
if ($subSigns === null) {
317
// All subterms are required
318
$onlyOptional = false;
320
foreach ($subSigns as $sign) {
321
if ($sign !== null) {
322
$onlyOptional = false;
328
// Continue if non-optional terms are presented in this multi-term subquery
329
if (!$onlyOptional) {
333
foreach ($subTerms as $termId => $term) {
335
$tsigns[] = ($signs[$id] === null)? null /* optional */ :
336
false /* prohibited */;
337
$boostFactors[] = $subquery->getBoost();
340
// remove subquery from a subqueries list
341
unset($subqueries[$id]);
348
// Check, if there are no decomposed subqueries
349
if (count($terms) == 0 ) {
350
// return prepared candidate
351
return $optimizedQuery;
355
// Check, if all subqueries have been decomposed and all terms has the same boost factor
356
if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
357
$optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
358
$optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
360
return $optimizedQuery;
364
// This boolean query can't be transformed to Term/MultiTerm query and still contains
365
// several subqueries
367
// Separate prohibited terms
368
$prohibitedTerms = array();
369
foreach ($terms as $id => $term) {
370
if ($tsigns[$id] === false) {
371
$prohibitedTerms[] = $term;
375
unset($boostFactors[$id]);
379
if (count($terms) == 1) {
380
$clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
381
$clause->setBoost(reset($boostFactors));
383
$subqueries[] = $clause;
384
$signs[] = reset($tsigns);
388
} else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
389
$clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
390
$clause->setBoost(reset($boostFactors));
392
$subqueries[] = $clause;
393
// Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
394
$signs[] = (in_array(true, $tsigns))? true : null;
400
if (count($prohibitedTerms) == 1) {
401
// (boost factors are not significant for prohibited clauses)
402
$subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
405
// Clear prohibited terms list
406
$prohibitedTerms = array();
407
} else if (count($prohibitedTerms) > 1) {
408
// prepare signs array
409
$prohibitedSigns = array();
410
foreach ($prohibitedTerms as $id => $term) {
411
// all prohibited term are grouped as optional into multi-term query
412
$prohibitedSigns[$id] = null;
415
// (boost factors are not significant for prohibited clauses)
416
$subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
417
// Clause sign is 'prohibited'
421
$prohibitedTerms = array();
424
/** @todo Group terms with the same boost factors together */
426
// Check, that all terms are processed
427
// Replace candidate for optimized query
428
if (count($terms) == 0 && count($prohibitedTerms) == 0) {
429
$optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
430
$optimizedQuery->setBoost($this->getBoost());
433
return $optimizedQuery;
441
public function getSubqueries()
443
return $this->_subqueries;
448
* Return subqueries signs
452
public function getSigns()
454
return $this->_signs;
459
* Constructs an appropriate Weight implementation for this query.
461
* @param Zend_Search_Lucene_Interface $reader
462
* @return Zend_Search_Lucene_Search_Weight
464
public function createWeight(Zend_Search_Lucene_Interface $reader)
466
$this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
467
return $this->_weight;
472
* Calculate result vector for Conjunction query
473
* (like '<subquery1> AND <subquery2> AND <subquery3>')
475
private function _calculateConjunctionResult()
477
$this->_resVector = null;
479
if (count($this->_subqueries) == 0) {
480
$this->_resVector = array();
483
$resVectors = array();
484
$resVectorsSizes = array();
485
$resVectorsIds = array(); // is used to prevent arrays comparison
486
foreach ($this->_subqueries as $subqueryId => $subquery) {
487
$resVectors[] = $subquery->matchedDocs();
488
$resVectorsSizes[] = count(end($resVectors));
489
$resVectorsIds[] = $subqueryId;
491
// sort resvectors in order of subquery cardinality increasing
492
array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
493
$resVectorsIds, SORT_ASC, SORT_NUMERIC,
496
foreach ($resVectors as $nextResVector) {
497
if($this->_resVector === null) {
498
$this->_resVector = $nextResVector;
500
//$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
503
* This code is used as workaround for array_intersect_key() slowness problem.
505
$updatedVector = array();
506
foreach ($this->_resVector as $id => $value) {
507
if (isset($nextResVector[$id])) {
508
$updatedVector[$id] = $value;
511
$this->_resVector = $updatedVector;
514
if (count($this->_resVector) == 0) {
515
// Empty result set, we don't need to check other terms
520
// ksort($this->_resVector, SORT_NUMERIC);
521
// Used algorithm doesn't change elements order
526
* Calculate result vector for non Conjunction query
527
* (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
529
private function _calculateNonConjunctionResult()
531
$requiredVectors = array();
532
$requiredVectorsSizes = array();
533
$requiredVectorsIds = array(); // is used to prevent arrays comparison
537
foreach ($this->_subqueries as $subqueryId => $subquery) {
538
if ($this->_signs[$subqueryId] === true) {
540
$requiredVectors[] = $subquery->matchedDocs();
541
$requiredVectorsSizes[] = count(end($requiredVectors));
542
$requiredVectorsIds[] = $subqueryId;
543
} elseif ($this->_signs[$subqueryId] === false) {
545
// Do nothing. matchedDocs() may include non-matching id's
546
// Calculating prohibited vector may take significant time, but do not affect the result
549
// neither required, nor prohibited
551
$optional += $subquery->matchedDocs();
555
// sort resvectors in order of subquery cardinality increasing
556
array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
557
$requiredVectorsIds, SORT_ASC, SORT_NUMERIC,
561
foreach ($requiredVectors as $nextResVector) {
562
if($required === null) {
563
$required = $nextResVector;
565
//$required = array_intersect_key($required, $nextResVector);
568
* This code is used as workaround for array_intersect_key() slowness problem.
570
$updatedVector = array();
571
foreach ($required as $id => $value) {
572
if (isset($nextResVector[$id])) {
573
$updatedVector[$id] = $value;
576
$required = $updatedVector;
579
if (count($required) == 0) {
580
// Empty result set, we don't need to check other terms
586
if ($required !== null) {
587
$this->_resVector = &$required;
589
$this->_resVector = &$optional;
592
ksort($this->_resVector, SORT_NUMERIC);
597
* Score calculator for conjunction queries (all subqueries are required)
599
* @param integer $docId
600
* @param Zend_Search_Lucene_Interface $reader
603
public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
605
if ($this->_coord === null) {
606
$this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
607
count($this->_subqueries) );
612
foreach ($this->_subqueries as $subquery) {
613
$subscore = $subquery->score($docId, $reader);
615
if ($subscore == 0) {
619
$score += $subquery->score($docId, $reader) * $this->_coord;
622
return $score * $this->_coord * $this->getBoost();
627
* Score calculator for non conjunction queries (not all subqueries are required)
629
* @param integer $docId
630
* @param Zend_Search_Lucene_Interface $reader
633
public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
635
if ($this->_coord === null) {
636
$this->_coord = array();
639
foreach ($this->_signs as $sign) {
640
if ($sign !== false /* not prohibited */) {
645
for ($count = 0; $count <= $maxCoord; $count++) {
646
$this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
651
$matchedSubqueries = 0;
652
foreach ($this->_subqueries as $subqueryId => $subquery) {
653
$subscore = $subquery->score($docId, $reader);
656
if ($this->_signs[$subqueryId] === false && $subscore != 0) {
660
// is required, but doen't match
661
if ($this->_signs[$subqueryId] === true && $subscore == 0) {
665
if ($subscore != 0) {
666
$matchedSubqueries++;
671
return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
675
* Execute query in context of index reader
676
* It also initializes necessary internal structures
678
* @param Zend_Search_Lucene_Interface $reader
680
public function execute(Zend_Search_Lucene_Interface $reader)
682
// Initialize weight if it's not done yet
683
$this->_initWeight($reader);
685
foreach ($this->_subqueries as $subquery) {
686
$subquery->execute($reader);
689
if ($this->_signs === null) {
690
$this->_calculateConjunctionResult();
692
$this->_calculateNonConjunctionResult();
699
* Get document ids likely matching the query
701
* It's an array with document ids as keys (performance considerations)
705
public function matchedDocs()
707
return $this->_resVector;
711
* Score specified document
713
* @param integer $docId
714
* @param Zend_Search_Lucene_Interface $reader
717
public function score($docId, Zend_Search_Lucene_Interface $reader)
719
if (isset($this->_resVector[$docId])) {
720
if ($this->_signs === null) {
721
return $this->_conjunctionScore($docId, $reader);
723
return $this->_nonConjunctionScore($docId, $reader);
735
public function getQueryTerms()
739
foreach ($this->_subqueries as $id => $subquery) {
740
if ($this->_signs === null || $this->_signs[$id] !== false) {
741
$terms = array_merge($terms, $subquery->getQueryTerms());
749
* Highlight query terms
751
* @param integer &$colorIndex
752
* @param Zend_Search_Lucene_Document_Html $doc
754
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
756
foreach ($this->_subqueries as $id => $subquery) {
757
if ($this->_signs === null || $this->_signs[$id] !== false) {
758
$subquery->highlightMatchesDOM($doc, $colorIndex);
768
public function __toString()
770
// It's used only for query visualisation, so we don't care about characters escaping
774
foreach ($this->_subqueries as $id => $subquery) {
779
if ($this->_signs === null || $this->_signs[$id] === true) {
781
} else if ($this->_signs[$id] === false) {
785
$query .= '(' . $subquery->__toString() . ')';
787
if ($subquery->getBoost() != 1) {
788
$query .= '^' . round($subquery->getBoost(), 4);