7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: Boolean.php 16541 2009-07-07 06:59:03Z bkarwin $
24 /** Zend_Search_Lucene_Search_Query */
25 require_once 'Zend/Search/Lucene/Search/Query.php';
27 /** Zend_Search_Lucene_Search_Weight_Boolean */
28 require_once 'Zend/Search/Lucene/Search/Weight/Boolean.php';
33 * @package Zend_Search_Lucene
35 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
36 * @license http://framework.zend.com/license/new-bsd New BSD License
38 class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
43 * Array of Zend_Search_Lucene_Search_Query
47 private $_subqueries = array();
51 * If true then subquery is required.
52 * If false then subquery is prohibited.
53 * If null then subquery is neither prohibited, nor required
55 * If array is null then all subqueries are required
59 private $_signs = array();
66 private $_resVector = null;
69 * A score factor based on the fraction of all query subqueries
70 * that a document contains.
71 * float for conjunction queries
72 * array of float for non conjunction queries
76 private $_coord = null;
80 * Class constructor. Create a new Boolean query object.
82 * if $signs array is omitted then all subqueries are required
83 * it differs from addSubquery() behavior, but should never be used
85 * @param array $subqueries Array of Zend_Search_Search_Query objects
86 * @param array $signs Array of signs. Sign is boolean|null.
89 public function __construct($subqueries = null, $signs = null)
91 if (is_array($subqueries)) {
92 $this->_subqueries = $subqueries;
95 // Check if all subqueries are required
96 if (is_array($signs)) {
97 foreach ($signs as $sign ) {
99 $this->_signs = $signs;
109 * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
111 * The sign is specified as:
112 * TRUE - subquery is required
113 * FALSE - subquery is prohibited
114 * NULL - subquery is neither prohibited, nor required
116 * @param Zend_Search_Lucene_Search_Query $subquery
117 * @param boolean|null $sign
120 public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
121 if ($sign !== true || $this->_signs !== null) { // Skip, if all subqueries are required
122 if ($this->_signs === null) { // Check, If all previous subqueries are required
123 $this->_signs = array();
124 foreach ($this->_subqueries as $prevSubquery) {
125 $this->_signs[] = true;
128 $this->_signs[] = $sign;
131 $this->_subqueries[] = $subquery;
135 * Re-write queries into primitive queries
137 * @param Zend_Search_Lucene_Interface $index
138 * @return Zend_Search_Lucene_Search_Query
140 public function rewrite(Zend_Search_Lucene_Interface $index)
142 $query = new Zend_Search_Lucene_Search_Query_Boolean();
143 $query->setBoost($this->getBoost());
145 foreach ($this->_subqueries as $subqueryId => $subquery) {
146 $query->addSubquery($subquery->rewrite($index),
147 ($this->_signs === null)? true : $this->_signs[$subqueryId]);
154 * Optimize query in the context of specified index
156 * @param Zend_Search_Lucene_Interface $index
157 * @return Zend_Search_Lucene_Search_Query
159 public function optimize(Zend_Search_Lucene_Interface $index)
161 $subqueries = array();
164 // Optimize all subqueries
165 foreach ($this->_subqueries as $id => $subquery) {
166 $subqueries[] = $subquery->optimize($index);
167 $signs[] = ($this->_signs === null)? true : $this->_signs[$id];
170 // Remove insignificant subqueries
171 foreach ($subqueries as $id => $subquery) {
172 if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
173 // Insignificant subquery has to be removed anyway
174 unset($subqueries[$id]);
178 if (count($subqueries) == 0) {
179 // Boolean query doesn't has non-insignificant subqueries
180 return new Zend_Search_Lucene_Search_Query_Insignificant();
182 // Check if all non-insignificant subqueries are prohibited
183 $allProhibited = true;
184 foreach ($signs as $sign) {
185 if ($sign !== false) {
186 $allProhibited = false;
190 if ($allProhibited) {
191 return new Zend_Search_Lucene_Search_Query_Insignificant();
195 // Check for empty subqueries
196 foreach ($subqueries as $id => $subquery) {
197 if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
198 if ($signs[$id] === true) {
199 // Matching is required, but is actually empty
200 return new Zend_Search_Lucene_Search_Query_Empty();
202 // Matching is optional or prohibited, but is empty
203 // Remove it from subqueries and signs list
204 unset($subqueries[$id]);
210 // Check, if reduced subqueries list is empty
211 if (count($subqueries) == 0) {
212 return new Zend_Search_Lucene_Search_Query_Empty();
215 // Check if all non-empty subqueries are prohibited
216 $allProhibited = true;
217 foreach ($signs as $sign) {
218 if ($sign !== false) {
219 $allProhibited = false;
223 if ($allProhibited) {
224 return new Zend_Search_Lucene_Search_Query_Empty();
228 // Check, if reduced subqueries list has only one entry
229 if (count($subqueries) == 1) {
230 // It's a query with only one required or optional clause
231 // (it's already checked, that it's not a prohibited clause)
233 if ($this->getBoost() == 1) {
234 return reset($subqueries);
237 $optimizedQuery = clone reset($subqueries);
238 $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());
240 return $optimizedQuery;
244 // Prepare first candidate for optimized query
245 $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
246 $optimizedQuery->setBoost($this->getBoost());
251 $boostFactors = array();
253 // Try to decompose term and multi-term subqueries
254 foreach ($subqueries as $id => $subquery) {
255 if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
256 $terms[] = $subquery->getTerm();
257 $tsigns[] = $signs[$id];
258 $boostFactors[] = $subquery->getBoost();
260 // remove subquery from a subqueries list
261 unset($subqueries[$id]);
263 } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
264 $subTerms = $subquery->getTerms();
265 $subSigns = $subquery->getSigns();
267 if ($signs[$id] === true) {
268 // It's a required multi-term subquery.
269 // Something like '... +(+term1 -term2 term3 ...) ...'
271 // Multi-term required subquery can be decomposed only if it contains
272 // required terms and doesn't contain prohibited terms:
273 // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
276 $hasRequired = false;
277 $hasProhibited = false;
278 if ($subSigns === null) {
279 // All subterms are required
282 foreach ($subSigns as $sign) {
283 if ($sign === true) {
285 } else if ($sign === false) {
286 $hasProhibited = true;
291 // Continue if subquery has prohibited terms or doesn't have required terms
292 if ($hasProhibited || !$hasRequired) {
296 foreach ($subTerms as $termId => $term) {
298 $tsigns[] = ($subSigns === null)? true : $subSigns[$termId];
299 $boostFactors[] = $subquery->getBoost();
302 // remove subquery from a subqueries list
303 unset($subqueries[$id]);
306 } else { // $signs[$id] === null || $signs[$id] === false
307 // It's an optional or prohibited multi-term subquery.
308 // Something like '... (+term1 -term2 term3 ...) ...'
310 // something like '... -(+term1 -term2 term3 ...) ...'
312 // Multi-term optional and required subqueries can be decomposed
313 // only if all terms are optional.
315 // Check if all terms are optional.
316 $onlyOptional = true;
317 if ($subSigns === null) {
318 // All subterms are required
319 $onlyOptional = false;
321 foreach ($subSigns as $sign) {
322 if ($sign !== null) {
323 $onlyOptional = false;
329 // Continue if non-optional terms are presented in this multi-term subquery
330 if (!$onlyOptional) {
334 foreach ($subTerms as $termId => $term) {
336 $tsigns[] = ($signs[$id] === null)? null /* optional */ :
337 false /* prohibited */;
338 $boostFactors[] = $subquery->getBoost();
341 // remove subquery from a subqueries list
342 unset($subqueries[$id]);
349 // Check, if there are no decomposed subqueries
350 if (count($terms) == 0 ) {
351 // return prepared candidate
352 return $optimizedQuery;
356 // Check, if all subqueries have been decomposed and all terms has the same boost factor
357 if (count($subqueries) == 0 && count(array_unique($boostFactors)) == 1) {
358 $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
359 $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());
361 return $optimizedQuery;
365 // This boolean query can't be transformed to Term/MultiTerm query and still contains
366 // several subqueries
368 // Separate prohibited terms
369 $prohibitedTerms = array();
370 foreach ($terms as $id => $term) {
371 if ($tsigns[$id] === false) {
372 $prohibitedTerms[] = $term;
376 unset($boostFactors[$id]);
380 if (count($terms) == 1) {
381 $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
382 $clause->setBoost(reset($boostFactors));
384 $subqueries[] = $clause;
385 $signs[] = reset($tsigns);
389 } else if (count($terms) > 1 && count(array_unique($boostFactors)) == 1) {
390 $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
391 $clause->setBoost(reset($boostFactors));
393 $subqueries[] = $clause;
394 // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
395 $signs[] = (in_array(true, $tsigns))? true : null;
401 if (count($prohibitedTerms) == 1) {
402 // (boost factors are not significant for prohibited clauses)
403 $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
406 // Clear prohibited terms list
407 $prohibitedTerms = array();
408 } else if (count($prohibitedTerms) > 1) {
409 // prepare signs array
410 $prohibitedSigns = array();
411 foreach ($prohibitedTerms as $id => $term) {
412 // all prohibited term are grouped as optional into multi-term query
413 $prohibitedSigns[$id] = null;
416 // (boost factors are not significant for prohibited clauses)
417 $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
418 // Clause sign is 'prohibited'
422 $prohibitedTerms = array();
425 /** @todo Group terms with the same boost factors together */
427 // Check, that all terms are processed
428 // Replace candidate for optimized query
429 if (count($terms) == 0 && count($prohibitedTerms) == 0) {
430 $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
431 $optimizedQuery->setBoost($this->getBoost());
434 return $optimizedQuery;
442 public function getSubqueries()
444 return $this->_subqueries;
449 * Return subqueries signs
453 public function getSigns()
455 return $this->_signs;
460 * Constructs an appropriate Weight implementation for this query.
462 * @param Zend_Search_Lucene_Interface $reader
463 * @return Zend_Search_Lucene_Search_Weight
465 public function createWeight(Zend_Search_Lucene_Interface $reader)
467 $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
468 return $this->_weight;
473 * Calculate result vector for Conjunction query
474 * (like '<subquery1> AND <subquery2> AND <subquery3>')
476 private function _calculateConjunctionResult()
478 $this->_resVector = null;
480 if (count($this->_subqueries) == 0) {
481 $this->_resVector = array();
484 $resVectors = array();
485 $resVectorsSizes = array();
486 $resVectorsIds = array(); // is used to prevent arrays comparison
487 foreach ($this->_subqueries as $subqueryId => $subquery) {
488 $resVectors[] = $subquery->matchedDocs();
489 $resVectorsSizes[] = count(end($resVectors));
490 $resVectorsIds[] = $subqueryId;
492 // sort resvectors in order of subquery cardinality increasing
493 array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
494 $resVectorsIds, SORT_ASC, SORT_NUMERIC,
497 foreach ($resVectors as $nextResVector) {
498 if($this->_resVector === null) {
499 $this->_resVector = $nextResVector;
501 //$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
504 * This code is used as workaround for array_intersect_key() slowness problem.
506 $updatedVector = array();
507 foreach ($this->_resVector as $id => $value) {
508 if (isset($nextResVector[$id])) {
509 $updatedVector[$id] = $value;
512 $this->_resVector = $updatedVector;
515 if (count($this->_resVector) == 0) {
516 // Empty result set, we don't need to check other terms
521 // ksort($this->_resVector, SORT_NUMERIC);
522 // Used algorithm doesn't change elements order
527 * Calculate result vector for non Conjunction query
528 * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
530 private function _calculateNonConjunctionResult()
532 $requiredVectors = array();
533 $requiredVectorsSizes = array();
534 $requiredVectorsIds = array(); // is used to prevent arrays comparison
538 foreach ($this->_subqueries as $subqueryId => $subquery) {
539 if ($this->_signs[$subqueryId] === true) {
541 $requiredVectors[] = $subquery->matchedDocs();
542 $requiredVectorsSizes[] = count(end($requiredVectors));
543 $requiredVectorsIds[] = $subqueryId;
544 } elseif ($this->_signs[$subqueryId] === false) {
546 // Do nothing. matchedDocs() may include non-matching id's
547 // Calculating prohibited vector may take significant time, but do not affect the result
550 // neither required, nor prohibited
552 $optional += $subquery->matchedDocs();
556 // sort resvectors in order of subquery cardinality increasing
557 array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
558 $requiredVectorsIds, SORT_ASC, SORT_NUMERIC,
562 foreach ($requiredVectors as $nextResVector) {
563 if($required === null) {
564 $required = $nextResVector;
566 //$required = array_intersect_key($required, $nextResVector);
569 * This code is used as workaround for array_intersect_key() slowness problem.
571 $updatedVector = array();
572 foreach ($required as $id => $value) {
573 if (isset($nextResVector[$id])) {
574 $updatedVector[$id] = $value;
577 $required = $updatedVector;
580 if (count($required) == 0) {
581 // Empty result set, we don't need to check other terms
587 if ($required !== null) {
588 $this->_resVector = &$required;
590 $this->_resVector = &$optional;
593 ksort($this->_resVector, SORT_NUMERIC);
598 * Score calculator for conjunction queries (all subqueries are required)
600 * @param integer $docId
601 * @param Zend_Search_Lucene_Interface $reader
604 public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
606 if ($this->_coord === null) {
607 $this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
608 count($this->_subqueries) );
613 foreach ($this->_subqueries as $subquery) {
614 $subscore = $subquery->score($docId, $reader);
616 if ($subscore == 0) {
620 $score += $subquery->score($docId, $reader) * $this->_coord;
623 return $score * $this->_coord * $this->getBoost();
628 * Score calculator for non conjunction queries (not all subqueries are required)
630 * @param integer $docId
631 * @param Zend_Search_Lucene_Interface $reader
634 public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
636 if ($this->_coord === null) {
637 $this->_coord = array();
640 foreach ($this->_signs as $sign) {
641 if ($sign !== false /* not prohibited */) {
646 for ($count = 0; $count <= $maxCoord; $count++) {
647 $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
652 $matchedSubqueries = 0;
653 foreach ($this->_subqueries as $subqueryId => $subquery) {
654 $subscore = $subquery->score($docId, $reader);
657 if ($this->_signs[$subqueryId] === false && $subscore != 0) {
661 // is required, but doen't match
662 if ($this->_signs[$subqueryId] === true && $subscore == 0) {
666 if ($subscore != 0) {
667 $matchedSubqueries++;
672 return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
676 * Execute query in context of index reader
677 * It also initializes necessary internal structures
679 * @param Zend_Search_Lucene_Interface $reader
680 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
682 public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
684 // Initialize weight if it's not done yet
685 $this->_initWeight($reader);
687 if ($docsFilter === null) {
688 // Create local documents filter if it's not provided by upper query
689 $docsFilter = new Zend_Search_Lucene_Index_DocsFilter();
692 foreach ($this->_subqueries as $subqueryId => $subquery) {
693 if ($this->_signs == null || $this->_signs[$subqueryId] === true) {
694 // Subquery is required
695 $subquery->execute($reader, $docsFilter);
697 $subquery->execute($reader);
701 if ($this->_signs === null) {
702 $this->_calculateConjunctionResult();
704 $this->_calculateNonConjunctionResult();
711 * Get document ids likely matching the query
713 * It's an array with document ids as keys (performance considerations)
717 public function matchedDocs()
719 return $this->_resVector;
723 * Score specified document
725 * @param integer $docId
726 * @param Zend_Search_Lucene_Interface $reader
729 public function score($docId, Zend_Search_Lucene_Interface $reader)
731 if (isset($this->_resVector[$docId])) {
732 if ($this->_signs === null) {
733 return $this->_conjunctionScore($docId, $reader);
735 return $this->_nonConjunctionScore($docId, $reader);
747 public function getQueryTerms()
751 foreach ($this->_subqueries as $id => $subquery) {
752 if ($this->_signs === null || $this->_signs[$id] !== false) {
753 $terms = array_merge($terms, $subquery->getQueryTerms());
761 * Query specific matches highlighting
763 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
765 protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
767 foreach ($this->_subqueries as $id => $subquery) {
768 if ($this->_signs === null || $this->_signs[$id] !== false) {
769 $subquery->_highlightMatches($highlighter);
779 public function __toString()
781 // It's used only for query visualisation, so we don't care about characters escaping
785 foreach ($this->_subqueries as $id => $subquery) {
790 if ($this->_signs === null || $this->_signs[$id] === true) {
792 } else if ($this->_signs[$id] === false) {
796 $query .= '(' . $subquery->__toString() . ')';
799 if ($this->getBoost() != 1) {
800 $query = '(' . $query . ')^' . round($this->getBoost(), 4);