7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: QueryParserContext.php 16971 2009-07-22 18:05:45Z mikaelkael $
23 /** Zend_Search_Lucene_FSM */
24 require_once 'Zend/Search/Lucene/FSM.php';
26 /** Zend_Search_Lucene_Index_Term */
27 require_once 'Zend/Search/Lucene/Index/Term.php';
29 /** Zend_Search_Lucene_Search_QueryToken */
30 require_once 'Zend/Search/Lucene/Search/QueryToken.php';
32 /** Zend_Search_Lucene_Search_Query_Term */
33 require_once 'Zend/Search/Lucene/Search/Query/Term.php';
35 /** Zend_Search_Lucene_Search_Query_MultiTerm */
36 require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
38 /** Zend_Search_Lucene_Search_Query_Boolean */
39 require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
41 /** Zend_Search_Lucene_Search_Query_Phrase */
42 require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
44 /** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
45 require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
47 /** Zend_Search_Lucene_Search_QueryEntry */
48 require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
52 * @package Zend_Search_Lucene
54 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
55 * @license http://framework.zend.com/license/new-bsd New BSD License
57 class Zend_Search_Lucene_Search_QueryParserContext
60 * Default field for the context.
62 * null means, that term should be searched through all fields
63 * Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries to several
67 private $_defaultField;
70 * Field specified for next entry
74 private $_nextEntryField = null;
77 * True means, that term is required.
78 * False means, that term is prohibited.
79 * null means, that term is neither prohibited, nor required
83 private $_nextEntrySign = null;
87 * Entries grouping mode
89 const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
90 const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
97 private $_mode = null;
101 * Used in GM_SIGNS grouping mode
105 private $_signs = array();
109 * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
110 * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
114 private $_entries = array();
117 * Query string encoding
/**
 * Create a parsing context.
 *
 * @param string      $encoding     Query string encoding; stored for later use when queries are built
 * @param string|null $defaultField Field returned by getField() when no field is set for the next entry
 */
public function __construct($encoding, $defaultField = null)
{
    $this->_defaultField = $defaultField;
    $this->_encoding     = $encoding;
}
/**
 * Get the field that applies to the next entry.
 *
 * The field set through setNextEntryField() wins; when none is pending
 * the context default field is returned instead.
 *
 * @return string|null
 */
public function getField()
{
    if ($this->_nextEntryField === null) {
        return $this->_defaultField;
    }

    return $this->_nextEntryField;
}
/**
 * Remember the field to be used for the next entry.
 *
 * @param string $field
 */
public function setNextEntryField($field)
{
    $this->_nextEntryField = $field;
}
/**
 * Remember the sign ('required' / 'prohibited') for the next entry.
 *
 * Switches the context into signs grouping mode.
 *
 * @param integer $sign Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED or ::TT_PROHIBITED
 * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in boolean mode
 * @throws Zend_Search_Lucene_Exception                   if $sign is neither of the two sign token types
 */
public function setNextEntrySign($sign)
{
    // Signs cannot be combined with boolean operators inside one subquery
    if ($this->_mode === self::GM_BOOLEAN) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
    }

    $this->_mode = self::GM_SIGNS;

    if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
        $this->_nextEntrySign = true;
        return;
    }

    if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
        $this->_nextEntrySign = false;
        return;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
}
/**
 * Append an entry to the query being built.
 *
 * Outside of boolean mode the pending sign is recorded alongside the
 * entry. The pending field and sign are cleared afterwards.
 *
 * @param Zend_Search_Lucene_Search_QueryEntry $entry
 */
public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
{
    $this->_entries[] = $entry;

    // Signs are only tracked while the context is not in boolean mode
    if ($this->_mode !== self::GM_BOOLEAN) {
        $this->_signs[] = $this->_nextEntrySign;
    }

    // Field and sign apply to a single entry only
    $this->_nextEntrySign  = null;
    $this->_nextEntryField = null;
}
/**
 * Process fuzzy search or proximity search modifier ('~').
 *
 * The modifier is forwarded to the most recently added entry.
 *
 * @param mixed $parameter Modifier parameter, passed through unchanged to the entry
 * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not directly follow an entry
 */
public function processFuzzyProximityModifier($parameter = null)
{
    // Peek at the last entry instead of popping it: the entry object is
    // updated in place, and a failed check leaves the entry list untouched.
    // end() yields false for an empty list, which fails the instanceof test.
    $lastEntry = end($this->_entries);

    // The modifier must come right after a word or phrase: no field or sign
    // may be pending, and the last element must be an entry (not an operator).
    if ($this->_nextEntryField !== null
        || $this->_nextEntrySign !== null
        || !$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry
    ) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
    }

    $lastEntry->processFuzzyProximityModifier($parameter);
}
/**
 * Set boost factor ('^' modifier) on the most recently added entry.
 *
 * @param float $boostFactor
 * @throws Zend_Search_Lucene_Search_QueryParserException if the modifier does not directly follow an entry
 */
public function boost($boostFactor)
{
    // Peek at the last entry instead of popping it: the entry object is
    // updated in place, and a failed check leaves the entry list untouched.
    // end() yields false for an empty list, which fails the instanceof test.
    $lastEntry = end($this->_entries);

    // The modifier must come right after a word, phrase or subquery: no field
    // or sign may be pending, and the last element must be an entry
    // (not a boolean operator).
    if ($this->_nextEntryField !== null
        || $this->_nextEntrySign !== null
        || !$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry
    ) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
    }

    $lastEntry->boost($boostFactor);
}
/**
 * Append a logical operator to the entry list.
 *
 * Switches the context into boolean grouping mode.
 *
 * @param integer $operator Operator token type (Zend_Search_Lucene_Search_QueryToken class constant)
 * @throws Zend_Search_Lucene_Search_QueryParserException if the context is already in signs mode
 */
public function addLogicalOperator($operator)
{
    // Boolean operators cannot be combined with signs inside one subquery
    if ($this->_mode === self::GM_SIGNS) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
    }

    $this->_entries[] = $operator;
    $this->_mode      = self::GM_BOOLEAN;
}
/**
 * Build a 'signs style' query from the collected entries:
 * '+term1 term2 -term3 +(<subquery1>) ...'
 *
 * Entries without an explicit sign get a default derived from the
 * parser's default operator.
 *
 * @return Zend_Search_Lucene_Search_Query
 */
public function _signStyleExpressionQuery()
{
    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    // B_AND makes unsigned entries required (true); otherwise (B_OR) they are optional (null)
    $defaultSign = (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND)
                 ? true
                 : null;

    foreach ($this->_entries as $entryId => $entry) {
        $sign = $this->_signs[$entryId];
        if ($sign === null) {
            $sign = $defaultSign;
        }

        $query->addSubquery($entry->getQuery($this->_encoding), $sign);
    }

    return $query;
}
/**
 * Build a 'boolean style' query from the collected entries:
 * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
 *
 * Each level of an expression is treated as a boolean expression in
 * Disjunctive Normal Form. AND has higher precedence than OR, so the
 * logical query is a disjunction of one or more conjunctions of one or
 * more query entries.
 *
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException if the expression cannot be recognized
 */
private function _booleanExpressionQuery()
{
    $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();

    require_once 'Zend/Search/Lucene/Exception.php';
    try {
        foreach ($this->_entries as $entry) {
            if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
                $expressionRecognizer->processLiteral($entry);
            } else {
                // Anything that is not an entry is an operator token type
                switch ($entry) {
                    case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
                        break;

                    case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
                        $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
                        break;

                    default:
                        // Throw the exception class; Zend_Search_Lucene itself is the index class
                        throw new Zend_Search_Lucene_Exception('Boolean expression error. Unknown operator type.');
                }
            }
        }

        $conjuctions = $expressionRecognizer->finishExpression();
    } catch (Zend_Search_Exception $e) {
        // This is a query syntax error message and it should be user friendly,
        // so the underlying message is deliberately omitted.
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
    }

    // Remove 'only negative' conjunctions.
    // Each conjunction entry is a pair: [0] the query entry, [1] its sign.
    foreach ($conjuctions as $conjuctionId => $conjuction) {
        $nonNegativeEntryFound = false;

        foreach ($conjuction as $conjuctionEntry) {
            if ($conjuctionEntry[1]) {
                $nonNegativeEntryFound = true;
                break;
            }
        }

        if (!$nonNegativeEntryFound) {
            unset($conjuctions[$conjuctionId]);
        }
    }

    $subqueries = array();
    foreach ($conjuctions as $conjuction) {
        if (count($conjuction) == 1) {
            // One-term conjunction: use the entry's query directly
            $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
        } else {
            $subquery = new Zend_Search_Lucene_Search_Query_Boolean();

            foreach ($conjuction as $conjuctionEntry) {
                $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
            }

            $subqueries[] = $subquery;
        }
    }

    if (count($subqueries) == 0) {
        // Nothing left to search for; load the class explicitly since the
        // file-level requires do not include it
        require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($subqueries) == 1) {
        return $subqueries[0];
    }

    $query = new Zend_Search_Lucene_Search_Query_Boolean();

    foreach ($subqueries as $subquery) {
        // Non-required entry/subquery
        $query->addSubquery($subquery);
    }

    return $query;
}
/**
 * Build the query for the current context.
 *
 * Boolean mode uses the boolean expression builder; any other mode
 * (signs mode or no mode set) uses the signs style builder.
 *
 * @return Zend_Search_Lucene_Search_Query
 */
public function getQuery()
{
    return ($this->_mode === self::GM_BOOLEAN)
        ? $this->_booleanExpressionQuery()
        : $this->_signStyleExpressionQuery();
}