<?php
/**
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: Phrase.php 16971 2009-07-22 18:05:45Z mikaelkael $
 */
\r
/** Zend_Search_Lucene_Search_Query_Preprocessing */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';

/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';

/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';

/** Zend_Search_Lucene_Search_Query_Empty */
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';

/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';

/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
\r
/**
 * Preprocessing representation of a phrase query.
 *
 * It's an internal class intended to finalize query processing after query parsing.
 * This type of query is not actually involved into query execution: rewrite()
 * converts it into the primitive query that is really executed.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Preprocessing_Phrase extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * Phrase to search.
     *
     * @var string
     */
    private $_phrase;

    /**
     * Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
     *
     * @var string
     */
    private $_phraseEncoding;

    /**
     * Field name. Null means that no field was specified for the phrase.
     *
     * @var string|null
     */
    private $_field;

    /**
     * Sets the number of other words permitted between words in query phrase.
     * If zero, then this is an exact phrase search.  For larger values this works
     * like a WITHIN or NEAR operator.
     *
     * The slop is in fact an edit-distance, where the units correspond to
     * moves of terms in the query phrase out of position.  For example, to switch
     * the order of two words requires two moves (the first move places the words
     * atop one another), so to permit re-orderings of phrases, the slop must be
     * at least two.
     * More exact matches are scored higher than sloppier matches, thus search
     * results are sorted by exactness.
     *
     * The slop is zero by default, requiring exact matches.
     *
     * @var integer
     */
    private $_slop = 0;

    /**
     * Class constructor.  Create a new preprocessing object for phrase query.
     *
     * @param string      $phrase          Phrase to search.
     * @param string      $phraseEncoding  Phrase encoding.
     * @param string|null $fieldName       Field name (null if the field is not specified).
     */
    public function __construct($phrase, $phraseEncoding, $fieldName)
    {
        $this->_phrase         = $phrase;
        $this->_phraseEncoding = $phraseEncoding;
        $this->_field          = $fieldName;
    }

    /**
     * Set slop
     *
     * @param integer $slop
     */
    public function setSlop($slop)
    {
        $this->_slop = $slop;
    }

    /**
     * Get slop
     *
     * @return integer
     */
    public function getSlop()
    {
        return $this->_slop;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Depending on the phrase and the field, the result is one of:
     *  - a Boolean query with one rewritten subquery per search field (no field specified);
     *  - a Term query (the whole phrase exists in the index as a term, or it is tokenized into one token);
     *  - an Insignificant query (the analyzer produced no tokens);
     *  - a Phrase query (two or more tokens).
     *
     * As a side effect the terms of the resulting query are stored in $this->_matches.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        // Allow to use wildcards within phrases
        // They are either removed by text analyzer or used as a part of keyword for keyword fields
        //
        // if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
        //     require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        //     throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
        // }

        // Split query into subqueries if field name is not specified
        if ($this->_field === null) {
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->setBoost($this->getBoost());

            $defaultField = Zend_Search_Lucene::getDefaultSearchField();
            if ($defaultField === null) {
                $searchFields = $index->getFieldNames(true);
            } else {
                $searchFields = array($defaultField);
            }

            foreach ($searchFields as $fieldName) {
                // The boost stays on the enclosing boolean query; subqueries only inherit the slop.
                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                     $this->_phraseEncoding,
                                                                                     $fieldName);
                $subquery->setSlop($this->getSlop());

                $query->addSubquery($subquery->rewrite($index));
            }

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
        if ($index->hasTerm($term)) {
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // tokenize phrase using current analyzer and process it as a phrase query
        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
        $tokenCount = count($tokens);

        if ($tokenCount == 0) {
            $this->_matches = array();
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if ($tokenCount == 1) {
            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // It's non-trivial phrase query
        $query = new Zend_Search_Lucene_Search_Query_Phrase();
        // Carry the boost over, exactly as the boolean and term branches above do;
        // otherwise a boosted multi-word phrase silently loses its boost.
        $query->setBoost($this->getBoost());
        // The slop does not depend on the tokens, so it is set once instead of once per token.
        $query->setSlop($this->getSlop());

        // Term offsets are accumulated from the analyzer's position increments,
        // starting from -1 (so a first increment of 1 yields offset 0).
        $position = -1;
        foreach ($tokens as $token) {
            $position += $token->getPositionIncrement();
            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $query->addTerm($term, $position);
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    /**
     * Query specific matches highlighting
     *
     * The phrase is tokenized with the default analyzer and the resulting
     * term texts are passed to the highlighter.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */

        /** Skip exact term matching recognition, keyword fields highlighting is not supported */

        /** Skip wildcard queries recognition. Supported wildcards are removed by text analyzer */

        // tokenize phrase using current analyzer and process it as a phrase query
        $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
        $tokenCount = count($tokens);

        if ($tokenCount == 0) {
            // Do nothing
            return;
        }

        if ($tokenCount == 1) {
            $highlighter->highlight($tokens[0]->getTermText());
            return;
        }

        // It's non-trivial phrase query
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * Format: [field:]"phrase"[~slop][^boost]; slop and boost are only
     * printed when they differ from 0 and 1 respectively.
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        if ($this->_field !== null) {
            $query = $this->_field . ':';
        } else {
            $query = '';
        }

        $query .= '"' . $this->_phrase . '"';

        if ($this->_slop != 0) {
            $query .= '~' . $this->_slop;
        }

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}
\r