--- /dev/null
+<?php\r
+/**\r
+ * Zend Framework\r
+ *\r
+ * LICENSE\r
+ *\r
+ * This source file is subject to the new BSD license that is bundled\r
+ * with this package in the file LICENSE.txt.\r
+ * It is also available through the world-wide-web at this URL:\r
+ * http://framework.zend.com/license/new-bsd\r
+ * If you did not receive a copy of the license and are unable to\r
+ * obtain it through the world-wide-web, please send an email\r
+ * to license@zend.com so we can send you a copy immediately.\r
+ *\r
+ * @category Zend\r
+ * @package Zend_Search_Lucene\r
+ * @subpackage Search\r
+ * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)\r
+ * @license http://framework.zend.com/license/new-bsd New BSD License\r
+ * @version $Id: Term.php 16971 2009-07-22 18:05:45Z mikaelkael $\r
+ */\r
+\r
+\r
+/** Zend_Search_Lucene_Search_Query_Preprocessing */\r
+require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';\r
+\r
+/** Zend_Search_Lucene_Search_Query_Phrase */\r
+require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';\r
+\r
+/** Zend_Search_Lucene_Search_Query_Insignificant */\r
+require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';\r
+\r
+/** Zend_Search_Lucene_Search_Query_Empty */\r
+require_once 'Zend/Search/Lucene/Search/Query/Empty.php';\r
+\r
+/** Zend_Search_Lucene_Search_Query_Term */\r
+require_once 'Zend/Search/Lucene/Search/Query/Term.php';\r
+\r
+/** Zend_Search_Lucene_Index_Term */\r
+require_once 'Zend/Search/Lucene/Index/Term.php';\r
+\r
+\r
+/**\r
+ * Preprocessing query for a single word (query parser lexeme).\r
+ *\r
+ * It's an internal class intended to finalize query processing after query parsing.\r
+ * This type of query is not actually involved into query execution: rewrite() returns\r
+ * the term, multi-term, rewritten wildcard, "insignificant" or empty query built from the word.\r
+ *\r
+ * @category Zend\r
+ * @package Zend_Search_Lucene\r
+ * @subpackage Search\r
+ * @internal\r
+ * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)\r
+ * @license http://framework.zend.com/license/new-bsd New BSD License\r
+ */\r
+class Zend_Search_Lucene_Search_Query_Preprocessing_Term extends Zend_Search_Lucene_Search_Query_Preprocessing\r
+{\r
+    /**\r
+     * Word (query parser lexeme) to find.\r
+     *\r
+     * @var string\r
+     */\r
+    private $_word;\r
+\r
+    /**\r
+     * Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).\r
+     *\r
+     * @var string\r
+     */\r
+    private $_encoding;\r
+\r
+    /**\r
+     * Field name.\r
+     *\r
+     * When it is null, rewrite() searches the word in the default search field or,\r
+     * if no default search field is set, in every field returned by $index->getFieldNames(true).\r
+     *\r
+     * @var string|null\r
+     */\r
+    private $_field;\r
+\r
+    /**\r
+     * Class constructor. Create a new preprocessing object for a term query.\r
+     *\r
+     * @param string $word Non-tokenized word (query parser lexeme) to search.\r
+     * @param string $encoding Word encoding.\r
+     * @param string $fieldName Field name.\r
+     */\r
+    public function __construct($word, $encoding, $fieldName)\r
+    {\r
+        $this->_word     = $word;\r
+        $this->_encoding = $encoding;\r
+        $this->_field    = $fieldName;\r
+    }\r
+\r
+    /**\r
+     * Re-write query into primitive queries in the context of specified index\r
+     *\r
+     * Recognition order: search through several fields (no field given), exact term match,\r
+     * wildcard query, then "insignificant" / one-term / multi-term query.\r
+     * The terms of the resulting query are also stored in $this->_matches.\r
+     *\r
+     * @param Zend_Search_Lucene_Interface $index\r
+     * @return Zend_Search_Lucene_Search_Query\r
+     * @throws Zend_Search_Lucene_Search_QueryParserException if a wildcard sub-pattern is tokenized into several words\r
+     */\r
+    public function rewrite(Zend_Search_Lucene_Interface $index)\r
+    {\r
+        if ($this->_field === null) {\r
+            // Load lazily, the same way it is already done for the exception class below\r
+            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';\r
+            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();\r
+            $query->setBoost($this->getBoost());\r
+\r
+            $hasInsignificantSubqueries = false;\r
+\r
+            require_once 'Zend/Search/Lucene.php';\r
+            if (Zend_Search_Lucene::getDefaultSearchField() === null) {\r
+                $searchFields = $index->getFieldNames(true);\r
+            } else {\r
+                $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());\r
+            }\r
+\r
+            // Rewrite the same word for each field and collect all resulting terms\r
+            foreach ($searchFields as $fieldName) {\r
+                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Term($this->_word,\r
+                                                                                   $this->_encoding,\r
+                                                                                   $fieldName);\r
+                $rewrittenSubquery = $subquery->rewrite($index);\r
+                foreach ($rewrittenSubquery->getQueryTerms() as $term) {\r
+                    $query->addTerm($term);\r
+                }\r
+\r
+                if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {\r
+                    $hasInsignificantSubqueries = true;\r
+                }\r
+            }\r
+\r
+            if (count($query->getTerms()) === 0) {\r
+                $this->_matches = array();\r
+                if ($hasInsignificantSubqueries) {\r
+                    return new Zend_Search_Lucene_Search_Query_Insignificant();\r
+                } else {\r
+                    return new Zend_Search_Lucene_Search_Query_Empty();\r
+                }\r
+            }\r
+\r
+            $this->_matches = $query->getQueryTerms();\r
+            return $query;\r
+        }\r
+\r
+        // -------------------------------------\r
+        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)\r
+        // encoding is not used since we expect binary matching\r
+        $term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);\r
+        if ($index->hasTerm($term)) {\r
+            $query = new Zend_Search_Lucene_Search_Query_Term($term);\r
+            $query->setBoost($this->getBoost());\r
+\r
+            $this->_matches = $query->getQueryTerms();\r
+            return $query;\r
+        }\r
+\r
+\r
+        // -------------------------------------\r
+        // Recognize wildcard queries\r
+        $pattern = $this->_buildWildcardPattern();\r
+        if ($pattern === false) {\r
+            require_once 'Zend/Search/Lucene/Search/QueryParserException.php';\r
+            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');\r
+        }\r
+        if ($pattern !== null) {\r
+            // Wildcard query is recognized\r
+            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';\r
+            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);\r
+            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);\r
+            $query->setBoost($this->getBoost());\r
+\r
+            // Get rewritten query. Important! It also fills terms matching container.\r
+            $rewrittenQuery = $query->rewrite($index);\r
+            $this->_matches = $query->getQueryTerms();\r
+\r
+            return $rewrittenQuery;\r
+        }\r
+\r
+\r
+        // -------------------------------------\r
+        // Recognize one-term multi-term and "insignificant" queries\r
+        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';\r
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);\r
+\r
+        if (count($tokens) === 0) {\r
+            $this->_matches = array();\r
+            return new Zend_Search_Lucene_Search_Query_Insignificant();\r
+        }\r
+\r
+        if (count($tokens) === 1) {\r
+            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);\r
+            $query = new Zend_Search_Lucene_Search_Query_Term($term);\r
+            $query->setBoost($this->getBoost());\r
+\r
+            $this->_matches = $query->getQueryTerms();\r
+            return $query;\r
+        }\r
+\r
+        // It's not insignificant or one term query\r
+        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';\r
+        $query = new Zend_Search_Lucene_Search_Query_MultiTerm();\r
+\r
+        /**\r
+         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other\r
+         * analyzer design features\r
+         */\r
+        foreach ($tokens as $token) {\r
+            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);\r
+            $query->addTerm($term, true); // all subterms are required\r
+        }\r
+\r
+        $query->setBoost($this->getBoost());\r
+\r
+        $this->_matches = $query->getQueryTerms();\r
+        return $query;\r
+    }\r
+\r
+    /**\r
+     * Recognize a wildcard expression in the word and build the corresponding term pattern.\r
+     *\r
+     * The word is split at '*' and '?' characters. Each piece between wildcards is passed through\r
+     * the default analyzer and replaced with the text of its token; the wildcard characters\r
+     * themselves are copied to the pattern unchanged.\r
+     *\r
+     * Shared by rewrite() and _highlightMatches(), which differ only in how they react\r
+     * to a multi-word piece (exception vs. nothing to highlight).\r
+     *\r
+     * @return string|false|null Pattern text; false if some piece between wildcards is tokenized\r
+     *                           into more than one word; null if the word contains no wildcards.\r
+     */\r
+    private function _buildWildcardPattern()\r
+    {\r
+        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */\r
+        $word = false;\r
+        if (@preg_match('/\pL/u', 'a') === 1) {\r
+            // iconv() returns false if the word can't be converted\r
+            $word = iconv($this->_encoding, 'UTF-8', $this->_word);\r
+        }\r
+\r
+        if ($word !== false) {\r
+            $wildcardsPattern    = '/[*?]/u';\r
+            $subPatternsEncoding = 'UTF-8';\r
+        } else {\r
+            // No PCRE unicode support or conversion failure: work with the word as it was given\r
+            $word                = $this->_word;\r
+            $wildcardsPattern    = '/[*?]/';\r
+            $subPatternsEncoding = $this->_encoding;\r
+        }\r
+\r
+        $subPatterns = preg_split($wildcardsPattern, $word, -1, PREG_SPLIT_OFFSET_CAPTURE);\r
+        if ($subPatterns === false || count($subPatterns) <= 1) {\r
+            // Splitting failed or there is nothing to split at: it's not a wildcard query\r
+            return null;\r
+        }\r
+\r
+        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';\r
+        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();\r
+\r
+        $pattern = '';\r
+        foreach ($subPatterns as $id => $subPattern) {\r
+            // Append corresponding wildcard character to the pattern before each sub-pattern (except first).\r
+            // Captured offsets are byte offsets and the delimiter is a single byte,\r
+            // so the wildcard is the byte right before the sub-pattern.\r
+            if ($id !== 0) {\r
+                $pattern .= $word[ $subPattern[1] - 1 ];\r
+            }\r
+\r
+            // Check if each sub-pattern is a single word in terms of current analyzer\r
+            $tokens = $analyzer->tokenize($subPattern[0], $subPatternsEncoding);\r
+            if (count($tokens) > 1) {\r
+                return false;\r
+            }\r
+            foreach ($tokens as $token) {\r
+                $pattern .= $token->getTermText();\r
+            }\r
+        }\r
+\r
+        return $pattern;\r
+    }\r
+\r
+    /**\r
+     * Query specific matches highlighting\r
+     *\r
+     * Nothing is highlighted if the word is a wildcard expression with a multi-word piece\r
+     * or if the analyzer produces no tokens for the word.\r
+     *\r
+     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)\r
+     */\r
+    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)\r
+    {\r
+        /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */\r
+\r
+        /** Skip exact term matching recognition, keyword fields highlighting is not supported */\r
+\r
+        // -------------------------------------\r
+        // Recognize wildcard queries\r
+        $pattern = $this->_buildWildcardPattern();\r
+        if ($pattern === false) {\r
+            // Do nothing (nothing is highlighted)\r
+            return;\r
+        }\r
+        if ($pattern !== null) {\r
+            // Wildcard query is recognized\r
+            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';\r
+            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);\r
+            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);\r
+\r
+            $query->_highlightMatches($highlighter);\r
+            return;\r
+        }\r
+\r
+        // -------------------------------------\r
+        // Recognize one-term multi-term and "insignificant" queries\r
+        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';\r
+        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);\r
+\r
+        if (count($tokens) === 0) {\r
+            // Do nothing\r
+            return;\r
+        }\r
+\r
+        if (count($tokens) === 1) {\r
+            $highlighter->highlight($tokens[0]->getTermText());\r
+            return;\r
+        }\r
+\r
+        // It's not insignificant or one term query\r
+        $words = array();\r
+        foreach ($tokens as $token) {\r
+            $words[] = $token->getTermText();\r
+        }\r
+        $highlighter->highlight($words);\r
+    }\r
+\r
+    /**\r
+     * Print a query\r
+     *\r
+     * The result has the form [field:]word[^boost]; the boost is printed only if it differs from 1.\r
+     *\r
+     * @return string\r
+     */\r
+    public function __toString()\r
+    {\r
+        // It's used only for query visualisation, so we don't care about characters escaping\r
+        if ($this->_field !== null) {\r
+            $query = $this->_field . ':';\r
+        } else {\r
+            $query = '';\r
+        }\r
+\r
+        $query .= $this->_word;\r
+\r
+        // Loose comparison is intentional: the boost may be stored as int 1 or float 1.0\r
+        if ($this->getBoost() != 1) {\r
+            $query .= '^' . round($this->getBoost(), 4);\r
+        }\r
+\r
+        return $query;\r
+    }\r
+}\r