7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: QueryToken.php 16541 2009-07-07 06:59:03Z bkarwin $
25 * @package Zend_Search_Lucene
27 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
28 * @license http://framework.zend.com/license/new-bsd New BSD License
30 class Zend_Search_Lucene_Search_QueryToken
// Lexeme types (TT_*). The constructor stores one of these in $this->type.
35 const TT_WORD = 0; // Word
36 const TT_PHRASE = 1; // Phrase (one or several quoted words)
37 const TT_FIELD = 2; // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
38 const TT_FIELD_INDICATOR = 3; // ':'
39 const TT_REQUIRED = 4; // '+'
40 const TT_PROHIBITED = 5; // '-'
41 const TT_FUZZY_PROX_MARK = 6; // '~'
42 const TT_BOOSTING_MARK = 7; // '^'
43 const TT_RANGE_INCL_START = 8; // '['
44 const TT_RANGE_INCL_END = 9; // ']'
45 const TT_RANGE_EXCL_START = 10; // '{'
46 const TT_RANGE_EXCL_END = 11; // '}'
47 const TT_SUBQUERY_START = 12; // '('
48 const TT_SUBQUERY_END = 13; // ')'
49 const TT_AND_LEXEME = 14; // Word 'and' in any letter case (compared via strtolower); also set for a syntax-element token
50 const TT_OR_LEXEME = 15; // Word 'or' in any letter case (compared via strtolower); also set for a syntax-element token
51 const TT_NOT_LEXEME = 16; // Word 'not' in any letter case (compared via strtolower); also set for a syntax-element token
52 const TT_TO_LEXEME = 17; // Word 'to' in any letter case (compared via strtolower)
53 const TT_NUMBER = 18; // Number, like: 10, 0.8, .64, ....
57 * Returns all possible lexeme types.
58 * It's used for syntax analyzer state machine initialization
62 public static function getTypes()
64 return array( self::TT_WORD,
67 self::TT_FIELD_INDICATOR,
70 self::TT_FUZZY_PROX_MARK,
71 self::TT_BOOSTING_MARK,
72 self::TT_RANGE_INCL_START,
73 self::TT_RANGE_INCL_END,
74 self::TT_RANGE_EXCL_START,
75 self::TT_RANGE_EXCL_END,
76 self::TT_SUBQUERY_START,
77 self::TT_SUBQUERY_END,
// Token categories (TC_*). The constructor switches on its $tokenCategory argument using these values.
90 const TC_WORD = 0; // Word
91 const TC_PHRASE = 1; // Phrase (one or several quoted words)
92 const TC_NUMBER = 2; // Numbers, which are used with syntax elements. Ex. roam~0.8
93 const TC_SYNTAX_ELEMENT = 3; // + - ( ) [ ] { } ! || && ~ ^
111 * Token position within query.
119 * QueryToken constructor needs the token category, the token text and the token position as parameters.
121 * @param integer $tokenCategory
122 * @param string $tokenText
123 * @param integer $position
125 public function __construct($tokenCategory, $tokenText, $position)
127 $this->text = $tokenText;
128 $this->position = $position + 1; // Start from 1
130 switch ($tokenCategory) {
132 if ( strtolower($tokenText) == 'and') {
133 $this->type = self::TT_AND_LEXEME;
134 } else if (strtolower($tokenText) == 'or') {
135 $this->type = self::TT_OR_LEXEME;
136 } else if (strtolower($tokenText) == 'not') {
137 $this->type = self::TT_NOT_LEXEME;
138 } else if (strtolower($tokenText) == 'to') {
139 $this->type = self::TT_TO_LEXEME;
141 $this->type = self::TT_WORD;
145 case self::TC_PHRASE:
146 $this->type = self::TT_PHRASE;
149 case self::TC_NUMBER:
150 $this->type = self::TT_NUMBER;
153 case self::TC_SYNTAX_ELEMENT:
154 switch ($tokenText) {
156 $this->type = self::TT_FIELD_INDICATOR;
160 $this->type = self::TT_REQUIRED;
164 $this->type = self::TT_PROHIBITED;
168 $this->type = self::TT_FUZZY_PROX_MARK;
172 $this->type = self::TT_BOOSTING_MARK;
176 $this->type = self::TT_RANGE_INCL_START;
180 $this->type = self::TT_RANGE_INCL_END;
184 $this->type = self::TT_RANGE_EXCL_START;
188 $this->type = self::TT_RANGE_EXCL_END;
192 $this->type = self::TT_SUBQUERY_START;
196 $this->type = self::TT_SUBQUERY_END;
200 $this->type = self::TT_NOT_LEXEME;
204 $this->type = self::TT_AND_LEXEME;
208 $this->type = self::TT_OR_LEXEME;
212 require_once 'Zend/Search/Lucene/Exception.php';
213 throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
217 case self::TC_NUMBER:
218 $this->type = self::TT_NUMBER;
221 require_once 'Zend/Search/Lucene/Exception.php';
222 throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');