--- /dev/null
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ * @version $Id: QueryToken.php 16541 2009-07-07 06:59:03Z bkarwin $
+ */
+
+/**
+ * @category Zend
+ * @package Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license http://framework.zend.com/license/new-bsd New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryToken
+{
+ /**
+ * Token types.
+ */
+ const TT_WORD = 0; // Word
+ const TT_PHRASE = 1; // Phrase (one or several quoted words)
+ const TT_FIELD = 2; // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
+ const TT_FIELD_INDICATOR = 3; // ':'
+ const TT_REQUIRED = 4; // '+'
+ const TT_PROHIBITED = 5; // '-'
+ const TT_FUZZY_PROX_MARK = 6; // '~'
+ const TT_BOOSTING_MARK = 7; // '^'
+ const TT_RANGE_INCL_START = 8; // '['
+ const TT_RANGE_INCL_END = 9; // ']'
+ const TT_RANGE_EXCL_START = 10; // '{'
+ const TT_RANGE_EXCL_END = 11; // '}'
+ const TT_SUBQUERY_START = 12; // '('
+ const TT_SUBQUERY_END = 13; // ')'
+ const TT_AND_LEXEME = 14; // 'AND' or 'and'
+ const TT_OR_LEXEME = 15; // 'OR' or 'or'
+ const TT_NOT_LEXEME = 16; // 'NOT' or 'not'
+ const TT_TO_LEXEME = 17; // 'TO' or 'to'
+ const TT_NUMBER = 18; // Number, like: 10, 0.8, .64, ....
+
+
+ /**
+ * Returns all possible lexeme types.
+ * It's used for syntax analyzer state machine initialization
+ *
+ * @return array
+ */
+ public static function getTypes()
+ {
+ return array( self::TT_WORD,
+ self::TT_PHRASE,
+ self::TT_FIELD,
+ self::TT_FIELD_INDICATOR,
+ self::TT_REQUIRED,
+ self::TT_PROHIBITED,
+ self::TT_FUZZY_PROX_MARK,
+ self::TT_BOOSTING_MARK,
+ self::TT_RANGE_INCL_START,
+ self::TT_RANGE_INCL_END,
+ self::TT_RANGE_EXCL_START,
+ self::TT_RANGE_EXCL_END,
+ self::TT_SUBQUERY_START,
+ self::TT_SUBQUERY_END,
+ self::TT_AND_LEXEME,
+ self::TT_OR_LEXEME,
+ self::TT_NOT_LEXEME,
+ self::TT_TO_LEXEME,
+ self::TT_NUMBER
+ );
+ }
+
+
+ /**
+ * TokenCategories
+ */
+ const TC_WORD = 0; // Word
+ const TC_PHRASE = 1; // Phrase (one or several quoted words)
+ const TC_NUMBER = 2; // Nubers, which are used with syntax elements. Ex. roam~0.8
+ const TC_SYNTAX_ELEMENT = 3; // + - ( ) [ ] { } ! || && ~ ^
+
+
+ /**
+ * Token type.
+ *
+ * @var integer
+ */
+ public $type;
+
+ /**
+ * Token text.
+ *
+ * @var integer
+ */
+ public $text;
+
+ /**
+ * Token position within query.
+ *
+ * @var integer
+ */
+ public $position;
+
+
+ /**
+ * IndexReader constructor needs token type and token text as a parameters.
+ *
+ * @param integer $tokenCategory
+ * @param string $tokText
+ * @param integer $position
+ */
+ public function __construct($tokenCategory, $tokenText, $position)
+ {
+ $this->text = $tokenText;
+ $this->position = $position + 1; // Start from 1
+
+ switch ($tokenCategory) {
+ case self::TC_WORD:
+ if ( strtolower($tokenText) == 'and') {
+ $this->type = self::TT_AND_LEXEME;
+ } else if (strtolower($tokenText) == 'or') {
+ $this->type = self::TT_OR_LEXEME;
+ } else if (strtolower($tokenText) == 'not') {
+ $this->type = self::TT_NOT_LEXEME;
+ } else if (strtolower($tokenText) == 'to') {
+ $this->type = self::TT_TO_LEXEME;
+ } else {
+ $this->type = self::TT_WORD;
+ }
+ break;
+
+ case self::TC_PHRASE:
+ $this->type = self::TT_PHRASE;
+ break;
+
+ case self::TC_NUMBER:
+ $this->type = self::TT_NUMBER;
+ break;
+
+ case self::TC_SYNTAX_ELEMENT:
+ switch ($tokenText) {
+ case ':':
+ $this->type = self::TT_FIELD_INDICATOR;
+ break;
+
+ case '+':
+ $this->type = self::TT_REQUIRED;
+ break;
+
+ case '-':
+ $this->type = self::TT_PROHIBITED;
+ break;
+
+ case '~':
+ $this->type = self::TT_FUZZY_PROX_MARK;
+ break;
+
+ case '^':
+ $this->type = self::TT_BOOSTING_MARK;
+ break;
+
+ case '[':
+ $this->type = self::TT_RANGE_INCL_START;
+ break;
+
+ case ']':
+ $this->type = self::TT_RANGE_INCL_END;
+ break;
+
+ case '{':
+ $this->type = self::TT_RANGE_EXCL_START;
+ break;
+
+ case '}':
+ $this->type = self::TT_RANGE_EXCL_END;
+ break;
+
+ case '(':
+ $this->type = self::TT_SUBQUERY_START;
+ break;
+
+ case ')':
+ $this->type = self::TT_SUBQUERY_END;
+ break;
+
+ case '!':
+ $this->type = self::TT_NOT_LEXEME;
+ break;
+
+ case '&&':
+ $this->type = self::TT_AND_LEXEME;
+ break;
+
+ case '||':
+ $this->type = self::TT_OR_LEXEME;
+ break;
+
+ default:
+ require_once 'Zend/Search/Lucene/Exception.php';
+ throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
+ }
+ break;
+
+ case self::TC_NUMBER:
+ $this->type = self::TT_NUMBER;
+
+ default:
+ require_once 'Zend/Search/Lucene/Exception.php';
+ throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
+ }
+ }
+}