final move of files
[web.mtrack] / Zend / Search / Lucene / Search / QueryToken.php
diff --git a/Zend/Search/Lucene/Search/QueryToken.php b/Zend/Search/Lucene/Search/QueryToken.php
new file mode 100644 (file)
index 0000000..a231c43
--- /dev/null
@@ -0,0 +1,225 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: QueryToken.php 16541 2009-07-07 06:59:03Z bkarwin $
+ */
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryToken
+{
+    /**
+     * Token types.
+     */
+    const TT_WORD                 = 0;  // Word
+    const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
+    const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
+    const TT_FIELD_INDICATOR      = 3;  // ':'
+    const TT_REQUIRED             = 4;  // '+'
+    const TT_PROHIBITED           = 5;  // '-'
+    const TT_FUZZY_PROX_MARK      = 6;  // '~'
+    const TT_BOOSTING_MARK        = 7;  // '^'
+    const TT_RANGE_INCL_START     = 8;  // '['
+    const TT_RANGE_INCL_END       = 9;  // ']'
+    const TT_RANGE_EXCL_START     = 10; // '{'
+    const TT_RANGE_EXCL_END       = 11; // '}'
+    const TT_SUBQUERY_START       = 12; // '('
+    const TT_SUBQUERY_END         = 13; // ')'
+    const TT_AND_LEXEME           = 14; // 'AND' or 'and'
+    const TT_OR_LEXEME            = 15; // 'OR'  or 'or'
+    const TT_NOT_LEXEME           = 16; // 'NOT' or 'not'
+    const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
+    const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....
+
+
+    /**
+     * Returns all possible lexeme types.
+     * It's used for syntax analyzer state machine initialization
+     *
+     * @return array
+     */
+    public static function getTypes()
+    {
+        return array(   self::TT_WORD,
+                        self::TT_PHRASE,
+                        self::TT_FIELD,
+                        self::TT_FIELD_INDICATOR,
+                        self::TT_REQUIRED,
+                        self::TT_PROHIBITED,
+                        self::TT_FUZZY_PROX_MARK,
+                        self::TT_BOOSTING_MARK,
+                        self::TT_RANGE_INCL_START,
+                        self::TT_RANGE_INCL_END,
+                        self::TT_RANGE_EXCL_START,
+                        self::TT_RANGE_EXCL_END,
+                        self::TT_SUBQUERY_START,
+                        self::TT_SUBQUERY_END,
+                        self::TT_AND_LEXEME,
+                        self::TT_OR_LEXEME,
+                        self::TT_NOT_LEXEME,
+                        self::TT_TO_LEXEME,
+                        self::TT_NUMBER
+                     );
+    }
+
+
+    /**
+     * TokenCategories
+     */
+    const TC_WORD           = 0;   // Word
+    const TC_PHRASE         = 1;   // Phrase (one or several quoted words)
+    const TC_NUMBER         = 2;   // Nubers, which are used with syntax elements. Ex. roam~0.8
+    const TC_SYNTAX_ELEMENT = 3;   // +  -  ( )  [ ]  { }  !  ||  && ~ ^
+
+
+    /**
+     * Token type.
+     *
+     * @var integer
+     */
+    public $type;
+
+    /**
+     * Token text.
+     *
+     * @var integer
+     */
+    public $text;
+
+    /**
+     * Token position within query.
+     *
+     * @var integer
+     */
+    public $position;
+
+
+    /**
+     * IndexReader constructor needs token type and token text as a parameters.
+     *
+     * @param integer $tokenCategory
+     * @param string  $tokText
+     * @param integer $position
+     */
+    public function __construct($tokenCategory, $tokenText, $position)
+    {
+        $this->text     = $tokenText;
+        $this->position = $position + 1; // Start from 1
+
+        switch ($tokenCategory) {
+            case self::TC_WORD:
+                if (  strtolower($tokenText) == 'and') {
+                    $this->type = self::TT_AND_LEXEME;
+                } else if (strtolower($tokenText) == 'or') {
+                    $this->type = self::TT_OR_LEXEME;
+                } else if (strtolower($tokenText) == 'not') {
+                    $this->type = self::TT_NOT_LEXEME;
+                } else if (strtolower($tokenText) == 'to') {
+                    $this->type = self::TT_TO_LEXEME;
+                } else {
+                    $this->type = self::TT_WORD;
+                }
+                break;
+
+            case self::TC_PHRASE:
+                $this->type = self::TT_PHRASE;
+                break;
+
+            case self::TC_NUMBER:
+                $this->type = self::TT_NUMBER;
+                break;
+
+            case self::TC_SYNTAX_ELEMENT:
+                switch ($tokenText) {
+                    case ':':
+                        $this->type = self::TT_FIELD_INDICATOR;
+                        break;
+
+                    case '+':
+                        $this->type = self::TT_REQUIRED;
+                        break;
+
+                    case '-':
+                        $this->type = self::TT_PROHIBITED;
+                        break;
+
+                    case '~':
+                        $this->type = self::TT_FUZZY_PROX_MARK;
+                        break;
+
+                    case '^':
+                        $this->type = self::TT_BOOSTING_MARK;
+                        break;
+
+                    case '[':
+                        $this->type = self::TT_RANGE_INCL_START;
+                        break;
+
+                    case ']':
+                        $this->type = self::TT_RANGE_INCL_END;
+                        break;
+
+                    case '{':
+                        $this->type = self::TT_RANGE_EXCL_START;
+                        break;
+
+                    case '}':
+                        $this->type = self::TT_RANGE_EXCL_END;
+                        break;
+
+                    case '(':
+                        $this->type = self::TT_SUBQUERY_START;
+                        break;
+
+                    case ')':
+                        $this->type = self::TT_SUBQUERY_END;
+                        break;
+
+                    case '!':
+                        $this->type = self::TT_NOT_LEXEME;
+                        break;
+
+                    case '&&':
+                        $this->type = self::TT_AND_LEXEME;
+                        break;
+
+                    case '||':
+                        $this->type = self::TT_OR_LEXEME;
+                        break;
+
+                    default:
+                        require_once 'Zend/Search/Lucene/Exception.php';
+                        throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
+                }
+                break;
+
+            case self::TC_NUMBER:
+                $this->type = self::TT_NUMBER;
+
+            default:
+                require_once 'Zend/Search/Lucene/Exception.php';
+                throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
+        }
+    }
+}