final move of files
[web.mtrack] / Zend / Search / Lucene / Search / QueryLexer.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Search
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: QueryLexer.php 16971 2009-07-22 18:05:45Z mikaelkael $
21  */
22
23 /** Zend_Search_Lucene_FSM */
24 require_once 'Zend/Search/Lucene/FSM.php';
25
26 /** Zend_Search_Lucene_Search_QueryToken */
27 require_once 'Zend/Search/Lucene/Search/QueryToken.php';
28
29 /**
30  * @category   Zend
31  * @package    Zend_Search_Lucene
32  * @subpackage Search
33  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
34  * @license    http://framework.zend.com/license/new-bsd     New BSD License
35  */
36 class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
37 {
38     /** State Machine states */
39     const ST_WHITE_SPACE     = 0;
40     const ST_SYNT_LEXEME     = 1;
41     const ST_LEXEME          = 2;
42     const ST_QUOTED_LEXEME   = 3;
43     const ST_ESCAPED_CHAR    = 4;
44     const ST_ESCAPED_QCHAR   = 5;
45     const ST_LEXEME_MODIFIER = 6;
46     const ST_NUMBER          = 7;
47     const ST_MANTISSA        = 8;
48     const ST_ERROR           = 9;
49
50     /** Input symbols */
51     const IN_WHITE_SPACE     = 0;
52     const IN_SYNT_CHAR       = 1;
53     const IN_LEXEME_MODIFIER = 2;
54     const IN_ESCAPE_CHAR     = 3;
55     const IN_QUOTE           = 4;
56     const IN_DECIMAL_POINT   = 5;
57     const IN_ASCII_DIGIT     = 6;
58     const IN_CHAR            = 7;
59     const IN_MUTABLE_CHAR    = 8;
60
61     const QUERY_WHITE_SPACE_CHARS      = " \n\r\t";
62     const QUERY_SYNT_CHARS             = ':()[]{}!|&';
63     const QUERY_MUTABLE_CHARS          = '+-';
64     const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
65     const QUERY_LEXEMEMODIFIER_CHARS   = '~^';
66     const QUERY_ASCIIDIGITS_CHARS      = '0123456789';
67
68     /**
69      * List of recognized lexemes
70      *
71      * @var array
72      */
73     private $_lexemes;
74
75     /**
76      * Query string (array of single- or non single-byte characters)
77      *
78      * @var array
79      */
80     private $_queryString;
81
82     /**
83      * Current position within a query string
84      * Used to create appropriate error messages
85      *
86      * @var integer
87      */
88     private $_queryStringPosition;
89
90     /**
91      * Recognized part of current lexeme
92      *
93      * @var string
94      */
95     private $_currentLexeme;
96
97     public function __construct()
98     {
99         parent::__construct( array(self::ST_WHITE_SPACE,
100                                    self::ST_SYNT_LEXEME,
101                                    self::ST_LEXEME,
102                                    self::ST_QUOTED_LEXEME,
103                                    self::ST_ESCAPED_CHAR,
104                                    self::ST_ESCAPED_QCHAR,
105                                    self::ST_LEXEME_MODIFIER,
106                                    self::ST_NUMBER,
107                                    self::ST_MANTISSA,
108                                    self::ST_ERROR),
109                              array(self::IN_WHITE_SPACE,
110                                    self::IN_SYNT_CHAR,
111                                    self::IN_MUTABLE_CHAR,
112                                    self::IN_LEXEME_MODIFIER,
113                                    self::IN_ESCAPE_CHAR,
114                                    self::IN_QUOTE,
115                                    self::IN_DECIMAL_POINT,
116                                    self::IN_ASCII_DIGIT,
117                                    self::IN_CHAR));
118
119
120         $lexemeModifierErrorAction    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
121         $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
122         $wrongNumberErrorAction       = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');
123
124
125
126         $this->addRules(array( array(self::ST_WHITE_SPACE,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
127                                array(self::ST_WHITE_SPACE,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
128                                array(self::ST_WHITE_SPACE,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
129                                array(self::ST_WHITE_SPACE,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
130                                array(self::ST_WHITE_SPACE,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
131                                array(self::ST_WHITE_SPACE,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
132                                array(self::ST_WHITE_SPACE,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
133                                array(self::ST_WHITE_SPACE,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
134                                array(self::ST_WHITE_SPACE,   self::IN_CHAR,            self::ST_LEXEME)
135                              ));
136         $this->addRules(array( array(self::ST_SYNT_LEXEME,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
137                                array(self::ST_SYNT_LEXEME,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
138                                array(self::ST_SYNT_LEXEME,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
139                                array(self::ST_SYNT_LEXEME,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
140                                array(self::ST_SYNT_LEXEME,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
141                                array(self::ST_SYNT_LEXEME,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
142                                array(self::ST_SYNT_LEXEME,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
143                                array(self::ST_SYNT_LEXEME,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
144                                array(self::ST_SYNT_LEXEME,   self::IN_CHAR,            self::ST_LEXEME)
145                              ));
146         $this->addRules(array( array(self::ST_LEXEME,        self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
147                                array(self::ST_LEXEME,        self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
148                                array(self::ST_LEXEME,        self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
149                                array(self::ST_LEXEME,        self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
150                                array(self::ST_LEXEME,        self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
151
152                                // IN_QUOTE     not allowed
153                                array(self::ST_LEXEME,        self::IN_QUOTE,           self::ST_ERROR, $quoteWithinLexemeErrorAction),
154
155                                array(self::ST_LEXEME,        self::IN_DECIMAL_POINT,   self::ST_LEXEME),
156                                array(self::ST_LEXEME,        self::IN_ASCII_DIGIT,     self::ST_LEXEME),
157                                array(self::ST_LEXEME,        self::IN_CHAR,            self::ST_LEXEME)
158                              ));
159         $this->addRules(array( array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
160                                array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
161                                array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
162                                array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
163                                array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_QCHAR),
164                                array(self::ST_QUOTED_LEXEME, self::IN_QUOTE,           self::ST_WHITE_SPACE),
165                                array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
166                                array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
167                                array(self::ST_QUOTED_LEXEME, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
168                              ));
169         $this->addRules(array( array(self::ST_ESCAPED_CHAR,  self::IN_WHITE_SPACE,     self::ST_LEXEME),
170                                array(self::ST_ESCAPED_CHAR,  self::IN_SYNT_CHAR,       self::ST_LEXEME),
171                                array(self::ST_ESCAPED_CHAR,  self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
172                                array(self::ST_ESCAPED_CHAR,  self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
173                                array(self::ST_ESCAPED_CHAR,  self::IN_ESCAPE_CHAR,     self::ST_LEXEME),
174                                array(self::ST_ESCAPED_CHAR,  self::IN_QUOTE,           self::ST_LEXEME),
175                                array(self::ST_ESCAPED_CHAR,  self::IN_DECIMAL_POINT,   self::ST_LEXEME),
176                                array(self::ST_ESCAPED_CHAR,  self::IN_ASCII_DIGIT,     self::ST_LEXEME),
177                                array(self::ST_ESCAPED_CHAR,  self::IN_CHAR,            self::ST_LEXEME)
178                              ));
179         $this->addRules(array( array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
180                                array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
181                                array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
182                                array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
183                                array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR,     self::ST_QUOTED_LEXEME),
184                                array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
185                                array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
186                                array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
187                                array(self::ST_ESCAPED_QCHAR, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
188                              ));
189         $this->addRules(array( array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
190                                array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
191                                array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
192                                array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
193
194                                // IN_ESCAPE_CHAR       not allowed
195                                array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $lexemeModifierErrorAction),
196
197                                // IN_QUOTE             not allowed
198                                array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE,           self::ST_ERROR, $lexemeModifierErrorAction),
199
200
201                                array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
202                                array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
203
204                                // IN_CHAR              not allowed
205                                array(self::ST_LEXEME_MODIFIER, self::IN_CHAR,            self::ST_ERROR, $lexemeModifierErrorAction),
206                              ));
207         $this->addRules(array( array(self::ST_NUMBER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
208                                array(self::ST_NUMBER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
209                                array(self::ST_NUMBER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
210                                array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
211
212                                // IN_ESCAPE_CHAR       not allowed
213                                array(self::ST_NUMBER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),
214
215                                // IN_QUOTE             not allowed
216                                array(self::ST_NUMBER, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),
217
218                                array(self::ST_NUMBER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
219                                array(self::ST_NUMBER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),
220
221                                // IN_CHAR              not allowed
222                                array(self::ST_NUMBER, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
223                              ));
224         $this->addRules(array( array(self::ST_MANTISSA, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
225                                array(self::ST_MANTISSA, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
226                                array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
227                                array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
228
229                                // IN_ESCAPE_CHAR       not allowed
230                                array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),
231
232                                // IN_QUOTE             not allowed
233                                array(self::ST_MANTISSA, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),
234
235                                // IN_DECIMAL_POINT     not allowed
236                                array(self::ST_MANTISSA, self::IN_DECIMAL_POINT,   self::ST_ERROR, $wrongNumberErrorAction),
237
238                                array(self::ST_MANTISSA, self::IN_ASCII_DIGIT,     self::ST_MANTISSA),
239
240                                // IN_CHAR              not allowed
241                                array(self::ST_MANTISSA, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
242                              ));
243
244
245         /** Actions */
246         $syntaxLexemeAction    = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
247         $lexemeModifierAction  = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
248         $addLexemeAction       = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
249         $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
250         $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
251         $addLexemeCharAction   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');
252
253
254         /** Syntax lexeme */
255         $this->addEntryAction(self::ST_SYNT_LEXEME,  $syntaxLexemeAction);
256         // Two lexemes in succession
257         $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);
258
259
260         /** Lexeme */
261         $this->addEntryAction(self::ST_LEXEME,                       $addLexemeCharAction);
262         $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
263         // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action
264
265         $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE,     $addLexemeAction);
266         $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME,     $addLexemeAction);
267         $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME,   $addLexemeAction);
268         $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
269         $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER,          $addLexemeAction);
270         $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA,        $addLexemeAction);
271
272
273         /** Quoted lexeme */
274         // We don't need entry action (skeep quote)
275         $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
276         $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
277         // Closing quote changes state to the ST_WHITE_SPACE   other states are not used
278         $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE,   $addQuotedLexemeAction);
279
280
281         /** Lexeme modifier */
282         $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);
283
284
285         /** Number */
286         $this->addEntryAction(self::ST_NUMBER,                           $addLexemeCharAction);
287         $this->addEntryAction(self::ST_MANTISSA,                         $addLexemeCharAction);
288         $this->addTransitionAction(self::ST_NUMBER,   self::ST_NUMBER,   $addLexemeCharAction);
289         // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
290         $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);
291
292         $this->addTransitionAction(self::ST_NUMBER,   self::ST_WHITE_SPACE,     $addNumberLexemeAction);
293         $this->addTransitionAction(self::ST_NUMBER,   self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
294         $this->addTransitionAction(self::ST_NUMBER,   self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
295         $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE,     $addNumberLexemeAction);
296         $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
297         $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
298     }
299
300
301
302
303     /**
304      * Translate input char to an input symbol of state machine
305      *
306      * @param string $char
307      * @return integer
308      */
309     private function _translateInput($char)
310     {
311         if        (strpos(self::QUERY_WHITE_SPACE_CHARS,    $char) !== false) { return self::IN_WHITE_SPACE;
312         } else if (strpos(self::QUERY_SYNT_CHARS,           $char) !== false) { return self::IN_SYNT_CHAR;
313         } else if (strpos(self::QUERY_MUTABLE_CHARS,        $char) !== false) { return self::IN_MUTABLE_CHAR;
314         } else if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) { return self::IN_LEXEME_MODIFIER;
315         } else if (strpos(self::QUERY_ASCIIDIGITS_CHARS,    $char) !== false) { return self::IN_ASCII_DIGIT;
316         } else if ($char === '"' )                                            { return self::IN_QUOTE;
317         } else if ($char === '.' )                                            { return self::IN_DECIMAL_POINT;
318         } else if ($char === '\\')                                            { return self::IN_ESCAPE_CHAR;
319         } else                                                                { return self::IN_CHAR;
320         }
321     }
322
323
324     /**
325      * This method is used to tokenize query string into lexemes
326      *
327      * @param string $inputString
328      * @param string $encoding
329      * @return array
330      * @throws Zend_Search_Lucene_Search_QueryParserException
331      */
332     public function tokenize($inputString, $encoding)
333     {
334         $this->reset();
335
336         $this->_lexemes     = array();
337         $this->_queryString = array();
338
339         if (PHP_OS == 'AIX' && $encoding == '') {
340             $encoding = 'ISO8859-1';
341         }
342         $strLength = iconv_strlen($inputString, $encoding);
343
344         // Workaround for iconv_substr bug
345         $inputString .= ' ';
346
347         for ($count = 0; $count < $strLength; $count++) {
348             $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
349         }
350
351         for ($this->_queryStringPosition = 0;
352              $this->_queryStringPosition < count($this->_queryString);
353              $this->_queryStringPosition++) {
354             $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
355         }
356
357         $this->process(self::IN_WHITE_SPACE);
358
359         if ($this->getState() != self::ST_WHITE_SPACE) {
360             require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
361             throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
362         }
363
364         $this->_queryString = null;
365
366         return $this->_lexemes;
367     }
368
369
370
371     /*********************************************************************
372      * Actions implementation
373      *
374      * Actions modify the list of recognized lexemes
375      *********************************************************************/
376
377     /**
378      * Add query syntax lexeme
379      *
380      * @throws Zend_Search_Lucene_Search_QueryParserException
381      */
382     public function addQuerySyntaxLexeme()
383     {
384         $lexeme = $this->_queryString[$this->_queryStringPosition];
385
386         // Process two char lexemes
387         if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false) {
388             // increase current position in a query string
389             $this->_queryStringPosition++;
390
391             // check,
392             if ($this->_queryStringPosition == count($this->_queryString)  ||
393                 $this->_queryString[$this->_queryStringPosition] != $lexeme) {
394                     require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
395                     throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
396                 }
397
398             // duplicate character
399             $lexeme .= $lexeme;
400         }
401
402         $token = new Zend_Search_Lucene_Search_QueryToken(
403                                 Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
404                                 $lexeme,
405                                 $this->_queryStringPosition);
406
407         // Skip this lexeme if it's a field indicator ':' and treat previous as 'field' instead of 'word'
408         if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
409             $token = array_pop($this->_lexemes);
410             if ($token === null  ||  $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
411                 require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
412                 throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
413             }
414
415             $token->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
416         }
417
418         $this->_lexemes[] = $token;
419     }
420
421     /**
422      * Add lexeme modifier
423      */
424     public function addLexemeModifier()
425     {
426         $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
427                                     Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
428                                     $this->_queryString[$this->_queryStringPosition],
429                                     $this->_queryStringPosition);
430     }
431
432
433     /**
434      * Add lexeme
435      */
436     public function addLexeme()
437     {
438         $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
439                                     Zend_Search_Lucene_Search_QueryToken::TC_WORD,
440                                     $this->_currentLexeme,
441                                     $this->_queryStringPosition - 1);
442
443         $this->_currentLexeme = '';
444     }
445
446     /**
447      * Add quoted lexeme
448      */
449     public function addQuotedLexeme()
450     {
451         $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
452                                     Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
453                                     $this->_currentLexeme,
454                                     $this->_queryStringPosition);
455
456         $this->_currentLexeme = '';
457     }
458
459     /**
460      * Add number lexeme
461      */
462     public function addNumberLexeme()
463     {
464         $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
465                                     Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
466                                     $this->_currentLexeme,
467                                     $this->_queryStringPosition - 1);
468         $this->_currentLexeme = '';
469     }
470
471     /**
472      * Extend lexeme by one char
473      */
474     public function addLexemeChar()
475     {
476         $this->_currentLexeme .= $this->_queryString[$this->_queryStringPosition];
477     }
478
479
480     /**
481      * Position message
482      *
483      * @return string
484      */
485     private function _positionMsg()
486     {
487         return 'Position is ' . $this->_queryStringPosition . '.';
488     }
489
490
491     /*********************************************************************
492      * Syntax errors actions
493      *********************************************************************/
494     public function lexModifierErrException()
495     {
496         require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
497         throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier character can be followed only by number, white space or query syntax element. ' . $this->_positionMsg());
498     }
499     public function quoteWithinLexemeErrException()
500     {
501         require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
502         throw new Zend_Search_Lucene_Search_QueryParserException('Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg());
503     }
504     public function wrongNumberErrException()
505     {
506         require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
507         throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax.' . $this->_positionMsg());
508     }
509 }
510