import
[web.mtrack] / inc / lib / Zend / Search / Lucene / Search / BooleanExpressionRecognizer.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Search
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: BooleanExpressionRecognizer.php 16971 2009-07-22 18:05:45Z mikaelkael $
21  */
22
23
24 /** Zend_Search_Lucene_FSM */
25 require_once 'Zend/Search/Lucene/FSM.php';
26
27 /** Zend_Search_Lucene_Search_QueryToken */
28 require_once 'Zend/Search/Lucene/Search/QueryToken.php';
29
30 /** Zend_Search_Lucene_Search_QueryParser */
31 require_once 'Zend/Search/Lucene/Search/QueryParser.php';
32
33 /**
34  * @category   Zend
35  * @package    Zend_Search_Lucene
36  * @subpackage Search
37  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
38  * @license    http://framework.zend.com/license/new-bsd     New BSD License
39  */
class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
{
    /** State Machine states */
    const ST_START           = 0;
    const ST_LITERAL         = 1;
    const ST_NOT_OPERATOR    = 2;
    const ST_AND_OPERATOR    = 3;
    const ST_OR_OPERATOR     = 4;

    /** Input symbols */
    const IN_LITERAL         = 0;
    const IN_NOT_OPERATOR    = 1;
    const IN_AND_OPERATOR    = 2;
    const IN_OR_OPERATOR     = 3;


    /**
     * NOT operator signal
     *
     * Raised by notOperatorAction() and lowered again by literalAction(),
     * so it only ever applies to the next literal.
     *
     * @var boolean
     */
    private $_negativeLiteral = false;

    /**
     * Current literal
     *
     * Stored by processLiteral() before the FSM is advanced and read by
     * literalAction().
     *
     * @var mixed
     */
    private $_literal;


    /**
     * Set of already closed boolean query conjunctions
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     * array(<literal>, <sign>)
     *
     * <sign> is TRUE for a plain literal and FALSE for a literal which was
     * preceded by the NOT operator (see literalAction()).
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * @var array
     */
    private $_conjunctions = array();

    /**
     * Current (still open) conjunction
     *
     * Same element format as the entries of $_conjunctions.
     *
     * @var array
     */
    private $_currentConjunction = array();


    /**
     * Object constructor
     *
     * Registers the states, the input symbols, the transition rules and the
     * actions of the recognizer.
     */
    public function __construct()
    {
        parent::__construct( array(self::ST_START,
                                   self::ST_LITERAL,
                                   self::ST_NOT_OPERATOR,
                                   self::ST_AND_OPERATOR,
                                   self::ST_OR_OPERATOR),
                             array(self::IN_LITERAL,
                                   self::IN_NOT_OPERATOR,
                                   self::IN_AND_OPERATOR,
                                   self::IN_OR_OPERATOR));

        // Actions for the two cases where the operator between two operands is omitted
        $emptyOperatorAction    = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
        $emptyNotOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');

        // Rule format: array(<source state>, <input symbol>, <target state>[, <action>])
        $this->addRules(array( array(self::ST_START,        self::IN_LITERAL,        self::ST_LITERAL),
                               array(self::ST_START,        self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),

                               array(self::ST_LITERAL,      self::IN_AND_OPERATOR,   self::ST_AND_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_OR_OPERATOR,    self::ST_OR_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_LITERAL,        self::ST_LITERAL,      $emptyOperatorAction),
                               array(self::ST_LITERAL,      self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR, $emptyNotOperatorAction),

                               // NOT must be followed by a literal
                               array(self::ST_NOT_OPERATOR, self::IN_LITERAL,        self::ST_LITERAL),

                               array(self::ST_AND_OPERATOR, self::IN_LITERAL,        self::ST_LITERAL),
                               array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),

                               array(self::ST_OR_OPERATOR,  self::IN_LITERAL,        self::ST_LITERAL),
                               array(self::ST_OR_OPERATOR,  self::IN_NOT_OPERATOR,   self::ST_NOT_OPERATOR),
                             ));

        $notOperatorAction     = new Zend_Search_Lucene_FSMAction($this, 'notOperatorAction');
        $orOperatorAction      = new Zend_Search_Lucene_FSMAction($this, 'orOperatorAction');
        $literalAction         = new Zend_Search_Lucene_FSMAction($this, 'literalAction');


        // No entry action is registered for ST_AND_OPERATOR: AND keeps
        // collecting literals into the current conjunction.
        $this->addEntryAction(self::ST_NOT_OPERATOR, $notOperatorAction);
        $this->addEntryAction(self::ST_OR_OPERATOR,  $orOperatorAction);
        $this->addEntryAction(self::ST_LITERAL,      $literalAction);
    }


    /**
     * Process next operator.
     *
     * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
     *
     * @param integer $operator
     */
    public function processOperator($operator)
    {
        $this->process($operator);
    }

    /**
     * Process expression literal.
     *
     * The literal is remembered first, so that the literal action triggered
     * by the state machine can pick it up.
     *
     * @param mixed $literal
     */
    public function processLiteral($literal)
    {
        $this->_literal = $literal;

        $this->process(self::IN_LITERAL);
    }

    /**
     * Finish an expression and return result
     *
     * Result is a set of boolean query conjunctions: all conjunctions closed
     * so far followed by the conjunction which is currently open.
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     * array(<literal>, <sign>)
     *
     * <sign> is TRUE for a plain literal and FALSE for a literal which was
     * preceded by the NOT operator.
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * The recognizer state is not modified, so calling this method more than
     * once returns the same result each time.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception if the expression does not end with a literal
     */
    public function finishExpression()
    {
        if ($this->getState() != self::ST_LITERAL) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Literal expected.');
        }

        // Work on a copy: appending to $this->_conjunctions here would add the
        // open conjunction again on every further call (or on a later OR).
        $conjunctions   = $this->_conjunctions;
        $conjunctions[] = $this->_currentConjunction;

        return $conjunctions;
    }



    /*********************************************************************
     * Actions implementation
     *********************************************************************/

    /**
     * Apply the query parser default operator for an omitted operator.
     *
     * Default AND keeps the current conjunction open; any other default is
     * handled as OR and closes the current conjunction.
     */
    private function _applyDefaultOperator()
    {
        if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() != Zend_Search_Lucene_Search_QueryParser::B_AND) {
            $this->orOperatorAction();
        }
    }

    /**
     * default (omitted) operator processing
     *
     * Used when a literal directly follows another literal.
     */
    public function emptyOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process literal explicitly.
        // NOTE(review): presumably the ST_LITERAL entry action is not fired by
        // the FSM for a LITERAL -> LITERAL self-transition - confirm in Zend_Search_Lucene_FSM.
        $this->literalAction();
    }

    /**
     * default (omitted) + NOT operator processing
     *
     * Used when the NOT operator directly follows a literal.
     */
    public function emptyNotOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process NOT operator
        $this->notOperatorAction();
    }


    /**
     * NOT operator processing
     *
     * Marks the next literal as negated.
     */
    public function notOperatorAction()
    {
        $this->_negativeLiteral = true;
    }

    /**
     * OR operator processing
     * Close current conjunction and start a new, empty one
     */
    public function orOperatorAction()
    {
        $this->_conjunctions[]     = $this->_currentConjunction;
        $this->_currentConjunction = array();
    }

    /**
     * Literal processing
     *
     * Appends array(<literal>, <sign>) to the current conjunction, where
     * <sign> is FALSE if the literal was preceded by the NOT operator and
     * TRUE otherwise.
     */
    public function literalAction()
    {
        // Add literal to the current conjunction
        $this->_currentConjunction[] = array($this->_literal, !$this->_negativeLiteral);

        // Switch off negative signal
        $this->_negativeLiteral = false;
    }
}