import
[web.mtrack] / inc / lib / Zend / Search / Lucene / Index / SegmentWriter / DocumentWriter.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Index
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: DocumentWriter.php 16541 2009-07-07 06:59:03Z bkarwin $
21  */
22
23 /** Zend_Search_Lucene_Analysis_Analyzer */
24 require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
25
26 /** Zend_Search_Lucene_Index_SegmentWriter */
27 require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
28
/**
 * Segment writer that builds a segment from individually added documents.
 *
 * addDocument() analyzes a document and accumulates its terms, term positions
 * and per-field norms in memory; close() dumps the accumulated data through
 * the parent writer's routines and returns the resulting segment descriptor.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Term Dictionary
     *
     * Array of Zend_Search_Lucene_Index_Term objects indexed by term key
     * (the value returned by Zend_Search_Lucene_Index_Term::key()).
     * The postings of each term are kept in $_termDocs under the same key.
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     *
     * Structure: termKey => array(documentNumber => array(position, ...)),
     * where documentNumber is the value of $this->_docCount at the moment
     * the document was added.
     *
     * @var array
     */
    protected $_termDocs;

    /**
     * Object constructor.
     *
     * Delegates to the parent writer and starts with an empty dictionary.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }


    /**
     * Adds a document to this segment.
     *
     * Every field is registered with the segment, indexed fields contribute
     * terms and a norm byte, and stored fields are handed to addStoredFields().
     *
     * The document is validated as a whole before anything is modified, so a
     * rejected document leaves the writer exactly as it was.
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception if any field requests term vector storing
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        // First pass: collect the fields and reject unsupported ones up front.
        // Throwing in the middle of indexing would leave fields registered and
        // postings filed under the current document number for a document
        // that was never completed.
        $fields = array();
        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
            }

            $fields[] = $field;
        }

        $storedFields = array();
        $docNorms     = array();
        $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

        // Second pass: the document is known to be acceptable, index it.
        foreach ($fields as $field) {
            $this->addField($field);

            if ($field->isIndexed) {
                if ($field->isTokenized) {
                    $termCount = $this->_indexTokenizedField($field);
                } else {
                    // The whole value is indexed as one term at position 0.
                    $term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name);
                    $this->_addTermPosition($term, 0);
                    $termCount = 1;
                }

                // Norm = length norm scaled by document and field boosts,
                // encoded by the similarity and stored as a single byte.
                $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
                                                                                               $termCount)*
                                                                       $document->boost*
                                                                       $field->boost ));
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }
        }


        // Append one norm byte per indexed field known to the segment, so the
        // norm strings grow by exactly one byte for this document.
        foreach ($this->_fields as $fieldName => $field) {
            if (!$field->isIndexed) {
                continue;
            }

            if (!isset($this->_norms[$fieldName])) {
                // Field seen for the first time: back-fill a "zero terms"
                // norm for every document added before this one.
                $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                                                       $this->_docCount);
            }

            if (isset($docNorms[$fieldName])){
                $this->_norms[$fieldName] .= $docNorms[$fieldName];
            } else {
                // This document has no indexed value for the field.
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
            }
        }

        // NOTE(review): $this->_docCount is not advanced in this class;
        // presumably addStoredFields() does it - confirm in the parent writer.
        $this->addStoredFields($storedFields);
    }


    /**
     * Runs the default analyzer over a field value and records every token.
     *
     * @param Zend_Search_Lucene_Field $field  NOTE(review): type assumed from usage - confirm
     * @return integer number of tokens produced by the analyzer
     */
    private function _indexTokenizedField($field)
    {
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
        $analyzer->setInput($field->value, $field->encoding);

        $position     = 0;
        $tokenCounter = 0;
        while (($token = $analyzer->nextToken()) !== null) {
            $tokenCounter++;

            // Positions accumulate the increments reported by the tokens.
            $position += $token->getPositionIncrement();

            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
            $this->_addTermPosition($term, $position);
        }

        return $tokenCounter;
    }


    /**
     * Records one occurrence of a term in the document currently being added.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     */
    private function _addTermPosition(Zend_Search_Lucene_Index_Term $term, $position)
    {
        $termKey = $term->key();

        if (!isset($this->_termDictionary[$termKey])) {
            // New term
            $this->_termDictionary[$termKey] = $term;
            $this->_termDocs[$termKey]       = array();
        }

        if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
            // First occurrence of the term in the current document
            $this->_termDocs[$termKey][$this->_docCount] = array();
        }

        $this->_termDocs[$termKey][$this->_docCount][] = $position;
    }


    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     *
     * Terms are passed to addTerm() in ascending string order of their keys.
     */
    protected function _dumpDictionary()
    {
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }


    /**
     * Close segment, write it to disk and return segment info
     *
     * Nothing is written when no documents have been added.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo|null  null if the segment is empty
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();

        $this->_generateCFS();

        // NOTE(review): the trailing positional arguments are presumably
        // delGen = -1, no doc store options, single norm file, compound file;
        // confirm against the SegmentInfo constructor.
        return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                        $this->_name,
                                                        $this->_docCount,
                                                        -1,
                                                        null,
                                                        true,
                                                        true);
    }

}
214