import
[web.mtrack] / inc / lib / Zend / Search / Lucene / Index / SegmentInfo.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Index
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: SegmentInfo.php 16541 2009-07-07 06:59:03Z bkarwin $
21  */
22
23 /** Zend_Search_Lucene_Index_DictionaryLoader */
24 require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
25
26 /** Zend_Search_Lucene_Index_DocsFilter */
27 require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
28
29 /** Zend_Search_Lucene_Index_TermsStream_Interface */
30 require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
31
32 /**
33  * @category   Zend
34  * @package    Zend_Search_Lucene
35  * @subpackage Index
36  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
37  * @license    http://framework.zend.com/license/new-bsd     New BSD License
38  */
39 class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
40 {
41     /**
42      * "Full scan vs fetch" boundary.
43      *
44      * If filter selectivity is less than this value, then full scan is performed
45      * (since term entries fetching has some additional overhead).
46      */
47     const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
48
49     /**
50      * Number of docs in a segment
51      *
52      * @var integer
53      */
54     private $_docCount;
55
56     /**
57      * Segment name
58      *
59      * @var string
60      */
61     private $_name;
62
63     /**
64      * Term Dictionary Index
65      *
66      * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
67      * of performance considerations)
68      * [0] -> $termValue
69      * [1] -> $termFieldNum
70      *
71      * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
72      *
73      * @var array
74      */
75     private $_termDictionary;
76
77     /**
78      * Term Dictionary Index TermInfos
79      *
80      * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
81      * of performance considerations)
82      * [0] -> $docFreq
83      * [1] -> $freqPointer
84      * [2] -> $proxPointer
85      * [3] -> $skipOffset
86      * [4] -> $indexPointer
87      *
88      * @var array
89      */
90     private $_termDictionaryInfos;
91
92     /**
93      * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
94      *
95      * @var array
96      */
97     private $_fields;
98
99     /**
100      * Field positions in a dictionary.
101      * (Term dictionary contains fields ordered by names)
102      *
103      * @var array
104      */
105     private $_fieldsDicPositions;
106
107
108     /**
109      * Associative array where the key is the file name and the value is data offset
110      * in a compound segment file (.cfs).
111      *
112      * @var array
113      */
114     private $_segFiles;
115
116     /**
117      * Associative array where the key is the file name and the value is file size (.cfs).
118      *
119      * @var array
120      */
121     private $_segFileSizes;
122
123     /**
124      * Delete file generation number
125      *
126      * -2 means autodetect latest delete generation
127      * -1 means 'there is no delete file'
128      *  0 means pre-2.1 format delete file
129      *  X specifies used delete file
130      *
131      * @var integer
132      */
133     private $_delGen;
134
135     /**
136      * Segment has single norms file
137      *
138      * If true then one .nrm file is used for all fields
139      * Otherwise .fN files are used
140      *
141      * @var boolean
142      */
143     private $_hasSingleNormFile;
144
145     /**
146      * Use compound segment file (*.cfs) to collect all other segment files
147      * (excluding .del files)
148      *
149      * @var boolean
150      */
151     private $_isCompound;
152
153
154     /**
155      * File system adapter.
156      *
157      * @var Zend_Search_Lucene_Storage_Directory_Filesystem
158      */
159     private $_directory;
160
161     /**
162      * Normalization factors.
163      * An array fieldName => normVector
164      * normVector is a binary string.
165      * Each byte corresponds to an indexed document in a segment and
166      * encodes normalization factor (float value, encoded by
167      * Zend_Search_Lucene_Search_Similarity::encodeNorm())
168      *
169      * @var array
170      */
171     private $_norms = array();
172
173     /**
174      * List of deleted documents.
175      * bitset if bitset extension is loaded or array otherwise.
176      *
177      * @var mixed
178      */
179     private $_deleted = null;
180
181     /**
182      * $this->_deleted update flag
183      *
184      * @var boolean
185      */
186     private $_deletedDirty = false;
187
188     /**
189      * True if segment uses shared doc store
190      *
191      * @var boolean
192      */
193     private $_usesSharedDocStore;
194
195     /*
196      * Shared doc store options.
197      * It's an associative array with the following items:
198      * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
199      * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
200      * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
201      */
202     private $_sharedDocStoreOptions;
203
204
205     /**
206      * Zend_Search_Lucene_Index_SegmentInfo constructor
207      *
208      * @param Zend_Search_Lucene_Storage_Directory $directory
209      * @param string     $name
210      * @param integer    $docCount
211      * @param integer    $delGen
212      * @param array|null $docStoreOptions
213      * @param boolean    $hasSingleNormFile
214      * @param boolean    $isCompound
215      */
216     public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
217     {
218         $this->_directory = $directory;
219         $this->_name      = $name;
220         $this->_docCount  = $docCount;
221
222         if ($docStoreOptions !== null) {
223             $this->_usesSharedDocStore    = true;
224             $this->_sharedDocStoreOptions = $docStoreOptions;
225
226             if ($docStoreOptions['isCompound']) {
227                 $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
228                 $cfxFilesCount = $cfxFile->readVInt();
229
230                 $cfxFiles     = array();
231                 $cfxFileSizes = array();
232
233                 for ($count = 0; $count < $cfxFilesCount; $count++) {
234                     $dataOffset = $cfxFile->readLong();
235                     if ($count != 0) {
236                         $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
237                     }
238                     $fileName            = $cfxFile->readString();
239                     $cfxFiles[$fileName] = $dataOffset;
240                 }
241                 if ($count != 0) {
242                     $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
243                 }
244
245                 $this->_sharedDocStoreOptions['files']     = $cfxFiles;
246                 $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
247             }
248         }
249
250         $this->_hasSingleNormFile = $hasSingleNormFile;
251         $this->_delGen            = $delGen;
252         $this->_termDictionary    = null;
253
254
255         if ($isCompound !== null) {
256             $this->_isCompound    = $isCompound;
257         } else {
258             // It's a pre-2.1 segment or isCompound is set to 'unknown'
259             // Detect if segment uses compound file
260             require_once 'Zend/Search/Lucene/Exception.php';
261             try {
262                 // Try to open compound file
263                 $this->_directory->getFileObject($name . '.cfs');
264
265                 // Compound file is found
266                 $this->_isCompound = true;
267             } catch (Zend_Search_Lucene_Exception $e) {
268                 if (strpos($e->getMessage(), 'is not readable') !== false) {
269                     // Compound file is not found or is not readable
270                     $this->_isCompound = false;
271                 } else {
272                     throw $e;
273                 }
274             }
275         }
276
277         $this->_segFiles = array();
278         if ($this->_isCompound) {
279             $cfsFile = $this->_directory->getFileObject($name . '.cfs');
280             $segFilesCount = $cfsFile->readVInt();
281
282             for ($count = 0; $count < $segFilesCount; $count++) {
283                 $dataOffset = $cfsFile->readLong();
284                 if ($count != 0) {
285                     $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
286                 }
287                 $fileName = $cfsFile->readString();
288                 $this->_segFiles[$fileName] = $dataOffset;
289             }
290             if ($count != 0) {
291                 $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
292             }
293         }
294
295         $fnmFile = $this->openCompoundFile('.fnm');
296         $fieldsCount = $fnmFile->readVInt();
297         $fieldNames = array();
298         $fieldNums  = array();
299         $this->_fields = array();
300         for ($count=0; $count < $fieldsCount; $count++) {
301             $fieldName = $fnmFile->readString();
302             $fieldBits = $fnmFile->readByte();
303             $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
304                                                                             $fieldBits & 0x01 /* field is indexed */,
305                                                                             $count,
306                                                                             $fieldBits & 0x02 /* termvectors are stored */,
307                                                                             $fieldBits & 0x10 /* norms are omitted */,
308                                                                             $fieldBits & 0x20 /* payloads are stored */);
309             if ($fieldBits & 0x10) {
310                 // norms are omitted for the indexed field
311                 $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
312             }
313
314             $fieldNums[$count]  = $count;
315             $fieldNames[$count] = $fieldName;
316         }
317         array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
318         $this->_fieldsDicPositions = array_flip($fieldNums);
319
320         if ($this->_delGen == -2) {
321                 // SegmentInfo constructor is invoked from index writer
322                 // Autodetect current delete file generation number
323             $this->_delGen = $this->_detectLatestDelGen();
324         }
325
326         // Load deletions
327         $this->_deleted = $this->_loadDelFile();
328     }
329
330     /**
331      * Load deletions file
332      *
333      * Returns bitset or an array depending on bitset extension availability
334      *
335      * @return mixed
336      * @throws Zend_Search_Lucene_Exception
337      */
338     private function _loadDelFile()
339     {
340         if ($this->_delGen == -1) {
341             // There is no delete file for this segment
342             return null;
343         } else if ($this->_delGen == 0) {
344             // It's a segment with pre-2.1 format delete file
345             // Try to load deletions file
346             return $this->_loadPre21DelFile();
347         } else {
348             // It's 2.1+ format deleteions file
349             return $this->_load21DelFile();
350         }
351     }
352
353     /**
354      * Load pre-2.1 deletions file
355      *
356      * Returns bitset or an array depending on bitset extension availability
357      *
358      * @return mixed
359      * @throws Zend_Search_Lucene_Exception
360      */
361     private function _loadPre21DelFile()
362     {
363         require_once 'Zend/Search/Lucene/Exception.php';
364         try {
365             // '.del' files always stored in a separate file
366             // Segment compound is not used
367             $delFile = $this->_directory->getFileObject($this->_name . '.del');
368
369             $byteCount = $delFile->readInt();
370             $byteCount = ceil($byteCount/8);
371             $bitCount  = $delFile->readInt();
372
373             if ($bitCount == 0) {
374                 $delBytes = '';
375             } else {
376                 $delBytes = $delFile->readBytes($byteCount);
377             }
378
379             if (extension_loaded('bitset')) {
380                 return $delBytes;
381             } else {
382                 $deletions = array();
383                 for ($count = 0; $count < $byteCount; $count++) {
384                     $byte = ord($delBytes[$count]);
385                     for ($bit = 0; $bit < 8; $bit++) {
386                         if ($byte & (1<<$bit)) {
387                             $deletions[$count*8 + $bit] = 1;
388                         }
389                     }
390                 }
391
392                 return $deletions;
393             }
394         } catch(Zend_Search_Lucene_Exception $e) {
395             if (strpos($e->getMessage(), 'is not readable') === false) {
396                 throw $e;
397             }
398             // There is no deletion file
399             $this->_delGen = -1;
400
401             return null;
402         }
403     }
404
405     /**
406      * Load 2.1+ format deletions file
407      *
408      * Returns bitset or an array depending on bitset extension availability
409      *
410      * @return mixed
411      */
412     private function _load21DelFile()
413     {
414         $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
415
416         $format = $delFile->readInt();
417
418         if ($format == (int)0xFFFFFFFF) {
419             if (extension_loaded('bitset')) {
420                 $deletions = bitset_empty();
421             } else {
422                 $deletions = array();
423             }
424
425             $byteCount = $delFile->readInt();
426             $bitCount  = $delFile->readInt();
427
428             $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
429             $byteNum = 0;
430
431             do {
432                 $dgap = $delFile->readVInt();
433                 $nonZeroByte = $delFile->readByte();
434
435                 $byteNum += $dgap;
436
437
438                 if (extension_loaded('bitset')) {
439                         for ($bit = 0; $bit < 8; $bit++) {
440                             if ($nonZeroByte & (1<<$bit)) {
441                             bitset_incl($deletions, $byteNum*8 + $bit);
442                             }
443                         }
444                     return $deletions;
445                 } else {
446                         for ($bit = 0; $bit < 8; $bit++) {
447                             if ($nonZeroByte & (1<<$bit)) {
448                             $deletions[$byteNum*8 + $bit] = 1;
449                             }
450                         }
451                     return (count($deletions) > 0) ? $deletions : null;
452                 }
453
454             } while ($delFile->tell() < $delFileSize);
455         } else {
456             // $format is actually byte count
457             $byteCount = ceil($format/8);
458             $bitCount  = $delFile->readInt();
459
460             if ($bitCount == 0) {
461                 $delBytes = '';
462             } else {
463                 $delBytes = $delFile->readBytes($byteCount);
464             }
465
466             if (extension_loaded('bitset')) {
467                 return $delBytes;
468             } else {
469                 $deletions = array();
470                 for ($count = 0; $count < $byteCount; $count++) {
471                     $byte = ord($delBytes[$count]);
472                     for ($bit = 0; $bit < 8; $bit++) {
473                         if ($byte & (1<<$bit)) {
474                             $deletions[$count*8 + $bit] = 1;
475                         }
476                     }
477                 }
478
479                 return (count($deletions) > 0) ? $deletions : null;
480             }
481         }
482     }
483
484     /**
485      * Opens index file stored within compound index file
486      *
487      * @param string $extension
488      * @param boolean $shareHandler
489      * @throws Zend_Search_Lucene_Exception
490      * @return Zend_Search_Lucene_Storage_File
491      */
492     public function openCompoundFile($extension, $shareHandler = true)
493     {
494         if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
495             $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
496             $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';
497
498             if (!$this->_sharedDocStoreOptions['isCompound']) {
499                 $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
500                 $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
501
502                 if ($extension == '.fdx') {
503                     // '.fdx' file is requested
504                     return $fdxFile;
505                 } else {
506                     // '.fdt' file is requested
507                     $fdtStartOffset = $fdxFile->readLong();
508
509                     $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
510                     $fdtFile->seek($fdtStartOffset, SEEK_CUR);
511
512                     return $fdtFile;
513                 }
514             }
515
516             if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
517                 require_once 'Zend/Search/Lucene/Exception.php';
518                 throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
519                                        . $fdxFName . ' file.' );
520             }
521             if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
522                 require_once 'Zend/Search/Lucene/Exception.php';
523                 throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
524                                        . $fdtFName . ' file.' );
525             }
526
527             // Open shared docstore segment file
528             $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
529             // Seek to the start of '.fdx' file within compound file
530             $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
531             // Seek to the start of current segment documents section
532             $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
533
534             if ($extension == '.fdx') {
535                 // '.fdx' file is requested
536                 return $cfxFile;
537             } else {
538                 // '.fdt' file is requested
539                 $fdtStartOffset = $cfxFile->readLong();
540
541                 // Seek to the start of '.fdt' file within compound file
542                 $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
543                 // Seek to the start of current segment documents section
544                 $cfxFile->seek($fdtStartOffset, SEEK_CUR);
545
546                 return $fdtFile;
547             }
548         }
549
550         $filename = $this->_name . $extension;
551
552         if (!$this->_isCompound) {
553             return $this->_directory->getFileObject($filename, $shareHandler);
554         }
555
556         if( !isset($this->_segFiles[$filename]) ) {
557             require_once 'Zend/Search/Lucene/Exception.php';
558             throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
559                                        . $filename . ' file.' );
560         }
561
562         $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
563         $file->seek($this->_segFiles[$filename]);
564         return $file;
565     }
566
567     /**
568      * Get compound file length
569      *
570      * @param string $extension
571      * @return integer
572      */
573     public function compoundFileLength($extension)
574     {
575         if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
576             $filename = $this->_sharedDocStoreOptions['segment'] . $extension;
577
578             if (!$this->_sharedDocStoreOptions['isCompound']) {
579                 return $this->_directory->fileLength($filename);
580             }
581
582             if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
583                 require_once 'Zend/Search/Lucene/Exception.php';
584                 throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
585                                            . $filename . ' file.' );
586             }
587
588             return $this->_sharedDocStoreOptions['fileSizes'][$filename];
589         }
590
591
592         $filename = $this->_name . $extension;
593
594         // Try to get common file first
595         if ($this->_directory->fileExists($filename)) {
596             return $this->_directory->fileLength($filename);
597         }
598
599         if( !isset($this->_segFileSizes[$filename]) ) {
600             require_once 'Zend/Search/Lucene/Exception.php';
601             throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
602                                        . $filename . ' file.' );
603         }
604
605         return $this->_segFileSizes[$filename];
606     }
607
608     /**
609      * Returns field index or -1 if field is not found
610      *
611      * @param string $fieldName
612      * @return integer
613      */
614     public function getFieldNum($fieldName)
615     {
616         foreach( $this->_fields as $field ) {
617             if( $field->name == $fieldName ) {
618                 return $field->number;
619             }
620         }
621
622         return -1;
623     }
624
625     /**
626      * Returns field info for specified field
627      *
628      * @param integer $fieldNum
629      * @return Zend_Search_Lucene_Index_FieldInfo
630      */
631     public function getField($fieldNum)
632     {
633         return $this->_fields[$fieldNum];
634     }
635
636     /**
637      * Returns array of fields.
638      * if $indexed parameter is true, then returns only indexed fields.
639      *
640      * @param boolean $indexed
641      * @return array
642      */
643     public function getFields($indexed = false)
644     {
645         $result = array();
646         foreach( $this->_fields as $field ) {
647             if( (!$indexed) || $field->isIndexed ) {
648                 $result[ $field->name ] = $field->name;
649             }
650         }
651         return $result;
652     }
653
654     /**
655      * Returns array of FieldInfo objects.
656      *
657      * @return array
658      */
659     public function getFieldInfos()
660     {
661         return $this->_fields;
662     }
663
664     /**
665      * Returns actual deletions file generation number.
666      *
667      * @return integer
668      */
669     public function getDelGen()
670     {
671         return $this->_delGen;
672     }
673
674     /**
675      * Returns the total number of documents in this segment (including deleted documents).
676      *
677      * @return integer
678      */
679     public function count()
680     {
681         return $this->_docCount;
682     }
683
684     /**
685      * Returns number of deleted documents.
686      *
687      * @return integer
688      */
689     private function _deletedCount()
690     {
691         if ($this->_deleted === null) {
692             return 0;
693         }
694
695         if (extension_loaded('bitset')) {
696             return count(bitset_to_array($this->_deleted));
697         } else {
698             return count($this->_deleted);
699         }
700     }
701
702     /**
703      * Returns the total number of non-deleted documents in this segment.
704      *
705      * @return integer
706      */
707     public function numDocs()
708     {
709         if ($this->hasDeletions()) {
710             return $this->_docCount - $this->_deletedCount();
711         } else {
712             return $this->_docCount;
713         }
714     }
715
716     /**
717      * Get field position in a fields dictionary
718      *
719      * @param integer $fieldNum
720      * @return integer
721      */
722     private function _getFieldPosition($fieldNum) {
723         // Treat values which are not in a translation table as a 'direct value'
724         return isset($this->_fieldsDicPositions[$fieldNum]) ?
725                            $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
726     }
727
728     /**
729      * Return segment name
730      *
731      * @return string
732      */
733     public function getName()
734     {
735         return $this->_name;
736     }
737
738
739     /**
740      * TermInfo cache
741      *
742      * Size is 1024.
743      * Numbers are used instead of class constants because of performance considerations
744      *
745      * @var array
746      */
747     private $_termInfoCache = array();
748
749     private function _cleanUpTermInfoCache()
750     {
751         // Clean 256 term infos
752         foreach ($this->_termInfoCache as $key => $termInfo) {
753             unset($this->_termInfoCache[$key]);
754
755             // leave 768 last used term infos
756             if (count($this->_termInfoCache) == 768) {
757                 break;
758             }
759         }
760     }
761
762     /**
763      * Load terms dictionary index
764      *
765      * @throws Zend_Search_Lucene_Exception
766      */
767     private function _loadDictionaryIndex()
768     {
769         // Check, if index is already serialized
770         if ($this->_directory->fileExists($this->_name . '.sti')) {
771             // Load serialized dictionary index data
772             $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
773             $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
774
775             // Load dictionary index data
776             if (($unserializedData = @unserialize($stiFileData)) !== false) {
777                 list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
778                 return;
779             }
780         }
781
782         // Load data from .tii file and generate .sti file
783
784         // Prefetch dictionary index data
785         $tiiFile = $this->openCompoundFile('.tii');
786         $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
787
788         // Load dictionary index data
789         list($this->_termDictionary, $this->_termDictionaryInfos) =
790                     Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
791
792         $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
793         $stiFile = $this->_directory->createFile($this->_name . '.sti');
794         $stiFile->writeBytes($stiFileData);
795     }
796
797     /**
798      * Scans terms dictionary and returns term info
799      *
800      * @param Zend_Search_Lucene_Index_Term $term
801      * @return Zend_Search_Lucene_Index_TermInfo
802      */
803     public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
804     {
805         $termKey = $term->key();
806         if (isset($this->_termInfoCache[$termKey])) {
807             $termInfo = $this->_termInfoCache[$termKey];
808
809             // Move termInfo to the end of cache
810             unset($this->_termInfoCache[$termKey]);
811             $this->_termInfoCache[$termKey] = $termInfo;
812
813             return $termInfo;
814         }
815
816
817         if ($this->_termDictionary === null) {
818             $this->_loadDictionaryIndex();
819         }
820
821         $searchField = $this->getFieldNum($term->field);
822
823         if ($searchField == -1) {
824             return null;
825         }
826         $searchDicField = $this->_getFieldPosition($searchField);
827
828         // search for appropriate value in dictionary
829         $lowIndex = 0;
830         $highIndex = count($this->_termDictionary)-1;
831         while ($highIndex >= $lowIndex) {
832             // $mid = ($highIndex - $lowIndex)/2;
833             $mid = ($highIndex + $lowIndex) >> 1;
834             $midTerm = $this->_termDictionary[$mid];
835
836             $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
837             $delta = $searchDicField - $fieldNum;
838             if ($delta == 0) {
839                 $delta = strcmp($term->text, $midTerm[1] /* text */);
840             }
841
842             if ($delta < 0) {
843                 $highIndex = $mid-1;
844             } elseif ($delta > 0) {
845                 $lowIndex  = $mid+1;
846             } else {
847                 // return $this->_termDictionaryInfos[$mid]; // We got it!
848                 $a = $this->_termDictionaryInfos[$mid];
849                 $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
850
851                 // Put loaded termInfo into cache
852                 $this->_termInfoCache[$termKey] = $termInfo;
853
854                 return $termInfo;
855             }
856         }
857
858         if ($highIndex == -1) {
859             // Term is out of the dictionary range
860             return null;
861         }
862
863         $prevPosition = $highIndex;
864         $prevTerm = $this->_termDictionary[$prevPosition];
865         $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
866
867         $tisFile = $this->openCompoundFile('.tis');
868         $tiVersion = $tisFile->readInt();
869         if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
870             $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
871             require_once 'Zend/Search/Lucene/Exception.php';
872             throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
873         }
874
875         $termCount     = $tisFile->readLong();
876         $indexInterval = $tisFile->readInt();
877         $skipInterval  = $tisFile->readInt();
878         if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
879             $maxSkipLevels = $tisFile->readInt();
880         }
881
882         $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
883
884         $termValue    = $prevTerm[1] /* text */;
885         $termFieldNum = $prevTerm[0] /* field */;
886         $freqPointer = $prevTermInfo[1] /* freqPointer */;
887         $proxPointer = $prevTermInfo[2] /* proxPointer */;
888         for ($count = $prevPosition*$indexInterval + 1;
889              $count <= $termCount &&
890              ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
891               ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
892                strcmp($termValue, $term->text) < 0) );
893              $count++) {
894             $termPrefixLength = $tisFile->readVInt();
895             $termSuffix       = $tisFile->readString();
896             $termFieldNum     = $tisFile->readVInt();
897             $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
898
899             $docFreq      = $tisFile->readVInt();
900             $freqPointer += $tisFile->readVInt();
901             $proxPointer += $tisFile->readVInt();
902             if( $docFreq >= $skipInterval ) {
903                 $skipOffset = $tisFile->readVInt();
904             } else {
905                 $skipOffset = 0;
906             }
907         }
908
909         if ($termFieldNum == $searchField && $termValue == $term->text) {
910             $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
911         } else {
912             $termInfo = null;
913         }
914
915         // Put loaded termInfo into cache
916         $this->_termInfoCache[$termKey] = $termInfo;
917
918         if (count($this->_termInfoCache) == 1024) {
919             $this->_cleanUpTermInfoCache();
920         }
921
922         return $termInfo;
923     }
924
/**
 * Collects the IDs of all documents of this segment which contain the term.
 *
 * Every returned ID is the segment-local document number increased by $shift.
 *
 * If a documents filter is given, it is applied and refreshed at the same time:
 *  - when it already holds an entry for this segment, only documents listed in
 *    that entry are returned and the entry is replaced by the matched subset;
 *  - when it holds no entry yet, all documents of the term are returned and
 *    stored as the entry for this segment.
 * If the term is not present in the segment, the entry of a valid filter is
 * set to an empty array.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to each segment-local document ID
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array  shifted document IDs, in the order they are read from the .frq data
 * @throws Zend_Search_Lucene_Exception  if the term exists and $docsFilter is neither
 *                                       null nor a Zend_Search_Lucene_Index_DocsFilter
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $termInfo = $this->getTermInfo($term);

    if (!($termInfo instanceof Zend_Search_Lucene_Index_TermInfo)) {
        // Unknown term: nothing can match, so a valid filter gets an empty entry
        if ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

    $filtering     = ($docsFilter !== null);
    $segmentFilter = null; // stays null while the filter has no entry for this segment

    if ($filtering) {
        if (!($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
        }

        if (isset($docsFilter->segmentFilters[$this->_name])) {
            $segmentFilter = $docsFilter->segmentFilters[$this->_name];

            // An empty entry means that no document of this segment may match
            if (count($segmentFilter) == 0) {
                return array();
            }
        }
    }

    $matchedDocs   = array();
    $newFilterData = array();
    $docId         = 0;

    // Whatever the filter selectivity is, the whole list of term documents is decoded
    for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
        $docDelta = $frqFile->readVInt();
        if ($docDelta % 2 == 1) {
            // Odd delta: no separate frequency value follows
            $docId += ($docDelta - 1) / 2;
        } else {
            $docId += $docDelta / 2;
            // Frequency is not needed here, but it has to be consumed
            $frqFile->readVInt();
        }

        if ($segmentFilter !== null && !isset($segmentFilter[$docId])) {
            continue;
        }

        $matchedDocs[] = $shift + $docId;
        if ($filtering) {
            $newFilterData[$docId] = 1; // the value is irrelevant, only the key is used
        }
    }

    if ($filtering) {
        $docsFilter->segmentFilters[$this->_name] = $newFilterData;
    }

    return $matchedDocs;
}
1044
/**
 * Returns term frequencies for all documents of this segment containing the term.
 *
 * Result array structure: array(docId => freq, ...), where docId is the
 * segment-local document number increased by $shift.
 *
 * If a documents filter is given, it is applied and refreshed at the same time:
 *  - when it already holds an entry for this segment, only documents listed in
 *    that entry are returned and the entry is replaced by the matched subset;
 *  - when it holds no entry yet, all documents of the term are returned and
 *    stored as the entry for this segment.
 * If the term is not present in the segment, the entry of a valid filter is
 * set to an empty array.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to each segment-local document ID
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if the term exists and $docsFilter is neither
 *                                       null nor a Zend_Search_Lucene_Index_DocsFilter
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $termInfo = $this->getTermInfo($term);

    if (!($termInfo instanceof Zend_Search_Lucene_Index_TermInfo)) {
        // Unknown term: nothing can match, so a valid filter gets an empty entry
        if ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

    $filtering     = ($docsFilter !== null);
    $segmentFilter = null; // stays null while the filter has no entry for this segment

    if ($filtering) {
        if (!($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
        }

        if (isset($docsFilter->segmentFilters[$this->_name])) {
            $segmentFilter = $docsFilter->segmentFilters[$this->_name];

            // An empty entry means that no document of this segment may match
            if (count($segmentFilter) == 0) {
                return array();
            }
        }
    }

    $result        = array();
    $newFilterData = array();
    $docId         = 0;

    for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
        $docDelta = $frqFile->readVInt();
        if ($docDelta % 2 == 1) {
            // Odd delta: frequency is 1 and is not stored separately
            $docId += ($docDelta - 1) / 2;
            $freq   = 1;
        } else {
            $docId += $docDelta / 2;
            // The frequency value must be consumed for every document, including
            // the ones rejected by the filter. Otherwise the next iteration would
            // decode this value as a document delta and all following IDs would be wrong.
            $freq   = $frqFile->readVInt();
        }

        if ($segmentFilter !== null && !isset($segmentFilter[$docId])) {
            continue;
        }

        $result[$shift + $docId] = $freq;
        if ($filtering) {
            $newFilterData[$docId] = 1; // the value is irrelevant, only the key is used
        }
    }

    if ($filtering) {
        $docsFilter->segmentFilters[$this->_name] = $newFilterData;
    }

    return $result;
}
1169
/**
 * Returns term positions for all documents of this segment containing the term.
 *
 * Result array structure: array(docId => array(pos1, pos2, ...), ...), where docId
 * is the segment-local document number increased by $shift.
 *
 * If a documents filter is given, it is applied and refreshed at the same time:
 *  - when it already holds an entry for this segment, only documents listed in
 *    that entry are returned and the entry is replaced by the matched subset;
 *  - when it holds no entry yet, all documents of the term are returned and
 *    stored as the entry for this segment.
 * If the term is not present in the segment, the entry of a valid filter is
 * set to an empty array.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param integer $shift  value added to each segment-local document ID
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 * @throws Zend_Search_Lucene_Exception  if the term exists and $docsFilter is neither
 *                                       null nor a Zend_Search_Lucene_Index_DocsFilter
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
{
    $termInfo = $this->getTermInfo($term);

    if (!($termInfo instanceof Zend_Search_Lucene_Index_TermInfo)) {
        // Unknown term: nothing can match, so a valid filter gets an empty entry
        if ($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
            $docsFilter->segmentFilters[$this->_name] = array();
        }
        return array();
    }

    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

    $filtering     = ($docsFilter !== null);
    $segmentFilter = null; // stays null while the filter has no entry for this segment

    if ($filtering) {
        if (!($docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
        }

        if (isset($docsFilter->segmentFilters[$this->_name])) {
            $segmentFilter = $docsFilter->segmentFilters[$this->_name];

            // An empty entry means that no document of this segment may match
            if (count($segmentFilter) == 0) {
                return array();
            }
        }
    }

    // Pass 1: collect "document => frequency" pairs from the .frq data
    $freqByDoc = array();
    $docId     = 0;
    for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
        $docDelta = $frqFile->readVInt();
        if ($docDelta % 2 == 1) {
            $docId += ($docDelta - 1) / 2;
            $freqByDoc[$docId] = 1;
        } else {
            $docId += $docDelta / 2;
            $freqByDoc[$docId] = $frqFile->readVInt();
        }
    }

    // Pass 2: read the positions of each document from the .prx data
    $result        = array();
    $newFilterData = array();
    $prxFile       = $this->openCompoundFile('.prx');
    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

    foreach ($freqByDoc as $docNum => $freq) {
        $position  = 0;
        $positions = array();

        // Positions are stored as deltas. They are read even for documents rejected
        // by the filter, to keep the .prx stream aligned for the next document.
        for ($occurrence = 0; $occurrence < $freq; $occurrence++) {
            $position   += $prxFile->readVInt();
            $positions[] = $position;
        }

        if ($segmentFilter !== null && !isset($segmentFilter[$docNum])) {
            continue;
        }

        if ($filtering) {
            $newFilterData[$docNum] = 1; // the value is irrelevant, only the key is used
        }
        $result[$shift + $docNum] = $positions;
    }

    if ($filtering) {
        $docsFilter->segmentFilters[$this->_name] = $newFilterData;
    }

    return $result;
}
1352
/**
 * Loads normalization factors from the index files into $this->_norms.
 *
 * With a single norms file ('.nrm') the norms of every indexed field are
 * loaded at once; otherwise only the '.f<fieldNum>' file of the requested
 * field is read.
 *
 * @param integer $fieldNum
 * @throws Zend_Search_Lucene_Exception  if the '.nrm' file header is not recognized
 */
private function _loadNorm($fieldNum)
{
    if (!$this->_hasSingleNormFile) {
        // One norms file per field: read _docCount bytes for the requested field only
        $fieldNormFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $fieldNormFile->readBytes($this->_docCount);
        return;
    }

    $nrmFile       = $this->openCompoundFile('.nrm');
    $signature     = $nrmFile->readBytes(3);
    $formatVersion = $nrmFile->readByte();

    if (!($signature == 'NRM'  &&  $formatVersion == (int)0xFF)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
    }

    // Norm vectors follow the header one after another, for indexed fields only
    foreach ($this->_fields as $number => $info) {
        if (!$info->isIndexed) {
            continue;
        }
        $this->_norms[$number] = $nrmFile->readBytes($this->_docCount);
    }
}
1382
/**
 * Returns normalization factor for the specified document and field.
 *
 * Norms of the field are loaded on first access.
 *
 * @param integer $id         segment-local document ID
 * @param string  $fieldName
 * @return float|null  null if the field is unknown in this segment or is not indexed
 */
public function norm($id, $fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    // -1 is the same "unknown field" value normVector() checks for. Without this
    // guard $this->_fields[-1] would be dereferenced, raising undefined-offset and
    // property-on-null diagnostics before null is returned anyway.
    if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
        return null;
    }

    if (!isset($this->_norms[$fieldNum])) {
        $this->_loadNorm($fieldNum);
    }

    // Each document owns one byte in the norm vector of the field
    return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
}
1404
/**
 * Returns norm vector of the field, encoded in a byte string
 * (one byte per document of the segment).
 *
 * For a field which is unknown in this segment or is not indexed, the vector
 * is filled with the encoded default-similarity length norm of an empty field.
 *
 * @param string $fieldName
 * @return string
 */
public function normVector($fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    $hasStoredNorms = ($fieldNum != -1)  &&  $this->_fields[$fieldNum]->isIndexed;

    if ($hasStoredNorms) {
        // Load norms on first access
        if (!isset($this->_norms[$fieldNum])) {
            $this->_loadNorm($fieldNum);
        }

        return $this->_norms[$fieldNum];
    }

    // Nothing is stored for this field: build a vector of default norm bytes
    $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
    $defaultNorm = $similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) );

    return str_repeat(chr($defaultNorm), $this->_docCount);
}
1428
1429
/**
 * Tells whether this segment has a deletions set, i.e. whether
 * $this->_deleted holds anything other than null.
 *
 * @return boolean
 */
public function hasDeletions()
{
    return !($this->_deleted === null);
}
1439
1440
/**
 * Tells whether norms of this segment are stored in a single norms file.
 *
 * The internal flag is always reported as a strict boolean.
 *
 * @return boolean
 */
public function hasSingleNormFile()
{
    return (bool) $this->_hasSingleNormFile;
}
1450
/**
 * Tells whether this segment is stored using a compound segment file.
 *
 * The value of the internal flag is returned as is.
 *
 * @return boolean
 */
public function isCompound()
{
    return $this->_isCompound;
}
1460
/**
 * Marks a document of this segment as deleted.
 *
 * The deletion is only recorded in memory and flagged as dirty here.
 * Deleted documents are kept in a bitset if the 'bitset' extension is
 * loaded, otherwise in an array keyed by document ID.
 *
 * @param integer $id  internal (segment-local) document ID
 */
public function delete($id)
{
    $this->_deletedDirty = true;

    if (!extension_loaded('bitset')) {
        // Array storage: the key is the document ID, the value is irrelevant
        if ($this->_deleted === null) {
            $this->_deleted = array();
        }
        $this->_deleted[$id] = 1;

        return;
    }

    // Bitset storage
    if ($this->_deleted === null) {
        $this->_deleted = bitset_empty($id);
    }
    bitset_incl($this->_deleted, $id);
}
1484
/**
 * Checks whether a document is marked as deleted.
 *
 * @param integer $id  internal (segment-local) document ID
 * @return boolean
 */
public function isDeleted($id)
{
    // No deletions set at all
    if ($this->_deleted === null) {
        return false;
    }

    // The storage type depends on the availability of the 'bitset' extension
    return extension_loaded('bitset')
         ? bitset_in($this->_deleted, $id)
         : isset($this->_deleted[$id]);
}
1503
/**
 * Detects the latest delete generation of this segment.
 *
 * It is actually used from the writeChanges() method or from the constructor if it's
 * invoked from the Index writer. In both cases the index write lock is already
 * obtained, so we don't have to care about it here.
 *
 * @return integer  -1 if the directory has no deletions file for this segment,
 *                  0 if only '<segment_name>.del' exists, otherwise the highest
 *                  NNN (base-36 decoded) among '<segment_name>_NNN.del' files
 */
private function _detectLatestDelGen()
{
    $plainDelFile = $this->_name . '.del';

    // The segment name is quoted so that it is always matched literally,
    // whatever characters it contains
    $generationPattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

    // -1 means "there is no deletions file for the current segment"
    $latestDelGen = -1;

    foreach ($this->_directory->fileList() as $file) {
        if ($file == $plainDelFile) {
            // Matches <segment_name>.del file name (generation 0)
            $latestDelGen = max($latestDelGen, 0);
        } else if (preg_match($generationPattern, $file, $matches)) {
            // Matches <segment_name>_NNN.del file names, NNN is a base-36 number
            $latestDelGen = max($latestDelGen, (int)base_convert($matches[1], 36, 10));
        }
    }

    return $latestDelGen;
}
1536
/**
 * Writes pending deletions into a new deletions file, if it's necessary.
 *
 * If the current process has deleted nothing, the deletions are only reloaded
 * when a newer deletions file generation is found in the directory. Otherwise
 * the in-memory deletions are merged with the latest deletions file (if it is
 * newer than the known generation) and stored as the next generation.
 *
 * This method must be invoked only from the Writer _updateSegments() method,
 * so the index write lock has to be already obtained.
 *
 * @internal
 * @throws Zend_Search_Lucene_Exception  if nothing was deleted by this process and the
 *                                       detected generation is not equal to or greater
 *                                       than the known one
 */
public function writeChanges()
{
    // Latest generation present in the directory
    $latestDelGen = $this->_detectLatestDelGen();

    if (!$this->_deletedDirty) {
        // There were no deletions made by the current process

        if ($latestDelGen == $this->_delGen) {
            // Deletions file hasn't been updated by any concurrent process
            return;
        }

        if (!($latestDelGen > $this->_delGen)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
        }

        // Deletions file has been updated by some concurrent process: reload it
        $this->_delGen  = $latestDelGen;
        $this->_deleted = $this->_loadDelFile();

        return;
    }

    $useBitset = extension_loaded('bitset');

    if ($latestDelGen > $this->_delGen) {
        // Merge current deletions with the latest deletions file
        $this->_delGen = $latestDelGen;

        $latestDelete = $this->_loadDelFile();

        if ($useBitset) {
            $this->_deleted = bitset_union($this->_deleted, $latestDelete);
        } else {
            $this->_deleted += $latestDelete;
        }
    }

    if ($useBitset) {
        // The bitset is written as is
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    } else {
        // Pack the array of deleted IDs into a bit string, lowest bit first in each byte
        $byteCount = floor($this->_docCount/8)+1;
        $delBytes  = '';
        for ($byteIndex = 0; $byteIndex < $byteCount; $byteIndex++) {
            $packed = 0;
            for ($bit = 0; $bit < 8; $bit++) {
                if (isset($this->_deleted[$byteIndex*8 + $bit])) {
                    $packed |= (1<<$bit);
                }
            }
            $delBytes .= chr($packed);
        }
        $bitCount = count($this->_deleted);
    }

    if ($this->_delGen != -1) {
        // Increase deletions file generation number by 1
        $this->_delGen++;
    } else {
        // There was no deletions file yet: start with generation 1
        $this->_delGen = 1;
    }

    // File name is <segment_name>_NNN.del, NNN is the generation as a base-36 number
    $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
1616
1617
1618     /**
1619      * Term Dictionary File object for stream like terms reading
1620      *
1621      * @var Zend_Search_Lucene_Storage_File
1622      */
1623     private $_tisFile = null;
1624
1625     /**
1626      * Actual offset of the .tis file data
1627      *
1628      * @var integer
1629      */
1630     private $_tisFileOffset;
1631
1632     /**
1633      * Frequencies File object for stream like terms reading
1634      *
1635      * @var Zend_Search_Lucene_Storage_File
1636      */
1637     private $_frqFile = null;
1638
1639     /**
1640      * Actual offset of the .frq file data
1641      *
1642      * @var integer
1643      */
1644     private $_frqFileOffset;
1645
1646     /**
1647      * Positions File object for stream like terms reading
1648      *
1649      * @var Zend_Search_Lucene_Storage_File
1650      */
1651     private $_prxFile = null;
1652
1653     /**
1654      * Actual offset of the .prx file in the compound file
1655      *
1656      * @var integer
1657      */
1658     private $_prxFileOffset;
1659
1660
    /**
     * Number of terms which are still left to read in the terms stream
     * (initialized by resetTermsStream()/skipTo() and decreased by each nextTerm() call)
     *
     * @var integer
     */
1666     private $_termCount = 0;
1667
1668     /**
1669      * Overall number of terms in term stream
1670      *
1671      * @var integer
1672      */
1673     private $_termNum = 0;
1674
1675     /**
1676      * Segment index interval
1677      *
1678      * @var integer
1679      */
1680     private $_indexInterval;
1681
1682     /**
1683      * Segment skip interval
1684      *
1685      * @var integer
1686      */
1687     private $_skipInterval;
1688
1689     /**
1690      * Last TermInfo in a terms stream
1691      *
1692      * @var Zend_Search_Lucene_Index_TermInfo
1693      */
1694     private $_lastTermInfo = null;
1695
1696     /**
1697      * Last Term in a terms stream
1698      *
1699      * @var Zend_Search_Lucene_Index_Term
1700      */
1701     private $_lastTerm = null;
1702
    /**
     * Map of the document IDs
     * Used to get new docID after removing deleted documents.
     * It's not very efficient from a memory usage point of view,
     * but it's much faster than other methods
     *
     * @var array|null
     */
1711     private $_docMap = null;
1712
1713     /**
1714      * An array of all term positions in the documents.
1715      * Array structure: array( docId => array( pos1, pos2, ...), ...)
1716      *
1717      * Is set to null if term positions loading has to be skipped
1718      *
1719      * @var array|null
1720      */
1721     private $_lastTermPositions;
1722
1723
1724     /**
1725      * Terms scan mode
1726      *
1727      * Values:
1728      *
1729      * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
1730      * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
1731      * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
1732      *                       document numbers are compacted (shifted if segment has deleted documents)
1733      *
1734      * @var integer
1735      */
1736     private $_termsScanMode;
1737
1738     /** Scan modes */
1739     const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
1740     const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
1741     const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
1742                                 // document numbers are compacted (shifted if segment contains deleted documents)
1743
1744     /**
1745      * Reset terms stream
1746      *
1747      * $startId - id for the fist document
1748      * $compact - remove deleted documents
1749      *
1750      * Returns start document id for the next segment
1751      *
1752      * @param integer $startId
1753      * @param integer $mode
1754      * @throws Zend_Search_Lucene_Exception
1755      * @return integer
1756      */
1757     public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
1758     {
1759         /**
1760          * SegmentInfo->resetTermsStream() method actually takes two optional parameters:
1761          *   $startId (default value is 0)
1762          *   $mode (default value is self::SM_TERMS_ONLY)
1763          */
1764         $argList = func_get_args();
1765         if (count($argList) > 2) {
1766             require_once 'Zend/Search/Lucene/Exception.php';
1767             throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
1768         } else if (count($argList) == 2) {
1769                 $startId = $argList[0];
1770                 $mode    = $argList[1];
1771         } else if (count($argList) == 1) {
1772             $startId = $argList[0];
1773             $mode    = self::SM_TERMS_ONLY;
1774         } else {
1775             $startId = 0;
1776             $mode    = self::SM_TERMS_ONLY;
1777         }
1778
1779         if ($this->_tisFile !== null) {
1780             $this->_tisFile = null;
1781         }
1782
1783         $this->_tisFile = $this->openCompoundFile('.tis', false);
1784         $this->_tisFileOffset = $this->_tisFile->tell();
1785
1786         $tiVersion = $this->_tisFile->readInt();
1787         if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
1788             $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
1789             require_once 'Zend/Search/Lucene/Exception.php';
1790             throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
1791         }
1792
1793         $this->_termCount     =
1794               $this->_termNum = $this->_tisFile->readLong(); // Read terms count
1795         $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
1796         $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
1797         if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
1798             $maxSkipLevels = $this->_tisFile->readInt();
1799         }
1800
1801         if ($this->_frqFile !== null) {
1802             $this->_frqFile = null;
1803         }
1804         if ($this->_prxFile !== null) {
1805             $this->_prxFile = null;
1806         }
1807         $this->_docMap = array();
1808
1809         $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
1810         $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
1811         $this->_lastTermPositions = null;
1812
1813         $this->_termsScanMode = $mode;
1814
1815         switch ($mode) {
1816             case self::SM_TERMS_ONLY:
1817                 // Do nothing
1818                 break;
1819
1820             case self::SM_FULL_INFO:
1821                 // break intentionally omitted
1822             case self::SM_MERGE_INFO:
1823                 $this->_frqFile = $this->openCompoundFile('.frq', false);
1824                 $this->_frqFileOffset = $this->_frqFile->tell();
1825
1826                 $this->_prxFile = $this->openCompoundFile('.prx', false);
1827                 $this->_prxFileOffset = $this->_prxFile->tell();
1828
1829                 for ($count = 0; $count < $this->_docCount; $count++) {
1830                     if (!$this->isDeleted($count)) {
1831                         $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
1832                     }
1833                 }
1834                 break;
1835
1836             default:
1837                 require_once 'Zend/Search/Lucene/Exception.php';
1838                 throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
1839                 break;
1840         }
1841
1842
1843         $this->nextTerm();
1844         return $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
1845     }
1846
1847
1848     /**
1849      * Skip terms stream up to specified term preffix.
1850      *
1851      * Prefix contains fully specified field info and portion of searched term
1852      *
1853      * @param Zend_Search_Lucene_Index_Term $prefix
1854      * @throws Zend_Search_Lucene_Exception
1855      */
1856     public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
1857     {
1858         if ($this->_termDictionary === null) {
1859             $this->_loadDictionaryIndex();
1860         }
1861
1862         $searchField = $this->getFieldNum($prefix->field);
1863
1864         if ($searchField == -1) {
1865             /**
1866              * Field is not presented in this segment
1867              * Go to the end of dictionary
1868              */
1869             $this->_tisFile = null;
1870             $this->_frqFile = null;
1871             $this->_prxFile = null;
1872
1873             $this->_lastTerm          = null;
1874             $this->_lastTermInfo      = null;
1875             $this->_lastTermPositions = null;
1876
1877             return;
1878         }
1879         $searchDicField = $this->_getFieldPosition($searchField);
1880
1881         // search for appropriate value in dictionary
1882         $lowIndex = 0;
1883         $highIndex = count($this->_termDictionary)-1;
1884         while ($highIndex >= $lowIndex) {
1885             // $mid = ($highIndex - $lowIndex)/2;
1886             $mid = ($highIndex + $lowIndex) >> 1;
1887             $midTerm = $this->_termDictionary[$mid];
1888
1889             $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
1890             $delta = $searchDicField - $fieldNum;
1891             if ($delta == 0) {
1892                 $delta = strcmp($prefix->text, $midTerm[1] /* text */);
1893             }
1894
1895             if ($delta < 0) {
1896                 $highIndex = $mid-1;
1897             } elseif ($delta > 0) {
1898                 $lowIndex  = $mid+1;
1899             } else {
1900                 // We have reached term we are looking for
1901                 break;
1902             }
1903         }
1904
1905         if ($highIndex == -1) {
1906             // Term is out of the dictionary range
1907             $this->_tisFile = null;
1908             $this->_frqFile = null;
1909             $this->_prxFile = null;
1910
1911             $this->_lastTerm          = null;
1912             $this->_lastTermInfo      = null;
1913             $this->_lastTermPositions = null;
1914
1915             return;
1916         }
1917
1918         $prevPosition = $highIndex;
1919         $prevTerm = $this->_termDictionary[$prevPosition];
1920         $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
1921
1922         if ($this->_tisFile === null) {
1923             // The end of terms stream is reached and terms dictionary file is closed
1924             // Perform mini-reset operation
1925             $this->_tisFile = $this->openCompoundFile('.tis', false);
1926
1927             if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1928                 $this->_frqFile = $this->openCompoundFile('.frq', false);
1929                 $this->_prxFile = $this->openCompoundFile('.prx', false);
1930             }
1931         }
1932         $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);
1933
1934         $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
1935                                                                  ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
1936         $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
1937                                                                      $prevTermInfo[1] /* freqPointer */,
1938                                                                      $prevTermInfo[2] /* proxPointer */,
1939                                                                      $prevTermInfo[3] /* skipOffset */);
1940         $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;
1941
1942         if ($highIndex == 0) {
1943             // skip start entry
1944             $this->nextTerm();
1945         } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
1946             // We got exact match in the dictionary index
1947
1948             if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1949                 $this->_lastTermPositions = array();
1950
1951                 $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
1952                 $freqs = array();   $docId = 0;
1953                 for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
1954                     $docDelta = $this->_frqFile->readVInt();
1955                     if( $docDelta % 2 == 1 ) {
1956                         $docId += ($docDelta-1)/2;
1957                         $freqs[ $docId ] = 1;
1958                     } else {
1959                         $docId += $docDelta/2;
1960                         $freqs[ $docId ] = $this->_frqFile->readVInt();
1961                     }
1962                 }
1963
1964                 $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
1965                 foreach ($freqs as $docId => $freq) {
1966                     $termPosition = 0;  $positions = array();
1967
1968                     for ($count = 0; $count < $freq; $count++ ) {
1969                         $termPosition += $this->_prxFile->readVInt();
1970                         $positions[] = $termPosition;
1971                     }
1972
1973                     if (isset($this->_docMap[$docId])) {
1974                         $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
1975                     }
1976                 }
1977             }
1978
1979             return;
1980         }
1981
1982         // Search term matching specified prefix
1983         while ($this->_lastTerm !== null) {
1984             if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
1985                  ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
1986                     // Current term matches or greate than the pattern
1987                     return;
1988             }
1989
1990             $this->nextTerm();
1991         }
1992     }
1993
1994
1995     /**
1996      * Scans terms dictionary and returns next term
1997      *
1998      * @return Zend_Search_Lucene_Index_Term|null
1999      */
2000     public function nextTerm()
2001     {
2002         if ($this->_tisFile === null  ||  $this->_termCount == 0) {
2003             $this->_lastTerm          = null;
2004             $this->_lastTermInfo      = null;
2005             $this->_lastTermPositions = null;
2006             $this->_docMap            = null;
2007
2008             // may be necessary for "empty" segment
2009             $this->_tisFile = null;
2010             $this->_frqFile = null;
2011             $this->_prxFile = null;
2012
2013             return null;
2014         }
2015
2016         $termPrefixLength = $this->_tisFile->readVInt();
2017         $termSuffix       = $this->_tisFile->readString();
2018         $termFieldNum     = $this->_tisFile->readVInt();
2019         $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;
2020
2021         $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);
2022
2023         $docFreq     = $this->_tisFile->readVInt();
2024         $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
2025         $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
2026         if ($docFreq >= $this->_skipInterval) {
2027             $skipOffset = $this->_tisFile->readVInt();
2028         } else {
2029             $skipOffset = 0;
2030         }
2031
2032         $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
2033
2034
2035         if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
2036             $this->_lastTermPositions = array();
2037
2038             $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
2039             $freqs = array();   $docId = 0;
2040             for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
2041                 $docDelta = $this->_frqFile->readVInt();
2042                 if( $docDelta % 2 == 1 ) {
2043                     $docId += ($docDelta-1)/2;
2044                     $freqs[ $docId ] = 1;
2045                 } else {
2046                     $docId += $docDelta/2;
2047                     $freqs[ $docId ] = $this->_frqFile->readVInt();
2048                 }
2049             }
2050
2051             $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
2052             foreach ($freqs as $docId => $freq) {
2053                 $termPosition = 0;  $positions = array();
2054
2055                 for ($count = 0; $count < $freq; $count++ ) {
2056                     $termPosition += $this->_prxFile->readVInt();
2057                     $positions[] = $termPosition;
2058                 }
2059
2060                 if (isset($this->_docMap[$docId])) {
2061                     $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
2062                 }
2063             }
2064         }
2065
2066         $this->_termCount--;
2067         if ($this->_termCount == 0) {
2068             $this->_tisFile = null;
2069             $this->_frqFile = null;
2070             $this->_prxFile = null;
2071         }
2072
2073         return $this->_lastTerm;
2074     }
2075
2076     /**
2077      * Close terms stream
2078      *
2079      * Should be used for resources clean up if stream is not read up to the end
2080      */
2081     public function closeTermsStream()
2082     {
2083         $this->_tisFile = null;
2084         $this->_frqFile = null;
2085         $this->_prxFile = null;
2086
2087         $this->_lastTerm          = null;
2088         $this->_lastTermInfo      = null;
2089         $this->_lastTermPositions = null;
2090
2091         $this->_docMap            = null;
2092     }
2093
2094
2095     /**
2096      * Returns term in current position
2097      *
2098      * @return Zend_Search_Lucene_Index_Term|null
2099      */
2100     public function currentTerm()
2101     {
2102         return $this->_lastTerm;
2103     }
2104
2105
2106     /**
2107      * Returns an array of all term positions in the documents.
2108      * Return array structure: array( docId => array( pos1, pos2, ...), ...)
2109      *
2110      * @return array
2111      */
2112     public function currentTermPositions()
2113     {
2114         return $this->_lastTermPositions;
2115     }
2116 }
2117