final move of files
[web.mtrack] / Zend / Search / Lucene.php
diff --git a/Zend/Search/Lucene.php b/Zend/Search/Lucene.php
new file mode 100644 (file)
index 0000000..b595993
--- /dev/null
@@ -0,0 +1,1520 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Lucene.php 17164 2009-07-27 03:59:23Z matthew $
+ */
+
+/** Zend_Search_Lucene_Document */
+require_once 'Zend/Search/Lucene/Document.php';
+
+/** Zend_Search_Lucene_Document_Html */
+require_once 'Zend/Search/Lucene/Document/Html.php';
+
+/** Zend_Search_Lucene_Document_Docx */
+require_once 'Zend/Search/Lucene/Document/Docx.php';
+
+/** Zend_Search_Lucene_Document_Pptx */
+require_once 'Zend/Search/Lucene/Document/Pptx.php';
+
+/** Zend_Search_Lucene_Document_Xlsx */
+require_once 'Zend/Search/Lucene/Document/Xlsx.php';
+
+/** Zend_Search_Lucene_Storage_Directory_Filesystem */
+require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
+
+/** Zend_Search_Lucene_Storage_File_Memory */
+require_once 'Zend/Search/Lucene/Storage/File/Memory.php';
+
+/** Zend_Search_Lucene_Index_Term */
+require_once 'Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Index_TermInfo */
+require_once 'Zend/Search/Lucene/Index/TermInfo.php';
+
+/** Zend_Search_Lucene_Index_SegmentInfo */
+require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
+
+/** Zend_Search_Lucene_Index_FieldInfo */
+require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
+
+/** Zend_Search_Lucene_Index_Writer */
+require_once 'Zend/Search/Lucene/Index/Writer.php';
+
+/** Zend_Search_Lucene_Search_QueryParser */
+require_once 'Zend/Search/Lucene/Search/QueryParser.php';
+
+/** Zend_Search_Lucene_Search_QueryHit */
+require_once 'Zend/Search/Lucene/Search/QueryHit.php';
+
+/** Zend_Search_Lucene_Search_Similarity */
+require_once 'Zend/Search/Lucene/Search/Similarity.php';
+
+/** Zend_Search_Lucene_Index_TermsPriorityQueue */
+require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
+
+/** Zend_Search_Lucene_TermStreamsPriorityQueue */
+require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
+
+/** Zend_Search_Lucene_Index_DocsFilter */
+require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
+
+/** Zend_Search_Lucene_LockManager */
+require_once 'Zend/Search/Lucene/LockManager.php';
+
+/** Zend_Search_Lucene_Interface */
+require_once 'Zend/Search/Lucene/Interface.php';
+
+/** Zend_Search_Lucene_Proxy */
+require_once 'Zend/Search/Lucene/Proxy.php';
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
+{
+    /**
+     * Default field name for search
+     *
+     * Null means search through all fields
+     *
+     * @var string
+     */
+    private static $_defaultSearchField = null;
+
+    /**
+     * Result set limit
+     *
+     * 0 means no limit
+     *
+     * @var integer
+     */
+    private static $_resultSetLimit = 0;
+
+    /**
+     * Terms per query limit
+     *
+     * 0 means no limit
+     *
+     * @var integer
+     */
+    private static $_termsPerQueryLimit = 1024;
+
+    /**
+     * File system adapter.
+     *
+     * @var Zend_Search_Lucene_Storage_Directory
+     */
+    private $_directory = null;
+
+    /**
+     * File system adapter closing option
+     *
+     * @var boolean
+     */
+    private $_closeDirOnExit = true;
+
+    /**
+     * Writer for this index, not instantiated unless required.
+     *
+     * @var Zend_Search_Lucene_Index_Writer
+     */
+    private $_writer = null;
+
+    /**
+     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
+     *
+     * @var array Zend_Search_Lucene_Index_SegmentInfo
+     */
+    private $_segmentInfos = array();
+
+    /**
+     * Number of documents in this index.
+     *
+     * @var integer
+     */
+    private $_docCount = 0;
+
+    /**
+     * Flag for index changes
+     *
+     * @var boolean
+     */
+    private $_hasChanges = false;
+
+
+    /**
+     * Signal, that index is already closed, changes are fixed and resources are cleaned up
+     *
+     * @var boolean
+     */
+    private $_closed = false;
+
+    /**
+     * Number of references to the index object
+     *
+     * @var integer
+     */
+    private $_refCount = 0;
+
+    /**
+     * Current segment generation
+     *
+     * @var integer
+     */
+    private $_generation;
+
+    const FORMAT_PRE_2_1 = 0;
+    const FORMAT_2_1     = 1;
+    const FORMAT_2_3     = 2;
+
+
+    /**
+     * Index format version
+     *
+     * @var integer
+     */
+    private $_formatVersion;
+
+    /**
+     * Create index
+     *
+     * @param mixed $directory
+     * @return Zend_Search_Lucene_Interface
+     */
+    public static function create($directory)
+    {
+        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, true));
+    }
+
+    /**
+     * Open index
+     *
+     * @param mixed $directory
+     * @return Zend_Search_Lucene_Interface
+     */
+    public static function open($directory)
+    {
+        return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
+    }
+
+    /** Generation retrieving counter */
+    const GENERATION_RETRIEVE_COUNT = 10;
+
+    /** Pause between generation retrieving attempts in milliseconds */
+    const GENERATION_RETRIEVE_PAUSE = 50;
+
+    /**
+     * Get current generation number
+     *
+     * Returns generation number
+     * 0 means pre-2.1 index format
+     * -1 means there are no segments files.
+     *
+     * @param Zend_Search_Lucene_Storage_Directory $directory
+     * @return integer
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
+    {
+        /**
+         * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
+         *
+         * Apache Lucene index format documentation mentions this method only as a fallback method
+         *
+         * Nevertheless we use it according to the performance considerations
+         *
+         * @todo check if we can use some modification of Apache Lucene generation determination algorithm
+         *       without performance problems
+         */
+
+        require_once 'Zend/Search/Lucene/Exception.php';
+        try {
+            for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
+                // Try to get generation file
+                $genFile = $directory->getFileObject('segments.gen', false);
+
+                $format = $genFile->readInt();
+                if ($format != (int)0xFFFFFFFE) {
+                    throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
+                }
+
+                $gen1 = $genFile->readLong();
+                $gen2 = $genFile->readLong();
+
+                if ($gen1 == $gen2) {
+                    return $gen1;
+                }
+
+                usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
+            }
+
+            // All passes are failed
+            throw new Zend_Search_Lucene_Exception('Index is under processing now');
+        } catch (Zend_Search_Lucene_Exception $e) {
+            if (strpos($e->getMessage(), 'is not readable') !== false) {
+                try {
+                    // Try to open old style segments file
+                    $segmentsFile = $directory->getFileObject('segments', false);
+
+                    // It's pre-2.1 index
+                    return 0;
+                } catch (Zend_Search_Lucene_Exception $e) {
+                    if (strpos($e->getMessage(), 'is not readable') !== false) {
+                        return -1;
+                    } else {
+                        throw $e;
+                    }
+                }
+            } else {
+                throw $e;
+            }
+        }
+
+        return -1;
+    }
+
+    /**
+     * Get segments file name
+     *
+     * @param integer $generation
+     * @return string
+     */
+    public static function getSegmentFileName($generation)
+    {
+        if ($generation == 0) {
+            return 'segments';
+        }
+
+        return 'segments_' . base_convert($generation, 10, 36);
+    }
+
+    /**
+     * Get index format version
+     *
+     * @return integer
+     */
+    public function getFormatVersion()
+    {
+        return $this->_formatVersion;
+    }
+
+    /**
+     * Set index format version.
+     * Index is converted to this format at the nearest upfdate time
+     *
+     * @param int $formatVersion
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function setFormatVersion($formatVersion)
+    {
+        if ($formatVersion != self::FORMAT_PRE_2_1  &&
+            $formatVersion != self::FORMAT_2_1  &&
+            $formatVersion != self::FORMAT_2_3) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Unsupported index format');
+        }
+
+        $this->_formatVersion = $formatVersion;
+    }
+
+    /**
+     * Read segments file for pre-2.1 Lucene index format
+     *
+     * @throws Zend_Search_Lucene_Exception
+     */
+    private function _readPre21SegmentsFile()
+    {
+        $segmentsFile = $this->_directory->getFileObject('segments');
+
+        $format = $segmentsFile->readInt();
+
+        if ($format != (int)0xFFFFFFFF) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+        }
+
+        // read version
+        $segmentsFile->readLong();
+
+        // read segment name counter
+        $segmentsFile->readInt();
+
+        $segments = $segmentsFile->readInt();
+
+        $this->_docCount = 0;
+
+        // read segmentInfos
+        for ($count = 0; $count < $segments; $count++) {
+            $segName = $segmentsFile->readString();
+            $segSize = $segmentsFile->readInt();
+            $this->_docCount += $segSize;
+
+            $this->_segmentInfos[$segName] =
+                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+                                                                         $segName,
+                                                                         $segSize);
+        }
+
+        // Use 2.1 as a target version. Index will be reorganized at update time.
+        $this->_formatVersion = self::FORMAT_2_1;
+    }
+
+    /**
+     * Read segments file
+     *
+     * @throws Zend_Search_Lucene_Exception
+     */
+    private function _readSegmentsFile()
+    {
+        $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+
+        $format = $segmentsFile->readInt();
+
+        if ($format == (int)0xFFFFFFFC) {
+            $this->_formatVersion = self::FORMAT_2_3;
+        } else if ($format == (int)0xFFFFFFFD) {
+            $this->_formatVersion = self::FORMAT_2_1;
+        } else {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
+        }
+
+        // read version
+        $segmentsFile->readLong();
+
+        // read segment name counter
+        $segmentsFile->readInt();
+
+        $segments = $segmentsFile->readInt();
+
+        $this->_docCount = 0;
+
+        // read segmentInfos
+        for ($count = 0; $count < $segments; $count++) {
+            $segName = $segmentsFile->readString();
+            $segSize = $segmentsFile->readInt();
+
+            // 2.1+ specific properties
+            $delGen = $segmentsFile->readLong();
+
+            if ($this->_formatVersion == self::FORMAT_2_3) {
+                $docStoreOffset = $segmentsFile->readInt();
+
+                if ($docStoreOffset != (int)0xFFFFFFFF) {
+                    $docStoreSegment        = $segmentsFile->readString();
+                    $docStoreIsCompoundFile = $segmentsFile->readByte();
+
+                    $docStoreOptions = array('offset'     => $docStoreOffset,
+                                             'segment'    => $docStoreSegment,
+                                             'isCompound' => ($docStoreIsCompoundFile == 1));
+                } else {
+                    $docStoreOptions = null;
+                }
+            } else {
+                $docStoreOptions = null;
+            }
+
+            $hasSingleNormFile = $segmentsFile->readByte();
+            $numField          = $segmentsFile->readInt();
+
+            $normGens = array();
+            if ($numField != (int)0xFFFFFFFF) {
+                for ($count1 = 0; $count1 < $numField; $count1++) {
+                    $normGens[] = $segmentsFile->readLong();
+                }
+
+                require_once 'Zend/Search/Lucene/Exception.php';
+                throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
+            }
+
+            $isCompoundByte     = $segmentsFile->readByte();
+
+            if ($isCompoundByte == 0xFF) {
+                // The segment is not a compound file
+                $isCompound = false;
+            } else if ($isCompoundByte == 0x00) {
+                // The status is unknown
+                $isCompound = null;
+            } else if ($isCompoundByte == 0x01) {
+                // The segment is a compound file
+                $isCompound = true;
+            }
+
+            $this->_docCount += $segSize;
+
+            $this->_segmentInfos[$segName] =
+                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
+                                                                         $segName,
+                                                                         $segSize,
+                                                                         $delGen,
+                                                                         $docStoreOptions,
+                                                                         $hasSingleNormFile,
+                                                                         $isCompound);
+        }
+    }
+
+    /**
+     * Opens the index.
+     *
+     * IndexReader constructor needs Directory as a parameter. It should be
+     * a string with a path to the index folder or a Directory object.
+     *
+     * @param mixed $directory
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function __construct($directory = null, $create = false)
+    {
+        if ($directory === null) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Exception('No index directory specified');
+        }
+
+        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
+            $this->_directory      = $directory;
+            $this->_closeDirOnExit = false;
+        } else {
+            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
+            $this->_closeDirOnExit = true;
+        }
+
+        $this->_segmentInfos = array();
+
+        // Mark index as "under processing" to prevent other processes from premature index cleaning
+        Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
+
+        $this->_generation = self::getActualGeneration($this->_directory);
+
+        if ($create) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            try {
+                Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
+            } catch (Zend_Search_Lucene_Exception $e) {
+                Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
+
+                if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
+                    throw $e;
+                } else {
+                    throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now');
+                }
+            }
+
+            if ($this->_generation == -1) {
+                // Directory doesn't contain existing index, start from 1
+                $this->_generation = 1;
+                $nameCounter = 0;
+            } else {
+                // Directory contains existing index
+                $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
+                $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
+
+                $nameCounter = $segmentsFile->readInt();
+                $this->_generation++;
+            }
+
+            Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
+
+            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
+        }
+
+        if ($this->_generation == -1) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
+        } else if ($this->_generation == 0) {
+            $this->_readPre21SegmentsFile();
+        } else {
+            $this->_readSegmentsFile();
+        }
+    }
+
+    /**
+     * Close current index and free resources
+     */
+    private function _close()
+    {
+        if ($this->_closed) {
+            // index is already closed and resources are cleaned up
+            return;
+        }
+
+        $this->commit();
+
+        // Release "under processing" flag
+        Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
+
+        if ($this->_closeDirOnExit) {
+            $this->_directory->close();
+        }
+
+        $this->_directory    = null;
+        $this->_writer       = null;
+        $this->_segmentInfos = null;
+
+        $this->_closed = true;
+    }
+
+    /**
+     * Add reference to the index object
+     *
+     * @internal
+     */
+    public function addReference()
+    {
+        $this->_refCount++;
+    }
+
+    /**
+     * Remove reference from the index object
+     *
+     * When reference count becomes zero, index is closed and resources are cleaned up
+     *
+     * @internal
+     */
+    public function removeReference()
+    {
+        $this->_refCount--;
+
+        if ($this->_refCount == 0) {
+            $this->_close();
+        }
+    }
+
+    /**
+     * Object destructor
+     */
+    public function __destruct()
+    {
+        $this->_close();
+    }
+
+    /**
+     * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
+     *
+     * @return Zend_Search_Lucene_Index_Writer
+     */
+    private function _getIndexWriter()
+    {
+        if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
+            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, $this->_formatVersion);
+        }
+
+        return $this->_writer;
+    }
+
+
+    /**
+     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
+     *
+     * @return Zend_Search_Lucene_Storage_Directory
+     */
+    public function getDirectory()
+    {
+        return $this->_directory;
+    }
+
+
+    /**
+     * Returns the total number of documents in this index (including deleted documents).
+     *
+     * @return integer
+     */
+    public function count()
+    {
+        return $this->_docCount;
+    }
+
+    /**
+     * Returns one greater than the largest possible document number.
+     * This may be used to, e.g., determine how big to allocate a structure which will have
+     * an element for every document number in an index.
+     *
+     * @return integer
+     */
+    public function maxDoc()
+    {
+        return $this->count();
+    }
+
+    /**
+     * Returns the total number of non-deleted documents in this index.
+     *
+     * @return integer
+     */
+    public function numDocs()
+    {
+        $numDocs = 0;
+
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $numDocs += $segmentInfo->numDocs();
+        }
+
+        return $numDocs;
+    }
+
+    /**
+     * Checks, that document is deleted
+     *
+     * @param integer $id
+     * @return boolean
+     * @throws Zend_Search_Lucene_Exception    Exception is thrown if $id is out of the range
+     */
+    public function isDeleted($id)
+    {
+        if ($id >= $this->_docCount) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        $segmentStartId = 0;
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            if ($segmentStartId + $segmentInfo->count() > $id) {
+                break;
+            }
+
+            $segmentStartId += $segmentInfo->count();
+        }
+
+        return $segmentInfo->isDeleted($id - $segmentStartId);
+    }
+
+    /**
+     * Set default search field.
+     *
+     * Null means, that search is performed through all fields by default
+     *
+     * Default value is null
+     *
+     * @param string $fieldName
+     */
+    public static function setDefaultSearchField($fieldName)
+    {
+        self::$_defaultSearchField = $fieldName;
+    }
+
+    /**
+     * Get default search field.
+     *
+     * Null means, that search is performed through all fields by default
+     *
+     * @return string
+     */
+    public static function getDefaultSearchField()
+    {
+        return self::$_defaultSearchField;
+    }
+
+    /**
+     * Set result set limit.
+     *
+     * 0 (default) means no limit
+     *
+     * @param integer $limit
+     */
+    public static function setResultSetLimit($limit)
+    {
+        self::$_resultSetLimit = $limit;
+    }
+
+    /**
+     * Get result set limit.
+     *
+     * 0 means no limit
+     *
+     * @return integer
+     */
+    public static function getResultSetLimit()
+    {
+        return self::$_resultSetLimit;
+    }
+
+    /**
+     * Set terms per query limit.
+     *
+     * 0 means no limit
+     *
+     * @param integer $limit
+     */
+    public static function setTermsPerQueryLimit($limit)
+    {
+        self::$_termsPerQueryLimit = $limit;
+    }
+
+    /**
+     * Get result set limit.
+     *
+     * 0 (default) means no limit
+     *
+     * @return integer
+     */
+    public static function getTermsPerQueryLimit()
+    {
+        return self::$_termsPerQueryLimit;
+    }
+
+    /**
+     * Retrieve index maxBufferedDocs option
+     *
+     * maxBufferedDocs is a minimal number of documents required before
+     * the buffered in-memory documents are written into a new Segment
+     *
+     * Default value is 10
+     *
+     * @return integer
+     */
+    public function getMaxBufferedDocs()
+    {
+        return $this->_getIndexWriter()->maxBufferedDocs;
+    }
+
+    /**
+     * Set index maxBufferedDocs option
+     *
+     * maxBufferedDocs is a minimal number of documents required before
+     * the buffered in-memory documents are written into a new Segment
+     *
+     * Default value is 10
+     *
+     * @param integer $maxBufferedDocs
+     */
+    public function setMaxBufferedDocs($maxBufferedDocs)
+    {
+        $this->_getIndexWriter()->maxBufferedDocs = $maxBufferedDocs;
+    }
+
+    /**
+     * Retrieve index maxMergeDocs option
+     *
+     * maxMergeDocs is a largest number of documents ever merged by addDocument().
+     * Small values (e.g., less than 10,000) are best for interactive indexing,
+     * as this limits the length of pauses while indexing to a few seconds.
+     * Larger values are best for batched indexing and speedier searches.
+     *
+     * Default value is PHP_INT_MAX
+     *
+     * @return integer
+     */
+    public function getMaxMergeDocs()
+    {
+        return $this->_getIndexWriter()->maxMergeDocs;
+    }
+
+    /**
+     * Set index maxMergeDocs option
+     *
+     * maxMergeDocs is a largest number of documents ever merged by addDocument().
+     * Small values (e.g., less than 10,000) are best for interactive indexing,
+     * as this limits the length of pauses while indexing to a few seconds.
+     * Larger values are best for batched indexing and speedier searches.
+     *
+     * Default value is PHP_INT_MAX
+     *
+     * @param integer $maxMergeDocs
+     */
+    public function setMaxMergeDocs($maxMergeDocs)
+    {
+        $this->_getIndexWriter()->maxMergeDocs = $maxMergeDocs;
+    }
+
+    /**
+     * Retrieve index mergeFactor option
+     *
+     * mergeFactor determines how often segment indices are merged by addDocument().
+     * With smaller values, less RAM is used while indexing,
+     * and searches on unoptimized indices are faster,
+     * but indexing speed is slower.
+     * With larger values, more RAM is used during indexing,
+     * and while searches on unoptimized indices are slower,
+     * indexing is faster.
+     * Thus larger values (> 10) are best for batch index creation,
+     * and smaller values (< 10) for indices that are interactively maintained.
+     *
+     * Default value is 10
+     *
+     * @return integer
+     */
+    public function getMergeFactor()
+    {
+        return $this->_getIndexWriter()->mergeFactor;
+    }
+
+    /**
+     * Set index mergeFactor option
+     *
+     * mergeFactor determines how often segment indices are merged by addDocument().
+     * With smaller values, less RAM is used while indexing,
+     * and searches on unoptimized indices are faster,
+     * but indexing speed is slower.
+     * With larger values, more RAM is used during indexing,
+     * and while searches on unoptimized indices are slower,
+     * indexing is faster.
+     * Thus larger values (> 10) are best for batch index creation,
+     * and smaller values (< 10) for indices that are interactively maintained.
+     *
+     * Default value is 10
+     *
+     * @param integer $maxMergeDocs
+     */
+    public function setMergeFactor($mergeFactor)
+    {
+        $this->_getIndexWriter()->mergeFactor = $mergeFactor;
+    }
+
+    /**
+     * Performs a query against the index and returns an array
+     * of Zend_Search_Lucene_Search_QueryHit objects.
+     * Input is a string or Zend_Search_Lucene_Search_Query.
+     *
+     * @param mixed $query
+     * @return array Zend_Search_Lucene_Search_QueryHit
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function find($query)
+    {
+        if (is_string($query)) {
+            $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
+        }
+
+        if (!$query instanceof Zend_Search_Lucene_Search_Query) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
+        }
+
+        $this->commit();
+
+        $hits   = array();
+        $scores = array();
+        $ids    = array();
+
+        $query = $query->rewrite($this)->optimize($this);
+
+        $query->execute($this);
+
+        $topScore = 0;
+
+        foreach ($query->matchedDocs() as $id => $num) {
+            $docScore = $query->score($id, $this);
+            if( $docScore != 0 ) {
+                $hit = new Zend_Search_Lucene_Search_QueryHit($this);
+                $hit->id = $id;
+                $hit->score = $docScore;
+
+                $hits[]   = $hit;
+                $ids[]    = $id;
+                $scores[] = $docScore;
+
+                if ($docScore > $topScore) {
+                    $topScore = $docScore;
+                }
+            }
+
+            if (self::$_resultSetLimit != 0  &&  count($hits) >= self::$_resultSetLimit) {
+                break;
+            }
+        }
+
+        if (count($hits) == 0) {
+            // skip sorting, which may cause a error on empty index
+            return array();
+        }
+
+        if ($topScore > 1) {
+            foreach ($hits as $hit) {
+                $hit->score /= $topScore;
+            }
+        }
+
+        if (func_num_args() == 1) {
+            // sort by scores
+            array_multisort($scores, SORT_DESC, SORT_NUMERIC,
+                            $ids,    SORT_ASC,  SORT_NUMERIC,
+                            $hits);
+        } else {
+            // sort by given field names
+
+            $argList    = func_get_args();
+            $fieldNames = $this->getFieldNames();
+            $sortArgs   = array();
+
+            // PHP 5.3 now expects all arguments to array_multisort be passed by
+            // reference; since constants can't be passed by reference, create 
+            // some placeholder variables.
+            $sortReg    = SORT_REGULAR;
+            $sortAsc    = SORT_ASC;
+            $sortNum    = SORT_NUMERIC;
+
+            require_once 'Zend/Search/Lucene/Exception.php';
+            for ($count = 1; $count < count($argList); $count++) {
+                $fieldName = $argList[$count];
+
+                if (!is_string($fieldName)) {
+                    throw new Zend_Search_Lucene_Exception('Field name must be a string.');
+                }
+
+                if (!in_array($fieldName, $fieldNames)) {
+                    throw new Zend_Search_Lucene_Exception('Wrong field name.');
+                }
+
+                $valuesArray = array();
+                foreach ($hits as $hit) {
+                    try {
+                        $value = $hit->getDocument()->getFieldValue($fieldName);
+                    } catch (Zend_Search_Lucene_Exception $e) {
+                        if (strpos($e->getMessage(), 'not found') === false) {
+                            throw $e;
+                        } else {
+                            $value = null;
+                        }
+                    }
+
+                    $valuesArray[] = $value;
+                }
+
+                $sortArgs[] = &$valuesArray;
+
+                if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
+                    $count++;
+                    $sortArgs[] = &$argList[$count];
+
+                    if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
+                        $count++;
+                        $sortArgs[] = &$argList[$count];
+                    } else {
+                        if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
+                            $sortArgs[] = &$sortReg;
+                        } else {
+                            $sortArgs[] = &$sortAsc;
+                        }
+                    }
+                } else {
+                    $sortArgs[] = &$sortAsc;
+                    $sortArgs[] = &$sortReg;
+                }
+            }
+
+            // Sort by id's if values are equal
+            $sortArgs[] = &$ids;
+            $sortArgs[] = &$sortAsc;
+            $sortArgs[] = &$sortNum;
+
+            // Array to be sorted
+            $sortArgs[] = &$hits;
+
+            // Do sort
+            call_user_func_array('array_multisort', $sortArgs);
+        }
+
+        return $hits;
+    }
+
+
+    /**
+     * Returns a list of all unique field names that exist in this index.
+     *
+     * @param boolean $indexed
+     * @return array
+     */
+    public function getFieldNames($indexed = false)
+    {
+        $result = array();
+        foreach( $this->_segmentInfos as $segmentInfo ) {
+            $result = array_merge($result, $segmentInfo->getFields($indexed));
+        }
+        return $result;
+    }
+
+
+    /**
+     * Returns a Zend_Search_Lucene_Document object for the document
+     * number $id in this index.
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
+     * @return Zend_Search_Lucene_Document
+     * @throws Zend_Search_Lucene_Exception    Exception is thrown if $id is out of the range
+     */
+    public function getDocument($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $id = $id->id;
+        }
+
+        if ($id >= $this->_docCount) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        $segmentStartId = 0;
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            if ($segmentStartId + $segmentInfo->count() > $id) {
+                break;
+            }
+
+            $segmentStartId += $segmentInfo->count();
+        }
+
+        $fdxFile = $segmentInfo->openCompoundFile('.fdx');
+        $fdxFile->seek(($id-$segmentStartId)*8, SEEK_CUR);
+        $fieldValuesPosition = $fdxFile->readLong();
+
+        $fdtFile = $segmentInfo->openCompoundFile('.fdt');
+        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
+        $fieldCount = $fdtFile->readVInt();
+
+        $doc = new Zend_Search_Lucene_Document();
+        for ($count = 0; $count < $fieldCount; $count++) {
+            $fieldNum = $fdtFile->readVInt();
+            $bits = $fdtFile->readByte();
+
+            $fieldInfo = $segmentInfo->getField($fieldNum);
+
+            if (!($bits & 2)) { // Text data
+                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
+                                                      $fdtFile->readString(),
+                                                      'UTF-8',
+                                                      true,
+                                                      $fieldInfo->isIndexed,
+                                                      $bits & 1 );
+            } else {            // Binary data
+                $field = new Zend_Search_Lucene_Field($fieldInfo->name,
+                                                      $fdtFile->readBinary(),
+                                                      '',
+                                                      true,
+                                                      $fieldInfo->isIndexed,
+                                                      $bits & 1,
+                                                      true );
+            }
+
+            $doc->addField($field);
+        }
+
+        return $doc;
+    }
+
+
+    /**
+     * Returns true if index contain documents with specified term.
+     *
+     * Is used for query optimization.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return boolean
+     */
+    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
+    {
+        foreach ($this->_segmentInfos as $segInfo) {
+            if ($segInfo->getTermInfo($term) instanceof Zend_Search_Lucene_Index_TermInfo) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Returns IDs of all documents containing term.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
+     * @return array
+     */
+    public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
+    {
+        $subResults = array();
+        $segmentStartDocId = 0;
+
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);
+
+            $segmentStartDocId += $segmentInfo->count();
+        }
+
+        if (count($subResults) == 0) {
+            return array();
+        } else if (count($subResults) == 0) {
+            // Index is optimized (only one segment)
+            // Do not perform array reindexing
+            return reset($subResults);
+        } else {
+            $result = call_user_func_array('array_merge', $subResults);
+        }
+
+        return $result;
+    }
+
+    /**
+     * Returns documents filter for all documents containing term.
+     *
+     * It performs the same operation as termDocs, but return result as
+     * Zend_Search_Lucene_Index_DocsFilter object
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
+     * @return Zend_Search_Lucene_Index_DocsFilter
+     */
+    public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
+    {
+        $segmentStartDocId = 0;
+        $result = new Zend_Search_Lucene_Index_DocsFilter();
+
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);
+
+            $segmentStartDocId += $segmentInfo->count();
+        }
+
+        if (count($subResults) == 0) {
+            return array();
+        } else if (count($subResults) == 0) {
+            // Index is optimized (only one segment)
+            // Do not perform array reindexing
+            return reset($subResults);
+        } else {
+            $result = call_user_func_array('array_merge', $subResults);
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Returns an array of all term freqs.
+     * Result array structure: array(docId => freq, ...)
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
+     * @return integer
+     */
+    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
+    {
+        $result = array();
+        $segmentStartDocId = 0;
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $result += $segmentInfo->termFreqs($term, $segmentStartDocId, $docsFilter);
+
+            $segmentStartDocId += $segmentInfo->count();
+        }
+
+        return $result;
+    }
+
+    /**
+     * Returns an array of all term positions in the documents.
+     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
+     * @return array
+     */
+    public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
+    {
+        $result = array();
+        $segmentStartDocId = 0;
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $result += $segmentInfo->termPositions($term, $segmentStartDocId, $docsFilter);
+
+            $segmentStartDocId += $segmentInfo->count();
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Returns the number of documents in this index containing the $term.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return integer
+     */
+    public function docFreq(Zend_Search_Lucene_Index_Term $term)
+    {
+        $result = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            $termInfo = $segInfo->getTermInfo($term);
+            if ($termInfo !== null) {
+                $result += $termInfo->docFreq;
+            }
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Retrive similarity used by index reader
+     *
+     * @return Zend_Search_Lucene_Search_Similarity
+     */
+    public function getSimilarity()
+    {
+        return Zend_Search_Lucene_Search_Similarity::getDefault();
+    }
+
+
+    /**
+     * Returns a normalization factor for "field, document" pair.
+     *
+     * @param integer $id
+     * @param string $fieldName
+     * @return float
+     */
+    public function norm($id, $fieldName)
+    {
+        if ($id >= $this->_docCount) {
+            return null;
+        }
+
+        $segmentStartId = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            if ($segmentStartId + $segInfo->count() > $id) {
+                break;
+            }
+
+            $segmentStartId += $segInfo->count();
+        }
+
+        if ($segInfo->isDeleted($id - $segmentStartId)) {
+            return 0;
+        }
+
+        return $segInfo->norm($id - $segmentStartId, $fieldName);
+    }
+
+    /**
+     * Returns true if any documents have been deleted from this index.
+     *
+     * @return boolean
+     */
+    public function hasDeletions()
+    {
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            if ($segmentInfo->hasDeletions()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+
+    /**
+     * Deletes a document from the index.
+     * $id is an internal document id
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function delete($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $id = $id->id;
+        }
+
+        if ($id >= $this->_docCount) {
+            require_once 'Zend/Search/Lucene/Exception.php';
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        $segmentStartId = 0;
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            if ($segmentStartId + $segmentInfo->count() > $id) {
+                break;
+            }
+
+            $segmentStartId += $segmentInfo->count();
+        }
+        $segmentInfo->delete($id - $segmentStartId);
+
+        $this->_hasChanges = true;
+    }
+
+
+
+    /**
+     * Adds a document to this index.
+     *
+     * @param Zend_Search_Lucene_Document $document
+     */
+    public function addDocument(Zend_Search_Lucene_Document $document)
+    {
+        $this->_getIndexWriter()->addDocument($document);
+        $this->_docCount++;
+
+        $this->_hasChanges = true;
+    }
+
+
+    /**
+     * Update document counter
+     */
+    private function _updateDocCount()
+    {
+        $this->_docCount = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            $this->_docCount += $segInfo->count();
+        }
+    }
+
+    /**
+     * Commit changes resulting from delete() or undeleteAll() operations.
+     *
+     * @todo undeleteAll processing.
+     */
+    public function commit()
+    {
+        if ($this->_hasChanges) {
+            $this->_getIndexWriter()->commit();
+
+            $this->_updateDocCount();
+
+            $this->_hasChanges = false;
+        }
+    }
+
+
+    /**
+     * Optimize index.
+     *
+     * Merges all segments into one
+     */
+    public function optimize()
+    {
+        // Commit changes if any changes have been made
+        $this->commit();
+
+        if (count($this->_segmentInfos) > 1 || $this->hasDeletions()) {
+            $this->_getIndexWriter()->optimize();
+            $this->_updateDocCount();
+        }
+    }
+
+
+    /**
+     * Returns an array of all terms in this index.
+     *
+     * @return array
+     */
+    public function terms()
+    {
+        $result = array();
+
+        $segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();
+
+        foreach ($this->_segmentInfos as $segmentInfo) {
+            $segmentInfo->resetTermsStream();
+
+            // Skip "empty" segments
+            if ($segmentInfo->currentTerm() !== null) {
+                $segmentInfoQueue->put($segmentInfo);
+            }
+        }
+
+        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
+            if ($segmentInfoQueue->top() === null ||
+                $segmentInfoQueue->top()->currentTerm()->key() !=
+                            $segmentInfo->currentTerm()->key()) {
+                // We got new term
+                $result[] = $segmentInfo->currentTerm();
+            }
+
+            if ($segmentInfo->nextTerm() !== null) {
+                // Put segment back into the priority queue
+                $segmentInfoQueue->put($segmentInfo);
+            }
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Terms stream priority queue object
+     *
+     * @var Zend_Search_Lucene_TermStreamsPriorityQueue
+     */
+    private $_termsStream = null;
+
+    /**
+     * Reset terms stream.
+     */
+    public function resetTermsStream()
+    {
+       if ($this->_termsStream === null) {
+            $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
+       } else {
+               $this->_termsStream->resetTermsStream();
+       }
+    }
+
+    /**
+     * Skip terms stream up to specified term preffix.
+     *
+     * Prefix contains fully specified field info and portion of searched term
+     *
+     * @param Zend_Search_Lucene_Index_Term $prefix
+     */
+    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
+    {
+        $this->_termsStream->skipTo($prefix);
+    }
+
+    /**
+     * Scans terms dictionary and returns next term
+     *
+     * @return Zend_Search_Lucene_Index_Term|null
+     */
+    public function nextTerm()
+    {
+        return $this->_termsStream->nextTerm();
+    }
+
+    /**
+     * Returns term in current position
+     *
+     * @return Zend_Search_Lucene_Index_Term|null
+     */
+    public function currentTerm()
+    {
+        return $this->_termsStream->currentTerm();
+    }
+
+    /**
+     * Close terms stream
+     *
+     * Should be used for resources clean up if stream is not read up to the end
+     */
+    public function closeTermsStream()
+    {
+        $this->_termsStream->closeTermsStream();
+        $this->_termsStream = null;
+    }
+
+
+    /*************************************************************************
+    @todo UNIMPLEMENTED
+    *************************************************************************/
+    /**
+     * Undeletes all documents currently marked as deleted in this index.
+     *
+     * @todo Implementation
+     */
+    public function undeleteAll()
+    {}
+}