7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: DocumentWriter.php 16541 2009-07-07 06:59:03Z bkarwin $
23 /** Zend_Search_Lucene_Analysis_Analyzer */
24 require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
26 /** Zend_Search_Lucene_Index_SegmentWriter */
27 require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
31 * @package Zend_Search_Lucene
33 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
34 * @license http://framework.zend.com/license/new-bsd New BSD License
36 class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
40 * Array of the Zend_Search_Lucene_Index_Term objects
41 * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
45 protected $_termDictionary;
48 * Documents, which contain the term
57 * @param Zend_Search_Lucene_Storage_Directory $directory
// Creates the document writer: forwards $directory and $name unchanged to the
// parent segment writer constructor, then starts with empty term bookkeeping.
60 public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
62 parent::__construct($directory, $name);
// Both arrays are keyed by the term key produced in addDocument(); start them empty.
64 $this->_termDocs = array();
65 $this->_termDictionary = array();
70 * Adds a document to this segment.
72 * @param Zend_Search_Lucene_Document $document
73 * @throws Zend_Search_Lucene_Exception
// Adds one document to the segment being built: registers every field via
// addField(), records term positions for indexed fields, appends a norm byte
// per indexed field, and finally passes the stored fields to addStoredFields().
// Throws Zend_Search_Lucene_Exception if any field has storeTermVector set.
75 public function addDocument(Zend_Search_Lucene_Document $document)
// Fields flagged isStored are collected here and handed over in one call at the end.
77 $storedFields = array();
// NOTE(review): $docNorms is written and read below, but its initialisation is not visible in this excerpt.
79 $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
81 foreach ($document->getFieldNames() as $fieldName) {
82 $field = $document->getField($fieldName);
// The field is registered before the term-vector check below, so addField() runs even for a rejected field.
83 $this->addField($field);
// Term vector storage is not implemented: fail loudly rather than ignore the flag.
85 if ($field->storeTermVector) {
87 * @todo term vector storing support
89 require_once 'Zend/Search/Lucene/Exception.php';
90 throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
93 if ($field->isIndexed) {
// Tokenized field: feed the value (with the field's encoding) to the default analyzer
// and index every token it produces as a separate term.
94 if ($field->isTokenized) {
95 $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
96 $analyzer->setInput($field->value, $field->encoding);
// NOTE(review): $position is accumulated in this loop, but its initialisation is not
// visible in this excerpt; confirm it is reset for each field.
100 while (($token = $analyzer->nextToken()) !== null) {
103 $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
// The term key indexes both $_termDictionary (term object) and $_termDocs (positions per document).
104 $termKey = $term->key();
106 if (!isset($this->_termDictionary[$termKey])) {
// Term not yet in the dictionary: register it and open a position list for the current document.
108 $this->_termDictionary[$termKey] = $term;
109 $this->_termDocs[$termKey] = array();
110 $this->_termDocs[$termKey][$this->_docCount] = array();
111 } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
112 // Existing term, but new term entry
113 $this->_termDocs[$termKey][$this->_docCount] = array();
// Positions advance by the token's own position increment, not by a fixed step.
115 $position += $token->getPositionIncrement();
116 $this->_termDocs[$termKey][$this->_docCount][] = $position;
// Norm for this field in this document, stored as a single character via chr();
// the remaining arguments of this expression are on lines not visible in this excerpt.
119 $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
// Untokenized field (the branch keyword is not visible here): the whole UTF-8 value
// is indexed as one term.
124 $term = new Zend_Search_Lucene_Index_Term($field->getUtf8Value(), $field->name);
125 $termKey = $term->key();
127 if (!isset($this->_termDictionary[$termKey])) {
// Same dictionary bookkeeping as in the tokenized branch above.
129 $this->_termDictionary[$termKey] = $term;
130 $this->_termDocs[$termKey] = array();
131 $this->_termDocs[$termKey][$this->_docCount] = array();
132 } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
133 // Existing term, but new term entry
134 $this->_termDocs[$termKey][$this->_docCount] = array();
136 $this->_termDocs[$termKey][$this->_docCount][] = 0; // position
// lengthNorm() is called with a length of 1 here; the rest of the expression
// is on lines not visible in this excerpt.
138 $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
144 if ($field->isStored) {
145 $storedFields[] = $field;
// Second pass over every field known to the segment ($this->_fields), appending to
// $this->_norms; neither property is declared in this excerpt (presumably inherited
// from the parent segment writer; confirm).
150 foreach ($this->_fields as $fieldName => $field) {
// NOTE(review): the body of this check is not visible; presumably non-indexed fields are skipped.
151 if (!$field->isIndexed) {
155 if (!isset($this->_norms[$fieldName])) {
// No norm string exists for this field yet: seed it with repeated zero-length norm bytes.
// The repeat count is on a line not visible here (presumably one byte per earlier document; confirm).
156 $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
160 if (isset($docNorms[$fieldName])){
161 $this->_norms[$fieldName] .= $docNorms[$fieldName];
// Fallback when this document produced no norm for the field (the branch keyword is
// not visible here): append the norm computed for length 0.
163 $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
167 $this->addStoredFields($storedFields);
172 * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
// Writes the terms collected by addDocument() to the dictionary files, in term-key order.
174 protected function _dumpDictionary()
// Sort by term key, compared as strings, so that addTerm() below receives the terms in sorted order.
176 ksort($this->_termDictionary, SORT_STRING);
178 $this->initializeDictionaryFiles();
// $_termDocs shares its keys with $_termDictionary, so $termId selects this term's
// per-document position lists.
180 foreach ($this->_termDictionary as $termId => $term) {
181 $this->addTerm($term, $this->_termDocs[$termId]);
184 $this->closeDictionaryFiles();
189 * Close segment, write it to disk and return segment info
191 * @return Zend_Search_Lucene_Index_SegmentInfo
193 public function close()
195 if ($this->_docCount == 0) {
200 $this->_dumpDictionary();
202 $this->_generateCFS();
204 return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,