7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
17 * @subpackage Document
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: Field.php 16541 2009-07-07 06:59:03Z bkarwin $
25 * A field is a section of a Document. Each field has two parts,
26 * a name and a value. Values may be free text or they may be atomic
27 * keywords, which are not further processed. Such keywords may
28 * be used to represent dates, urls, etc. Fields are optionally
29 * stored in the index, so that they may be returned with hits
33 * @package Zend_Search_Lucene
34 * @subpackage Document
35 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
36 * @license http://framework.zend.com/license/new-bsd New BSD License
38 class Zend_Search_Lucene_Field
55 * Field is to be stored in the index for return with search hits.
59 public $isStored = false;
62 * Field is to be indexed, so that it may be searched on.
66 public $isIndexed = true;
69 * Field should be tokenized as text prior to indexing.
73 public $isTokenized = true;
75 * Field is stored as binary.
79 public $isBinary = false;
82 * Field is stored as a term vector
86 public $storeTermVector = false;
90 * It's not stored directly in the index, but it affects the normalization factor
97 * Field value encoding.
106 * @param string $name
107 * @param string $value
108 * @param string $encoding
109 * @param boolean $isStored
110 * @param boolean $isIndexed
111 * @param boolean $isTokenized
112 * @param boolean $isBinary
// Constructor: copies the supplied value and storage/index flags onto the new field.
// NOTE(review): the embedded listing numbers jump (114 -> 117, 117 -> 120, 121 -> 123,
// 124 -> 127, 129 -> 131), so the braces and any statements on the skipped lines are
// not visible here. Restore them from the original file before editing this method.
114 public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
117 $this->value = $value;
// First of two alternative assignments to encoding/isTokenized: take both from the
// caller's arguments.
// NOTE(review): the condition choosing between the two alternatives sits on a skipped
// line -- presumably it tests $isBinary; confirm against the original.
120 $this->encoding = $encoding;
121 $this->isTokenized = $isTokenized;
// Second alternative: empty encoding and tokenization switched off.
123 $this->encoding = '';
124 $this->isTokenized = false;
// These three flags are copied from the arguments unchanged.
127 $this->isStored = $isStored;
128 $this->isIndexed = $isIndexed;
129 $this->isBinary = $isBinary;
// Term-vector storage is always off for a newly constructed field.
131 $this->storeTermVector = false;
137 * Constructs a String-valued Field that is not tokenized, but is indexed
138 * and stored. Useful for non-text fields, e.g. date or url.
140 * @param string $name
141 * @param string $value
142 * @param string $encoding
143 * @return Zend_Search_Lucene_Field
public static function keyword($name, $value, $encoding = '')
{
    // Keyword fields are stored and indexed, but never tokenized.
    $isStored    = true;
    $isIndexed   = true;
    $isTokenized = false;

    return new self($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
}
152 * Constructs a String-valued Field that is not tokenized nor indexed,
153 * but is stored in the index, for return with hits.
155 * @param string $name
156 * @param string $value
157 * @param string $encoding
158 * @return Zend_Search_Lucene_Field
public static function unIndexed($name, $value, $encoding = '')
{
    // Un-indexed fields are stored only: neither indexed nor tokenized.
    $isStored    = true;
    $isIndexed   = false;
    $isTokenized = false;

    return new self($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
}
167 * Constructs a Binary String valued Field that is not tokenized nor indexed,
168 * but is stored in the index, for return with hits.
170 * @param string $name
171 * @param string $value
172 * (binary() takes no $encoding argument; the field is created with an empty encoding)
173 * @return Zend_Search_Lucene_Field
public static function binary($name, $value)
{
    // Binary fields get an empty encoding and are stored only:
    // not indexed, not tokenized, and flagged as binary.
    $encoding    = '';
    $isStored    = true;
    $isIndexed   = false;
    $isTokenized = false;
    $isBinary    = true;

    return new self($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary);
}
181 * Constructs a String-valued Field that is tokenized and indexed,
182 * and is stored in the index, for return with hits. Useful for short text
183 * fields, like "title" or "subject". Term vector will not be stored for this field.
185 * @param string $name
186 * @param string $value
187 * @param string $encoding
188 * @return Zend_Search_Lucene_Field
public static function text($name, $value, $encoding = '')
{
    // Text fields are stored, indexed and tokenized.
    $isStored    = true;
    $isIndexed   = true;
    $isTokenized = true;

    return new self($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
}
197 * Constructs a String-valued Field that is tokenized and indexed,
198 * but that is not stored in the index.
200 * @param string $name
201 * @param string $value
202 * @param string $encoding
203 * @return Zend_Search_Lucene_Field
public static function unStored($name, $value, $encoding = '')
{
    // Un-stored fields are indexed and tokenized, but not stored.
    $isStored    = false;
    $isIndexed   = true;
    $isTokenized = true;

    return new self($name, $value, $encoding, $isStored, $isIndexed, $isTokenized);
}
211 * Get field value in UTF-8 encoding
// Returns the field value in UTF-8, converting with iconv when the field's
// encoding is not already UTF-8.
// NOTE(review): listing lines 216 and 219-221 are not visible here; presumably the
// UTF-8 branch returns $this->value unchanged -- confirm against the original file.
215 public function getUtf8Value()
// Case-insensitive match against both spellings of the encoding name ('utf8', 'utf-8').
217 if (strcasecmp($this->encoding, 'utf8' ) == 0 ||
218 strcasecmp($this->encoding, 'utf-8') == 0 ) {
// On AIX the field's own encoding is ignored and 'ISO8859-1' is used as the iconv
// source charset; on every other OS the field's encoding is the source charset.
222 return (PHP_OS != 'AIX') ? iconv($this->encoding, 'UTF-8', $this->value) : iconv('ISO8859-1', 'UTF-8', $this->value);