7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: Term.php 16541 2009-07-07 06:59:03Z bkarwin $
25 * A Term represents a word from text. This is the unit of search. It is
26 * composed of two elements, the text of the word, as a string, and the name of
27 * the field that the text occurred in, an interned string.
29 * Note that terms may represent not only words from text fields, but also
30 * things like dates, email addresses, URLs, etc.
33 * @package Zend_Search_Lucene
35 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
36 * @license http://framework.zend.com/license/new-bsd New BSD License
38 class Zend_Search_Lucene_Index_Term
41 * Field name or field number (depending on context)
58 public function __construct($text, $field = null)
60 $this->field = ($field === null)? Zend_Search_Lucene::getDefaultSearchField() : $field;
72 return $this->field . chr(0) . $this->text;
79 * @param integer $length
82 public static function getPrefix($str, $length)
86 while ($prefixBytes < strlen($str) && $prefixChars < $length) {
88 if ((ord($str[$prefixBytes]) & 0xC0) == 0xC0) {
90 if (ord($str[$prefixBytes]) & 0x20 ) {
92 if (ord($str[$prefixBytes]) & 0x10 ) {
98 if ($prefixBytes + $charBytes > strlen($str)) {
104 $prefixBytes += $charBytes;
107 return substr($str, 0, $prefixBytes);
111 * Get UTF-8 string length
116 public static function getLength($str)
120 while ($bytes < strlen($str)) {
122 if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
124 if (ord($str[$bytes]) & 0x20 ) {
126 if (ord($str[$bytes]) & 0x10 ) {
132 if ($bytes + $charBytes > strlen($str)) {
138 $bytes += $charBytes;