final move of files
[web.mtrack] / Zend / Search / Lucene / Index / Term.php
diff --git a/Zend/Search/Lucene/Index/Term.php b/Zend/Search/Lucene/Index/Term.php
new file mode 100644 (file)
index 0000000..a042cfd
--- /dev/null
@@ -0,0 +1,144 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: Term.php 16541 2009-07-07 06:59:03Z bkarwin $
+ */
+
+
+/**
+ * A Term represents a word from text.  This is the unit of search.  It is
+ * composed of two elements, the text of the word, as a string, and the name of
+ * the field that the text occurred in, an interned string.
+ *
+ * Note that terms may represent more than words from text fields, but also
+ * things like dates, email addresses, urls, etc.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_Term
+{
+    /**
+     * Field name or field number (depending on context)
+     *
+     * @var mixed
+     */
+    public $field;
+
+    /**
+     * Term value
+     *
+     * @var string
+     */
+    public $text;
+
+
+    /**
+     * Object constructor
+     *
+     * Stores the term text and the field it belongs to. When $field is
+     * exactly null, the value returned by
+     * Zend_Search_Lucene::getDefaultSearchField() is used instead.
+     *
+     * @param string $text   Term value
+     * @param mixed  $field  Field name or number; null selects the default search field
+     */
+    public function __construct($text, $field = null)
+    {
+        $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
+        $this->text  = $text;
+    }
+
+
+    /**
+     * Returns term key
+     *
+     * The key is the field, a single NUL byte (chr(0)) and the term text
+     * concatenated in that order.
+     *
+     * @return string
+     */
+    public function key()
+    {
+        return $this->field . chr(0) . $this->text;
+    }
+
+    /**
+     * Get term prefix
+     *
+     * Returns the leading part of $str that holds at most $length characters.
+     * The string is walked byte by byte and the width of each character is
+     * derived from its first byte only; the following bytes of a multi-byte
+     * sequence are not inspected. If the last sequence would extend past the
+     * end of the string, it is left out of the result.
+     *
+     * @param string $str
+     * @param integer $length  Maximum number of characters in the prefix
+     * @return string
+     */
+    public static function getPrefix($str, $length)
+    {
+        $prefixBytes = 0;
+        $prefixChars = 0;
+        while ($prefixBytes < strlen($str)  &&  $prefixChars < $length) {
+            // Width of the current character in bytes, decided by its first byte
+            $charBytes = 1;
+            // Both top bits set (11xxxxxx): at least a two-byte sequence
+            if ((ord($str[$prefixBytes]) & 0xC0) == 0xC0) {
+                $charBytes++;
+                // Bit 0x20 also set (111xxxxx): at least three bytes
+                if (ord($str[$prefixBytes]) & 0x20 ) {
+                    $charBytes++;
+                    // Bit 0x10 also set (1111xxxx): four bytes
+                    if (ord($str[$prefixBytes]) & 0x10 ) {
+                        $charBytes++;
+                    }
+                }
+            }
+
+            if ($prefixBytes + $charBytes > strlen($str)) {
+                // wrong character
+                // (the announced width runs past the end of the string, so
+                // the truncated sequence is not added to the prefix)
+                break;
+            }
+
+            $prefixChars++;
+            $prefixBytes += $charBytes;
+        }
+
+        return substr($str, 0, $prefixBytes);
+    }
+
+    /**
+     * Get UTF-8 string length
+     *
+     * Counts characters by walking the string and deriving the width of each
+     * character from its first byte only; the following bytes of a multi-byte
+     * sequence are not inspected. Counting stops at a trailing sequence that
+     * would extend past the end of the string, and that sequence is not
+     * counted.
+     *
+     * @param string $str
+     * @return integer  Number of characters counted
+     */
+    public static function getLength($str)
+    {
+        $bytes = 0;
+        $chars = 0;
+        while ($bytes < strlen($str)) {
+            // Width of the current character in bytes, decided by its first byte
+            $charBytes = 1;
+            // Both top bits set (11xxxxxx): at least a two-byte sequence
+            if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
+                $charBytes++;
+                // Bit 0x20 also set (111xxxxx): at least three bytes
+                if (ord($str[$bytes]) & 0x20 ) {
+                    $charBytes++;
+                    // Bit 0x10 also set (1111xxxx): four bytes
+                    if (ord($str[$bytes]) & 0x10 ) {
+                        $charBytes++;
+                    }
+                }
+            }
+
+            if ($bytes + $charBytes > strlen($str)) {
+                // wrong character
+                // (the announced width runs past the end of the string, so
+                // the truncated sequence is not counted)
+                break;
+            }
+
+            $chars++;
+            $bytes += $charBytes;
+        }
+
+        return $chars;
+    }
+}
+