git.roojs.org Git - web.mtrack/blob - Zend/Search/Lucene/Analysis/Token.php

   1 <?php
   2 /**
   3  * Zend Framework
   4  *
   5  * LICENSE
   6  *
   7  * This source file is subject to the new BSD license that is bundled
   8  * with this package in the file LICENSE.txt.
   9  * It is also available through the world-wide-web at this URL:
  10  * http://framework.zend.com/license/new-bsd
  11  * If you did not receive a copy of the license and are unable to
  12  * obtain it through the world-wide-web, please send an email
  13  * to license@zend.com so we can send you a copy immediately.
  14  *
  15  * @category   Zend
  16  * @package    Zend_Search_Lucene
  17  * @subpackage Analysis
  18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20  * @version    $Id: Token.php 16541 2009-07-07 06:59:03Z bkarwin $
  21  */
  22
  23
  24 /**
  25  * @category   Zend
  26  * @package    Zend_Search_Lucene
  27  * @subpackage Analysis
  28  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
  29  * @license    http://framework.zend.com/license/new-bsd     New BSD License
  30  */
  31 class Zend_Search_Lucene_Analysis_Token
  32 {
  33     /**
  34      * The text of the term.
  35      *
  36      * @var string
  37      */
  38     private $_termText;
  39
  40     /**
  41      * Start in source text.
  42      *
  43      * @var integer
  44      */
  45     private $_startOffset;
  46
  47     /**
  48      * End in source text
  49      *
  50      * @var integer
  51      */
  52     private $_endOffset;
  53
  54     /**
  55      * The position of this token relative to the previous Token.
  56      *
  57      * The default value is one.
  58      *
  59      * Some common uses for this are:
  60      * Set it to zero to put multiple terms in the same position.  This is
  61      * useful if, e.g., a word has multiple stems.  Searches for phrases
  62      * including either stem will match.  In this case, all but the first stem's
  63      * increment should be set to zero: the increment of the first instance
  64      * should be one.  Repeating a token with an increment of zero can also be
  65      * used to boost the scores of matches on that token.
  66      *
  67      * Set it to values greater than one to inhibit exact phrase matches.
  68      * If, for example, one does not want phrases to match across removed stop
  69      * words, then one could build a stop word filter that removes stop words and
  70      * also sets the increment to the number of stop words removed before each
  71      * non-stop word.  Then exact phrase queries will only match when the terms
  72      * occur with no intervening stop words.
  73      *
  74      * @var integer
  75      */
  76     private $_positionIncrement;
  77
  78
  79     /**
  80      * Object constructor
  81      *
  82      * @param string  $text
  83      * @param integer $start
  84      * @param integer $end
  85      * @param string  $type
  86      */
  87     public function __construct($text, $start, $end)
  88     {
  89         $this->_termText    = $text;
  90         $this->_startOffset = $start;
  91         $this->_endOffset   = $end;
  92
  93         $this->_positionIncrement = 1;
  94     }
  95
  96
  97     /**
  98      * positionIncrement setter
  99      *
 100      * @param integer $positionIncrement
 101      */
 102     public function setPositionIncrement($positionIncrement)
 103     {
 104         $this->_positionIncrement = $positionIncrement;
 105     }
 106
 107     /**
 108      * Returns the position increment of this Token.
 109      *
 110      * @return integer
 111      */
 112     public function getPositionIncrement()
 113     {
 114         return $this->_positionIncrement;
 115     }
 116
 117     /**
 118      * Returns the Token's term text.
 119      *
 120      * @return string
 121      */
 122     public function getTermText()
 123     {
 124         return $this->_termText;
 125     }
 126
 127     /**
 128      * Returns this Token's starting offset, the position of the first character
 129      * corresponding to this token in the source text.
 130      *
 131      * Note:
 132      * The difference between getEndOffset() and getStartOffset() may not be equal
 133      * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
 134      * by a stemmer or some other filter.
 135      *
 136      * @return integer
 137      */
 138     public function getStartOffset()
 139     {
 140         return $this->_startOffset;
 141     }
 142
 143     /**
 144      * Returns this Token's ending offset, one greater than the position of the
 145      * last character corresponding to this token in the source text.
 146      *
 147      * @return integer
 148      */
 149     public function getEndOffset()
 150     {
 151         return $this->_endOffset;
 152     }
 153 }
 154