7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: File.php 16541 2009-07-07 06:59:03Z bkarwin $
25 * @package Zend_Search_Lucene
27 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
28 * @license http://framework.zend.com/license/new-bsd New BSD License
30 abstract class Zend_Search_Lucene_Storage_File
/**
 * Reads $length bytes starting at the current file position and moves
 * the file pointer past them.
 *
 * The concrete readers of this class pass the result to ord() and index
 * it by byte offset, so implementations are expected to return a string.
 *
 * @param integer $length Number of bytes to read; one byte when omitted.
 * @return string
 */
abstract protected function _fread($length = 1);
/**
 * Moves the file position indicator.
 *
 * The new position, counted in bytes from the start of the file, is
 * $offset added to the location selected by $whence:
 *  SEEK_SET - position equals $offset bytes.
 *  SEEK_CUR - current location plus $offset.
 *  SEEK_END - end-of-file plus $offset (pass a negative value to land
 *             before the end of the file).
 *
 * Returns 0 on success and -1 otherwise.
 *
 * @param integer $offset
 * @param integer $whence One of SEEK_SET (the default), SEEK_CUR, SEEK_END.
 * @return integer
 */
abstract public function seek($offset, $whence = SEEK_SET);
/**
 * NOTE(review): judging by its name this reports the current position of
 * the file pointer (the value seek() manipulates) - confirm the exact
 * return value against a concrete implementation.
 */
abstract public function tell();
/**
 * Returns true on success or false on failure.
 *
 * NOTE(review): presumably forces pending output to be written to the
 * underlying storage - confirm in the concrete subclasses.
 *
 * @return boolean
 */
abstract public function flush();
/**
 * Writes $length bytes of $data to the end of the file; when $length is
 * null the whole of $data is written.
 *
 * @param string  $data
 * @param integer $length Byte count, or null for everything.
 */
abstract protected function _fwrite($data, $length = null);
/**
 * Acquires a lock of the requested type.
 *
 * $lockType may be LOCK_SH (shared lock) or LOCK_EX (exclusive lock).
 *
 * @param integer $lockType
 * @param boolean $nonBlockinLock Defaults to false. NOTE(review): by its
 *                                name this asks for a non-blocking lock
 *                                attempt - confirm in the subclasses.
 */
abstract public function lock($lockType, $nonBlockinLock = false);
/**
 * NOTE(review): presumably releases a lock previously taken with lock() -
 * confirm against a concrete implementation.
 */
abstract public function unlock();
/**
 * Reads a single byte at the current file position, advances the file
 * pointer and returns the byte as its ordinal value.
 *
 * @return integer
 */
public function readByte()
{
    $char = $this->_fread(1);

    return ord($char);
}
/**
 * Appends one byte to the end of the file.
 *
 * @param integer $byte Byte value; turned into a character with chr().
 * @return mixed Whatever _fwrite() returns.
 */
public function writeByte($byte)
{
    $char = chr($byte);

    return $this->_fwrite($char, 1);
}
/**
 * Reads $num bytes starting at the current file position and advances
 * the file pointer.
 *
 * @param integer $num Number of bytes to read.
 * @return string The bytes as returned by _fread().
 */
public function readBytes($num)
{
    $chunk = $this->_fread($num);

    return $chunk;
}
/**
 * Appends $num bytes of $data to the end of the file; the whole of $data
 * is written when $num is null. Nothing is returned.
 *
 * @param string  $data
 * @param integer $num Byte count, or null for everything.
 */
public function writeBytes($data, $num = null)
{
    // Pure delegation: the result of _fwrite() is deliberately not returned.
    $this->_fwrite($data, $num);
}
/**
 * Reads a four-byte integer at the current file position and advances
 * the file pointer. The first byte read is the most significant one.
 *
 * @return integer
 */
public function readInt()
{
    $bytes = $this->_fread(4);

    // Fold the bytes together, shifting earlier bytes towards the high end.
    $result = 0;
    for ($i = 0; $i < 4; $i++) {
        $result = ($result << 8) | ord($bytes[$i]);
    }

    return $result;
}
/**
 * Appends an integer to the end of the file as four bytes, most
 * significant byte first.
 *
 * @param integer $value Converted to integer before being written.
 */
public function writeInt($value)
{
    settype($value, 'integer');

    // Emit one byte per 8-bit group, highest group first.
    $packed = '';
    foreach (array(24, 16, 8, 0) as $shift) {
        $packed .= chr(($value >> $shift) & 0xFF);
    }

    $this->_fwrite($packed, 4);
}
/**
 * Reads an eight-byte long integer at the current file position and
 * advances the file pointer.
 *
 * When PHP integers are wider than four bytes the value is assembled
 * directly, first byte most significant; otherwise the work is handed to
 * readLong32Bit().
 *
 * fseek() uses long for offset. Thus, largest index segment file size in
 * 32bit mode is 2Gb.
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception
 */
public function readLong()
{
    if (PHP_INT_SIZE <= 4) {
        // Integers cannot hold 64 bits here.
        return $this->readLong32Bit();
    }

    $bytes = $this->_fread(8);

    $result = 0;
    for ($i = 0; $i < 8; $i++) {
        $result = ($result << 8) | ord($bytes[$i]);
    }

    return $result;
}
/**
 * Appends a long integer to the end of the file as eight bytes, most
 * significant byte first.
 *
 * When PHP integers are no wider than four bytes the value is passed to
 * writeLong32Bit() unchanged instead.
 *
 * fseek() and ftell() use long for offset. Thus, largest index segment
 * file size in 32bit mode is 2Gb.
 *
 * @param integer $value
 * @throws Zend_Search_Lucene_Exception
 */
public function writeLong($value)
{
    if (PHP_INT_SIZE <= 4) {
        $this->writeLong32Bit($value);
        return;
    }

    settype($value, 'integer');

    // One byte per 8-bit group, starting with bits 56..63.
    $packed = '';
    for ($shift = 56; $shift >= 0; $shift -= 8) {
        $packed .= chr(($value >> $shift) & 0xFF);
    }

    $this->_fwrite($packed, 8);
}
236 * Returns a long integer from the current position in the file,
237 * advances the file pointer and return it as float (for 32-bit platforms).
239 * @return integer|float
240 * @throws Zend_Search_Lucene_Exception
// Reads two consecutive 32-bit words with readInt() and combines them into a
// single value, using float arithmetic where the result does not fit an integer.
// NOTE(review): several short lines of this method (branch bodies, else
// keywords, closing braces) are not visible in this listing; the comments
// below describe only the statements that are visible.
242 public function readLong32Bit()
// The first word read is used as the upper half, the second as the lower half.
244 $wordHigh = $this->readInt();
245 $wordLow = $this->readInt();
// Top bit of the high word set: the stored value is negative.
247 if ($wordHigh & (int)0x80000000) {
248 // It's a negative value since the highest bit is set
// Special-cases a high word of all ones combined with a low word whose top
// bit is also set; the body of this branch is not visible here.
249 if ($wordHigh == (int)0xFFFFFFFF && ($wordLow & (int)0x80000000)) {
// Presumably the else branch: other negative values are rejected with an exception.
252 require_once 'Zend/Search/Lucene/Exception.php';
253 throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
259 // Value is larger than 0x7FFF FFFF. Represent low word as float.
// Clear the top bit, then add 2^31 back as a float so the low word is non-negative.
// NOTE(review): the condition guarding these two lines is not visible - confirm.
260 $wordLow &= 0x7FFFFFFF;
261 $wordLow += (float)0x80000000;
264 if ($wordHigh == 0) {
265 // Return value as integer if possible
// General case: high word times 2^32 plus the low word, computed in floating point.
269 return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
274 * Writes long integer to the end of file (32-bit platforms implementation)
276 * @param integer|float $value
277 * @throws Zend_Search_Lucene_Exception
// Writes $value as two 32-bit words through writeInt(): high word first, then low word.
// NOTE(review): some short lines of this method (conditions, else keywords,
// closing braces) are not visible in this listing; the comments below
// describe only the statements that are visible.
279 public function writeLong32Bit($value)
// Values below the (int)0x80000000 bound are rejected with an exception.
281 if ($value < (int)0x80000000) {
282 require_once 'Zend/Search/Lucene/Exception.php';
283 throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
// High word becomes all ones and the low word is $value cast to integer.
// NOTE(review): the guarding condition is not visible; this looks like the
// negative-value case - confirm.
287 $wordHigh = (int)0xFFFFFFFF;
288 $wordLow = (int)$value;
// Split by 2^32: the truncated quotient is the high word, the remainder the low word.
290 $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
291 $wordLow = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;
293 if ($wordLow > 0x7FFFFFFF) {
294 // Highest bit of low word is set. Translate it to the corresponding negative integer value
// Subtract 2^31, then OR the top bit back in.
295 $wordLow -= 0x80000000;
296 $wordLow |= 0x80000000;
// Emit the two halves, upper word first.
300 $this->writeInt($wordHigh);
301 $this->writeInt($wordLow);
/**
 * Reads a variable-length integer at the current file position and
 * advances the file pointer.
 *
 * Each byte contributes its low seven bits, least significant group
 * first; a set high bit (0x80) means another byte follows.
 *
 * @return integer
 */
public function readVInt()
{
    $byte   = ord($this->_fread(1));
    $result = $byte & 0x7F;
    $shift  = 7;

    while (($byte & 0x80) != 0) {
        $byte    = ord($this->_fread(1));
        $result |= ($byte & 0x7F) << $shift;
        $shift  += 7;
    }

    return $result;
}
/**
 * Appends a variable-length integer to the end of the file.
 *
 * While the value is greater than 0x7F its low seven bits are written
 * with the high bit set and the value is shifted right by seven; the
 * remaining value is written as the final byte.
 *
 * @param integer $value Converted to integer before being written.
 */
public function writeVInt($value)
{
    settype($value, 'integer');

    for (; $value > 0x7F; $value >>= 7) {
        $this->_fwrite(chr(($value & 0x7F) | 0x80));
    }

    $this->_fwrite(chr($value));
}
/**
 * Reads a string at the current file position and advances the file
 * pointer.
 *
 * The stored form is a VInt character count followed by the characters
 * in Java 2 "modified UTF-8": a null character is stored as the two
 * bytes 0xC0 0x80, which this method turns back into a single NUL byte.
 *
 * @return string
 */
public function readString()
{
    $strlen = $this->readVInt();
    if ($strlen == 0) {
        return '';
    }

    /*
     * This implementation supports only Basic Multilingual Plane
     * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
     * "supplementary characters" (characters whose code points are
     * greater than 0xFFFF).
     * Java 2 represents these characters as a pair of char (16-bit)
     * values, the first from the high-surrogates range (0xD800-0xDBFF),
     * the second from the low-surrogates range (0xDC00-0xDFFF). Then
     * they are encoded as usual UTF-8 characters in six bytes.
     * Standard UTF-8 representation uses four bytes for supplementary
     * characters.
     */

    // Every character occupies at least one byte, so start with $strlen
    // bytes and fetch the extra bytes of multi-byte characters on demand.
    $str_val = $this->_fread($strlen);

    for ($count = 0; $count < $strlen; $count++) {
        $lead = ord($str_val[$count]);

        if (($lead & 0xC0) == 0xC0) {
            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }

            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Step over the continuation bytes of this character.
            $count   += $addBytes;
        }
    }

    // Java2 encodes the null character in two bytes (0xC0 0x80). Decode it
    // only after the scan: the buffer must not shrink while the loop is
    // indexing into it, and the replacement has to be a real NUL byte
    // (assigning the integer 0 to a string offset stores the character "0").
    return str_replace("\xC0\x80", "\x00", $str_val);
}
/**
 * Appends a string to the end of the file.
 *
 * The stored form is the character count as a VInt followed by the bytes
 * of the string, with every NUL byte written as the two-byte sequence
 * 0xC0 0x80 (Java 2 representation of the null character).
 *
 * @param string $str Converted to string before being written.
 * @throws Zend_Search_Lucene_Exception When the computed character count
 *                                      is negative (invalid UTF-8 input).
 */
public function writeString($str)
{
    /*
     * This implementation supports only Basic Multilingual Plane
     * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
     * "supplementary characters" (characters whose code points are
     * greater than 0xFFFF).
     * Java 2 represents these characters as a pair of char (16-bit)
     * values, the first from the high-surrogates range (0xD800-0xDBFF),
     * the second from the low-surrogates range (0xDC00-0xDFFF). Then
     * they are encoded as usual UTF-8 characters in six bytes.
     * Standard UTF-8 representation uses four bytes for supplementary
     * characters.
     */

    // convert input to a string before iterating string characters
    settype($str, 'string');

    $chars = $strlen = strlen($str);

    // The string is already in byte form; only the number of characters
    // has to be derived: each multi-byte lead byte accounts for one
    // character plus $addBytes continuation bytes.
    for ($count = 0; $count < $strlen; $count++) {
        $lead = ord($str[$count]);

        if (($lead & 0xC0) == 0xC0) {
            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }

            $chars -= $addBytes;
            $count += $addBytes;
        }
    }

    if ($chars < 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
    }

    $this->writeVInt($chars);

    // str_replace() takes (search, replace, subject). A NUL byte counts as
    // one character above, so the count stays correct while the byte itself
    // is stored as 0xC0 0x80. Strings without NUL bytes pass through as is.
    $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
}
/**
 * Reads binary data at the current file position and advances the file
 * pointer: a VInt byte count first, then that many bytes.
 *
 * @return string
 */
public function readBinary()
{
    $length = $this->readVInt();

    return $this->_fread($length);
}