import
[web.mtrack] / inc / lib / Zend / Search / Lucene / Document / Xlsx.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Document
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: Xlsx.php 16971 2009-07-22 18:05:45Z mikaelkael $
21  */
22
23
24 /** Zend_Search_Lucene_Document_OpenXml */
25 require_once 'Zend/Search/Lucene/Document/OpenXml.php';
26
if (class_exists('ZipArchive', false)) {

/**
 * Xlsx document.
 *
 * Lucene document populated from an Office Open XML spreadsheet (.xlsx).
 * The text of every worksheet cell is collected into the "body" field, the
 * file name into the "filename" field and the package's core properties into
 * one field each. Instances are obtained through loadXlsxFile(); the
 * constructor itself is private.
 *
 * The class is only declared when the ZipArchive class is already loaded
 * (the check above does not trigger autoloading).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - SpreadsheetML
     *
     * @var string
     */
    const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';

    /**
     * Xml Schema - DrawingML
     *
     * @var string
     */
    const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

    /**
     * Xml Schema - Shared Strings
     *
     * Relationship type that points from the workbook to its shared string table.
     *
     * @var string
     */
    const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';

    /**
     * Xml Schema - Worksheet relation
     *
     * Relationship type that points from the workbook to a worksheet part.
     *
     * @var string
     */
    const SCHEMA_WORKSHEETRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';

    /**
     * Xml Schema - Slide notes relation
     *
     * Not referenced by this class; kept so existing references to the
     * constant keep working.
     *
     * @var string
     */
    const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

    /**
     * Object constructor
     *
     * Reads the spreadsheet package and registers the "filename", "body",
     * core-property and (when missing from the core properties) "title"
     * fields. When the archive cannot be opened the document is still
     * created, with an empty body and no core properties.
     *
     * @param string  $fileName     Path of the .xlsx file to read
     * @param boolean $storeContent Whether the body text is stored in the
     *                              index (Text field) or only indexed
     *                              (UnStored field)
     */
    private function __construct($fileName, $storeContent)
    {
        // Document data holders
        $documentBody   = array();
        $coreProperties = array();

        // Open OpenXML package. ZipArchive::open() returns TRUE on success and
        // an error code otherwise, so the archive is only read, handed to
        // extractMetaData() and closed when it really was opened.
        $package = new ZipArchive();
        if ($package->open($fileName) === true) {
            // Extract contents from worksheets
            $documentBody = $this->_extractBody($package);

            // Read core properties
            $coreProperties = $this->extractMetaData($package);

            // Close file
            $package->close();
        }

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        $body = implode(' ', $documentBody);
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', $body, 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', $body, 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (!isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Collect the values of all worksheet cells of an opened package.
     *
     * Only the first office-document relationship found in "_rels/.rels" is
     * processed. Parts that are missing or cannot be parsed are skipped.
     *
     * @param ZipArchive $package Opened .xlsx archive
     * @return array Cell values in worksheet, row, cell order
     */
    private function _extractBody(ZipArchive $package)
    {
        $documentBody = array();

        // Read relations and search for officeDocument
        $relations = $this->_loadXmlPart($package, '_rels/.rels');
        if ($relations === false) {
            return $documentBody;
        }

        foreach ($relations->Relationship as $rel) {
            if ((string)$rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                continue;
            }

            // Found office document! Read relations for workbook...
            $workbookTarget = (string)$rel['Target'];
            $workbookDir    = dirname($workbookTarget);

            $workbookRelations = $this->_loadXmlPart(
                $package,
                $this->absoluteZipPath($workbookDir . '/_rels/' . basename($workbookTarget) . '.rels')
            );

            if ($workbookRelations !== false) {
                $sharedStrings = $this->_readSharedStrings($package, $workbookRelations, $workbookDir);
                $worksheets    = $this->_readWorksheets($package, $workbookRelations, $workbookDir);

                foreach ($worksheets as $worksheet) {
                    foreach ($worksheet->sheetData->row as $row) {
                        foreach ($row->c as $c) {
                            $documentBody[] = $this->_extractCellValue($c, $sharedStrings);
                        }
                    }
                }
            }

            // Later office-document relationships are ignored
            break;
        }

        return $documentBody;
    }

    /**
     * Read one part of the package and parse it as XML.
     *
     * NOTE(review): the XML comes from the indexed file and is therefore
     * untrusted; consider hardening the parse against external entities
     * (later Zend Framework 1 releases route this through a security
     * scanner) - confirm what is available in this code base.
     *
     * @param ZipArchive $package Opened .xlsx archive
     * @param string     $path    Name of the entry inside the archive
     * @return SimpleXMLElement|false False when the entry is missing, empty
     *                                or not parseable
     */
    private function _loadXmlPart(ZipArchive $package, $path)
    {
        $contents = $package->getFromName($path);
        if ($contents === false || $contents === '') {
            return false;
        }

        return simplexml_load_string($contents);
    }

    /**
     * Read the workbook's shared string table.
     *
     * One entry is produced for every <si> element, including elements
     * without text, so that positions in the returned list match the
     * indexes stored in the cells.
     *
     * @param ZipArchive       $package           Opened .xlsx archive
     * @param SimpleXMLElement $workbookRelations Parsed workbook relationships
     * @param string           $workbookDir       Directory of the workbook part
     * @return array List of strings, empty when there is no readable table
     */
    private function _readSharedStrings(ZipArchive $package, SimpleXMLElement $workbookRelations, $workbookDir)
    {
        $sharedStrings = array();

        $workbookRelations->registerXPathNamespace('rel', Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);
        $matches = $workbookRelations->xpath(
            "rel:Relationship[@Type='" . self::SCHEMA_SHAREDSTRINGS . "']"
        );
        if (empty($matches)) {
            // No shared strings relationship declared: nothing to read
            return $sharedStrings;
        }

        $xmlStrings = $this->_loadXmlPart(
            $package,
            $this->absoluteZipPath($workbookDir . '/' . (string)$matches[0]['Target'])
        );
        if ($xmlStrings === false) {
            return $sharedStrings;
        }

        foreach ($xmlStrings->si as $item) {
            // _parseRichText() copes with plain <t>, rich text <r> runs and
            // items that have neither (yielding an empty string)
            $sharedStrings[] = $this->_parseRichText($item);
        }

        return $sharedStrings;
    }

    /**
     * Load every worksheet referenced by the workbook relationships.
     *
     * @param ZipArchive       $package           Opened .xlsx archive
     * @param SimpleXMLElement $workbookRelations Parsed workbook relationships
     * @param string           $workbookDir       Directory of the workbook part
     * @return array Parsed worksheets keyed by relationship id with the "rId"
     *               prefix removed, sorted by key
     */
    private function _readWorksheets(ZipArchive $package, SimpleXMLElement $workbookRelations, $workbookDir)
    {
        $worksheets = array();

        // Loop relations for workbook and extract worksheets...
        foreach ($workbookRelations->Relationship as $workbookRelation) {
            if ((string)$workbookRelation['Type'] !== self::SCHEMA_WORKSHEETRELATION) {
                continue;
            }

            $target    = (string)$workbookRelation['Target'];
            $worksheet = $this->_loadXmlPart(
                $package,
                $this->absoluteZipPath($workbookDir . '/' . dirname($target) . '/' . basename($target))
            );

            // A worksheet that is missing or unparseable is left out
            if ($worksheet !== false) {
                $worksheets[str_replace('rId', '', (string)$workbookRelation['Id'])] = $worksheet;
            }
        }

        // Sort worksheets
        ksort($worksheets);

        return $worksheets;
    }

    /**
     * Convert a single <c> (cell) element into the value added to the body.
     *
     * @param SimpleXMLElement $c             Cell element
     * @param array            $sharedStrings Shared string table of the workbook
     * @return string|int|float|bool
     */
    private function _extractCellValue(SimpleXMLElement $c, array $sharedStrings)
    {
        // Determine data type
        $dataType = (string)$c['t'];
        $rawValue = (string)$c->v;

        switch ($dataType) {
            case 's':
                // Value is an index into the shared string table
                if ($rawValue === '') {
                    return '';
                }
                $index = (int)$rawValue;

                return isset($sharedStrings[$index]) ? $sharedStrings[$index] : '';

            case 'b':
                // Value is boolean. The loose comparisons are deliberate:
                // numeric spellings such as '0.0' still compare equal to '0'.
                if ($rawValue == '0') {
                    return false;
                }
                if ($rawValue == '1') {
                    return true;
                }

                return (bool)$c->v;

            case 'inlineStr':
                // Value is rich text inline
                return $this->_parseRichText($c->is);

            case 'e':
                // Value is an error message
                return $rawValue;

            default:
                // Value is a string; numeric strings are normalised to
                // int when they have no fractional part, float otherwise
                if (is_numeric($rawValue)) {
                    if ($rawValue == (int)$rawValue) {
                        return (int)$rawValue;
                    }

                    return (float)$rawValue;
                }

                return $rawValue;
        }
    }

    /**
     * Parse rich text XML
     *
     * Returns the text of the element's <t> child when there is one,
     * otherwise the concatenated <t> texts of all its <r> children.
     *
     * @param SimpleXMLElement $is Element holding either a <t> child or <r> runs
     * @return string Empty string when nothing is passed or no text is found
     */
    private function _parseRichText($is = null)
    {
        if ($is === null) {
            return '';
        }

        if (isset($is->t)) {
            return (string)$is->t;
        }

        $text = '';
        foreach ($is->r as $run) {
            $text .= (string)$run->t;
        }

        return $text;
    }

    /**
     * Load Xlsx document from a file
     *
     * @param string  $fileName     Path of the .xlsx file to read
     * @param boolean $storeContent Whether the body text is stored in the index
     * @return Zend_Search_Lucene_Document_Xlsx
     */
    public static function loadXlsxFile($fileName, $storeContent = false)
    {
        return new Zend_Search_Lucene_Document_Xlsx($fileName, $storeContent);
    }
}

} // end if (class_exists('ZipArchive'))