final move of files
[web.mtrack] / Zend / Search / Lucene / Document / Pptx.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Document
18  * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  * @version    $Id: Pptx.php 16971 2009-07-22 18:05:45Z mikaelkael $
21  */
22
23
24 /** Zend_Search_Lucene_Document_OpenXml */
25 require_once 'Zend/Search/Lucene/Document/OpenXml.php';
26
27 if (class_exists('ZipArchive', false)) {
28
/**
 * Pptx document.
 *
 * Lucene document built from a PresentationML (.pptx) package. The package is
 * read with ZipArchive; the text runs (a:t elements) of every slide and of the
 * notes page attached to it are concatenated into the "body" field, and the
 * values returned by extractMetaData() are added as additional fields.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - PresentationML
     *
     * @var string
     */
    const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

    /**
     * Xml Schema - DrawingML
     *
     * @var string
     */
    const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

    /**
     * Xml Schema - Slide relation
     *
     * @var string
     */
    const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

    /**
     * Xml Schema - Slide notes relation
     *
     * @var string
     */
    const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

    /**
     * Object constructor
     *
     * Opens the package, follows the relationship parts
     * (_rels/.rels -> office document -> slides -> notes slides), collects the
     * slide and notes text, and registers the resulting fields on this document.
     *
     * Slide or notes parts that are missing from the archive or cannot be
     * parsed are skipped instead of aborting the whole import.
     *
     * @param string  $fileName     Path of the .pptx file to read
     * @param boolean $storeContent Whether the body text is stored in the index
     *                              (Text field) or only indexed (UnStored field)
     * @throws Zend_Search_Lucene_Exception When the file cannot be opened as a
     *                                      zip archive or has no usable
     *                                      _rels/.rels part
     */
    private function __construct($fileName, $storeContent)
    {
        // Document data holders
        $slides         = array();
        $slideNotes     = array();
        $documentBody   = array();
        $coreProperties = array();

        // Open OpenXML package. ZipArchive::open() returns true on success and
        // an error code otherwise, so the comparison has to be strict.
        $package = new ZipArchive();
        if ($package->open($fileName) !== true) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception("Unable to open '" . $fileName . "' as a .pptx package.");
        }

        // Read package relations; without them nothing else can be located
        $relations = $this->_loadXmlPart($package, '_rels/.rels');
        if ($relations === false) {
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
        }

        // Search for the officeDocument relation (the presentation part)
        foreach ($relations->Relationship as $rel) {
            if ((string) $rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                continue;
            }

            // Found office document! Search for slides...
            $presentationTarget = (string) $rel['Target'];
            $presentationDir    = dirname($presentationTarget);

            $slideRelations = $this->_loadXmlPart(
                $package,
                $this->absoluteZipPath($presentationDir . '/_rels/' . basename($presentationTarget) . '.rels')
            );

            if ($slideRelations !== false) {
                foreach ($slideRelations->Relationship as $slideRel) {
                    if ((string) $slideRel['Type'] !== self::SCHEMA_SLIDERELATION) {
                        continue;
                    }

                    // Found slide! Its relationship id (minus the "rId" prefix)
                    // is the key used both for ordering and for pairing the
                    // slide with its notes.
                    $slideTarget = (string) $slideRel['Target'];
                    $slideDir    = $presentationDir . '/' . dirname($slideTarget);
                    $slideName   = basename($slideTarget);
                    $slideKey    = str_replace('rId', '', (string) $slideRel['Id']);

                    $slide = $this->_loadXmlPart(
                        $package,
                        $this->absoluteZipPath($slideDir . '/' . $slideName)
                    );
                    if ($slide === false) {
                        // Missing or malformed slide part: nothing to index
                        continue;
                    }
                    $slides[$slideKey] = $slide;

                    // Search for slide notes
                    $slideNotesRelations = $this->_loadXmlPart(
                        $package,
                        $this->absoluteZipPath($slideDir . '/_rels/' . $slideName . '.rels')
                    );
                    if ($slideNotesRelations === false) {
                        continue;
                    }

                    foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                        if ((string) $slideNoteRel['Type'] !== self::SCHEMA_SLIDENOTESRELATION) {
                            continue;
                        }

                        // Found slide notes! Only the first notes relation is used.
                        $noteTarget = (string) $slideNoteRel['Target'];
                        $slideNote  = $this->_loadXmlPart(
                            $package,
                            $this->absoluteZipPath(
                                $slideDir . '/' . dirname($noteTarget) . '/' . basename($noteTarget)
                            )
                        );
                        if ($slideNote !== false) {
                            $slideNotes[$slideKey] = $slideNote;
                        }

                        break;
                    }
                }
            }

            // Only the first officeDocument relation is processed
            break;
        }

        // Sort slides by relationship id. Notes are looked up by key below, so
        // they do not need sorting.
        ksort($slides);

        // Extract contents from slides, each followed by its notes (if any)
        foreach ($slides as $slideKey => $slide) {
            foreach ($this->_extractText($slide) as $text) {
                $documentBody[] = $text;
            }

            if (isset($slideNotes[$slideKey])) {
                foreach ($this->_extractText($slideNotes[$slideKey]) as $text) {
                    $documentBody[] = $text;
                }
            }
        }

        // Read core properties
        $coreProperties = $this->extractMetaData($package);

        // Close file
        $package->close();

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (!isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Read one XML part from the package and parse it.
     *
     * Callers must compare the result with "=== false": a SimpleXMLElement
     * without child nodes evaluates to false in a boolean context.
     *
     * @param ZipArchive $package  Opened package
     * @param string     $partName Name of the part inside the archive
     * @return SimpleXMLElement|false Parsed part, or false when the part is
     *                                missing, empty, rejected or not
     *                                well-formed
     */
    private function _loadXmlPart(ZipArchive $package, $partName)
    {
        $xml = $package->getFromName($partName);
        if ($xml === false || $xml === '') {
            return false;
        }

        // The package content is untrusted input. The Open Packaging
        // Conventions do not allow DTD declarations in package XML, so a part
        // carrying a DOCTYPE is rejected rather than handed to the parser
        // (entity expansion / external entity attacks).
        // NOTE(review): this is a byte-level check and therefore only effective
        // for UTF-8 encoded parts - confirm whether UTF-16 parts need handling.
        if (strpos($xml, '<!DOCTYPE') !== false) {
            return false;
        }

        // LIBXML_NONET: never fetch anything over the network while parsing
        return simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NONET);
    }

    /**
     * Collect the contents of all DrawingML text runs (a:t) of a part.
     *
     * @param SimpleXMLElement $part Parsed slide or notes slide
     * @return array List of strings in document order; empty when the XPath
     *               query fails or matches nothing
     */
    private function _extractText(SimpleXMLElement $part)
    {
        // Register namespaces
        $part->registerXPathNamespace('p', self::SCHEMA_PRESENTATIONML);
        $part->registerXPathNamespace('a', self::SCHEMA_DRAWINGML);

        // Fetch all text; xpath() does not return an array on error
        $textElements = $part->xpath('//a:t');
        if (!is_array($textElements)) {
            return array();
        }

        $texts = array();
        foreach ($textElements as $textElement) {
            $texts[] = (string) $textElement;
        }

        return $texts;
    }

    /**
     * Load Pptx document from a file
     *
     * @param string  $fileName     Path of the .pptx file to read
     * @param boolean $storeContent Whether the body text is stored in the index
     * @return Zend_Search_Lucene_Document_Pptx
     * @throws Zend_Search_Lucene_Exception When the file cannot be opened as a
     *                                      zip archive or has no usable
     *                                      _rels/.rels part
     */
    public static function loadPptxFile($fileName, $storeContent = false)
    {
        return new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
    }
}
192
193 } // end if (class_exists('ZipArchive'))