7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
17 * @subpackage Document
18 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: OpenXml.php 16971 2009-07-22 18:05:45Z mikaelkael $
24 /** Zend_Search_Lucene_Document */
25 require_once 'Zend/Search/Lucene/Document.php';
27 if (class_exists('ZipArchive', false)) {
/**
 * Base class for Lucene documents built from OpenXML (zip-packaged) files.
 *
 * Provides the helpers shared by concrete OpenXML document types: extraction
 * of the package's core-property metadata and normalisation of part paths
 * inside the zip archive.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
{
    /**
     * Xml Schema - Relationships
     *
     * @var string
     */
    const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

    /**
     * Xml Schema - Office document
     *
     * @var string
     */
    const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Xml Schema - Core properties
     *
     * This is the relationship *type* that marks the core properties part in
     * "_rels/.rels". It is not the namespace of the elements inside that part;
     * see SCHEMA_COREPROPERTIES_NAMESPACE for that.
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

    /**
     * Xml Schema - Core properties element namespace (the "cp:" elements of
     * the core properties part, e.g. cp:keywords, cp:lastModifiedBy)
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

    /**
     * Xml Schema - Dublin Core
     *
     * @var string
     */
    const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

    /**
     * Xml Schema - Dublin Core Terms
     *
     * @var string
     */
    const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

    /**
     * Extract metadata from document
     *
     * Looks up the core properties part through the package relationships
     * ("_rels/.rels") and collects every Dublin Core, core-properties and
     * Dublin Core Terms element found in it. When several elements share a
     * local name, the one read last wins.
     *
     * A package without readable relationships or without a readable core
     * properties part yields an empty array instead of a fatal error.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @return array    Key-value pairs containing document meta data
     */
    protected function extractMetaData(ZipArchive $package)
    {
        // Data holders
        $coreProperties = array();

        // Read relations and search for core properties
        $relations = $this->_loadXmlPart($package, '_rels/.rels');
        if ($relations === false) {
            return $coreProperties;
        }

        // Namespaces to harvest, in the order they are applied. The
        // relationship-type URI is kept only for backward compatibility with
        // the previous lookup; real "cp:" elements live in
        // SCHEMA_COREPROPERTIES_NAMESPACE.
        $namespaces = array(
            self::SCHEMA_DUBLINCORE,
            self::SCHEMA_COREPROPERTIES_NAMESPACE,
            self::SCHEMA_COREPROPERTIES,
            self::SCHEMA_DUBLINCORETERMS,
        );

        foreach ($relations->Relationship as $rel) {
            if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                continue;
            }

            // Found core properties! Normalise the target first: zip entry
            // names carry neither a leading "/" nor "./" segments.
            $contents = $this->_loadXmlPart(
                $package,
                $this->absoluteZipPath((string) $rel['Target'])
            );
            if ($contents === false) {
                continue;
            }

            foreach ($namespaces as $namespace) {
                foreach ($contents->children($namespace) as $child) {
                    $coreProperties[$child->getName()] = (string) $child;
                }
            }
        }

        return $coreProperties;
    }

    /**
     * Determine absolute zip path
     *
     * Treats both "/" and "\" as separators, drops empty and "." segments,
     * resolves ".." against the preceding segment and joins the remainder
     * with "/".
     *
     * @param string $path
     * @return string
     */
    protected function absoluteZipPath($path)
    {
        $segments  = explode('/', str_replace('\\', '/', $path));
        $absolutes = array();

        foreach ($segments as $segment) {
            if ($segment === '' || $segment === '.') {
                continue;
            }

            if ($segment === '..') {
                array_pop($absolutes);
            } else {
                $absolutes[] = $segment;
            }
        }

        return implode('/', $absolutes);
    }

    /**
     * Read one part of the package and parse it as XML
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @param string     $name       Entry name inside the archive
     * @return SimpleXMLElement|false    Parsed part, or false if the entry is
     *                                   missing, empty or not well-formed XML
     */
    private function _loadXmlPart(ZipArchive $package, $name)
    {
        $xml = $package->getFromName($name);
        if ($xml === false || $xml === '') {
            return false;
        }

        // LIBXML_NONET: package contents are untrusted, never let the parser
        // reach out to the network while loading them.
        return simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NONET);
    }
}
132 } // end if (class_exists('ZipArchive'))