3 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
10 * PHP versions 4 and 5
14 * Copyright (c) 2002-2008 The PHP Group
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
21 * * Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * * The name of the author may not be used to endorse or promote products
27 * derived from this software without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
30 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
31 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
33 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
34 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
35 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
36 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
37 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
38 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
39 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * @author Stig Bakken <ssb@fast.no>
44 * @author Tomas V.V.Cox <cox@idecnet.com>
45 * @author Stephan Schmidt <schst@php.net>
46 * @copyright 2002-2008 The PHP Group
47 * @license http://opensource.org/licenses/bsd-license New BSD License
48 * @version CVS: $Id: Parser.php 302733 2010-08-24 01:09:09Z clockwerx $
49 * @link http://pear.php.net/package/XML_Parser
/**
 * PEAR base class; this package relies on its error handling.
 */
require_once 'PEAR.php';

/**
 * Error code: the XML parser resource could not be created.
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);

/**
 * Error code: an unsupported parser mode was requested.
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

/**
 * Error code: an invalid source or target encoding was given.
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

/**
 * Error code: the specified file could not be opened for reading.
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

/**
 * Error code: the input passed to the parser has an illegal format.
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);

/**
 * Error code: a remote file cannot be retrieved with the current settings.
 */
define('XML_PARSER_ERROR_REMOTE', 205);
90 * This is an XML parser based on PHP's "xml" extension,
91 * based on the bundled expat library.
94 * - It requires PHP 4.0.4pl1 or greater
95 * - From revision 1.17, the function names used by the 'func' mode
96 * are in the format "xmltag_$elem", for example: use "xmltag_name"
97 * to handle the <name></name> tags of your xml file.
98 * - different parsing modes
101 * @package XML_Parser
102 * @author Stig Bakken <ssb@fast.no>
103 * @author Tomas V.V.Cox <cox@idecnet.com>
104 * @author Stephan Schmidt <schst@php.net>
105 * @copyright 2002-2008 The PHP Group
106 * @license http://opensource.org/licenses/bsd-license New BSD License
107 * @version Release: @package_version@
108 * @link http://pear.php.net/package/XML_Parser
109 * @todo create XML_Parser_Namespace to parse documents with namespaces
110 * @todo create XML_Parser_Pull
111 * @todo Tests that need to be made:
112 * - mixing character encodings
113 * - a test using all expat handlers
114 * - options (folding, output charset)
116 class XML_Parser extends PEAR
124 * @see xml_parser_create()
129 * File handle if parsing from a file
136 * Whether to do case folding
138 * If set to true, all tag and attribute names will
139 * be converted to UPPER CASE.
146 * Mode of operation, one of "event" or "func"
153 * Mapping from expat handler function to class method.
157 var $handler = array(
158 'character_data_handler' => 'cdataHandler',
159 'default_handler' => 'defaultHandler',
160 'processing_instruction_handler' => 'piHandler',
161 'unparsed_entity_decl_handler' => 'unparsedHandler',
162 'notation_decl_handler' => 'notationHandler',
163 'external_entity_ref_handler' => 'entityrefHandler'
192 var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');
195 // {{{ php4 constructor
/**
 * PHP4-style constructor.
 *
 * Exists for PHP4 compatibility only; it forwards its arguments
 * unchanged to XML_Parser::__construct().
 *
 * @param string $srcenc source charset encoding, use NULL (default) to use
 *                       whatever the document specifies
 * @param string $mode   how this parser object should work, "event" for
 *                       startelement/endelement-type events, "func"
 *                       to have it call functions named after elements
 * @param string $tgtenc a valid target encoding
 */
function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
{
    // self:: resolves to this class, so a subclass override is bypassed
    // exactly as with an explicit class-name call.
    self::__construct($srcenc, $mode, $tgtenc);
}
215 // {{{ php5 constructor
/**
 * PHP5 constructor.
 *
 * Stores the requested configuration only; the parser itself is
 * created later, not here.
 *
 * @param string $srcenc source charset encoding, use NULL (default) to use
 *                       whatever the document specifies
 * @param string $mode   how this parser object should work, "event" for
 *                       startelement/endelement-type events, "func"
 *                       to have it call functions named after elements
 * @param string $tgtenc a valid target encoding
 */
function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
{
    // Hand the name of the package's error class to the PEAR base constructor.
    parent::__construct('XML_Parser_Error');

    $this->tgtenc = $tgtenc;
    $this->srcenc = $srcenc;
    $this->mode   = $mode;
}
/**
 * Sets the mode of the parser.
 *
 * Possible modes are:
 * - func
 * - event
 *
 * You can set the mode using the second parameter
 * in the constructor.
 *
 * This method is only needed, when switching to a new
 * mode at a later point.
 *
 * @param string $mode mode, either 'func' or 'event'
 *
 * @return boolean|object true on success, PEAR_Error otherwise
 */
function setMode($mode)
{
    if ($mode !== 'func' && $mode !== 'event') {
        // Return the error instead of discarding it, so that an invalid
        // mode is never stored and the caller is told about the failure.
        return $this->raiseError('Unsupported mode given',
            XML_PARSER_ERROR_UNSUPPORTED_MODE);
    }

    $this->mode = $mode;
    return true;
}
/**
 * Registers the object that receives the XML events.
 *
 * A separate handler object lets you keep the event callbacks
 * independent of the parser object that you are using and easily
 * switch the underlying parser.
 *
 * If no object is set, XML_Parser assumes that you extend this
 * class and handles the events in $this.
 *
 * @param object &$obj object to handle the events
 *
 * @return boolean will always return true
 */
function setHandlerObj(&$obj)
{
    // Bind by reference, as the signature takes the handler by reference.
    $this->_handlerObj = &$obj;

    return true;
}
/**
 * Init the element handlers
 *
 * Registers the element handlers that match the current mode and every
 * additional handler from $this->handler that the handler object
 * actually implements.
 *
 * @return mixed false if no parser has been created yet, a PEAR error
 *               object for an unsupported mode, otherwise nothing
 */
function _initHandlers()
{
    // xml_parser_create() yields a resource on older PHP versions and an
    // XMLParser object on PHP >= 8.0; both are valid parsers.
    if (!is_resource($this->parser) && !($this->parser instanceof XMLParser)) {
        return false;
    }

    // Without an explicit handler object the parser handles its own events.
    if (!is_object($this->_handlerObj)) {
        $this->_handlerObj = &$this;
    }

    switch ($this->mode) {
    case 'func':
        // Element events go through this object's func*Handler methods,
        // which dispatch to xmltag_* methods of the handler object.
        xml_set_object($this->parser, $this->_handlerObj);
        xml_set_element_handler($this->parser,
            array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
        break;

    case 'event':
        xml_set_object($this->parser, $this->_handlerObj);
        xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
        break;

    default:
        return $this->raiseError('Unsupported mode given',
            XML_PARSER_ERROR_UNSUPPORTED_MODE);
    }

    // Set additional handlers for character data, entities, etc.,
    // but only those the handler object implements.
    foreach ($this->handler as $xml_func => $method) {
        if (method_exists($this->_handlerObj, $method)) {
            $xml_func = 'xml_set_' . $xml_func;
            $xml_func($this->parser, $method);
        }
    }
}
/**
 * create the XML parser resource
 *
 * Has been moved from the constructor to avoid
 * problems with object references.
 *
 * Furthermore it allows us returning an error
 * if something fails.
 *
 * @return bool|PEAR_Error true on success, PEAR_Error otherwise
 *
 * @see xml_parser_create
 */
function _create()
{
    // Validate the source encoding first, so xml_parser_create() is never
    // called with an encoding this class considers invalid.
    if (!empty($this->srcenc)
        && !in_array(strtoupper($this->srcenc), $this->_validEncodings)
    ) {
        return $this->raiseError('invalid source encoding',
            XML_PARSER_ERROR_INVALID_ENCODING);
    }

    if ($this->srcenc === null) {
        $xp = xml_parser_create();
    } else {
        $xp = xml_parser_create($this->srcenc);
    }

    // A parser is a resource on older PHP versions and an XMLParser
    // object on PHP >= 8.0.
    if (!is_resource($xp) && !($xp instanceof XMLParser)) {
        return $this->raiseError('Unable to create XML parser resource.',
            XML_PARSER_ERROR_NO_RESOURCE);
    }

    if ($this->tgtenc !== null) {
        if (!xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
            $this->tgtenc)
        ) {
            return $this->raiseError('invalid target encoding',
                XML_PARSER_ERROR_INVALID_ENCODING);
        }
    }

    // The handlers are registered on $this->parser, so store it first.
    $this->parser = $xp;
    $result       = $this->_initHandlers();
    if ($this->isError($result)) {
        return $result;
    }
    xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);

    return true;
}
/**
 * Resets the parser by creating a fresh one.
 *
 * This allows you to use one parser instance
 * to parse multiple XML documents.
 *
 * @return boolean|object true on success, PEAR_Error otherwise
 */
function reset()
{
    $outcome = $this->_create();

    // Pass a creation error through untouched; anything else is success.
    return $this->isError($outcome) ? $outcome : true;
}
403 // {{{ setInputFile()
/**
 * Sets the input xml file to be parsed
 *
 * @param string $file Filename (full path)
 *
 * @return resource|PEAR_Error fopen handle of the given file, or an
 *                             error object if it cannot be opened
 * @throws XML_Parser_Error
 * @see setInput(), setInputString(), parse()
 */
function setInputFile($file)
{
    // Remote files (including the TLS variants https:// and ftps://)
    // can only be opened when allow_url_fopen is enabled.
    if (preg_match('/^(https?|ftps?):\/\//i', substr($file, 0, 10))) {
        if (!ini_get('allow_url_fopen')) {
            return $this->raiseError(
                'Remote files cannot be parsed, as safe mode is enabled.',
                XML_PARSER_ERROR_REMOTE);
        }
    }

    // Warnings are suppressed; failure is reported through the error object.
    $fp = @fopen($file, 'rb');
    if (is_resource($fp)) {
        $this->fp = $fp;
        return $fp;
    }
    return $this->raiseError('File could not be opened.',
        XML_PARSER_ERROR_FILE_NOT_READABLE);
}
438 // {{{ setInputString()
/**
 * XML_Parser::setInputString()
 *
 * Uses the given string as the XML input. The string is kept in
 * $this->fp, the same property that holds a file handle for file input.
 *
 * @param string $data a string containing the XML document
 *
 * @return null
 */
function setInputString($data)
{
    $this->fp = $data;

    return null;
}
/**
 * Sets the file handle to use with parse().
 *
 * You should use setInputFile() or setInputString() if you
 * pass a string.
 *
 * @param mixed $fp Can be either a resource returned from fopen(),
 *                  a URL, a local filename or a string.
 *
 * @return mixed true (or the result of setInputFile()) on success,
 *               a PEAR error object for any other kind of input
 * @uses setInputString(), setInputFile()
 */
function setInput($fp)
{
    if (is_resource($fp)) {
        $this->fp = $fp;
        return true;
    }

    // Anything that is neither a resource nor a string cannot be a URL,
    // a filename or XML data, so reject it explicitly.
    if (!is_string($fp)) {
        return $this->raiseError('Illegal input format',
            XML_PARSER_ERROR_INVALID_INPUT);
    }

    // An absolute URL (scheme at the beginning) or an existing local file
    // is opened as a file.
    if (preg_match('/^[a-z]+:\/\//i', substr($fp, 0, 10)) || file_exists($fp)) {
        return $this->setInputFile($fp);
    }

    // Otherwise the string itself is taken as the XML document.
    $this->fp = $fp;
    return true;
}
/**
 * Central parsing function.
 *
 * Creates a fresh parser, feeds it the configured input (an open file
 * handle read in 4096-byte chunks, or a string) and frees the parser
 * afterwards.
 *
 * @return bool|PEAR_Error returns true on success, or a PEAR_Error otherwise
 */
function parse()
{
    // reset the parser
    $result = $this->reset();
    if ($this->isError($result)) {
        return $result;
    }

    if (is_resource($this->fp)) {
        // $this->fp was fopened previously: read it chunk by chunk.
        do {
            $data = fread($this->fp, 4096);
            if ($data === false) {
                $data = '';
            }
            // An empty read also ends the input. This guarantees that the
            // parser always receives the final flag, even if the file size
            // is an exact multiple of the chunk size or the file is empty.
            $final = ($data === '') || feof($this->fp);
            if (!$this->_parseString($data, $final)) {
                // Build the error before free() discards the parser.
                $error = &$this->raiseError();
                $this->free();
                return $error;
            }
        } while (!$final);
    } else {
        // otherwise, $this->fp must be a string
        if (!$this->_parseString($this->fp, true)) {
            $error = &$this->raiseError();
            $this->free();
            return $error;
        }
    }
    $this->free();

    return true;
}
/**
 * XML_Parser::_parseString()
 *
 * Thin wrapper that passes one piece of data to xml_parse() using
 * the current parser.
 *
 * @param string $data data
 * @param bool   $eof  end-of-file flag
 *
 * @return mixed the return value of xml_parse()
 */
function _parseString($data, $eof = false)
{
    $status = xml_parse($this->parser, $data, $eof);

    return $status;
}
/**
 * XML_Parser::parseString()
 *
 * Parses a piece of XML data. A parser is created on demand if none
 * exists; it is freed again after an error or after the last piece.
 *
 * @param string  $data XML data
 * @param boolean $eof  If set and TRUE, data is the last piece
 *                      of data sent in this parser
 *
 * @return bool|PEAR_Error true on success or a PEAR Error
 * @throws XML_Parser_Error
 * @see _parseString()
 */
function parseString($data, $eof = false)
{
    // A parser is a resource on older PHP versions and an XMLParser
    // object on PHP >= 8.0.
    if (!isset($this->parser)
        || (!is_resource($this->parser) && !($this->parser instanceof XMLParser))
    ) {
        // Do not continue without a parser if creating one failed.
        $result = $this->reset();
        if ($this->isError($result)) {
            return $result;
        }
    }

    if (!$this->_parseString($data, $eof)) {
        // Build the error before free() discards the parser.
        $error = &$this->raiseError();
        $this->free();
        return $error;
    }

    if ($eof === true) {
        $this->free();
    }
    return true;
}
/**
 * Free the internal resources associated with the parser
 *
 * Releases the parser and closes the input file handle, if any.
 *
 * @return null
 */
function free()
{
    if (isset($this->parser)) {
        if (is_resource($this->parser)) {
            xml_parser_free($this->parser);
            unset($this->parser);
        } elseif ($this->parser instanceof XMLParser) {
            // On PHP >= 8.0 the parser is an object; dropping the reference
            // releases it and keeps a finished parser from being reused.
            unset($this->parser);
        }
    }
    if (isset($this->fp) && is_resource($this->fp)) {
        fclose($this->fp);
    }
    unset($this->fp);
    return null;
}
/**
 * XML_Parser::raiseError()
 *
 * Creates an XML_Parser_Error and passes it to the parent's raiseError().
 * If no message is given, the current parser is passed to the error
 * object instead of a message.
 *
 * Only $message and $code are used; the remaining parameters are
 * accepted but ignored by this method.
 *
 * @param string  $message     the error message
 * @param integer $code        the error code, NULL is treated as 0
 * @param mixed   $mode        unused
 * @param mixed   $options     unused
 * @param mixed   $userinfo    unused
 * @param mixed   $error_class unused
 * @param mixed   $skipmsg     unused
 *
 * @return XML_Parser_Error reference to the error object
 */
function &raiseError($message= NULL, $code = NULL, $mode = NULL, $options = NULL, $userinfo = NULL, $error_class = NULL, $skipmsg = false)
{
    $code = is_null($code) ? 0 : $code;
    $msg  = !is_null($message) ? $message : $this->parser;
    // Pass the normalised $code on, so the error keeps the requested code.
    $err  = new XML_Parser_Error($msg, $code);

    // Only variables can be returned by reference, so store the result first.
    $result = parent::raiseError($err);
    return $result;
}
622 // {{{ funcStartHandler()
/**
 * Derives and calls the start handler of an element ("func" mode).
 *
 * The method name is "xmltag_" plus the element name, with '.', '-'
 * and ':' replaced by '_'. If the handler object has no such method,
 * its generic "xmltag" method is used; if that is missing too,
 * nothing is called.
 *
 * @param mixed $xp      the parser, as passed in by the xml extension
 * @param mixed $elem    name of the element
 * @param mixed $attribs attributes of the element
 *
 * @return void
 */
function funcStartHandler($xp, $elem, $attribs)
{
    $method = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem);

    if (!method_exists($this->_handlerObj, $method)) {
        if (!method_exists($this->_handlerObj, 'xmltag')) {
            return;
        }
        // Fall back to the generic start handler.
        $method = 'xmltag';
    }

    call_user_func(array(&$this->_handlerObj, $method), $xp, $elem, $attribs);
}
646 // {{{ funcEndHandler()
/**
 * Derives and calls the end handler of an element ("func" mode).
 *
 * The method name is "xmltag_" plus the element name plus "_", with
 * '.', '-' and ':' replaced by '_'. If the handler object has no such
 * method, its generic "xmltag_" method is used; if that is missing
 * too, nothing is called.
 *
 * @param mixed $xp   the parser, as passed in by the xml extension
 * @param mixed $elem name of the element
 *
 * @return void
 */
function funcEndHandler($xp, $elem)
{
    $method = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem . '_');

    if (!method_exists($this->_handlerObj, $method)) {
        if (!method_exists($this->_handlerObj, 'xmltag_')) {
            return;
        }
        // Fall back to the generic end handler.
        $method = 'xmltag_';
    }

    call_user_func(array(&$this->_handlerObj, $method), $xp, $elem);
}
668 // {{{ startHandler()
/**
 * Placeholder start handler for "event" mode.
 *
 * Does nothing; meant to be overridden by a subclass or provided
 * by the handler object.
 *
 * @param mixed $xp       the parser, as passed in by the xml extension
 * @param mixed $elem     name of the element
 * @param mixed &$attribs attributes of the element
 *
 * @return null
 */
function startHandler($xp, $elem, &$attribs)
{
    return null;
}
/**
 * Placeholder end handler for "event" mode.
 *
 * Does nothing; meant to be overridden by a subclass or provided
 * by the handler object.
 *
 * @param mixed $xp   the parser, as passed in by the xml extension
 * @param mixed $elem name of the element
 *
 * @return null
 */
function endHandler($xp, $elem)
{
    return null;
}
/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @category  XML
 * @package   XML_Parser
 * @author    Stig Bakken <ssb@fast.no>
 * @author    Tomas V.V.Cox <cox@idecnet.com>
 * @author    Stephan Schmidt <schst@php.net>
 * @copyright 2002-2008 The PHP Group
 * @license   http://opensource.org/licenses/bsd-license New BSD License
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/XML_Parser
 */
class XML_Parser_Error extends PEAR_Error
{
    /**
     * prefix for all messages
     *
     * @var string
     */
    var $error_message_prefix = 'XML_Parser: ';

    /**
     * construct a new error instance
     *
     * You may either pass a message or an xml_parser resource as first
     * parameter. If a parser has been passed, the last error that
     * happened will be retrieved and returned; the given $code is then
     * replaced by the parser's error code.
     *
     * @param string|resource $msgorparser message or parser resource
     * @param integer         $code        error code, NULL is treated as 0
     * @param integer         $mode        error handling
     * @param integer         $level       error level
     *
     * @todo PEAR CS - can't meet 85char line limit without arg refactoring
     */
    function __construct($msgorparser = 'unknown error', $code = NULL, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        $code = is_null($code) ? 0 : $code;
        // A parser is a resource on older PHP versions and an XMLParser
        // object on PHP >= 8.0; recognise both.
        if (is_resource($msgorparser) || $msgorparser instanceof XMLParser) {
            $code        = xml_get_error_code($msgorparser);
            $msgorparser = sprintf('%s at XML input line %d:%d',
                xml_error_string($code),
                xml_get_current_line_number($msgorparser),
                xml_get_current_column_number($msgorparser));
        }
        parent::__construct($msgorparser, $code, $mode, $level);
    }
}