<?php
// NOTE(review): the original license/author banner is truncated in this copy
// of the file; restore it from upstream rather than reconstructing it here.
// +----------------------------------------------------------------------+
//
/**
* Source parser for PHP_CodeDoc.
*
* Loads the tokenizer extension, pulls in the data and reader classes and
* declares PHP_CodeDoc_Parser, which tokenizes PHP files and dispatches the
* interesting tokens to the PHP_CodeDoc_Parser_* readers.
*
* @package PHP_CodeDoc
* @access public
* @author Alan Knowles
*
*/
PEAR::loadExtension('tokenizer');

ini_set("memory_limit","128M");
set_time_limit(0);
error_reporting(E_ALL);



require_once("PHP/CodeDoc/Data/Class.php");
require_once("PHP/CodeDoc/Data/Var.php");
require_once("PHP/CodeDoc/Data/Define.php");
require_once("PHP/CodeDoc/Data/Method.php");
require_once("PHP/CodeDoc/Data/PhpDoc.php");
require_once("PHP/CodeDoc/Data/Directory.php");
require_once("PHP/CodeDoc/Data/Package.php");
require_once("PHP/CodeDoc/Data/Docbook.php");

require_once("PHP/CodeDoc/Parser/Class.php");
require_once("PHP/CodeDoc/Parser/Var.php");
require_once("PHP/CodeDoc/Parser/Define.php");
require_once("PHP/CodeDoc/Parser/Method.php");
require_once("PHP/CodeDoc/Parser/Comment.php");
require_once("PHP/CodeDoc/Parser/Docbook.php");



/**
* Tokenizes PHP source files and walks the token stream.
*
* For every file the token list produced by token_get_all() is stored in
* $tokens and scanned by _parse(); class, function, var, identifier and
* comment tokens are handed to the static PHP_CodeDoc_Parser_*::read()
* readers. Configuration is read from PHP_CodeDoc::$options.
*
* NOTE(review): the readers are called statically and receive no reference
* to this object, so they presumably reach the parser state ($tokens, $pos,
* ...) through some shared handle defined elsewhere - confirm.
*/
class PHP_CodeDoc_Parser {

    public $original_URL_base; // base url, prefixed to the per-file url

    public $_level; // what level is the parser at (0 = base, 1=in class)
    public $pos=0; // current position in $tokens
    public $total=0; // total number of tokens in current file
    public $tokens; // the array of tokens

    public $_active_class =0; // id of active class being created
    public $active_class; // during output this is the currently dumped class object
    public $activeFile; // active file being parsed
    public $active_package; // active used package object
    public $active_directory; // active used directory object

    public $start_file = ""; // the file (or directory) that was called at the start..
    public $packages; // associative array of package name -> package object
    public $_base_dir; // root of tree (parent directory of the source directory)
    public $_base_dir_len; // strlen of $_base_dir, used to strip it from absolute paths

    public $files = array(); // directory => list of file names queued by _build()
    public $rdir; // directory of the active file, relative to $_base_dir
    public $rfilename; // path of the active file, relative to $_base_dir
    public $file_url; // $original_URL_base plus the relative path of the active file
    public $copyright; // copyright text found in the active file


    public $_parser_class; // class parser
    public $_parser_method;// method parser
    public $_parser_var;// var parser
    public $_parser_comment;// comment parser

    public $classes = array(); // array of classes
    public $defines = array(); // array of defines

    public $_init =0; // initialization flag (has init been run?)

    public $_pre_output =0; // pre output flag;

    public $last_comment_block = ""; // last comment block found

    public $classes_by_name; // array assoc name=>class
    public $classes_by_directory; // array assoc directory=>name=>class
    public $classes_by_package; // array assoc package=>directory=>name=>class


    /**
    * Main entry - start parsing a file or directory.
    *
    * Reads the target from PHP_CodeDoc::$options['source']. A directory is
    * scanned recursively with _build() and every queued file is parsed; any
    * other value is parsed as a single file. When the 'maxFiles' option is
    * set, parsing stops once that many files have been processed.
    *
    * @return void
    */
    public function start()
    {
        $options = PHP_CodeDoc::$options;

        $filename = $options['source'];

        $this->_initializeParsers();

        if (!is_dir($filename)) {
            $this->_parseFile($filename);
            return;
        }

        $this->_base_dir = dirname(realpath($filename));
        $this->_base_dir_len = strlen($this->_base_dir);
        $this->_build($filename);

        $parsed = 0;
        foreach ($this->files as $dir => $files) {
            // The braces matter: without them only _parseFile() belonged to
            // the inner loop and the limit was counted per directory.
            foreach ($files as $file) {
                $this->_parseFile($dir . "/" . $file);
                $parsed++;
                if (!empty($options['maxFiles']) && $parsed >= $options['maxFiles']) {
                    return;
                }
            }
        }
    }


    /**
    * One-time setup: registers the fallback 'No Package' package.
    *
    * @return void
    */
    public function _initializeParsers()
    {
        if ($this->_init) {
            return;
        }
        $this->_init = 1;

        $this->packages['No Package'] = new PHP_CodeDoc_Data_Package;
        $this->packages['No Package']->name = "No Package";
    }


    /**
    * Build the list of files to parse.
    *
    * Recursively scans $filename and records every *.inc, *.php and *.class
    * file in $this->files[directory]. Entries starting with a dot and the
    * CVS / RCS directories are skipped.
    *
    * @param string $filename directory to scan
    * @return void
    */
    public function _build($filename)
    {
        $this->files[$filename] = array();

        $fh = opendir($filename);
        if ($fh === false) {
            // opendir() has already raised a warning; there is nothing to scan.
            return;
        }

        while (($file = readdir($fh)) !== false) {
            if ($file[0] == ".") {
                continue;
            }
            $path = $filename . "/" . $file;

            if (is_file($path)) {
                // D modifier: '$' only matches at the very end, like the
                // POSIX ereg() pattern this replaces.
                if (preg_match('/\.(inc|php|class)$/D', $file)) {
                    $this->files[$filename][] = $file;
                }
            } elseif (is_dir($path)) {
                if ($file == "CVS" || $file == "RCS") {
                    continue;
                }
                $this->_build($path);
            }
        }
        closedir($fh);
    }


    /**
    * Tokenize a file and start parsing.
    *
    * Prints a progress line, works out the paths relative to $_base_dir,
    * tokenizes the file, selects (or creates) the active package and
    * directory objects and runs _parse() on the tokens. Empty files are
    * skipped.
    *
    * The package name comes from the first "@package" doc tag, or - when
    * the 'perDirPackages' option is set - from the relative path; it falls
    * back to 'No Package'.
    *
    * @param string $filename file to parse
    * @return void
    */
    public function _parseFile($filename)
    {
        echo "PARSING: $filename\n";

        $this->activeFile = $filename;
        if (!filesize($filename)) {
            return;
        }

        // $_base_dir_len is only set when a directory is being parsed; for a
        // single file nothing is stripped (offset 0).
        $baseLen  = (int) $this->_base_dir_len;
        $realPath = realpath($this->activeFile);

        $this->rdir      = substr(dirname($realPath), $baseLen);
        $this->rfilename = substr($realPath, $baseLen);

        // Blank out the second path segment and drop the resulting leading
        // "//" so the url does not contain it.
        $tmpar    = explode('/', $this->rfilename);
        $tmpar[1] = '';
        $base     = substr(implode('/', $tmpar), 2);
        $this->file_url = $this->original_URL_base . $base;

        $data = file_get_contents($filename);
        if ($data === false) {
            // file_get_contents() has already raised a warning.
            return;
        }
        $this->tokens = token_get_all($data);
        /* use this if you have a broken tokenizer
        $fh = popen(dirname(__FILE__)."/tokenizer_serialize.php $filename","r");
        $this->tokens= unserialize(fread($fh,filesize($filename) * 16));
        pclose($fh);
        */

        $pname   = 'No Package';
        $options = PHP_CodeDoc::$options;
        if (empty($options['perDirPackages'])) {
            if (preg_match('/^\s*\*\s*@package\s*(.*)$/im', $data, $args)) {
                $tagged = trim($args[1]);
                // An empty tag must not create a package called ''.
                if ($tagged !== '') {
                    $pname = $tagged;
                }
            }
        } else {
            $parts = explode('/', $this->rfilename);
            // A leading "/" makes segment 0 empty, which shifts the wanted
            // segment from index 1 to index 2.
            $index = (substr($this->rfilename, 0, 1) == "/") ? 2 : 1;
            $pname = isset($parts[$index]) ? $parts[$index] : '';
            $pname = preg_replace('/\.php$/', '', $pname);
            if (!$pname) {
                $pname = "No Package";
            }
            echo "\nUsing package (from dir {$this->rfilename} ) {$pname}\n";
        }

        if (empty($this->packages[$pname])) {
            $this->packages[$pname] = new PHP_CodeDoc_Data_Package;
            $this->packages[$pname]->name = $pname;
        }
        $this->active_package = &$this->packages[$pname];

        if (empty($this->active_package->directories[$this->rdir])) {
            $this->active_package->directories[$this->rdir] = new PHP_CodeDoc_Data_Directory;
            $this->active_package->directories[$this->rdir]->name = $this->rdir;
        }
        $this->active_directory = &$this->active_package->directories[$this->rdir];

        $this->total = count($this->tokens);
        if ($this->total) {
            $this->_parse();
        }

        // Release the token list but keep the declared properties defined.
        $this->tokens = null;
        $this->total  = 0;
    }


    /**
    * Read the tokens and make the classes.
    *
    * Walks $this->tokens from the start. Class, function, var and
    * identifier tokens are passed to the matching reader. Comments update
    * $last_comment_block; block comments are additionally scanned for a
    * copyright notice (outside classes only) and passed to the Docbook
    * reader. Raw "{" / "}" tokens outside parentheses adjust $_level.
    *
    * @return int 1 if a class token was found, 0 otherwise
    */
    public function _parse()
    {
        $options = PHP_CodeDoc::$options;
        $debug   = !empty($options['debug']);

        $this->_level = 0;
        $this->pos    = 0;

        $this->copyright = " No Copyright specified ";
        $inclass     = 0;
        $class_found = 0;
        $inbrak      = 0; // depth of open parentheses
        $this->total = count($this->tokens);

        while ($this->pos < $this->total) {
            if ($debug) {
                echo "{$this->activeFile}:{$this->pos}/{$this->total}\n";
            }
            $v = $this->tokens[$this->pos];

            if (!is_array($v)) {
                // Single-character token: track brace and parenthesis nesting.
                $raw = trim($v);
                if (!$inbrak && $raw == "}") {
                    $this->_level--;
                }
                if (!$inbrak && $raw == "{") {
                    $this->_level++;
                }
                if ($raw == "(") {
                    $inbrak++;
                }
                if ($raw == ")") {
                    $inbrak--;
                }
                $this->pos++;
                continue;
            }

            switch ($v[0]) {
                case T_CLASS:
                    PHP_CodeDoc_Parser_Class::read();
                    $class_found = 1;
                    $inclass     = 1;
                    break;

                case T_FUNCTION:
                    PHP_CodeDoc_Parser_Method::read($inclass);
                    break;

                case T_VAR:
                    PHP_CodeDoc_Parser_Var::read($inclass);
                    break;

                case T_STRING:
                    PHP_CodeDoc_Parser_Define::read($v[1]);
                    break;

                case T_DOC_COMMENT:
                case T_COMMENT:
                    $this->last_comment_block = $v[1];

                    if (substr(trim($v[1]), 0, 2) == "//") {
                        // Line comments are only remembered. No extra
                        // $this->pos++ here: the loop tail already advances,
                        // and a second increment skipped the next token.
                        break;
                    }

                    if (!$inclass && preg_match('/copyright/is', $v[1])) {
                        // Strip the comment opener, the closer and the
                        // leading stars of each line.
                        $comment = preg_replace('/^\s*\/(\*)+/m', '', $v[1]);
                        $comment = preg_replace('/(\*)+\/\s*$/m', '', $comment);
                        $comment = preg_replace('/^\s*(\*)+/m', '', $comment);
                        // technically should look back and front and gather all comments
                        $this->copyright = $comment;
                    }

                    PHP_CodeDoc_Parser_Docbook::read($this->last_comment_block, $inclass);
                    break;

                default:
                    // whitespace and every other token: nothing to do
            }
            $this->pos++;
        }
        return $class_found;
    }


    /**
    * Print out a token for debugging.
    *
    * @param mixed $pos a numeric position in $this->tokens, or a token itself
    * @return void
    */
    public function debugToken($pos)
    {
        $v = !is_numeric($pos) ? $pos : $this->tokens[$pos];
        if (is_array($v)) {
            echo ":" . token_name($v[0]) . ":" . $v[1] . ":\n";
        } else {
            echo ":RAW: " . $v . ":\n";
        }
    }

    /**
    * Find the position of a token, scanning forward from $start.
    *
    * e.g. find_token_pos(19, array(T_EXTENDS), "{");
    * looks for T_EXTENDS and gives up when it reaches "{".
    *
    * $look and $stop can each be
    *    array(T_EXTENDS)           - a token id
    *    array(T_STRING,'define')   - a token id and its text
    *    '{'                        - a raw single-character token
    *    ''                         - any raw token
    *
    * @param int          $start position to start scanning at
    * @param array|string $look  token to find
    * @param array|string $stop  token that ends the search
    * @return int|false Position of the token; FALSE when $stop or the end of
    *                   the token list is reached first. Compare with ===,
    *                   position 0 is a valid result.
    */
    public function find_token_pos($start, $look, $stop)
    {
        for ($p = $start; $p < $this->total; $p++) {
            if ($this->compare_token($p, $look)) {
                return $p;
            }
            if ($this->compare_token($p, $stop)) {
                return FALSE;
            }
        }
        return FALSE;
    }

    /**
    * Look at the $num-th token before or after the current position,
    * ignoring whitespace and comments.
    *
    * @param int   $num  how many significant tokens to move; positive looks
    *                    forward, negative looks backward
    * @param mixed $stop unused, kept for existing callers
    * @return false|string|int the token id for array tokens, the raw string
    *                    for single-character tokens, FALSE when the token
    *                    list runs out
    * @throws Exception when $num is 0
    */
    public function look_nws($num, $stop)
    {
        if (!$num) {
            throw new Exception('invalid direction');
        }
        $dir    = $num > 0 ? 1 : -1;
        $wanted = abs($num);
        $skip   = array(T_DOC_COMMENT, T_WHITESPACE, T_COMMENT);

        $seen = 0;
        for ($p = $this->pos + $dir; isset($this->tokens[$p]); $p += $dir) {
            $t = $this->tokens[$p];
            if (is_array($t) && in_array($t[0], $skip, true)) {
                continue;
            }
            $seen++;
            if ($seen >= $wanted) {
                return is_array($t) ? $t[0] : $t;
            }
        }
        return false;
    }


    /**
    * Test whether the token at $pos matches a token description.
    *
    * @param int          $pos   position in $this->tokens
    * @param array|string $token array(id) or array(id, text) for array
    *                            tokens; a string for raw tokens, where ""
    *                            matches any raw token
    * @return bool
    */
    public function compare_token($pos, $token)
    {
        if (!isset($this->tokens[$pos])) {
            return FALSE;
        }
        $t = $this->tokens[$pos];

        if (is_array($t) != is_array($token)) {
            // an array token never matches a raw description and vice versa
            return FALSE;
        }

        if (is_array($t)) {
            if ($t[0] != $token[0]) {
                return FALSE;
            }
            if (count($token) == 1) {
                return TRUE;
            }
            // strict: "1e1" and "10" are different source text
            return trim($t[1]) === trim($token[1]);
        }

        if ($token == "") {
            return TRUE; // looking for any string
        }
        return trim($t) === trim($token);
    }

    /**
    * Replace inner path segments with "..".
    *
    * Segments 1 .. count-3 of the "/"-separated path are replaced, so
    * "/xxx/yyy/zzz" (4 segments, the first one empty) becomes "/../yyy/zzz".
    *
    * not sure if this is the best place for this!
    *
    * @param string $dir path to rewrite
    * @return string
    */
    public function output_prune($dir)
    {
        $d = explode('/', $dir);
        $c = count($d) - 2;
        for ($i = 1; $i < $c; $i++) {
            $d[$i] = "..";
        }
        return implode('/', $d);
    }

    /**
    * Debug hook - intentionally disabled, does nothing.
    *
    * When it was enabled it printed the line number of the current token
    * followed by $meth and $com.
    *
    * @param string $meth name of the calling method
    * @param string $com  message
    * @return void
    */
    public function debug($meth, $com)
    {
        return;
    }

    /**
    * Dump the token at $pos for debugging.
    *
    * @param int $pos position in $this->tokens
    * @return void
    */
    public function debugTok($pos)
    {
        if (empty($this->tokens[$pos])) {
            return;
        }
        $v = $this->tokens[$pos];
        var_dump($v);
        if (!is_array($v)) {
            // raw single-character tokens carry no id or line number
            echo "$pos:RAW:" . $v . "\n";
            return;
        }
        echo "{$v[2]}: $pos:" . token_name($v[0]) . ":" . $v[1] . "\n";
    }


}