1 <?php # vim:ts=2:sw=2:et:
2 /* For licensing and copyright terms, see the file named LICENSE */
4 class MTrack_Wiki_Parser {
# Regex fragment for text that looks like an e-mail address: a local
# part, "@", one or more dot-terminated labels, then a final label that
# starts with a letter. Spliced into the "email" rule of $post_rules.
6 const EMAIL_LOOKALIKE_PATTERN =
7 "[a-zA-Z0-9.'=+_-]+@(?:[a-zA-Z0-9_-]+\.)+[a-zA-Z](?:[-a-zA-Z\d]*[a-zA-Z\d])?";
# Inline markup tokens. Each one is spliced into a regex by $pre_rules,
# so tokens containing regex metacharacters are stored already escaped.
8 const BOLDITALIC_TOKEN = "'''''";
9 const BOLD_TOKEN = "'''";
10 const ITALIC_TOKEN = "''";
11 const UNDERLINE_TOKEN = "__";
12 const STRIKE_TOKEN = "~~";
13 const SUBSCRIPT_TOKEN = ",,";
# "^" is a regex metacharacter, hence the backslash.
14 const SUPERSCRIPT_TOKEN = "\^";
15 const INLINE_TOKEN = "`";
# Code block delimiters: the *_TOKEN forms are regex-escaped, while
# STARTBLOCK / ENDBLOCK hold the same markers as plain literal text.
16 const STARTBLOCK_TOKEN = "\{\{\{";
17 const STARTBLOCK = "{{{";
18 const ENDBLOCK_TOKEN = "\}\}\}";
19 const ENDBLOCK = "}}}";
20 const LINK_SCHEME = "[\w.+-]+"; # as per RFC 2396
21 const INTERTRAC_SCHEME = "[a-zA-Z.+-]*?"; # no digits (support for shorthand links)
# A non-empty string wrapped in single or in double quotes.
23 const QUOTED_STRING = "'[^']+'|\"[^\"]+\"";
# Building blocks for the target of a "scheme:target" link (the "shref"
# rule): first character, any number of middle characters, last character.
# The trailing lookbehind rejects "_" even though \w would accept it.
25 const SHREF_TARGET_FIRST = "[\w/?!#@](?<!_)"; # we don't want "_"
# Middle: any character other than "|", "<", ">" or whitespace, or a "|"
# that is followed by something other than "|" or whitespace.
26 const SHREF_TARGET_MIDDLE = "(?:\|(?=[^|\s])|[^|<>\s])";
27 const SHREF_TARGET_LAST = "[\w/=](?<!_)"; # we don't want "_"
# Relative target of a bracketed link (the "rel" group of the "lhref"
# rule): starts with "/" or "#", or is "." / ".." optionally followed by
# a "/" or "#" part. Never contains whitespace or "]".
29 const LHREF_RELATIVE_TARGET = "[/#][^\s\]]*|\.\.?(?:[/#][^\s\]]*)?";
31 # See http://www.w3.org/TR/REC-xml/#id
# First character is a word character or ":" but not a digit; the rest
# may also contain "." and "-".
32 const XML_NAME = "[\w:](?<!\d)[\w:.-]*";
# Negative lookbehinds placed directly after a "\w" in the wiki page name
# rules. "\w" followed by LOWER only accepts a word character outside
# A-Z, 0-9 and "_"; followed by UPPER it only accepts one outside a-z,
# 0-9 and "_".
34 const LOWER = '(?<![A-Z0-9_])';
35 const UPPER = '(?<![a-z0-9_])';
# Rules for inline text styling and inline code. Each entry is
# array(format, args...): _build_rule() fills the %s placeholders with
# vsprintf() to produce one named-group regex alternative.
# prepare_rules() adds these ahead of $post_rules, and the alternatives
# are joined with "|" in this order, so the longer quote tokens are
# listed before the shorter ones they contain.
# Every pattern accepts an optional leading "!" as part of the match
# (presumably the escape marker that suppresses the markup; confirm
# against the formatter code).
37 static $pre_rules = array(
38 array("(?P<bolditalic>!?%s)", self::BOLDITALIC_TOKEN),
39 array("(?P<bold>!?%s)" , self::BOLD_TOKEN),
40 array("(?P<italic>!?%s)" , self::ITALIC_TOKEN),
41 array("(?P<underline>!?%s)" , self::UNDERLINE_TOKEN),
42 array("(?P<strike>!?%s)" , self::STRIKE_TOKEN),
43 array("(?P<subscript>!?%s)" , self::SUBSCRIPT_TOKEN),
44 array("(?P<superscript>!?%s)" , self::SUPERSCRIPT_TOKEN),
# Text between "{{{" and "}}}", captured non-greedily as "inline".
45 array("(?P<inlinecode>!?%s(?P<inline>.*?)%s)" ,
46 self::STARTBLOCK_TOKEN, self::ENDBLOCK_TOKEN),
# Text between two backticks, captured non-greedily as "inline2".
47 array("(?P<inlinecode2>!?%s(?P<inline2>.*?)%s)",
48 self::INLINE_TOKEN, self::INLINE_TOKEN),
# Rules for links, references and line-level structure. Entries are
# either a finished regex string or array(format, args...), which
# _build_rule() expands with vsprintf(). prepare_rules() adds them after
# $pre_rules and joins all alternatives with "|" in listed order.
50 static $post_rules = array(
# CamelCase wiki page name, optionally followed by "@<digits>" and
# "#<anchor>"; must not be preceded by "/".
52 array("(?P<wikipagename>!?(?<!/)\\b\w%s(?:\w%s)+(?:\w%s(?:\w%s)*[\w/]%s)+(?:@\d+)?(?:#%s)?(?=:(?:\Z|\s)|[^:a-zA-Z]|\s|\Z))",
53 self::UPPER, self::LOWER, self::UPPER, self::LOWER, self::LOWER, self::XML_NAME),
54 # [WikiPageName with label]
55 array("(?P<wikipagenamewithlabel>!?\[\w%s(?:\w%s)+(?:\w%s(?:\w%s)*[\w/]%s)+(?:@\d+)?(?:#%s)?(?=:(?:\Z|\s)|[^:a-zA-Z]|\s|\Z)\s+(?:%s|[^\]]+)\])",
56 self::UPPER, self::LOWER, self::UPPER, self::LOWER, self::LOWER, self::XML_NAME, self::QUOTED_STRING),
# Bracketed changeset: optional letters followed by digits, or a run of
# hex digits.
59 "(?P<svnchangeset>!?\[(?:(?:[a-zA-Z]+)?\d+|[a-fA-F0-9]+)\])",
# "#" followed by the same kind of identifier.
61 "(?P<ticket>!?#(?:(?:[a-zA-Z]+)?\d+|[a-fA-F0-9]+))",
# Anything wrapped in a single pair of braces.
63 "(?P<report>!?\{([^}]*)\})",
66 array("(?P<email>!?%s)" , self::EMAIL_LOOKALIKE_PATTERN),
# One or more ">" markers at the start of the subject; the whole run is
# captured as "cdepth".
68 "(?P<citation>^(?P<cdepth>>(?: *>)*))",
69 # &, < and > to &amp;, &lt; and &gt;
70 "(?P<htmlspecialcharsape>[&<>])",
# scheme:target link; the target is a quoted string or is assembled
# from the SHREF_TARGET_* character classes.
73 "(?P<shref>!?((?P<sns>%s):(?P<stgt>%s|%s(?:%s*%s)?)))",
74 self::LINK_SCHEME, self::QUOTED_STRING,
75 self::SHREF_TARGET_FIRST, self::SHREF_TARGET_MIDDLE,
76 self::SHREF_TARGET_LAST),
78 # [wiki:TracLinks with optional label] or [/relative label]
80 "(?P<lhref>!?\[(?:(?P<rel>%s)|(?P<lns>%s):(?P<ltgt>%s|[^\]\s]*))(?:\s+(?P<label>%s|[^\]]+))?\])",
81 self::LHREF_RELATIVE_TARGET, self::LINK_SCHEME,
82 self::QUOTED_STRING, self::QUOTED_STRING),
# [[Name]] or [[Name(args)]]
85 "(?P<macro>!?\[\[(?P<macroname>[\w/+-]+)(\]\]|\((?P<macroargs>.*?)\)\]\]))",
86 # == heading == #hanchor
88 "(?P<heading>^\s*(?P<hdepth>=+)\s.*\s(?P=hdepth)\s*(?P<hanchor>#%s)?(?:\s|$))", self::XML_NAME),
# Indented list item: "-" or "*" bullet, "1.", "a." or a roman numeral
# of up to five characters, followed by a space.
90 "(?P<list>^(?P<ldepth>\s+)(?:[-*]|\d+\.|[a-zA-Z]\.|[ivxIVX]{1,5}\.) )",
# Indented definition term ending in "::". The repeat count on "}" is
# spelled {0,2} rather than {,2}: older PCRE releases do not recognise
# the "{,n}" shorthand and match it as the literal text "{,2}", so a
# {{{...}}} block with content inside a term could never match.
93 "(?P<definition>^\s+((?:%s[^%s]*%s|%s(?:%s{0,2}[^%s])*?%s|[^%s%s:]+|:[^:]+)+::)(?:\s+|$))",
94 self::INLINE_TOKEN, self::INLINE_TOKEN, self::INLINE_TOKEN,
95 self::STARTBLOCK_TOKEN, '}', '}',
96 self::ENDBLOCK_TOKEN, self::INLINE_TOKEN, '{'),
# Leading whitespace before the first non-space character.
98 "(?P<indent>^(?P<idepth>\s+)(?=\S))",
# "||" table cell separators; the variant that closes the row is listed
# first so that it wins over the generic one.
100 "(?P<last_table_cell>\|\|\s*$)",
101 "(?P<table_cell>\|\|)",
# Returns the combined rule regex held in $this->compiled_rules.
# prepare_rules() is called first; it returns early when the regex has
# already been built, so repeated calls reuse the stored value.
104 function get_rules() {
105 $this->prepare_rules();
106 return $this->compiled_rules;
# Expands a list of rule definitions into plain regex strings.
#   $rules    - array passed by reference; presumably the expanded rules
#               are appended to it in the part of the body not shown
#               here (TODO confirm).
#   $rule_def - list whose entries are either a regex string or
#               array(format, args...).
109 private function _build_rule(&$rules, $rule_def) {
110 foreach ($rule_def as $rule) {
111 if (is_array($rule)) {
# The first element is the sprintf-style format; the remaining elements
# fill its %s placeholders.
112 $fmt = array_shift($rule);
113 $rule = vsprintf($fmt, $rule);
# Combined regex built by prepare_rules(); null until the first build.
# prepare_rules() returns early once this holds a value.
119 var $compiled_rules = null;
121 function prepare_rules() {
122 if ($this->compiled_rules) {
123 return $this->compiled_rules;
128 $this->_build_rule($syntax, self::$pre_rules);
129 $this->_build_rule($syntax, self::$post_rules);
131 foreach ($syntax as $rule) {
132 if (preg_match_all("/\?P<([a-z\d_]+)>/", $rule, $matches)) {
133 $helpers[] = $matches[1][0];
136 $this->helper_patterns = $helpers;
138 /* now compose it into a big regex */
139 $this->compiled_rules = "/" .
140 str_replace("/", "\\/", join('|', $syntax)) .