git.roojs.org Git - roojs1/blob - ux/Showdown.js

   1 //
   2 // showdown.js -- A javascript port of Markdown.
   3 //
   4 // Copyright (c) 2007 John Fraser.
   5 //
   6 // Original Markdown Copyright (c) 2004-2005 John Gruber
   7 //   <http://daringfireball.net/projects/markdown/>
   8 //
   9 // Redistributable under a BSD-style open source license.
  10 // See license.txt for more information.
  11 //
  12 // The full source distribution is at:
  13 //
  14 //                              A A L
  15 //                              T C A
  16 //                              T K B
  17 //
  18 //   <http://www.attacklab.net/>
  19 //
  20
  21 //
  22 // Wherever possible, Showdown is a straight, line-by-line port
  23 // of the Perl version of Markdown.
  24 //
  25 // This is not a normal parser design; it's basically just a
  26 // series of string substitutions.  It's hard to read and
  27 // maintain this way,  but keeping Showdown close to the original
  28 // design makes it easier to port new features.
  29 //
  30 // More importantly, Showdown behaves like markdown.pl in most
  31 // edge cases.  So web applications can do client-side preview
  32 // in Javascript, and then build identical HTML on the server.
  33 //
  34 // This port needs the new RegExp functionality of ECMA 262,
  35 // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
  36 // should do fine.  Even with the new regular expression features,
  37 // We do a lot of work to emulate Perl's regex functionality.
  38 // The tricky changes in this file mostly have the "attacklab:"
  39 // label.  Major or self-explanatory changes don't.
  40 //
  41 // Smart diff tools like Araxis Merge will be able to match up
  42 // this file with markdown.pl in a useful way.  A little tweaking
  43 // helps: in a copy of markdown.pl, replace "#" with "//" and
  44 // replace "$text" with "text".  Be sure to ignore whitespace
  45 // and line endings.
  46 //
  47
  48
  49 //
  50 // Showdown usage:
  51 //
  52 //   var text = "Markdown *rocks*.";
  53 //
  54 //   var converter = new Roo.ui.Showdown.converter();
  55 //   var html = converter.makeHtml(text);
  56 //
  57 //   alert(html);
  58 //
  59 // Note: move the sample code to the bottom of this
  60 // file before uncommenting it.
  61 //
  62
  63
  64 //
  65 // Showdown namespace
  66 //
  67 Roo.ui.Showdown = {};
  68
  69 //
  70 // converter
  71 //
  72 // Wraps all "globals" so that the only thing
  73 // exposed is makeHtml().
  74 //
  75 Roo.ui.Showdown.converter = function() {
  76
  77     //
  78     // Globals:
  79     //
  80
  81     // Global hashes, used by various utility routines
  82     var g_urls;
  83     var g_titles;
  84     var g_html_blocks;
  85
  86     // Used to track when we're inside an ordered or unordered list
  87     // (see _ProcessListItems() for details):
  88     var g_list_level = 0;
  89
  90
  91     this.makeHtml = function(text) {
  92     //
  93     // Main function. The order in which other subs are called here is
  94     // essential. Link and image substitutions need to happen before
  95     // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  96     // and <img> tags get encoded.
  97     //
  98
  99             // Clear the global hashes. If we don't clear these, you get conflicts
 100             // from other articles when generating a page which contains more than
 101             // one article (e.g. an index page that shows the N most recent
 102             // articles):
 103             g_urls = new Array();
 104             g_titles = new Array();
 105             g_html_blocks = new Array();
 106
 107             // attacklab: Replace ~ with ~T
 108             // This lets us use tilde as an escape char to avoid md5 hashes
 109             // The choice of character is arbitray; anything that isn't
 110         // magic in Markdown will work.
 111             text = text.replace(/~/g,"~T");
 112
 113             // attacklab: Replace $ with ~D
 114             // RegExp interprets $ as a special character
 115             // when it's in a replacement string
 116             text = text.replace(/\$/g,"~D");
 117
 118             // Standardize line endings
 119             text = text.replace(/\r\n/g,"\n"); // DOS to Unix
 120             text = text.replace(/\r/g,"\n"); // Mac to Unix
 121
 122             // Make sure text begins and ends with a couple of newlines:
 123             text = "\n\n" + text + "\n\n";
 124
 125             // Convert all tabs to spaces.
 126             text = _Detab(text);
 127
 128             // Strip any lines consisting only of spaces and tabs.
 129             // This makes subsequent regexen easier to write, because we can
 130             // match consecutive blank lines with /\n+/ instead of something
 131             // contorted like /[ \t]*\n+/ .
 132             text = text.replace(/^[ \t]+$/mg,"");
 133
 134             // Turn block-level HTML blocks into hash entries
 135             text = _HashHTMLBlocks(text);
 136
 137             // Strip link definitions, store in hashes.
 138             text = _StripLinkDefinitions(text);
 139
 140             text = _RunBlockGamut(text);
 141
 142             text = _UnescapeSpecialChars(text);
 143
 144             // attacklab: Restore dollar signs
 145             text = text.replace(/~D/g,"$$");
 146
 147             // attacklab: Restore tildes
 148             text = text.replace(/~T/g,"~");
 149
 150             return text;
 151     }
 152
 153
 154     var _StripLinkDefinitions = function(text) {
 155     //
 156     // Strips link definitions from text, stores the URLs and titles in
 157     // hash references.
 158     //
 159
 160             // Link defs are in the form: ^[id]: url "optional title"
 161
 162             /*
 163                     var text = text.replace(/
 164                                     ^[ ]{0,3}\[(.+)\]:  // id = $1  attacklab: g_tab_width - 1
 165                                       [ \t]*
 166                                       \n?                               // maybe *one* newline
 167                                       [ \t]*
 168                                     <?(\S+?)>?                  // url = $2
 169                                       [ \t]*
 170                                       \n?                               // maybe one newline
 171                                       [ \t]*
 172                                     (?:
 173                                       (\n*)                             // any lines skipped = $3 attacklab: lookbehind removed
 174                                       ["(]
 175                                       (.+?)                             // title = $4
 176                                       [")]
 177                                       [ \t]*
 178                                     )?                                  // title is optional
 179                                     (?:\n+|$)
 180                               /gm,
 181                               function(){...});
 182             */
 183             var text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm,
 184                     function (wholeMatch,m1,m2,m3,m4) {
 185                             m1 = m1.toLowerCase();
 186                             g_urls[m1] = _EncodeAmpsAndAngles(m2);  // Link IDs are case-insensitive
 187                             if (m3) {
 188                                     // Oops, found blank lines, so it's not a title.
 189                                     // Put back the parenthetical statement we stole.
 190                                     return m3+m4;
 191                             } else if (m4) {
 192                                     g_titles[m1] = m4.replace(/"/g,"&quot;");
 193                             }
 194
 195                             // Completely remove the definition from the text
 196                             return "";
 197                     }
 198             );
 199
 200             return text;
 201     }
 202
 203
 204     var _HashHTMLBlocks = function(text) {
 205             // attacklab: Double up blank lines to reduce lookaround
 206             text = text.replace(/\n/g,"\n\n");
 207
 208             // Hashify HTML blocks:
 209             // We only want to do this for block-level HTML tags, such as headers,
 210             // lists, and tables. That's because we still want to wrap <p>s around
 211             // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 212             // phrase emphasis, and spans. The list of tags we're looking for is
 213             // hard-coded:
 214             var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"
 215             var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"
 216
 217             // First, look for nested blocks, e.g.:
 218             //   <div>
 219             //     <div>
 220             //     tags for inner block must be indented.
 221             //     </div>
 222             //   </div>
 223             //
 224             // The outermost tags must start at the left margin for this to match, and
 225             // the inner nested divs must be indented.
 226             // We need to do this before the next, more liberal match, because the next
 227             // match will start at the first `<div>` and stop at the first `</div>`.
 228
 229             // attacklab: This regex can be expensive when it fails.
 230             /*
 231                     var text = text.replace(/
 232                     (                                           // save in $1
 233                             ^                                   // start of line  (with /m)
 234                             <($block_tags_a)    // start tag = $2
 235                             \b                                  // word break
 236                                                                     // attacklab: hack around khtml/pcre bug...
 237                             [^\r]*?\n                   // any number of lines, minimally matching
 238                             </\2>                               // the matching end tag
 239                             [ \t]*                              // trailing spaces/tabs
 240                             (?=\n+)                             // followed by a newline
 241                     )                                           // attacklab: there are sentinel newlines at end of document
 242                     /gm,function(){...}};
 243             */
 244             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement);
 245
 246             //
 247             // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 248             //
 249
 250             /*
 251                     var text = text.replace(/
 252                     (                                           // save in $1
 253                             ^                                   // start of line  (with /m)
 254                             <($block_tags_b)    // start tag = $2
 255                             \b                                  // word break
 256                                                                     // attacklab: hack around khtml/pcre bug...
 257                             [^\r]*?                             // any number of lines, minimally matching
 258                             .*</\2>                             // the matching end tag
 259                             [ \t]*                              // trailing spaces/tabs
 260                             (?=\n+)                             // followed by a newline
 261                     )                                           // attacklab: there are sentinel newlines at end of document
 262                     /gm,function(){...}};
 263             */
 264             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement);
 265
 266             // Special case just for <hr />. It was easier to make a special case than
 267             // to make the other regex more complicated.
 268
 269             /*
 270                     text = text.replace(/
 271                     (                                           // save in $1
 272                             \n\n                                // Starting after a blank line
 273                             [ ]{0,3}
 274                             (<(hr)                              // start tag = $2
 275                             \b                                  // word break
 276                             ([^<>])*?                   //
 277                             \/?>)                               // the matching end tag
 278                             [ \t]*
 279                             (?=\n{2,})                  // followed by a blank line
 280                     )
 281                     /g,hashElement);
 282             */
 283             text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);
 284
 285             // Special case for standalone HTML comments:
 286
 287             /*
 288                     text = text.replace(/
 289                     (                                           // save in $1
 290                             \n\n                                // Starting after a blank line
 291                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 292                             <!
 293                             (--[^\r]*?--\s*)+
 294                             >
 295                             [ \t]*
 296                             (?=\n{2,})                  // followed by a blank line
 297                     )
 298                     /g,hashElement);
 299             */
 300             text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,hashElement);
 301
 302             // PHP and ASP-style processor instructions (<?...?> and <%...%>)
 303
 304             /*
 305                     text = text.replace(/
 306                     (?:
 307                             \n\n                                // Starting after a blank line
 308                     )
 309                     (                                           // save in $1
 310                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 311                             (?:
 312                                     <([?%])                     // $2
 313                                     [^\r]*?
 314                                     \2>
 315                             )
 316                             [ \t]*
 317                             (?=\n{2,})                  // followed by a blank line
 318                     )
 319                     /g,hashElement);
 320             */
 321             text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);
 322
 323             // attacklab: Undo double lines (see comment at top of this function)
 324             text = text.replace(/\n\n/g,"\n");
 325             return text;
 326     }
 327
 328     var hashElement = function(wholeMatch,m1) {
 329             var blockText = m1;
 330
 331             // Undo double lines
 332             blockText = blockText.replace(/\n\n/g,"\n");
 333             blockText = blockText.replace(/^\n/,"");
 334
 335             // strip trailing blank lines
 336             blockText = blockText.replace(/\n+$/g,"");
 337
 338             // Replace the element text with a marker ("~KxK" where x is its key)
 339             blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n";
 340
 341             return blockText;
 342     };
 343
 344     var _RunBlockGamut = function(text) {
 345     //
 346     // These are all the transformations that form block-level
 347     // tags like paragraphs, headers, and list items.
 348     //
 349             text = _DoHeaders(text);
 350
 351             // Do Horizontal Rules:
 352             var key = hashBlock("<hr />");
 353             text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key);
 354             text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key);
 355             text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key);
 356
 357             text = _DoLists(text);
 358             text = _DoCodeBlocks(text);
 359             text = _DoBlockQuotes(text);
 360
 361             // We already ran _HashHTMLBlocks() before, in Markdown(), but that
 362             // was to escape raw HTML in the original Markdown source. This time,
 363             // we're escaping the markup we've just created, so that we don't wrap
 364             // <p> tags around block-level tags.
 365             text = _HashHTMLBlocks(text);
 366             text = _FormParagraphs(text);
 367
 368             return text;
 369     }
 370
 371
 372     var _RunSpanGamut = function(text) {
 373     //
 374     // These are all the transformations that occur *within* block-level
 375     // tags like paragraphs, headers, and list items.
 376     //
 377
 378             text = _DoCodeSpans(text);
 379             text = _EscapeSpecialCharsWithinTagAttributes(text);
 380             text = _EncodeBackslashEscapes(text);
 381
 382             // Process anchor and image tags. Images must come first,
 383             // because ![foo][f] looks like an anchor.
 384             text = _DoImages(text);
 385             text = _DoAnchors(text);
 386
 387             // Make links out of things like `<http://example.com/>`
 388             // Must come after _DoAnchors(), because you can use < and >
 389             // delimiters in inline links like [this](<url>).
 390             text = _DoAutoLinks(text);
 391             text = _EncodeAmpsAndAngles(text);
 392             text = _DoItalicsAndBold(text);
 393
 394             // Do hard breaks:
 395             text = text.replace(/  +\n/g," <br />\n");
 396
 397             return text;
 398     }
 399
 400     var _EscapeSpecialCharsWithinTagAttributes = function(text) {
 401     //
 402     // Within tags -- meaning between < and > -- encode [\ ` * _] so they
 403     // don't conflict with their use in Markdown for code, italics and strong.
 404     //
 405
 406             // Build a regex to find HTML tags and comments.  See Friedl's
 407             // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
 408             var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi;
 409
 410             text = text.replace(regex, function(wholeMatch) {
 411                     var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
 412                     tag = escapeCharacters(tag,"\\`*_");
 413                     return tag;
 414             });
 415
 416             return text;
 417     }
 418
 419     var _DoAnchors = function(text) {
 420     //
 421     // Turn Markdown link shortcuts into XHTML <a> tags.
 422     //
 423             //
 424             // First, handle reference-style links: [link text] [id]
 425             //
 426
 427             /*
 428                     text = text.replace(/
 429                     (                                                   // wrap whole match in $1
 430                             \[
 431                             (
 432                                     (?:
 433                                             \[[^\]]*\]          // allow brackets nested one level
 434                                             |
 435                                             [^\[]                       // or anything else
 436                                     )*
 437                             )
 438                             \]
 439
 440                             [ ]?                                        // one optional space
 441                             (?:\n[ ]*)?                         // one optional newline followed by spaces
 442
 443                             \[
 444                             (.*?)                                       // id = $3
 445                             \]
 446                     )()()()()                                   // pad remaining backreferences
 447                     /g,_DoAnchors_callback);
 448             */
 449             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag);
 450
 451             //
 452             // Next, inline-style links: [link text](url "optional title")
 453             //
 454
 455             /*
 456                     text = text.replace(/
 457                             (                                           // wrap whole match in $1
 458                                     \[
 459                                     (
 460                                             (?:
 461                                                     \[[^\]]*\]  // allow brackets nested one level
 462                                             |
 463                                             [^\[\]]                     // or anything else
 464                                     )
 465                             )
 466                             \]
 467                             \(                                          // literal paren
 468                             [ \t]*
 469                             ()                                          // no id, so leave $3 empty
 470                             <?(.*?)>?                           // href = $4
 471                             [ \t]*
 472                             (                                           // $5
 473                                     (['"])                              // quote char = $6
 474                                     (.*?)                               // Title = $7
 475                                     \6                                  // matching quote
 476                                     [ \t]*                              // ignore any spaces/tabs between closing quote and )
 477                             )?                                          // title is optional
 478                             \)
 479                     )
 480                     /g,writeAnchorTag);
 481             */
 482             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?(.*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag);
 483
 484             //
 485             // Last, handle reference-style shortcuts: [link text]
 486             // These must come last in case you've also got [link test][1]
 487             // or [link test](/foo)
 488             //
 489
 490             /*
 491                     text = text.replace(/
 492                     (                                                   // wrap whole match in $1
 493                             \[
 494                             ([^\[\]]+)                          // link text = $2; can't contain '[' or ']'
 495                             \]
 496                     )()()()()()                                 // pad rest of backreferences
 497                     /g, writeAnchorTag);
 498             */
 499             text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
 500
 501             return text;
 502     }
 503
 504     var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 505             if (m7 == undefined) m7 = "";
 506             var whole_match = m1;
 507             var link_text   = m2;
 508             var link_id  = m3.toLowerCase();
 509             var url             = m4;
 510             var title   = m7;
 511
 512             if (url == "") {
 513                     if (link_id == "") {
 514                             // lower-case and turn embedded newlines into spaces
 515                             link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
 516                     }
 517                     url = "#"+link_id;
 518
 519                     if (g_urls[link_id] != undefined) {
 520                             url = g_urls[link_id];
 521                             if (g_titles[link_id] != undefined) {
 522                                     title = g_titles[link_id];
 523                             }
 524                     }
 525                     else {
 526                             if (whole_match.search(/\(\s*\)$/m)>-1) {
 527                                     // Special case for explicit empty url
 528                                     url = "";
 529                             } else {
 530                                     return whole_match;
 531                             }
 532                     }
 533             }
 534
 535             url = escapeCharacters(url,"*_");
 536             var result = "<a href=\"" + url + "\"";
 537
 538             if (title != "") {
 539                     title = title.replace(/"/g,"&quot;");
 540                     title = escapeCharacters(title,"*_");
 541                     result +=  " title=\"" + title + "\"";
 542             }
 543
 544             result += ">" + link_text + "</a>";
 545
 546             return result;
 547     }
 548
 549
 550     var _DoImages = function(text) {
 551     //
 552     // Turn Markdown image shortcuts into <img> tags.
 553     //
 554
 555             //
 556             // First, handle reference-style labeled images: ![alt text][id]
 557             //
 558
 559             /*
 560                     text = text.replace(/
 561                     (                                           // wrap whole match in $1
 562                             !\[
 563                             (.*?)                               // alt text = $2
 564                             \]
 565
 566                             [ ]?                                // one optional space
 567                             (?:\n[ ]*)?                 // one optional newline followed by spaces
 568
 569                             \[
 570                             (.*?)                               // id = $3
 571                             \]
 572                     )()()()()                           // pad rest of backreferences
 573                     /g,writeImageTag);
 574             */
 575             text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag);
 576
 577             //
 578             // Next, handle inline images:  ![alt text](url "optional title")
 579             // Don't forget: encode * and _
 580
 581             /*
 582                     text = text.replace(/
 583                     (                                           // wrap whole match in $1
 584                             !\[
 585                             (.*?)                               // alt text = $2
 586                             \]
 587                             \s?                                 // One optional whitespace character
 588                             \(                                  // literal paren
 589                             [ \t]*
 590                             ()                                  // no id, so leave $3 empty
 591                             <?(\S+?)>?                  // src url = $4
 592                             [ \t]*
 593                             (                                   // $5
 594                                     (['"])                      // quote char = $6
 595                                     (.*?)                       // title = $7
 596                                     \6                          // matching quote
 597                                     [ \t]*
 598                             )?                                  // title is optional
 599                     \)
 600                     )
 601                     /g,writeImageTag);
 602             */
 603             text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag);
 604
 605             return text;
 606     }
 607
 608     var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 609             var whole_match = m1;
 610             var alt_text   = m2;
 611             var link_id  = m3.toLowerCase();
 612             var url             = m4;
 613             var title   = m7;
 614
 615             if (!title) title = "";
 616
 617             if (url == "") {
 618                     if (link_id == "") {
 619                             // lower-case and turn embedded newlines into spaces
 620                             link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
 621                     }
 622                     url = "#"+link_id;
 623
 624                     if (g_urls[link_id] != undefined) {
 625                             url = g_urls[link_id];
 626                             if (g_titles[link_id] != undefined) {
 627                                     title = g_titles[link_id];
 628                             }
 629                     }
 630                     else {
 631                             return whole_match;
 632                     }
 633             }
 634
 635             alt_text = alt_text.replace(/"/g,"&quot;");
 636             url = escapeCharacters(url,"*_");
 637             var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
 638
 639             // attacklab: Markdown.pl adds empty title attributes to images.
 640             // Replicate this bug.
 641
 642             //if (title != "") {
 643                     title = title.replace(/"/g,"&quot;");
 644                     title = escapeCharacters(title,"*_");
 645                     result +=  " title=\"" + title + "\"";
 646             //}
 647
 648             result += " />";
 649
 650             return result;
 651     }
 652
 653
 654     var _DoHeaders = function(text) {
 655
 656             // Setext-style headers:
 657             //  Header 1
 658             //  ========
 659             //
 660             //  Header 2
 661             //  --------
 662             //
 663             text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm,
 664                     function(wholeMatch,m1){return hashBlock('<h1 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h1>");});
 665
 666             text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
 667                     function(matchFound,m1){return hashBlock('<h2 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h2>");});
 668
 669             // atx-style headers:
 670             //  # Header 1
 671             //  ## Header 2
 672             //  ## Header 2 with closing hashes ##
 673             //  ...
 674             //  ###### Header 6
 675             //
 676
 677             /*
 678                     text = text.replace(/
 679                             ^(\#{1,6})                          // $1 = string of #'s
 680                             [ \t]*
 681                             (.+?)                                       // $2 = Header text
 682                             [ \t]*
 683                             \#*                                         // optional closing #'s (not counted)
 684                             \n+
 685                     /gm, function() {...});
 686             */
 687
 688             text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
 689                     function(wholeMatch,m1,m2) {
 690                             var h_level = m1.length;
 691                             return hashBlock("<h" + h_level + ' id="' + headerId(m2) + '">' + _RunSpanGamut(m2) + "</h" + h_level + ">");
 692                     });
 693
 694             function headerId(m) {
 695                     return m.replace(/[^\w]/g, '').toLowerCase();
 696             }
 697             return text;
 698     }
 699
 700     // This declaration keeps Dojo compressor from outputting garbage:
 701     var _ProcessListItems;
 702
 703     var _DoLists = function(text) {
 704     //
 705     // Form HTML ordered (numbered) and unordered (bulleted) lists.
 706     //
 707
 708             // attacklab: add sentinel to hack around khtml/safari bug:
 709             // http://bugs.webkit.org/show_bug.cgi?id=11231
 710             text += "~0";
 711
 712             // Re-usable pattern to match any entirel ul or ol list:
 713
 714             /*
 715                     var whole_list = /
 716                     (                                                                   // $1 = whole list
 717                             (                                                           // $2
 718                                     [ ]{0,3}                                    // attacklab: g_tab_width - 1
 719                                     ([*+-]|\d+[.])                              // $3 = first list item marker
 720                                     [ \t]+
 721                             )
 722                             [^\r]+?
 723                             (                                                           // $4
 724                                     ~0                                                  // sentinel for workaround; should be $
 725                             |
 726                                     \n{2,}
 727                                     (?=\S)
 728                                     (?!                                                 // Negative lookahead for another list item marker
 729                                             [ \t]*
 730                                             (?:[*+-]|\d+[.])[ \t]+
 731                                     )
 732                             )
 733                     )/g
 734             */
 735             var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
 736
 737             if (g_list_level) {
 738                     text = text.replace(whole_list,function(wholeMatch,m1,m2) {
 739                             var list = m1;
 740                             var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol";
 741
 742                             // Turn double returns into triple returns, so that we can make a
 743                             // paragraph for the last item in a list, if necessary:
 744                             list = list.replace(/\n{2,}/g,"\n\n\n");;
 745                             var result = _ProcessListItems(list);
 746
 747                             // Trim any trailing whitespace, to put the closing `</$list_type>`
 748                             // up on the preceding line, to get it past the current stupid
 749                             // HTML block parser. This is a hack to work around the terrible
 750                             // hack that is the HTML block parser.
 751                             result = result.replace(/\s+$/,"");
 752                             result = "<"+list_type+">" + result + "</"+list_type+">\n";
 753                             return result;
 754                     });
 755             } else {
 756                     whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
 757                     text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) {
 758                             var runup = m1;
 759                             var list = m2;
 760
 761                             var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol";
 762                             // Turn double returns into triple returns, so that we can make a
 763                             // paragraph for the last item in a list, if necessary:
 764                             var list = list.replace(/\n{2,}/g,"\n\n\n");;
 765                             var result = _ProcessListItems(list);
 766                             result = runup + "<"+list_type+">\n" + result + "</"+list_type+">\n";
 767                             return result;
 768                     });
 769             }
 770
 771             // attacklab: strip sentinel
 772             text = text.replace(/~0/,"");
 773
 774             return text;
 775     }
 776
 777     _ProcessListItems = function(list_str) {
 778     //
 779     //  Process the contents of a single ordered or unordered list, splitting it
 780     //  into individual list items.
 781     //
 782             // The $g_list_level global keeps track of when we're inside a list.
 783             // Each time we enter a list, we increment it; when we leave a list,
 784             // we decrement. If it's zero, we're not in a list anymore.
 785             //
 786             // We do this because when we're not inside a list, we want to treat
 787             // something like this:
 788             //
 789             //    I recommend upgrading to version
 790             //    8. Oops, now this line is treated
 791             //    as a sub-list.
 792             //
 793             // As a single paragraph, despite the fact that the second line starts
 794             // with a digit-period-space sequence.
 795             //
 796             // Whereas when we're inside a list (or sub-list), that line will be
 797             // treated as the start of a sub-list. What a kludge, huh? This is
 798             // an aspect of Markdown's syntax that's hard to parse perfectly
 799             // without resorting to mind-reading. Perhaps the solution is to
 800             // change the syntax rules such that sub-lists must start with a
 801             // starting cardinal number; e.g. "1." or "a.".
 802
 803             g_list_level++;
 804
 805             // trim trailing blank lines:
 806             list_str = list_str.replace(/\n{2,}$/,"\n");
 807
 808             // attacklab: add sentinel to emulate \z
 809             list_str += "~0";
 810
 811             /*
 812                     list_str = list_str.replace(/
 813                             (\n)?                                                       // leading line = $1
 814                             (^[ \t]*)                                           // leading whitespace = $2
 815                             ([*+-]|\d+[.]) [ \t]+                       // list marker = $3
 816                             ([^\r]+?                                            // list item text   = $4
 817                             (\n{1,2}))
 818                             (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+))
 819                     /gm, function(){...});
 820             */
 821             list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,
 822                     function(wholeMatch,m1,m2,m3,m4){
 823                             var item = m4;
 824                             var leading_line = m1;
 825                             var leading_space = m2;
 826
 827                             if (leading_line || (item.search(/\n{2,}/)>-1)) {
 828                                     item = _RunBlockGamut(_Outdent(item));
 829                             }
 830                             else {
 831                                     // Recursion for sub-lists:
 832                                     item = _DoLists(_Outdent(item));
 833                                     item = item.replace(/\n$/,""); // chomp(item)
 834                                     item = _RunSpanGamut(item);
 835                             }
 836
 837                             return  "<li>" + item + "</li>\n";
 838                     }
 839             );
 840
 841             // attacklab: strip sentinel
 842             list_str = list_str.replace(/~0/g,"");
 843
 844             g_list_level--;
 845             return list_str;
 846     }
 847
 848
 849     var _DoCodeBlocks = function(text) {
 850     //
 851     //  Process Markdown `<pre><code>` blocks.
 852     //
 853
 854             /*
 855                     text = text.replace(text,
 856                             /(?:\n\n|^)
 857                             (                                                           // $1 = the code block -- one or more lines, starting with a space/tab
 858                                     (?:
 859                                             (?:[ ]{4}|\t)                       // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
 860                                             .*\n+
 861                                     )+
 862                             )
 863                             (\n*[ ]{0,3}[^ \t\n]|(?=~0))        // attacklab: g_tab_width
 864                     /g,function(){...});
 865             */
 866
 867             // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
 868             text += "~0";
 869
 870             text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
 871                     function(wholeMatch,m1,m2) {
 872                             var codeblock = m1;
 873                             var nextChar = m2;
 874
 875                             codeblock = _EncodeCode( _Outdent(codeblock));
 876                             codeblock = _Detab(codeblock);
 877                             codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
 878                             codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
 879
 880                             codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
 881
 882                             return hashBlock(codeblock) + nextChar;
 883                     }
 884             );
 885
 886             // attacklab: strip sentinel
 887             text = text.replace(/~0/,"");
 888
 889             return text;
 890     }
 891
 892     var hashBlock = function(text) {
 893             text = text.replace(/(^\n+|\n+$)/g,"");
 894             return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n";
 895     }
 896
 897
 898     var _DoCodeSpans = function(text) {
 899     //
 900     //   *  Backtick quotes are used for <code></code> spans.
 901     //
 902     //   *  You can use multiple backticks as the delimiters if you want to
 903     //   include literal backticks in the code span. So, this input:
 904     //
 905     //           Just type ``foo `bar` baz`` at the prompt.
 906     //
 907     //     Will translate to:
 908     //
 909     //           <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 910     //
 911     //  There's no arbitrary limit to the number of backticks you
 912     //  can use as delimters. If you need three consecutive backticks
 913     //  in your code, use four for delimiters, etc.
 914     //
 915     //  *  You can use spaces to get literal backticks at the edges:
 916     //
 917     //           ... type `` `bar` `` ...
 918     //
 919     //     Turns to:
 920     //
 921     //           ... type <code>`bar`</code> ...
 922     //
 923
 924             /*
 925                     text = text.replace(/
 926                             (^|[^\\])                                   // Character before opening ` can't be a backslash
 927                             (`+)                                                // $2 = Opening run of `
 928                             (                                                   // $3 = The code block
 929                                     [^\r]*?
 930                                     [^`]                                        // attacklab: work around lack of lookbehind
 931                             )
 932                             \2                                                  // Matching closer
 933                             (?!`)
 934                     /gm, function(){...});
 935             */
 936
 937             text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
 938                     function(wholeMatch,m1,m2,m3,m4) {
 939                             var c = m3;
 940                             c = c.replace(/^([ \t]*)/g,"");     // leading whitespace
 941                             c = c.replace(/[ \t]*$/g,"");       // trailing whitespace
 942                             c = _EncodeCode(c);
 943                             return m1+"<code>"+c+"</code>";
 944                     });
 945
 946             return text;
 947     }
 948
 949
 950     var _EncodeCode = function(text) {
 951     //
 952     // Encode/escape certain characters inside Markdown code runs.
 953     // The point is that in code, these characters are literals,
 954     // and lose their special Markdown meanings.
 955     //
 956             // Encode all ampersands; HTML entities are not
 957             // entities within a Markdown code span.
 958             text = text.replace(/&/g,"&amp;");
 959
 960             // Do the angle bracket song and dance:
 961             text = text.replace(/</g,"&lt;");
 962             text = text.replace(/>/g,"&gt;");
 963
 964             // Now, escape characters that are magic in Markdown:
 965             text = escapeCharacters(text,"\*_{}[]\\",false);
 966
 967     // jj the line above breaks this:
 968     //---
 969
 970     //* Item
 971
 972     //   1. Subitem
 973
 974     //            special char: *
 975     //---
 976
 977             return text;
 978     }
 979
 980
 981     var _DoItalicsAndBold = function(text) {
 982
 983             // <strong> must go first:
 984             text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
 985                     "<strong>$2</strong>");
 986
 987             text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
 988                     "<em>$2</em>");
 989
 990             return text;
 991     }
 992
 993
 994     var _DoBlockQuotes = function(text) {
 995
 996             /*
 997                     text = text.replace(/
 998                     (                                                           // Wrap whole match in $1
 999                             (
1000                                     ^[ \t]*>[ \t]?                      // '>' at the start of a line
1001                                     .+\n                                        // rest of the first line
1002                                     (.+\n)*                                     // subsequent consecutive lines
1003                                     \n*                                         // blanks
1004                             )+
1005                     )
1006                     /gm, function(){...});
1007             */
1008
1009             text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
1010                     function(wholeMatch,m1) {
1011                             var bq = m1;
1012
1013                             // attacklab: hack around Konqueror 3.5.4 bug:
1014                             // "----------bug".replace(/^-/g,"") == "bug"
1015
1016                             bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0");   // trim one level of quoting
1017
1018                             // attacklab: clean up hack
1019                             bq = bq.replace(/~0/g,"");
1020
1021                             bq = bq.replace(/^[ \t]+$/gm,"");           // trim whitespace-only lines
1022                             bq = _RunBlockGamut(bq);                            // recurse
1023
1024                             bq = bq.replace(/(^|\n)/g,"$1  ");
1025                             // These leading spaces screw with <pre> content, so we need to fix that:
1026                             bq = bq.replace(
1027                                             /(\s*<pre>[^\r]+?<\/pre>)/gm,
1028                                     function(wholeMatch,m1) {
1029                                             var pre = m1;
1030                                             // attacklab: hack around Konqueror 3.5.4 bug:
1031                                             pre = pre.replace(/^  /mg,"~0");
1032                                             pre = pre.replace(/~0/g,"");
1033                                             return pre;
1034                                     });
1035
1036                             return hashBlock("<blockquote>\n" + bq + "\n</blockquote>");
1037                     });
1038             return text;
1039     }
1040
1041
1042     var _FormParagraphs = function(text) {
1043     //
1044     //  Params:
1045     //    $text - string to process with html <p> tags
1046     //
1047
1048             // Strip leading and trailing lines:
1049             text = text.replace(/^\n+/g,"");
1050             text = text.replace(/\n+$/g,"");
1051
1052             var grafs = text.split(/\n{2,}/g);
1053             var grafsOut = new Array();
1054
1055             //
1056             // Wrap <p> tags.
1057             //
1058             var end = grafs.length;
1059             for (var i=0; i<end; i++) {
1060                     var str = grafs[i];
1061
1062                     // if this is an HTML marker, copy it
1063                     if (str.search(/~K(\d+)K/g) >= 0) {
1064                             grafsOut.push(str);
1065                     }
1066                     else if (str.search(/\S/) >= 0) {
1067                             str = _RunSpanGamut(str);
1068                             str = str.replace(/^([ \t]*)/g,"<p>");
1069                             str += "</p>"
1070                             grafsOut.push(str);
1071                     }
1072
1073             }
1074
1075             //
1076             // Unhashify HTML blocks
1077             //
1078             end = grafsOut.length;
1079             for (var i=0; i<end; i++) {
1080                     // if this is a marker for an html block...
1081                     while (grafsOut[i].search(/~K(\d+)K/) >= 0) {
1082                             var blockText = g_html_blocks[RegExp.$1];
1083                             blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs
1084                             grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText);
1085                     }
1086             }
1087
1088             return grafsOut.join("\n\n");
1089     }
1090
1091
1092     var _EncodeAmpsAndAngles = function(text) {
1093     // Smart processing for ampersands and angle brackets that need to be encoded.
1094
1095             // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1096             //   http://bumppo.net/projects/amputator/
1097             text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&amp;");
1098
1099             // Encode naked <'s
1100             text = text.replace(/<(?![a-z\/?\$!])/gi,"&lt;");
1101
1102             return text;
1103     }
1104
1105
1106     var _EncodeBackslashEscapes = function(text) {
1107     //
1108     //   Parameter:  String.
1109     //   Returns:       The string, with after processing the following backslash
1110     //                     escape sequences.
1111     //
1112
1113             // attacklab: The polite way to do this is with the new
1114             // escapeCharacters() function:
1115             //
1116             //  text = escapeCharacters(text,"\\",true);
1117             //  text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
1118             //
1119             // ...but we're sidestepping its use of the (slow) RegExp constructor
1120             // as an optimization for Firefox.  This function gets called a LOT.
1121
1122             text = text.replace(/\\(\\)/g,escapeCharacters_callback);
1123             text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback);
1124             return text;
1125     }
1126
1127
1128     var _DoAutoLinks = function(text) {
1129
1130             text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"<a href=\"$1\">$1</a>");
1131
1132             // Email addresses: <address@domain.foo>
1133
1134             /*
1135                     text = text.replace(/
1136                             <
1137                             (?:mailto:)?
1138                             (
1139                                     [-.\w]+
1140                                     \@
1141                                     [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1142                             )
1143                             >
1144                     /gi, _DoAutoLinks_callback());
1145             */
1146             text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
1147                     function(wholeMatch,m1) {
1148                             return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
1149                     }
1150             );
1151
1152             return text;
1153     }
1154
1155
1156     var _EncodeEmailAddress = function(addr) {
1157     //
1158     //  Input: an email address, e.g. "foo@example.com"
1159     //
1160     //  Output: the email address as a mailto link, with each character
1161     //  of the address encoded as either a decimal or hex entity, in
1162     //  the hopes of foiling most address harvesting spam bots. E.g.:
1163     //
1164     //  <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1165     //     x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1166     //     &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
1167     //
1168     //  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1169     //  mailing list: <http://tinyurl.com/yu7ue>
1170     //
1171
1172             // attacklab: why can't javascript speak hex?
1173             function char2hex(ch) {
1174                     var hexDigits = '0123456789ABCDEF';
1175                     var dec = ch.charCodeAt(0);
1176                     return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15));
1177             }
1178
1179             var encode = [
1180                     function(ch){return "&#"+ch.charCodeAt(0)+";";},
1181                     function(ch){return "&#x"+char2hex(ch)+";";},
1182                     function(ch){return ch;}
1183             ];
1184
1185             addr = "mailto:" + addr;
1186
1187             addr = addr.replace(/./g, function(ch) {
1188                     if (ch == "@") {
1189                             // this *must* be encoded. I insist.
1190                             ch = encode[Math.floor(Math.random()*2)](ch);
1191                     } else if (ch !=":") {
1192                             // leave ':' alone (to spot mailto: later)
1193                             var r = Math.random();
1194                             // roughly 10% raw, 45% hex, 45% dec
1195                             ch =  (
1196                                             r > .9  ?   encode[2](ch)   :
1197                                             r > .45 ?   encode[1](ch)   :
1198                                                                     encode[0](ch)
1199                                     );
1200                     }
1201                     return ch;
1202             });
1203
1204             addr = "<a href=\"" + addr + "\">" + addr + "</a>";
1205             addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part
1206
1207             return addr;
1208     }
1209
1210
1211     var _UnescapeSpecialChars = function(text) {
1212     //
1213     // Swap back in all the special characters we've hidden.
1214     //
1215             text = text.replace(/~E(\d+)E/g,
1216                     function(wholeMatch,m1) {
1217                             var charCodeToReplace = parseInt(m1);
1218                             return String.fromCharCode(charCodeToReplace);
1219                     }
1220             );
1221             return text;
1222     }
1223
1224
1225     var _Outdent = function(text) {
1226     //
1227     // Remove one level of line-leading tabs or spaces
1228     //
1229
1230             // attacklab: hack around Konqueror 3.5.4 bug:
1231             // "----------bug".replace(/^-/g,"") == "bug"
1232
1233             text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width
1234
1235             // attacklab: clean up hack
1236             text = text.replace(/~0/g,"")
1237
1238             return text;
1239     }
1240
1241     var _Detab = function(text) {
1242     // attacklab: Detab's completely rewritten for speed.
1243     // In perl we could fix it by anchoring the regexp with \G.
1244     // In javascript we're less fortunate.
1245
1246             // expand first n-1 tabs
1247             text = text.replace(/\t(?=\t)/g,"    "); // attacklab: g_tab_width
1248
1249             // replace the nth with two sentinels
1250             text = text.replace(/\t/g,"~A~B");
1251
1252             // use the sentinel to anchor our regex so it doesn't explode
1253             text = text.replace(/~B(.+?)~A/g,
1254                     function(wholeMatch,m1,m2) {
1255                             var leadingText = m1;
1256                             var numSpaces = 4 - leadingText.length % 4;  // attacklab: g_tab_width
1257
1258                             // there *must* be a better way to do this:
1259                             for (var i=0; i<numSpaces; i++) leadingText+=" ";
1260
1261                             return leadingText;
1262                     }
1263             );
1264
1265             // clean up sentinels
1266             text = text.replace(/~A/g,"    ");  // attacklab: g_tab_width
1267             text = text.replace(/~B/g,"");
1268
1269             return text;
1270     }
1271
1272
1273     //
1274     //  attacklab: Utility functions
1275     //
1276
1277
1278     var escapeCharacters = function(text, charsToEscape, afterBackslash) {
1279             // First we have to escape the escape characters so that
1280             // we can build a character class out of them
1281             var regexString = "([" + charsToEscape.replace(/([\[\]\\])/g,"\\$1") + "])";
1282
1283             if (afterBackslash) {
1284                     regexString = "\\\\" + regexString;
1285             }
1286
1287             var regex = new RegExp(regexString,"g");
1288             text = text.replace(regex,escapeCharacters_callback);
1289
1290             return text;
1291     }
1292
1293
1294     var escapeCharacters_callback = function(wholeMatch,m1) {
1295             var charCodeToEscape = m1.charCodeAt(0);
1296             return "~E"+charCodeToEscape+"E";
1297     }
1298
1299 } // end of Showdown.converter
1300
1301 // export
1302 //if (typeof exports != 'undefined') exports.Showdown = Showdown;