git.roojs.org Git - roojs1/blob - ux/Showdown.js

   1 //
   2 // showdown.js -- A javascript port of Markdown.
   3 //
   4 // Copyright (c) 2007 John Fraser.
   5 //
   6 // Original Markdown Copyright (c) 2004-2005 John Gruber
   7 //   <http://daringfireball.net/projects/markdown/>
   8 //
   9 // Redistributable under a BSD-style open source license.
  10 // See license.txt for more information.
  11 //
  12 // The full source distribution is at:
  13 //
  14 //                              A A L
  15 //                              T C A
  16 //                              T K B
  17 //
  18 //   <http://www.attacklab.net/>
  19 //
  20
  21 //
  22 // Wherever possible, Showdown is a straight, line-by-line port
  23 // of the Perl version of Markdown.
  24 //
  25 // This is not a normal parser design; it's basically just a
  26 // series of string substitutions.  It's hard to read and
  27 // maintain this way,  but keeping Showdown close to the original
  28 // design makes it easier to port new features.
  29 //
  30 // More importantly, Showdown behaves like markdown.pl in most
  31 // edge cases.  So web applications can do client-side preview
  32 // in Javascript, and then build identical HTML on the server.
  33 //
  34 // This port needs the new RegExp functionality of ECMA 262,
  35 // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
  36 // should do fine.  Even with the new regular expression features,
  37 // We do a lot of work to emulate Perl's regex functionality.
  38 // The tricky changes in this file mostly have the "attacklab:"
  39 // label.  Major or self-explanatory changes don't.
  40 //
  41 // Smart diff tools like Araxis Merge will be able to match up
  42 // this file with markdown.pl in a useful way.  A little tweaking
  43 // helps: in a copy of markdown.pl, replace "#" with "//" and
  44 // replace "$text" with "text".  Be sure to ignore whitespace
  45 // and line endings.
  46 //
  47
  48
  49 //
  50 // Showdown usage:
  51 //
  52 //   alert( Roo.ux.Showdown.toHtml("Markdown *rocks*.") );
  53 //
  54 // Note: move the sample code to the bottom of this
  55 // file before uncommenting it.
  56 //
  57
  58
  59 //
  60 // Showdown namespace
  61 //
  62 Roo.namespace('Roo.ux');
  63 Roo.ux.Showdown = {};
  64 Roo.ux.Showdown.toHtml = function(text) {
  65     var c = new Roo.ux.Showdown.converter();
  66     return c.makeHtml(text);
  67 };
  68 //
  69 // converter
  70 //
  71 // Wraps all "globals" so that the only thing
  72 // exposed is makeHtml().
  73 //
  74 Roo.ux.Showdown.converter = function() {
  75
  76     //
  77     // Globals:
  78     //
  79
  80     // Global hashes, used by various utility routines
  81     var g_urls;
  82     var g_titles;
  83     var g_html_blocks;
  84
  85     // Used to track when we're inside an ordered or unordered list
  86     // (see _ProcessListItems() for details):
  87     var g_list_level = 0;
  88
  89
  90     this.makeHtml = function(_text) {
  91     //
  92     // Main function. The order in which other subs are called here is
  93     // essential. Link and image substitutions need to happen before
  94     // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  95     // and <img> tags get encoded.
  96     //v
  97             var text = _text;
  98             // Clear the global hashes. If we don't clear these, you get conflicts
  99             // from other articles when generating a page which contains more than
 100             // one article (e.g. an index page that shows the N most recent
 101             // articles):
 102             g_urls = new Array();
 103             g_titles = new Array();
 104             g_html_blocks = new Array();
 105
 106             // attacklab: Replace ~ with ~T
 107             // This lets us use tilde as an escape char to avoid md5 hashes
 108             // The choice of character is arbitray; anything that isn't
 109         // magic in Markdown will work.
 110             text = text.replace(/~/g,"~T");
 111
 112             // attacklab: Replace $ with ~D
 113             // RegExp interprets $ as a special character
 114             // when it's in a replacement string
 115             text = text.replace(/\$/g,"~D");
 116
 117             // Standardize line endings
 118             text = text.replace(/\r\n/g,"\n"); // DOS to Unix
 119             text = text.replace(/\r/g,"\n"); // Mac to Unix
 120
 121             // Make sure text begins and ends with a couple of newlines:
 122             text = "\n\n" + text + "\n\n";
 123
 124             // Convert all tabs to spaces.
 125             text = _Detab(text);
 126
 127             // Strip any lines consisting only of spaces and tabs.
 128             // This makes subsequent regexen easier to write, because we can
 129             // match consecutive blank lines with /\n+/ instead of something
 130             // contorted like /[ \t]*\n+/ .
 131             text = text.replace(/^[ \t]+$/mg,"");
 132
 133             // Turn block-level HTML blocks into hash entries
 134             text = _HashHTMLBlocks(text);
 135
 136             // Strip link definitions, store in hashes.
 137             text = _StripLinkDefinitions(text);
 138
 139             text = _RunBlockGamut(text);
 140
 141             text = _UnescapeSpecialChars(text);
 142
 143             // attacklab: Restore dollar signs
 144             text = text.replace(/~D/g,"$$");
 145
 146             // attacklab: Restore tildes
 147             text = text.replace(/~T/g,"~");
 148
 149             return text;
 150     }
 151
 152
 153     var _StripLinkDefinitions = function(text) {
 154     //
 155     // Strips link definitions from text, stores the URLs and titles in
 156     // hash references.
 157     //
 158
 159             // Link defs are in the form: ^[id]: url "optional title"
 160
 161             /*
 162                     var text = text.replace(/
 163                                     ^[ ]{0,3}\[(.+)\]:  // id = $1  attacklab: g_tab_width - 1
 164                                       [ \t]*
 165                                       \n?                               // maybe *one* newline
 166                                       [ \t]*
 167                                     <?(\S+?)>?                  // url = $2
 168                                       [ \t]*
 169                                       \n?                               // maybe one newline
 170                                       [ \t]*
 171                                     (?:
 172                                       (\n*)                             // any lines skipped = $3 attacklab: lookbehind removed
 173                                       ["(]
 174                                       (.+?)                             // title = $4
 175                                       [")]
 176                                       [ \t]*
 177                                     )?                                  // title is optional
 178                                     (?:\n+|$)
 179                               /gm,
 180                               function(){...});
 181             */
 182             text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm,
 183                     function (wholeMatch,m1,m2,m3,m4) {
 184                             m1 = m1.toLowerCase();
 185                             g_urls[m1] = _EncodeAmpsAndAngles(m2);  // Link IDs are case-insensitive
 186                             if (m3) {
 187                                     // Oops, found blank lines, so it's not a title.
 188                                     // Put back the parenthetical statement we stole.
 189                                     return m3+m4;
 190                             } else if (m4) {
 191                                     g_titles[m1] = m4.replace(/"/g,"&quot;");
 192                             }
 193
 194                             // Completely remove the definition from the text
 195                             return "";
 196                     }
 197             );
 198
 199             return text;
 200     }
 201
 202
 203     var _HashHTMLBlocks = function(text) {
 204             // attacklab: Double up blank lines to reduce lookaround
 205             text = text.replace(/\n/g,"\n\n");
 206
 207             // Hashify HTML blocks:
 208             // We only want to do this for block-level HTML tags, such as headers,
 209             // lists, and tables. That's because we still want to wrap <p>s around
 210             // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 211             // phrase emphasis, and spans. The list of tags we're looking for is
 212             // hard-coded:
 213             var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"
 214             var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"
 215
 216             // First, look for nested blocks, e.g.:
 217             //   <div>
 218             //     <div>
 219             //     tags for inner block must be indented.
 220             //     </div>
 221             //   </div>
 222             //
 223             // The outermost tags must start at the left margin for this to match, and
 224             // the inner nested divs must be indented.
 225             // We need to do this before the next, more liberal match, because the next
 226             // match will start at the first `<div>` and stop at the first `</div>`.
 227
 228             // attacklab: This regex can be expensive when it fails.
 229             /*
 230                     var text = text.replace(/
 231                     (                                           // save in $1
 232                             ^                                   // start of line  (with /m)
 233                             <($block_tags_a)    // start tag = $2
 234                             \b                                  // word break
 235                                                                     // attacklab: hack around khtml/pcre bug...
 236                             [^\r]*?\n                   // any number of lines, minimally matching
 237                             </\2>                               // the matching end tag
 238                             [ \t]*                              // trailing spaces/tabs
 239                             (?=\n+)                             // followed by a newline
 240                     )                                           // attacklab: there are sentinel newlines at end of document
 241                     /gm,function(){...}};
 242             */
 243             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement);
 244
 245             //
 246             // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 247             //
 248
 249             /*
 250                     var text = text.replace(/
 251                     (                                           // save in $1
 252                             ^                                   // start of line  (with /m)
 253                             <($block_tags_b)    // start tag = $2
 254                             \b                                  // word break
 255                                                                     // attacklab: hack around khtml/pcre bug...
 256                             [^\r]*?                             // any number of lines, minimally matching
 257                             .*</\2>                             // the matching end tag
 258                             [ \t]*                              // trailing spaces/tabs
 259                             (?=\n+)                             // followed by a newline
 260                     )                                           // attacklab: there are sentinel newlines at end of document
 261                     /gm,function(){...}};
 262             */
 263             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement);
 264
 265             // Special case just for <hr />. It was easier to make a special case than
 266             // to make the other regex more complicated.
 267
 268             /*
 269                     text = text.replace(/
 270                     (                                           // save in $1
 271                             \n\n                                // Starting after a blank line
 272                             [ ]{0,3}
 273                             (<(hr)                              // start tag = $2
 274                             \b                                  // word break
 275                             ([^<>])*?                   //
 276                             \/?>)                               // the matching end tag
 277                             [ \t]*
 278                             (?=\n{2,})                  // followed by a blank line
 279                     )
 280                     /g,hashElement);
 281             */
 282             text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);
 283
 284             // Special case for standalone HTML comments:
 285
 286             /*
 287                     text = text.replace(/
 288                     (                                           // save in $1
 289                             \n\n                                // Starting after a blank line
 290                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 291                             <!
 292                             (--[^\r]*?--\s*)+
 293                             >
 294                             [ \t]*
 295                             (?=\n{2,})                  // followed by a blank line
 296                     )
 297                     /g,hashElement);
 298             */
 299             text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,hashElement);
 300
 301             // PHP and ASP-style processor instructions (<?...?> and <%...%>)
 302
 303             /*
 304                     text = text.replace(/
 305                     (?:
 306                             \n\n                                // Starting after a blank line
 307                     )
 308                     (                                           // save in $1
 309                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 310                             (?:
 311                                     <([?%])                     // $2
 312                                     [^\r]*?
 313                                     \2>
 314                             )
 315                             [ \t]*
 316                             (?=\n{2,})                  // followed by a blank line
 317                     )
 318                     /g,hashElement);
 319             */
 320             text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);
 321
 322             // attacklab: Undo double lines (see comment at top of this function)
 323             text = text.replace(/\n\n/g,"\n");
 324             return text;
 325     }
 326
 327     var hashElement = function(wholeMatch,m1) {
 328             var blockText = m1;
 329
 330             // Undo double lines
 331             blockText = blockText.replace(/\n\n/g,"\n");
 332             blockText = blockText.replace(/^\n/,"");
 333
 334             // strip trailing blank lines
 335             blockText = blockText.replace(/\n+$/g,"");
 336
 337             // Replace the element text with a marker ("~KxK" where x is its key)
 338             blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n";
 339
 340             return blockText;
 341     };
 342
 343     var _RunBlockGamut = function(text) {
 344     //
 345     // These are all the transformations that form block-level
 346     // tags like paragraphs, headers, and list items.
 347     //
 348             text = _DoHeaders(text);
 349
 350             // Do Horizontal Rules:
 351             var key = hashBlock("<hr />");
 352             text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key);
 353             text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key);
 354             text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key);
 355
 356             text = _DoLists(text);
 357             text = _DoCodeBlocks(text);
 358             text = _DoBlockQuotes(text);
 359
 360             // We already ran _HashHTMLBlocks() before, in Markdown(), but that
 361             // was to escape raw HTML in the original Markdown source. This time,
 362             // we're escaping the markup we've just created, so that we don't wrap
 363             // <p> tags around block-level tags.
 364             text = _HashHTMLBlocks(text);
 365             text = _FormParagraphs(text);
 366
 367             return text;
 368     }
 369
 370
 371     var _RunSpanGamut = function(text) {
 372     //
 373     // These are all the transformations that occur *within* block-level
 374     // tags like paragraphs, headers, and list items.
 375     //
 376
 377             text = _DoCodeSpans(text);
 378             text = _EscapeSpecialCharsWithinTagAttributes(text);
 379             text = _EncodeBackslashEscapes(text);
 380
 381             // Process anchor and image tags. Images must come first,
 382             // because ![foo][f] looks like an anchor.
 383             text = _DoImages(text);
 384             text = _DoAnchors(text);
 385
 386             // Make links out of things like `<http://example.com/>`
 387             // Must come after _DoAnchors(), because you can use < and >
 388             // delimiters in inline links like [this](<url>).
 389             text = _DoAutoLinks(text);
 390             text = _EncodeAmpsAndAngles(text);
 391             text = _DoItalicsAndBold(text);
 392
 393             // Do hard breaks:
 394             text = text.replace(/  +\n/g," <br />\n");
 395
 396             return text;
 397     }
 398
 399     var _EscapeSpecialCharsWithinTagAttributes = function(text) {
 400     //
 401     // Within tags -- meaning between < and > -- encode [\ ` * _] so they
 402     // don't conflict with their use in Markdown for code, italics and strong.
 403     //
 404
 405             // Build a regex to find HTML tags and comments.  See Friedl's
 406             // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
 407             var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi;
 408
 409             text = text.replace(regex, function(wholeMatch) {
 410                     var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
 411                     tag = escapeCharacters(tag,"\\`*_");
 412                     return tag;
 413             });
 414
 415             return text;
 416     }
 417
 418     var _DoAnchors = function(text) {
 419     //
 420     // Turn Markdown link shortcuts into XHTML <a> tags.
 421     //
 422             //
 423             // First, handle reference-style links: [link text] [id]
 424             //
 425
 426             /*
 427                     text = text.replace(/
 428                     (                                                   // wrap whole match in $1
 429                             \[
 430                             (
 431                                     (?:
 432                                             \[[^\]]*\]          // allow brackets nested one level
 433                                             |
 434                                             [^\[]                       // or anything else
 435                                     )*
 436                             )
 437                             \]
 438
 439                             [ ]?                                        // one optional space
 440                             (?:\n[ ]*)?                         // one optional newline followed by spaces
 441
 442                             \[
 443                             (.*?)                                       // id = $3
 444                             \]
 445                     )()()()()                                   // pad remaining backreferences
 446                     /g,_DoAnchors_callback);
 447             */
 448             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag);
 449
 450             //
 451             // Next, inline-style links: [link text](url "optional title")
 452             //
 453
 454             /*
 455                     text = text.replace(/
 456                             (                                           // wrap whole match in $1
 457                                     \[
 458                                     (
 459                                             (?:
 460                                                     \[[^\]]*\]  // allow brackets nested one level
 461                                             |
 462                                             [^\[\]]                     // or anything else
 463                                     )
 464                             )
 465                             \]
 466                             \(                                          // literal paren
 467                             [ \t]*
 468                             ()                                          // no id, so leave $3 empty
 469                             <?(.*?)>?                           // href = $4
 470                             [ \t]*
 471                             (                                           // $5
 472                                     (['"])                              // quote char = $6
 473                                     (.*?)                               // Title = $7
 474                                     \6                                  // matching quote
 475                                     [ \t]*                              // ignore any spaces/tabs between closing quote and )
 476                             )?                                          // title is optional
 477                             \)
 478                     )
 479                     /g,writeAnchorTag);
 480             */
 481             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?(.*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag);
 482
 483             //
 484             // Last, handle reference-style shortcuts: [link text]
 485             // These must come last in case you've also got [link test][1]
 486             // or [link test](/foo)
 487             //
 488
 489             /*
 490                     text = text.replace(/
 491                     (                                                   // wrap whole match in $1
 492                             \[
 493                             ([^\[\]]+)                          // link text = $2; can't contain '[' or ']'
 494                             \]
 495                     )()()()()()                                 // pad rest of backreferences
 496                     /g, writeAnchorTag);
 497             */
 498             text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
 499
 500             return text;
 501     }
 502
 503     var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 504             if (m7 == undefined) m7 = "";
 505             var whole_match = m1;
 506             var link_text   = m2;
 507             var link_id  = m3.toLowerCase();
 508             var url             = m4;
 509             var title   = m7;
 510
 511             if (url == "") {
 512                     if (link_id == "") {
 513                             // lower-case and turn embedded newlines into spaces
 514                             link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
 515                     }
 516                     url = "#"+link_id;
 517
 518                     if (g_urls[link_id] != undefined) {
 519                             url = g_urls[link_id];
 520                             if (g_titles[link_id] != undefined) {
 521                                     title = g_titles[link_id];
 522                             }
 523                     }
 524                     else {
 525                             if (whole_match.search(/\(\s*\)$/m)>-1) {
 526                                     // Special case for explicit empty url
 527                                     url = "";
 528                             } else {
 529                                     return whole_match;
 530                             }
 531                     }
 532             }
 533
 534             url = escapeCharacters(url,"*_");
 535             var result = "<a href=\"" + url + "\"";
 536
 537             if (title != "") {
 538                     title = title.replace(/"/g,"&quot;");
 539                     title = escapeCharacters(title,"*_");
 540                     result +=  " title=\"" + title + "\"";
 541             }
 542
 543             result += ">" + link_text + "</a>";
 544
 545             return result;
 546     }
 547
 548
 549     var _DoImages = function(text) {
 550     //
 551     // Turn Markdown image shortcuts into <img> tags.
 552     //
 553
 554             //
 555             // First, handle reference-style labeled images: ![alt text][id]
 556             //
 557
 558             /*
 559                     text = text.replace(/
 560                     (                                           // wrap whole match in $1
 561                             !\[
 562                             (.*?)                               // alt text = $2
 563                             \]
 564
 565                             [ ]?                                // one optional space
 566                             (?:\n[ ]*)?                 // one optional newline followed by spaces
 567
 568                             \[
 569                             (.*?)                               // id = $3
 570                             \]
 571                     )()()()()                           // pad rest of backreferences
 572                     /g,writeImageTag);
 573             */
 574             text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag);
 575
 576             //
 577             // Next, handle inline images:  ![alt text](url "optional title")
 578             // Don't forget: encode * and _
 579
 580             /*
 581                     text = text.replace(/
 582                     (                                           // wrap whole match in $1
 583                             !\[
 584                             (.*?)                               // alt text = $2
 585                             \]
 586                             \s?                                 // One optional whitespace character
 587                             \(                                  // literal paren
 588                             [ \t]*
 589                             ()                                  // no id, so leave $3 empty
 590                             <?(\S+?)>?                  // src url = $4
 591                             [ \t]*
 592                             (                                   // $5
 593                                     (['"])                      // quote char = $6
 594                                     (.*?)                       // title = $7
 595                                     \6                          // matching quote
 596                                     [ \t]*
 597                             )?                                  // title is optional
 598                     \)
 599                     )
 600                     /g,writeImageTag);
 601             */
 602             text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag);
 603
 604             return text;
 605     }
 606
 607     var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 608             var whole_match = m1;
 609             var alt_text   = m2;
 610             var link_id  = m3.toLowerCase();
 611             var url             = m4;
 612             var title   = m7;
 613
 614             if (!title) title = "";
 615
 616             if (url == "") {
 617                     if (link_id == "") {
 618                             // lower-case and turn embedded newlines into spaces
 619                             link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
 620                     }
 621                     url = "#"+link_id;
 622
 623                     if (g_urls[link_id] != undefined) {
 624                             url = g_urls[link_id];
 625                             if (g_titles[link_id] != undefined) {
 626                                     title = g_titles[link_id];
 627                             }
 628                     }
 629                     else {
 630                             return whole_match;
 631                     }
 632             }
 633
 634             alt_text = alt_text.replace(/"/g,"&quot;");
 635             url = escapeCharacters(url,"*_");
 636             var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
 637
 638             // attacklab: Markdown.pl adds empty title attributes to images.
 639             // Replicate this bug.
 640
 641             //if (title != "") {
 642                     title = title.replace(/"/g,"&quot;");
 643                     title = escapeCharacters(title,"*_");
 644                     result +=  " title=\"" + title + "\"";
 645             //}
 646
 647             result += " />";
 648
 649             return result;
 650     }
 651
 652
 653     var _DoHeaders = function(text) {
 654
 655             // Setext-style headers:
 656             //  Header 1
 657             //  ========
 658             //
 659             //  Header 2
 660             //  --------
 661             //
 662             text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm,
 663                     function(wholeMatch,m1){return hashBlock('<h1 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h1>");});
 664
 665             text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
 666                     function(matchFound,m1){return hashBlock('<h2 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h2>");});
 667
 668             // atx-style headers:
 669             //  # Header 1
 670             //  ## Header 2
 671             //  ## Header 2 with closing hashes ##
 672             //  ...
 673             //  ###### Header 6
 674             //
 675
 676             /*
 677                     text = text.replace(/
 678                             ^(\#{1,6})                          // $1 = string of #'s
 679                             [ \t]*
 680                             (.+?)                                       // $2 = Header text
 681                             [ \t]*
 682                             \#*                                         // optional closing #'s (not counted)
 683                             \n+
 684                     /gm, function() {...});
 685             */
 686
 687             text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
 688                     function(wholeMatch,m1,m2) {
 689                             var h_level = m1.length;
 690                             return hashBlock("<h" + h_level + ' id="' + headerId(m2) + '">' + _RunSpanGamut(m2) + "</h" + h_level + ">");
 691                     });
 692
 693             function headerId(m) {
 694                     return m.replace(/[^\w]/g, '').toLowerCase();
 695             }
 696             return text;
 697     }
 698
 699     // This declaration keeps Dojo compressor from outputting garbage:
 700     var _ProcessListItems;
 701
 702     var _DoLists = function(text) {
 703     //
 704     // Form HTML ordered (numbered) and unordered (bulleted) lists.
 705     //
 706
 707             // attacklab: add sentinel to hack around khtml/safari bug:
 708             // http://bugs.webkit.org/show_bug.cgi?id=11231
 709             text += "~0";
 710
 711             // Re-usable pattern to match any entirel ul or ol list:
 712
 713             /*
 714                     var whole_list = /
 715                     (                                                                   // $1 = whole list
 716                             (                                                           // $2
 717                                     [ ]{0,3}                                    // attacklab: g_tab_width - 1
 718                                     ([*+-]|\d+[.])                              // $3 = first list item marker
 719                                     [ \t]+
 720                             )
 721                             [^\r]+?
 722                             (                                                           // $4
 723                                     ~0                                                  // sentinel for workaround; should be $
 724                             |
 725                                     \n{2,}
 726                                     (?=\S)
 727                                     (?!                                                 // Negative lookahead for another list item marker
 728                                             [ \t]*
 729                                             (?:[*+-]|\d+[.])[ \t]+
 730                                     )
 731                             )
 732                     )/g
 733             */
 734             var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
 735
 736             if (g_list_level) {
 737                     text = text.replace(whole_list,function(wholeMatch,m1,m2) {
 738                             var list = m1;
 739                             var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol";
 740
 741                             // Turn double returns into triple returns, so that we can make a
 742                             // paragraph for the last item in a list, if necessary:
 743                             list = list.replace(/\n{2,}/g,"\n\n\n");;
 744                             var result = _ProcessListItems(list);
 745
 746                             // Trim any trailing whitespace, to put the closing `</$list_type>`
 747                             // up on the preceding line, to get it past the current stupid
 748                             // HTML block parser. This is a hack to work around the terrible
 749                             // hack that is the HTML block parser.
 750                             result = result.replace(/\s+$/,"");
 751                             result = "<"+list_type+">" + result + "</"+list_type+">\n";
 752                             return result;
 753                     });
 754             } else {
 755                     whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
 756                     text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) {
 757                             var runup = m1;
 758                             var list = m2;
 759
 760                             var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol";
 761                             // Turn double returns into triple returns, so that we can make a
 762                             // paragraph for the last item in a list, if necessary:
 763                             var list = list.replace(/\n{2,}/g,"\n\n\n");;
 764                             var result = _ProcessListItems(list);
 765                             result = runup + "<"+list_type+">\n" + result + "</"+list_type+">\n";
 766                             return result;
 767                     });
 768             }
 769
 770             // attacklab: strip sentinel
 771             text = text.replace(/~0/,"");
 772
 773             return text;
 774     }
 775
 776     _ProcessListItems = function(list_str) {
 777     //
 778     //  Process the contents of a single ordered or unordered list, splitting it
 779     //  into individual list items.
 780     //
 781             // The $g_list_level global keeps track of when we're inside a list.
 782             // Each time we enter a list, we increment it; when we leave a list,
 783             // we decrement. If it's zero, we're not in a list anymore.
 784             //
 785             // We do this because when we're not inside a list, we want to treat
 786             // something like this:
 787             //
 788             //    I recommend upgrading to version
 789             //    8. Oops, now this line is treated
 790             //    as a sub-list.
 791             //
 792             // As a single paragraph, despite the fact that the second line starts
 793             // with a digit-period-space sequence.
 794             //
 795             // Whereas when we're inside a list (or sub-list), that line will be
 796             // treated as the start of a sub-list. What a kludge, huh? This is
 797             // an aspect of Markdown's syntax that's hard to parse perfectly
 798             // without resorting to mind-reading. Perhaps the solution is to
 799             // change the syntax rules such that sub-lists must start with a
 800             // starting cardinal number; e.g. "1." or "a.".
 801
 802             g_list_level++;
 803
 804             // trim trailing blank lines:
 805             list_str = list_str.replace(/\n{2,}$/,"\n");
 806
 807             // attacklab: add sentinel to emulate \z
 808             list_str += "~0";
 809
 810             /*
 811                     list_str = list_str.replace(/
 812                             (\n)?                                                       // leading line = $1
 813                             (^[ \t]*)                                           // leading whitespace = $2
 814                             ([*+-]|\d+[.]) [ \t]+                       // list marker = $3
 815                             ([^\r]+?                                            // list item text   = $4
 816                             (\n{1,2}))
 817                             (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+))
 818                     /gm, function(){...});
 819             */
 820             list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,
 821                     function(wholeMatch,m1,m2,m3,m4){
 822                             var item = m4;
 823                             var leading_line = m1;
 824                             var leading_space = m2;
 825
 826                             if (leading_line || (item.search(/\n{2,}/)>-1)) {
 827                                     item = _RunBlockGamut(_Outdent(item));
 828                             }
 829                             else {
 830                                     // Recursion for sub-lists:
 831                                     item = _DoLists(_Outdent(item));
 832                                     item = item.replace(/\n$/,""); // chomp(item)
 833                                     item = _RunSpanGamut(item);
 834                             }
 835
 836                             return  "<li>" + item + "</li>\n";
 837                     }
 838             );
 839
 840             // attacklab: strip sentinel
 841             list_str = list_str.replace(/~0/g,"");
 842
 843             g_list_level--;
 844             return list_str;
 845     }
 846
 847
 848     var _DoCodeBlocks = function(text) {
 849     //
 850     //  Process Markdown `<pre><code>` blocks.
 851     //
 852
 853             /*
 854                     text = text.replace(text,
 855                             /(?:\n\n|^)
 856                             (                                                           // $1 = the code block -- one or more lines, starting with a space/tab
 857                                     (?:
 858                                             (?:[ ]{4}|\t)                       // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
 859                                             .*\n+
 860                                     )+
 861                             )
 862                             (\n*[ ]{0,3}[^ \t\n]|(?=~0))        // attacklab: g_tab_width
 863                     /g,function(){...});
 864             */
 865
 866             // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
 867             text += "~0";
 868
 869             text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
 870                     function(wholeMatch,m1,m2) {
 871                             var codeblock = m1;
 872                             var nextChar = m2;
 873
 874                             codeblock = _EncodeCode( _Outdent(codeblock));
 875                             codeblock = _Detab(codeblock);
 876                             codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
 877                             codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
 878
 879                             codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
 880
 881                             return hashBlock(codeblock) + nextChar;
 882                     }
 883             );
 884
 885             // attacklab: strip sentinel
 886             text = text.replace(/~0/,"");
 887
 888             return text;
 889     }
 890
 891     var hashBlock = function(text) {
 892             text = text.replace(/(^\n+|\n+$)/g,"");
 893             return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n";
 894     }
 895
 896
 897     var _DoCodeSpans = function(text) {
 898     //
 899     //   *  Backtick quotes are used for <code></code> spans.
 900     //
 901     //   *  You can use multiple backticks as the delimiters if you want to
 902     //   include literal backticks in the code span. So, this input:
 903     //
 904     //           Just type ``foo `bar` baz`` at the prompt.
 905     //
 906     //     Will translate to:
 907     //
 908     //           <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 909     //
 910     //  There's no arbitrary limit to the number of backticks you
 911     //  can use as delimters. If you need three consecutive backticks
 912     //  in your code, use four for delimiters, etc.
 913     //
 914     //  *  You can use spaces to get literal backticks at the edges:
 915     //
 916     //           ... type `` `bar` `` ...
 917     //
 918     //     Turns to:
 919     //
 920     //           ... type <code>`bar`</code> ...
 921     //
 922
 923             /*
 924                     text = text.replace(/
 925                             (^|[^\\])                                   // Character before opening ` can't be a backslash
 926                             (`+)                                                // $2 = Opening run of `
 927                             (                                                   // $3 = The code block
 928                                     [^\r]*?
 929                                     [^`]                                        // attacklab: work around lack of lookbehind
 930                             )
 931                             \2                                                  // Matching closer
 932                             (?!`)
 933                     /gm, function(){...});
 934             */
 935
 936             text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
 937                     function(wholeMatch,m1,m2,m3,m4) {
 938                             var c = m3;
 939                             c = c.replace(/^([ \t]*)/g,"");     // leading whitespace
 940                             c = c.replace(/[ \t]*$/g,"");       // trailing whitespace
 941                             c = _EncodeCode(c);
 942                             return m1+"<code>"+c+"</code>";
 943                     });
 944
 945             return text;
 946     }
 947
 948
 949     var _EncodeCode = function(text) {
 950     //
 951     // Encode/escape certain characters inside Markdown code runs.
 952     // The point is that in code, these characters are literals,
 953     // and lose their special Markdown meanings.
 954     //
 955             // Encode all ampersands; HTML entities are not
 956             // entities within a Markdown code span.
 957             text = text.replace(/&/g,"&amp;");
 958
 959             // Do the angle bracket song and dance:
 960             text = text.replace(/</g,"&lt;");
 961             text = text.replace(/>/g,"&gt;");
 962
 963             // Now, escape characters that are magic in Markdown:
 964             text = escapeCharacters(text,"\*_{}[]\\",false);
 965
 966     // jj the line above breaks this:
 967     //---
 968
 969     //* Item
 970
 971     //   1. Subitem
 972
 973     //            special char: *
 974     //---
 975
 976             return text;
 977     }
 978
 979
 980     var _DoItalicsAndBold = function(text) {
 981
 982             // <strong> must go first:
 983             text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
 984                     "<strong>$2</strong>");
 985
 986             text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
 987                     "<em>$2</em>");
 988
 989             return text;
 990     }
 991
 992
 993     var _DoBlockQuotes = function(text) {
 994
 995             /*
 996                     text = text.replace(/
 997                     (                                                           // Wrap whole match in $1
 998                             (
 999                                     ^[ \t]*>[ \t]?                      // '>' at the start of a line
1000                                     .+\n                                        // rest of the first line
1001                                     (.+\n)*                                     // subsequent consecutive lines
1002                                     \n*                                         // blanks
1003                             )+
1004                     )
1005                     /gm, function(){...});
1006             */
1007
1008             text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
1009                     function(wholeMatch,m1) {
1010                             var bq = m1;
1011
1012                             // attacklab: hack around Konqueror 3.5.4 bug:
1013                             // "----------bug".replace(/^-/g,"") == "bug"
1014
1015                             bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0");   // trim one level of quoting
1016
1017                             // attacklab: clean up hack
1018                             bq = bq.replace(/~0/g,"");
1019
1020                             bq = bq.replace(/^[ \t]+$/gm,"");           // trim whitespace-only lines
1021                             bq = _RunBlockGamut(bq);                            // recurse
1022
1023                             bq = bq.replace(/(^|\n)/g,"$1  ");
1024                             // These leading spaces screw with <pre> content, so we need to fix that:
1025                             bq = bq.replace(
1026                                             /(\s*<pre>[^\r]+?<\/pre>)/gm,
1027                                     function(wholeMatch,m1) {
1028                                             var pre = m1;
1029                                             // attacklab: hack around Konqueror 3.5.4 bug:
1030                                             pre = pre.replace(/^  /mg,"~0");
1031                                             pre = pre.replace(/~0/g,"");
1032                                             return pre;
1033                                     });
1034
1035                             return hashBlock("<blockquote>\n" + bq + "\n</blockquote>");
1036                     });
1037             return text;
1038     }
1039
1040
1041     var _FormParagraphs = function(text) {
1042     //
1043     //  Params:
1044     //    $text - string to process with html <p> tags
1045     //
1046
1047             // Strip leading and trailing lines:
1048             text = text.replace(/^\n+/g,"");
1049             text = text.replace(/\n+$/g,"");
1050
1051             var grafs = text.split(/\n{2,}/g);
1052             var grafsOut = new Array();
1053
1054             //
1055             // Wrap <p> tags.
1056             //
1057             var end = grafs.length;
1058             for (var i=0; i<end; i++) {
1059                     var str = grafs[i];
1060
1061                     // if this is an HTML marker, copy it
1062                     if (str.search(/~K(\d+)K/g) >= 0) {
1063                             grafsOut.push(str);
1064                     }
1065                     else if (str.search(/\S/) >= 0) {
1066                             str = _RunSpanGamut(str);
1067                             str = str.replace(/^([ \t]*)/g,"<p>");
1068                             str += "</p>"
1069                             grafsOut.push(str);
1070                     }
1071
1072             }
1073
1074             //
1075             // Unhashify HTML blocks
1076             //
1077             end = grafsOut.length;
1078             for (var i=0; i<end; i++) {
1079                     // if this is a marker for an html block...
1080                     while (grafsOut[i].search(/~K(\d+)K/) >= 0) {
1081                             var blockText = g_html_blocks[RegExp.$1];
1082                             blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs
1083                             grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText);
1084                     }
1085             }
1086
1087             return grafsOut.join("\n\n");
1088     }
1089
1090
1091     var _EncodeAmpsAndAngles = function(text) {
1092     // Smart processing for ampersands and angle brackets that need to be encoded.
1093
1094             // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1095             //   http://bumppo.net/projects/amputator/
1096             text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&amp;");
1097
1098             // Encode naked <'s
1099             text = text.replace(/<(?![a-z\/?\$!])/gi,"&lt;");
1100
1101             return text;
1102     }
1103
1104
1105     var _EncodeBackslashEscapes = function(text) {
1106     //
1107     //   Parameter:  String.
1108     //   Returns:       The string, with after processing the following backslash
1109     //                     escape sequences.
1110     //
1111
1112             // attacklab: The polite way to do this is with the new
1113             // escapeCharacters() function:
1114             //
1115             //  text = escapeCharacters(text,"\\",true);
1116             //  text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
1117             //
1118             // ...but we're sidestepping its use of the (slow) RegExp constructor
1119             // as an optimization for Firefox.  This function gets called a LOT.
1120
1121             text = text.replace(/\\(\\)/g,escapeCharacters_callback);
1122             text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback);
1123             return text;
1124     }
1125
1126
1127     var _DoAutoLinks = function(text) {
1128
1129             text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"<a href=\"$1\">$1</a>");
1130
1131             // Email addresses: <address@domain.foo>
1132
1133             /*
1134                     text = text.replace(/
1135                             <
1136                             (?:mailto:)?
1137                             (
1138                                     [-.\w]+
1139                                     \@
1140                                     [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1141                             )
1142                             >
1143                     /gi, _DoAutoLinks_callback());
1144             */
1145             text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
1146                     function(wholeMatch,m1) {
1147                             return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
1148                     }
1149             );
1150
1151             return text;
1152     }
1153
1154
1155     var _EncodeEmailAddress = function(addr) {
1156     //
1157     //  Input: an email address, e.g. "foo@example.com"
1158     //
1159     //  Output: the email address as a mailto link, with each character
1160     //  of the address encoded as either a decimal or hex entity, in
1161     //  the hopes of foiling most address harvesting spam bots. E.g.:
1162     //
1163     //  <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1164     //     x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1165     //     &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
1166     //
1167     //  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1168     //  mailing list: <http://tinyurl.com/yu7ue>
1169     //
1170
1171             // attacklab: why can't javascript speak hex?
1172             function char2hex(ch) {
1173                     var hexDigits = '0123456789ABCDEF';
1174                     var dec = ch.charCodeAt(0);
1175                     return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15));
1176             }
1177
1178             var encode = [
1179                     function(ch){return "&#"+ch.charCodeAt(0)+";";},
1180                     function(ch){return "&#x"+char2hex(ch)+";";},
1181                     function(ch){return ch;}
1182             ];
1183
1184             addr = "mailto:" + addr;
1185
1186             addr = addr.replace(/./g, function(ch) {
1187                     if (ch == "@") {
1188                             // this *must* be encoded. I insist.
1189                             ch = encode[Math.floor(Math.random()*2)](ch);
1190                     } else if (ch !=":") {
1191                             // leave ':' alone (to spot mailto: later)
1192                             var r = Math.random();
1193                             // roughly 10% raw, 45% hex, 45% dec
1194                             ch =  (
1195                                             r > .9  ?   encode[2](ch)   :
1196                                             r > .45 ?   encode[1](ch)   :
1197                                                                     encode[0](ch)
1198                                     );
1199                     }
1200                     return ch;
1201             });
1202
1203             addr = "<a href=\"" + addr + "\">" + addr + "</a>";
1204             addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part
1205
1206             return addr;
1207     }
1208
1209
1210     var _UnescapeSpecialChars = function(text) {
1211     //
1212     // Swap back in all the special characters we've hidden.
1213     //
1214             text = text.replace(/~E(\d+)E/g,
1215                     function(wholeMatch,m1) {
1216                             var charCodeToReplace = parseInt(m1);
1217                             return String.fromCharCode(charCodeToReplace);
1218                     }
1219             );
1220             return text;
1221     }
1222
1223
1224     var _Outdent = function(text) {
1225     //
1226     // Remove one level of line-leading tabs or spaces
1227     //
1228
1229             // attacklab: hack around Konqueror 3.5.4 bug:
1230             // "----------bug".replace(/^-/g,"") == "bug"
1231
1232             text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width
1233
1234             // attacklab: clean up hack
1235             text = text.replace(/~0/g,"")
1236
1237             return text;
1238     }
1239
1240     var _Detab = function(text) {
1241     // attacklab: Detab's completely rewritten for speed.
1242     // In perl we could fix it by anchoring the regexp with \G.
1243     // In javascript we're less fortunate.
1244
1245             // expand first n-1 tabs
1246             text = text.replace(/\t(?=\t)/g,"    "); // attacklab: g_tab_width
1247
1248             // replace the nth with two sentinels
1249             text = text.replace(/\t/g,"~A~B");
1250
1251             // use the sentinel to anchor our regex so it doesn't explode
1252             text = text.replace(/~B(.+?)~A/g,
1253                     function(wholeMatch,m1,m2) {
1254                             var leadingText = m1;
1255                             var numSpaces = 4 - leadingText.length % 4;  // attacklab: g_tab_width
1256
1257                             // there *must* be a better way to do this:
1258                             for (var i=0; i<numSpaces; i++) leadingText+=" ";
1259
1260                             return leadingText;
1261                     }
1262             );
1263
1264             // clean up sentinels
1265             text = text.replace(/~A/g,"    ");  // attacklab: g_tab_width
1266             text = text.replace(/~B/g,"");
1267
1268             return text;
1269     }
1270
1271
1272     //
1273     //  attacklab: Utility functions
1274     //
1275
1276
1277     var escapeCharacters = function(text, charsToEscape, afterBackslash) {
1278             // First we have to escape the escape characters so that
1279             // we can build a character class out of them
1280             var regexString = "([" + charsToEscape.replace(/([\[\]\\])/g,"\\$1") + "])";
1281
1282             if (afterBackslash) {
1283                     regexString = "\\\\" + regexString;
1284             }
1285
1286             var regex = new RegExp(regexString,"g");
1287             text = text.replace(regex,escapeCharacters_callback);
1288
1289             return text;
1290     }
1291
1292
1293     var escapeCharacters_callback = function(wholeMatch,m1) {
1294             var charCodeToEscape = m1.charCodeAt(0);
1295             return "~E"+charCodeToEscape+"E";
1296     }
1297
1298 } // end of Showdown.converter
1299
1300 // export
1301 //if (typeof exports != 'undefined') exports.Showdown = Showdown;