git.roojs.org Git - roojs1/blob - ux/Showdown.js

   1 //
   2 // showdown.js -- A javascript port of Markdown.
   3 //
   4 // Copyright (c) 2007 John Fraser.
   5 //
   6 // Original Markdown Copyright (c) 2004-2005 John Gruber
   7 //   <http://daringfireball.net/projects/markdown/>
   8 //
   9 // Redistributable under a BSD-style open source license.
  10 // See license.txt for more information.
  11 //
  12 // The full source distribution is at:
  13 //
  14 //                              A A L
  15 //                              T C A
  16 //                              T K B
  17 //
  18 //   <http://www.attacklab.net/>
  19 //
  20
  21 //
  22 // Wherever possible, Showdown is a straight, line-by-line port
  23 // of the Perl version of Markdown.
  24 //
  25 // This is not a normal parser design; it's basically just a
  26 // series of string substitutions.  It's hard to read and
  27 // maintain this way,  but keeping Showdown close to the original
  28 // design makes it easier to port new features.
  29 //
  30 // More importantly, Showdown behaves like markdown.pl in most
  31 // edge cases.  So web applications can do client-side preview
  32 // in Javascript, and then build identical HTML on the server.
  33 //
  34 // This port needs the new RegExp functionality of ECMA 262,
  35 // 3rd Edition (i.e. Javascript 1.5).  Most modern web browsers
// should do fine.  Even with the new regular expression features,
// we do a lot of work to emulate Perl's regex functionality.
  38 // The tricky changes in this file mostly have the "attacklab:"
  39 // label.  Major or self-explanatory changes don't.
  40 //
  41 // Smart diff tools like Araxis Merge will be able to match up
  42 // this file with markdown.pl in a useful way.  A little tweaking
  43 // helps: in a copy of markdown.pl, replace "#" with "//" and
  44 // replace "$text" with "text".  Be sure to ignore whitespace
  45 // and line endings.
  46 //
  47
  48
  49 //
  50 // Showdown usage:
  51 //
  52 //   var text = "Markdown *rocks*.";
  53 //
  54 //   var converter = new Showdown.converter();
  55 //   var html = converter.makeHtml(text);
  56 //
  57 //   alert(html);
  58 //
  59 // Note: move the sample code to the bottom of this
  60 // file before uncommenting it.
  61 //
  62
  63
  64 //
  65 // Showdown namespace
  66 //
// Register the 'Roo.ux' namespace with the Roo library before attaching to it
// (presumably Roo.namespace creates any missing objects on that path --
// confirm against the Roo library).
Roo.namespace('Roo.ux');
// Container object for this port; the converter constructor is attached to it.
Roo.ux.Showdown = {};
  69
  70 //
  71 // converter
  72 //
  73 // Wraps all "globals" so that the only thing
  74 // exposed is makeHtml().
  75 //
  76 Roo.ux.Showdown.converter = function() {
  77
  78     //
  79     // Globals:
  80     //
  81
  82     // Global hashes, used by various utility routines
  83     var g_urls;
  84     var g_titles;
  85     var g_html_blocks;
  86
  87     // Used to track when we're inside an ordered or unordered list
  88     // (see _ProcessListItems() for details):
  89     var g_list_level = 0;
  90
  91
  92     this.makeHtml = function(text) {
  93     //
  94     // Main function. The order in which other subs are called here is
  95     // essential. Link and image substitutions need to happen before
  96     // _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  97     // and <img> tags get encoded.
  98     //
  99
 100             // Clear the global hashes. If we don't clear these, you get conflicts
 101             // from other articles when generating a page which contains more than
 102             // one article (e.g. an index page that shows the N most recent
 103             // articles):
 104             g_urls = new Array();
 105             g_titles = new Array();
 106             g_html_blocks = new Array();
 107
 108             // attacklab: Replace ~ with ~T
 109             // This lets us use tilde as an escape char to avoid md5 hashes
 110             // The choice of character is arbitray; anything that isn't
 111         // magic in Markdown will work.
 112             text = text.replace(/~/g,"~T");
 113
 114             // attacklab: Replace $ with ~D
 115             // RegExp interprets $ as a special character
 116             // when it's in a replacement string
 117             text = text.replace(/\$/g,"~D");
 118
 119             // Standardize line endings
 120             text = text.replace(/\r\n/g,"\n"); // DOS to Unix
 121             text = text.replace(/\r/g,"\n"); // Mac to Unix
 122
 123             // Make sure text begins and ends with a couple of newlines:
 124             text = "\n\n" + text + "\n\n";
 125
 126             // Convert all tabs to spaces.
 127             text = _Detab(text);
 128
 129             // Strip any lines consisting only of spaces and tabs.
 130             // This makes subsequent regexen easier to write, because we can
 131             // match consecutive blank lines with /\n+/ instead of something
 132             // contorted like /[ \t]*\n+/ .
 133             text = text.replace(/^[ \t]+$/mg,"");
 134
 135             // Turn block-level HTML blocks into hash entries
 136             text = _HashHTMLBlocks(text);
 137
 138             // Strip link definitions, store in hashes.
 139             text = _StripLinkDefinitions(text);
 140
 141             text = _RunBlockGamut(text);
 142
 143             text = _UnescapeSpecialChars(text);
 144
 145             // attacklab: Restore dollar signs
 146             text = text.replace(/~D/g,"$$");
 147
 148             // attacklab: Restore tildes
 149             text = text.replace(/~T/g,"~");
 150
 151             return text;
 152     }
 153
 154
 155     var _StripLinkDefinitions = function(text) {
 156     //
 157     // Strips link definitions from text, stores the URLs and titles in
 158     // hash references.
 159     //
 160
 161             // Link defs are in the form: ^[id]: url "optional title"
 162
 163             /*
 164                     var text = text.replace(/
 165                                     ^[ ]{0,3}\[(.+)\]:  // id = $1  attacklab: g_tab_width - 1
 166                                       [ \t]*
 167                                       \n?                               // maybe *one* newline
 168                                       [ \t]*
 169                                     <?(\S+?)>?                  // url = $2
 170                                       [ \t]*
 171                                       \n?                               // maybe one newline
 172                                       [ \t]*
 173                                     (?:
 174                                       (\n*)                             // any lines skipped = $3 attacklab: lookbehind removed
 175                                       ["(]
 176                                       (.+?)                             // title = $4
 177                                       [")]
 178                                       [ \t]*
 179                                     )?                                  // title is optional
 180                                     (?:\n+|$)
 181                               /gm,
 182                               function(){...});
 183             */
 184             text = text.replace(/^[ ]{0,3}\[(.+)\]:[ \t]*\n?[ \t]*<?(\S+?)>?[ \t]*\n?[ \t]*(?:(\n*)["(](.+?)[")][ \t]*)?(?:\n+|\Z)/gm,
 185                     function (wholeMatch,m1,m2,m3,m4) {
 186                             m1 = m1.toLowerCase();
 187                             g_urls[m1] = _EncodeAmpsAndAngles(m2);  // Link IDs are case-insensitive
 188                             if (m3) {
 189                                     // Oops, found blank lines, so it's not a title.
 190                                     // Put back the parenthetical statement we stole.
 191                                     return m3+m4;
 192                             } else if (m4) {
 193                                     g_titles[m1] = m4.replace(/"/g,"&quot;");
 194                             }
 195
 196                             // Completely remove the definition from the text
 197                             return "";
 198                     }
 199             );
 200
 201             return text;
 202     }
 203
 204
 205     var _HashHTMLBlocks = function(text) {
 206             // attacklab: Double up blank lines to reduce lookaround
 207             text = text.replace(/\n/g,"\n\n");
 208
 209             // Hashify HTML blocks:
 210             // We only want to do this for block-level HTML tags, such as headers,
 211             // lists, and tables. That's because we still want to wrap <p>s around
 212             // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 213             // phrase emphasis, and spans. The list of tags we're looking for is
 214             // hard-coded:
 215             var block_tags_a = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"
 216             var block_tags_b = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"
 217
 218             // First, look for nested blocks, e.g.:
 219             //   <div>
 220             //     <div>
 221             //     tags for inner block must be indented.
 222             //     </div>
 223             //   </div>
 224             //
 225             // The outermost tags must start at the left margin for this to match, and
 226             // the inner nested divs must be indented.
 227             // We need to do this before the next, more liberal match, because the next
 228             // match will start at the first `<div>` and stop at the first `</div>`.
 229
 230             // attacklab: This regex can be expensive when it fails.
 231             /*
 232                     var text = text.replace(/
 233                     (                                           // save in $1
 234                             ^                                   // start of line  (with /m)
 235                             <($block_tags_a)    // start tag = $2
 236                             \b                                  // word break
 237                                                                     // attacklab: hack around khtml/pcre bug...
 238                             [^\r]*?\n                   // any number of lines, minimally matching
 239                             </\2>                               // the matching end tag
 240                             [ \t]*                              // trailing spaces/tabs
 241                             (?=\n+)                             // followed by a newline
 242                     )                                           // attacklab: there are sentinel newlines at end of document
 243                     /gm,function(){...}};
 244             */
 245             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del)\b[^\r]*?\n<\/\2>[ \t]*(?=\n+))/gm,hashElement);
 246
 247             //
 248             // Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 249             //
 250
 251             /*
 252                     var text = text.replace(/
 253                     (                                           // save in $1
 254                             ^                                   // start of line  (with /m)
 255                             <($block_tags_b)    // start tag = $2
 256                             \b                                  // word break
 257                                                                     // attacklab: hack around khtml/pcre bug...
 258                             [^\r]*?                             // any number of lines, minimally matching
 259                             .*</\2>                             // the matching end tag
 260                             [ \t]*                              // trailing spaces/tabs
 261                             (?=\n+)                             // followed by a newline
 262                     )                                           // attacklab: there are sentinel newlines at end of document
 263                     /gm,function(){...}};
 264             */
 265             text = text.replace(/^(<(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math)\b[^\r]*?.*<\/\2>[ \t]*(?=\n+)\n)/gm,hashElement);
 266
 267             // Special case just for <hr />. It was easier to make a special case than
 268             // to make the other regex more complicated.
 269
 270             /*
 271                     text = text.replace(/
 272                     (                                           // save in $1
 273                             \n\n                                // Starting after a blank line
 274                             [ ]{0,3}
 275                             (<(hr)                              // start tag = $2
 276                             \b                                  // word break
 277                             ([^<>])*?                   //
 278                             \/?>)                               // the matching end tag
 279                             [ \t]*
 280                             (?=\n{2,})                  // followed by a blank line
 281                     )
 282                     /g,hashElement);
 283             */
 284             text = text.replace(/(\n[ ]{0,3}(<(hr)\b([^<>])*?\/?>)[ \t]*(?=\n{2,}))/g,hashElement);
 285
 286             // Special case for standalone HTML comments:
 287
 288             /*
 289                     text = text.replace(/
 290                     (                                           // save in $1
 291                             \n\n                                // Starting after a blank line
 292                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 293                             <!
 294                             (--[^\r]*?--\s*)+
 295                             >
 296                             [ \t]*
 297                             (?=\n{2,})                  // followed by a blank line
 298                     )
 299                     /g,hashElement);
 300             */
 301             text = text.replace(/(\n\n[ ]{0,3}<!(--[^\r]*?--\s*)+>[ \t]*(?=\n{2,}))/g,hashElement);
 302
 303             // PHP and ASP-style processor instructions (<?...?> and <%...%>)
 304
 305             /*
 306                     text = text.replace(/
 307                     (?:
 308                             \n\n                                // Starting after a blank line
 309                     )
 310                     (                                           // save in $1
 311                             [ ]{0,3}                    // attacklab: g_tab_width - 1
 312                             (?:
 313                                     <([?%])                     // $2
 314                                     [^\r]*?
 315                                     \2>
 316                             )
 317                             [ \t]*
 318                             (?=\n{2,})                  // followed by a blank line
 319                     )
 320                     /g,hashElement);
 321             */
 322             text = text.replace(/(?:\n\n)([ ]{0,3}(?:<([?%])[^\r]*?\2>)[ \t]*(?=\n{2,}))/g,hashElement);
 323
 324             // attacklab: Undo double lines (see comment at top of this function)
 325             text = text.replace(/\n\n/g,"\n");
 326             return text;
 327     }
 328
 329     var hashElement = function(wholeMatch,m1) {
 330             var blockText = m1;
 331
 332             // Undo double lines
 333             blockText = blockText.replace(/\n\n/g,"\n");
 334             blockText = blockText.replace(/^\n/,"");
 335
 336             // strip trailing blank lines
 337             blockText = blockText.replace(/\n+$/g,"");
 338
 339             // Replace the element text with a marker ("~KxK" where x is its key)
 340             blockText = "\n\n~K" + (g_html_blocks.push(blockText)-1) + "K\n\n";
 341
 342             return blockText;
 343     };
 344
 345     var _RunBlockGamut = function(text) {
 346     //
 347     // These are all the transformations that form block-level
 348     // tags like paragraphs, headers, and list items.
 349     //
 350             text = _DoHeaders(text);
 351
 352             // Do Horizontal Rules:
 353             var key = hashBlock("<hr />");
 354             text = text.replace(/^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$/gm,key);
 355             text = text.replace(/^[ ]{0,2}([ ]?\-[ ]?){3,}[ \t]*$/gm,key);
 356             text = text.replace(/^[ ]{0,2}([ ]?\_[ ]?){3,}[ \t]*$/gm,key);
 357
 358             text = _DoLists(text);
 359             text = _DoCodeBlocks(text);
 360             text = _DoBlockQuotes(text);
 361
 362             // We already ran _HashHTMLBlocks() before, in Markdown(), but that
 363             // was to escape raw HTML in the original Markdown source. This time,
 364             // we're escaping the markup we've just created, so that we don't wrap
 365             // <p> tags around block-level tags.
 366             text = _HashHTMLBlocks(text);
 367             text = _FormParagraphs(text);
 368
 369             return text;
 370     }
 371
 372
 373     var _RunSpanGamut = function(text) {
 374     //
 375     // These are all the transformations that occur *within* block-level
 376     // tags like paragraphs, headers, and list items.
 377     //
 378
 379             text = _DoCodeSpans(text);
 380             text = _EscapeSpecialCharsWithinTagAttributes(text);
 381             text = _EncodeBackslashEscapes(text);
 382
 383             // Process anchor and image tags. Images must come first,
 384             // because ![foo][f] looks like an anchor.
 385             text = _DoImages(text);
 386             text = _DoAnchors(text);
 387
 388             // Make links out of things like `<http://example.com/>`
 389             // Must come after _DoAnchors(), because you can use < and >
 390             // delimiters in inline links like [this](<url>).
 391             text = _DoAutoLinks(text);
 392             text = _EncodeAmpsAndAngles(text);
 393             text = _DoItalicsAndBold(text);
 394
 395             // Do hard breaks:
 396             text = text.replace(/  +\n/g," <br />\n");
 397
 398             return text;
 399     }
 400
 401     var _EscapeSpecialCharsWithinTagAttributes = function(text) {
 402     //
 403     // Within tags -- meaning between < and > -- encode [\ ` * _] so they
 404     // don't conflict with their use in Markdown for code, italics and strong.
 405     //
 406
 407             // Build a regex to find HTML tags and comments.  See Friedl's
 408             // "Mastering Regular Expressions", 2nd Ed., pp. 200-201.
 409             var regex = /(<[a-z\/!$]("[^"]*"|'[^']*'|[^'">])*>|<!(--.*?--\s*)+>)/gi;
 410
 411             text = text.replace(regex, function(wholeMatch) {
 412                     var tag = wholeMatch.replace(/(.)<\/?code>(?=.)/g,"$1`");
 413                     tag = escapeCharacters(tag,"\\`*_");
 414                     return tag;
 415             });
 416
 417             return text;
 418     }
 419
 420     var _DoAnchors = function(text) {
 421     //
 422     // Turn Markdown link shortcuts into XHTML <a> tags.
 423     //
 424             //
 425             // First, handle reference-style links: [link text] [id]
 426             //
 427
 428             /*
 429                     text = text.replace(/
 430                     (                                                   // wrap whole match in $1
 431                             \[
 432                             (
 433                                     (?:
 434                                             \[[^\]]*\]          // allow brackets nested one level
 435                                             |
 436                                             [^\[]                       // or anything else
 437                                     )*
 438                             )
 439                             \]
 440
 441                             [ ]?                                        // one optional space
 442                             (?:\n[ ]*)?                         // one optional newline followed by spaces
 443
 444                             \[
 445                             (.*?)                                       // id = $3
 446                             \]
 447                     )()()()()                                   // pad remaining backreferences
 448                     /g,_DoAnchors_callback);
 449             */
 450             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeAnchorTag);
 451
 452             //
 453             // Next, inline-style links: [link text](url "optional title")
 454             //
 455
 456             /*
 457                     text = text.replace(/
 458                             (                                           // wrap whole match in $1
 459                                     \[
 460                                     (
 461                                             (?:
 462                                                     \[[^\]]*\]  // allow brackets nested one level
 463                                             |
 464                                             [^\[\]]                     // or anything else
 465                                     )
 466                             )
 467                             \]
 468                             \(                                          // literal paren
 469                             [ \t]*
 470                             ()                                          // no id, so leave $3 empty
 471                             <?(.*?)>?                           // href = $4
 472                             [ \t]*
 473                             (                                           // $5
 474                                     (['"])                              // quote char = $6
 475                                     (.*?)                               // Title = $7
 476                                     \6                                  // matching quote
 477                                     [ \t]*                              // ignore any spaces/tabs between closing quote and )
 478                             )?                                          // title is optional
 479                             \)
 480                     )
 481                     /g,writeAnchorTag);
 482             */
 483             text = text.replace(/(\[((?:\[[^\]]*\]|[^\[\]])*)\]\([ \t]*()<?(.*?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeAnchorTag);
 484
 485             //
 486             // Last, handle reference-style shortcuts: [link text]
 487             // These must come last in case you've also got [link test][1]
 488             // or [link test](/foo)
 489             //
 490
 491             /*
 492                     text = text.replace(/
 493                     (                                                   // wrap whole match in $1
 494                             \[
 495                             ([^\[\]]+)                          // link text = $2; can't contain '[' or ']'
 496                             \]
 497                     )()()()()()                                 // pad rest of backreferences
 498                     /g, writeAnchorTag);
 499             */
 500             text = text.replace(/(\[([^\[\]]+)\])()()()()()/g, writeAnchorTag);
 501
 502             return text;
 503     }
 504
 505     var writeAnchorTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 506             if (m7 == undefined) m7 = "";
 507             var whole_match = m1;
 508             var link_text   = m2;
 509             var link_id  = m3.toLowerCase();
 510             var url             = m4;
 511             var title   = m7;
 512
 513             if (url == "") {
 514                     if (link_id == "") {
 515                             // lower-case and turn embedded newlines into spaces
 516                             link_id = link_text.toLowerCase().replace(/ ?\n/g," ");
 517                     }
 518                     url = "#"+link_id;
 519
 520                     if (g_urls[link_id] != undefined) {
 521                             url = g_urls[link_id];
 522                             if (g_titles[link_id] != undefined) {
 523                                     title = g_titles[link_id];
 524                             }
 525                     }
 526                     else {
 527                             if (whole_match.search(/\(\s*\)$/m)>-1) {
 528                                     // Special case for explicit empty url
 529                                     url = "";
 530                             } else {
 531                                     return whole_match;
 532                             }
 533                     }
 534             }
 535
 536             url = escapeCharacters(url,"*_");
 537             var result = "<a href=\"" + url + "\"";
 538
 539             if (title != "") {
 540                     title = title.replace(/"/g,"&quot;");
 541                     title = escapeCharacters(title,"*_");
 542                     result +=  " title=\"" + title + "\"";
 543             }
 544
 545             result += ">" + link_text + "</a>";
 546
 547             return result;
 548     }
 549
 550
 551     var _DoImages = function(text) {
 552     //
 553     // Turn Markdown image shortcuts into <img> tags.
 554     //
 555
 556             //
 557             // First, handle reference-style labeled images: ![alt text][id]
 558             //
 559
 560             /*
 561                     text = text.replace(/
 562                     (                                           // wrap whole match in $1
 563                             !\[
 564                             (.*?)                               // alt text = $2
 565                             \]
 566
 567                             [ ]?                                // one optional space
 568                             (?:\n[ ]*)?                 // one optional newline followed by spaces
 569
 570                             \[
 571                             (.*?)                               // id = $3
 572                             \]
 573                     )()()()()                           // pad rest of backreferences
 574                     /g,writeImageTag);
 575             */
 576             text = text.replace(/(!\[(.*?)\][ ]?(?:\n[ ]*)?\[(.*?)\])()()()()/g,writeImageTag);
 577
 578             //
 579             // Next, handle inline images:  ![alt text](url "optional title")
 580             // Don't forget: encode * and _
 581
 582             /*
 583                     text = text.replace(/
 584                     (                                           // wrap whole match in $1
 585                             !\[
 586                             (.*?)                               // alt text = $2
 587                             \]
 588                             \s?                                 // One optional whitespace character
 589                             \(                                  // literal paren
 590                             [ \t]*
 591                             ()                                  // no id, so leave $3 empty
 592                             <?(\S+?)>?                  // src url = $4
 593                             [ \t]*
 594                             (                                   // $5
 595                                     (['"])                      // quote char = $6
 596                                     (.*?)                       // title = $7
 597                                     \6                          // matching quote
 598                                     [ \t]*
 599                             )?                                  // title is optional
 600                     \)
 601                     )
 602                     /g,writeImageTag);
 603             */
 604             text = text.replace(/(!\[(.*?)\]\s?\([ \t]*()<?(\S+?)>?[ \t]*((['"])(.*?)\6[ \t]*)?\))/g,writeImageTag);
 605
 606             return text;
 607     }
 608
 609     var writeImageTag = function(wholeMatch,m1,m2,m3,m4,m5,m6,m7) {
 610             var whole_match = m1;
 611             var alt_text   = m2;
 612             var link_id  = m3.toLowerCase();
 613             var url             = m4;
 614             var title   = m7;
 615
 616             if (!title) title = "";
 617
 618             if (url == "") {
 619                     if (link_id == "") {
 620                             // lower-case and turn embedded newlines into spaces
 621                             link_id = alt_text.toLowerCase().replace(/ ?\n/g," ");
 622                     }
 623                     url = "#"+link_id;
 624
 625                     if (g_urls[link_id] != undefined) {
 626                             url = g_urls[link_id];
 627                             if (g_titles[link_id] != undefined) {
 628                                     title = g_titles[link_id];
 629                             }
 630                     }
 631                     else {
 632                             return whole_match;
 633                     }
 634             }
 635
 636             alt_text = alt_text.replace(/"/g,"&quot;");
 637             url = escapeCharacters(url,"*_");
 638             var result = "<img src=\"" + url + "\" alt=\"" + alt_text + "\"";
 639
 640             // attacklab: Markdown.pl adds empty title attributes to images.
 641             // Replicate this bug.
 642
 643             //if (title != "") {
 644                     title = title.replace(/"/g,"&quot;");
 645                     title = escapeCharacters(title,"*_");
 646                     result +=  " title=\"" + title + "\"";
 647             //}
 648
 649             result += " />";
 650
 651             return result;
 652     }
 653
 654
 655     var _DoHeaders = function(text) {
 656
 657             // Setext-style headers:
 658             //  Header 1
 659             //  ========
 660             //
 661             //  Header 2
 662             //  --------
 663             //
 664             text = text.replace(/^(.+)[ \t]*\n=+[ \t]*\n+/gm,
 665                     function(wholeMatch,m1){return hashBlock('<h1 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h1>");});
 666
 667             text = text.replace(/^(.+)[ \t]*\n-+[ \t]*\n+/gm,
 668                     function(matchFound,m1){return hashBlock('<h2 id="' + headerId(m1) + '">' + _RunSpanGamut(m1) + "</h2>");});
 669
 670             // atx-style headers:
 671             //  # Header 1
 672             //  ## Header 2
 673             //  ## Header 2 with closing hashes ##
 674             //  ...
 675             //  ###### Header 6
 676             //
 677
 678             /*
 679                     text = text.replace(/
 680                             ^(\#{1,6})                          // $1 = string of #'s
 681                             [ \t]*
 682                             (.+?)                                       // $2 = Header text
 683                             [ \t]*
 684                             \#*                                         // optional closing #'s (not counted)
 685                             \n+
 686                     /gm, function() {...});
 687             */
 688
 689             text = text.replace(/^(\#{1,6})[ \t]*(.+?)[ \t]*\#*\n+/gm,
 690                     function(wholeMatch,m1,m2) {
 691                             var h_level = m1.length;
 692                             return hashBlock("<h" + h_level + ' id="' + headerId(m2) + '">' + _RunSpanGamut(m2) + "</h" + h_level + ">");
 693                     });
 694
 695             function headerId(m) {
 696                     return m.replace(/[^\w]/g, '').toLowerCase();
 697             }
 698             return text;
 699     }
 700
 701     // This declaration keeps Dojo compressor from outputting garbage:
 702     var _ProcessListItems;
 703
 704     var _DoLists = function(text) {
 705     //
 706     // Form HTML ordered (numbered) and unordered (bulleted) lists.
 707     //
 708
 709             // attacklab: add sentinel to hack around khtml/safari bug:
 710             // http://bugs.webkit.org/show_bug.cgi?id=11231
 711             text += "~0";
 712
 713             // Re-usable pattern to match any entirel ul or ol list:
 714
 715             /*
 716                     var whole_list = /
 717                     (                                                                   // $1 = whole list
 718                             (                                                           // $2
 719                                     [ ]{0,3}                                    // attacklab: g_tab_width - 1
 720                                     ([*+-]|\d+[.])                              // $3 = first list item marker
 721                                     [ \t]+
 722                             )
 723                             [^\r]+?
 724                             (                                                           // $4
 725                                     ~0                                                  // sentinel for workaround; should be $
 726                             |
 727                                     \n{2,}
 728                                     (?=\S)
 729                                     (?!                                                 // Negative lookahead for another list item marker
 730                                             [ \t]*
 731                                             (?:[*+-]|\d+[.])[ \t]+
 732                                     )
 733                             )
 734                     )/g
 735             */
 736             var whole_list = /^(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/gm;
 737
 738             if (g_list_level) {
 739                     text = text.replace(whole_list,function(wholeMatch,m1,m2) {
 740                             var list = m1;
 741                             var list_type = (m2.search(/[*+-]/g)>-1) ? "ul" : "ol";
 742
 743                             // Turn double returns into triple returns, so that we can make a
 744                             // paragraph for the last item in a list, if necessary:
 745                             list = list.replace(/\n{2,}/g,"\n\n\n");;
 746                             var result = _ProcessListItems(list);
 747
 748                             // Trim any trailing whitespace, to put the closing `</$list_type>`
 749                             // up on the preceding line, to get it past the current stupid
 750                             // HTML block parser. This is a hack to work around the terrible
 751                             // hack that is the HTML block parser.
 752                             result = result.replace(/\s+$/,"");
 753                             result = "<"+list_type+">" + result + "</"+list_type+">\n";
 754                             return result;
 755                     });
 756             } else {
 757                     whole_list = /(\n\n|^\n?)(([ ]{0,3}([*+-]|\d+[.])[ \t]+)[^\r]+?(~0|\n{2,}(?=\S)(?![ \t]*(?:[*+-]|\d+[.])[ \t]+)))/g;
 758                     text = text.replace(whole_list,function(wholeMatch,m1,m2,m3) {
 759                             var runup = m1;
 760                             var list = m2;
 761
 762                             var list_type = (m3.search(/[*+-]/g)>-1) ? "ul" : "ol";
 763                             // Turn double returns into triple returns, so that we can make a
 764                             // paragraph for the last item in a list, if necessary:
 765                             var list = list.replace(/\n{2,}/g,"\n\n\n");;
 766                             var result = _ProcessListItems(list);
 767                             result = runup + "<"+list_type+">\n" + result + "</"+list_type+">\n";
 768                             return result;
 769                     });
 770             }
 771
 772             // attacklab: strip sentinel
 773             text = text.replace(/~0/,"");
 774
 775             return text;
 776     }
 777
 778     _ProcessListItems = function(list_str) {
 779     //
 780     //  Process the contents of a single ordered or unordered list, splitting it
 781     //  into individual list items.
 782     //
 783             // The $g_list_level global keeps track of when we're inside a list.
 784             // Each time we enter a list, we increment it; when we leave a list,
 785             // we decrement. If it's zero, we're not in a list anymore.
 786             //
 787             // We do this because when we're not inside a list, we want to treat
 788             // something like this:
 789             //
 790             //    I recommend upgrading to version
 791             //    8. Oops, now this line is treated
 792             //    as a sub-list.
 793             //
 794             // As a single paragraph, despite the fact that the second line starts
 795             // with a digit-period-space sequence.
 796             //
 797             // Whereas when we're inside a list (or sub-list), that line will be
 798             // treated as the start of a sub-list. What a kludge, huh? This is
 799             // an aspect of Markdown's syntax that's hard to parse perfectly
 800             // without resorting to mind-reading. Perhaps the solution is to
 801             // change the syntax rules such that sub-lists must start with a
 802             // starting cardinal number; e.g. "1." or "a.".
 803
 804             g_list_level++;
 805
 806             // trim trailing blank lines:
 807             list_str = list_str.replace(/\n{2,}$/,"\n");
 808
 809             // attacklab: add sentinel to emulate \z
 810             list_str += "~0";
 811
 812             /*
 813                     list_str = list_str.replace(/
 814                             (\n)?                                                       // leading line = $1
 815                             (^[ \t]*)                                           // leading whitespace = $2
 816                             ([*+-]|\d+[.]) [ \t]+                       // list marker = $3
 817                             ([^\r]+?                                            // list item text   = $4
 818                             (\n{1,2}))
 819                             (?= \n* (~0 | \2 ([*+-]|\d+[.]) [ \t]+))
 820                     /gm, function(){...});
 821             */
 822             list_str = list_str.replace(/(\n)?(^[ \t]*)([*+-]|\d+[.])[ \t]+([^\r]+?(\n{1,2}))(?=\n*(~0|\2([*+-]|\d+[.])[ \t]+))/gm,
 823                     function(wholeMatch,m1,m2,m3,m4){
 824                             var item = m4;
 825                             var leading_line = m1;
 826                             var leading_space = m2;
 827
 828                             if (leading_line || (item.search(/\n{2,}/)>-1)) {
 829                                     item = _RunBlockGamut(_Outdent(item));
 830                             }
 831                             else {
 832                                     // Recursion for sub-lists:
 833                                     item = _DoLists(_Outdent(item));
 834                                     item = item.replace(/\n$/,""); // chomp(item)
 835                                     item = _RunSpanGamut(item);
 836                             }
 837
 838                             return  "<li>" + item + "</li>\n";
 839                     }
 840             );
 841
 842             // attacklab: strip sentinel
 843             list_str = list_str.replace(/~0/g,"");
 844
 845             g_list_level--;
 846             return list_str;
 847     }
 848
 849
 850     var _DoCodeBlocks = function(text) {
 851     //
 852     //  Process Markdown `<pre><code>` blocks.
 853     //
 854
 855             /*
 856                     text = text.replace(text,
 857                             /(?:\n\n|^)
 858                             (                                                           // $1 = the code block -- one or more lines, starting with a space/tab
 859                                     (?:
 860                                             (?:[ ]{4}|\t)                       // Lines must start with a tab or a tab-width of spaces - attacklab: g_tab_width
 861                                             .*\n+
 862                                     )+
 863                             )
 864                             (\n*[ ]{0,3}[^ \t\n]|(?=~0))        // attacklab: g_tab_width
 865                     /g,function(){...});
 866             */
 867
 868             // attacklab: sentinel workarounds for lack of \A and \Z, safari\khtml bug
 869             text += "~0";
 870
 871             text = text.replace(/(?:\n\n|^)((?:(?:[ ]{4}|\t).*\n+)+)(\n*[ ]{0,3}[^ \t\n]|(?=~0))/g,
 872                     function(wholeMatch,m1,m2) {
 873                             var codeblock = m1;
 874                             var nextChar = m2;
 875
 876                             codeblock = _EncodeCode( _Outdent(codeblock));
 877                             codeblock = _Detab(codeblock);
 878                             codeblock = codeblock.replace(/^\n+/g,""); // trim leading newlines
 879                             codeblock = codeblock.replace(/\n+$/g,""); // trim trailing whitespace
 880
 881                             codeblock = "<pre><code>" + codeblock + "\n</code></pre>";
 882
 883                             return hashBlock(codeblock) + nextChar;
 884                     }
 885             );
 886
 887             // attacklab: strip sentinel
 888             text = text.replace(/~0/,"");
 889
 890             return text;
 891     }
 892
 893     var hashBlock = function(text) {
 894             text = text.replace(/(^\n+|\n+$)/g,"");
 895             return "\n\n~K" + (g_html_blocks.push(text)-1) + "K\n\n";
 896     }
 897
 898
 899     var _DoCodeSpans = function(text) {
 900     //
 901     //   *  Backtick quotes are used for <code></code> spans.
 902     //
 903     //   *  You can use multiple backticks as the delimiters if you want to
 904     //   include literal backticks in the code span. So, this input:
 905     //
 906     //           Just type ``foo `bar` baz`` at the prompt.
 907     //
 908     //     Will translate to:
 909     //
 910     //           <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 911     //
 912     //  There's no arbitrary limit to the number of backticks you
 913     //  can use as delimters. If you need three consecutive backticks
 914     //  in your code, use four for delimiters, etc.
 915     //
 916     //  *  You can use spaces to get literal backticks at the edges:
 917     //
 918     //           ... type `` `bar` `` ...
 919     //
 920     //     Turns to:
 921     //
 922     //           ... type <code>`bar`</code> ...
 923     //
 924
 925             /*
 926                     text = text.replace(/
 927                             (^|[^\\])                                   // Character before opening ` can't be a backslash
 928                             (`+)                                                // $2 = Opening run of `
 929                             (                                                   // $3 = The code block
 930                                     [^\r]*?
 931                                     [^`]                                        // attacklab: work around lack of lookbehind
 932                             )
 933                             \2                                                  // Matching closer
 934                             (?!`)
 935                     /gm, function(){...});
 936             */
 937
 938             text = text.replace(/(^|[^\\])(`+)([^\r]*?[^`])\2(?!`)/gm,
 939                     function(wholeMatch,m1,m2,m3,m4) {
 940                             var c = m3;
 941                             c = c.replace(/^([ \t]*)/g,"");     // leading whitespace
 942                             c = c.replace(/[ \t]*$/g,"");       // trailing whitespace
 943                             c = _EncodeCode(c);
 944                             return m1+"<code>"+c+"</code>";
 945                     });
 946
 947             return text;
 948     }
 949
 950
 951     var _EncodeCode = function(text) {
 952     //
 953     // Encode/escape certain characters inside Markdown code runs.
 954     // The point is that in code, these characters are literals,
 955     // and lose their special Markdown meanings.
 956     //
 957             // Encode all ampersands; HTML entities are not
 958             // entities within a Markdown code span.
 959             text = text.replace(/&/g,"&amp;");
 960
 961             // Do the angle bracket song and dance:
 962             text = text.replace(/</g,"&lt;");
 963             text = text.replace(/>/g,"&gt;");
 964
 965             // Now, escape characters that are magic in Markdown:
 966             text = escapeCharacters(text,"\*_{}[]\\",false);
 967
 968     // jj the line above breaks this:
 969     //---
 970
 971     //* Item
 972
 973     //   1. Subitem
 974
 975     //            special char: *
 976     //---
 977
 978             return text;
 979     }
 980
 981
 982     var _DoItalicsAndBold = function(text) {
 983
 984             // <strong> must go first:
 985             text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g,
 986                     "<strong>$2</strong>");
 987
 988             text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g,
 989                     "<em>$2</em>");
 990
 991             return text;
 992     }
 993
 994
 995     var _DoBlockQuotes = function(text) {
 996
 997             /*
 998                     text = text.replace(/
 999                     (                                                           // Wrap whole match in $1
1000                             (
1001                                     ^[ \t]*>[ \t]?                      // '>' at the start of a line
1002                                     .+\n                                        // rest of the first line
1003                                     (.+\n)*                                     // subsequent consecutive lines
1004                                     \n*                                         // blanks
1005                             )+
1006                     )
1007                     /gm, function(){...});
1008             */
1009
1010             text = text.replace(/((^[ \t]*>[ \t]?.+\n(.+\n)*\n*)+)/gm,
1011                     function(wholeMatch,m1) {
1012                             var bq = m1;
1013
1014                             // attacklab: hack around Konqueror 3.5.4 bug:
1015                             // "----------bug".replace(/^-/g,"") == "bug"
1016
1017                             bq = bq.replace(/^[ \t]*>[ \t]?/gm,"~0");   // trim one level of quoting
1018
1019                             // attacklab: clean up hack
1020                             bq = bq.replace(/~0/g,"");
1021
1022                             bq = bq.replace(/^[ \t]+$/gm,"");           // trim whitespace-only lines
1023                             bq = _RunBlockGamut(bq);                            // recurse
1024
1025                             bq = bq.replace(/(^|\n)/g,"$1  ");
1026                             // These leading spaces screw with <pre> content, so we need to fix that:
1027                             bq = bq.replace(
1028                                             /(\s*<pre>[^\r]+?<\/pre>)/gm,
1029                                     function(wholeMatch,m1) {
1030                                             var pre = m1;
1031                                             // attacklab: hack around Konqueror 3.5.4 bug:
1032                                             pre = pre.replace(/^  /mg,"~0");
1033                                             pre = pre.replace(/~0/g,"");
1034                                             return pre;
1035                                     });
1036
1037                             return hashBlock("<blockquote>\n" + bq + "\n</blockquote>");
1038                     });
1039             return text;
1040     }
1041
1042
1043     var _FormParagraphs = function(text) {
1044     //
1045     //  Params:
1046     //    $text - string to process with html <p> tags
1047     //
1048
1049             // Strip leading and trailing lines:
1050             text = text.replace(/^\n+/g,"");
1051             text = text.replace(/\n+$/g,"");
1052
1053             var grafs = text.split(/\n{2,}/g);
1054             var grafsOut = new Array();
1055
1056             //
1057             // Wrap <p> tags.
1058             //
1059             var end = grafs.length;
1060             for (var i=0; i<end; i++) {
1061                     var str = grafs[i];
1062
1063                     // if this is an HTML marker, copy it
1064                     if (str.search(/~K(\d+)K/g) >= 0) {
1065                             grafsOut.push(str);
1066                     }
1067                     else if (str.search(/\S/) >= 0) {
1068                             str = _RunSpanGamut(str);
1069                             str = str.replace(/^([ \t]*)/g,"<p>");
1070                             str += "</p>"
1071                             grafsOut.push(str);
1072                     }
1073
1074             }
1075
1076             //
1077             // Unhashify HTML blocks
1078             //
1079             end = grafsOut.length;
1080             for (var i=0; i<end; i++) {
1081                     // if this is a marker for an html block...
1082                     while (grafsOut[i].search(/~K(\d+)K/) >= 0) {
1083                             var blockText = g_html_blocks[RegExp.$1];
1084                             blockText = blockText.replace(/\$/g,"$$$$"); // Escape any dollar signs
1085                             grafsOut[i] = grafsOut[i].replace(/~K\d+K/,blockText);
1086                     }
1087             }
1088
1089             return grafsOut.join("\n\n");
1090     }
1091
1092
1093     var _EncodeAmpsAndAngles = function(text) {
1094     // Smart processing for ampersands and angle brackets that need to be encoded.
1095
1096             // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1097             //   http://bumppo.net/projects/amputator/
1098             text = text.replace(/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/g,"&amp;");
1099
1100             // Encode naked <'s
1101             text = text.replace(/<(?![a-z\/?\$!])/gi,"&lt;");
1102
1103             return text;
1104     }
1105
1106
1107     var _EncodeBackslashEscapes = function(text) {
1108     //
1109     //   Parameter:  String.
1110     //   Returns:       The string, with after processing the following backslash
1111     //                     escape sequences.
1112     //
1113
1114             // attacklab: The polite way to do this is with the new
1115             // escapeCharacters() function:
1116             //
1117             //  text = escapeCharacters(text,"\\",true);
1118             //  text = escapeCharacters(text,"`*_{}[]()>#+-.!",true);
1119             //
1120             // ...but we're sidestepping its use of the (slow) RegExp constructor
1121             // as an optimization for Firefox.  This function gets called a LOT.
1122
1123             text = text.replace(/\\(\\)/g,escapeCharacters_callback);
1124             text = text.replace(/\\([`*_{}\[\]()>#+-.!])/g,escapeCharacters_callback);
1125             return text;
1126     }
1127
1128
1129     var _DoAutoLinks = function(text) {
1130
1131             text = text.replace(/<((https?|ftp|dict):[^'">\s]+)>/gi,"<a href=\"$1\">$1</a>");
1132
1133             // Email addresses: <address@domain.foo>
1134
1135             /*
1136                     text = text.replace(/
1137                             <
1138                             (?:mailto:)?
1139                             (
1140                                     [-.\w]+
1141                                     \@
1142                                     [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1143                             )
1144                             >
1145                     /gi, _DoAutoLinks_callback());
1146             */
1147             text = text.replace(/<(?:mailto:)?([-.\w]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi,
1148                     function(wholeMatch,m1) {
1149                             return _EncodeEmailAddress( _UnescapeSpecialChars(m1) );
1150                     }
1151             );
1152
1153             return text;
1154     }
1155
1156
1157     var _EncodeEmailAddress = function(addr) {
1158     //
1159     //  Input: an email address, e.g. "foo@example.com"
1160     //
1161     //  Output: the email address as a mailto link, with each character
1162     //  of the address encoded as either a decimal or hex entity, in
1163     //  the hopes of foiling most address harvesting spam bots. E.g.:
1164     //
1165     //  <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1166     //     x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1167     //     &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
1168     //
1169     //  Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1170     //  mailing list: <http://tinyurl.com/yu7ue>
1171     //
1172
1173             // attacklab: why can't javascript speak hex?
1174             function char2hex(ch) {
1175                     var hexDigits = '0123456789ABCDEF';
1176                     var dec = ch.charCodeAt(0);
1177                     return(hexDigits.charAt(dec>>4) + hexDigits.charAt(dec&15));
1178             }
1179
1180             var encode = [
1181                     function(ch){return "&#"+ch.charCodeAt(0)+";";},
1182                     function(ch){return "&#x"+char2hex(ch)+";";},
1183                     function(ch){return ch;}
1184             ];
1185
1186             addr = "mailto:" + addr;
1187
1188             addr = addr.replace(/./g, function(ch) {
1189                     if (ch == "@") {
1190                             // this *must* be encoded. I insist.
1191                             ch = encode[Math.floor(Math.random()*2)](ch);
1192                     } else if (ch !=":") {
1193                             // leave ':' alone (to spot mailto: later)
1194                             var r = Math.random();
1195                             // roughly 10% raw, 45% hex, 45% dec
1196                             ch =  (
1197                                             r > .9  ?   encode[2](ch)   :
1198                                             r > .45 ?   encode[1](ch)   :
1199                                                                     encode[0](ch)
1200                                     );
1201                     }
1202                     return ch;
1203             });
1204
1205             addr = "<a href=\"" + addr + "\">" + addr + "</a>";
1206             addr = addr.replace(/">.+:/g,"\">"); // strip the mailto: from the visible part
1207
1208             return addr;
1209     }
1210
1211
1212     var _UnescapeSpecialChars = function(text) {
1213     //
1214     // Swap back in all the special characters we've hidden.
1215     //
1216             text = text.replace(/~E(\d+)E/g,
1217                     function(wholeMatch,m1) {
1218                             var charCodeToReplace = parseInt(m1);
1219                             return String.fromCharCode(charCodeToReplace);
1220                     }
1221             );
1222             return text;
1223     }
1224
1225
1226     var _Outdent = function(text) {
1227     //
1228     // Remove one level of line-leading tabs or spaces
1229     //
1230
1231             // attacklab: hack around Konqueror 3.5.4 bug:
1232             // "----------bug".replace(/^-/g,"") == "bug"
1233
1234             text = text.replace(/^(\t|[ ]{1,4})/gm,"~0"); // attacklab: g_tab_width
1235
1236             // attacklab: clean up hack
1237             text = text.replace(/~0/g,"")
1238
1239             return text;
1240     }
1241
1242     var _Detab = function(text) {
1243     // attacklab: Detab's completely rewritten for speed.
1244     // In perl we could fix it by anchoring the regexp with \G.
1245     // In javascript we're less fortunate.
1246
1247             // expand first n-1 tabs
1248             text = text.replace(/\t(?=\t)/g,"    "); // attacklab: g_tab_width
1249
1250             // replace the nth with two sentinels
1251             text = text.replace(/\t/g,"~A~B");
1252
1253             // use the sentinel to anchor our regex so it doesn't explode
1254             text = text.replace(/~B(.+?)~A/g,
1255                     function(wholeMatch,m1,m2) {
1256                             var leadingText = m1;
1257                             var numSpaces = 4 - leadingText.length % 4;  // attacklab: g_tab_width
1258
1259                             // there *must* be a better way to do this:
1260                             for (var i=0; i<numSpaces; i++) leadingText+=" ";
1261
1262                             return leadingText;
1263                     }
1264             );
1265
1266             // clean up sentinels
1267             text = text.replace(/~A/g,"    ");  // attacklab: g_tab_width
1268             text = text.replace(/~B/g,"");
1269
1270             return text;
1271     }
1272
1273
1274     //
1275     //  attacklab: Utility functions
1276     //
1277
1278
1279     var escapeCharacters = function(text, charsToEscape, afterBackslash) {
1280             // First we have to escape the escape characters so that
1281             // we can build a character class out of them
1282             var regexString = "([" + charsToEscape.replace(/([\[\]\\])/g,"\\$1") + "])";
1283
1284             if (afterBackslash) {
1285                     regexString = "\\\\" + regexString;
1286             }
1287
1288             var regex = new RegExp(regexString,"g");
1289             text = text.replace(regex,escapeCharacters_callback);
1290
1291             return text;
1292     }
1293
1294
1295     var escapeCharacters_callback = function(wholeMatch,m1) {
1296             var charCodeToEscape = m1.charCodeAt(0);
1297             return "~E"+charCodeToEscape+"E";
1298     }
1299
1300 } // end of Showdown.converter
1301
1302 // export
1303 //if (typeof exports != 'undefined') exports.Showdown = Showdown;