PHPXRef 0.7.1 : Unnamed Project : /lib/markdown/Markdown.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  #
   3  # Markdown  -  A text-to-HTML conversion tool for web writers
   4  #
   5  # PHP Markdown  
   6  # Copyright (c) 2004-2015 Michel Fortin  
   7  # <https://michelf.ca/projects/php-markdown/>
   8  #
   9  # Original Markdown  
  10  # Copyright (c) 2004-2006 John Gruber  
  11  # <https://daringfireball.net/projects/markdown/>
  12  #
  13  namespace Michelf;
  14  
  15  
  16  #
  17  # Markdown Parser Class
  18  #
  19  
  20  class Markdown implements MarkdownInterface {
  21  
  22      ### Version ###
  23  
  24      const  MARKDOWNLIB_VERSION  =  "1.6.0";
  25  
  26      ### Simple Function Interface ###
  27  
  28  	public static function defaultTransform($text) {
  29      #
  30      # Initialize the parser and return the result of its transform method.
  31      # This will work fine for derived classes too.
  32      #
  33          # Take parser class on which this function was called.
  34          $parser_class = \get_called_class();
  35  
  36          # try to take parser from the static parser list
  37          static $parser_list;
  38          $parser =& $parser_list[$parser_class];
  39  
  40          # create the parser it not already set
  41          if (!$parser)
  42              $parser = new $parser_class;
  43  
  44          # Transform text using parser.
  45          return $parser->transform($text);
  46      }
  47  
  48      ### Configuration Variables ###
  49  
  50      # Change to ">" for HTML output.
  51      public $empty_element_suffix = " />";
  52      public $tab_width = 4;
  53      
  54      # Change to `true` to disallow markup or entities.
  55      public $no_markup = false;
  56      public $no_entities = false;
  57      
  58      # Predefined urls and titles for reference links and images.
  59      public $predef_urls = array();
  60      public $predef_titles = array();
  61  
  62      # Optional filter function for URLs
  63      public $url_filter_func = null;
  64  
  65      # Optional header id="" generation callback function.
  66      public $header_id_func = null;
  67      
  68      # Optional function for converting code block content to HTML
  69      public $code_block_content_func = null;
  70  
  71      # Class attribute to toggle "enhanced ordered list" behaviour
  72      # setting this to true will allow ordered lists to start from the index
  73      # number that is defined first.  For example:
  74      # 2. List item two
  75      # 3. List item three
  76      # 
  77      # becomes
  78      # <ol start="2">
  79      # <li>List item two</li>
  80      # <li>List item three</li>
  81      # </ol>
  82      public $enhanced_ordered_list = false;
  83  
  84      ### Parser Implementation ###
  85  
  86      # Regex to match balanced [brackets].
  87      # Needed to insert a maximum bracket depth while converting to PHP.
  88      protected $nested_brackets_depth = 6;
  89      protected $nested_brackets_re;
  90      
  91      protected $nested_url_parenthesis_depth = 4;
  92      protected $nested_url_parenthesis_re;
  93  
  94      # Table of hash values for escaped characters:
  95      protected $escape_chars = '\`*_{}[]()>#+-.!';
  96      protected $escape_chars_re;
  97  
  98  
  99  	public function __construct() {
 100      #
 101      # Constructor function. Initialize appropriate member variables.
 102      #
 103          $this->_initDetab();
 104          $this->prepareItalicsAndBold();
 105      
 106          $this->nested_brackets_re = 
 107              str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 108              str_repeat('\])*', $this->nested_brackets_depth);
 109      
 110          $this->nested_url_parenthesis_re = 
 111              str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 112              str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 113          
 114          $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 115          
 116          # Sort document, block, and span gamut in ascendent priority order.
 117          asort($this->document_gamut);
 118          asort($this->block_gamut);
 119          asort($this->span_gamut);
 120      }
 121  
 122  
 123      # Internal hashes used during transformation.
 124      protected $urls = array();
 125      protected $titles = array();
 126      protected $html_hashes = array();
 127      
 128      # Status flag to avoid invalid nesting.
 129      protected $in_anchor = false;
 130      
 131      
 132  	protected function setup() {
 133      #
 134      # Called before the transformation process starts to setup parser 
 135      # states.
 136      #
 137          # Clear global hashes.
 138          $this->urls = $this->predef_urls;
 139          $this->titles = $this->predef_titles;
 140          $this->html_hashes = array();
 141          
 142          $this->in_anchor = false;
 143      }
 144      
 145  	protected function teardown() {
 146      #
 147      # Called after the transformation process to clear any variable 
 148      # which may be taking up memory unnecessarly.
 149      #
 150          $this->urls = array();
 151          $this->titles = array();
 152          $this->html_hashes = array();
 153      }
 154  
 155  
 156  	public function transform($text) {
 157      #
 158      # Main function. Performs some preprocessing on the input text
 159      # and pass it through the document gamut.
 160      #
 161          $this->setup();
 162      
 163          # Remove UTF-8 BOM and marker character in input, if present.
 164          $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 165  
 166          # Standardize line endings:
 167          #   DOS to Unix and Mac to Unix
 168          $text = preg_replace('{\r\n?}', "\n", $text);
 169  
 170          # Make sure $text ends with a couple of newlines:
 171          $text .= "\n\n";
 172  
 173          # Convert all tabs to spaces.
 174          $text = $this->detab($text);
 175  
 176          # Turn block-level HTML blocks into hash entries
 177          $text = $this->hashHTMLBlocks($text);
 178  
 179          # Strip any lines consisting only of spaces and tabs.
 180          # This makes subsequent regexen easier to write, because we can
 181          # match consecutive blank lines with /\n+/ instead of something
 182          # contorted like /[ ]*\n+/ .
 183          $text = preg_replace('/^[ ]+$/m', '', $text);
 184  
 185          # Run document gamut methods.
 186          foreach ($this->document_gamut as $method => $priority) {
 187              $text = $this->$method($text);
 188          }
 189          
 190          $this->teardown();
 191  
 192          return $text . "\n";
 193      }
 194      
 195      protected $document_gamut = array(
 196          # Strip link definitions, store in hashes.
 197          "stripLinkDefinitions" => 20,
 198          
 199          "runBasicBlockGamut"   => 30,
 200          );
 201  
 202  
 203  	protected function stripLinkDefinitions($text) {
 204      #
 205      # Strips link definitions from text, stores the URLs and titles in
 206      # hash references.
 207      #
 208          $less_than_tab = $this->tab_width - 1;
 209  
 210          # Link defs are in the form: ^[id]: url "optional title"
 211          $text = preg_replace_callback('{
 212                              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
 213                                [ ]*
 214                                \n?                # maybe *one* newline
 215                                [ ]*
 216                              (?:
 217                                <(.+?)>            # url = $2
 218                              |
 219                                (\S+?)            # url = $3
 220                              )
 221                                [ ]*
 222                                \n?                # maybe one newline
 223                                [ ]*
 224                              (?:
 225                                  (?<=\s)            # lookbehind for whitespace
 226                                  ["(]
 227                                  (.*?)            # title = $4
 228                                  [")]
 229                                  [ ]*
 230                              )?    # title is optional
 231                              (?:\n+|\Z)
 232              }xm',
 233              array($this, '_stripLinkDefinitions_callback'),
 234              $text);
 235          return $text;
 236      }
 237  	protected function _stripLinkDefinitions_callback($matches) {
 238          $link_id = strtolower($matches[1]);
 239          $url = $matches[2] == '' ? $matches[3] : $matches[2];
 240          $this->urls[$link_id] = $url;
 241          $this->titles[$link_id] =& $matches[4];
 242          return ''; # String that will replace the block
 243      }
 244  
 245  
 246  	protected function hashHTMLBlocks($text) {
 247          if ($this->no_markup)  return $text;
 248  
 249          $less_than_tab = $this->tab_width - 1;
 250  
 251          # Hashify HTML blocks:
 252          # We only want to do this for block-level HTML tags, such as headers,
 253          # lists, and tables. That's because we still want to wrap <p>s around
 254          # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 255          # phrase emphasis, and spans. The list of tags we're looking for is
 256          # hard-coded:
 257          #
 258          # *  List "a" is made of tags which can be both inline or block-level.
 259          #    These will be treated block-level when the start tag is alone on 
 260          #    its line, otherwise they're not matched here and will be taken as 
 261          #    inline later.
 262          # *  List "b" is made of tags which are always block-level;
 263          #
 264          $block_tags_a_re = 'ins|del';
 265          $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 266                             'script|noscript|style|form|fieldset|iframe|math|svg|'.
 267                             'article|section|nav|aside|hgroup|header|footer|'.
 268                             'figure';
 269  
 270          # Regular expression for the content of a block tag.
 271          $nested_tags_level = 4;
 272          $attr = '
 273              (?>                # optional tag attributes
 274                \s            # starts with whitespace
 275                (?>
 276                  [^>"/]+        # text outside quotes
 277                |
 278                  /+(?!>)        # slash not followed by ">"
 279                |
 280                  "[^"]*"        # text inside double quotes (tolerate ">")
 281                |
 282                  \'[^\']*\'    # text inside single quotes (tolerate ">")
 283                )*
 284              )?    
 285              ';
 286          $content =
 287              str_repeat('
 288                  (?>
 289                    [^<]+            # content without tag
 290                  |
 291                    <\2            # nested opening tag
 292                      '.$attr.'    # attributes
 293                      (?>
 294                        />
 295                      |
 296                        >', $nested_tags_level).    # end of opening tag
 297                        '.*?'.                    # last level nested tag content
 298              str_repeat('
 299                        </\2\s*>    # closing nested tag
 300                      )
 301                    |                
 302                      <(?!/\2\s*>    # other tags with a different name
 303                    )
 304                  )*',
 305                  $nested_tags_level);
 306          $content2 = str_replace('\2', '\3', $content);
 307  
 308          # First, look for nested blocks, e.g.:
 309          #     <div>
 310          #         <div>
 311          #         tags for inner block must be indented.
 312          #         </div>
 313          #     </div>
 314          #
 315          # The outermost tags must start at the left margin for this to match, and
 316          # the inner nested divs must be indented.
 317          # We need to do this before the next, more liberal match, because the next
 318          # match will start at the first `<div>` and stop at the first `</div>`.
 319          $text = preg_replace_callback('{(?>
 320              (?>
 321                  (?<=\n)            # Starting on its own line
 322                  |                # or
 323                  \A\n?            # the at beginning of the doc
 324              )
 325              (                        # save in $1
 326  
 327                # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
 328                # in between.
 329                      
 330                          [ ]{0,'.$less_than_tab.'}
 331                          <('.$block_tags_b_re.')# start tag = $2
 332                          '.$attr.'>            # attributes followed by > and \n
 333                          '.$content.'        # content, support nesting
 334                          </\2>                # the matching end tag
 335                          [ ]*                # trailing spaces/tabs
 336                          (?=\n+|\Z)    # followed by a newline or end of document
 337  
 338              | # Special version for tags of group a.
 339  
 340                          [ ]{0,'.$less_than_tab.'}
 341                          <('.$block_tags_a_re.')# start tag = $3
 342                          '.$attr.'>[ ]*\n    # attributes followed by >
 343                          '.$content2.'        # content, support nesting
 344                          </\3>                # the matching end tag
 345                          [ ]*                # trailing spaces/tabs
 346                          (?=\n+|\Z)    # followed by a newline or end of document
 347                      
 348              | # Special case just for <hr />. It was easier to make a special 
 349                # case than to make the other regex more complicated.
 350              
 351                          [ ]{0,'.$less_than_tab.'}
 352                          <(hr)                # start tag = $2
 353                          '.$attr.'            # attributes
 354                          /?>                    # the matching end tag
 355                          [ ]*
 356                          (?=\n{2,}|\Z)        # followed by a blank line or end of document
 357              
 358              | # Special case for standalone HTML comments:
 359              
 360                      [ ]{0,'.$less_than_tab.'}
 361                      (?s:
 362                          <!-- .*? -->
 363                      )
 364                      [ ]*
 365                      (?=\n{2,}|\Z)        # followed by a blank line or end of document
 366              
 367              | # PHP and ASP-style processor instructions (<? and <%)
 368              
 369                      [ ]{0,'.$less_than_tab.'}
 370                      (?s:
 371                          <([?%])            # $2
 372                          .*?
 373                          \2>
 374                      )
 375                      [ ]*
 376                      (?=\n{2,}|\Z)        # followed by a blank line or end of document
 377                      
 378              )
 379              )}Sxmi',
 380              array($this, '_hashHTMLBlocks_callback'),
 381              $text);
 382  
 383          return $text;
 384      }
 385  	protected function _hashHTMLBlocks_callback($matches) {
 386          $text = $matches[1];
 387          $key  = $this->hashBlock($text);
 388          return "\n\n$key\n\n";
 389      }
 390      
 391      
 392  	protected function hashPart($text, $boundary = 'X') {
 393      #
 394      # Called whenever a tag must be hashed when a function insert an atomic 
 395      # element in the text stream. Passing $text to through this function gives
 396      # a unique text-token which will be reverted back when calling unhash.
 397      #
 398      # The $boundary argument specify what character should be used to surround
 399      # the token. By convension, "B" is used for block elements that needs not
 400      # to be wrapped into paragraph tags at the end, ":" is used for elements
 401      # that are word separators and "X" is used in the general case.
 402      #
 403          # Swap back any tag hash found in $text so we do not have to `unhash`
 404          # multiple times at the end.
 405          $text = $this->unhash($text);
 406          
 407          # Then hash the block.
 408          static $i = 0;
 409          $key = "$boundary\x1A" . ++$i . $boundary;
 410          $this->html_hashes[$key] = $text;
 411          return $key; # String that will replace the tag.
 412      }
 413  
 414  
 415  	protected function hashBlock($text) {
 416      #
 417      # Shortcut function for hashPart with block-level boundaries.
 418      #
 419          return $this->hashPart($text, 'B');
 420      }
 421  
 422  
 423      protected $block_gamut = array(
 424      #
 425      # These are all the transformations that form block-level
 426      # tags like paragraphs, headers, and list items.
 427      #
 428          "doHeaders"         => 10,
 429          "doHorizontalRules" => 20,
 430          
 431          "doLists"           => 40,
 432          "doCodeBlocks"      => 50,
 433          "doBlockQuotes"     => 60,
 434          );
 435  
 436  	protected function runBlockGamut($text) {
 437      #
 438      # Run block gamut tranformations.
 439      #
 440          # We need to escape raw HTML in Markdown source before doing anything 
 441          # else. This need to be done for each block, and not only at the 
 442          # begining in the Markdown function since hashed blocks can be part of
 443          # list items and could have been indented. Indented blocks would have 
 444          # been seen as a code block in a previous pass of hashHTMLBlocks.
 445          $text = $this->hashHTMLBlocks($text);
 446          
 447          return $this->runBasicBlockGamut($text);
 448      }
 449      
 450  	protected function runBasicBlockGamut($text) {
 451      #
 452      # Run block gamut tranformations, without hashing HTML blocks. This is 
 453      # useful when HTML blocks are known to be already hashed, like in the first
 454      # whole-document pass.
 455      #
 456          foreach ($this->block_gamut as $method => $priority) {
 457              $text = $this->$method($text);
 458          }
 459          
 460          # Finally form paragraph and restore hashed blocks.
 461          $text = $this->formParagraphs($text);
 462  
 463          return $text;
 464      }
 465      
 466      
 467  	protected function doHorizontalRules($text) {
 468          # Do Horizontal Rules:
 469          return preg_replace(
 470              '{
 471                  ^[ ]{0,3}    # Leading space
 472                  ([-*_])        # $1: First marker
 473                  (?>            # Repeated marker group
 474                      [ ]{0,2}    # Zero, one, or two spaces.
 475                      \1            # Marker character
 476                  ){2,}        # Group repeated at least twice
 477                  [ ]*        # Tailing spaces
 478                  $            # End of line.
 479              }mx',
 480              "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
 481              $text);
 482      }
 483  
 484  
 485      protected $span_gamut = array(
 486      #
 487      # These are all the transformations that occur *within* block-level
 488      # tags like paragraphs, headers, and list items.
 489      #
 490          # Process character escapes, code spans, and inline HTML
 491          # in one shot.
 492          "parseSpan"           => -30,
 493  
 494          # Process anchor and image tags. Images must come first,
 495          # because ![foo][f] looks like an anchor.
 496          "doImages"            =>  10,
 497          "doAnchors"           =>  20,
 498          
 499          # Make links out of things like `<https://example.com/>`
 500          # Must come after doAnchors, because you can use < and >
 501          # delimiters in inline links like [this](<url>).
 502          "doAutoLinks"         =>  30,
 503          "encodeAmpsAndAngles" =>  40,
 504  
 505          "doItalicsAndBold"    =>  50,
 506          "doHardBreaks"        =>  60,
 507          );
 508  
 509  	protected function runSpanGamut($text) {
 510      #
 511      # Run span gamut tranformations.
 512      #
 513          foreach ($this->span_gamut as $method => $priority) {
 514              $text = $this->$method($text);
 515          }
 516  
 517          return $text;
 518      }
 519      
 520      
 521  	protected function doHardBreaks($text) {
 522          # Do hard breaks:
 523          return preg_replace_callback('/ {2,}\n/', 
 524              array($this, '_doHardBreaks_callback'), $text);
 525      }
 526  	protected function _doHardBreaks_callback($matches) {
 527          return $this->hashPart("<br$this->empty_element_suffix\n");
 528      }
 529  
 530  
 531  	protected function doAnchors($text) {
 532      #
 533      # Turn Markdown link shortcuts into XHTML <a> tags.
 534      #
 535          if ($this->in_anchor) return $text;
 536          $this->in_anchor = true;
 537          
 538          #
 539          # First, handle reference-style links: [link text] [id]
 540          #
 541          $text = preg_replace_callback('{
 542              (                    # wrap whole match in $1
 543                \[
 544                  ('.$this->nested_brackets_re.')    # link text = $2
 545                \]
 546  
 547                [ ]?                # one optional space
 548                (?:\n[ ]*)?        # one optional newline followed by spaces
 549  
 550                \[
 551                  (.*?)        # id = $3
 552                \]
 553              )
 554              }xs',
 555              array($this, '_doAnchors_reference_callback'), $text);
 556  
 557          #
 558          # Next, inline-style links: [link text](url "optional title")
 559          #
 560          $text = preg_replace_callback('{
 561              (                # wrap whole match in $1
 562                \[
 563                  ('.$this->nested_brackets_re.')    # link text = $2
 564                \]
 565                \(            # literal paren
 566                  [ \n]*
 567                  (?:
 568                      <(.+?)>    # href = $3
 569                  |
 570                      ('.$this->nested_url_parenthesis_re.')    # href = $4
 571                  )
 572                  [ \n]*
 573                  (            # $5
 574                    ([\'"])    # quote char = $6
 575                    (.*?)        # Title = $7
 576                    \6        # matching quote
 577                    [ \n]*    # ignore any spaces/tabs between closing quote and )
 578                  )?            # title is optional
 579                \)
 580              )
 581              }xs',
 582              array($this, '_doAnchors_inline_callback'), $text);
 583  
 584          #
 585          # Last, handle reference-style shortcuts: [link text]
 586          # These must come last in case you've also got [link text][1]
 587          # or [link text](/foo)
 588          #
 589          $text = preg_replace_callback('{
 590              (                    # wrap whole match in $1
 591                \[
 592                  ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
 593                \]
 594              )
 595              }xs',
 596              array($this, '_doAnchors_reference_callback'), $text);
 597  
 598          $this->in_anchor = false;
 599          return $text;
 600      }
 601  	protected function _doAnchors_reference_callback($matches) {
 602          $whole_match =  $matches[1];
 603          $link_text   =  $matches[2];
 604          $link_id     =& $matches[3];
 605  
 606          if ($link_id == "") {
 607              # for shortcut links like [this][] or [this].
 608              $link_id = $link_text;
 609          }
 610          
 611          # lower-case and turn embedded newlines into spaces
 612          $link_id = strtolower($link_id);
 613          $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 614  
 615          if (isset($this->urls[$link_id])) {
 616              $url = $this->urls[$link_id];
 617              $url = $this->encodeURLAttribute($url);
 618              
 619              $result = "<a href=\"$url\"";
 620              if ( isset( $this->titles[$link_id] ) ) {
 621                  $title = $this->titles[$link_id];
 622                  $title = $this->encodeAttribute($title);
 623                  $result .=  " title=\"$title\"";
 624              }
 625          
 626              $link_text = $this->runSpanGamut($link_text);
 627              $result .= ">$link_text</a>";
 628              $result = $this->hashPart($result);
 629          }
 630          else {
 631              $result = $whole_match;
 632          }
 633          return $result;
 634      }
 635  	protected function _doAnchors_inline_callback($matches) {
 636          $whole_match    =  $matches[1];
 637          $link_text        =  $this->runSpanGamut($matches[2]);
 638          $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
 639          $title            =& $matches[7];
 640  
 641          // if the URL was of the form <s p a c e s> it got caught by the HTML
 642          // tag parser and hashed. Need to reverse the process before using the URL.
 643          $unhashed = $this->unhash($url);
 644          if ($unhashed != $url)
 645              $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 646  
 647          $url = $this->encodeURLAttribute($url);
 648  
 649          $result = "<a href=\"$url\"";
 650          if (isset($title)) {
 651              $title = $this->encodeAttribute($title);
 652              $result .=  " title=\"$title\"";
 653          }
 654          
 655          $link_text = $this->runSpanGamut($link_text);
 656          $result .= ">$link_text</a>";
 657  
 658          return $this->hashPart($result);
 659      }
 660  
 661  
 662  	protected function doImages($text) {
 663      #
 664      # Turn Markdown image shortcuts into <img> tags.
 665      #
 666          #
 667          # First, handle reference-style labeled images: ![alt text][id]
 668          #
 669          $text = preg_replace_callback('{
 670              (                # wrap whole match in $1
 671                !\[
 672                  ('.$this->nested_brackets_re.')        # alt text = $2
 673                \]
 674  
 675                [ ]?                # one optional space
 676                (?:\n[ ]*)?        # one optional newline followed by spaces
 677  
 678                \[
 679                  (.*?)        # id = $3
 680                \]
 681  
 682              )
 683              }xs', 
 684              array($this, '_doImages_reference_callback'), $text);
 685  
 686          #
 687          # Next, handle inline images:  ![alt text](url "optional title")
 688          # Don't forget: encode * and _
 689          #
 690          $text = preg_replace_callback('{
 691              (                # wrap whole match in $1
 692                !\[
 693                  ('.$this->nested_brackets_re.')        # alt text = $2
 694                \]
 695                \s?            # One optional whitespace character
 696                \(            # literal paren
 697                  [ \n]*
 698                  (?:
 699                      <(\S*)>    # src url = $3
 700                  |
 701                      ('.$this->nested_url_parenthesis_re.')    # src url = $4
 702                  )
 703                  [ \n]*
 704                  (            # $5
 705                    ([\'"])    # quote char = $6
 706                    (.*?)        # title = $7
 707                    \6        # matching quote
 708                    [ \n]*
 709                  )?            # title is optional
 710                \)
 711              )
 712              }xs',
 713              array($this, '_doImages_inline_callback'), $text);
 714  
 715          return $text;
 716      }
 717  	protected function _doImages_reference_callback($matches) {
 718          $whole_match = $matches[1];
 719          $alt_text    = $matches[2];
 720          $link_id     = strtolower($matches[3]);
 721  
 722          if ($link_id == "") {
 723              $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 724          }
 725  
 726          $alt_text = $this->encodeAttribute($alt_text);
 727          if (isset($this->urls[$link_id])) {
 728              $url = $this->encodeURLAttribute($this->urls[$link_id]);
 729              $result = "<img src=\"$url\" alt=\"$alt_text\"";
 730              if (isset($this->titles[$link_id])) {
 731                  $title = $this->titles[$link_id];
 732                  $title = $this->encodeAttribute($title);
 733                  $result .=  " title=\"$title\"";
 734              }
 735              $result .= $this->empty_element_suffix;
 736              $result = $this->hashPart($result);
 737          }
 738          else {
 739              # If there's no such link ID, leave intact:
 740              $result = $whole_match;
 741          }
 742  
 743          return $result;
 744      }
 745  	protected function _doImages_inline_callback($matches) {
 746          $whole_match    = $matches[1];
 747          $alt_text        = $matches[2];
 748          $url            = $matches[3] == '' ? $matches[4] : $matches[3];
 749          $title            =& $matches[7];
 750  
 751          $alt_text = $this->encodeAttribute($alt_text);
 752          $url = $this->encodeURLAttribute($url);
 753          $result = "<img src=\"$url\" alt=\"$alt_text\"";
 754          if (isset($title)) {
 755              $title = $this->encodeAttribute($title);
 756              $result .=  " title=\"$title\""; # $title already quoted
 757          }
 758          $result .= $this->empty_element_suffix;
 759  
 760          return $this->hashPart($result);
 761      }
 762  
 763  
 764  	protected function doHeaders($text) {
 765          # Setext-style headers:
 766          #      Header 1
 767          #      ========
 768          #  
 769          #      Header 2
 770          #      --------
 771          #
 772          $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 773              array($this, '_doHeaders_callback_setext'), $text);
 774  
 775          # atx-style headers:
 776          #    # Header 1
 777          #    ## Header 2
 778          #    ## Header 2 with closing hashes ##
 779          #    ...
 780          #    ###### Header 6
 781          #
 782          $text = preg_replace_callback('{
 783                  ^(\#{1,6})    # $1 = string of #\'s
 784                  [ ]*
 785                  (.+?)        # $2 = Header text
 786                  [ ]*
 787                  \#*            # optional closing #\'s (not counted)
 788                  \n+
 789              }xm',
 790              array($this, '_doHeaders_callback_atx'), $text);
 791  
 792          return $text;
 793      }
 794  
 795  	protected function _doHeaders_callback_setext($matches) {
 796          # Terrible hack to check we haven't found an empty list item.
 797          if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 798              return $matches[0];
 799          
 800          $level = $matches[2]{0} == '=' ? 1 : 2;
 801  
 802          # id attribute generation
 803          $idAtt = $this->_generateIdFromHeaderValue($matches[1]);
 804  
 805          $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
 806          return "\n" . $this->hashBlock($block) . "\n\n";
 807      }
 808  	protected function _doHeaders_callback_atx($matches) {
 809  
 810          # id attribute generation
 811          $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
 812  
 813          $level = strlen($matches[1]);
 814          $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
 815          return "\n" . $this->hashBlock($block) . "\n\n";
 816      }
 817  
 818  	protected function _generateIdFromHeaderValue($headerValue) {
 819  
 820          # if a header_id_func property is set, we can use it to automatically
 821          # generate an id attribute.
 822          #
 823          # This method returns a string in the form id="foo", or an empty string
 824          # otherwise.
 825          if (!is_callable($this->header_id_func)) {
 826              return "";
 827          }
 828          $idValue = call_user_func($this->header_id_func, $headerValue);
 829          if (!$idValue) return "";
 830  
 831          return ' id="' . $this->encodeAttribute($idValue) . '"';
 832  
 833      }
 834  
 835  	protected function doLists($text) {
 836      #
 837      # Form HTML ordered (numbered) and unordered (bulleted) lists.
 838      #
 839          $less_than_tab = $this->tab_width - 1;
 840  
 841          # Re-usable patterns to match list item bullets and number markers:
 842          $marker_ul_re  = '[*+-]';
 843          $marker_ol_re  = '\d+[\.]';
 844  
 845          $markers_relist = array(
 846              $marker_ul_re => $marker_ol_re,
 847              $marker_ol_re => $marker_ul_re,
 848              );
 849  
 850          foreach ($markers_relist as $marker_re => $other_marker_re) {
 851              # Re-usable pattern to match any entirel ul or ol list:
 852              $whole_list_re = '
 853                  (                                # $1 = whole list
 854                    (                                # $2
 855                      ([ ]{0,'.$less_than_tab.'})    # $3 = number of spaces
 856                      ('.$marker_re.')            # $4 = first list item marker
 857                      [ ]+
 858                    )
 859                    (?s:.+?)
 860                    (                                # $5
 861                        \z
 862                      |
 863                        \n{2,}
 864                        (?=\S)
 865                        (?!                        # Negative lookahead for another list item marker
 866                          [ ]*
 867                          '.$marker_re.'[ ]+
 868                        )
 869                      |
 870                        (?=                        # Lookahead for another kind of list
 871                          \n
 872                          \3                        # Must have the same indentation
 873                          '.$other_marker_re.'[ ]+
 874                        )
 875                    )
 876                  )
 877              '; // mx
 878              
 879              # We use a different prefix before nested lists than top-level lists.
 880              # See extended comment in _ProcessListItems().
 881          
 882              if ($this->list_level) {
 883                  $text = preg_replace_callback('{
 884                          ^
 885                          '.$whole_list_re.'
 886                      }mx',
 887                      array($this, '_doLists_callback'), $text);
 888              }
 889              else {
 890                  $text = preg_replace_callback('{
 891                          (?:(?<=\n)\n|\A\n?) # Must eat the newline
 892                          '.$whole_list_re.'
 893                      }mx',
 894                      array($this, '_doLists_callback'), $text);
 895              }
 896          }
 897  
 898          return $text;
 899      }
 900  	protected function _doLists_callback($matches) {
 901          # Re-usable patterns to match list item bullets and number markers:
 902          $marker_ul_re  = '[*+-]';
 903          $marker_ol_re  = '\d+[\.]';
 904          $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 905          $marker_ol_start_re = '[0-9]+';
 906  
 907          $list = $matches[1];
 908          $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
 909  
 910          $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
 911  
 912          $list .= "\n";
 913          $result = $this->processListItems($list, $marker_any_re);
 914  
 915          $ol_start = 1;
 916          if ($this->enhanced_ordered_list) {
 917              # Get the start number for ordered list.
 918              if ($list_type == 'ol') {
 919                  $ol_start_array = array();
 920                  $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
 921                  if ($ol_start_check){
 922                      $ol_start = $ol_start_array[0];
 923                  }
 924              }
 925          }
 926  
 927          if ($ol_start > 1 && $list_type == 'ol'){
 928              $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
 929          } else {
 930              $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
 931          }
 932          return "\n". $result ."\n\n";
 933      }
 934  
 935      protected $list_level = 0;
 936  
 937  	protected function processListItems($list_str, $marker_any_re) {
 938      #
 939      #    Process the contents of a single ordered or unordered list, splitting it
 940      #    into individual list items.
 941      #
 942          # The $this->list_level global keeps track of when we're inside a list.
 943          # Each time we enter a list, we increment it; when we leave a list,
 944          # we decrement. If it's zero, we're not in a list anymore.
 945          #
 946          # We do this because when we're not inside a list, we want to treat
 947          # something like this:
 948          #
 949          #        I recommend upgrading to version
 950          #        8. Oops, now this line is treated
 951          #        as a sub-list.
 952          #
 953          # As a single paragraph, despite the fact that the second line starts
 954          # with a digit-period-space sequence.
 955          #
 956          # Whereas when we're inside a list (or sub-list), that line will be
 957          # treated as the start of a sub-list. What a kludge, huh? This is
 958          # an aspect of Markdown's syntax that's hard to parse perfectly
 959          # without resorting to mind-reading. Perhaps the solution is to
 960          # change the syntax rules such that sub-lists must start with a
 961          # starting cardinal number; e.g. "1." or "a.".
 962          
 963          $this->list_level++;
 964  
 965          # trim trailing blank lines:
 966          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
 967  
 968          $list_str = preg_replace_callback('{
 969              (\n)?                            # leading line = $1
 970              (^[ ]*)                            # leading whitespace = $2
 971              ('.$marker_any_re.'                # list marker and space = $3
 972                  (?:[ ]+|(?=\n))    # space only required if item is not empty
 973              )
 974              ((?s:.*?))                        # list item text   = $4
 975              (?:(\n+(?=\n))|\n)                # tailing blank line = $5
 976              (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
 977              }xm',
 978              array($this, '_processListItems_callback'), $list_str);
 979  
 980          $this->list_level--;
 981          return $list_str;
 982      }
 983  	protected function _processListItems_callback($matches) {
 984          $item = $matches[4];
 985          $leading_line =& $matches[1];
 986          $leading_space =& $matches[2];
 987          $marker_space = $matches[3];
 988          $tailing_blank_line =& $matches[5];
 989  
 990          if ($leading_line || $tailing_blank_line || 
 991              preg_match('/\n{2,}/', $item))
 992          {
 993              # Replace marker with the appropriate whitespace indentation
 994              $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
 995              $item = $this->runBlockGamut($this->outdent($item)."\n");
 996          }
 997          else {
 998              # Recursion for sub-lists:
 999              $item = $this->doLists($this->outdent($item));
1000              $item = preg_replace('/\n+$/', '', $item);
1001              $item = $this->runSpanGamut($item);
1002          }
1003  
1004          return "<li>" . $item . "</li>\n";
1005      }
1006  
1007  
1008  	protected function doCodeBlocks($text) {
1009      #
1010      #    Process Markdown `<pre><code>` blocks.
1011      #
1012          $text = preg_replace_callback('{
1013                  (?:\n\n|\A\n?)
1014                  (                # $1 = the code block -- one or more lines, starting with a space/tab
1015                    (?>
1016                      [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1017                      .*\n+
1018                    )+
1019                  )
1020                  ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
1021              }xm',
1022              array($this, '_doCodeBlocks_callback'), $text);
1023  
1024          return $text;
1025      }
1026  	protected function _doCodeBlocks_callback($matches) {
1027          $codeblock = $matches[1];
1028  
1029          $codeblock = $this->outdent($codeblock);
1030          if ($this->code_block_content_func) {
1031              $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1032          } else {
1033              $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1034          }
1035  
1036          # trim leading newlines and trailing newlines
1037          $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1038  
1039          $codeblock = "<pre><code>$codeblock\n</code></pre>";
1040          return "\n\n".$this->hashBlock($codeblock)."\n\n";
1041      }
1042  
1043  
1044  	protected function makeCodeSpan($code) {
1045      #
1046      # Create a code span markup for $code. Called from handleSpanToken.
1047      #
1048          $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1049          return $this->hashPart("<code>$code</code>");
1050      }
1051  
1052  
1053      protected $em_relist = array(
1054          ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
1055          '*' => '(?<![\s*])\*(?!\*)',
1056          '_' => '(?<![\s_])_(?!_)',
1057          );
1058      protected $strong_relist = array(
1059          ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
1060          '**' => '(?<![\s*])\*\*(?!\*)',
1061          '__' => '(?<![\s_])__(?!_)',
1062          );
1063      protected $em_strong_relist = array(
1064          ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
1065          '***' => '(?<![\s*])\*\*\*(?!\*)',
1066          '___' => '(?<![\s_])___(?!_)',
1067          );
1068      protected $em_strong_prepared_relist;
1069      
1070  	protected function prepareItalicsAndBold() {
1071      #
1072      # Prepare regular expressions for searching emphasis tokens in any
1073      # context.
1074      #
1075          foreach ($this->em_relist as $em => $em_re) {
1076              foreach ($this->strong_relist as $strong => $strong_re) {
1077                  # Construct list of allowed token expressions.
1078                  $token_relist = array();
1079                  if (isset($this->em_strong_relist["$em$strong"])) {
1080                      $token_relist[] = $this->em_strong_relist["$em$strong"];
1081                  }
1082                  $token_relist[] = $em_re;
1083                  $token_relist[] = $strong_re;
1084                  
1085                  # Construct master expression from list.
1086                  $token_re = '{('. implode('|', $token_relist) .')}';
1087                  $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1088              }
1089          }
1090      }
1091      
1092  	protected function doItalicsAndBold($text) {
1093          $token_stack = array('');
1094          $text_stack = array('');
1095          $em = '';
1096          $strong = '';
1097          $tree_char_em = false;
1098          
1099          while (1) {
1100              #
1101              # Get prepared regular expression for seraching emphasis tokens
1102              # in current context.
1103              #
1104              $token_re = $this->em_strong_prepared_relist["$em$strong"];
1105              
1106              #
1107              # Each loop iteration search for the next emphasis token. 
1108              # Each token is then passed to handleSpanToken.
1109              #
1110              $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1111              $text_stack[0] .= $parts[0];
1112              $token =& $parts[1];
1113              $text =& $parts[2];
1114              
1115              if (empty($token)) {
1116                  # Reached end of text span: empty stack without emitting.
1117                  # any more emphasis.
1118                  while ($token_stack[0]) {
1119                      $text_stack[1] .= array_shift($token_stack);
1120                      $text_stack[0] .= array_shift($text_stack);
1121                  }
1122                  break;
1123              }
1124              
1125              $token_len = strlen($token);
1126              if ($tree_char_em) {
1127                  # Reached closing marker while inside a three-char emphasis.
1128                  if ($token_len == 3) {
1129                      # Three-char closing marker, close em and strong.
1130                      array_shift($token_stack);
1131                      $span = array_shift($text_stack);
1132                      $span = $this->runSpanGamut($span);
1133                      $span = "<strong><em>$span</em></strong>";
1134                      $text_stack[0] .= $this->hashPart($span);
1135                      $em = '';
1136                      $strong = '';
1137                  } else {
1138                      # Other closing marker: close one em or strong and
1139                      # change current token state to match the other
1140                      $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1141                      $tag = $token_len == 2 ? "strong" : "em";
1142                      $span = $text_stack[0];
1143                      $span = $this->runSpanGamut($span);
1144                      $span = "<$tag>$span</$tag>";
1145                      $text_stack[0] = $this->hashPart($span);
1146                      $$tag = ''; # $$tag stands for $em or $strong
1147                  }
1148                  $tree_char_em = false;
1149              } else if ($token_len == 3) {
1150                  if ($em) {
1151                      # Reached closing marker for both em and strong.
1152                      # Closing strong marker:
1153                      for ($i = 0; $i < 2; ++$i) {
1154                          $shifted_token = array_shift($token_stack);
1155                          $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1156                          $span = array_shift($text_stack);
1157                          $span = $this->runSpanGamut($span);
1158                          $span = "<$tag>$span</$tag>";
1159                          $text_stack[0] .= $this->hashPart($span);
1160                          $$tag = ''; # $$tag stands for $em or $strong
1161                      }
1162                  } else {
1163                      # Reached opening three-char emphasis marker. Push on token 
1164                      # stack; will be handled by the special condition above.
1165                      $em = $token{0};
1166                      $strong = "$em$em";
1167                      array_unshift($token_stack, $token);
1168                      array_unshift($text_stack, '');
1169                      $tree_char_em = true;
1170                  }
1171              } else if ($token_len == 2) {
1172                  if ($strong) {
1173                      # Unwind any dangling emphasis marker:
1174                      if (strlen($token_stack[0]) == 1) {
1175                          $text_stack[1] .= array_shift($token_stack);
1176                          $text_stack[0] .= array_shift($text_stack);
1177                      }
1178                      # Closing strong marker:
1179                      array_shift($token_stack);
1180                      $span = array_shift($text_stack);
1181                      $span = $this->runSpanGamut($span);
1182                      $span = "<strong>$span</strong>";
1183                      $text_stack[0] .= $this->hashPart($span);
1184                      $strong = '';
1185                  } else {
1186                      array_unshift($token_stack, $token);
1187                      array_unshift($text_stack, '');
1188                      $strong = $token;
1189                  }
1190              } else {
1191                  # Here $token_len == 1
1192                  if ($em) {
1193                      if (strlen($token_stack[0]) == 1) {
1194                          # Closing emphasis marker:
1195                          array_shift($token_stack);
1196                          $span = array_shift($text_stack);
1197                          $span = $this->runSpanGamut($span);
1198                          $span = "<em>$span</em>";
1199                          $text_stack[0] .= $this->hashPart($span);
1200                          $em = '';
1201                      } else {
1202                          $text_stack[0] .= $token;
1203                      }
1204                  } else {
1205                      array_unshift($token_stack, $token);
1206                      array_unshift($text_stack, '');
1207                      $em = $token;
1208                  }
1209              }
1210          }
1211          return $text_stack[0];
1212      }
1213  
1214  
1215  	protected function doBlockQuotes($text) {
1216          $text = preg_replace_callback('/
1217                (                                # Wrap whole match in $1
1218                  (?>
1219                    ^[ ]*>[ ]?            # ">" at the start of a line
1220                      .+\n                    # rest of the first line
1221                    (.+\n)*                    # subsequent consecutive lines
1222                    \n*                        # blanks
1223                  )+
1224                )
1225              /xm',
1226              array($this, '_doBlockQuotes_callback'), $text);
1227  
1228          return $text;
1229      }
1230  	protected function _doBlockQuotes_callback($matches) {
1231          $bq = $matches[1];
1232          # trim one level of quoting - trim whitespace-only lines
1233          $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1234          $bq = $this->runBlockGamut($bq);        # recurse
1235  
1236          $bq = preg_replace('/^/m', "  ", $bq);
1237          # These leading spaces cause problem with <pre> content, 
1238          # so we need to fix that:
1239          $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1240              array($this, '_doBlockQuotes_callback2'), $bq);
1241  
1242          return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1243      }
1244  	protected function _doBlockQuotes_callback2($matches) {
1245          $pre = $matches[1];
1246          $pre = preg_replace('/^  /m', '', $pre);
1247          return $pre;
1248      }
1249  
1250  
1251  	protected function formParagraphs($text) {
1252      #
1253      #    Params:
1254      #        $text - string to process with html <p> tags
1255      #
1256          # Strip leading and trailing lines:
1257          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1258  
1259          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1260  
1261          #
1262          # Wrap <p> tags and unhashify HTML blocks
1263          #
1264          foreach ($grafs as $key => $value) {
1265              if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1266                  # Is a paragraph.
1267                  $value = $this->runSpanGamut($value);
1268                  $value = preg_replace('/^([ ]*)/', "<p>", $value);
1269                  $value .= "</p>";
1270                  $grafs[$key] = $this->unhash($value);
1271              }
1272              else {
1273                  # Is a block.
1274                  # Modify elements of @grafs in-place...
1275                  $graf = $value;
1276                  $block = $this->html_hashes[$graf];
1277                  $graf = $block;
1278  //                if (preg_match('{
1279  //                    \A
1280  //                    (                            # $1 = <div> tag
1281  //                      <div  \s+
1282  //                      [^>]*
1283  //                      \b
1284  //                      markdown\s*=\s*  ([\'"])    #    $2 = attr quote char
1285  //                      1
1286  //                      \2
1287  //                      [^>]*
1288  //                      >
1289  //                    )
1290  //                    (                            # $3 = contents
1291  //                    .*
1292  //                    )
1293  //                    (</div>)                    # $4 = closing tag
1294  //                    \z
1295  //                    }xs', $block, $matches))
1296  //                {
1297  //                    list(, $div_open, , $div_content, $div_close) = $matches;
1298  //
1299  //                    # We can't call Markdown(), because that resets the hash;
1300  //                    # that initialization code should be pulled into its own sub, though.
1301  //                    $div_content = $this->hashHTMLBlocks($div_content);
1302  //                    
1303  //                    # Run document gamut methods on the content.
1304  //                    foreach ($this->document_gamut as $method => $priority) {
1305  //                        $div_content = $this->$method($div_content);
1306  //                    }
1307  //
1308  //                    $div_open = preg_replace(
1309  //                        '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1310  //
1311  //                    $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1312  //                }
1313                  $grafs[$key] = $graf;
1314              }
1315          }
1316  
1317          return implode("\n\n", $grafs);
1318      }
1319  
1320  
1321  	protected function encodeAttribute($text) {
1322      #
1323      # Encode text for a double-quoted HTML attribute. This function
1324      # is *not* suitable for attributes enclosed in single quotes.
1325      #
1326          $text = $this->encodeAmpsAndAngles($text);
1327          $text = str_replace('"', '&quot;', $text);
1328          return $text;
1329      }
1330  
1331  
1332  	protected function encodeURLAttribute($url, &$text = null) {
1333      #
1334      # Encode text for a double-quoted HTML attribute containing a URL,
1335      # applying the URL filter if set. Also generates the textual
1336      # representation for the URL (removing mailto: or tel:) storing it in $text.
1337      # This function is *not* suitable for attributes enclosed in single quotes.
1338      #
1339          if ($this->url_filter_func)
1340              $url = call_user_func($this->url_filter_func, $url);
1341  
1342          if (preg_match('{^mailto:}i', $url))
1343              $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1344          else if (preg_match('{^tel:}i', $url))
1345          {
1346              $url = $this->encodeAttribute($url);
1347              $text = substr($url, 4);
1348          }
1349          else
1350          {
1351              $url = $this->encodeAttribute($url);
1352              $text = $url;
1353          }
1354  
1355          return $url;
1356      }
1357      
1358      
1359  	protected function encodeAmpsAndAngles($text) {
1360      #
1361      # Smart processing for ampersands and angle brackets that need to 
1362      # be encoded. Valid character entities are left alone unless the
1363      # no-entities mode is set.
1364      #
1365          if ($this->no_entities) {
1366              $text = str_replace('&', '&amp;', $text);
1367          } else {
1368              # Ampersand-encoding based entirely on Nat Irons's Amputator
1369              # MT plugin: <http://bumppo.net/projects/amputator/>
1370              $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1371                                  '&amp;', $text);
1372          }
1373          # Encode remaining <'s
1374          $text = str_replace('<', '&lt;', $text);
1375  
1376          return $text;
1377      }
1378  
1379  
1380  	protected function doAutoLinks($text) {
1381          $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1382              array($this, '_doAutoLinks_url_callback'), $text);
1383  
1384          # Email addresses: <address@domain.foo>
1385          $text = preg_replace_callback('{
1386              <
1387              (?:mailto:)?
1388              (
1389                  (?:
1390                      [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1391                  |
1392                      ".*?"
1393                  )
1394                  \@
1395                  (?:
1396                      [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1397                  |
1398                      \[[\d.a-fA-F:]+\]    # IPv4 & IPv6
1399                  )
1400              )
1401              >
1402              }xi',
1403              array($this, '_doAutoLinks_email_callback'), $text);
1404  
1405          return $text;
1406      }
1407  	protected function _doAutoLinks_url_callback($matches) {
1408          $url = $this->encodeURLAttribute($matches[1], $text);
1409          $link = "<a href=\"$url\">$text</a>";
1410          return $this->hashPart($link);
1411      }
1412  	protected function _doAutoLinks_email_callback($matches) {
1413          $addr = $matches[1];
1414          $url = $this->encodeURLAttribute("mailto:$addr", $text);
1415          $link = "<a href=\"$url\">$text</a>";
1416          return $this->hashPart($link);
1417      }
1418  
1419  
1420  	protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1421      #
1422      #    Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1423      #
1424      #    Output: the same text but with most characters encoded as either a
1425      #        decimal or hex entity, in the hopes of foiling most address
1426      #        harvesting spam bots. E.g.:
1427      #
1428      #        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1429      #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1430      #        &#x6d;
1431      #
1432      #    Note: the additional output $tail is assigned the same value as the
1433      #    ouput, minus the number of characters specified by $head_length.
1434      #
1435      #    Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1436      #   With some optimizations by Milian Wolff. Forced encoding of HTML
1437      #    attribute special characters by Allan Odgaard.
1438      #
1439          if ($text == "") return $tail = "";
1440  
1441          $chars = preg_split('/(?<!^)(?!$)/', $text);
1442          $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed.
1443  
1444          foreach ($chars as $key => $char) {
1445              $ord = ord($char);
1446              # Ignore non-ascii chars.
1447              if ($ord < 128) {
1448                  $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1449                  # roughly 10% raw, 45% hex, 45% dec
1450                  # '@' *must* be encoded. I insist.
1451                  # '"' and '>' have to be encoded inside the attribute
1452                  if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */;
1453                  else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1454                  else              $chars[$key] = '&#'.$ord.';';
1455              }
1456          }
1457  
1458          $text = implode('', $chars);
1459          $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1460  
1461          return $text;
1462      }
1463  
1464  
1465  	protected function parseSpan($str) {
1466      #
1467      # Take the string $str and parse it into tokens, hashing embeded HTML,
1468      # escaped characters and handling code spans.
1469      #
1470          $output = '';
1471          
1472          $span_re = '{
1473                  (
1474                      \\\\'.$this->escape_chars_re.'
1475                  |
1476                      (?<![`\\\\])
1477                      `+                        # code span marker
1478              '.( $this->no_markup ? '' : '
1479                  |
1480                      <!--    .*?     -->        # comment
1481                  |
1482                      <\?.*?\?> | <%.*?%>        # processing instruction
1483                  |
1484                      <[!$]?[-a-zA-Z0-9:_]+    # regular tags
1485                      (?>
1486                          \s
1487                          (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1488                      )?
1489                      >
1490                  |
1491                      <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1492                  |
1493                      </[-a-zA-Z0-9:_]+\s*> # closing tag
1494              ').'
1495                  )
1496                  }xs';
1497  
1498          while (1) {
1499              #
1500              # Each loop iteration seach for either the next tag, the next 
1501              # openning code span marker, or the next escaped character. 
1502              # Each token is then passed to handleSpanToken.
1503              #
1504              $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1505              
1506              # Create token from text preceding tag.
1507              if ($parts[0] != "") {
1508                  $output .= $parts[0];
1509              }
1510              
1511              # Check if we reach the end.
1512              if (isset($parts[1])) {
1513                  $output .= $this->handleSpanToken($parts[1], $parts[2]);
1514                  $str = $parts[2];
1515              }
1516              else {
1517                  break;
1518              }
1519          }
1520          
1521          return $output;
1522      }
1523      
1524      
1525  	protected function handleSpanToken($token, &$str) {
1526      #
1527      # Handle $token provided by parseSpan by determining its nature and 
1528      # returning the corresponding value that should replace it.
1529      #
1530          switch ($token{0}) {
1531              case "\\":
1532                  return $this->hashPart("&#". ord($token{1}). ";");
1533              case "`":
1534                  # Search for end marker in remaining text.
1535                  if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1536                      $str, $matches))
1537                  {
1538                      $str = $matches[2];
1539                      $codespan = $this->makeCodeSpan($matches[1]);
1540                      return $this->hashPart($codespan);
1541                  }
1542                  return $token; // return as text since no ending marker found.
1543              default:
1544                  return $this->hashPart($token);
1545          }
1546      }
1547  
1548  
1549  	protected function outdent($text) {
1550      #
1551      # Remove one level of line-leading tabs or spaces
1552      #
1553          return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1554      }
1555  
1556  
    # String length function for detab. `_initDetab` will create a function to 
    # handle UTF-8 if the default function does not exist.
    # The value is invoked by `_detab_callback` as a callable with the
    # arguments ($line, 'UTF-8').
    protected $utf8_strlen = 'mb_strlen';
1560      
1561  	protected function detab($text) {
1562      #
1563      # Replace tabs with the appropriate amount of space.
1564      #
1565          # For each line we separate the line in blocks delemited by
1566          # tab characters. Then we reconstruct every line by adding the 
1567          # appropriate number of space between each blocks.
1568          
1569          $text = preg_replace_callback('/^.*\t.*$/m',
1570              array($this, '_detab_callback'), $text);
1571  
1572          return $text;
1573      }
1574  	protected function _detab_callback($matches) {
1575          $line = $matches[0];
1576          $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1577          
1578          # Split in blocks.
1579          $blocks = explode("\t", $line);
1580          # Add each blocks to the line.
1581          $line = $blocks[0];
1582          unset($blocks[0]); # Do not add first block twice.
1583          foreach ($blocks as $block) {
1584              # Calculate amount of space, insert spaces, insert block.
1585              $amount = $this->tab_width - 
1586                  $strlen($line, 'UTF-8') % $this->tab_width;
1587              $line .= str_repeat(" ", $amount) . $block;
1588          }
1589          return $line;
1590      }
1591  	protected function _initDetab() {
1592      #
1593      # Check for the availability of the function in the `utf8_strlen` property
1594      # (initially `mb_strlen`). If the function is not available, create a 
1595      # function that will loosely count the number of UTF-8 characters with a
1596      # regular expression.
1597      #
1598          if (function_exists($this->utf8_strlen)) return;
1599          $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1600              "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1601              $text, $m);');
1602      }
1603  
1604  
1605  	protected function unhash($text) {
1606      #
1607      # Swap back in all the tags hashed by _HashHTMLBlocks.
1608      #
1609          return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1610              array($this, '_unhash_callback'), $text);
1611      }
1612  	protected function _unhash_callback($matches) {
1613          return $this->html_hashes[$matches[0]];
1614      }
1615  
1616  }
PHP Cross Reference of Unnamed Project

/lib/markdown/ -> Markdown.php (source)