PHPXRef 0.7.1 : Unnamed Project : /lib/markdown/MarkdownExtra.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  #
   3  # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4  #
   5  # PHP Markdown Extra
   6  # Copyright (c) 2004-2015 Michel Fortin  
   7  # <https://michelf.ca/projects/php-markdown/>
   8  #
   9  # Original Markdown
  10  # Copyright (c) 2004-2006 John Gruber  
  11  # <https://daringfireball.net/projects/markdown/>
  12  #
  13  namespace Michelf;
  14  
  15  
  16  #
  17  # Markdown Extra Parser Class
  18  #
  19  
  20  class MarkdownExtra extends \Michelf\Markdown {
  21  
  22      ### Configuration Variables ###
  23  
  24      # Prefix for footnote ids.
  25      public $fn_id_prefix = "";
  26      
  27      # Optional title attribute for footnote links and backlinks.
  28      public $fn_link_title = "";
  29      public $fn_backlink_title = "";
  30      
  31      # Optional class attribute for footnote links and backlinks.
  32      public $fn_link_class = "footnote-ref";
  33      public $fn_backlink_class = "footnote-backref";
  34  
  35      # Content to be displayed within footnote backlinks. The default is '↩';
  36      # the U+FE0E on the end is a Unicode variant selector used to prevent iOS
  37      # from displaying the arrow character as an emoji.
  38      public $fn_backlink_html = '&#8617;&#xFE0E;';
  39  
  40      # Class name for table cell alignment (%% replaced left/center/right)
  41      # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  42      # If empty, the align attribute is used instead of a class name.
  43      public $table_align_class_tmpl = '';
  44  
  45      # Optional class prefix for fenced code block.
  46      public $code_class_prefix = "";
  47      # Class attribute for code blocks goes on the `code` tag;
  48      # setting this to true will put attributes on the `pre` tag instead.
  49      public $code_attr_on_pre = false;
  50  
  51      # Predefined abbreviations.
  52      public $predef_abbr = array();
  53  
  54      ### Parser Implementation ###
  55  
  56  	public function __construct() {
  57      #
  58      # Constructor function. Initialize the parser object.
  59      #
  60          # Add extra escapable characters before parent constructor 
  61          # initialize the table.
  62          $this->escape_chars .= ':|';
  63          
  64          # Insert extra document, block, and span transformations. 
  65          # Parent constructor will do the sorting.
  66          $this->document_gamut += array(
  67              "doFencedCodeBlocks" => 5,
  68              "stripFootnotes"     => 15,
  69              "stripAbbreviations" => 25,
  70              "appendFootnotes"    => 50,
  71              );
  72          $this->block_gamut += array(
  73              "doFencedCodeBlocks" => 5,
  74              "doTables"           => 15,
  75              "doDefLists"         => 45,
  76              );
  77          $this->span_gamut += array(
  78              "doFootnotes"        => 5,
  79              "doAbbreviations"    => 70,
  80              );
  81          
  82          $this->enhanced_ordered_list = true;
  83          parent::__construct();
  84      }
  85      
  86      
  87      # Extra variables used during extra transformations.
  88      protected $footnotes = array();
  89      protected $footnotes_ordered = array();
  90      protected $footnotes_ref_count = array();
  91      protected $footnotes_numbers = array();
  92      protected $abbr_desciptions = array();
  93      protected $abbr_word_re = '';
  94      
  95      # Give the current footnote number.
  96      protected $footnote_counter = 1;
  97      
  98      
  99  	protected function setup() {
 100      #
 101      # Setting up Extra-specific variables.
 102      #
 103          parent::setup();
 104          
 105          $this->footnotes = array();
 106          $this->footnotes_ordered = array();
 107          $this->footnotes_ref_count = array();
 108          $this->footnotes_numbers = array();
 109          $this->abbr_desciptions = array();
 110          $this->abbr_word_re = '';
 111          $this->footnote_counter = 1;
 112          
 113          foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
 114              if ($this->abbr_word_re)
 115                  $this->abbr_word_re .= '|';
 116              $this->abbr_word_re .= preg_quote($abbr_word);
 117              $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
 118          }
 119      }
 120      
 121  	protected function teardown() {
 122      #
 123      # Clearing Extra-specific variables.
 124      #
 125          $this->footnotes = array();
 126          $this->footnotes_ordered = array();
 127          $this->footnotes_ref_count = array();
 128          $this->footnotes_numbers = array();
 129          $this->abbr_desciptions = array();
 130          $this->abbr_word_re = '';
 131          
 132          parent::teardown();
 133      }
 134      
 135      
 136      ### Extra Attribute Parser ###
 137  
 138      # Expression to use to catch attributes (includes the braces)
 139      protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
 140      # Expression to use when parsing in a context when no capture is desired
 141      protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
 142  
 143  	protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
 144      #
 145      # Parse attributes caught by the $this->id_class_attr_catch_re expression
 146      # and return the HTML-formatted list of attributes.
 147      #
 148      # Currently supported attributes are .class and #id.
 149      #
 150      # In addition, this method also supports supplying a default Id value,
 151      # which will be used to populate the id attribute in case it was not
 152      # overridden.
 153          if (empty($attr) && !$defaultIdValue && empty($classes)) return "";
 154          
 155          # Split on components
 156          preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
 157          $elements = $matches[0];
 158  
 159          # handle classes and ids (only first id taken into account)
 160          $attributes = array();
 161          $id = false;
 162          foreach ($elements as $element) {
 163              if ($element{0} == '.') {
 164                  $classes[] = substr($element, 1);
 165              } else if ($element{0} == '#') {
 166                  if ($id === false) $id = substr($element, 1);
 167              } else if (strpos($element, '=') > 0) {
 168                  $parts = explode('=', $element, 2);
 169                  $attributes[] = $parts[0] . '="' . $parts[1] . '"';
 170              }
 171          }
 172  
 173          if (!$id) $id = $defaultIdValue;
 174  
 175          # compose attributes as string
 176          $attr_str = "";
 177          if (!empty($id)) {
 178              $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
 179          }
 180          if (!empty($classes)) {
 181              $attr_str .= ' class="'. implode(" ", $classes) . '"';
 182          }
 183          if (!$this->no_markup && !empty($attributes)) {
 184              $attr_str .= ' '.implode(" ", $attributes);
 185          }
 186          return $attr_str;
 187      }
 188  
 189  
 190  	protected function stripLinkDefinitions($text) {
 191      #
 192      # Strips link definitions from text, stores the URLs and titles in
 193      # hash references.
 194      #
 195          $less_than_tab = $this->tab_width - 1;
 196  
 197          # Link defs are in the form: ^[id]: url "optional title"
 198          $text = preg_replace_callback('{
 199                              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:    # id = $1
 200                                [ ]*
 201                                \n?                # maybe *one* newline
 202                                [ ]*
 203                              (?:
 204                                <(.+?)>            # url = $2
 205                              |
 206                                (\S+?)            # url = $3
 207                              )
 208                                [ ]*
 209                                \n?                # maybe one newline
 210                                [ ]*
 211                              (?:
 212                                  (?<=\s)            # lookbehind for whitespace
 213                                  ["(]
 214                                  (.*?)            # title = $4
 215                                  [")]
 216                                  [ ]*
 217                              )?    # title is optional
 218                      (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
 219                              (?:\n+|\Z)
 220              }xm',
 221              array($this, '_stripLinkDefinitions_callback'),
 222              $text);
 223          return $text;
 224      }
 225  	protected function _stripLinkDefinitions_callback($matches) {
 226          $link_id = strtolower($matches[1]);
 227          $url = $matches[2] == '' ? $matches[3] : $matches[2];
 228          $this->urls[$link_id] = $url;
 229          $this->titles[$link_id] =& $matches[4];
 230          $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
 231          return ''; # String that will replace the block
 232      }
 233  
 234  
 235      ### HTML Block Parser ###
 236      
 237      # Tags that are always treated as block tags:
 238      protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
 239                             
 240      # Tags treated as block tags only if the opening tag is alone on its line:
 241      protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
 242      
 243      # Tags where markdown="1" default to span mode:
 244      protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
 245      
 246      # Tags which must not have their contents modified, no matter where 
 247      # they appear:
 248      protected $clean_tags_re = 'script|style|math|svg';
 249      
 250      # Tags that do not need to be closed.
 251      protected $auto_close_tags_re = 'hr|img|param|source|track';
 252      
 253  
 254  	protected function hashHTMLBlocks($text) {
 255      #
 256      # Hashify HTML Blocks and "clean tags".
 257      #
 258      # We only want to do this for block-level HTML tags, such as headers,
 259      # lists, and tables. That's because we still want to wrap <p>s around
 260      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 261      # phrase emphasis, and spans. The list of tags we're looking for is
 262      # hard-coded.
 263      #
 264      # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
 265      # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
 266      # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
 267      #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
 268      # These two functions are calling each other. It's recursive!
 269      #
 270          if ($this->no_markup)  return $text;
 271  
 272          #
 273          # Call the HTML-in-Markdown hasher.
 274          #
 275          list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
 276          
 277          return $text;
 278      }
 279  	protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
 280                                          $enclosing_tag_re = '', $span = false)
 281      {
 282      #
 283      # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
 284      #
 285      # *   $indent is the number of space to be ignored when checking for code 
 286      #     blocks. This is important because if we don't take the indent into 
 287      #     account, something like this (which looks right) won't work as expected:
 288      #
 289      #     <div>
 290      #         <div markdown="1">
 291      #         Hello World.  <-- Is this a Markdown code block or text?
 292      #         </div>  <-- Is this a Markdown code block or a real tag?
 293      #     <div>
 294      #
 295      #     If you don't like this, just don't indent the tag on which
 296      #     you apply the markdown="1" attribute.
 297      #
 298      # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
 299      #     tag with that name. Nested tags supported.
 300      #
 301      # *   If $span is true, text inside must treated as span. So any double 
 302      #     newline will be replaced by a single newline so that it does not create 
 303      #     paragraphs.
 304      #
 305      # Returns an array of that form: ( processed text , remaining text )
 306      #
 307          if ($text === '') return array('', '');
 308  
 309          # Regex to check for the presense of newlines around a block tag.
 310          $newline_before_re = '/(?:^\n?|\n\n)*$/';
 311          $newline_after_re = 
 312              '{
 313                  ^                        # Start of text following the tag.
 314                  (?>[ ]*<!--.*?-->)?        # Optional comment.
 315                  [ ]*\n                    # Must be followed by newline.
 316              }xs';
 317          
 318          # Regex to match any tag.
 319          $block_tag_re =
 320              '{
 321                  (                    # $2: Capture whole tag.
 322                      </?                    # Any opening or closing tag.
 323                          (?>                # Tag name.
 324                              '.$this->block_tags_re.'            |
 325                              '.$this->context_block_tags_re.'    |
 326                              '.$this->clean_tags_re.'            |
 327                              (?!\s)'.$enclosing_tag_re.'
 328                          )
 329                          (?:
 330                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
 331                              (?>
 332                                  ".*?"        |    # Double quotes (can contain `>`)
 333                                  \'.*?\'       |    # Single quotes (can contain `>`)
 334                                  .+?                # Anything but quotes and `>`.
 335                              )*?
 336                          )?
 337                      >                    # End of tag.
 338                  |
 339                      <!--    .*?     -->    # HTML Comment
 340                  |
 341                      <\?.*?\?> | <%.*?%>    # Processing instruction
 342                  |
 343                      <!\[CDATA\[.*?\]\]>    # CData Block
 344                  '. ( !$span ? ' # If not in span.
 345                  |
 346                      # Indented code block
 347                      (?: ^[ ]*\n | ^ | \n[ ]*\n )
 348                      [ ]{'.($indent+4).'}[^\n]* \n
 349                      (?>
 350                          (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
 351                      )*
 352                  |
 353                      # Fenced code block marker
 354                      (?<= ^ | \n )
 355                      [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,})
 356                      [ ]*
 357                      (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
 358                      [ ]*
 359                      (?: '.$this->id_class_attr_nocatch_re.' )? # extra attributes
 360                      [ ]*
 361                      (?= \n )
 362                  ' : '' ). ' # End (if not is span).
 363                  |
 364                      # Code span marker
 365                      # Note, this regex needs to go after backtick fenced
 366                      # code blocks but it should also be kept outside of the
 367                      # "if not in span" condition adding backticks to the parser
 368                      `+
 369                  )
 370              }xs';
 371  
 372          
 373          $depth = 0;        # Current depth inside the tag tree.
 374          $parsed = "";    # Parsed text that will be returned.
 375  
 376          #
 377          # Loop through every tag until we find the closing tag of the parent
 378          # or loop until reaching the end of text if no parent tag specified.
 379          #
 380          do {
 381              #
 382              # Split the text using the first $tag_match pattern found.
 383              # Text before  pattern will be first in the array, text after
 384              # pattern will be at the end, and between will be any catches made 
 385              # by the pattern.
 386              #
 387              $parts = preg_split($block_tag_re, $text, 2, 
 388                                  PREG_SPLIT_DELIM_CAPTURE);
 389              
 390              # If in Markdown span mode, add a empty-string span-level hash 
 391              # after each newline to prevent triggering any block element.
 392              if ($span) {
 393                  $void = $this->hashPart("", ':');
 394                  $newline = "$void\n";
 395                  $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
 396              }
 397              
 398              $parsed .= $parts[0]; # Text before current tag.
 399              
 400              # If end of $text has been reached. Stop loop.
 401              if (count($parts) < 3) {
 402                  $text = "";
 403                  break;
 404              }
 405              
 406              $tag  = $parts[1]; # Tag to handle.
 407              $text = $parts[2]; # Remaining text after current tag.
 408              $tag_re = preg_quote($tag); # For use in a regular expression.
 409              
 410              #
 411              # Check for: Fenced code block marker.
 412              # Note: need to recheck the whole tag to disambiguate backtick
 413              # fences from code spans
 414              #
 415              if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) {
 416                  # Fenced code block marker: find matching end marker.
 417                  $fence_indent = strlen($capture[1]); # use captured indent in re
 418                  $fence_re = $capture[2]; # use captured fence in re
 419                  if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,
 420                      $matches)) 
 421                  {
 422                      # End marker found: pass text unchanged until marker.
 423                      $parsed .= $tag . $matches[0];
 424                      $text = substr($text, strlen($matches[0]));
 425                  }
 426                  else {
 427                      # No end marker: just skip it.
 428                      $parsed .= $tag;
 429                  }
 430              }
 431              #
 432              # Check for: Indented code block.
 433              #
 434              else if ($tag{0} == "\n" || $tag{0} == " ") {
 435                  # Indented code block: pass it unchanged, will be handled 
 436                  # later.
 437                  $parsed .= $tag;
 438              }
 439              #
 440              # Check for: Code span marker
 441              # Note: need to check this after backtick fenced code blocks
 442              #
 443              else if ($tag{0} == "`") {
 444                  # Find corresponding end marker.
 445                  $tag_re = preg_quote($tag);
 446                  if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
 447                      $text, $matches))
 448                  {
 449                      # End marker found: pass text unchanged until marker.
 450                      $parsed .= $tag . $matches[0];
 451                      $text = substr($text, strlen($matches[0]));
 452                  }
 453                  else {
 454                      # Unmatched marker: just skip it.
 455                      $parsed .= $tag;
 456                  }
 457              }
 458              #
 459              # Check for: Opening Block level tag or
 460              #            Opening Context Block tag (like ins and del) 
 461              #               used as a block tag (tag is alone on it's line).
 462              #
 463              else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
 464                  (    preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
 465                      preg_match($newline_before_re, $parsed) &&
 466                      preg_match($newline_after_re, $text)    )
 467                  )
 468              {
 469                  # Need to parse tag and following text using the HTML parser.
 470                  list($block_text, $text) = 
 471                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
 472                  
 473                  # Make sure it stays outside of any paragraph by adding newlines.
 474                  $parsed .= "\n\n$block_text\n\n";
 475              }
 476              #
 477              # Check for: Clean tag (like script, math)
 478              #            HTML Comments, processing instructions.
 479              #
 480              else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
 481                  $tag{1} == '!' || $tag{1} == '?')
 482              {
 483                  # Need to parse tag and following text using the HTML parser.
 484                  # (don't check for markdown attribute)
 485                  list($block_text, $text) = 
 486                      $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
 487                  
 488                  $parsed .= $block_text;
 489              }
 490              #
 491              # Check for: Tag with same name as enclosing tag.
 492              #
 493              else if ($enclosing_tag_re !== '' &&
 494                  # Same name as enclosing tag.
 495                  preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
 496              {
 497                  #
 498                  # Increase/decrease nested tag count.
 499                  #
 500                  if ($tag{1} == '/')                        $depth--;
 501                  else if ($tag{strlen($tag)-2} != '/')    $depth++;
 502  
 503                  if ($depth < 0) {
 504                      #
 505                      # Going out of parent element. Clean up and break so we
 506                      # return to the calling function.
 507                      #
 508                      $text = $tag . $text;
 509                      break;
 510                  }
 511                  
 512                  $parsed .= $tag;
 513              }
 514              else {
 515                  $parsed .= $tag;
 516              }
 517          } while ($depth >= 0);
 518          
 519          return array($parsed, $text);
 520      }
 521  	protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
 522      #
 523      # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
 524      #
 525      # *   Calls $hash_method to convert any blocks.
 526      # *   Stops when the first opening tag closes.
 527      # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
 528      #     (it is not inside clean tags)
 529      #
 530      # Returns an array of that form: ( processed text , remaining text )
 531      #
 532          if ($text === '') return array('', '');
 533          
 534          # Regex to match `markdown` attribute inside of a tag.
 535          $markdown_attr_re = '
 536              {
 537                  \s*            # Eat whitespace before the `markdown` attribute
 538                  markdown
 539                  \s*=\s*
 540                  (?>
 541                      (["\'])        # $1: quote delimiter        
 542                      (.*?)        # $2: attribute value
 543                      \1            # matching delimiter    
 544                  |
 545                      ([^\s>]*)    # $3: unquoted attribute value
 546                  )
 547                  ()                # $4: make $3 always defined (avoid warnings)
 548              }xs';
 549          
 550          # Regex to match any tag.
 551          $tag_re = '{
 552                  (                    # $2: Capture whole tag.
 553                      </?                    # Any opening or closing tag.
 554                          [\w:$]+            # Tag name.
 555                          (?:
 556                              (?=[\s"\'/a-zA-Z0-9])    # Allowed characters after tag name.
 557                              (?>
 558                                  ".*?"        |    # Double quotes (can contain `>`)
 559                                  \'.*?\'       |    # Single quotes (can contain `>`)
 560                                  .+?                # Anything but quotes and `>`.
 561                              )*?
 562                          )?
 563                      >                    # End of tag.
 564                  |
 565                      <!--    .*?     -->    # HTML Comment
 566                  |
 567                      <\?.*?\?> | <%.*?%>    # Processing instruction
 568                  |
 569                      <!\[CDATA\[.*?\]\]>    # CData Block
 570                  )
 571              }xs';
 572          
 573          $original_text = $text;        # Save original text in case of faliure.
 574          
 575          $depth        = 0;    # Current depth inside the tag tree.
 576          $block_text    = "";    # Temporary text holder for current text.
 577          $parsed        = "";    # Parsed text that will be returned.
 578  
 579          #
 580          # Get the name of the starting tag.
 581          # (This pattern makes $base_tag_name_re safe without quoting.)
 582          #
 583          if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
 584              $base_tag_name_re = $matches[1];
 585  
 586          #
 587          # Loop through every tag until we find the corresponding closing tag.
 588          #
 589          do {
 590              #
 591              # Split the text using the first $tag_match pattern found.
 592              # Text before  pattern will be first in the array, text after
 593              # pattern will be at the end, and between will be any catches made 
 594              # by the pattern.
 595              #
 596              $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
 597              
 598              if (count($parts) < 3) {
 599                  #
 600                  # End of $text reached with unbalenced tag(s).
 601                  # In that case, we return original text unchanged and pass the
 602                  # first character as filtered to prevent an infinite loop in the 
 603                  # parent function.
 604                  #
 605                  return array($original_text{0}, substr($original_text, 1));
 606              }
 607              
 608              $block_text .= $parts[0]; # Text before current tag.
 609              $tag         = $parts[1]; # Tag to handle.
 610              $text        = $parts[2]; # Remaining text after current tag.
 611              
 612              #
 613              # Check for: Auto-close tag (like <hr/>)
 614              #             Comments and Processing Instructions.
 615              #
 616              if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
 617                  $tag{1} == '!' || $tag{1} == '?')
 618              {
 619                  # Just add the tag to the block as if it was text.
 620                  $block_text .= $tag;
 621              }
 622              else {
 623                  #
 624                  # Increase/decrease nested tag count. Only do so if
 625                  # the tag's name match base tag's.
 626                  #
 627                  if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
 628                      if ($tag{1} == '/')                        $depth--;
 629                      else if ($tag{strlen($tag)-2} != '/')    $depth++;
 630                  }
 631                  
 632                  #
 633                  # Check for `markdown="1"` attribute and handle it.
 634                  #
 635                  if ($md_attr && 
 636                      preg_match($markdown_attr_re, $tag, $attr_m) &&
 637                      preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
 638                  {
 639                      # Remove `markdown` attribute from opening tag.
 640                      $tag = preg_replace($markdown_attr_re, '', $tag);
 641                      
 642                      # Check if text inside this tag must be parsed in span mode.
 643                      $this->mode = $attr_m[2] . $attr_m[3];
 644                      $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
 645                          preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
 646                      
 647                      # Calculate indent before tag.
 648                      if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
 649                          $strlen = $this->utf8_strlen;
 650                          $indent = $strlen($matches[1], 'UTF-8');
 651                      } else {
 652                          $indent = 0;
 653                      }
 654                      
 655                      # End preceding block with this tag.
 656                      $block_text .= $tag;
 657                      $parsed .= $this->$hash_method($block_text);
 658                      
 659                      # Get enclosing tag name for the ParseMarkdown function.
 660                      # (This pattern makes $tag_name_re safe without quoting.)
 661                      preg_match('/^<([\w:$]*)\b/', $tag, $matches);
 662                      $tag_name_re = $matches[1];
 663                      
 664                      # Parse the content using the HTML-in-Markdown parser.
 665                      list ($block_text, $text)
 666                          = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
 667                              $tag_name_re, $span_mode);
 668                      
 669                      # Outdent markdown text.
 670                      if ($indent > 0) {
 671                          $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
 672                                                      $block_text);
 673                      }
 674                      
 675                      # Append tag content to parsed text.
 676                      if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
 677                      else                $parsed .= "$block_text";
 678                      
 679                      # Start over with a new block.
 680                      $block_text = "";
 681                  }
 682                  else $block_text .= $tag;
 683              }
 684              
 685          } while ($depth > 0);
 686          
 687          #
 688          # Hash last block text that wasn't processed inside the loop.
 689          #
 690          $parsed .= $this->$hash_method($block_text);
 691          
 692          return array($parsed, $text);
 693      }
 694  
 695  
 696  	protected function hashClean($text) {
 697      #
 698      # Called whenever a tag must be hashed when a function inserts a "clean" tag
 699      # in $text, it passes through this function and is automaticaly escaped, 
 700      # blocking invalid nested overlap.
 701      #
 702          return $this->hashPart($text, 'C');
 703      }
 704  
 705  
 706  	protected function doAnchors($text) {
 707      #
 708      # Turn Markdown link shortcuts into XHTML <a> tags.
 709      #
 710          if ($this->in_anchor) return $text;
 711          $this->in_anchor = true;
 712          
 713          #
 714          # First, handle reference-style links: [link text] [id]
 715          #
 716          $text = preg_replace_callback('{
 717              (                    # wrap whole match in $1
 718                \[
 719                  ('.$this->nested_brackets_re.')    # link text = $2
 720                \]
 721  
 722                [ ]?                # one optional space
 723                (?:\n[ ]*)?        # one optional newline followed by spaces
 724  
 725                \[
 726                  (.*?)        # id = $3
 727                \]
 728              )
 729              }xs',
 730              array($this, '_doAnchors_reference_callback'), $text);
 731  
 732          #
 733          # Next, inline-style links: [link text](url "optional title")
 734          #
 735          $text = preg_replace_callback('{
 736              (                # wrap whole match in $1
 737                \[
 738                  ('.$this->nested_brackets_re.')    # link text = $2
 739                \]
 740                \(            # literal paren
 741                  [ \n]*
 742                  (?:
 743                      <(.+?)>    # href = $3
 744                  |
 745                      ('.$this->nested_url_parenthesis_re.')    # href = $4
 746                  )
 747                  [ \n]*
 748                  (            # $5
 749                    ([\'"])    # quote char = $6
 750                    (.*?)        # Title = $7
 751                    \6        # matching quote
 752                    [ \n]*    # ignore any spaces/tabs between closing quote and )
 753                  )?            # title is optional
 754                \)
 755                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
 756              )
 757              }xs',
 758              array($this, '_doAnchors_inline_callback'), $text);
 759  
 760          #
 761          # Last, handle reference-style shortcuts: [link text]
 762          # These must come last in case you've also got [link text][1]
 763          # or [link text](/foo)
 764          #
 765          $text = preg_replace_callback('{
 766              (                    # wrap whole match in $1
 767                \[
 768                  ([^\[\]]+)        # link text = $2; can\'t contain [ or ]
 769                \]
 770              )
 771              }xs',
 772              array($this, '_doAnchors_reference_callback'), $text);
 773  
 774          $this->in_anchor = false;
 775          return $text;
 776      }
 777  	protected function _doAnchors_reference_callback($matches) {
 778          $whole_match =  $matches[1];
 779          $link_text   =  $matches[2];
 780          $link_id     =& $matches[3];
 781  
 782          if ($link_id == "") {
 783              # for shortcut links like [this][] or [this].
 784              $link_id = $link_text;
 785          }
 786          
 787          # lower-case and turn embedded newlines into spaces
 788          $link_id = strtolower($link_id);
 789          $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 790  
 791          if (isset($this->urls[$link_id])) {
 792              $url = $this->urls[$link_id];
 793              $url = $this->encodeURLAttribute($url);
 794              
 795              $result = "<a href=\"$url\"";
 796              if ( isset( $this->titles[$link_id] ) ) {
 797                  $title = $this->titles[$link_id];
 798                  $title = $this->encodeAttribute($title);
 799                  $result .=  " title=\"$title\"";
 800              }
 801              if (isset($this->ref_attr[$link_id]))
 802                  $result .= $this->ref_attr[$link_id];
 803          
 804              $link_text = $this->runSpanGamut($link_text);
 805              $result .= ">$link_text</a>";
 806              $result = $this->hashPart($result);
 807          }
 808          else {
 809              $result = $whole_match;
 810          }
 811          return $result;
 812      }
 813  	protected function _doAnchors_inline_callback($matches) {
 814          $whole_match    =  $matches[1];
 815          $link_text        =  $this->runSpanGamut($matches[2]);
 816          $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
 817          $title            =& $matches[7];
 818          $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
 819  
 820          // if the URL was of the form <s p a c e s> it got caught by the HTML
 821          // tag parser and hashed. Need to reverse the process before using the URL.
 822          $unhashed = $this->unhash($url);
 823          if ($unhashed != $url)
 824              $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
 825  
 826          $url = $this->encodeURLAttribute($url);
 827  
 828          $result = "<a href=\"$url\"";
 829          if (isset($title)) {
 830              $title = $this->encodeAttribute($title);
 831              $result .=  " title=\"$title\"";
 832          }
 833          $result .= $attr;
 834          
 835          $link_text = $this->runSpanGamut($link_text);
 836          $result .= ">$link_text</a>";
 837  
 838          return $this->hashPart($result);
 839      }
 840  
 841  
 842  	protected function doImages($text) {
 843      #
 844      # Turn Markdown image shortcuts into <img> tags.
 845      #
 846          #
 847          # First, handle reference-style labeled images: ![alt text][id]
 848          #
 849          $text = preg_replace_callback('{
 850              (                # wrap whole match in $1
 851                !\[
 852                  ('.$this->nested_brackets_re.')        # alt text = $2
 853                \]
 854  
 855                [ ]?                # one optional space
 856                (?:\n[ ]*)?        # one optional newline followed by spaces
 857  
 858                \[
 859                  (.*?)        # id = $3
 860                \]
 861  
 862              )
 863              }xs', 
 864              array($this, '_doImages_reference_callback'), $text);
 865  
 866          #
 867          # Next, handle inline images:  ![alt text](url "optional title")
 868          # Don't forget: encode * and _
 869          #
 870          $text = preg_replace_callback('{
 871              (                # wrap whole match in $1
 872                !\[
 873                  ('.$this->nested_brackets_re.')        # alt text = $2
 874                \]
 875                \s?            # One optional whitespace character
 876                \(            # literal paren
 877                  [ \n]*
 878                  (?:
 879                      <(\S*)>    # src url = $3
 880                  |
 881                      ('.$this->nested_url_parenthesis_re.')    # src url = $4
 882                  )
 883                  [ \n]*
 884                  (            # $5
 885                    ([\'"])    # quote char = $6
 886                    (.*?)        # title = $7
 887                    \6        # matching quote
 888                    [ \n]*
 889                  )?            # title is optional
 890                \)
 891                (?:[ ]? '.$this->id_class_attr_catch_re.' )?     # $8 = id/class attributes
 892              )
 893              }xs',
 894              array($this, '_doImages_inline_callback'), $text);
 895  
 896          return $text;
 897      }
 898  	protected function _doImages_reference_callback($matches) {
 899          $whole_match = $matches[1];
 900          $alt_text    = $matches[2];
 901          $link_id     = strtolower($matches[3]);
 902  
 903          if ($link_id == "") {
 904              $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 905          }
 906  
 907          $alt_text = $this->encodeAttribute($alt_text);
 908          if (isset($this->urls[$link_id])) {
 909              $url = $this->encodeURLAttribute($this->urls[$link_id]);
 910              $result = "<img src=\"$url\" alt=\"$alt_text\"";
 911              if (isset($this->titles[$link_id])) {
 912                  $title = $this->titles[$link_id];
 913                  $title = $this->encodeAttribute($title);
 914                  $result .=  " title=\"$title\"";
 915              }
 916              if (isset($this->ref_attr[$link_id]))
 917                  $result .= $this->ref_attr[$link_id];
 918              $result .= $this->empty_element_suffix;
 919              $result = $this->hashPart($result);
 920          }
 921          else {
 922              # If there's no such link ID, leave intact:
 923              $result = $whole_match;
 924          }
 925  
 926          return $result;
 927      }
 928  	protected function _doImages_inline_callback($matches) {
 929          $whole_match    = $matches[1];
 930          $alt_text        = $matches[2];
 931          $url            = $matches[3] == '' ? $matches[4] : $matches[3];
 932          $title            =& $matches[7];
 933          $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
 934  
 935          $alt_text = $this->encodeAttribute($alt_text);
 936          $url = $this->encodeURLAttribute($url);
 937          $result = "<img src=\"$url\" alt=\"$alt_text\"";
 938          if (isset($title)) {
 939              $title = $this->encodeAttribute($title);
 940              $result .=  " title=\"$title\""; # $title already quoted
 941          }
 942          $result .= $attr;
 943          $result .= $this->empty_element_suffix;
 944  
 945          return $this->hashPart($result);
 946      }
 947  
 948  
 949  	protected function doHeaders($text) {
 950      #
 951      # Redefined to add id and class attribute support.
 952      #
 953          # Setext-style headers:
 954          #      Header 1  {#header1}
 955          #      ========
 956          #  
 957          #      Header 2  {#header2 .class1 .class2}
 958          #      --------
 959          #
 960          $text = preg_replace_callback(
 961              '{
 962                  (^.+?)                                # $1: Header text
 963                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 964                  [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
 965              }mx',
 966              array($this, '_doHeaders_callback_setext'), $text);
 967  
 968          # atx-style headers:
 969          #    # Header 1        {#header1}
 970          #    ## Header 2       {#header2}
 971          #    ## Header 2 with closing hashes ##  {#header3.class1.class2}
 972          #    ...
 973          #    ###### Header 6   {.class2}
 974          #
 975          $text = preg_replace_callback('{
 976                  ^(\#{1,6})    # $1 = string of #\'s
 977                  [ ]*
 978                  (.+?)        # $2 = Header text
 979                  [ ]*
 980                  \#*            # optional closing #\'s (not counted)
 981                  (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
 982                  [ ]*
 983                  \n+
 984              }xm',
 985              array($this, '_doHeaders_callback_atx'), $text);
 986  
 987          return $text;
 988      }
 989  	protected function _doHeaders_callback_setext($matches) {
 990          if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
 991              return $matches[0];
 992  
 993          $level = $matches[3]{0} == '=' ? 1 : 2;
 994  
 995          $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
 996  
 997          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
 998          $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
 999          return "\n" . $this->hashBlock($block) . "\n\n";
1000      }
1001  	protected function _doHeaders_callback_atx($matches) {
1002          $level = strlen($matches[1]);
1003  
1004          $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1005          $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1006          $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
1007          return "\n" . $this->hashBlock($block) . "\n\n";
1008      }
1009  
1010  
1011  	protected function doTables($text) {
1012      #
1013      # Form HTML tables.
1014      #
1015          $less_than_tab = $this->tab_width - 1;
1016          #
1017          # Find tables with leading pipe.
1018          #
1019          #    | Header 1 | Header 2
1020          #    | -------- | --------
1021          #    | Cell 1   | Cell 2
1022          #    | Cell 3   | Cell 4
1023          #
1024          $text = preg_replace_callback('
1025              {
1026                  ^                            # Start of a line
1027                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
1028                  [|]                            # Optional leading pipe (present)
1029                  (.+) \n                        # $1: Header row (at least one pipe)
1030                  
1031                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
1032                  [|] ([ ]*[-:]+[-| :]*) \n    # $2: Header underline
1033                  
1034                  (                            # $3: Cells
1035                      (?>
1036                          [ ]*                # Allowed whitespace.
1037                          [|] .* \n            # Row content.
1038                      )*
1039                  )
1040                  (?=\n|\Z)                    # Stop at final double newline.
1041              }xm',
1042              array($this, '_doTable_leadingPipe_callback'), $text);
1043          
1044          #
1045          # Find tables without leading pipe.
1046          #
1047          #    Header 1 | Header 2
1048          #    -------- | --------
1049          #    Cell 1   | Cell 2
1050          #    Cell 3   | Cell 4
1051          #
1052          $text = preg_replace_callback('
1053              {
1054                  ^                            # Start of a line
1055                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
1056                  (\S.*[|].*) \n                # $1: Header row (at least one pipe)
1057                  
1058                  [ ]{0,'.$less_than_tab.'}    # Allowed whitespace.
1059                  ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
1060                  
1061                  (                            # $3: Cells
1062                      (?>
1063                          .* [|] .* \n        # Row content
1064                      )*
1065                  )
1066                  (?=\n|\Z)                    # Stop at final double newline.
1067              }xm',
1068              array($this, '_DoTable_callback'), $text);
1069  
1070          return $text;
1071      }
1072  	protected function _doTable_leadingPipe_callback($matches) {
1073          $head        = $matches[1];
1074          $underline    = $matches[2];
1075          $content    = $matches[3];
1076          
1077          # Remove leading pipe for each row.
1078          $content    = preg_replace('/^ *[|]/m', '', $content);
1079          
1080          return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1081      }
1082  	protected function _doTable_makeAlignAttr($alignname)
1083      {
1084          if (empty($this->table_align_class_tmpl))
1085              return " align=\"$alignname\"";
1086  
1087          $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1088          return " class=\"$classname\"";
1089      }
1090  	protected function _doTable_callback($matches) {
1091          $head        = $matches[1];
1092          $underline    = $matches[2];
1093          $content    = $matches[3];
1094  
1095          # Remove any tailing pipes for each line.
1096          $head        = preg_replace('/[|] *$/m', '', $head);
1097          $underline    = preg_replace('/[|] *$/m', '', $underline);
1098          $content    = preg_replace('/[|] *$/m', '', $content);
1099          
1100          # Reading alignement from header underline.
1101          $separators    = preg_split('/ *[|] */', $underline);
1102          foreach ($separators as $n => $s) {
1103              if (preg_match('/^ *-+: *$/', $s))
1104                  $attr[$n] = $this->_doTable_makeAlignAttr('right');
1105              else if (preg_match('/^ *:-+: *$/', $s))
1106                  $attr[$n] = $this->_doTable_makeAlignAttr('center');
1107              else if (preg_match('/^ *:-+ *$/', $s))
1108                  $attr[$n] = $this->_doTable_makeAlignAttr('left');
1109              else
1110                  $attr[$n] = '';
1111          }
1112          
1113          # Parsing span elements, including code spans, character escapes, 
1114          # and inline HTML tags, so that pipes inside those gets ignored.
1115          $head        = $this->parseSpan($head);
1116          $headers    = preg_split('/ *[|] */', $head);
1117          $col_count    = count($headers);
1118          $attr       = array_pad($attr, $col_count, '');
1119          
1120          # Write column headers.
1121          $text = "<table>\n";
1122          $text .= "<thead>\n";
1123          $text .= "<tr>\n";
1124          foreach ($headers as $n => $header)
1125              $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
1126          $text .= "</tr>\n";
1127          $text .= "</thead>\n";
1128          
1129          # Split content by row.
1130          $rows = explode("\n", trim($content, "\n"));
1131          
1132          $text .= "<tbody>\n";
1133          foreach ($rows as $row) {
1134              # Parsing span elements, including code spans, character escapes, 
1135              # and inline HTML tags, so that pipes inside those gets ignored.
1136              $row = $this->parseSpan($row);
1137              
1138              # Split row by cell.
1139              $row_cells = preg_split('/ *[|] */', $row, $col_count);
1140              $row_cells = array_pad($row_cells, $col_count, '');
1141              
1142              $text .= "<tr>\n";
1143              foreach ($row_cells as $n => $cell)
1144                  $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
1145              $text .= "</tr>\n";
1146          }
1147          $text .= "</tbody>\n";
1148          $text .= "</table>";
1149          
1150          return $this->hashBlock($text) . "\n";
1151      }
1152  
1153      
1154  	protected function doDefLists($text) {
1155      #
1156      # Form HTML definition lists.
1157      #
1158          $less_than_tab = $this->tab_width - 1;
1159  
1160          # Re-usable pattern to match any entire dl list:
1161          $whole_list_re = '(?>
1162              (                                # $1 = whole list
1163                (                                # $2
1164                  [ ]{0,'.$less_than_tab.'}
1165                  ((?>.*\S.*\n)+)                # $3 = defined term
1166                  \n?
1167                  [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1168                )
1169                (?s:.+?)
1170                (                                # $4
1171                    \z
1172                  |
1173                    \n{2,}
1174                    (?=\S)
1175                    (?!                        # Negative lookahead for another term
1176                      [ ]{0,'.$less_than_tab.'}
1177                      (?: \S.*\n )+?            # defined term
1178                      \n?
1179                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1180                    )
1181                    (?!                        # Negative lookahead for another definition
1182                      [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
1183                    )
1184                )
1185              )
1186          )'; // mx
1187  
1188          $text = preg_replace_callback('{
1189                  (?>\A\n?|(?<=\n\n))
1190                  '.$whole_list_re.'
1191              }mx',
1192              array($this, '_doDefLists_callback'), $text);
1193  
1194          return $text;
1195      }
1196  	protected function _doDefLists_callback($matches) {
1197          # Re-usable patterns to match list item bullets and number markers:
1198          $list = $matches[1];
1199          
1200          # Turn double returns into triple returns, so that we can make a
1201          # paragraph for the last item in a list, if necessary:
1202          $result = trim($this->processDefListItems($list));
1203          $result = "<dl>\n" . $result . "\n</dl>";
1204          return $this->hashBlock($result) . "\n\n";
1205      }
1206  
1207  
1208  	protected function processDefListItems($list_str) {
1209      #
1210      #    Process the contents of a single definition list, splitting it
1211      #    into individual term and definition list items.
1212      #
1213          $less_than_tab = $this->tab_width - 1;
1214          
1215          # trim trailing blank lines:
1216          $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1217  
1218          # Process definition terms.
1219          $list_str = preg_replace_callback('{
1220              (?>\A\n?|\n\n+)                    # leading line
1221              (                                # definition terms = $1
1222                  [ ]{0,'.$less_than_tab.'}    # leading whitespace
1223                  (?!\:[ ]|[ ])                # negative lookahead for a definition
1224                                              #   mark (colon) or more whitespace.
1225                  (?> \S.* \n)+?                # actual term (not whitespace).    
1226              )            
1227              (?=\n?[ ]{0,3}:[ ])                # lookahead for following line feed 
1228                                              #   with a definition mark.
1229              }xm',
1230              array($this, '_processDefListItems_callback_dt'), $list_str);
1231  
1232          # Process actual definitions.
1233          $list_str = preg_replace_callback('{
1234              \n(\n+)?                        # leading line = $1
1235              (                                # marker space = $2
1236                  [ ]{0,'.$less_than_tab.'}    # whitespace before colon
1237                  \:[ ]+                        # definition mark (colon)
1238              )
1239              ((?s:.+?))                        # definition text = $3
1240              (?= \n+                         # stop at next definition mark,
1241                  (?:                            # next term or end of text
1242                      [ ]{0,'.$less_than_tab.'} \:[ ]    |
1243                      <dt> | \z
1244                  )                        
1245              )                    
1246              }xm',
1247              array($this, '_processDefListItems_callback_dd'), $list_str);
1248  
1249          return $list_str;
1250      }
1251  	protected function _processDefListItems_callback_dt($matches) {
1252          $terms = explode("\n", trim($matches[1]));
1253          $text = '';
1254          foreach ($terms as $term) {
1255              $term = $this->runSpanGamut(trim($term));
1256              $text .= "\n<dt>" . $term . "</dt>";
1257          }
1258          return $text . "\n";
1259      }
1260  	protected function _processDefListItems_callback_dd($matches) {
1261          $leading_line    = $matches[1];
1262          $marker_space    = $matches[2];
1263          $def            = $matches[3];
1264  
1265          if ($leading_line || preg_match('/\n{2,}/', $def)) {
1266              # Replace marker with the appropriate whitespace indentation
1267              $def = str_repeat(' ', strlen($marker_space)) . $def;
1268              $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
1269              $def = "\n". $def ."\n";
1270          }
1271          else {
1272              $def = rtrim($def);
1273              $def = $this->runSpanGamut($this->outdent($def));
1274          }
1275  
1276          return "\n<dd>" . $def . "</dd>\n";
1277      }
1278  
1279  
1280  	protected function doFencedCodeBlocks($text) {
1281      #
1282      # Adding the fenced code block syntax to regular Markdown:
1283      #
1284      # ~~~
1285      # Code block
1286      # ~~~
1287      #
1288          $less_than_tab = $this->tab_width;
1289          
1290          $text = preg_replace_callback('{
1291                  (?:\n|\A)
1292                  # 1: Opening marker
1293                  (
1294                      (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
1295                  )
1296                  [ ]*
1297                  (?:
1298                      \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
1299                  )?
1300                  [ ]*
1301                  (?:
1302                      '.$this->id_class_attr_catch_re.' # 3: Extra attributes
1303                  )?
1304                  [ ]* \n # Whitespace and newline following marker.
1305                  
1306                  # 4: Content
1307                  (
1308                      (?>
1309                          (?!\1 [ ]* \n)    # Not a closing marker.
1310                          .*\n+
1311                      )+
1312                  )
1313                  
1314                  # Closing marker.
1315                  \1 [ ]* (?= \n )
1316              }xm',
1317              array($this, '_doFencedCodeBlocks_callback'), $text);
1318  
1319          return $text;
1320      }
1321  	protected function _doFencedCodeBlocks_callback($matches) {
1322          $classname =& $matches[2];
1323          $attrs     =& $matches[3];
1324          $codeblock = $matches[4];
1325  
1326          if ($this->code_block_content_func) {
1327              $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
1328          } else {
1329              $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1330          }
1331  
1332          $codeblock = preg_replace_callback('/^\n+/',
1333              array($this, '_doFencedCodeBlocks_newlines'), $codeblock);
1334  
1335          $classes = array();
1336          if ($classname != "") {
1337              if ($classname{0} == '.')
1338                  $classname = substr($classname, 1);
1339              $classes[] = $this->code_class_prefix.$classname;
1340          }
1341          $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
1342          $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
1343          $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
1344          $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";
1345          
1346          return "\n\n".$this->hashBlock($codeblock)."\n\n";
1347      }
1348  	protected function _doFencedCodeBlocks_newlines($matches) {
1349          return str_repeat("<br$this->empty_element_suffix", 
1350              strlen($matches[0]));
1351      }
1352  
1353  
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each table maps a marker string to a regex fragment. The underscore
    # alternatives use lookarounds on [a-zA-Z0-9_], so an underscore marker
    # next to a word character on the relevant side does not match.
    # NOTE(review): presumably the '' entry matches an opening marker and
    # the other entries match the closing marker for that token; confirm
    # against the code that consumes these tables.
    #

    # Single markers: * and _
    protected $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
        );
    # Double markers: ** and __
    protected $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
        );
    # Triple markers: *** and ___
    protected $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
        );
1373  
1374  
1375  	protected function formParagraphs($text) {
1376      #
1377      #    Params:
1378      #        $text - string to process with html <p> tags
1379      #
1380          # Strip leading and trailing lines:
1381          $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1382          
1383          $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1384  
1385          #
1386          # Wrap <p> tags and unhashify HTML blocks
1387          #
1388          foreach ($grafs as $key => $value) {
1389              $value = trim($this->runSpanGamut($value));
1390              
1391              # Check if this should be enclosed in a paragraph.
1392              # Clean tag hashes & block tag hashes are left alone.
1393              $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
1394              
1395              if ($is_p) {
1396                  $value = "<p>$value</p>";
1397              }
1398              $grafs[$key] = $value;
1399          }
1400          
1401          # Join grafs in one text, then unhash HTML tags. 
1402          $text = implode("\n\n", $grafs);
1403          
1404          # Finish by removing any tag hashes still present in $text.
1405          $text = $this->unhash($text);
1406          
1407          return $text;
1408      }
1409      
1410      
1411      ### Footnotes
1412      
1413  	protected function stripFootnotes($text) {
1414      #
1415      # Strips link definitions from text, stores the URLs and titles in
1416      # hash references.
1417      #
1418          $less_than_tab = $this->tab_width - 1;
1419  
1420          # Link defs are in the form: [^id]: url "optional title"
1421          $text = preg_replace_callback('{
1422              ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:    # note_id = $1
1423                [ ]*
1424                \n?                    # maybe *one* newline
1425              (                        # text = $2 (no blank lines allowed)
1426                  (?:                    
1427                      .+                # actual text
1428                  |
1429                      \n                # newlines but 
1430                      (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
1431                      (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
1432                                      # by non-indented content
1433                  )*
1434              )        
1435              }xm',
1436              array($this, '_stripFootnotes_callback'),
1437              $text);
1438          return $text;
1439      }
1440  	protected function _stripFootnotes_callback($matches) {
1441          $note_id = $this->fn_id_prefix . $matches[1];
1442          $this->footnotes[$note_id] = $this->outdent($matches[2]);
1443          return ''; # String that will replace the block
1444      }
1445  
1446  
1447  	protected function doFootnotes($text) {
1448      #
1449      # Replace footnote references in $text [^id] with a special text-token 
1450      # which will be replaced by the actual footnote marker in appendFootnotes.
1451      #
1452          if (!$this->in_anchor) {
1453              $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
1454          }
1455          return $text;
1456      }
1457  
1458      
1459  	protected function appendFootnotes($text) {
1460      #
1461      # Append footnote list to text.
1462      #
1463          $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1464              array($this, '_appendFootnotes_callback'), $text);
1465      
1466          if (!empty($this->footnotes_ordered)) {
1467              $text .= "\n\n";
1468              $text .= "<div class=\"footnotes\">\n";
1469              $text .= "<hr". $this->empty_element_suffix ."\n";
1470              $text .= "<ol>\n\n";
1471  
1472              $attr = "";
1473              if ($this->fn_backlink_class != "") {
1474                  $class = $this->fn_backlink_class;
1475                  $class = $this->encodeAttribute($class);
1476                  $attr .= " class=\"$class\"";
1477              }
1478              if ($this->fn_backlink_title != "") {
1479                  $title = $this->fn_backlink_title;
1480                  $title = $this->encodeAttribute($title);
1481                  $attr .= " title=\"$title\"";
1482              }
1483              $backlink_text = $this->fn_backlink_html;
1484              $num = 0;
1485              
1486              while (!empty($this->footnotes_ordered)) {
1487                  $footnote = reset($this->footnotes_ordered);
1488                  $note_id = key($this->footnotes_ordered);
1489                  unset($this->footnotes_ordered[$note_id]);
1490                  $ref_count = $this->footnotes_ref_count[$note_id];
1491                  unset($this->footnotes_ref_count[$note_id]);
1492                  unset($this->footnotes[$note_id]);
1493                  
1494                  $footnote .= "\n"; # Need to append newline before parsing.
1495                  $footnote = $this->runBlockGamut("$footnote\n");                
1496                  $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
1497                      array($this, '_appendFootnotes_callback'), $footnote);
1498                  
1499                  $attr = str_replace("%%", ++$num, $attr);
1500                  $note_id = $this->encodeAttribute($note_id);
1501  
1502                  # Prepare backlink, multiple backlinks if multiple references
1503                  $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
1504                  for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
1505                      $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
1506                  }
1507                  # Add backlink to last paragraph; create new paragraph if needed.
1508                  if (preg_match('{</p>$}', $footnote)) {
1509                      $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
1510                  } else {
1511                      $footnote .= "\n\n<p>$backlink</p>";
1512                  }
1513                  
1514                  $text .= "<li id=\"fn:$note_id\">\n";
1515                  $text .= $footnote . "\n";
1516                  $text .= "</li>\n\n";
1517              }
1518              
1519              $text .= "</ol>\n";
1520              $text .= "</div>";
1521          }
1522          return $text;
1523      }
1524  	protected function _appendFootnotes_callback($matches) {
1525          $node_id = $this->fn_id_prefix . $matches[1];
1526          
1527          # Create footnote marker only if it has a corresponding footnote *and*
1528          # the footnote hasn't been used by another marker.
1529          if (isset($this->footnotes[$node_id])) {
1530              $num =& $this->footnotes_numbers[$node_id];
1531              if (!isset($num)) {
1532                  # Transfer footnote content to the ordered list and give it its
1533                  # number
1534                  $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
1535                  $this->footnotes_ref_count[$node_id] = 1;
1536                  $num = $this->footnote_counter++;
1537                  $ref_count_mark = '';
1538              } else {
1539                  $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
1540              }
1541  
1542              $attr = "";
1543              if ($this->fn_link_class != "") {
1544                  $class = $this->fn_link_class;
1545                  $class = $this->encodeAttribute($class);
1546                  $attr .= " class=\"$class\"";
1547              }
1548              if ($this->fn_link_title != "") {
1549                  $title = $this->fn_link_title;
1550                  $title = $this->encodeAttribute($title);
1551                  $attr .= " title=\"$title\"";
1552              }
1553              
1554              $attr = str_replace("%%", $num, $attr);
1555              $node_id = $this->encodeAttribute($node_id);
1556              
1557              return
1558                  "<sup id=\"fnref$ref_count_mark:$node_id\">".
1559                  "<a href=\"#fn:$node_id\"$attr>$num</a>".
1560                  "</sup>";
1561          }
1562          
1563          return "[^".$matches[1]."]";
1564      }
1565          
1566      
1567      ### Abbreviations ###
1568      
1569  	protected function stripAbbreviations($text) {
1570      #
1571      # Strips abbreviations from text, stores titles in hash references.
1572      #
1573          $less_than_tab = $this->tab_width - 1;
1574  
1575          # Link defs are in the form: [id]*: url "optional title"
1576          $text = preg_replace_callback('{
1577              ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:    # abbr_id = $1
1578              (.*)                    # text = $2 (no blank lines allowed)    
1579              }xm',
1580              array($this, '_stripAbbreviations_callback'),
1581              $text);
1582          return $text;
1583      }
1584  	protected function _stripAbbreviations_callback($matches) {
1585          $abbr_word = $matches[1];
1586          $abbr_desc = $matches[2];
1587          if ($this->abbr_word_re)
1588              $this->abbr_word_re .= '|';
1589          $this->abbr_word_re .= preg_quote($abbr_word);
1590          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1591          return ''; # String that will replace the block
1592      }
1593      
1594      
1595  	protected function doAbbreviations($text) {
1596      #
1597      # Find defined abbreviations in text and wrap them in <abbr> elements.
1598      #
1599          if ($this->abbr_word_re) {
1600              // cannot use the /x modifier because abbr_word_re may 
1601              // contain significant spaces:
1602              $text = preg_replace_callback('{'.
1603                  '(?<![\w\x1A])'.
1604                  '(?:'.$this->abbr_word_re.')'.
1605                  '(?![\w\x1A])'.
1606                  '}', 
1607                  array($this, '_doAbbreviations_callback'), $text);
1608          }
1609          return $text;
1610      }
1611  	protected function _doAbbreviations_callback($matches) {
1612          $abbr = $matches[0];
1613          if (isset($this->abbr_desciptions[$abbr])) {
1614              $desc = $this->abbr_desciptions[$abbr];
1615              if (empty($desc)) {
1616                  return $this->hashPart("<abbr>$abbr</abbr>");
1617              } else {
1618                  $desc = $this->encodeAttribute($desc);
1619                  return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
1620              }
1621          } else {
1622              return $matches[0];
1623          }
1624      }
1625  }
PHP Cross Reference of Unnamed Project

/lib/markdown/ -> MarkdownExtra.php (source)