[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 <?php 2 # 3 # Markdown Extra - A text-to-HTML conversion tool for web writers 4 # 5 # PHP Markdown Extra 6 # Copyright (c) 2004-2015 Michel Fortin 7 # <https://michelf.ca/projects/php-markdown/> 8 # 9 # Original Markdown 10 # Copyright (c) 2004-2006 John Gruber 11 # <https://daringfireball.net/projects/markdown/> 12 # 13 namespace Michelf; 14 15 16 # 17 # Markdown Extra Parser Class 18 # 19 20 class MarkdownExtra extends \Michelf\Markdown { 21 22 ### Configuration Variables ### 23 24 # Prefix for footnote ids. 25 public $fn_id_prefix = ""; 26 27 # Optional title attribute for footnote links and backlinks. 28 public $fn_link_title = ""; 29 public $fn_backlink_title = ""; 30 31 # Optional class attribute for footnote links and backlinks. 32 public $fn_link_class = "footnote-ref"; 33 public $fn_backlink_class = "footnote-backref"; 34 35 # Content to be displayed within footnote backlinks. The default is '↩'; 36 # the U+FE0E on the end is a Unicode variant selector used to prevent iOS 37 # from displaying the arrow character as an emoji. 38 public $fn_backlink_html = '↩︎'; 39 40 # Class name for table cell alignment (%% replaced left/center/right) 41 # For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center' 42 # If empty, the align attribute is used instead of a class name. 43 public $table_align_class_tmpl = ''; 44 45 # Optional class prefix for fenced code block. 46 public $code_class_prefix = ""; 47 # Class attribute for code blocks goes on the `code` tag; 48 # setting this to true will put attributes on the `pre` tag instead. 49 public $code_attr_on_pre = false; 50 51 # Predefined abbreviations. 52 public $predef_abbr = array(); 53 54 ### Parser Implementation ### 55 56 public function __construct() { 57 # 58 # Constructor function. Initialize the parser object. 59 # 60 # Add extra escapable characters before parent constructor 61 # initialize the table. 62 $this->escape_chars .= ':|'; 63 64 # Insert extra document, block, and span transformations. 
65 # Parent constructor will do the sorting. 66 $this->document_gamut += array( 67 "doFencedCodeBlocks" => 5, 68 "stripFootnotes" => 15, 69 "stripAbbreviations" => 25, 70 "appendFootnotes" => 50, 71 ); 72 $this->block_gamut += array( 73 "doFencedCodeBlocks" => 5, 74 "doTables" => 15, 75 "doDefLists" => 45, 76 ); 77 $this->span_gamut += array( 78 "doFootnotes" => 5, 79 "doAbbreviations" => 70, 80 ); 81 82 $this->enhanced_ordered_list = true; 83 parent::__construct(); 84 } 85 86 87 # Extra variables used during extra transformations. 88 protected $footnotes = array(); 89 protected $footnotes_ordered = array(); 90 protected $footnotes_ref_count = array(); 91 protected $footnotes_numbers = array(); 92 protected $abbr_desciptions = array(); 93 protected $abbr_word_re = ''; 94 95 # Give the current footnote number. 96 protected $footnote_counter = 1; 97 98 99 protected function setup() { 100 # 101 # Setting up Extra-specific variables. 102 # 103 parent::setup(); 104 105 $this->footnotes = array(); 106 $this->footnotes_ordered = array(); 107 $this->footnotes_ref_count = array(); 108 $this->footnotes_numbers = array(); 109 $this->abbr_desciptions = array(); 110 $this->abbr_word_re = ''; 111 $this->footnote_counter = 1; 112 113 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 114 if ($this->abbr_word_re) 115 $this->abbr_word_re .= '|'; 116 $this->abbr_word_re .= preg_quote($abbr_word); 117 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 118 } 119 } 120 121 protected function teardown() { 122 # 123 # Clearing Extra-specific variables. 
124 # 125 $this->footnotes = array(); 126 $this->footnotes_ordered = array(); 127 $this->footnotes_ref_count = array(); 128 $this->footnotes_numbers = array(); 129 $this->abbr_desciptions = array(); 130 $this->abbr_word_re = ''; 131 132 parent::teardown(); 133 } 134 135 136 ### Extra Attribute Parser ### 137 138 # Expression to use to catch attributes (includes the braces) 139 protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}'; 140 # Expression to use when parsing in a context when no capture is desired 141 protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}'; 142 143 protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) { 144 # 145 # Parse attributes caught by the $this->id_class_attr_catch_re expression 146 # and return the HTML-formatted list of attributes. 147 # 148 # Currently supported attributes are .class and #id. 149 # 150 # In addition, this method also supports supplying a default Id value, 151 # which will be used to populate the id attribute in case it was not 152 # overridden. 153 if (empty($attr) && !$defaultIdValue && empty($classes)) return ""; 154 155 # Split on components 156 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches); 157 $elements = $matches[0]; 158 159 # handle classes and ids (only first id taken into account) 160 $attributes = array(); 161 $id = false; 162 foreach ($elements as $element) { 163 if ($element{0} == '.') { 164 $classes[] = substr($element, 1); 165 } else if ($element{0} == '#') { 166 if ($id === false) $id = substr($element, 1); 167 } else if (strpos($element, '=') > 0) { 168 $parts = explode('=', $element, 2); 169 $attributes[] = $parts[0] . '="' . $parts[1] . 
'"'; 170 } 171 } 172 173 if (!$id) $id = $defaultIdValue; 174 175 # compose attributes as string 176 $attr_str = ""; 177 if (!empty($id)) { 178 $attr_str .= ' id="'.$this->encodeAttribute($id) .'"'; 179 } 180 if (!empty($classes)) { 181 $attr_str .= ' class="'. implode(" ", $classes) . '"'; 182 } 183 if (!$this->no_markup && !empty($attributes)) { 184 $attr_str .= ' '.implode(" ", $attributes); 185 } 186 return $attr_str; 187 } 188 189 190 protected function stripLinkDefinitions($text) { 191 # 192 # Strips link definitions from text, stores the URLs and titles in 193 # hash references. 194 # 195 $less_than_tab = $this->tab_width - 1; 196 197 # Link defs are in the form: ^[id]: url "optional title" 198 $text = preg_replace_callback('{ 199 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 200 [ ]* 201 \n? # maybe *one* newline 202 [ ]* 203 (?: 204 <(.+?)> # url = $2 205 | 206 (\S+?) # url = $3 207 ) 208 [ ]* 209 \n? # maybe one newline 210 [ ]* 211 (?: 212 (?<=\s) # lookbehind for whitespace 213 ["(] 214 (.*?) # title = $4 215 [")] 216 [ ]* 217 )? # title is optional 218 (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 219 (?:\n+|\Z) 220 }xm', 221 array($this, '_stripLinkDefinitions_callback'), 222 $text); 223 return $text; 224 } 225 protected function _stripLinkDefinitions_callback($matches) { 226 $link_id = strtolower($matches[1]); 227 $url = $matches[2] == '' ? 
$matches[3] : $matches[2]; 228 $this->urls[$link_id] = $url; 229 $this->titles[$link_id] =& $matches[4]; 230 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]); 231 return ''; # String that will replace the block 232 } 233 234 235 ### HTML Block Parser ### 236 237 # Tags that are always treated as block tags: 238 protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure'; 239 240 # Tags treated as block tags only if the opening tag is alone on its line: 241 protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 242 243 # Tags where markdown="1" default to span mode: 244 protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 245 246 # Tags which must not have their contents modified, no matter where 247 # they appear: 248 protected $clean_tags_re = 'script|style|math|svg'; 249 250 # Tags that do not need to be closed. 251 protected $auto_close_tags_re = 'hr|img|param|source|track'; 252 253 254 protected function hashHTMLBlocks($text) { 255 # 256 # Hashify HTML Blocks and "clean tags". 257 # 258 # We only want to do this for block-level HTML tags, such as headers, 259 # lists, and tables. That's because we still want to wrap <p>s around 260 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 261 # phrase emphasis, and spans. The list of tags we're looking for is 262 # hard-coded. 263 # 264 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 265 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 266 # attribute is found within a tag, _HashHTMLBlocks_InHTML calls back 267 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 268 # These two functions are calling each other. It's recursive! 
269 # 270 if ($this->no_markup) return $text; 271 272 # 273 # Call the HTML-in-Markdown hasher. 274 # 275 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 276 277 return $text; 278 } 279 protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 280 $enclosing_tag_re = '', $span = false) 281 { 282 # 283 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 284 # 285 # * $indent is the number of space to be ignored when checking for code 286 # blocks. This is important because if we don't take the indent into 287 # account, something like this (which looks right) won't work as expected: 288 # 289 # <div> 290 # <div markdown="1"> 291 # Hello World. <-- Is this a Markdown code block or text? 292 # </div> <-- Is this a Markdown code block or a real tag? 293 # <div> 294 # 295 # If you don't like this, just don't indent the tag on which 296 # you apply the markdown="1" attribute. 297 # 298 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 299 # tag with that name. Nested tags supported. 300 # 301 # * If $span is true, text inside must treated as span. So any double 302 # newline will be replaced by a single newline so that it does not create 303 # paragraphs. 304 # 305 # Returns an array of that form: ( processed text , remaining text ) 306 # 307 if ($text === '') return array('', ''); 308 309 # Regex to check for the presense of newlines around a block tag. 310 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 311 $newline_after_re = 312 '{ 313 ^ # Start of text following the tag. 314 (?>[ ]*<!--.*?-->)? # Optional comment. 315 [ ]*\n # Must be followed by newline. 316 }xs'; 317 318 # Regex to match any tag. 319 $block_tag_re = 320 '{ 321 ( # $2: Capture whole tag. 322 </? # Any opening or closing tag. 323 (?> # Tag name. 324 '.$this->block_tags_re.' | 325 '.$this->context_block_tags_re.' | 326 '.$this->clean_tags_re.' | 327 (?!\s)'.$enclosing_tag_re.' 
328 ) 329 (?: 330 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 331 (?> 332 ".*?" | # Double quotes (can contain `>`) 333 \'.*?\' | # Single quotes (can contain `>`) 334 .+? # Anything but quotes and `>`. 335 )*? 336 )? 337 > # End of tag. 338 | 339 <!-- .*? --> # HTML Comment 340 | 341 <\?.*?\?> | <%.*?%> # Processing instruction 342 | 343 <!\[CDATA\[.*?\]\]> # CData Block 344 '. ( !$span ? ' # If not in span. 345 | 346 # Indented code block 347 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 348 [ ]{'.($indent+4).'}[^\n]* \n 349 (?> 350 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 351 )* 352 | 353 # Fenced code block marker 354 (?<= ^ | \n ) 355 [ ]{0,'.($indent+3).'}(?:~{3,}|`{3,}) 356 [ ]* 357 (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name 358 [ ]* 359 (?: '.$this->id_class_attr_nocatch_re.' )? # extra attributes 360 [ ]* 361 (?= \n ) 362 ' : '' ). ' # End (if not is span). 363 | 364 # Code span marker 365 # Note, this regex needs to go after backtick fenced 366 # code blocks but it should also be kept outside of the 367 # "if not in span" condition adding backticks to the parser 368 `+ 369 ) 370 }xs'; 371 372 373 $depth = 0; # Current depth inside the tag tree. 374 $parsed = ""; # Parsed text that will be returned. 375 376 # 377 # Loop through every tag until we find the closing tag of the parent 378 # or loop until reaching the end of text if no parent tag specified. 379 # 380 do { 381 # 382 # Split the text using the first $tag_match pattern found. 383 # Text before pattern will be first in the array, text after 384 # pattern will be at the end, and between will be any catches made 385 # by the pattern. 386 # 387 $parts = preg_split($block_tag_re, $text, 2, 388 PREG_SPLIT_DELIM_CAPTURE); 389 390 # If in Markdown span mode, add a empty-string span-level hash 391 # after each newline to prevent triggering any block element. 392 if ($span) { 393 $void = $this->hashPart("", ':'); 394 $newline = "$void\n"; 395 $parts[0] = $void . 
str_replace("\n", $newline, $parts[0]) . $void; 396 } 397 398 $parsed .= $parts[0]; # Text before current tag. 399 400 # If end of $text has been reached. Stop loop. 401 if (count($parts) < 3) { 402 $text = ""; 403 break; 404 } 405 406 $tag = $parts[1]; # Tag to handle. 407 $text = $parts[2]; # Remaining text after current tag. 408 $tag_re = preg_quote($tag); # For use in a regular expression. 409 410 # 411 # Check for: Fenced code block marker. 412 # Note: need to recheck the whole tag to disambiguate backtick 413 # fences from code spans 414 # 415 if (preg_match('{^\n?([ ]{0,'.($indent+3).'})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) { 416 # Fenced code block marker: find matching end marker. 417 $fence_indent = strlen($capture[1]); # use captured indent in re 418 $fence_re = $capture[2]; # use captured fence in re 419 if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text, 420 $matches)) 421 { 422 # End marker found: pass text unchanged until marker. 423 $parsed .= $tag . $matches[0]; 424 $text = substr($text, strlen($matches[0])); 425 } 426 else { 427 # No end marker: just skip it. 428 $parsed .= $tag; 429 } 430 } 431 # 432 # Check for: Indented code block. 433 # 434 else if ($tag{0} == "\n" || $tag{0} == " ") { 435 # Indented code block: pass it unchanged, will be handled 436 # later. 437 $parsed .= $tag; 438 } 439 # 440 # Check for: Code span marker 441 # Note: need to check this after backtick fenced code blocks 442 # 443 else if ($tag{0} == "`") { 444 # Find corresponding end marker. 445 $tag_re = preg_quote($tag); 446 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}', 447 $text, $matches)) 448 { 449 # End marker found: pass text unchanged until marker. 450 $parsed .= $tag . $matches[0]; 451 $text = substr($text, strlen($matches[0])); 452 } 453 else { 454 # Unmatched marker: just skip it. 
455 $parsed .= $tag; 456 } 457 } 458 # 459 # Check for: Opening Block level tag or 460 # Opening Context Block tag (like ins and del) 461 # used as a block tag (tag is alone on it's line). 462 # 463 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 464 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 465 preg_match($newline_before_re, $parsed) && 466 preg_match($newline_after_re, $text) ) 467 ) 468 { 469 # Need to parse tag and following text using the HTML parser. 470 list($block_text, $text) = 471 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 472 473 # Make sure it stays outside of any paragraph by adding newlines. 474 $parsed .= "\n\n$block_text\n\n"; 475 } 476 # 477 # Check for: Clean tag (like script, math) 478 # HTML Comments, processing instructions. 479 # 480 else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 481 $tag{1} == '!' || $tag{1} == '?') 482 { 483 # Need to parse tag and following text using the HTML parser. 484 # (don't check for markdown attribute) 485 list($block_text, $text) = 486 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 487 488 $parsed .= $block_text; 489 } 490 # 491 # Check for: Tag with same name as enclosing tag. 492 # 493 else if ($enclosing_tag_re !== '' && 494 # Same name as enclosing tag. 495 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag)) 496 { 497 # 498 # Increase/decrease nested tag count. 499 # 500 if ($tag{1} == '/') $depth--; 501 else if ($tag{strlen($tag)-2} != '/') $depth++; 502 503 if ($depth < 0) { 504 # 505 # Going out of parent element. Clean up and break so we 506 # return to the calling function. 507 # 508 $text = $tag . 
$text; 509 break; 510 } 511 512 $parsed .= $tag; 513 } 514 else { 515 $parsed .= $tag; 516 } 517 } while ($depth >= 0); 518 519 return array($parsed, $text); 520 } 521 protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 522 # 523 # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 524 # 525 # * Calls $hash_method to convert any blocks. 526 # * Stops when the first opening tag closes. 527 # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 528 # (it is not inside clean tags) 529 # 530 # Returns an array of that form: ( processed text , remaining text ) 531 # 532 if ($text === '') return array('', ''); 533 534 # Regex to match `markdown` attribute inside of a tag. 535 $markdown_attr_re = ' 536 { 537 \s* # Eat whitespace before the `markdown` attribute 538 markdown 539 \s*=\s* 540 (?> 541 (["\']) # $1: quote delimiter 542 (.*?) # $2: attribute value 543 \1 # matching delimiter 544 | 545 ([^\s>]*) # $3: unquoted attribute value 546 ) 547 () # $4: make $3 always defined (avoid warnings) 548 }xs'; 549 550 # Regex to match any tag. 551 $tag_re = '{ 552 ( # $2: Capture whole tag. 553 </? # Any opening or closing tag. 554 [\w:$]+ # Tag name. 555 (?: 556 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 557 (?> 558 ".*?" | # Double quotes (can contain `>`) 559 \'.*?\' | # Single quotes (can contain `>`) 560 .+? # Anything but quotes and `>`. 561 )*? 562 )? 563 > # End of tag. 564 | 565 <!-- .*? --> # HTML Comment 566 | 567 <\?.*?\?> | <%.*?%> # Processing instruction 568 | 569 <!\[CDATA\[.*?\]\]> # CData Block 570 ) 571 }xs'; 572 573 $original_text = $text; # Save original text in case of faliure. 574 575 $depth = 0; # Current depth inside the tag tree. 576 $block_text = ""; # Temporary text holder for current text. 577 $parsed = ""; # Parsed text that will be returned. 578 579 # 580 # Get the name of the starting tag. 581 # (This pattern makes $base_tag_name_re safe without quoting.) 
582 # 583 if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 584 $base_tag_name_re = $matches[1]; 585 586 # 587 # Loop through every tag until we find the corresponding closing tag. 588 # 589 do { 590 # 591 # Split the text using the first $tag_match pattern found. 592 # Text before pattern will be first in the array, text after 593 # pattern will be at the end, and between will be any catches made 594 # by the pattern. 595 # 596 $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 597 598 if (count($parts) < 3) { 599 # 600 # End of $text reached with unbalenced tag(s). 601 # In that case, we return original text unchanged and pass the 602 # first character as filtered to prevent an infinite loop in the 603 # parent function. 604 # 605 return array($original_text{0}, substr($original_text, 1)); 606 } 607 608 $block_text .= $parts[0]; # Text before current tag. 609 $tag = $parts[1]; # Tag to handle. 610 $text = $parts[2]; # Remaining text after current tag. 611 612 # 613 # Check for: Auto-close tag (like <hr/>) 614 # Comments and Processing Instructions. 615 # 616 if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) || 617 $tag{1} == '!' || $tag{1} == '?') 618 { 619 # Just add the tag to the block as if it was text. 620 $block_text .= $tag; 621 } 622 else { 623 # 624 # Increase/decrease nested tag count. Only do so if 625 # the tag's name match base tag's. 626 # 627 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) { 628 if ($tag{1} == '/') $depth--; 629 else if ($tag{strlen($tag)-2} != '/') $depth++; 630 } 631 632 # 633 # Check for `markdown="1"` attribute and handle it. 634 # 635 if ($md_attr && 636 preg_match($markdown_attr_re, $tag, $attr_m) && 637 preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 638 { 639 # Remove `markdown` attribute from opening tag. 640 $tag = preg_replace($markdown_attr_re, '', $tag); 641 642 # Check if text inside this tag must be parsed in span mode. 643 $this->mode = $attr_m[2] . 
$attr_m[3]; 644 $span_mode = $this->mode == 'span' || $this->mode != 'block' && 645 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 646 647 # Calculate indent before tag. 648 if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 649 $strlen = $this->utf8_strlen; 650 $indent = $strlen($matches[1], 'UTF-8'); 651 } else { 652 $indent = 0; 653 } 654 655 # End preceding block with this tag. 656 $block_text .= $tag; 657 $parsed .= $this->$hash_method($block_text); 658 659 # Get enclosing tag name for the ParseMarkdown function. 660 # (This pattern makes $tag_name_re safe without quoting.) 661 preg_match('/^<([\w:$]*)\b/', $tag, $matches); 662 $tag_name_re = $matches[1]; 663 664 # Parse the content using the HTML-in-Markdown parser. 665 list ($block_text, $text) 666 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 667 $tag_name_re, $span_mode); 668 669 # Outdent markdown text. 670 if ($indent > 0) { 671 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 672 $block_text); 673 } 674 675 # Append tag content to parsed text. 676 if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 677 else $parsed .= "$block_text"; 678 679 # Start over with a new block. 680 $block_text = ""; 681 } 682 else $block_text .= $tag; 683 } 684 685 } while ($depth > 0); 686 687 # 688 # Hash last block text that wasn't processed inside the loop. 689 # 690 $parsed .= $this->$hash_method($block_text); 691 692 return array($parsed, $text); 693 } 694 695 696 protected function hashClean($text) { 697 # 698 # Called whenever a tag must be hashed when a function inserts a "clean" tag 699 # in $text, it passes through this function and is automaticaly escaped, 700 # blocking invalid nested overlap. 701 # 702 return $this->hashPart($text, 'C'); 703 } 704 705 706 protected function doAnchors($text) { 707 # 708 # Turn Markdown link shortcuts into XHTML <a> tags. 
709 # 710 if ($this->in_anchor) return $text; 711 $this->in_anchor = true; 712 713 # 714 # First, handle reference-style links: [link text] [id] 715 # 716 $text = preg_replace_callback('{ 717 ( # wrap whole match in $1 718 \[ 719 ('.$this->nested_brackets_re.') # link text = $2 720 \] 721 722 [ ]? # one optional space 723 (?:\n[ ]*)? # one optional newline followed by spaces 724 725 \[ 726 (.*?) # id = $3 727 \] 728 ) 729 }xs', 730 array($this, '_doAnchors_reference_callback'), $text); 731 732 # 733 # Next, inline-style links: [link text](url "optional title") 734 # 735 $text = preg_replace_callback('{ 736 ( # wrap whole match in $1 737 \[ 738 ('.$this->nested_brackets_re.') # link text = $2 739 \] 740 \( # literal paren 741 [ \n]* 742 (?: 743 <(.+?)> # href = $3 744 | 745 ('.$this->nested_url_parenthesis_re.') # href = $4 746 ) 747 [ \n]* 748 ( # $5 749 ([\'"]) # quote char = $6 750 (.*?) # Title = $7 751 \6 # matching quote 752 [ \n]* # ignore any spaces/tabs between closing quote and ) 753 )? # title is optional 754 \) 755 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 756 ) 757 }xs', 758 array($this, '_doAnchors_inline_callback'), $text); 759 760 # 761 # Last, handle reference-style shortcuts: [link text] 762 # These must come last in case you've also got [link text][1] 763 # or [link text](/foo) 764 # 765 $text = preg_replace_callback('{ 766 ( # wrap whole match in $1 767 \[ 768 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 769 \] 770 ) 771 }xs', 772 array($this, '_doAnchors_reference_callback'), $text); 773 774 $this->in_anchor = false; 775 return $text; 776 } 777 protected function _doAnchors_reference_callback($matches) { 778 $whole_match = $matches[1]; 779 $link_text = $matches[2]; 780 $link_id =& $matches[3]; 781 782 if ($link_id == "") { 783 # for shortcut links like [this][] or [this]. 
784 $link_id = $link_text; 785 } 786 787 # lower-case and turn embedded newlines into spaces 788 $link_id = strtolower($link_id); 789 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 790 791 if (isset($this->urls[$link_id])) { 792 $url = $this->urls[$link_id]; 793 $url = $this->encodeURLAttribute($url); 794 795 $result = "<a href=\"$url\""; 796 if ( isset( $this->titles[$link_id] ) ) { 797 $title = $this->titles[$link_id]; 798 $title = $this->encodeAttribute($title); 799 $result .= " title=\"$title\""; 800 } 801 if (isset($this->ref_attr[$link_id])) 802 $result .= $this->ref_attr[$link_id]; 803 804 $link_text = $this->runSpanGamut($link_text); 805 $result .= ">$link_text</a>"; 806 $result = $this->hashPart($result); 807 } 808 else { 809 $result = $whole_match; 810 } 811 return $result; 812 } 813 protected function _doAnchors_inline_callback($matches) { 814 $whole_match = $matches[1]; 815 $link_text = $this->runSpanGamut($matches[2]); 816 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 817 $title =& $matches[7]; 818 $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]); 819 820 // if the URL was of the form <s p a c e s> it got caught by the HTML 821 // tag parser and hashed. Need to reverse the process before using the URL. 822 $unhashed = $this->unhash($url); 823 if ($unhashed != $url) 824 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 825 826 $url = $this->encodeURLAttribute($url); 827 828 $result = "<a href=\"$url\""; 829 if (isset($title)) { 830 $title = $this->encodeAttribute($title); 831 $result .= " title=\"$title\""; 832 } 833 $result .= $attr; 834 835 $link_text = $this->runSpanGamut($link_text); 836 $result .= ">$link_text</a>"; 837 838 return $this->hashPart($result); 839 } 840 841 842 protected function doImages($text) { 843 # 844 # Turn Markdown image shortcuts into <img> tags. 
845 # 846 # 847 # First, handle reference-style labeled images: ![alt text][id] 848 # 849 $text = preg_replace_callback('{ 850 ( # wrap whole match in $1 851 !\[ 852 ('.$this->nested_brackets_re.') # alt text = $2 853 \] 854 855 [ ]? # one optional space 856 (?:\n[ ]*)? # one optional newline followed by spaces 857 858 \[ 859 (.*?) # id = $3 860 \] 861 862 ) 863 }xs', 864 array($this, '_doImages_reference_callback'), $text); 865 866 # 867 # Next, handle inline images:  868 # Don't forget: encode * and _ 869 # 870 $text = preg_replace_callback('{ 871 ( # wrap whole match in $1 872 !\[ 873 ('.$this->nested_brackets_re.') # alt text = $2 874 \] 875 \s? # One optional whitespace character 876 \( # literal paren 877 [ \n]* 878 (?: 879 <(\S*)> # src url = $3 880 | 881 ('.$this->nested_url_parenthesis_re.') # src url = $4 882 ) 883 [ \n]* 884 ( # $5 885 ([\'"]) # quote char = $6 886 (.*?) # title = $7 887 \6 # matching quote 888 [ \n]* 889 )? # title is optional 890 \) 891 (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 892 ) 893 }xs', 894 array($this, '_doImages_inline_callback'), $text); 895 896 return $text; 897 } 898 protected function _doImages_reference_callback($matches) { 899 $whole_match = $matches[1]; 900 $alt_text = $matches[2]; 901 $link_id = strtolower($matches[3]); 902 903 if ($link_id == "") { 904 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 
905 } 906 907 $alt_text = $this->encodeAttribute($alt_text); 908 if (isset($this->urls[$link_id])) { 909 $url = $this->encodeURLAttribute($this->urls[$link_id]); 910 $result = "<img src=\"$url\" alt=\"$alt_text\""; 911 if (isset($this->titles[$link_id])) { 912 $title = $this->titles[$link_id]; 913 $title = $this->encodeAttribute($title); 914 $result .= " title=\"$title\""; 915 } 916 if (isset($this->ref_attr[$link_id])) 917 $result .= $this->ref_attr[$link_id]; 918 $result .= $this->empty_element_suffix; 919 $result = $this->hashPart($result); 920 } 921 else { 922 # If there's no such link ID, leave intact: 923 $result = $whole_match; 924 } 925 926 return $result; 927 } 928 protected function _doImages_inline_callback($matches) { 929 $whole_match = $matches[1]; 930 $alt_text = $matches[2]; 931 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 932 $title =& $matches[7]; 933 $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]); 934 935 $alt_text = $this->encodeAttribute($alt_text); 936 $url = $this->encodeURLAttribute($url); 937 $result = "<img src=\"$url\" alt=\"$alt_text\""; 938 if (isset($title)) { 939 $title = $this->encodeAttribute($title); 940 $result .= " title=\"$title\""; # $title already quoted 941 } 942 $result .= $attr; 943 $result .= $this->empty_element_suffix; 944 945 return $this->hashPart($result); 946 } 947 948 949 protected function doHeaders($text) { 950 # 951 # Redefined to add id and class attribute support. 952 # 953 # Setext-style headers: 954 # Header 1 {#header1} 955 # ======== 956 # 957 # Header 2 {#header2 .class1 .class2} 958 # -------- 959 # 960 $text = preg_replace_callback( 961 '{ 962 (^.+?) # $1: Header text 963 (?:[ ]+ '.$this->id_class_attr_catch_re.' )? 
# $3 = id/class attributes
                [ ]*\n(=+|-+)[ ]*\n+    # $3: Header footer
            }mx',
            array($this, '_doHeaders_callback_setext'), $text);

        # atx-style headers:
        #   # Header 1        {#header1}
        #   ## Header 2       {#header2}
        #   ## Header 2 with closing hashes ##  {#header3.class1.class2}
        #   ...
        #   ###### Header 6   {.class2}
        #
        $text = preg_replace_callback('{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                (?:[ ]+ '.$this->id_class_attr_catch_re.' )?     # $3 = id/class attributes
                [ ]*
                \n+
            }xm',
            array($this, '_doHeaders_callback_atx'), $text);

        return $text;
    }
    protected function _doHeaders_callback_setext($matches) {
        #
        # Callback for setext-style (underlined) headers.
        #
        # Params:
        #    $matches - regex match: [1] header text, [2] optional
        #               id/class attribute block, [3] underline string.
        #
        # Returns the hashed <h1>/<h2> block surrounded by newlines, or the
        # untouched match when the candidate is rejected.
        #

        # A single "-" under a line starting with "- " is left as-is
        # rather than being turned into a header.
        if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
            return $matches[0];

        # "=" underline gives level 1, "-" gives level 2. Square-bracket
        # string offset: the curly-brace form is gone as of PHP 8.0.
        $level = $matches[3][0] == '=' ? 1 : 2;

        $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;

        # "$dummy =&" lets us pass $matches[2] without a notice when the
        # optional group is absent from the match array.
        $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
        $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }
    protected function _doHeaders_callback_atx($matches) {
        #
        # Callback for atx-style (#-prefixed) headers.
        #
        # Params:
        #    $matches - regex match: [1] run of "#" characters, [2] header
        #               text, [3] optional id/class attribute block.
        #
        # Returns the hashed <hN> block surrounded by newlines.
        #

        # Header level is the number of leading "#" characters (1 to 6).
        $level = strlen($matches[1]);

        $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;

        # $matches[3] is a trailing optional group and may be missing; the
        # reference assignment avoids an undefined-index notice.
        $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
        $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }


    protected function doTables($text) {
        #
        # Form HTML tables.
        #
        # Two passes are made over $text: one for tables whose rows start
        # with a pipe, one for tables whose rows do not. Both end up in
        # _doTable_callback().
        #
        $less_than_tab = $this->tab_width - 1;
        #
        # Find tables with leading pipe.
        #
        #    | Header 1 | Header 2
        #    | -------- | --------
        #    | Cell 1   | Cell 2
        #    | Cell 3   | Cell 4
        #
        $text = preg_replace_callback('
            {
                ^                           # Start of a line
                [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
                [|]                         # Optional leading pipe (present)
                (.+) \n                     # $1: Header row (at least one pipe)

                [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
                [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

                (                           # $3: Cells
                    (?>
                        [ ]*                # Allowed whitespace.
                        [|] .* \n           # Row content.
                    )*
                )
                (?=\n|\Z)                   # Stop at final double newline.
            }xm',
            array($this, '_doTable_leadingPipe_callback'), $text);

        #
        # Find tables without leading pipe.
        #
        #    Header 1 | Header 2
        #    -------- | --------
        #    Cell 1   | Cell 2
        #    Cell 3   | Cell 4
        #
        $text = preg_replace_callback('
            {
                ^                           # Start of a line
                [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
                (\S.*[|].*) \n              # $1: Header row (at least one pipe)

                [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
                ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

                (                           # $3: Cells
                    (?>
                        .* [|] .* \n        # Row content
                    )*
                )
                (?=\n|\Z)                   # Stop at final double newline.
            }xm',
            array($this, '_doTable_callback'), $text);

        return $text;
    }
    protected function _doTable_leadingPipe_callback($matches) {
        #
        # Callback for tables written with a leading pipe: strips that pipe
        # from every body row, then delegates to _doTable_callback().
        #
        $head      = $matches[1];
        $underline = $matches[2];
        $content   = $matches[3];

        # Remove leading pipe for each row.
        $content = preg_replace('/^ *[|]/m', '', $content);

        return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
    }
    protected function _doTable_makeAlignAttr($alignname)
    {
        #
        # Build the attribute string used to align a table cell.
        #
        # Params:
        #    $alignname - 'left', 'center' or 'right'.
        #
        # Returns an align="..." attribute when no class template is
        # configured, otherwise a class="..." attribute in which "%%" in
        # the template is replaced by $alignname.
        #
        if (empty($this->table_align_class_tmpl))
            return " align=\"$alignname\"";

        $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
        return " class=\"$classname\"";
    }
    protected function _doTable_callback($matches) {
        #
        # Turn one matched table into HTML.
        #
        # Params:
        #    $matches - [1] header row, [2] header underline, [3] body rows
        #               (newline separated), all without a leading pipe.
        #
        # Returns the hashed <table> block followed by a newline.
        #
        $head      = $matches[1];
        $underline = $matches[2];
        $content   = $matches[3];

        # Remove any trailing pipes for each line.
        $head      = preg_replace('/[|] *$/m', '', $head);
        $underline = preg_replace('/[|] *$/m', '', $underline);
        $content   = preg_replace('/[|] *$/m', '', $content);

        # Reading alignment from header underline.
        $attr = array();
        $separators = preg_split('/ *[|] */', $underline);
        foreach ($separators as $n => $s) {
            if (preg_match('/^ *-+: *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('right');
            else if (preg_match('/^ *:-+: *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('center');
            else if (preg_match('/^ *:-+ *$/', $s))
                $attr[$n] = $this->_doTable_makeAlignAttr('left');
            else
                $attr[$n] = '';
        }

        # Parsing span elements, including code spans, character escapes,
        # and inline HTML tags, so that pipes inside those gets ignored.
        $head      = $this->parseSpan($head);
        $headers   = preg_split('/ *[|] */', $head);
        $col_count = count($headers);
        # Make sure there is one alignment entry per header column.
        $attr      = array_pad($attr, $col_count, '');

        # Write column headers.
        $text = "<table>\n";
        $text .= "<thead>\n";
        $text .= "<tr>\n";
        foreach ($headers as $n => $header)
            $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
        $text .= "</tr>\n";
        $text .= "</thead>\n";

        # Split content by row.
        $rows = explode("\n", trim($content, "\n"));

        $text .= "<tbody>\n";
        foreach ($rows as $row) {
            # Parsing span elements, including code spans, character escapes,
            # and inline HTML tags, so that pipes inside those gets ignored.
            $row = $this->parseSpan($row);

            # Split row by cell. The limit keeps a row from producing more
            # cells than there are headers; short rows are padded.
            $row_cells = preg_split('/ *[|] */', $row, $col_count);
            $row_cells = array_pad($row_cells, $col_count, '');

            $text .= "<tr>\n";
            foreach ($row_cells as $n => $cell)
                $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
            $text .= "</tr>\n";
        }
        $text .= "</tbody>\n";
        $text .= "</table>";

        return $this->hashBlock($text) . "\n";
    }


    protected function doDefLists($text) {
        #
        # Form HTML definition lists.
        #
        $less_than_tab = $this->tab_width - 1;

        # Re-usable pattern to match any entire dl list:
        $whole_list_re = '(?>
            (                               # $1 = whole list
              (                             # $2
                [ ]{0,'.$less_than_tab.'}
                ((?>.*\S.*\n)+)             # $3 = defined term
                \n?
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another term
                    [ ]{0,'.$less_than_tab.'}
                    (?: \S.*\n )+?          # defined term
                    \n?
                    [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                  )
                  (?!                       # Negative lookahead for another definition
                    [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                  )
              )
            )
        )'; // mx

        $text = preg_replace_callback('{
                (?>\A\n?|(?<=\n\n))
                '.$whole_list_re.'
            }mx',
            array($this, '_doDefLists_callback'), $text);

        return $text;
    }
    protected function _doDefLists_callback($matches) {
        #
        # Callback for one whole definition list: converts its items and
        # wraps them in <dl>.
        #
        # Params:
        #    $matches - [1] holds the text of the whole list.
        #
        # Returns the hashed <dl> block followed by two newlines.
        #
        $list = $matches[1];

        $result = trim($this->processDefListItems($list));
        $result = "<dl>\n" . $result . "\n</dl>";
        return $this->hashBlock($result) . "\n\n";
    }


    protected function processDefListItems($list_str) {
        #
        # Process the contents of a single definition list, splitting it
        # into individual term and definition list items.
        #
        $less_than_tab = $this->tab_width - 1;

        # trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        # Process definition terms.
        $list_str = preg_replace_callback('{
            (?>\A\n?|\n\n+)                 # leading line
            (                               # definition terms = $1
                [ ]{0,'.$less_than_tab.'}   # leading whitespace
                (?!\:[ ]|[ ])               # negative lookahead for a definition
                                            #   mark (colon) or more whitespace.
                (?> \S.* \n)+?              # actual term (not whitespace).
            )
            (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
                                            #   with a definition mark.
            }xm',
            array($this, '_processDefListItems_callback_dt'), $list_str);

        # Process actual definitions. This runs after the terms pass, which
        # is why the pattern can stop at a literal <dt>.
        $list_str = preg_replace_callback('{
            \n(\n+)?                        # leading line = $1
            (                               # marker space = $2
                [ ]{0,'.$less_than_tab.'}   # whitespace before colon
                \:[ ]+                      # definition mark (colon)
            )
            ((?s:.+?))                      # definition text = $3
            (?= \n+                         # stop at next definition mark,
                (?:                         # next term or end of text
                    [ ]{0,'.$less_than_tab.'} \:[ ] |
                    <dt> | \z
                )
            )
            }xm',
            array($this, '_processDefListItems_callback_dd'), $list_str);

        return $list_str;
    }
    protected function _processDefListItems_callback_dt($matches) {
        #
        # Callback for a run of definition terms: each non-blank line of
        # $matches[1] becomes its own <dt> element.
        #
        $terms = explode("\n", trim($matches[1]));
        $text = '';
        foreach ($terms as $term) {
            $term = $this->runSpanGamut(trim($term));
            $text .= "\n<dt>" . $term . "</dt>";
        }
        return $text . "\n";
    }
    protected function _processDefListItems_callback_dd($matches) {
        #
        # Callback for a single definition.
        #
        # Params:
        #    $matches - [1] blank line(s) preceding the marker, if any,
        #               [2] the colon marker with its surrounding spaces,
        #               [3] the definition text.
        #
        # Returns the <dd> element surrounded by newlines.
        #
        $leading_line = $matches[1];
        $marker_space = $matches[2];
        $def          = $matches[3];

        # A definition preceded by a blank line, or containing one, is
        # processed as block-level content; otherwise as a span.
        if ($leading_line || preg_match('/\n{2,}/', $def)) {
            # Replace marker with the appropriate whitespace indentation
            $def = str_repeat(' ', strlen($marker_space)) . $def;
            $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
            $def = "\n". $def ."\n";
        }
        else {
            $def = rtrim($def);
            $def = $this->runSpanGamut($this->outdent($def));
        }

        return "\n<dd>" . $def . "</dd>\n";
    }


    protected function doFencedCodeBlocks($text) {
        #
        # Adding the fenced code block syntax to regular Markdown:
        #
        # ~~~
        # Code block
        # ~~~
        #
        $text = preg_replace_callback('{
                (?:\n|\A)
                # 1: Opening marker
                (
                    (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
                )
                [ ]*
                (?:
                    \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
                )?
                [ ]*
                (?:
                    '.$this->id_class_attr_catch_re.' # 3: Extra attributes
                )?
                [ ]* \n # Whitespace and newline following marker.

                # 4: Content
                (
                    (?>
                        (?!\1 [ ]* \n)  # Not a closing marker.
                        .*\n+
                    )+
                )

                # Closing marker.
                \1 [ ]* (?= \n )
            }xm',
            array($this, '_doFencedCodeBlocks_callback'), $text);

        return $text;
    }
    protected function _doFencedCodeBlocks_callback($matches) {
        #
        # Callback for one fenced code block.
        #
        # Params:
        #    $matches - [2] standalone class name, [3] extra attribute
        #               block, [4] the code itself.
        #
        # Returns the hashed <pre><code> block surrounded by blank lines.
        #
        $classname =& $matches[2];
        $attrs     =& $matches[3];
        $codeblock = $matches[4];

        # A user-supplied content function takes over escaping entirely.
        if ($this->code_block_content_func) {
            $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
        } else {
            $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
        }

        # Leading blank lines are turned into explicit <br> tags.
        $codeblock = preg_replace_callback('/^\n+/',
            array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

        $classes = array();
        if ($classname != "") {
            # Square-bracket string offset: the curly-brace form is gone
            # as of PHP 8.0.
            if ($classname[0] == '.')
                $classname = substr($classname, 1);
            $classes[] = $this->code_class_prefix.$classname;
        }
        # Attributes go on either <pre> or <code>, never both.
        $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
        $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
        $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
        $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

        return "\n\n".$this->hashBlock($codeblock)."\n\n";
    }
    protected function _doFencedCodeBlocks_newlines($matches) {
        #
        # Replace a run of newlines by the same number of <br> tags.
        #
        return str_repeat("<br$this->empty_element_suffix",
            strlen($matches[0]));
    }


    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    protected $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
        '*' => '(?<![\s*])\*(?!\*)',
        '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
        );
    protected $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
        '**' => '(?<![\s*])\*\*(?!\*)',
        '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
        );
    protected $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
        '***' => '(?<![\s*])\*\*\*(?!\*)',
        '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
        );


    protected function formParagraphs($text) {
        #
        # Params:
        #    $text - string to process with html <p> tags
        #
        # Strip leading and trailing lines:
        $text = preg_replace('/\A\n+|\n+\z/', '', $text);

        $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

        #
        # Wrap <p> tags and unhashify HTML blocks
        #
        foreach ($grafs as $key => $value) {
            $value = trim($this->runSpanGamut($value));

            # Check if this should be enclosed in a paragraph.
            # Clean tag hashes & block tag hashes are left alone.
            $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

            if ($is_p) {
                $value = "<p>$value</p>";
            }
            $grafs[$key] = $value;
        }

        # Join grafs in one text, then unhash HTML tags.
        $text = implode("\n\n", $grafs);

        # Finish by removing any tag hashes still present in $text.
        $text = $this->unhash($text);

        return $text;
    }


    ### Footnotes

    protected function stripFootnotes($text) {
        #
        # Strips footnote definitions from text, stores their content in
        # $this->footnotes keyed by prefixed note id.
        #
        $less_than_tab = $this->tab_width - 1;

        # Footnote defs are in the form: [^id]: footnote text
        $text = preg_replace_callback('{
            ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
              [ ]*
              \n?                   # maybe *one* newline
            (                       # text = $2 (no blank lines allowed)
                (?:
                    .+              # actual text
                |
                    \n              # newlines but
                    (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                    (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                    # by non-indented content
                )*
            )
            }xm',
            array($this, '_stripFootnotes_callback'),
            $text);
        return $text;
    }
    protected function _stripFootnotes_callback($matches) {
        #
        # Store one footnote definition ($matches[1] = id, $matches[2] =
        # text) and remove it from the document.
        #
        $note_id = $this->fn_id_prefix . $matches[1];
        $this->footnotes[$note_id] = $this->outdent($matches[2]);
        return ''; # String that will replace the block
    }


    protected function doFootnotes($text) {
        #
        # Replace footnote references in $text [^id] with a special text-token
        # which will be replaced by the actual footnote marker in appendFootnotes.
        #
        if (!$this->in_anchor) {
            $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
        }
        return $text;
    }


    protected function appendFootnotes($text) {
        #
        # Append footnote list to text.
        #
        # Footnote tokens left by doFootnotes() are first replaced by their
        # markers; every footnote referenced that way is then rendered, in
        # order of first reference, inside <div class="footnotes">.
        #
        $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
            array($this, '_appendFootnotes_callback'), $text);

        if (!empty($this->footnotes_ordered)) {
            $text .= "\n\n";
            $text .= "<div class=\"footnotes\">\n";
            $text .= "<hr". $this->empty_element_suffix ."\n";
            $text .= "<ol>\n\n";

            # Attribute template shared by all backlinks; "%%" stands for
            # the footnote number and is filled in per footnote below.
            $attr = "";
            if ($this->fn_backlink_class != "") {
                $class = $this->fn_backlink_class;
                $class = $this->encodeAttribute($class);
                $attr .= " class=\"$class\"";
            }
            if ($this->fn_backlink_title != "") {
                $title = $this->fn_backlink_title;
                $title = $this->encodeAttribute($title);
                $attr .= " title=\"$title\"";
            }
            $backlink_text = $this->fn_backlink_html;
            $num = 0;

            # Processing a footnote can append new entries to
            # footnotes_ordered (footnotes referenced from footnotes), so
            # the list is consumed from the front rather than iterated.
            while (!empty($this->footnotes_ordered)) {
                $footnote = reset($this->footnotes_ordered);
                $note_id = key($this->footnotes_ordered);
                unset($this->footnotes_ordered[$note_id]);
                $ref_count = $this->footnotes_ref_count[$note_id];
                unset($this->footnotes_ref_count[$note_id]);
                unset($this->footnotes[$note_id]);

                $footnote .= "\n"; # Need to append newline before parsing.
                $footnote = $this->runBlockGamut("$footnote\n");
                $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                    array($this, '_appendFootnotes_callback'), $footnote);

                # Substitute into a per-footnote copy: writing back to
                # $attr would consume the "%%" placeholder on the first
                # footnote and stamp its number on all the following ones.
                $note_attr = str_replace("%%", (string) ++$num, $attr);
                $note_id = $this->encodeAttribute($note_id);

                # Prepare backlink, multiple backlinks if multiple references
                $backlink = "<a href=\"#fnref:$note_id\"$note_attr>$backlink_text</a>";
                for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                    $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>$backlink_text</a>";
                }
                # Add backlink to last paragraph; create new paragraph if needed.
                if (preg_match('{</p>$}', $footnote)) {
                    $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
                } else {
                    $footnote .= "\n\n<p>$backlink</p>";
                }

                $text .= "<li id=\"fn:$note_id\">\n";
                $text .= $footnote . "\n";
                $text .= "</li>\n\n";
            }

            $text .= "</ol>\n";
            $text .= "</div>";
        }
        return $text;
    }
    protected function _appendFootnotes_callback($matches) {
        #
        # Callback turning one footnote token into its <sup> marker.
        #
        # Params:
        #    $matches - [1] is the footnote id as written in the document.
        #
        # Returns the marker HTML, or the original "[^id]" text when no
        # footnote with that id is defined.
        #
        $node_id = $this->fn_id_prefix . $matches[1];

        # Create footnote marker only if it has a corresponding footnote *and*
        # the footnote hasn't been used by another marker.
        if (isset($this->footnotes[$node_id])) {
            $num =& $this->footnotes_numbers[$node_id];
            if (!isset($num)) {
                # Transfer footnote content to the ordered list and give it its
                # number
                $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
                $this->footnotes_ref_count[$node_id] = 1;
                $num = $this->footnote_counter++;
                $ref_count_mark = '';
            } else {
                # Repeated reference: the running count becomes part of the
                # marker id so every reference gets a distinct anchor.
                $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
            }

            $attr = "";
            if ($this->fn_link_class != "") {
                $class = $this->fn_link_class;
                $class = $this->encodeAttribute($class);
                $attr .= " class=\"$class\"";
            }
            if ($this->fn_link_title != "") {
                $title = $this->fn_link_title;
                $title = $this->encodeAttribute($title);
                $attr .= " title=\"$title\"";
            }

            # "%%" in the class/title is replaced by the footnote number.
            $attr = str_replace("%%", $num, $attr);
            $node_id = $this->encodeAttribute($node_id);

            return
                "<sup id=\"fnref$ref_count_mark:$node_id\">".
                "<a href=\"#fn:$node_id\"$attr>$num</a>".
                "</sup>";
        }

        return "[^".$matches[1]."]";
    }


    ### Abbreviations ###

    protected function stripAbbreviations($text) {
        #
        # Strips abbreviation definitions from text, stores the words and
        # their descriptions for doAbbreviations().
        #
        $less_than_tab = $this->tab_width - 1;

        # Abbreviation defs are in the form: *[word]: description
        $text = preg_replace_callback('{
            ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
            (.*)                    # text = $2 (no blank lines allowed)
            }xm',
            array($this, '_stripAbbreviations_callback'),
            $text);
        return $text;
    }
    protected function _stripAbbreviations_callback($matches) {
        #
        # Record one abbreviation ($matches[1] = word, $matches[2] =
        # description) and remove its definition from the document.
        #
        $abbr_word = $matches[1];
        $abbr_desc = $matches[2];
        # abbr_word_re is an alternation of all known words, regex-quoted.
        if ($this->abbr_word_re)
            $this->abbr_word_re .= '|';
        $this->abbr_word_re .= preg_quote($abbr_word);
        $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
        return ''; # String that will replace the block
    }


    protected function doAbbreviations($text) {
        #
        # Find defined abbreviations in text and wrap them in <abbr> elements.
        #
        if ($this->abbr_word_re) {
            // cannot use the /x modifier because abbr_word_re may
            // contain significant spaces:
            $text = preg_replace_callback('{'.
                '(?<![\w\x1A])'.
                '(?:'.$this->abbr_word_re.')'.
                '(?![\w\x1A])'.
                '}',
                array($this, '_doAbbreviations_callback'), $text);
        }
        return $text;
    }
    protected function _doAbbreviations_callback($matches) {
        #
        # Callback wrapping one matched abbreviation in <abbr>.
        #
        # Returns the hashed <abbr> element, with a title attribute when a
        # description is stored, or the matched text unchanged when the
        # word has no entry.
        #
        $abbr = $matches[0];
        if (isset($this->abbr_desciptions[$abbr])) {
            $desc = $this->abbr_desciptions[$abbr];
            # Compare against the empty string rather than empty(), which
            # would also discard a description of "0".
            if ((string) $desc === '') {
                return $this->hashPart("<abbr>$abbr</abbr>");
            } else {
                $desc = $this->encodeAttribute($desc);
                return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
            }
        } else {
            return $matches[0];
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Thu Aug 11 10:00:09 2016 | Cross-referenced by PHPXref 0.7.1 |