[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 <?php 2 # 3 # Markdown - A text-to-HTML conversion tool for web writers 4 # 5 # PHP Markdown 6 # Copyright (c) 2004-2015 Michel Fortin 7 # <https://michelf.ca/projects/php-markdown/> 8 # 9 # Original Markdown 10 # Copyright (c) 2004-2006 John Gruber 11 # <https://daringfireball.net/projects/markdown/> 12 # 13 namespace Michelf; 14 15 16 # 17 # Markdown Parser Class 18 # 19 20 class Markdown implements MarkdownInterface { 21 22 ### Version ### 23 24 const MARKDOWNLIB_VERSION = "1.6.0"; 25 26 ### Simple Function Interface ### 27 28 public static function defaultTransform($text) { 29 # 30 # Initialize the parser and return the result of its transform method. 31 # This will work fine for derived classes too. 32 # 33 # Take parser class on which this function was called. 34 $parser_class = \get_called_class(); 35 36 # try to take parser from the static parser list 37 static $parser_list; 38 $parser =& $parser_list[$parser_class]; 39 40 # create the parser it not already set 41 if (!$parser) 42 $parser = new $parser_class; 43 44 # Transform text using parser. 45 return $parser->transform($text); 46 } 47 48 ### Configuration Variables ### 49 50 # Change to ">" for HTML output. 51 public $empty_element_suffix = " />"; 52 public $tab_width = 4; 53 54 # Change to `true` to disallow markup or entities. 55 public $no_markup = false; 56 public $no_entities = false; 57 58 # Predefined urls and titles for reference links and images. 59 public $predef_urls = array(); 60 public $predef_titles = array(); 61 62 # Optional filter function for URLs 63 public $url_filter_func = null; 64 65 # Optional header id="" generation callback function. 66 public $header_id_func = null; 67 68 # Optional function for converting code block content to HTML 69 public $code_block_content_func = null; 70 71 # Class attribute to toggle "enhanced ordered list" behaviour 72 # setting this to true will allow ordered lists to start from the index 73 # number that is defined first. For example: 74 # 2. List item two 75 # 3. List item three 76 # 77 # becomes 78 # <ol start="2"> 79 # <li>List item two</li> 80 # <li>List item three</li> 81 # </ol> 82 public $enhanced_ordered_list = false; 83 84 ### Parser Implementation ### 85 86 # Regex to match balanced [brackets]. 87 # Needed to insert a maximum bracked depth while converting to PHP. 88 protected $nested_brackets_depth = 6; 89 protected $nested_brackets_re; 90 91 protected $nested_url_parenthesis_depth = 4; 92 protected $nested_url_parenthesis_re; 93 94 # Table of hash values for escaped characters: 95 protected $escape_chars = '\`*_{}[]()>#+-.!'; 96 protected $escape_chars_re; 97 98 99 public function __construct() { 100 # 101 # Constructor function. Initialize appropriate member variables. 102 # 103 $this->_initDetab(); 104 $this->prepareItalicsAndBold(); 105 106 $this->nested_brackets_re = 107 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 108 str_repeat('\])*', $this->nested_brackets_depth); 109 110 $this->nested_url_parenthesis_re = 111 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 112 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 113 114 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 115 116 # Sort document, block, and span gamut in ascendent priority order. 117 asort($this->document_gamut); 118 asort($this->block_gamut); 119 asort($this->span_gamut); 120 } 121 122 123 # Internal hashes used during transformation. 124 protected $urls = array(); 125 protected $titles = array(); 126 protected $html_hashes = array(); 127 128 # Status flag to avoid invalid nesting. 129 protected $in_anchor = false; 130 131 132 protected function setup() { 133 # 134 # Called before the transformation process starts to setup parser 135 # states. 136 # 137 # Clear global hashes. 138 $this->urls = $this->predef_urls; 139 $this->titles = $this->predef_titles; 140 $this->html_hashes = array(); 141 142 $this->in_anchor = false; 143 } 144 145 protected function teardown() { 146 # 147 # Called after the transformation process to clear any variable 148 # which may be taking up memory unnecessarly. 149 # 150 $this->urls = array(); 151 $this->titles = array(); 152 $this->html_hashes = array(); 153 } 154 155 156 public function transform($text) { 157 # 158 # Main function. Performs some preprocessing on the input text 159 # and pass it through the document gamut. 160 # 161 $this->setup(); 162 163 # Remove UTF-8 BOM and marker character in input, if present. 164 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 165 166 # Standardize line endings: 167 # DOS to Unix and Mac to Unix 168 $text = preg_replace('{\r\n?}', "\n", $text); 169 170 # Make sure $text ends with a couple of newlines: 171 $text .= "\n\n"; 172 173 # Convert all tabs to spaces. 174 $text = $this->detab($text); 175 176 # Turn block-level HTML blocks into hash entries 177 $text = $this->hashHTMLBlocks($text); 178 179 # Strip any lines consisting only of spaces and tabs. 180 # This makes subsequent regexen easier to write, because we can 181 # match consecutive blank lines with /\n+/ instead of something 182 # contorted like /[ ]*\n+/ . 183 $text = preg_replace('/^[ ]+$/m', '', $text); 184 185 # Run document gamut methods. 186 foreach ($this->document_gamut as $method => $priority) { 187 $text = $this->$method($text); 188 } 189 190 $this->teardown(); 191 192 return $text . "\n"; 193 } 194 195 protected $document_gamut = array( 196 # Strip link definitions, store in hashes. 197 "stripLinkDefinitions" => 20, 198 199 "runBasicBlockGamut" => 30, 200 ); 201 202 203 protected function stripLinkDefinitions($text) { 204 # 205 # Strips link definitions from text, stores the URLs and titles in 206 # hash references. 207 # 208 $less_than_tab = $this->tab_width - 1; 209 210 # Link defs are in the form: ^[id]: url "optional title" 211 $text = preg_replace_callback('{ 212 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 213 [ ]* 214 \n? # maybe *one* newline 215 [ ]* 216 (?: 217 <(.+?)> # url = $2 218 | 219 (\S+?) # url = $3 220 ) 221 [ ]* 222 \n? # maybe one newline 223 [ ]* 224 (?: 225 (?<=\s) # lookbehind for whitespace 226 ["(] 227 (.*?) # title = $4 228 [")] 229 [ ]* 230 )? # title is optional 231 (?:\n+|\Z) 232 }xm', 233 array($this, '_stripLinkDefinitions_callback'), 234 $text); 235 return $text; 236 } 237 protected function _stripLinkDefinitions_callback($matches) { 238 $link_id = strtolower($matches[1]); 239 $url = $matches[2] == '' ? $matches[3] : $matches[2]; 240 $this->urls[$link_id] = $url; 241 $this->titles[$link_id] =& $matches[4]; 242 return ''; # String that will replace the block 243 } 244 245 246 protected function hashHTMLBlocks($text) { 247 if ($this->no_markup) return $text; 248 249 $less_than_tab = $this->tab_width - 1; 250 251 # Hashify HTML blocks: 252 # We only want to do this for block-level HTML tags, such as headers, 253 # lists, and tables. That's because we still want to wrap <p>s around 254 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 255 # phrase emphasis, and spans. The list of tags we're looking for is 256 # hard-coded: 257 # 258 # * List "a" is made of tags which can be both inline or block-level. 259 # These will be treated block-level when the start tag is alone on 260 # its line, otherwise they're not matched here and will be taken as 261 # inline later. 262 # * List "b" is made of tags which are always block-level; 263 # 264 $block_tags_a_re = 'ins|del'; 265 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 266 'script|noscript|style|form|fieldset|iframe|math|svg|'. 267 'article|section|nav|aside|hgroup|header|footer|'. 268 'figure'; 269 270 # Regular expression for the content of a block tag. 271 $nested_tags_level = 4; 272 $attr = ' 273 (?> # optional tag attributes 274 \s # starts with whitespace 275 (?> 276 [^>"/]+ # text outside quotes 277 | 278 /+(?!>) # slash not followed by ">" 279 | 280 "[^"]*" # text inside double quotes (tolerate ">") 281 | 282 \'[^\']*\' # text inside single quotes (tolerate ">") 283 )* 284 )? 285 '; 286 $content = 287 str_repeat(' 288 (?> 289 [^<]+ # content without tag 290 | 291 <\2 # nested opening tag 292 '.$attr.' # attributes 293 (?> 294 /> 295 | 296 >', $nested_tags_level). # end of opening tag 297 '.*?'. # last level nested tag content 298 str_repeat(' 299 </\2\s*> # closing nested tag 300 ) 301 | 302 <(?!/\2\s*> # other tags with a different name 303 ) 304 )*', 305 $nested_tags_level); 306 $content2 = str_replace('\2', '\3', $content); 307 308 # First, look for nested blocks, e.g.: 309 # <div> 310 # <div> 311 # tags for inner block must be indented. 312 # </div> 313 # </div> 314 # 315 # The outermost tags must start at the left margin for this to match, and 316 # the inner nested divs must be indented. 317 # We need to do this before the next, more liberal match, because the next 318 # match will start at the first `<div>` and stop at the first `</div>`. 319 $text = preg_replace_callback('{(?> 320 (?> 321 (?<=\n) # Starting on its own line 322 | # or 323 \A\n? # the at beginning of the doc 324 ) 325 ( # save in $1 326 327 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 328 # in between. 329 330 [ ]{0,'.$less_than_tab.'} 331 <('.$block_tags_b_re.')# start tag = $2 332 '.$attr.'> # attributes followed by > and \n 333 '.$content.' # content, support nesting 334 </\2> # the matching end tag 335 [ ]* # trailing spaces/tabs 336 (?=\n+|\Z) # followed by a newline or end of document 337 338 | # Special version for tags of group a. 339 340 [ ]{0,'.$less_than_tab.'} 341 <('.$block_tags_a_re.')# start tag = $3 342 '.$attr.'>[ ]*\n # attributes followed by > 343 '.$content2.' # content, support nesting 344 </\3> # the matching end tag 345 [ ]* # trailing spaces/tabs 346 (?=\n+|\Z) # followed by a newline or end of document 347 348 | # Special case just for <hr />. It was easier to make a special 349 # case than to make the other regex more complicated. 350 351 [ ]{0,'.$less_than_tab.'} 352 <(hr) # start tag = $2 353 '.$attr.' # attributes 354 /?> # the matching end tag 355 [ ]* 356 (?=\n{2,}|\Z) # followed by a blank line or end of document 357 358 | # Special case for standalone HTML comments: 359 360 [ ]{0,'.$less_than_tab.'} 361 (?s: 362 <!-- .*? --> 363 ) 364 [ ]* 365 (?=\n{2,}|\Z) # followed by a blank line or end of document 366 367 | # PHP and ASP-style processor instructions (<? and <%) 368 369 [ ]{0,'.$less_than_tab.'} 370 (?s: 371 <([?%]) # $2 372 .*? 373 \2> 374 ) 375 [ ]* 376 (?=\n{2,}|\Z) # followed by a blank line or end of document 377 378 ) 379 )}Sxmi', 380 array($this, '_hashHTMLBlocks_callback'), 381 $text); 382 383 return $text; 384 } 385 protected function _hashHTMLBlocks_callback($matches) { 386 $text = $matches[1]; 387 $key = $this->hashBlock($text); 388 return "\n\n$key\n\n"; 389 } 390 391 392 protected function hashPart($text, $boundary = 'X') { 393 # 394 # Called whenever a tag must be hashed when a function insert an atomic 395 # element in the text stream. Passing $text to through this function gives 396 # a unique text-token which will be reverted back when calling unhash. 397 # 398 # The $boundary argument specify what character should be used to surround 399 # the token. By convension, "B" is used for block elements that needs not 400 # to be wrapped into paragraph tags at the end, ":" is used for elements 401 # that are word separators and "X" is used in the general case. 402 # 403 # Swap back any tag hash found in $text so we do not have to `unhash` 404 # multiple times at the end. 405 $text = $this->unhash($text); 406 407 # Then hash the block. 408 static $i = 0; 409 $key = "$boundary\x1A" . ++$i . $boundary; 410 $this->html_hashes[$key] = $text; 411 return $key; # String that will replace the tag. 412 } 413 414 415 protected function hashBlock($text) { 416 # 417 # Shortcut function for hashPart with block-level boundaries. 418 # 419 return $this->hashPart($text, 'B'); 420 } 421 422 423 protected $block_gamut = array( 424 # 425 # These are all the transformations that form block-level 426 # tags like paragraphs, headers, and list items. 427 # 428 "doHeaders" => 10, 429 "doHorizontalRules" => 20, 430 431 "doLists" => 40, 432 "doCodeBlocks" => 50, 433 "doBlockQuotes" => 60, 434 ); 435 436 protected function runBlockGamut($text) { 437 # 438 # Run block gamut tranformations. 439 # 440 # We need to escape raw HTML in Markdown source before doing anything 441 # else. This need to be done for each block, and not only at the 442 # begining in the Markdown function since hashed blocks can be part of 443 # list items and could have been indented. Indented blocks would have 444 # been seen as a code block in a previous pass of hashHTMLBlocks. 445 $text = $this->hashHTMLBlocks($text); 446 447 return $this->runBasicBlockGamut($text); 448 } 449 450 protected function runBasicBlockGamut($text) { 451 # 452 # Run block gamut tranformations, without hashing HTML blocks. This is 453 # useful when HTML blocks are known to be already hashed, like in the first 454 # whole-document pass. 455 # 456 foreach ($this->block_gamut as $method => $priority) { 457 $text = $this->$method($text); 458 } 459 460 # Finally form paragraph and restore hashed blocks. 461 $text = $this->formParagraphs($text); 462 463 return $text; 464 } 465 466 467 protected function doHorizontalRules($text) { 468 # Do Horizontal Rules: 469 return preg_replace( 470 '{ 471 ^[ ]{0,3} # Leading space 472 ([-*_]) # $1: First marker 473 (?> # Repeated marker group 474 [ ]{0,2} # Zero, one, or two spaces. 475 \1 # Marker character 476 ){2,} # Group repeated at least twice 477 [ ]* # Tailing spaces 478 $ # End of line. 479 }mx', 480 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 481 $text); 482 } 483 484 485 protected $span_gamut = array( 486 # 487 # These are all the transformations that occur *within* block-level 488 # tags like paragraphs, headers, and list items. 489 # 490 # Process character escapes, code spans, and inline HTML 491 # in one shot. 492 "parseSpan" => -30, 493 494 # Process anchor and image tags. Images must come first, 495 # because ![foo][f] looks like an anchor. 496 "doImages" => 10, 497 "doAnchors" => 20, 498 499 # Make links out of things like `<https://example.com/>` 500 # Must come after doAnchors, because you can use < and > 501 # delimiters in inline links like [this](<url>). 502 "doAutoLinks" => 30, 503 "encodeAmpsAndAngles" => 40, 504 505 "doItalicsAndBold" => 50, 506 "doHardBreaks" => 60, 507 ); 508 509 protected function runSpanGamut($text) { 510 # 511 # Run span gamut tranformations. 512 # 513 foreach ($this->span_gamut as $method => $priority) { 514 $text = $this->$method($text); 515 } 516 517 return $text; 518 } 519 520 521 protected function doHardBreaks($text) { 522 # Do hard breaks: 523 return preg_replace_callback('/ {2,}\n/', 524 array($this, '_doHardBreaks_callback'), $text); 525 } 526 protected function _doHardBreaks_callback($matches) { 527 return $this->hashPart("<br$this->empty_element_suffix\n"); 528 } 529 530 531 protected function doAnchors($text) { 532 # 533 # Turn Markdown link shortcuts into XHTML <a> tags. 534 # 535 if ($this->in_anchor) return $text; 536 $this->in_anchor = true; 537 538 # 539 # First, handle reference-style links: [link text] [id] 540 # 541 $text = preg_replace_callback('{ 542 ( # wrap whole match in $1 543 \[ 544 ('.$this->nested_brackets_re.') # link text = $2 545 \] 546 547 [ ]? # one optional space 548 (?:\n[ ]*)? # one optional newline followed by spaces 549 550 \[ 551 (.*?) # id = $3 552 \] 553 ) 554 }xs', 555 array($this, '_doAnchors_reference_callback'), $text); 556 557 # 558 # Next, inline-style links: [link text](url "optional title") 559 # 560 $text = preg_replace_callback('{ 561 ( # wrap whole match in $1 562 \[ 563 ('.$this->nested_brackets_re.') # link text = $2 564 \] 565 \( # literal paren 566 [ \n]* 567 (?: 568 <(.+?)> # href = $3 569 | 570 ('.$this->nested_url_parenthesis_re.') # href = $4 571 ) 572 [ \n]* 573 ( # $5 574 ([\'"]) # quote char = $6 575 (.*?) # Title = $7 576 \6 # matching quote 577 [ \n]* # ignore any spaces/tabs between closing quote and ) 578 )? # title is optional 579 \) 580 ) 581 }xs', 582 array($this, '_doAnchors_inline_callback'), $text); 583 584 # 585 # Last, handle reference-style shortcuts: [link text] 586 # These must come last in case you've also got [link text][1] 587 # or [link text](/foo) 588 # 589 $text = preg_replace_callback('{ 590 ( # wrap whole match in $1 591 \[ 592 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 593 \] 594 ) 595 }xs', 596 array($this, '_doAnchors_reference_callback'), $text); 597 598 $this->in_anchor = false; 599 return $text; 600 } 601 protected function _doAnchors_reference_callback($matches) { 602 $whole_match = $matches[1]; 603 $link_text = $matches[2]; 604 $link_id =& $matches[3]; 605 606 if ($link_id == "") { 607 # for shortcut links like [this][] or [this]. 608 $link_id = $link_text; 609 } 610 611 # lower-case and turn embedded newlines into spaces 612 $link_id = strtolower($link_id); 613 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 614 615 if (isset($this->urls[$link_id])) { 616 $url = $this->urls[$link_id]; 617 $url = $this->encodeURLAttribute($url); 618 619 $result = "<a href=\"$url\""; 620 if ( isset( $this->titles[$link_id] ) ) { 621 $title = $this->titles[$link_id]; 622 $title = $this->encodeAttribute($title); 623 $result .= " title=\"$title\""; 624 } 625 626 $link_text = $this->runSpanGamut($link_text); 627 $result .= ">$link_text</a>"; 628 $result = $this->hashPart($result); 629 } 630 else { 631 $result = $whole_match; 632 } 633 return $result; 634 } 635 protected function _doAnchors_inline_callback($matches) { 636 $whole_match = $matches[1]; 637 $link_text = $this->runSpanGamut($matches[2]); 638 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 639 $title =& $matches[7]; 640 641 // if the URL was of the form <s p a c e s> it got caught by the HTML 642 // tag parser and hashed. Need to reverse the process before using the URL. 643 $unhashed = $this->unhash($url); 644 if ($unhashed != $url) 645 $url = preg_replace('/^<(.*)>$/', '\1', $unhashed); 646 647 $url = $this->encodeURLAttribute($url); 648 649 $result = "<a href=\"$url\""; 650 if (isset($title)) { 651 $title = $this->encodeAttribute($title); 652 $result .= " title=\"$title\""; 653 } 654 655 $link_text = $this->runSpanGamut($link_text); 656 $result .= ">$link_text</a>"; 657 658 return $this->hashPart($result); 659 } 660 661 662 protected function doImages($text) { 663 # 664 # Turn Markdown image shortcuts into <img> tags. 665 # 666 # 667 # First, handle reference-style labeled images: ![alt text][id] 668 # 669 $text = preg_replace_callback('{ 670 ( # wrap whole match in $1 671 !\[ 672 ('.$this->nested_brackets_re.') # alt text = $2 673 \] 674 675 [ ]? # one optional space 676 (?:\n[ ]*)? # one optional newline followed by spaces 677 678 \[ 679 (.*?) # id = $3 680 \] 681 682 ) 683 }xs', 684 array($this, '_doImages_reference_callback'), $text); 685 686 # 687 # Next, handle inline images:  688 # Don't forget: encode * and _ 689 # 690 $text = preg_replace_callback('{ 691 ( # wrap whole match in $1 692 !\[ 693 ('.$this->nested_brackets_re.') # alt text = $2 694 \] 695 \s? # One optional whitespace character 696 \( # literal paren 697 [ \n]* 698 (?: 699 <(\S*)> # src url = $3 700 | 701 ('.$this->nested_url_parenthesis_re.') # src url = $4 702 ) 703 [ \n]* 704 ( # $5 705 ([\'"]) # quote char = $6 706 (.*?) # title = $7 707 \6 # matching quote 708 [ \n]* 709 )? # title is optional 710 \) 711 ) 712 }xs', 713 array($this, '_doImages_inline_callback'), $text); 714 715 return $text; 716 } 717 protected function _doImages_reference_callback($matches) { 718 $whole_match = $matches[1]; 719 $alt_text = $matches[2]; 720 $link_id = strtolower($matches[3]); 721 722 if ($link_id == "") { 723 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 724 } 725 726 $alt_text = $this->encodeAttribute($alt_text); 727 if (isset($this->urls[$link_id])) { 728 $url = $this->encodeURLAttribute($this->urls[$link_id]); 729 $result = "<img src=\"$url\" alt=\"$alt_text\""; 730 if (isset($this->titles[$link_id])) { 731 $title = $this->titles[$link_id]; 732 $title = $this->encodeAttribute($title); 733 $result .= " title=\"$title\""; 734 } 735 $result .= $this->empty_element_suffix; 736 $result = $this->hashPart($result); 737 } 738 else { 739 # If there's no such link ID, leave intact: 740 $result = $whole_match; 741 } 742 743 return $result; 744 } 745 protected function _doImages_inline_callback($matches) { 746 $whole_match = $matches[1]; 747 $alt_text = $matches[2]; 748 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 749 $title =& $matches[7]; 750 751 $alt_text = $this->encodeAttribute($alt_text); 752 $url = $this->encodeURLAttribute($url); 753 $result = "<img src=\"$url\" alt=\"$alt_text\""; 754 if (isset($title)) { 755 $title = $this->encodeAttribute($title); 756 $result .= " title=\"$title\""; # $title already quoted 757 } 758 $result .= $this->empty_element_suffix; 759 760 return $this->hashPart($result); 761 } 762 763 764 protected function doHeaders($text) { 765 # Setext-style headers: 766 # Header 1 767 # ======== 768 # 769 # Header 2 770 # -------- 771 # 772 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 773 array($this, '_doHeaders_callback_setext'), $text); 774 775 # atx-style headers: 776 # # Header 1 777 # ## Header 2 778 # ## Header 2 with closing hashes ## 779 # ... 780 # ###### Header 6 781 # 782 $text = preg_replace_callback('{ 783 ^(\#{1,6}) # $1 = string of #\'s 784 [ ]* 785 (.+?) # $2 = Header text 786 [ ]* 787 \#* # optional closing #\'s (not counted) 788 \n+ 789 }xm', 790 array($this, '_doHeaders_callback_atx'), $text); 791 792 return $text; 793 } 794 795 protected function _doHeaders_callback_setext($matches) { 796 # Terrible hack to check we haven't found an empty list item. 797 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 798 return $matches[0]; 799 800 $level = $matches[2]{0} == '=' ? 1 : 2; 801 802 # id attribute generation 803 $idAtt = $this->_generateIdFromHeaderValue($matches[1]); 804 805 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>"; 806 return "\n" . $this->hashBlock($block) . "\n\n"; 807 } 808 protected function _doHeaders_callback_atx($matches) { 809 810 # id attribute generation 811 $idAtt = $this->_generateIdFromHeaderValue($matches[2]); 812 813 $level = strlen($matches[1]); 814 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>"; 815 return "\n" . $this->hashBlock($block) . "\n\n"; 816 } 817 818 protected function _generateIdFromHeaderValue($headerValue) { 819 820 # if a header_id_func property is set, we can use it to automatically 821 # generate an id attribute. 822 # 823 # This method returns a string in the form id="foo", or an empty string 824 # otherwise. 825 if (!is_callable($this->header_id_func)) { 826 return ""; 827 } 828 $idValue = call_user_func($this->header_id_func, $headerValue); 829 if (!$idValue) return ""; 830 831 return ' id="' . $this->encodeAttribute($idValue) . '"'; 832 833 } 834 835 protected function doLists($text) { 836 # 837 # Form HTML ordered (numbered) and unordered (bulleted) lists. 838 # 839 $less_than_tab = $this->tab_width - 1; 840 841 # Re-usable patterns to match list item bullets and number markers: 842 $marker_ul_re = '[*+-]'; 843 $marker_ol_re = '\d+[\.]'; 844 845 $markers_relist = array( 846 $marker_ul_re => $marker_ol_re, 847 $marker_ol_re => $marker_ul_re, 848 ); 849 850 foreach ($markers_relist as $marker_re => $other_marker_re) { 851 # Re-usable pattern to match any entirel ul or ol list: 852 $whole_list_re = ' 853 ( # $1 = whole list 854 ( # $2 855 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 856 ('.$marker_re.') # $4 = first list item marker 857 [ ]+ 858 ) 859 (?s:.+?) 860 ( # $5 861 \z 862 | 863 \n{2,} 864 (?=\S) 865 (?! # Negative lookahead for another list item marker 866 [ ]* 867 '.$marker_re.'[ ]+ 868 ) 869 | 870 (?= # Lookahead for another kind of list 871 \n 872 \3 # Must have the same indentation 873 '.$other_marker_re.'[ ]+ 874 ) 875 ) 876 ) 877 '; // mx 878 879 # We use a different prefix before nested lists than top-level lists. 880 # See extended comment in _ProcessListItems(). 881 882 if ($this->list_level) { 883 $text = preg_replace_callback('{ 884 ^ 885 '.$whole_list_re.' 886 }mx', 887 array($this, '_doLists_callback'), $text); 888 } 889 else { 890 $text = preg_replace_callback('{ 891 (?:(?<=\n)\n|\A\n?) # Must eat the newline 892 '.$whole_list_re.' 893 }mx', 894 array($this, '_doLists_callback'), $text); 895 } 896 } 897 898 return $text; 899 } 900 protected function _doLists_callback($matches) { 901 # Re-usable patterns to match list item bullets and number markers: 902 $marker_ul_re = '[*+-]'; 903 $marker_ol_re = '\d+[\.]'; 904 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 905 $marker_ol_start_re = '[0-9]+'; 906 907 $list = $matches[1]; 908 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; 909 910 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 911 912 $list .= "\n"; 913 $result = $this->processListItems($list, $marker_any_re); 914 915 $ol_start = 1; 916 if ($this->enhanced_ordered_list) { 917 # Get the start number for ordered list. 918 if ($list_type == 'ol') { 919 $ol_start_array = array(); 920 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array); 921 if ($ol_start_check){ 922 $ol_start = $ol_start_array[0]; 923 } 924 } 925 } 926 927 if ($ol_start > 1 && $list_type == 'ol'){ 928 $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>"); 929 } else { 930 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>"); 931 } 932 return "\n". $result ."\n\n"; 933 } 934 935 protected $list_level = 0; 936 937 protected function processListItems($list_str, $marker_any_re) { 938 # 939 # Process the contents of a single ordered or unordered list, splitting it 940 # into individual list items. 941 # 942 # The $this->list_level global keeps track of when we're inside a list. 943 # Each time we enter a list, we increment it; when we leave a list, 944 # we decrement. If it's zero, we're not in a list anymore. 945 # 946 # We do this because when we're not inside a list, we want to treat 947 # something like this: 948 # 949 # I recommend upgrading to version 950 # 8. Oops, now this line is treated 951 # as a sub-list. 952 # 953 # As a single paragraph, despite the fact that the second line starts 954 # with a digit-period-space sequence. 955 # 956 # Whereas when we're inside a list (or sub-list), that line will be 957 # treated as the start of a sub-list. What a kludge, huh? This is 958 # an aspect of Markdown's syntax that's hard to parse perfectly 959 # without resorting to mind-reading. Perhaps the solution is to 960 # change the syntax rules such that sub-lists must start with a 961 # starting cardinal number; e.g. "1." or "a.". 962 963 $this->list_level++; 964 965 # trim trailing blank lines: 966 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 967 968 $list_str = preg_replace_callback('{ 969 (\n)? # leading line = $1 970 (^[ ]*) # leading whitespace = $2 971 ('.$marker_any_re.' # list marker and space = $3 972 (?:[ ]+|(?=\n)) # space only required if item is not empty 973 ) 974 ((?s:.*?)) # list item text = $4 975 (?:(\n+(?=\n))|\n) # tailing blank line = $5 976 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 977 }xm', 978 array($this, '_processListItems_callback'), $list_str); 979 980 $this->list_level--; 981 return $list_str; 982 } 983 protected function _processListItems_callback($matches) { 984 $item = $matches[4]; 985 $leading_line =& $matches[1]; 986 $leading_space =& $matches[2]; 987 $marker_space = $matches[3]; 988 $tailing_blank_line =& $matches[5]; 989 990 if ($leading_line || $tailing_blank_line || 991 preg_match('/\n{2,}/', $item)) 992 { 993 # Replace marker with the appropriate whitespace indentation 994 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 995 $item = $this->runBlockGamut($this->outdent($item)."\n"); 996 } 997 else { 998 # Recursion for sub-lists: 999 $item = $this->doLists($this->outdent($item)); 1000 $item = preg_replace('/\n+$/', '', $item); 1001 $item = $this->runSpanGamut($item); 1002 } 1003 1004 return "<li>" . $item . "</li>\n"; 1005 } 1006 1007 1008 protected function doCodeBlocks($text) { 1009 # 1010 # Process Markdown `<pre><code>` blocks. 1011 # 1012 $text = preg_replace_callback('{ 1013 (?:\n\n|\A\n?) 1014 ( # $1 = the code block -- one or more lines, starting with a space/tab 1015 (?> 1016 [ ]{'.$this->tab_width.'} # Lines must start with a tab or a tab-width of spaces 1017 .*\n+ 1018 )+ 1019 ) 1020 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc 1021 }xm', 1022 array($this, '_doCodeBlocks_callback'), $text); 1023 1024 return $text; 1025 } 1026 protected function _doCodeBlocks_callback($matches) { 1027 $codeblock = $matches[1]; 1028 1029 $codeblock = $this->outdent($codeblock); 1030 if ($this->code_block_content_func) { 1031 $codeblock = call_user_func($this->code_block_content_func, $codeblock, ""); 1032 } else { 1033 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 1034 } 1035 1036 # trim leading newlines and trailing newlines 1037 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock); 1038 1039 $codeblock = "<pre><code>$codeblock\n</code></pre>"; 1040 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 1041 } 1042 1043 1044 protected function makeCodeSpan($code) { 1045 # 1046 # Create a code span markup for $code. Called from handleSpanToken. 1047 # 1048 $code = htmlspecialchars(trim($code), ENT_NOQUOTES); 1049 return $this->hashPart("<code>$code</code>"); 1050 } 1051 1052 1053 protected $em_relist = array( 1054 '' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)', 1055 '*' => '(?<![\s*])\*(?!\*)', 1056 '_' => '(?<![\s_])_(?!_)', 1057 ); 1058 protected $strong_relist = array( 1059 '' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)', 1060 '**' => '(?<![\s*])\*\*(?!\*)', 1061 '__' => '(?<![\s_])__(?!_)', 1062 ); 1063 protected $em_strong_relist = array( 1064 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)', 1065 '***' => '(?<![\s*])\*\*\*(?!\*)', 1066 '___' => '(?<![\s_])___(?!_)', 1067 ); 1068 protected $em_strong_prepared_relist; 1069 1070 protected function prepareItalicsAndBold() { 1071 # 1072 # Prepare regular expressions for searching emphasis tokens in any 1073 # context. 1074 # 1075 foreach ($this->em_relist as $em => $em_re) { 1076 foreach ($this->strong_relist as $strong => $strong_re) { 1077 # Construct list of allowed token expressions. 1078 $token_relist = array(); 1079 if (isset($this->em_strong_relist["$em$strong"])) { 1080 $token_relist[] = $this->em_strong_relist["$em$strong"]; 1081 } 1082 $token_relist[] = $em_re; 1083 $token_relist[] = $strong_re; 1084 1085 # Construct master expression from list. 1086 $token_re = '{('. implode('|', $token_relist) .')}'; 1087 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 1088 } 1089 } 1090 } 1091 1092 protected function doItalicsAndBold($text) { 1093 $token_stack = array(''); 1094 $text_stack = array(''); 1095 $em = ''; 1096 $strong = ''; 1097 $tree_char_em = false; 1098 1099 while (1) { 1100 # 1101 # Get prepared regular expression for seraching emphasis tokens 1102 # in current context. 1103 # 1104 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 1105 1106 # 1107 # Each loop iteration search for the next emphasis token. 1108 # Each token is then passed to handleSpanToken. 1109 # 1110 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 1111 $text_stack[0] .= $parts[0]; 1112 $token =& $parts[1]; 1113 $text =& $parts[2]; 1114 1115 if (empty($token)) { 1116 # Reached end of text span: empty stack without emitting. 1117 # any more emphasis. 1118 while ($token_stack[0]) { 1119 $text_stack[1] .= array_shift($token_stack); 1120 $text_stack[0] .= array_shift($text_stack); 1121 } 1122 break; 1123 } 1124 1125 $token_len = strlen($token); 1126 if ($tree_char_em) { 1127 # Reached closing marker while inside a three-char emphasis. 1128 if ($token_len == 3) { 1129 # Three-char closing marker, close em and strong. 1130 array_shift($token_stack); 1131 $span = array_shift($text_stack); 1132 $span = $this->runSpanGamut($span); 1133 $span = "<strong><em>$span</em></strong>"; 1134 $text_stack[0] .= $this->hashPart($span); 1135 $em = ''; 1136 $strong = ''; 1137 } else { 1138 # Other closing marker: close one em or strong and 1139 # change current token state to match the other 1140 $token_stack[0] = str_repeat($token{0}, 3-$token_len); 1141 $tag = $token_len == 2 ? "strong" : "em"; 1142 $span = $text_stack[0]; 1143 $span = $this->runSpanGamut($span); 1144 $span = "<$tag>$span</$tag>"; 1145 $text_stack[0] = $this->hashPart($span); 1146 $$tag = ''; # $$tag stands for $em or $strong 1147 } 1148 $tree_char_em = false; 1149 } else if ($token_len == 3) { 1150 if ($em) { 1151 # Reached closing marker for both em and strong. 1152 # Closing strong marker: 1153 for ($i = 0; $i < 2; ++$i) { 1154 $shifted_token = array_shift($token_stack); 1155 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 1156 $span = array_shift($text_stack); 1157 $span = $this->runSpanGamut($span); 1158 $span = "<$tag>$span</$tag>"; 1159 $text_stack[0] .= $this->hashPart($span); 1160 $$tag = ''; # $$tag stands for $em or $strong 1161 } 1162 } else { 1163 # Reached opening three-char emphasis marker. Push on token 1164 # stack; will be handled by the special condition above. 1165 $em = $token{0}; 1166 $strong = "$em$em"; 1167 array_unshift($token_stack, $token); 1168 array_unshift($text_stack, ''); 1169 $tree_char_em = true; 1170 } 1171 } else if ($token_len == 2) { 1172 if ($strong) { 1173 # Unwind any dangling emphasis marker: 1174 if (strlen($token_stack[0]) == 1) { 1175 $text_stack[1] .= array_shift($token_stack); 1176 $text_stack[0] .= array_shift($text_stack); 1177 } 1178 # Closing strong marker: 1179 array_shift($token_stack); 1180 $span = array_shift($text_stack); 1181 $span = $this->runSpanGamut($span); 1182 $span = "<strong>$span</strong>"; 1183 $text_stack[0] .= $this->hashPart($span); 1184 $strong = ''; 1185 } else { 1186 array_unshift($token_stack, $token); 1187 array_unshift($text_stack, ''); 1188 $strong = $token; 1189 } 1190 } else { 1191 # Here $token_len == 1 1192 if ($em) { 1193 if (strlen($token_stack[0]) == 1) { 1194 # Closing emphasis marker: 1195 array_shift($token_stack); 1196 $span = array_shift($text_stack); 1197 $span = $this->runSpanGamut($span); 1198 $span = "<em>$span</em>"; 1199 $text_stack[0] .= $this->hashPart($span); 1200 $em = ''; 1201 } else { 1202 $text_stack[0] .= $token; 1203 } 1204 } else { 1205 array_unshift($token_stack, $token); 1206 array_unshift($text_stack, ''); 1207 $em = $token; 1208 } 1209 } 1210 } 1211 return $text_stack[0]; 1212 } 1213 1214 1215 protected function doBlockQuotes($text) { 1216 $text = preg_replace_callback('/ 1217 ( # Wrap whole match in $1 1218 (?> 1219 ^[ ]*>[ ]? # ">" at the start of a line 1220 .+\n # rest of the first line 1221 (.+\n)* # subsequent consecutive lines 1222 \n* # blanks 1223 )+ 1224 ) 1225 /xm', 1226 array($this, '_doBlockQuotes_callback'), $text); 1227 1228 return $text; 1229 } 1230 protected function _doBlockQuotes_callback($matches) { 1231 $bq = $matches[1]; 1232 # trim one level of quoting - trim whitespace-only lines 1233 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 1234 $bq = $this->runBlockGamut($bq); # recurse 1235 1236 $bq = preg_replace('/^/m', " ", $bq); 1237 # These leading spaces cause problem with <pre> content, 1238 # so we need to fix that: 1239 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 1240 array($this, '_doBlockQuotes_callback2'), $bq); 1241 1242 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 1243 } 1244 protected function _doBlockQuotes_callback2($matches) { 1245 $pre = $matches[1]; 1246 $pre = preg_replace('/^ /m', '', $pre); 1247 return $pre; 1248 } 1249 1250 1251 protected function formParagraphs($text) { 1252 # 1253 # Params: 1254 # $text - string to process with html <p> tags 1255 # 1256 # Strip leading and trailing lines: 1257 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 1258 1259 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 1260 1261 # 1262 # Wrap <p> tags and unhashify HTML blocks 1263 # 1264 foreach ($grafs as $key => $value) { 1265 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 1266 # Is a paragraph. 1267 $value = $this->runSpanGamut($value); 1268 $value = preg_replace('/^([ ]*)/', "<p>", $value); 1269 $value .= "</p>"; 1270 $grafs[$key] = $this->unhash($value); 1271 } 1272 else { 1273 # Is a block. 1274 # Modify elements of @grafs in-place... 1275 $graf = $value; 1276 $block = $this->html_hashes[$graf]; 1277 $graf = $block; 1278 // if (preg_match('{ 1279 // \A 1280 // ( # $1 = <div> tag 1281 // <div \s+ 1282 // [^>]* 1283 // \b 1284 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 1285 // 1 1286 // \2 1287 // [^>]* 1288 // > 1289 // ) 1290 // ( # $3 = contents 1291 // .* 1292 // ) 1293 // (</div>) # $4 = closing tag 1294 // \z 1295 // }xs', $block, $matches)) 1296 // { 1297 // list(, $div_open, , $div_content, $div_close) = $matches; 1298 // 1299 // # We can't call Markdown(), because that resets the hash; 1300 // # that initialization code should be pulled into its own sub, though. 1301 // $div_content = $this->hashHTMLBlocks($div_content); 1302 // 1303 // # Run document gamut methods on the content. 1304 // foreach ($this->document_gamut as $method => $priority) { 1305 // $div_content = $this->$method($div_content); 1306 // } 1307 // 1308 // $div_open = preg_replace( 1309 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 1310 // 1311 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 1312 // } 1313 $grafs[$key] = $graf; 1314 } 1315 } 1316 1317 return implode("\n\n", $grafs); 1318 } 1319 1320 1321 protected function encodeAttribute($text) { 1322 # 1323 # Encode text for a double-quoted HTML attribute. This function 1324 # is *not* suitable for attributes enclosed in single quotes. 1325 # 1326 $text = $this->encodeAmpsAndAngles($text); 1327 $text = str_replace('"', '"', $text); 1328 return $text; 1329 } 1330 1331 1332 protected function encodeURLAttribute($url, &$text = null) { 1333 # 1334 # Encode text for a double-quoted HTML attribute containing a URL, 1335 # applying the URL filter if set. Also generates the textual 1336 # representation for the URL (removing mailto: or tel:) storing it in $text. 1337 # This function is *not* suitable for attributes enclosed in single quotes. 1338 # 1339 if ($this->url_filter_func) 1340 $url = call_user_func($this->url_filter_func, $url); 1341 1342 if (preg_match('{^mailto:}i', $url)) 1343 $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7); 1344 else if (preg_match('{^tel:}i', $url)) 1345 { 1346 $url = $this->encodeAttribute($url); 1347 $text = substr($url, 4); 1348 } 1349 else 1350 { 1351 $url = $this->encodeAttribute($url); 1352 $text = $url; 1353 } 1354 1355 return $url; 1356 } 1357 1358 1359 protected function encodeAmpsAndAngles($text) { 1360 # 1361 # Smart processing for ampersands and angle brackets that need to 1362 # be encoded. Valid character entities are left alone unless the 1363 # no-entities mode is set. 1364 # 1365 if ($this->no_entities) { 1366 $text = str_replace('&', '&', $text); 1367 } else { 1368 # Ampersand-encoding based entirely on Nat Irons's Amputator 1369 # MT plugin: <http://bumppo.net/projects/amputator/> 1370 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 1371 '&', $text); 1372 } 1373 # Encode remaining <'s 1374 $text = str_replace('<', '<', $text); 1375 1376 return $text; 1377 } 1378 1379 1380 protected function doAutoLinks($text) { 1381 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i', 1382 array($this, '_doAutoLinks_url_callback'), $text); 1383 1384 # Email addresses: <address@domain.foo> 1385 $text = preg_replace_callback('{ 1386 < 1387 (?:mailto:)? 1388 ( 1389 (?: 1390 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 1391 | 1392 ".*?" 1393 ) 1394 \@ 1395 (?: 1396 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 1397 | 1398 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 1399 ) 1400 ) 1401 > 1402 }xi', 1403 array($this, '_doAutoLinks_email_callback'), $text); 1404 1405 return $text; 1406 } 1407 protected function _doAutoLinks_url_callback($matches) { 1408 $url = $this->encodeURLAttribute($matches[1], $text); 1409 $link = "<a href=\"$url\">$text</a>"; 1410 return $this->hashPart($link); 1411 } 1412 protected function _doAutoLinks_email_callback($matches) { 1413 $addr = $matches[1]; 1414 $url = $this->encodeURLAttribute("mailto:$addr", $text); 1415 $link = "<a href=\"$url\">$text</a>"; 1416 return $this->hashPart($link); 1417 } 1418 1419 1420 protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) { 1421 # 1422 # Input: some text to obfuscate, e.g. "mailto:foo@example.com" 1423 # 1424 # Output: the same text but with most characters encoded as either a 1425 # decimal or hex entity, in the hopes of foiling most address 1426 # harvesting spam bots. E.g.: 1427 # 1428 # mailto:foo 1429 # @example.co 1430 # m 1431 # 1432 # Note: the additional output $tail is assigned the same value as the 1433 # ouput, minus the number of characters specified by $head_length. 1434 # 1435 # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 1436 # With some optimizations by Milian Wolff. Forced encoding of HTML 1437 # attribute special characters by Allan Odgaard. 1438 # 1439 if ($text == "") return $tail = ""; 1440 1441 $chars = preg_split('/(?<!^)(?!$)/', $text); 1442 $seed = (int)abs(crc32($text) / strlen($text)); # Deterministic seed. 1443 1444 foreach ($chars as $key => $char) { 1445 $ord = ord($char); 1446 # Ignore non-ascii chars. 1447 if ($ord < 128) { 1448 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 1449 # roughly 10% raw, 45% hex, 45% dec 1450 # '@' *must* be encoded. I insist. 1451 # '"' and '>' have to be encoded inside the attribute 1452 if ($r > 90 && strpos('@"&>', $char) === false) /* do nothing */; 1453 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 1454 else $chars[$key] = '&#'.$ord.';'; 1455 } 1456 } 1457 1458 $text = implode('', $chars); 1459 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text; 1460 1461 return $text; 1462 } 1463 1464 1465 protected function parseSpan($str) { 1466 # 1467 # Take the string $str and parse it into tokens, hashing embeded HTML, 1468 # escaped characters and handling code spans. 1469 # 1470 $output = ''; 1471 1472 $span_re = '{ 1473 ( 1474 \\\\'.$this->escape_chars_re.' 1475 | 1476 (?<![`\\\\]) 1477 `+ # code span marker 1478 '.( $this->no_markup ? '' : ' 1479 | 1480 <!-- .*? --> # comment 1481 | 1482 <\?.*?\?> | <%.*?%> # processing instruction 1483 | 1484 <[!$]?[-a-zA-Z0-9:_]+ # regular tags 1485 (?> 1486 \s 1487 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 1488 )? 1489 > 1490 | 1491 <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag 1492 | 1493 </[-a-zA-Z0-9:_]+\s*> # closing tag 1494 ').' 1495 ) 1496 }xs'; 1497 1498 while (1) { 1499 # 1500 # Each loop iteration seach for either the next tag, the next 1501 # openning code span marker, or the next escaped character. 1502 # Each token is then passed to handleSpanToken. 1503 # 1504 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 1505 1506 # Create token from text preceding tag. 1507 if ($parts[0] != "") { 1508 $output .= $parts[0]; 1509 } 1510 1511 # Check if we reach the end. 1512 if (isset($parts[1])) { 1513 $output .= $this->handleSpanToken($parts[1], $parts[2]); 1514 $str = $parts[2]; 1515 } 1516 else { 1517 break; 1518 } 1519 } 1520 1521 return $output; 1522 } 1523 1524 1525 protected function handleSpanToken($token, &$str) { 1526 # 1527 # Handle $token provided by parseSpan by determining its nature and 1528 # returning the corresponding value that should replace it. 1529 # 1530 switch ($token{0}) { 1531 case "\\": 1532 return $this->hashPart("&#". ord($token{1}). ";"); 1533 case "`": 1534 # Search for end marker in remaining text. 1535 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 1536 $str, $matches)) 1537 { 1538 $str = $matches[2]; 1539 $codespan = $this->makeCodeSpan($matches[1]); 1540 return $this->hashPart($codespan); 1541 } 1542 return $token; // return as text since no ending marker found. 1543 default: 1544 return $this->hashPart($token); 1545 } 1546 } 1547 1548 1549 protected function outdent($text) { 1550 # 1551 # Remove one level of line-leading tabs or spaces 1552 # 1553 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 1554 } 1555 1556 1557 # String length function for detab. `_initDetab` will create a function to 1558 # hanlde UTF-8 if the default function does not exist. 1559 protected $utf8_strlen = 'mb_strlen'; 1560 1561 protected function detab($text) { 1562 # 1563 # Replace tabs with the appropriate amount of space. 1564 # 1565 # For each line we separate the line in blocks delemited by 1566 # tab characters. Then we reconstruct every line by adding the 1567 # appropriate number of space between each blocks. 1568 1569 $text = preg_replace_callback('/^.*\t.*$/m', 1570 array($this, '_detab_callback'), $text); 1571 1572 return $text; 1573 } 1574 protected function _detab_callback($matches) { 1575 $line = $matches[0]; 1576 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 1577 1578 # Split in blocks. 1579 $blocks = explode("\t", $line); 1580 # Add each blocks to the line. 1581 $line = $blocks[0]; 1582 unset($blocks[0]); # Do not add first block twice. 1583 foreach ($blocks as $block) { 1584 # Calculate amount of space, insert spaces, insert block. 1585 $amount = $this->tab_width - 1586 $strlen($line, 'UTF-8') % $this->tab_width; 1587 $line .= str_repeat(" ", $amount) . $block; 1588 } 1589 return $line; 1590 } 1591 protected function _initDetab() { 1592 # 1593 # Check for the availability of the function in the `utf8_strlen` property 1594 # (initially `mb_strlen`). If the function is not available, create a 1595 # function that will loosely count the number of UTF-8 characters with a 1596 # regular expression. 1597 # 1598 if (function_exists($this->utf8_strlen)) return; 1599 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 1600 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 1601 $text, $m);'); 1602 } 1603 1604 1605 protected function unhash($text) { 1606 # 1607 # Swap back in all the tags hashed by _HashHTMLBlocks. 1608 # 1609 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 1610 array($this, '_unhash_callback'), $text); 1611 } 1612 protected function _unhash_callback($matches) { 1613 return $this->html_hashes[$matches[0]]; 1614 } 1615 1616 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Thu Aug 11 10:00:09 2016 | Cross-referenced by PHPXref 0.7.1 |