[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/filter/urltolink/ -> filter.php (source)

   1  <?php
   2  
   3  // This file is part of Moodle - http://moodle.org/
   4  //
   5  // Moodle is free software: you can redistribute it and/or modify
   6  // it under the terms of the GNU General Public License as published by
   7  // the Free Software Foundation, either version 3 of the License, or
   8  // (at your option) any later version.
   9  //
  10  // Moodle is distributed in the hope that it will be useful,
  11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  // GNU General Public License for more details.
  14  //
  15  // You should have received a copy of the GNU General Public License
  16  // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
  17  
  18  /**
  19   * Filter converting URLs in the text to HTML links
  20   *
  21   * @package    filter
  22   * @subpackage urltolink
  23   * @copyright  2010 David Mudrak <david@moodle.com>
  24   * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  25   */
  26  
  27  defined('MOODLE_INTERNAL') || die();
  28  
  29  class filter_urltolink extends moodle_text_filter {
  30  
  31      /**
  32       * @var array global configuration for this filter
  33       *
  34       * This might be eventually moved into parent class if we found it
  35       * useful for other filters, too.
  36       */
  37      protected static $globalconfig;
  38  
  39      /**
  40       * Apply the filter to the text
  41       *
  42       * @see filter_manager::apply_filter_chain()
  43       * @param string $text to be processed by the text
  44       * @param array $options filter options
  45       * @return string text after processing
  46       */
  47      public function filter($text, array $options = array()) {
  48          if (!isset($options['originalformat'])) {
  49              // if the format is not specified, we are probably called by {@see format_string()}
  50              // in that case, it would be dangerous to replace URL with the link because it could
  51              // be stripped. therefore, we do nothing
  52              return $text;
  53          }
  54          if (in_array($options['originalformat'], explode(',', get_config('filter_urltolink', 'formats')))) {
  55              $this->convert_urls_into_links($text);
  56          }
  57          return $text;
  58      }
  59  
  60      ////////////////////////////////////////////////////////////////////////////
  61      // internal implementation starts here
  62      ////////////////////////////////////////////////////////////////////////////
  63  
  64      /**
  65       * Given some text this function converts any URLs it finds into HTML links
  66       *
  67       * @param string $text Passed in by reference. The string to be searched for urls.
  68       */
  69      protected function convert_urls_into_links(&$text) {
  70          //I've added img tags to this list of tags to ignore.
  71          //See MDL-21168 for more info. A better way to ignore tags whether or not
  72          //they are escaped partially or completely would be desirable. For example:
  73          //<a href="blah">
  74          //&lt;a href="blah"&gt;
  75          //&lt;a href="blah">
  76          $filterignoretagsopen  = array('<a\s[^>]+?>');
  77          $filterignoretagsclose = array('</a>');
  78          filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
  79  
  80          // Check if we support unicode modifiers in regular expressions. Cache it.
  81          // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
  82          // chars are going to arrive to URLs officially really soon (2010?)
  83          // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
  84          // Various ideas from: http://alanstorm.com/url_regex_explained
  85          // Unicode check, negative assertion and other bits from Moodle.
  86          static $unicoderegexp;
  87          if (!isset($unicoderegexp)) {
  88              $unicoderegexp = @preg_match('/\pL/u', 'a'); // This will fail silently, returning false,
  89          }
  90  
  91          // TODO MDL-21296 - use of unicode modifiers may cause a timeout
  92          $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
  93          $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
  94          $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
  95          $port = '(?::\d*)';
  96          $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
  97          $path = "(?:/$pathchar*)*";
  98          $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
  99          $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
 100  
 101          // Lookbehind assertions.
 102          // Is not HTML attribute or CSS URL property. Unfortunately legit text like "url(http://...)" will not be a link.
 103          $lookbehindend = "(?<![]),.;])";
 104  
 105          $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
 106                  "($port?$path$querystring?$fragment?)$lookbehindend";
 107          if ($unicoderegexp) {
 108              $regex = '#' . $regex . '#ui';
 109          } else {
 110              $regex = '#' . preg_replace(array('\pLl', '\PL'), 'a-z', $regex) . '#i';
 111          }
 112  
 113          // Locate any HTML tags.
 114          $matches = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
 115  
 116          // Iterate through the tokenized text to handle chunks (html and content).
 117          foreach ($matches as $idx => $chunk) {
 118              // Nothing to do. We skip completely any html chunk.
 119              if (strpos(trim($chunk), '<') === 0) {
 120                  continue;
 121              }
 122  
 123              // Nothing to do. We skip any content chunk having any of these attributes.
 124              if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
 125                  continue;
 126              }
 127  
 128              // Arrived here, we want to process every word in this chunk.
 129              $text = $chunk;
 130              $words = explode(' ', $text);
 131  
 132              foreach ($words as $idx2 => $word) {
 133                  // ReDoS protection. Stop processing if a word is too large.
 134                  if (strlen($word) < 4096) {
 135                      $words[$idx2] = preg_replace($regex, '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>', $word);
 136                  }
 137              }
 138              $text = implode(' ', $words);
 139  
 140              // Copy the result back to the array.
 141              $matches[$idx] = $text;
 142          }
 143  
 144          $text = implode('', $matches);
 145  
 146          if (!empty($ignoretags)) {
 147              $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
 148              $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
 149          }
 150  
 151          if (get_config('filter_urltolink', 'embedimages')) {
 152              // now try to inject the images, this code was originally in the mediapluing filter
 153              // this may be useful only if somebody relies on the fact the links in FORMAT_MOODLE get converted
 154              // to URLs which in turn change to real images
 155              $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
 156              $text = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
 157          }
 158      }
 159  }
 160  
 161  
 162  /**
 163   * Change links to images into embedded images.
 164   *
 165   * This plugin is intended for automatic conversion of image URLs when FORMAT_MOODLE used.
 166   *
 167   * @param  $link
 168   * @return string
 169   */
 170  function filter_urltolink_img_callback($link) {
 171      if ($link[1] !== $link[3]) {
 172          // this is not a link created by this filter, because the url does not match the text
 173          return $link[0];
 174      }
 175      return '<img class="filter_urltolink_image" alt="" src="'.$link[1].'" />';
 176  }
 177  


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1