[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
 *       to endorse or promote products derived from this software without specific prior
 *       written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @version 1.3.1
 * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Geoffrey Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */

/**
 * Used for data cleanup and post-processing
 *
 *
 * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
 *
 * @package SimplePie
 * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
 */
class SimplePie_Sanitize
{
    // Private vars
    var $base;

    // Set through set_registry(). Declared explicitly so that assigning it
    // does not create a dynamic property.
    var $registry;

    // Options
    var $remove_div = true;
    var $image_handler = '';
    var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
    var $encode_instead_of_strip = false;
    var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
    var $strip_comments = false;
    var $output_encoding = 'UTF-8';
    var $enable_cache = true;
    var $cache_location = './cache';
    var $cache_name_function = 'md5';
    var $timeout = 10;
    var $useragent = '';
    var $force_fsockopen = false;
    var $replace_url_attributes = null;

    /**
     * Initialise the sanitizer with the default URL-replacement map.
     */
    public function __construct()
    {
        // Set defaults
        $this->set_url_replacements(null);
    }

    /**
     * Choose whether the wrapping <div> is removed from sanitized markup.
     *
     * @param bool $enable True to drop the outer div, false to keep a bare <div>
     */
    public function remove_div($enable = true)
    {
        $this->remove_div = (bool) $enable;
    }

    /**
     * Set the prefix that cached image URLs are rewritten to.
     *
     * @param string|false $page Handler prefix; any falsy value disables image rewriting
     */
    public function set_image_handler($page = false)
    {
        if ($page)
        {
            $this->image_handler = (string) $page;
        }
        else
        {
            $this->image_handler = false;
        }
    }

    /**
     * Inject the registry used to reach the Misc, Cache and File services.
     *
     * @param SimplePie_Registry $registry
     */
    public function set_registry(SimplePie_Registry $registry)
    {
        $this->registry = $registry;
    }

    /**
     * Receive the cache configuration used for image caching.
     *
     * Only truthy $cache_location / $cache_name_function values are applied,
     * so an empty value leaves the current setting untouched.
     *
     * @param bool $enable_cache
     * @param string $cache_location
     * @param string $cache_name_function Callable name used to derive cache keys
     * @param string $cache_class Accepted for signature compatibility; not used here
     */
    public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
    {
        if (isset($enable_cache))
        {
            $this->enable_cache = (bool) $enable_cache;
        }

        if ($cache_location)
        {
            $this->cache_location = (string) $cache_location;
        }

        if ($cache_name_function)
        {
            $this->cache_name_function = (string) $cache_name_function;
        }
    }

    /**
     * Receive the HTTP settings used when fetching images to cache.
     *
     * Only truthy values are applied, so this method cannot reset an option
     * back to a falsy value. Values are stored as strings.
     *
     * @param string $file_class Accepted for signature compatibility; not used here
     * @param int $timeout
     * @param string $useragent
     * @param bool $force_fsockopen
     */
    public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
    {
        if ($timeout)
        {
            $this->timeout = (string) $timeout;
        }

        if ($useragent)
        {
            $this->useragent = (string) $useragent;
        }

        if ($force_fsockopen)
        {
            $this->force_fsockopen = (string) $force_fsockopen;
        }
    }

    /**
     * Set the list of element names that are stripped from HTML content.
     *
     * @param array|string|false $tags Array of tag names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
    {
        if ($tags)
        {
            if (is_array($tags))
            {
                $this->strip_htmltags = $tags;
            }
            else
            {
                $this->strip_htmltags = explode(',', $tags);
            }
        }
        else
        {
            $this->strip_htmltags = false;
        }
    }

    /**
     * Choose between removing stripped tags and emitting them as literal text.
     *
     * @param bool $encode True to keep the tag visible as text instead of dropping it
     */
    public function encode_instead_of_strip($encode = false)
    {
        $this->encode_instead_of_strip = (bool) $encode;
    }

    /**
     * Set the list of attribute names that are removed from every element.
     *
     * @param array|string|false $attribs Array of attribute names, a comma-separated string, or a falsy value to disable stripping
     */
    public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
    {
        if ($attribs)
        {
            if (is_array($attribs))
            {
                $this->strip_attributes = $attribs;
            }
            else
            {
                $this->strip_attributes = explode(',', $attribs);
            }
        }
        else
        {
            $this->strip_attributes = false;
        }
    }

    /**
     * Choose whether comment nodes are removed from HTML content.
     *
     * @param bool $strip
     */
    public function strip_comments($strip = false)
    {
        $this->strip_comments = (bool) $strip;
    }

    /**
     * Set the character encoding of the sanitized output.
     *
     * @param string $encoding Anything other than 'UTF-8' triggers a conversion in sanitize()
     */
    public function set_output_encoding($encoding = 'UTF-8')
    {
        $this->output_encoding = (string) $encoding;
    }

    /**
     * Set element/attribute key/value pairs of HTML attributes
     * containing URLs that need to be resolved relative to the feed
     *
     * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
     * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
     * |q|@cite
     *
     * @since 1.0
     * @param array|null $element_attribute Element/attribute key/value pairs, null for default
     */
    public function set_url_replacements($element_attribute = null)
    {
        if ($element_attribute === null)
        {
            $element_attribute = array(
                'a' => 'href',
                'area' => 'href',
                'blockquote' => 'cite',
                'del' => 'cite',
                'form' => 'action',
                'img' => array(
                    'longdesc',
                    'src'
                ),
                'input' => 'src',
                'ins' => 'cite',
                'q' => 'cite'
            );
        }
        $this->replace_url_attributes = (array) $element_attribute;
    }

    /**
     * Clean a piece of feed data according to its construct type.
     *
     * $type is a bitmask of SIMPLEPIE_CONSTRUCT_* flags. Depending on the
     * flags the data is base64-decoded, parsed as (X)HTML and stripped of
     * the configured tags/attributes/comments, has its URLs made absolute,
     * is HTML-escaped, and is finally converted to the output encoding.
     *
     * @param string $data Raw data
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* constants
     * @param string $base Base URL used to resolve relative URLs
     * @return string Sanitized data
     */
    public function sanitize($data, $type, $base = '')
    {
        $data = trim($data);
        if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
        {
            if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
            {
                // Treat the data as HTML if it contains an entity or a closing tag
                if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
                {
                    $type |= SIMPLEPIE_CONSTRUCT_HTML;
                }
                else
                {
                    $type |= SIMPLEPIE_CONSTRUCT_TEXT;
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
            {
                $data = base64_decode($data);
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
            {
                $document = new DOMDocument();
                $document->encoding = 'UTF-8';
                $data = $this->preprocess($data, $type);

                // libxml reports malformed markup as PHP warnings; silence them while parsing
                set_error_handler(array('SimplePie_Misc', 'silence_errors'));
                $document->loadHTML($data);
                restore_error_handler();

                // Strip comments
                if ($this->strip_comments)
                {
                    $xpath = new DOMXPath($document);
                    $comments = $xpath->query('//comment()');

                    foreach ($comments as $comment)
                    {
                        $comment->parentNode->removeChild($comment);
                    }
                }

                // Strip out HTML tags and attributes that might cause various security problems.
                // Based on recommendations by Mark Pilgrim at:
                // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
                if ($this->strip_htmltags)
                {
                    foreach ($this->strip_htmltags as $tag)
                    {
                        $this->strip_tag($tag, $document, $type);
                    }
                }

                if ($this->strip_attributes)
                {
                    foreach ($this->strip_attributes as $attrib)
                    {
                        $this->strip_attr($attrib, $document);
                    }
                }

                // Replace relative URLs
                $this->base = $base;
                foreach ($this->replace_url_attributes as $element => $attributes)
                {
                    $this->replace_urls($document, $element, $attributes);
                }

                // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
                if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
                {
                    $this->cache_images($document);
                }

                // Remove the DOCTYPE
                // Seems to cause segfaulting if we don't do this
                if ($document->firstChild instanceof DOMDocumentType)
                {
                    $document->removeChild($document->firstChild);
                }

                // Move everything from the body to the root
                $body = $document->getElementsByTagName('body')->item(0);
                $real_body = ($body !== null) ? $body->childNodes->item(0) : null;

                if ($real_body !== null && $document->firstChild !== null)
                {
                    $document->replaceChild($real_body, $document->firstChild);

                    // Finally, convert to a HTML string
                    $data = trim($document->saveHTML());
                }
                else
                {
                    // Nothing is left inside <body>, so there is no markup to return
                    $data = '';
                }

                if ($this->remove_div)
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                    $data = preg_replace('/<\/div>$/', '', $data);
                }
                else
                {
                    $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                }
            }

            if ($type & SIMPLEPIE_CONSTRUCT_IRI)
            {
                $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
                if ($absolute !== false)
                {
                    $data = $absolute;
                }
            }

            if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
            {
                $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
            }

            if ($this->output_encoding !== 'UTF-8')
            {
                $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
            }
        }
        return $data;
    }

    /**
     * Cache every <img> that has a src attribute and point it at the image handler.
     *
     * An image whose cache entry loads is rewritten immediately. Otherwise the
     * image is fetched through the registry's File service and rewritten only
     * once the cache entry has been saved.
     *
     * @param DOMDocument $document Document whose images are rewritten in place
     */
    private function cache_images($document)
    {
        $images = $document->getElementsByTagName('img');
        foreach ($images as $img)
        {
            if (!$img->hasAttribute('src'))
            {
                continue;
            }

            $src = $img->getAttribute('src');
            $image_url = call_user_func($this->cache_name_function, $src);
            $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

            if ($cache->load())
            {
                $img->setAttribute('src', $this->image_handler . $image_url);
                continue;
            }

            // REMOTE_ADDR is not set when running outside a web request (e.g. CLI)
            $request_headers = array();
            if (isset($_SERVER['REMOTE_ADDR']))
            {
                $request_headers['X-FORWARDED-FOR'] = $_SERVER['REMOTE_ADDR'];
            }

            // $img is a DOMElement, so the URL has to be read with getAttribute()
            $file = $this->registry->create('File', array($src, $this->timeout, 5, $request_headers, $this->useragent, $this->force_fsockopen));

            // The bitmask test needs its own parentheses: === binds tighter than &
            if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
            {
                if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                {
                    $img->setAttribute('src', $this->image_handler . $image_url);
                }
                else
                {
                    trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                }
            }
        }
    }

    /**
     * Wrap a markup fragment in a complete document so DOMDocument can parse it.
     *
     * When $type carries any flag other than SIMPLEPIE_CONSTRUCT_XHTML the
     * fragment is wrapped in a <div> and given an HTML5 doctype; otherwise an
     * XHTML 1.0 Strict doctype is used and no wrapper is added.
     *
     * @param string $html Markup fragment
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* constants
     * @return string Full document source declaring a UTF-8 charset
     */
    protected function preprocess($html, $type)
    {
        $ret = '';
        if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
        {
            // Atom XHTML constructs are wrapped with a div by default
            // Note: No protection if $html contains a stray </div>!
            $html = '<div>' . $html . '</div>';
            $ret .= '<!DOCTYPE html>';
            $content_type = 'text/html';
        }
        else
        {
            $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
            $content_type = 'application/xhtml+xml';
        }

        $ret .= '<html><head>';
        $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
        $ret .= '</head><body>' . $html . '</body></html>';
        return $ret;
    }

    /**
     * Make the given URL attributes of every $tag element absolute.
     *
     * Elements whose tag is listed in $this->strip_htmltags are skipped.
     * An attribute is left unchanged when absolutize_url returns false.
     *
     * @param DOMDocument $document Document modified in place
     * @param string $tag Element name
     * @param string|array $attributes One attribute name or a list of them
     */
    public function replace_urls($document, $tag, $attributes)
    {
        if (!is_array($attributes))
        {
            $attributes = array($attributes);
        }

        if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
        {
            $elements = $document->getElementsByTagName($tag);
            foreach ($elements as $element)
            {
                foreach ($attributes as $attribute)
                {
                    if ($element->hasAttribute($attribute))
                    {
                        $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                        if ($value !== false)
                        {
                            $element->setAttribute($attribute, $value);
                        }
                    }
                }
            }
        }
    }

    /**
     * Build the replacement text for one matched tag.
     *
     * Nothing in this class calls this method; it presumably serves as a
     * regex-replace callback for external callers (TODO confirm). It reads
     * $match[0] (whole match), $match[1] (tag name), $match[2], $match[3]
     * and $match[4].
     *
     * @param array $match Regex match groups
     * @return string Replacement text
     */
    public function do_strip_htmltags($match)
    {
        if ($this->encode_instead_of_strip)
        {
            if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
            {
                $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                return "<$match[1]$match[2]>$match[3]</$match[1]>";
            }
            else
            {
                return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
            }
        }
        elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
        {
            return $match[4];
        }
        else
        {
            return '';
        }
    }

    /**
     * Remove every $tag element found below <body>.
     *
     * - With encode_instead_of_strip enabled, the element is replaced by its
     *   children, surrounded by text nodes spelling out the opening and
     *   closing tag (script/style get no such text nodes).
     * - Otherwise script/style elements are removed together with their content.
     * - Any other element is replaced by its children.
     *
     * @param string $tag Element name
     * @param DOMDocument $document Document modified in place
     * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* constants
     */
    protected function strip_tag($tag, $document, $type)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('body//' . $tag);
        if ($this->encode_instead_of_strip)
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // For elements which aren't script or style, include the tag itself
                if (!in_array($tag, array('script', 'style')))
                {
                    $text = '<' . $tag;
                    if ($element->hasAttributes())
                    {
                        $attrs = array();
                        foreach ($element->attributes as $name => $attr)
                        {
                            $value = $attr->value;

                            // Compare against '' rather than using empty(), which
                            // would also treat the legitimate value "0" as missing
                            if ($value === '')
                            {
                                // In XHTML, empty values should never exist, so we repeat the value
                                if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                                {
                                    $value = $name;
                                }
                                // For HTML, empty is fine
                                elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                                {
                                    $attrs[] = $name;
                                    continue;
                                }
                            }

                            // Standard attribute text
                            $attrs[] = $name . '="' . $value . '"';
                        }
                        $text .= ' ' . implode(' ', $attrs);
                    }
                    $text .= '>';
                    $fragment->appendChild(new DOMText($text));
                }

                // appendChild() moves the node, so item(0) is always the next child
                $number = $element->childNodes->length;
                for ($i = $number; $i > 0; $i--)
                {
                    $child = $element->childNodes->item(0);
                    $fragment->appendChild($child);
                }

                if (!in_array($tag, array('script', 'style')))
                {
                    $fragment->appendChild(new DOMText('</' . $tag . '>'));
                }

                $element->parentNode->replaceChild($fragment, $element);
            }

            return;
        }
        elseif (in_array($tag, array('script', 'style')))
        {
            foreach ($elements as $element)
            {
                $element->parentNode->removeChild($element);
            }

            return;
        }
        else
        {
            foreach ($elements as $element)
            {
                $fragment = $document->createDocumentFragment();

                // appendChild() moves the node, so item(0) is always the next child
                $number = $element->childNodes->length;
                for ($i = $number; $i > 0; $i--)
                {
                    $child = $element->childNodes->item(0);
                    $fragment->appendChild($child);
                }

                $element->parentNode->replaceChild($fragment, $element);
            }
        }
    }

    /**
     * Remove the attribute $attrib from every element that carries it.
     *
     * @param string $attrib Attribute name
     * @param DOMDocument $document Document modified in place
     */
    protected function strip_attr($attrib, $document)
    {
        $xpath = new DOMXPath($document);
        $elements = $xpath->query('//*[@' . $attrib . ']');

        foreach ($elements as $element)
        {
            $element->removeAttribute($attrib);
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Thu Aug 11 10:00:09 2016 | Cross-referenced by PHPXref 0.7.1 |