[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/lib/simplepie/library/SimplePie/ -> Parser.php (source)

   1  <?php
   2  /**
   3   * SimplePie
   4   *
   5   * A PHP-Based RSS and Atom Feed Framework.
   6   * Takes the hard work out of managing a complete RSS/Atom solution.
   7   *
   8   * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
   9   * All rights reserved.
  10   *
  11   * Redistribution and use in source and binary forms, with or without modification, are
  12   * permitted provided that the following conditions are met:
  13   *
  14   *     * Redistributions of source code must retain the above copyright notice, this list of
  15   *       conditions and the following disclaimer.
  16   *
  17   *     * Redistributions in binary form must reproduce the above copyright notice, this list
  18   *       of conditions and the following disclaimer in the documentation and/or other materials
  19   *       provided with the distribution.
  20   *
  21   *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22   *       to endorse or promote products derived from this software without specific prior
  23   *       written permission.
  24   *
  25   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26   * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28   * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33   * POSSIBILITY OF SUCH DAMAGE.
  34   *
  35   * @package SimplePie
  36   * @version 1.3.1
  37   * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38   * @author Ryan Parman
  39   * @author Geoffrey Sneddon
  40   * @author Ryan McCue
  41   * @link http://simplepie.org/ SimplePie
  42   * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43   */
  44  
  45  /**
  46   * Parses XML into something sane
  47   *
  48   *
  49   * This class can be overloaded with {@see SimplePie::set_parser_class()}
  50   *
  51   * @package SimplePie
  52   * @subpackage Parsing
  53   */
  54  class SimplePie_Parser
  55  {
  56      var $error_code;
  57      var $error_string;
  58      var $current_line;
  59      var $current_column;
  60      var $current_byte;
  61      var $separator = ' ';
  62      var $namespace = array('');
  63      var $element = array('');
  64      var $xml_base = array('');
  65      var $xml_base_explicit = array(false);
  66      var $xml_lang = array('');
  67      var $data = array();
  68      var $datas = array(array());
  69      var $current_xhtml_construct = -1;
  70      var $encoding;
  71      protected $registry;
  72  
  73  	public function set_registry(SimplePie_Registry $registry)
  74      {
  75          $this->registry = $registry;
  76      }
  77  
  78  	public function parse(&$data, $encoding)
  79      {
  80          // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
  81          if (strtoupper($encoding) === 'US-ASCII')
  82          {
  83              $this->encoding = 'UTF-8';
  84          }
  85          else
  86          {
  87              $this->encoding = $encoding;
  88          }
  89  
  90          // Strip BOM:
  91          // UTF-32 Big Endian BOM
  92          if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
  93          {
  94              $data = substr($data, 4);
  95          }
  96          // UTF-32 Little Endian BOM
  97          elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
  98          {
  99              $data = substr($data, 4);
 100          }
 101          // UTF-16 Big Endian BOM
 102          elseif (substr($data, 0, 2) === "\xFE\xFF")
 103          {
 104              $data = substr($data, 2);
 105          }
 106          // UTF-16 Little Endian BOM
 107          elseif (substr($data, 0, 2) === "\xFF\xFE")
 108          {
 109              $data = substr($data, 2);
 110          }
 111          // UTF-8 BOM
 112          elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
 113          {
 114              $data = substr($data, 3);
 115          }
 116  
 117          if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
 118          {
 119              $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
 120              if ($declaration->parse())
 121              {
 122                  $data = substr($data, $pos + 2);
 123                  $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
 124              }
 125              else
 126              {
 127                  $this->error_string = 'SimplePie bug! Please report this!';
 128                  return false;
 129              }
 130          }
 131  
 132          $return = true;
 133  
 134          static $xml_is_sane = null;
 135          if ($xml_is_sane === null)
 136          {
 137              $parser_check = xml_parser_create();
 138              xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
 139              xml_parser_free($parser_check);
 140              $xml_is_sane = isset($values[0]['value']);
 141          }
 142  
 143          // Create the parser
 144          if ($xml_is_sane)
 145          {
 146              $xml = xml_parser_create_ns($this->encoding, $this->separator);
 147              xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
 148              xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
 149              xml_set_object($xml, $this);
 150              xml_set_character_data_handler($xml, 'cdata');
 151              xml_set_element_handler($xml, 'tag_open', 'tag_close');
 152  
 153              // Parse!
 154              if (!xml_parse($xml, $data, true))
 155              {
 156                  $this->error_code = xml_get_error_code($xml);
 157                  $this->error_string = xml_error_string($this->error_code);
 158                  $return = false;
 159              }
 160              $this->current_line = xml_get_current_line_number($xml);
 161              $this->current_column = xml_get_current_column_number($xml);
 162              $this->current_byte = xml_get_current_byte_index($xml);
 163              xml_parser_free($xml);
 164              return $return;
 165          }
 166          else
 167          {
 168              libxml_clear_errors();
 169              $xml = new XMLReader();
 170              $xml->xml($data);
 171              while (@$xml->read())
 172              {
 173                  switch ($xml->nodeType)
 174                  {
 175  
 176                      case constant('XMLReader::END_ELEMENT'):
 177                          if ($xml->namespaceURI !== '')
 178                          {
 179                              $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
 180                          }
 181                          else
 182                          {
 183                              $tagName = $xml->localName;
 184                          }
 185                          $this->tag_close(null, $tagName);
 186                          break;
 187                      case constant('XMLReader::ELEMENT'):
 188                          $empty = $xml->isEmptyElement;
 189                          if ($xml->namespaceURI !== '')
 190                          {
 191                              $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
 192                          }
 193                          else
 194                          {
 195                              $tagName = $xml->localName;
 196                          }
 197                          $attributes = array();
 198                          while ($xml->moveToNextAttribute())
 199                          {
 200                              if ($xml->namespaceURI !== '')
 201                              {
 202                                  $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
 203                              }
 204                              else
 205                              {
 206                                  $attrName = $xml->localName;
 207                              }
 208                              $attributes[$attrName] = $xml->value;
 209                          }
 210                          $this->tag_open(null, $tagName, $attributes);
 211                          if ($empty)
 212                          {
 213                              $this->tag_close(null, $tagName);
 214                          }
 215                          break;
 216                      case constant('XMLReader::TEXT'):
 217  
 218                      case constant('XMLReader::CDATA'):
 219                          $this->cdata(null, $xml->value);
 220                          break;
 221                  }
 222              }
 223              if ($error = libxml_get_last_error())
 224              {
 225                  $this->error_code = $error->code;
 226                  $this->error_string = $error->message;
 227                  $this->current_line = $error->line;
 228                  $this->current_column = $error->column;
 229                  return false;
 230              }
 231              else
 232              {
 233                  return true;
 234              }
 235          }
 236      }
 237  
 238  	public function get_error_code()
 239      {
 240          return $this->error_code;
 241      }
 242  
 243  	public function get_error_string()
 244      {
 245          return $this->error_string;
 246      }
 247  
 248  	public function get_current_line()
 249      {
 250          return $this->current_line;
 251      }
 252  
 253  	public function get_current_column()
 254      {
 255          return $this->current_column;
 256      }
 257  
 258  	public function get_current_byte()
 259      {
 260          return $this->current_byte;
 261      }
 262  
 263  	public function get_data()
 264      {
 265          return $this->data;
 266      }
 267  
 268  	public function tag_open($parser, $tag, $attributes)
 269      {
 270          list($this->namespace[], $this->element[]) = $this->split_ns($tag);
 271  
 272          $attribs = array();
 273          foreach ($attributes as $name => $value)
 274          {
 275              list($attrib_namespace, $attribute) = $this->split_ns($name);
 276              $attribs[$attrib_namespace][$attribute] = $value;
 277          }
 278  
 279          if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
 280          {
 281              $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base)));
 282              if ($base !== false)
 283              {
 284                  $this->xml_base[] = $base;
 285                  $this->xml_base_explicit[] = true;
 286              }
 287          }
 288          else
 289          {
 290              $this->xml_base[] = end($this->xml_base);
 291              $this->xml_base_explicit[] = end($this->xml_base_explicit);
 292          }
 293  
 294          if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
 295          {
 296              $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
 297          }
 298          else
 299          {
 300              $this->xml_lang[] = end($this->xml_lang);
 301          }
 302  
 303          if ($this->current_xhtml_construct >= 0)
 304          {
 305              $this->current_xhtml_construct++;
 306              if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
 307              {
 308                  $this->data['data'] .= '<' . end($this->element);
 309                  if (isset($attribs['']))
 310                  {
 311                      foreach ($attribs[''] as $name => $value)
 312                      {
 313                          $this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
 314                      }
 315                  }
 316                  $this->data['data'] .= '>';
 317              }
 318          }
 319          else
 320          {
 321              $this->datas[] =& $this->data;
 322              $this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
 323              $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
 324              if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
 325              || (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml')
 326              || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_20 && in_array(end($this->element), array('title')))
 327              || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_090 && in_array(end($this->element), array('title')))
 328              || (end($this->namespace) === SIMPLEPIE_NAMESPACE_RSS_10 && in_array(end($this->element), array('title'))))
 329              {
 330                  $this->current_xhtml_construct = 0;
 331              }
 332          }
 333      }
 334  
 335  	public function cdata($parser, $cdata)
 336      {
 337          if ($this->current_xhtml_construct >= 0)
 338          {
 339              $this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
 340          }
 341          else
 342          {
 343              $this->data['data'] .= $cdata;
 344          }
 345      }
 346  
 347  	public function tag_close($parser, $tag)
 348      {
 349          if ($this->current_xhtml_construct >= 0)
 350          {
 351              $this->current_xhtml_construct--;
 352              if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
 353              {
 354                  $this->data['data'] .= '</' . end($this->element) . '>';
 355              }
 356          }
 357          if ($this->current_xhtml_construct === -1)
 358          {
 359              $this->data =& $this->datas[count($this->datas) - 1];
 360              array_pop($this->datas);
 361          }
 362  
 363          array_pop($this->element);
 364          array_pop($this->namespace);
 365          array_pop($this->xml_base);
 366          array_pop($this->xml_base_explicit);
 367          array_pop($this->xml_lang);
 368      }
 369  
 370  	public function split_ns($string)
 371      {
 372          static $cache = array();
 373          if (!isset($cache[$string]))
 374          {
 375              if ($pos = strpos($string, $this->separator))
 376              {
 377                  static $separator_length;
 378                  if (!$separator_length)
 379                  {
 380                      $separator_length = strlen($this->separator);
 381                  }
 382                  $namespace = substr($string, 0, $pos);
 383                  $local_name = substr($string, $pos + $separator_length);
 384                  if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
 385                  {
 386                      $namespace = SIMPLEPIE_NAMESPACE_ITUNES;
 387                  }
 388  
 389                  // Normalize the Media RSS namespaces
 390                  if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
 391                      $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
 392                      $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
 393                      $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
 394                      $namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
 395                  {
 396                      $namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
 397                  }
 398                  $cache[$string] = array($namespace, $local_name);
 399              }
 400              else
 401              {
 402                  $cache[$string] = array('', $string);
 403              }
 404          }
 405          return $cache[$string];
 406      }
 407  }


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1