[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/lib/tcpdf/ -> tcpdf_parser.php (source)

   1  <?php
   2  //============================================================+
   3  // File name   : tcpdf_parser.php
   4  // Version     : 1.0.16
   5  // Begin       : 2011-05-23
   6  // Last Update : 2015-04-28
   7  // Author      : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
   8  // License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3
   9  // -------------------------------------------------------------------
  10  // Copyright (C) 2011-2015 Nicola Asuni - Tecnick.com LTD
  11  //
  12  // This file is part of TCPDF software library.
  13  //
  14  // TCPDF is free software: you can redistribute it and/or modify it
  15  // under the terms of the GNU Lesser General Public License as
  16  // published by the Free Software Foundation, either version 3 of the
  17  // License, or (at your option) any later version.
  18  //
  19  // TCPDF is distributed in the hope that it will be useful, but
  20  // WITHOUT ANY WARRANTY; without even the implied warranty of
  21  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22  // See the GNU Lesser General Public License for more details.
  23  //
  24  // You should have received a copy of the License
  25  // along with TCPDF. If not, see
  26  // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  27  //
  28  // See LICENSE.TXT file for more information.
  29  // -------------------------------------------------------------------
  30  //
  31  // Description : This is a PHP class for parsing PDF documents.
  32  //
  33  //============================================================+
  34  
  35  /**
  36   * @file
  37   * This is a PHP class for parsing PDF documents.<br>
  38   * @package com.tecnick.tcpdf
  39   * @author Nicola Asuni
  40   * @version 1.0.15
  41   */
  42  
  43  // include class for decoding filters
  44  require_once(dirname(__FILE__).'/include/tcpdf_filters.php');
  45  
  46  /**
  47   * @class TCPDF_PARSER
  48   * This is a PHP class for parsing PDF documents.<br>
  49   * @package com.tecnick.tcpdf
  50   * @brief This is a PHP class for parsing PDF documents..
  51   * @version 1.0.15
  52   * @author Nicola Asuni - info@tecnick.com
  53   */
  54  class TCPDF_PARSER {
  55  
    /**
     * Raw content of the PDF document, starting at the '%PDF-' header.
     * It is only needed while parsing: the constructor empties it again
     * once all the objects have been extracted.
     * @private
     */
    private $pdfdata = '';

    /**
     * XREF data, as returned by getXrefData().
     * @protected
     */
    protected $xref = array();

    /**
     * Array of PDF objects, keyed like the entries of the xref table.
     * @protected
     */
    protected $objects = array();

    /**
     * Class object for decoding filters.
     * @private
     */
    private $FilterDecoders;

    /**
     * Array of configuration parameters.
     * The values below are the defaults; setConfig() overrides them with
     * the values passed to the constructor.
     * @private
     */
    private $cfg = array(
        'die_for_errors' => false,
        'ignore_filter_decoding_errors' => true,
        'ignore_missing_filter_decoders' => true,
    );
  89  
  90  // -----------------------------------------------------------------------------
  91  
  92      /**
  93       * Parse a PDF document and return an array of objects.
  94       * @param $data (string) PDF data to parse.
  95       * @param $cfg (array) Array of configuration parameters:
  96       *             'die_for_errors' : if true terminate the program execution in case of error, otherwise throws an exception;
  97       *             'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
  98       *             'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
  99       * @public
 100       * @since 1.0.000 (2011-05-24)
 101       */
 102  	public function __construct($data, $cfg=array()) {
 103          if (empty($data)) {
 104              $this->Error('Empty PDF data.');
 105          }
 106          // find the pdf header starting position
 107          if (($trimpos = strpos($data, '%PDF-')) === FALSE) {
 108              $this->Error('Invalid PDF data: missing %PDF header.');
 109          }
 110          // get PDF content string
 111          $this->pdfdata = substr($data, $trimpos);
 112          // get length
 113          $pdflen = strlen($this->pdfdata);
 114          // set configuration parameters
 115          $this->setConfig($cfg);
 116          // get xref and trailer data
 117          $this->xref = $this->getXrefData();
 118          // parse all document objects
 119          $this->objects = array();
 120          foreach ($this->xref['xref'] as $obj => $offset) {
 121              if (!isset($this->objects[$obj]) AND ($offset > 0)) {
 122                  // decode objects with positive offset
 123                  $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
 124              }
 125          }
 126          // release some memory
 127          unset($this->pdfdata);
 128          $this->pdfdata = '';
 129      }
 130  
 131      /**
 132       * Set the configuration parameters.
 133       * @param $cfg (array) Array of configuration parameters:
 134       *             'die_for_errors' : if true terminate the program execution in case of error, otherwise throws an exception;
 135       *             'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
 136       *             'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
 137       * @protected
 138       */
 139  	protected function setConfig($cfg) {
 140          if (isset($cfg['die_for_errors'])) {
 141              $this->cfg['die_for_errors'] = !!$cfg['die_for_errors'];
 142          }
 143          if (isset($cfg['ignore_filter_decoding_errors'])) {
 144              $this->cfg['ignore_filter_decoding_errors'] = !!$cfg['ignore_filter_decoding_errors'];
 145          }
 146          if (isset($cfg['ignore_missing_filter_decoders'])) {
 147              $this->cfg['ignore_missing_filter_decoders'] = !!$cfg['ignore_missing_filter_decoders'];
 148          }
 149      }
 150  
 151      /**
 152       * Return an array of parsed PDF document objects.
 153       * @return (array) Array of parsed PDF document objects.
 154       * @public
 155       * @since 1.0.000 (2011-06-26)
 156       */
 157  	public function getParsedData() {
 158          return array($this->xref, $this->objects);
 159      }
 160  
 161      /**
 162       * Get Cross-Reference (xref) table and trailer data from PDF document data.
 163       * @param $offset (int) xref offset (if known).
 164       * @param $xref (array) previous xref array (if any).
 165       * @return Array containing xref and trailer data.
 166       * @protected
 167       * @since 1.0.000 (2011-05-24)
 168       */
 169  	protected function getXrefData($offset=0, $xref=array()) {
 170          if ($offset == 0) {
 171              // find last startxref
 172              if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
 173                  $this->Error('Unable to find startxref');
 174              }
 175              $matches = array_pop($matches);
 176              $startxref = $matches[1];
 177          } elseif (strpos($this->pdfdata, 'xref', $offset) == $offset) {
 178              // Already pointing at the xref table
 179              $startxref = $offset;
 180          } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 181              // Cross-Reference Stream object
 182              $startxref = $offset;
 183          } elseif (preg_match('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 184              // startxref found
 185              $startxref = $matches[1][0];
 186          } else {
 187              $this->Error('Unable to find startxref');
 188          }
 189          // check xref position
 190          if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) {
 191              // Cross-Reference
 192              $xref = $this->decodeXref($startxref, $xref);
 193          } else {
 194              // Cross-Reference Stream
 195              $xref = $this->decodeXrefStream($startxref, $xref);
 196          }
 197          if (empty($xref)) {
 198              $this->Error('Unable to find xref');
 199          }
 200          return $xref;
 201      }
 202  
 203      /**
 204       * Decode the Cross-Reference section
 205       * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
 206       * @param $xref (array) Previous xref array (if any).
 207       * @return Array containing xref and trailer data.
 208       * @protected
 209       * @since 1.0.000 (2011-06-20)
 210       */
 211  	protected function decodeXref($startxref, $xref=array()) {
 212          $startxref += 4; // 4 is the length of the word 'xref'
 213          // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
 214          $offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
 215          // initialize object number
 216          $obj_num = 0;
 217          // search for cross-reference entries or subsection
 218          while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
 219              if ($matches[0][1] != $offset) {
 220                  // we are on another section
 221                  break;
 222              }
 223              $offset += strlen($matches[0][0]);
 224              if ($matches[3][0] == 'n') {
 225                  // create unique object index: [object number]_[generation number]
 226                  $index = $obj_num.'_'.intval($matches[2][0]);
 227                  // check if object already exist
 228                  if (!isset($xref['xref'][$index])) {
 229                      // store object offset position
 230                      $xref['xref'][$index] = intval($matches[1][0]);
 231                  }
 232                  ++$obj_num;
 233              } elseif ($matches[3][0] == 'f') {
 234                  ++$obj_num;
 235              } else {
 236                  // object number (index)
 237                  $obj_num = intval($matches[1][0]);
 238              }
 239          }
 240          // get trailer data
 241          if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
 242              $trailer_data = $matches[1][0];
 243              if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
 244                  // get only the last updated version
 245                  $xref['trailer'] = array();
 246                  // parse trailer_data
 247                  if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
 248                      $xref['trailer']['size'] = intval($matches[1]);
 249                  }
 250                  if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 251                      $xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
 252                  }
 253                  if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 254                      $xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
 255                  }
 256                  if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 257                      $xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
 258                  }
 259                  if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
 260                      $xref['trailer']['id'] = array();
 261                      $xref['trailer']['id'][0] = $matches[1];
 262                      $xref['trailer']['id'][1] = $matches[2];
 263                  }
 264              }
 265              if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
 266                  // get previous xref
 267                  $xref = $this->getXrefData(intval($matches[1]), $xref);
 268              }
 269          } else {
 270              $this->Error('Unable to find trailer');
 271          }
 272          return $xref;
 273      }
 274  
 275      /**
 276       * Decode the Cross-Reference Stream section
 277       * @param $startxref (int) Offset at which the xref section starts.
 278       * @param $xref (array) Previous xref array (if any).
 279       * @return Array containing xref and trailer data.
 280       * @protected
 281       * @since 1.0.003 (2013-03-16)
 282       */
 283  	protected function decodeXrefStream($startxref, $xref=array()) {
 284          // try to read Cross-Reference Stream
 285          $xrefobj = $this->getRawObject($startxref);
 286          $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
 287          if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
 288              // get only the last updated version
 289              $xref['trailer'] = array();
 290              $filltrailer = true;
 291          } else {
 292              $filltrailer = false;
 293          }
 294          if (!isset($xref['xref'])) {
 295              $xref['xref'] = array();
 296          }
 297          $valid_crs = false;
 298          $columns = 0;
 299          $sarr = $xrefcrs[0][1];
 300          if (!is_array($sarr)) {
 301              $sarr = array();
 302          }
 303          foreach ($sarr as $k => $v) {
 304              if (($v[0] == '/') AND ($v[1] == 'Type') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == '/') AND ($sarr[($k +1)][1] == 'XRef'))) {
 305                  $valid_crs = true;
 306              } elseif (($v[0] == '/') AND ($v[1] == 'Index') AND (isset($sarr[($k +1)]))) {
 307                  // first object number in the subsection
 308                  $index_first = intval($sarr[($k +1)][1][0][1]);
 309                  // number of entries in the subsection
 310                  $index_entries = intval($sarr[($k +1)][1][1][1]);
 311              } elseif (($v[0] == '/') AND ($v[1] == 'Prev') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == 'numeric'))) {
 312                  // get previous xref offset
 313                  $prevxref = intval($sarr[($k +1)][1]);
 314              } elseif (($v[0] == '/') AND ($v[1] == 'W') AND (isset($sarr[($k +1)]))) {
 315                  // number of bytes (in the decoded stream) of the corresponding field
 316                  $wb = array();
 317                  $wb[0] = intval($sarr[($k +1)][1][0][1]);
 318                  $wb[1] = intval($sarr[($k +1)][1][1][1]);
 319                  $wb[2] = intval($sarr[($k +1)][1][2][1]);
 320              } elseif (($v[0] == '/') AND ($v[1] == 'DecodeParms') AND (isset($sarr[($k +1)][1]))) {
 321                  $decpar = $sarr[($k +1)][1];
 322                  foreach ($decpar as $kdc => $vdc) {
 323                      if (($vdc[0] == '/') AND ($vdc[1] == 'Columns') AND (isset($decpar[($kdc +1)]) AND ($decpar[($kdc +1)][0] == 'numeric'))) {
 324                          $columns = intval($decpar[($kdc +1)][1]);
 325                      } elseif (($vdc[0] == '/') AND ($vdc[1] == 'Predictor') AND (isset($decpar[($kdc +1)]) AND ($decpar[($kdc +1)][0] == 'numeric'))) {
 326                          $predictor = intval($decpar[($kdc +1)][1]);
 327                      }
 328                  }
 329              } elseif ($filltrailer) {
 330                  if (($v[0] == '/') AND ($v[1] == 'Size') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == 'numeric'))) {
 331                      $xref['trailer']['size'] = $sarr[($k +1)][1];
 332                  } elseif (($v[0] == '/') AND ($v[1] == 'Root') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == 'objref'))) {
 333                      $xref['trailer']['root'] = $sarr[($k +1)][1];
 334                  } elseif (($v[0] == '/') AND ($v[1] == 'Info') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == 'objref'))) {
 335                      $xref['trailer']['info'] = $sarr[($k +1)][1];
 336                  } elseif (($v[0] == '/') AND ($v[1] == 'Encrypt') AND (isset($sarr[($k +1)]) AND ($sarr[($k +1)][0] == 'objref'))) {
 337                      $xref['trailer']['encrypt'] = $sarr[($k +1)][1];
 338                  } elseif (($v[0] == '/') AND ($v[1] == 'ID') AND (isset($sarr[($k +1)]))) {
 339                      $xref['trailer']['id'] = array();
 340                      $xref['trailer']['id'][0] = $sarr[($k +1)][1][0][1];
 341                      $xref['trailer']['id'][1] = $sarr[($k +1)][1][1][1];
 342                  }
 343              }
 344          }
 345          // decode data
 346          if ($valid_crs AND isset($xrefcrs[1][3][0])) {
 347              // number of bytes in a row
 348              $rowlen = ($columns + 1);
 349              // convert the stream into an array of integers
 350              $sdata = unpack('C*', $xrefcrs[1][3][0]);
 351              // split the rows
 352              $sdata = array_chunk($sdata, $rowlen);
 353              // initialize decoded array
 354              $ddata = array();
 355              // initialize first row with zeros
 356              $prev_row = array_fill (0, $rowlen, 0);
 357              // for each row apply PNG unpredictor
 358              foreach ($sdata as $k => $row) {
 359                  // initialize new row
 360                  $ddata[$k] = array();
 361                  // get PNG predictor value
 362                  $predictor = (10 + $row[0]);
 363                  // for each byte on the row
 364                  for ($i=1; $i<=$columns; ++$i) {
 365                      // new index
 366                      $j = ($i - 1);
 367                      $row_up = $prev_row[$j];
 368                      if ($i == 1) {
 369                          $row_left = 0;
 370                          $row_upleft = 0;
 371                      } else {
 372                          $row_left = $row[($i - 1)];
 373                          $row_upleft = $prev_row[($j - 1)];
 374                      }
 375                      switch ($predictor) {
 376                          case 10: { // PNG prediction (on encoding, PNG None on all rows)
 377                              $ddata[$k][$j] = $row[$i];
 378                              break;
 379                          }
 380                          case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
 381                              $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
 382                              break;
 383                          }
 384                          case 12: { // PNG prediction (on encoding, PNG Up on all rows)
 385                              $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
 386                              break;
 387                          }
 388                          case 13: { // PNG prediction (on encoding, PNG Average on all rows)
 389                              $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) / 2)) & 0xff);
 390                              break;
 391                          }
 392                          case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
 393                              // initial estimate
 394                              $p = ($row_left + $row_up - $row_upleft);
 395                              // distances
 396                              $pa = abs($p - $row_left);
 397                              $pb = abs($p - $row_up);
 398                              $pc = abs($p - $row_upleft);
 399                              $pmin = min($pa, $pb, $pc);
 400                              // return minimum distance
 401                              switch ($pmin) {
 402                                  case $pa: {
 403                                      $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
 404                                      break;
 405                                  }
 406                                  case $pb: {
 407                                      $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
 408                                      break;
 409                                  }
 410                                  case $pc: {
 411                                      $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
 412                                      break;
 413                                  }
 414                              }
 415                              break;
 416                          }
 417                          default: { // PNG prediction (on encoding, PNG optimum)
 418                              $this->Error('Unknown PNG predictor');
 419                              break;
 420                          }
 421                      }
 422                  }
 423                  $prev_row = $ddata[$k];
 424              } // end for each row
 425              // complete decoding
 426              $sdata = array();
 427              // for every row
 428              foreach ($ddata as $k => $row) {
 429                  // initialize new row
 430                  $sdata[$k] = array(0, 0, 0);
 431                  if ($wb[0] == 0) {
 432                      // default type field
 433                      $sdata[$k][0] = 1;
 434                  }
 435                  $i = 0; // count bytes in the row
 436                  // for every column
 437                  for ($c = 0; $c < 3; ++$c) {
 438                      // for every byte on the column
 439                      for ($b = 0; $b < $wb[$c]; ++$b) {
 440                          if (isset($row[$i])) {
 441                              $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
 442                          }
 443                          ++$i;
 444                      }
 445                  }
 446              }
 447              $ddata = array();
 448              // fill xref
 449              if (isset($index_first)) {
 450                  $obj_num = $index_first;
 451              } else {
 452                  $obj_num = 0;
 453              }
 454              foreach ($sdata as $k => $row) {
 455                  switch ($row[0]) {
 456                      case 0: { // (f) linked list of free objects
 457                          break;
 458                      }
 459                      case 1: { // (n) objects that are in use but are not compressed
 460                          // create unique object index: [object number]_[generation number]
 461                          $index = $obj_num.'_'.$row[2];
 462                          // check if object already exist
 463                          if (!isset($xref['xref'][$index])) {
 464                              // store object offset position
 465                              $xref['xref'][$index] = $row[1];
 466                          }
 467                          break;
 468                      }
 469                      case 2: { // compressed objects
 470                          // $row[1] = object number of the object stream in which this object is stored
 471                          // $row[2] = index of this object within the object stream
 472                          $index = $row[1].'_0_'.$row[2];
 473                          $xref['xref'][$index] = -1;
 474                          break;
 475                      }
 476                      default: { // null objects
 477                          break;
 478                      }
 479                  }
 480                  ++$obj_num;
 481              }
 482          } // end decoding data
 483          if (isset($prevxref)) {
 484              // get previous xref
 485              $xref = $this->getXrefData($prevxref, $xref);
 486          }
 487          return $xref;
 488      }
 489  
 490      /**
 491       * Get object type, raw value and offset to next object
 492       * @param $offset (int) Object offset.
 493       * @return array containing object type, raw value and offset to next object
 494       * @protected
 495       * @since 1.0.000 (2011-06-20)
 496       */
 497  	protected function getRawObject($offset=0) {
 498          $objtype = ''; // object type to be returned
 499          $objval = ''; // object value to be returned
 500          // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
 501          $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
 502          // get first char
 503          $char = $this->pdfdata[$offset];
 504          // get object type
 505          switch ($char) {
 506              case '%': { // \x25 PERCENT SIGN
 507                  // skip comment and search for next token
 508                  $next = strcspn($this->pdfdata, "\r\n", $offset);
 509                  if ($next > 0) {
 510                      $offset += $next;
 511                      return $this->getRawObject($offset);
 512                  }
 513                  break;
 514              }
 515              case '/': { // \x2F SOLIDUS
 516                  // name object
 517                  $objtype = $char;
 518                  ++$offset;
 519                  if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
 520                      $objval = $matches[1]; // unescaped value
 521                      $offset += strlen($objval);
 522                  }
 523                  break;
 524              }
 525              case '(':   // \x28 LEFT PARENTHESIS
 526              case ')': { // \x29 RIGHT PARENTHESIS
 527                  // literal string object
 528                  $objtype = $char;
 529                  ++$offset;
 530                  $strpos = $offset;
 531                  if ($char == '(') {
 532                      $open_bracket = 1;
 533                      while ($open_bracket > 0) {
 534                          if (!isset($this->pdfdata{$strpos})) {
 535                              break;
 536                          }
 537                          $ch = $this->pdfdata{$strpos};
 538                          switch ($ch) {
 539                              case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
 540                                  // skip next character
 541                                  ++$strpos;
 542                                  break;
 543                              }
 544                              case '(': { // LEFT PARENHESIS (28h)
 545                                  ++$open_bracket;
 546                                  break;
 547                              }
 548                              case ')': { // RIGHT PARENTHESIS (29h)
 549                                  --$open_bracket;
 550                                  break;
 551                              }
 552                          }
 553                          ++$strpos;
 554                      }
 555                      $objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
 556                      $offset = $strpos;
 557                  }
 558                  break;
 559              }
 560              case '[':   // \x5B LEFT SQUARE BRACKET
 561              case ']': { // \x5D RIGHT SQUARE BRACKET
 562                  // array object
 563                  $objtype = $char;
 564                  ++$offset;
 565                  if ($char == '[') {
 566                      // get array content
 567                      $objval = array();
 568                      do {
 569                          // get element
 570                          $element = $this->getRawObject($offset);
 571                          $offset = $element[2];
 572                          $objval[] = $element;
 573                      } while ($element[0] != ']');
 574                      // remove closing delimiter
 575                      array_pop($objval);
 576                  }
 577                  break;
 578              }
 579              case '<':   // \x3C LESS-THAN SIGN
 580              case '>': { // \x3E GREATER-THAN SIGN
 581                  if (isset($this->pdfdata{($offset + 1)}) AND ($this->pdfdata{($offset + 1)} == $char)) {
 582                      // dictionary object
 583                      $objtype = $char.$char;
 584                      $offset += 2;
 585                      if ($char == '<') {
 586                          // get array content
 587                          $objval = array();
 588                          do {
 589                              // get element
 590                              $element = $this->getRawObject($offset);
 591                              $offset = $element[2];
 592                              $objval[] = $element;
 593                          } while ($element[0] != '>>');
 594                          // remove closing delimiter
 595                          array_pop($objval);
 596                      }
 597                  } else {
 598                      // hexadecimal string object
 599                      $objtype = $char;
 600                      ++$offset;
 601                      if (($char == '<') AND (preg_match('/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
 602                          // remove white space characters
 603                          $objval = strtr($matches[1], "\x09\x0a\x0c\x0d\x20", '');
 604                          $offset += strlen($matches[0]);
 605                      } elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== FALSE) {
 606                          $offset = $endpos + 1;
 607                      }
 608                  }
 609                  break;
 610              }
 611              default: {
 612                  if (substr($this->pdfdata, $offset, 6) == 'endobj') {
 613                      // indirect object
 614                      $objtype = 'endobj';
 615                      $offset += 6;
 616                  } elseif (substr($this->pdfdata, $offset, 4) == 'null') {
 617                      // null object
 618                      $objtype = 'null';
 619                      $offset += 4;
 620                      $objval = 'null';
 621                  } elseif (substr($this->pdfdata, $offset, 4) == 'true') {
 622                      // boolean true object
 623                      $objtype = 'boolean';
 624                      $offset += 4;
 625                      $objval = 'true';
 626                  } elseif (substr($this->pdfdata, $offset, 5) == 'false') {
 627                      // boolean false object
 628                      $objtype = 'boolean';
 629                      $offset += 5;
 630                      $objval = 'false';
 631                  } elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
 632                      // start stream object
 633                      $objtype = 'stream';
 634                      $offset += 6;
 635                      if (preg_match('/^([\r]?[\n])/isU', substr($this->pdfdata, $offset), $matches) == 1) {
 636                          $offset += strlen($matches[0]);
 637                          if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', substr($this->pdfdata, $offset), $matches, PREG_OFFSET_CAPTURE) == 1) {
 638                              $objval = substr($this->pdfdata, $offset, $matches[0][1]);
 639                              $offset += $matches[1][1];
 640                          }
 641                      }
 642                  } elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
 643                      // end stream object
 644                      $objtype = 'endstream';
 645                      $offset += 9;
 646                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
 647                      // indirect object reference
 648                      $objtype = 'objref';
 649                      $offset += strlen($matches[0]);
 650                      $objval = intval($matches[1]).'_'.intval($matches[2]);
 651                  } elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
 652                      // object start
 653                      $objtype = 'obj';
 654                      $objval = intval($matches[1]).'_'.intval($matches[2]);
 655                      $offset += strlen ($matches[0]);
 656                  } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
 657                      // numeric object
 658                      $objtype = 'numeric';
 659                      $objval = substr($this->pdfdata, $offset, $numlen);
 660                      $offset += $numlen;
 661                  }
 662                  break;
 663              }
 664          }
 665          return array($objtype, $objval, $offset);
 666      }
 667  
 668      /**
 669       * Get content of indirect object.
 670       * @param $obj_ref (string) Object number and generation number separated by underscore character.
 671       * @param $offset (int) Object offset.
 672       * @param $decoding (boolean) If true decode streams.
 673       * @return array containing object data.
 674       * @protected
 675       * @since 1.0.000 (2011-05-24)
 676       */
 677  	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
 678          $obj = explode('_', $obj_ref);
 679          if (($obj === false) OR (count($obj) != 2)) {
 680              $this->Error('Invalid object reference: '.$obj);
 681              return;
 682          }
 683          $objref = $obj[0].' '.$obj[1].' obj';
 684          // ignore leading zeros
 685          $offset += strspn($this->pdfdata, '0', $offset);
 686          if (strpos($this->pdfdata, $objref, $offset) != $offset) {
 687              // an indirect reference to an undefined object shall be considered a reference to the null object
 688              return array('null', 'null', $offset);
 689          }
 690          // starting position of object content
 691          $offset += strlen($objref);
 692          // get array of object content
 693          $objdata = array();
 694          $i = 0; // object main index
 695          do {
 696              $oldoffset = $offset;
 697                          // get element
 698              $element = $this->getRawObject($offset);
 699              $offset = $element[2];
 700              // decode stream using stream's dictionary information
 701              if ($decoding AND ($element[0] == 'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == '<<')) {
 702                  $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
 703              }
 704              $objdata[$i] = $element;
 705              ++$i;
 706          } while (($element[0] != 'endobj') AND ($offset != $oldoffset));
 707          // remove closing delimiter
 708          array_pop($objdata);
 709          // return raw object content
 710          return $objdata;
 711      }
 712  
 713      /**
 714       * Get the content of object, resolving indect object reference if necessary.
 715       * @param $obj (string) Object value.
 716       * @return array containing object data.
 717       * @protected
 718       * @since 1.0.000 (2011-06-26)
 719       */
 720  	protected function getObjectVal($obj) {
 721          if ($obj[0] == 'objref') {
 722              // reference to indirect object
 723              if (isset($this->objects[$obj[1]])) {
 724                  // this object has been already parsed
 725                  return $this->objects[$obj[1]];
 726              } elseif (isset($this->xref[$obj[1]])) {
 727                  // parse new object
 728                  $this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref[$obj[1]], false);
 729                  return $this->objects[$obj[1]];
 730              }
 731          }
 732          return $obj;
 733      }
 734  
 735      /**
 736       * Decode the specified stream.
 737       * @param $sdic (array) Stream's dictionary array.
 738       * @param $stream (string) Stream to decode.
 739       * @return array containing decoded stream data and remaining filters.
 740       * @protected
 741       * @since 1.0.000 (2011-06-22)
 742       */
 743  	protected function decodeStream($sdic, $stream) {
 744          // get stream length and filters
 745          $slength = strlen($stream);
 746          if ($slength <= 0) {
 747              return array('', array());
 748          }
 749          $filters = array();
 750          foreach ($sdic as $k => $v) {
 751              if ($v[0] == '/') {
 752                  if (($v[1] == 'Length') AND (isset($sdic[($k + 1)])) AND ($sdic[($k + 1)][0] == 'numeric')) {
 753                      // get declared stream length
 754                      $declength = intval($sdic[($k + 1)][1]);
 755                      if ($declength < $slength) {
 756                          $stream = substr($stream, 0, $declength);
 757                          $slength = $declength;
 758                      }
 759                  } elseif (($v[1] == 'Filter') AND (isset($sdic[($k + 1)]))) {
 760                      // resolve indirect object
 761                      $objval = $this->getObjectVal($sdic[($k + 1)]);
 762                      if ($objval[0] == '/') {
 763                          // single filter
 764                          $filters[] = $objval[1];
 765                      } elseif ($objval[0] == '[') {
 766                          // array of filters
 767                          foreach ($objval[1] as $flt) {
 768                              if ($flt[0] == '/') {
 769                                  $filters[] = $flt[1];
 770                              }
 771                          }
 772                      }
 773                  }
 774              }
 775          }
 776          // decode the stream
 777          $remaining_filters = array();
 778          foreach ($filters as $filter) {
 779              if (in_array($filter, TCPDF_FILTERS::getAvailableFilters())) {
 780                  try {
 781                      $stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
 782                  } catch (Exception $e) {
 783                      $emsg = $e->getMessage();
 784                      if ((($emsg[0] == '~') AND !$this->cfg['ignore_missing_filter_decoders'])
 785                          OR (($emsg[0] != '~') AND !$this->cfg['ignore_filter_decoding_errors'])) {
 786                          $this->Error($e->getMessage());
 787                      }
 788                  }
 789              } else {
 790                  // add missing filter to array
 791                  $remaining_filters[] = $filter;
 792              }
 793          }
 794          return array($stream, $remaining_filters);
 795      }
 796  
 797      /**
 798       * Throw an exception or print an error message and die if the K_TCPDF_PARSER_THROW_EXCEPTION_ERROR constant is set to true.
 799       * @param $msg (string) The error message
 800       * @public
 801       * @since 1.0.000 (2011-05-23)
 802       */
 803  	public function Error($msg) {
 804          if ($this->cfg['die_for_errors']) {
 805              die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
 806          } else {
 807              throw new Exception('TCPDF_PARSER ERROR: '.$msg);
 808          }
 809      }
 810  
 811  } // END OF TCPDF_PARSER CLASS
 812  
 813  //============================================================+
 814  // END OF FILE
 815  //============================================================+


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1