[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/lib/spout/src/Spout/Reader/CSV/ -> RowIterator.php (source)

   1  <?php
   2  
   3  namespace Box\Spout\Reader\CSV;
   4  
   5  use Box\Spout\Reader\IteratorInterface;
   6  use Box\Spout\Common\Helper\EncodingHelper;
   7  
   8  /**
   9   * Class RowIterator
  10   * Iterate over CSV rows.
  11   *
  12   * @package Box\Spout\Reader\CSV
  13   */
  14  class RowIterator implements IteratorInterface
  15  {
  16      /**
  17       * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
  18       * Alignement with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
  19       */
  20      const MAX_READ_BYTES_PER_LINE = 32768;
  21  
  22      /** @var resource Pointer to the CSV file to read */
  23      protected $filePointer;
  24  
  25      /** @var int Number of read rows */
  26      protected $numReadRows = 0;
  27  
  28      /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
  29      protected $rowDataBuffer = null;
  30  
  31      /** @var bool Indicates whether all rows have been read */
  32      protected $hasReachedEndOfFile = false;
  33  
  34      /** @var string Defines the character used to delimit fields (one character only) */
  35      protected $fieldDelimiter;
  36  
  37      /** @var string Defines the character used to enclose fields (one character only) */
  38      protected $fieldEnclosure;
  39  
  40      /** @var string Encoding of the CSV file to be read */
  41      protected $encoding;
  42  
  43      /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
  44      protected $globalFunctionsHelper;
  45  
  46      /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
  47      protected $encodingHelper;
  48  
  49      /** @var string End of line delimiter, encoded using the same encoding as the CSV */
  50      protected $encodedEOLDelimiter;
  51  
  52      /** @var string End of line delimiter, given by the user as input. */
  53      protected $inputEOLDelimiter;
  54  
  55      /**
  56       * @param resource $filePointer Pointer to the CSV file to read
  57       * @param string $fieldDelimiter Character that delimits fields
  58       * @param string $fieldEnclosure Character that enclose fields
  59       * @param string $encoding Encoding of the CSV file to be read
  60       * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
  61       */
  62      public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
  63      {
  64          $this->filePointer = $filePointer;
  65          $this->fieldDelimiter = $fieldDelimiter;
  66          $this->fieldEnclosure = $fieldEnclosure;
  67          $this->encoding = $encoding;
  68          $this->inputEOLDelimiter = $endOfLineDelimiter;
  69          $this->globalFunctionsHelper = $globalFunctionsHelper;
  70  
  71          $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
  72      }
  73  
  74      /**
  75       * Rewind the Iterator to the first element
  76       * @link http://php.net/manual/en/iterator.rewind.php
  77       *
  78       * @return void
  79       */
  80      public function rewind()
  81      {
  82          $this->rewindAndSkipBom();
  83  
  84          $this->numReadRows = 0;
  85          $this->rowDataBuffer = null;
  86  
  87          $this->next();
  88      }
  89  
  90      /**
  91       * This rewinds and skips the BOM if inserted at the beginning of the file
  92       * by moving the file pointer after it, so that it is not read.
  93       *
  94       * @return void
  95       */
  96      protected function rewindAndSkipBom()
  97      {
  98          $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);
  99  
 100          // sets the cursor after the BOM (0 means no BOM, so rewind it)
 101          $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
 102      }
 103  
 104      /**
 105       * Checks if current position is valid
 106       * @link http://php.net/manual/en/iterator.valid.php
 107       *
 108       * @return boolean
 109       */
 110      public function valid()
 111      {
 112          return ($this->filePointer && !$this->hasReachedEndOfFile);
 113      }
 114  
 115      /**
 116       * Move forward to next element. Empty rows are skipped.
 117       * @link http://php.net/manual/en/iterator.next.php
 118       *
 119       * @return void
 120       * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 121       */
 122      public function next()
 123      {
 124          $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
 125  
 126          if ($this->hasReachedEndOfFile) {
 127              return;
 128          }
 129  
 130          do {
 131              $rowData = $this->getNextUTF8EncodedRow();
 132              $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
 133          } while (($rowData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($rowData));
 134  
 135          if ($rowData !== false) {
 136              $this->rowDataBuffer = $rowData;
 137              $this->numReadRows++;
 138          } else {
 139              // If we reach this point, it means end of file was reached.
 140              // This happens when the last lines are empty lines.
 141              $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
 142          }
 143      }
 144  
 145      /**
 146       * Returns the next row, converted if necessary to UTF-8.
 147       * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
 148       * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
 149       *
 150       * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
 151       * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
 152       */
 153      protected function getNextUTF8EncodedRow()
 154      {
 155          $encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
 156          if (false === $encodedRowData) {
 157              return false;
 158          }
 159  
 160          foreach ($encodedRowData as $cellIndex => $cellValue) {
 161              switch($this->encoding) {
 162                  case EncodingHelper::ENCODING_UTF16_LE:
 163                  case EncodingHelper::ENCODING_UTF32_LE:
 164                      // remove whitespace from the beginning of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data
 165                      $cellValue = ltrim($cellValue);
 166                      break;
 167  
 168                  case EncodingHelper::ENCODING_UTF16_BE:
 169                  case EncodingHelper::ENCODING_UTF32_BE:
 170                      // remove whitespace from the end of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data
 171                      $cellValue = rtrim($cellValue);
 172                      break;
 173              }
 174  
 175              $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
 176          }
 177  
 178          return $encodedRowData;
 179      }
 180  
 181      /**
 182       * Returns the end of line delimiter, encoded using the same encoding as the CSV.
 183       * The return value is cached.
 184       *
 185       * @return string
 186       */
 187      protected function getEncodedEOLDelimiter()
 188      {
 189          if (!isset($this->encodedEOLDelimiter)) {
 190              $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);
 191          }
 192  
 193          return $this->encodedEOLDelimiter;
 194      }
 195  
 196      /**
 197       * @param array $lineData Array containing the cells value for the line
 198       * @return bool Whether the given line is empty
 199       */
 200      protected function isEmptyLine($lineData)
 201      {
 202          return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
 203      }
 204  
 205      /**
 206       * Return the current element from the buffer
 207       * @link http://php.net/manual/en/iterator.current.php
 208       *
 209       * @return array|null
 210       */
 211      public function current()
 212      {
 213          return $this->rowDataBuffer;
 214      }
 215  
 216      /**
 217       * Return the key of the current element
 218       * @link http://php.net/manual/en/iterator.key.php
 219       *
 220       * @return int
 221       */
 222      public function key()
 223      {
 224          return $this->numReadRows;
 225      }
 226  
 227      /**
 228       * Cleans up what was created to iterate over the object.
 229       *
 230       * @return void
 231       */
 232      public function end()
 233      {
 234          // do nothing
 235      }
 236  }


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1