[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/lib/spout/src/Spout/Common/Helper/ -> EncodingHelper.php (source)

   1  <?php
   2  
   3  namespace Box\Spout\Common\Helper;
   4  
   5  use Box\Spout\Common\Exception\EncodingConversionException;
   6  
   7  /**
   8   * Class EncodingHelper
   9   * This class provides helper functions to work with encodings.
  10   *
  11   * @package Box\Spout\Common\Helper
  12   */
  13  class EncodingHelper
  14  {
  15      /** Definition of the encodings that can have a BOM */
  16      const ENCODING_UTF8     = 'UTF-8';
  17      const ENCODING_UTF16_LE = 'UTF-16LE';
  18      const ENCODING_UTF16_BE = 'UTF-16BE';
  19      const ENCODING_UTF32_LE = 'UTF-32LE';
  20      const ENCODING_UTF32_BE = 'UTF-32BE';
  21  
  22      /** Definition of the BOMs for the different encodings */
  23      const BOM_UTF8     = "\xEF\xBB\xBF";
  24      const BOM_UTF16_LE = "\xFF\xFE";
  25      const BOM_UTF16_BE = "\xFE\xFF";
  26      const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
  27      const BOM_UTF32_BE = "\x00\x00\xFE\xFF";
  28  
  29      /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
  30      protected $globalFunctionsHelper;
  31  
  32      /** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
  33      protected $supportedEncodingsWithBom;
  34  
  35      /**
  36       * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
  37       */
  38      public function __construct($globalFunctionsHelper)
  39      {
  40          $this->globalFunctionsHelper = $globalFunctionsHelper;
  41  
  42          $this->supportedEncodingsWithBom = [
  43              self::ENCODING_UTF8     => self::BOM_UTF8,
  44              self::ENCODING_UTF16_LE => self::BOM_UTF16_LE,
  45              self::ENCODING_UTF16_BE => self::BOM_UTF16_BE,
  46              self::ENCODING_UTF32_LE => self::BOM_UTF32_LE,
  47              self::ENCODING_UTF32_BE => self::BOM_UTF32_BE,
  48          ];
  49      }
  50  
  51      /**
  52       * Returns the number of bytes to use as offset in order to skip the BOM.
  53       *
  54       * @param resource $filePointer Pointer to the file to check
  55       * @param string $encoding Encoding of the file to check
  56       * @return int Bytes offset to apply to skip the BOM (0 means no BOM)
  57       */
  58      public function getBytesOffsetToSkipBOM($filePointer, $encoding)
  59      {
  60          $byteOffsetToSkipBom = 0;
  61  
  62          if ($this->hasBom($filePointer, $encoding)) {
  63              $bomUsed = $this->supportedEncodingsWithBom[$encoding];
  64  
  65              // we skip the N first bytes
  66              $byteOffsetToSkipBom = strlen($bomUsed);
  67          }
  68  
  69          return $byteOffsetToSkipBom;
  70      }
  71  
  72      /**
  73       * Returns whether the file identified by the given pointer has a BOM.
  74       *
  75       * @param resource $filePointer Pointer to the file to check
  76       * @param string $encoding Encoding of the file to check
  77       * @return bool TRUE if the file has a BOM, FALSE otherwise
  78       */
  79      protected function hasBOM($filePointer, $encoding)
  80      {
  81          $hasBOM = false;
  82  
  83          $this->globalFunctionsHelper->rewind($filePointer);
  84  
  85          if (array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
  86              $potentialBom = $this->supportedEncodingsWithBom[$encoding];
  87              $numBytesInBom = strlen($potentialBom);
  88  
  89              $hasBOM = ($this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
  90          }
  91  
  92          return $hasBOM;
  93      }
  94  
  95      /**
  96       * Attempts to convert a non UTF-8 string into UTF-8.
  97       *
  98       * @param string $string Non UTF-8 string to be converted
  99       * @param string $sourceEncoding The encoding used to encode the source string
 100       * @return string The converted, UTF-8 string
 101       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 102       */
 103      public function attemptConversionToUTF8($string, $sourceEncoding)
 104      {
 105          return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
 106      }
 107  
 108      /**
 109       * Attempts to convert a UTF-8 string into the given encoding.
 110       *
 111       * @param string $string UTF-8 string to be converted
 112       * @param string $targetEncoding The encoding the string should be re-encoded into
 113       * @return string The converted string, encoded with the given encoding
 114       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 115       */
 116      public function attemptConversionFromUTF8($string, $targetEncoding)
 117      {
 118          return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
 119      }
 120  
 121      /**
 122       * Attempts to convert the given string to the given encoding.
 123       * Depending on what is installed on the server, we will try to iconv or mbstring.
 124       *
 125       * @param string $string string to be converted
 126       * @param string $sourceEncoding The encoding used to encode the source string
 127       * @param string $targetEncoding The encoding the string should be re-encoded into
 128       * @return string The converted string, encoded with the given encoding
 129       * @throws \Box\Spout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
 130       */
 131      protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
 132      {
 133          // if source and target encodings are the same, it's a no-op
 134          if ($sourceEncoding === $targetEncoding) {
 135              return $string;
 136          }
 137  
 138          $convertedString = null;
 139  
 140          if ($this->canUseIconv()) {
 141              $convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
 142          } else if ($this->canUseMbString()) {
 143              $convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
 144          } else {
 145              throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding is not supported. Please install \"iconv\" or \"PHP Intl\".");
 146          }
 147  
 148          if ($convertedString === false) {
 149              throw new EncodingConversionException("The conversion from $sourceEncoding to $targetEncoding failed.");
 150          }
 151  
 152          return $convertedString;
 153      }
 154  
 155      /**
 156       * Returns whether "iconv" can be used.
 157       *
 158       * @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
 159       */
 160      protected function canUseIconv()
 161      {
 162          return $this->globalFunctionsHelper->function_exists('iconv');
 163      }
 164  
 165      /**
 166       * Returns whether "mb_string" functions can be used.
 167       * These functions come with the PHP Intl package.
 168       *
 169       * @return bool TRUE if "mb_string" functions are available and can be used, FALSE otherwise
 170       */
 171      protected function canUseMbString()
 172      {
 173          return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
 174      }
 175  }


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1