[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/lib/spout/src/Spout/Reader/XLSX/Helper/SharedStringsCaching/ -> CachingStrategyFactory.php (source)

   1  <?php
   2  
   3  namespace Box\Spout\Reader\XLSX\Helper\SharedStringsCaching;
   4  
   5  /**
   6   * Class CachingStrategyFactory
   7   *
   8   * @package Box\Spout\Reader\XLSX\Helper\SharedStringsCaching
   9   */
  10  class CachingStrategyFactory
  11  {
  12      /**
  13       * The memory amount needed to store a string was obtained empirically from this data:
  14       *
  15       *        ------------------------------------
  16       *        | Number of chars⁺ | Memory needed |
  17       *        ------------------------------------
  18       *        |           3,000  |         1 MB  |
  19       *        |          15,000  |         2 MB  |
  20       *        |          30,000  |         5 MB  |
  21       *        |          75,000  |        11 MB  |
  22       *        |         150,000  |        21 MB  |
  23       *        |         300,000  |        43 MB  |
  24       *        |         750,000  |       105 MB  |
  25       *        |       1,500,000  |       210 MB  |
  26       *        |       2,250,000  |       315 MB  |
  27       *        |       3,000,000  |       420 MB  |
  28       *        |       4,500,000  |       630 MB  |
  29       *        ------------------------------------
  30       *
  31       *        ⁺ All characters were 1 byte long
  32       *
  33       * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
  34       * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
  35       * Also, there is on average about 20 characters per cell (this is entirely empirical data...).
  36       *
  37       * This means that in order to store one shared string in memory, the memory amount needed is:
  38       *   => 20 * 600 ≈ 12KB
  39       */
  40      const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;
  41  
  42      /**
  43       * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
  44       * instead of in memory. Then, when accessing a string, the corresponding file contents will be loaded in memory
  45       * and the string will be quickly retrieved.
  46       * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
  47       * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
  48       * best when the indexes of the shared strings are sorted in the sheet data.
  49       * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
  50       */
  51      const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;
  52  
  53      /** @var CachingStrategyFactory|null Singleton instance */
  54      protected static $instance = null;
  55  
  56      /**
  57       * Private constructor for singleton
  58       */
  59      private function __construct()
  60      {
  61      }
  62  
  63      /**
  64       * Returns the singleton instance of the factory
  65       *
  66       * @return CachingStrategyFactory
  67       */
  68      public static function getInstance()
  69      {
  70          if (self::$instance === null) {
  71              self::$instance = new CachingStrategyFactory();
  72          }
  73  
  74          return self::$instance;
  75      }
  76  
  77      /**
  78       * Returns the best caching strategy, given the number of unique shared strings
  79       * and the amount of memory available.
  80       *
  81       * @param int $sharedStringsUniqueCount Number of unique shared strings
  82       * @param string|void $tempFolder Temporary folder where the temporary files to store shared strings will be stored
  83       * @return CachingStrategyInterface The best caching strategy
  84       */
  85      public function getBestCachingStrategy($sharedStringsUniqueCount, $tempFolder = null)
  86      {
  87          if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
  88              return new InMemoryStrategy($sharedStringsUniqueCount);
  89          } else {
  90              return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
  91          }
  92      }
  93  
  94      /**
  95       * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
  96       * and the amount of memory available.
  97       *
  98       * @param int $sharedStringsUniqueCount Number of unique shared strings
  99       * @return bool
 100       */
 101      protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
 102      {
 103          $memoryAvailable = $this->getMemoryLimitInKB();
 104  
 105          if ($memoryAvailable === -1) {
 106              // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
 107              return ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
 108          } else {
 109              $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;
 110              return ($memoryAvailable > $memoryNeeded);
 111          }
 112      }
 113  
 114      /**
 115       * Returns the PHP "memory_limit" in Kilobytes
 116       *
 117       * @return float
 118       */
 119      protected function getMemoryLimitInKB()
 120      {
 121          $memoryLimitFormatted = $this->getMemoryLimitFromIni();
 122          $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted));
 123  
 124          // No memory limit
 125          if ($memoryLimitFormatted === '-1') {
 126              return -1;
 127          }
 128  
 129          if (preg_match('/(\d+)([bkmgt])b?/', $memoryLimitFormatted, $matches)) {
 130              $amount = intval($matches[1]);
 131              $unit = $matches[2];
 132  
 133              switch ($unit) {
 134                  case 'b': return ($amount / 1024);
 135                  case 'k': return $amount;
 136                  case 'm': return ($amount * 1024);
 137                  case 'g': return ($amount * 1024 * 1024);
 138                  case 't': return ($amount * 1024 * 1024 * 1024);
 139              }
 140          }
 141  
 142          return -1;
 143      }
 144  
 145      /**
 146       * Returns the formatted "memory_limit" value
 147       *
 148       * @return string
 149       */
 150      protected function getMemoryLimitFromIni()
 151      {
 152          return ini_get('memory_limit');
 153      }
 154  }


Generated: Thu Aug 11 10:00:09 2016 Cross-referenced by PHPXref 0.7.1