StringUtils.php 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. <?php
  2. /**
  3. * Zend Framework (http://framework.zend.com/)
  4. *
  5. * @link http://github.com/zendframework/zf2 for the canonical source repository
  6. * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  7. * @license http://framework.zend.com/license/new-bsd New BSD License
  8. */
  9. namespace Zend\Stdlib;
  10. use Zend\Stdlib\StringWrapper\StringWrapperInterface;
  11. /**
  12. * Utility class for handling strings of different character encodings
  13. * using available PHP extensions.
  14. *
  15. * Declared abstract, as we have no need for instantiation.
  16. */
  abstract class StringUtils
  {
      /**
       * Ordered list of registered string wrapper class names.
       *
       * Stays null until the list is first needed; getRegisteredWrappers()
       * then fills it with the default wrappers. resetRegisteredWrappers()
       * puts it back to null.
       *
       * @var string[]|null
       */
      protected static $wrapperRegistry = null;

      /**
       * A list of known single-byte character encodings (upper-case)
       *
       * @var string[]
       */
      protected static $singleByteEncodings = array(
          'ASCII', '7BIT', '8BIT',
          'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4', 'ISO-8859-5',
          'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9', 'ISO-8859-10',
          'ISO-8859-11', 'ISO-8859-13', 'ISO-8859-14', 'ISO-8859-15', 'ISO-8859-16',
          'CP-1251', 'CP-1252',
          // TODO
      );

      /**
       * Cached answer of hasPcreUnicodeSupport(); null until first asked.
       *
       * @var bool|null
       */
      protected static $hasPcreUnicodeSupport = null;

      /**
       * Get registered wrapper classes
       *
       * On first use (or after a reset) the registry is populated with the
       * default wrappers: Intl, MbString and Iconv, each only when the
       * matching PHP extension is loaded, followed by Native.
       *
       * @return string[] Wrapper class names in lookup order
       */
      public static function getRegisteredWrappers()
      {
          if (static::$wrapperRegistry === null) {
              static::$wrapperRegistry = array();

              if (extension_loaded('intl')) {
                  static::$wrapperRegistry[] = 'Zend\Stdlib\StringWrapper\Intl';
              }

              if (extension_loaded('mbstring')) {
                  static::$wrapperRegistry[] = 'Zend\Stdlib\StringWrapper\MbString';
              }

              if (extension_loaded('iconv')) {
                  static::$wrapperRegistry[] = 'Zend\Stdlib\StringWrapper\Iconv';
              }

              static::$wrapperRegistry[] = 'Zend\Stdlib\StringWrapper\Native';
          }

          return static::$wrapperRegistry;
      }

      /**
       * Register a string wrapper class
       *
       * The class name is appended after the already registered wrappers;
       * a name that is already registered is not added a second time.
       *
       * @param string $wrapper Wrapper class name
       * @return void
       */
      public static function registerWrapper($wrapper)
      {
          $wrapper = (string) $wrapper;

          // Go through getRegisteredWrappers() so the default list is built
          // first. Reading the raw property would hand null to in_array()
          // and the append below would then drop every default wrapper.
          if (!in_array($wrapper, static::getRegisteredWrappers(), true)) {
              static::$wrapperRegistry[] = $wrapper;
          }
      }

      /**
       * Unregister a string wrapper class
       *
       * Does nothing when the class name is not registered.
       *
       * @param string $wrapper Wrapper class name
       * @return void
       */
      public static function unregisterWrapper($wrapper)
      {
          // Initialise the default list first; otherwise a default wrapper
          // could not be removed before the registry has been built.
          $index = array_search((string) $wrapper, static::getRegisteredWrappers(), true);
          if ($index !== false) {
              unset(static::$wrapperRegistry[$index]);
          }
      }

      /**
       * Reset all registered wrappers so the default wrappers will be used
       *
       * @return void
       */
      public static function resetRegisteredWrappers()
      {
          static::$wrapperRegistry = null;
      }

      /**
       * Get the first string wrapper supporting the given character encoding
       * and supports to convert into the given convert encoding.
       *
       * Registered classes are asked in order through their static
       * isSupported() method; the first match is instantiated, configured
       * with setEncoding() and returned.
       *
       * @param string      $encoding        Character encoding to support
       * @param string|null $convertEncoding OPTIONAL character encoding to convert in
       * @return StringWrapperInterface
       * @throws Exception\RuntimeException If no wrapper supports given character encodings
       */
      public static function getWrapper($encoding = 'UTF-8', $convertEncoding = null)
      {
          foreach (static::getRegisteredWrappers() as $wrapperClass) {
              if ($wrapperClass::isSupported($encoding, $convertEncoding)) {
                  $wrapper = new $wrapperClass($encoding, $convertEncoding);
                  $wrapper->setEncoding($encoding, $convertEncoding);
                  return $wrapper;
              }
          }

          throw new Exception\RuntimeException(
              'No wrapper found supporting "' . $encoding . '"'
              . (($convertEncoding !== null) ? ' and "' . $convertEncoding . '"' : '')
          );
      }

      /**
       * Get a list of all known single-byte character encodings
       *
       * @return string[]
       */
      public static function getSingleByteEncodings()
      {
          return static::$singleByteEncodings;
      }

      /**
       * Check if a given encoding is a known single-byte character encoding
       *
       * The name is upper-cased before the lookup, so the check is
       * case-insensitive.
       *
       * @param string $encoding
       * @return bool
       */
      public static function isSingleByteEncoding($encoding)
      {
          return in_array(strtoupper($encoding), static::$singleByteEncodings, true);
      }

      /**
       * Check if a given string is valid UTF-8 encoded
       *
       * Non-string values are never valid; the empty string always is.
       *
       * @param string $str
       * @return bool
       */
      public static function isValidUtf8($str)
      {
          if (!is_string($str)) {
              return false;
          }

          // With the "u" modifier preg_match() only reports a match (1) when
          // the subject is well-formed UTF-8; otherwise it returns false.
          return $str === '' || preg_match('/^./su', $str) === 1;
      }

      /**
       * Is PCRE compiled with Unicode support?
       *
       * The probe runs once; the result is cached for later calls.
       *
       * @return bool
       */
      public static function hasPcreUnicodeSupport()
      {
          if (static::$hasPcreUnicodeSupport === null) {
              // NOTE(review): ErrorHandler is declared elsewhere; presumably it
              // captures warnings the probe may emit -- confirm.
              ErrorHandler::start();
              static::$hasPcreUnicodeSupport = defined('PREG_BAD_UTF8_OFFSET_ERROR')
                  && preg_match('/\pL/u', 'a') === 1;
              ErrorHandler::stop();
          }

          return static::$hasPcreUnicodeSupport;
      }
  }