Unescaper.php 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. * (c) Fabien Potencier <fabien@symfony.com>
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. namespace Symfony\Component\Yaml;
  /**
   * Unescaper encapsulates unescaping rules for single and double-quoted
   * YAML strings.
   *
   * Escaped Unicode characters are emitted in the encoding named by
   * self::ENCODING.
   *
   * @author Matthew Lewinski <matthew@lewinski.org>
   */
  class Unescaper
  {
      // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
      // must be converted to that encoding.
      const ENCODING = 'UTF-8';

      // Regex fragment that matches an escaped character in a double quoted
      // string: a backslash followed by either one of the single-character
      // escapes, or by x/u/U and exactly 2/4/8 hexadecimal digits.
      const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

      /**
       * Unescapes a single quoted string.
       *
       * The only escape handled is a doubled single quote (''), which is
       * collapsed into one single quote.
       *
       * @param string $value A single quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeSingleQuotedString($value)
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every sequence matched by self::REGEX_ESCAPED_CHARACTER is replaced by
       * the result of unescapeCharacter(); all other text is left untouched.
       *
       * @param string $value A double quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeDoubleQuotedString($value)
      {
          // $this is handed to the closure through an explicit variable so the
          // code keeps working on PHP versions where closures cannot use $this.
          $self = $this;
          $callback = function ($match) use ($self) {
              // $match[0] is the whole escape sequence, leading backslash included.
              return $self->unescapeCharacter($match[0]);
          };

          // The "u" modifier makes PCRE treat pattern and subject as UTF-8.
          return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * This method is public only so that the closure created in
       * unescapeDoubleQuotedString() can call it.
       *
       * @param string $value An escaped character: the backslash, the escape
       *                      code and, for x/u/U escapes, the hex digits
       *
       * @return string|null The unescaped character, or null when the escape
       *                     code is not one of the cases handled below
       */
      public function unescapeCharacter($value)
      {
          // $value[0] is the backslash; $value[1] selects the escape.
          // Square brackets are used because the curly-brace offset syntax
          // ($value{1}) is deprecated in PHP 7.4 and removed in PHP 8.0.
          switch ($value[1]) {
              case '0':
                  return "\x0";
              case 'a':
                  return "\x7";
              case 'b':
                  return "\x8";
              case 't':
                  return "\t";
              case "\t":
                  // A backslash followed by a literal tab also yields a tab.
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\xb";
              case 'f':
                  return "\xc";
              case 'r':
                  return "\xd";
              case 'e':
                  return "\x1b";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
              case 'x':
                  // \xHH: two hex digits, packed as a 16-bit big-endian code unit.
                  $char = pack('n', hexdec(substr($value, 2, 2)));

                  return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
              case 'u':
                  // \uHHHH: four hex digits, packed as a 16-bit big-endian code unit.
                  $char = pack('n', hexdec(substr($value, 2, 4)));

                  return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
              case 'U':
                  // \UHHHHHHHH: eight hex digits, packed as a 32-bit big-endian value.
                  $char = pack('N', hexdec(substr($value, 2, 8)));

                  return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
          }

          // No default case: self::REGEX_ESCAPED_CHARACTER only matches the
          // escape codes listed above, so this point is reached only when the
          // method is called directly with something else (result: null).
      }

      /**
       * Convert a string from one encoding to another.
       *
       * mbstring is preferred when available; iconv is the fallback.
       *
       * @param string $value The string to convert
       * @param string $to    The output (target) encoding
       * @param string $from  The input (source) encoding
       *
       * @return string The string with the new encoding
       *
       * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
       */
      private function convertEncoding($value, $to, $from)
      {
          if (function_exists('mb_convert_encoding')) {
              return mb_convert_encoding($value, $to, $from);
          } elseif (function_exists('iconv')) {
              // Note the argument order: iconv takes the source encoding first.
              return iconv($from, $to, $value);
          }

          // Fully qualified on purpose: an unqualified class name here would be
          // resolved relative to the enclosing namespace, not to the global
          // \RuntimeException.
          throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
      }
  }