<?php
// PercentEncoder.php
  2. /**
  3. * Class that handles operations involving percent-encoding in URIs.
  4. *
  5. * @warning
  6. * Be careful when reusing instances of PercentEncoder. The object
  7. * you use for normalize() SHOULD NOT be used for encode(), or
  8. * vice-versa.
  9. */
  10. class HTMLPurifier_PercentEncoder
  11. {
  12. /**
  13. * Reserved characters to preserve when using encode().
  14. */
  15. protected $preserve = array();
  16. /**
  17. * String of characters that should be preserved while using encode().
  18. */
  19. public function __construct($preserve = false) {
  20. // unreserved letters, ought to const-ify
  21. for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
  22. for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
  23. for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
  24. $this->preserve[45] = true; // Dash -
  25. $this->preserve[46] = true; // Period .
  26. $this->preserve[95] = true; // Underscore _
  27. $this->preserve[126]= true; // Tilde ~
  28. // extra letters not to escape
  29. if ($preserve !== false) {
  30. for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
  31. $this->preserve[ord($preserve[$i])] = true;
  32. }
  33. }
  34. }
  35. /**
  36. * Our replacement for urlencode, it encodes all non-reserved characters,
  37. * as well as any extra characters that were instructed to be preserved.
  38. * @note
  39. * Assumes that the string has already been normalized, making any
  40. * and all percent escape sequences valid. Percents will not be
  41. * re-escaped, regardless of their status in $preserve
  42. * @param $string String to be encoded
  43. * @return Encoded string.
  44. */
  45. public function encode($string) {
  46. $ret = '';
  47. for ($i = 0, $c = strlen($string); $i < $c; $i++) {
  48. if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
  49. $ret .= '%' . sprintf('%02X', $int);
  50. } else {
  51. $ret .= $string[$i];
  52. }
  53. }
  54. return $ret;
  55. }
  56. /**
  57. * Fix up percent-encoding by decoding unreserved characters and normalizing.
  58. * @warning This function is affected by $preserve, even though the
  59. * usual desired behavior is for this not to preserve those
  60. * characters. Be careful when reusing instances of PercentEncoder!
  61. * @param $string String to normalize
  62. */
  63. public function normalize($string) {
  64. if ($string == '') return '';
  65. $parts = explode('%', $string);
  66. $ret = array_shift($parts);
  67. foreach ($parts as $part) {
  68. $length = strlen($part);
  69. if ($length < 2) {
  70. $ret .= '%25' . $part;
  71. continue;
  72. }
  73. $encoding = substr($part, 0, 2);
  74. $text = substr($part, 2);
  75. if (!ctype_xdigit($encoding)) {
  76. $ret .= '%25' . $part;
  77. continue;
  78. }
  79. $int = hexdec($encoding);
  80. if (isset($this->preserve[$int])) {
  81. $ret .= chr($int) . $text;
  82. continue;
  83. }
  84. $encoding = strtoupper($encoding);
  85. $ret .= '%' . $encoding . $text;
  86. }
  87. return $ret;
  88. }
  89. }
// vim: et sw=4 sts=4