URI.php 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. <?php
  /**
   * HTML Purifier's internal representation of a URI.
   *
   * @note
   *      Internal data-structures are completely escaped. If the data needs
   *      to be used in a non-URI context (which is very unlikely), be sure
   *      to decode it first. The URI may not necessarily be well-formed until
   *      validate() is called.
   */
  class HTMLPurifier_URI
  {

      /**
       * The individual URI components. A component that is absent is stored
       * as null; toString() only emits the components that are non-null
       * (the path is always appended as-is).
       */
      public $scheme, $userinfo, $host, $port, $path, $query, $fragment;

      /**
       * Stores the URI components on the object.
       *
       * @note Automatically normalizes scheme and port
       *
       * @param $scheme   Scheme; lowercased unless it is null or already lowercase
       * @param $userinfo Userinfo component, stored unchanged
       * @param $host     Host component, stored unchanged
       * @param $port     Port; cast to int unless it is null
       * @param $path     Path component, stored unchanged
       * @param $query    Query component, stored unchanged
       * @param $fragment Fragment component, stored unchanged
       */
      public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
          // ctype_lower() lets an already-lowercase scheme skip strtolower()
          $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
          $this->userinfo = $userinfo;
          $this->host = $host;
          $this->port = is_null($port) ? $port : (int) $port;
          $this->path = $path;
          $this->query = $query;
          $this->fragment = $fragment;
      }

      /**
       * Retrieves a scheme object corresponding to the URI's scheme/default
       *
       * @param $config Instance of HTMLPurifier_Config
       * @param $context Instance of HTMLPurifier_Context
       * @return Scheme object appropriate for validating this URI, or false
       *         when the registry yields no object for the URI's scheme or
       *         for the configured default scheme. Only the latter case
       *         raises an E_USER_WARNING.
       */
      public function getSchemeObj($config, $context) {
          $registry = HTMLPurifier_URISchemeRegistry::instance();
          if ($this->scheme !== null) {
              $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
              if (!$scheme_obj) return false; // invalid scheme, clean it out
          } else {
              // no scheme: retrieve the default one
              $def = $config->getDefinition('URI');
              $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
              if (!$scheme_obj) {
                  // something funky happened to the default scheme object
                  trigger_error(
                      'Default scheme object "' . $def->defaultScheme . '" was not readable',
                      E_USER_WARNING
                  );
                  return false;
              }
          }
          return $scheme_obj;
      }

      /**
       * Generic validation method applicable for all schemes. May modify
       * this URI in order to get it into a compliant form: components are
       * percent-encoded, an invalid host or out-of-range port is reset to
       * null, and a path that cannot be represented is emptied.
       *
       * @param $config Instance of HTMLPurifier_Config
       * @param $context Instance of HTMLPurifier_Context
       * @return True if validation/filtering succeeds, false if failure
       *         (every path through this generic implementation returns true)
       */
      public function validate($config, $context) {

          // ABNF definitions from RFC 3986
          $chars_sub_delims = '!$&\'()*+,;=';
          $chars_pchar = $chars_sub_delims . ':@';

          // validate scheme (MUST BE FIRST!)
          // With no host present, a scheme identical to the configured
          // default is dropped.
          if (!is_null($this->scheme) && is_null($this->host)) {
              $def = $config->getDefinition('URI');
              if ($def->defaultScheme === $this->scheme) {
                  $this->scheme = null;
              }
          }

          // validate host
          if (!is_null($this->host)) {
              $host_def = new HTMLPurifier_AttrDef_URI_Host();
              $this->host = $host_def->validate($this->host, $config, $context);
              if ($this->host === false) $this->host = null;
          }

          // validate username
          if (!is_null($this->userinfo)) {
              $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
              $this->userinfo = $encoder->encode($this->userinfo);
          }

          // validate port
          if (!is_null($this->port)) {
              if ($this->port < 1 || $this->port > 65535) $this->port = null;
          }

          // validate path
          // A path that was never supplied is treated as the empty path, so
          // that the offset access and string functions below never operate
          // on null.
          if (is_null($this->path)) $this->path = '';
          $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
          if (!is_null($this->host)) {
              // path-abempty (hier and relative)
              $this->path = $segments_encoder->encode($this->path);
          } elseif ($this->path !== '') {
              if ($this->path[0] === '/') {
                  // path-absolute (hier and relative)
                  if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                      // This shouldn't ever happen! Without a host, a path
                      // beginning with '//' would be emitted by toString()
                      // where an authority is expected, so it is discarded.
                      $this->path = '';
                  } else {
                      $this->path = $segments_encoder->encode($this->path);
                  }
              } elseif (!is_null($this->scheme)) {
                  // path-rootless (hier)
                  // The enclosing test already guarantees a non-empty path
                  $this->path = $segments_encoder->encode($this->path);
              } else {
                  // path-noscheme (relative)
                  // The first segment is encoded without ':' in the
                  // preserved set; the remaining segments use the regular
                  // segment encoder.
                  $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                  $c = strpos($this->path, '/');
                  if ($c !== false) {
                      $this->path =
                          $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                          $segments_encoder->encode(substr($this->path, $c));
                  } else {
                      $this->path = $segment_nc_encoder->encode($this->path);
                  }
              }
          } else {
              // path-empty (hier and relative)
              $this->path = ''; // just to be safe
          }

          // qf = query and fragment
          $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

          if (!is_null($this->query)) {
              $this->query = $qf_encoder->encode($this->query);
          }

          if (!is_null($this->fragment)) {
              $this->fragment = $qf_encoder->encode($this->fragment);
          }

          return true;

      }

      /**
       * Convert URI back to string
       *
       * @return String URI appropriate for output
       */
      public function toString() {
          // reconstruct authority
          // The authority stays null (and is omitted entirely) unless a
          // host is set; userinfo and port are only emitted alongside one.
          $authority = null;
          if (!is_null($this->host)) {
              $authority = '';
              if (!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
              $authority .= $this->host;
              if (!is_null($this->port)) $authority .= ':' . $this->port;
          }

          // reconstruct the result
          $result = '';
          if (!is_null($this->scheme)) $result .= $this->scheme . ':';
          if (!is_null($authority)) $result .= '//' . $authority;
          $result .= $this->path;
          if (!is_null($this->query)) $result .= '?' . $this->query;
          if (!is_null($this->fragment)) $result .= '#' . $this->fragment;

          return $result;
      }

  }
  152. // vim: et sw=4 sts=4