URI.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. <?php
  2. /**
  3. * HTML Purifier's internal representation of a URI.
  4. * @note
  5. * Internal data-structures are completely escaped. If the data needs
  6. * to be used in a non-URI context (which is very unlikely), be sure
  7. * to decode it first. The URI may not necessarily be well-formed until
  8. * validate() is called.
  9. */
  10. class HTMLPurifier_URI
  11. {
  12. public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
  13. /**
  14. * @note Automatically normalizes scheme and port
  15. */
  16. public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
  17. $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
  18. $this->userinfo = $userinfo;
  19. $this->host = $host;
  20. $this->port = is_null($port) ? $port : (int) $port;
  21. $this->path = $path;
  22. $this->query = $query;
  23. $this->fragment = $fragment;
  24. }
  25. /**
  26. * Retrieves a scheme object corresponding to the URI's scheme/default
  27. * @param $config Instance of HTMLPurifier_Config
  28. * @param $context Instance of HTMLPurifier_Context
  29. * @return Scheme object appropriate for validating this URI
  30. */
  31. public function getSchemeObj($config, $context) {
  32. $registry = HTMLPurifier_URISchemeRegistry::instance();
  33. if ($this->scheme !== null) {
  34. $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
  35. if (!$scheme_obj) return false; // invalid scheme, clean it out
  36. } else {
  37. // no scheme: retrieve the default one
  38. $def = $config->getDefinition('URI');
  39. $scheme_obj = $def->getDefaultScheme($config, $context);
  40. if (!$scheme_obj) {
  41. // something funky happened to the default scheme object
  42. trigger_error(
  43. 'Default scheme object "' . $def->defaultScheme . '" was not readable',
  44. E_USER_WARNING
  45. );
  46. return false;
  47. }
  48. }
  49. return $scheme_obj;
  50. }
  51. /**
  52. * Generic validation method applicable for all schemes. May modify
  53. * this URI in order to get it into a compliant form.
  54. * @param $config Instance of HTMLPurifier_Config
  55. * @param $context Instance of HTMLPurifier_Context
  56. * @return True if validation/filtering succeeds, false if failure
  57. */
  58. public function validate($config, $context) {
  59. // ABNF definitions from RFC 3986
  60. $chars_sub_delims = '!$&\'()*+,;=';
  61. $chars_gen_delims = ':/?#[]@';
  62. $chars_pchar = $chars_sub_delims . ':@';
  63. // validate host
  64. if (!is_null($this->host)) {
  65. $host_def = new HTMLPurifier_AttrDef_URI_Host();
  66. $this->host = $host_def->validate($this->host, $config, $context);
  67. if ($this->host === false) $this->host = null;
  68. }
  69. // validate scheme
  70. // NOTE: It's not appropriate to check whether or not this
  71. // scheme is in our registry, since a URIFilter may convert a
  72. // URI that we don't allow into one we do. So instead, we just
  73. // check if the scheme can be dropped because there is no host
  74. // and it is our default scheme.
  75. if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {
  76. // support for relative paths is pretty abysmal when the
  77. // scheme is present, so axe it when possible
  78. $def = $config->getDefinition('URI');
  79. if ($def->defaultScheme === $this->scheme) {
  80. $this->scheme = null;
  81. }
  82. }
  83. // validate username
  84. if (!is_null($this->userinfo)) {
  85. $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
  86. $this->userinfo = $encoder->encode($this->userinfo);
  87. }
  88. // validate port
  89. if (!is_null($this->port)) {
  90. if ($this->port < 1 || $this->port > 65535) $this->port = null;
  91. }
  92. // validate path
  93. $path_parts = array();
  94. $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
  95. if (!is_null($this->host)) { // this catches $this->host === ''
  96. // path-abempty (hier and relative)
  97. // http://www.example.com/my/path
  98. // //www.example.com/my/path (looks odd, but works, and
  99. // recognized by most browsers)
  100. // (this set is valid or invalid on a scheme by scheme
  101. // basis, so we'll deal with it later)
  102. // file:///my/path
  103. // ///my/path
  104. $this->path = $segments_encoder->encode($this->path);
  105. } elseif ($this->path !== '') {
  106. if ($this->path[0] === '/') {
  107. // path-absolute (hier and relative)
  108. // http:/my/path
  109. // /my/path
  110. if (strlen($this->path) >= 2 && $this->path[1] === '/') {
  111. // This could happen if both the host gets stripped
  112. // out
  113. // http://my/path
  114. // //my/path
  115. $this->path = '';
  116. } else {
  117. $this->path = $segments_encoder->encode($this->path);
  118. }
  119. } elseif (!is_null($this->scheme)) {
  120. // path-rootless (hier)
  121. // http:my/path
  122. // Short circuit evaluation means we don't need to check nz
  123. $this->path = $segments_encoder->encode($this->path);
  124. } else {
  125. // path-noscheme (relative)
  126. // my/path
  127. // (once again, not checking nz)
  128. $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
  129. $c = strpos($this->path, '/');
  130. if ($c !== false) {
  131. $this->path =
  132. $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
  133. $segments_encoder->encode(substr($this->path, $c));
  134. } else {
  135. $this->path = $segment_nc_encoder->encode($this->path);
  136. }
  137. }
  138. } else {
  139. // path-empty (hier and relative)
  140. $this->path = ''; // just to be safe
  141. }
  142. // qf = query and fragment
  143. $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
  144. if (!is_null($this->query)) {
  145. $this->query = $qf_encoder->encode($this->query);
  146. }
  147. if (!is_null($this->fragment)) {
  148. $this->fragment = $qf_encoder->encode($this->fragment);
  149. }
  150. return true;
  151. }
  152. /**
  153. * Convert URI back to string
  154. * @return String URI appropriate for output
  155. */
  156. public function toString() {
  157. // reconstruct authority
  158. $authority = null;
  159. // there is a rendering difference between a null authority
  160. // (http:foo-bar) and an empty string authority
  161. // (http:///foo-bar).
  162. if (!is_null($this->host)) {
  163. $authority = '';
  164. if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
  165. $authority .= $this->host;
  166. if(!is_null($this->port)) $authority .= ':' . $this->port;
  167. }
  168. // Reconstruct the result
  169. // One might wonder about parsing quirks from browsers after
  170. // this reconstruction. Unfortunately, parsing behavior depends
  171. // on what *scheme* was employed (file:///foo is handled *very*
  172. // differently than http:///foo), so unfortunately we have to
  173. // defer to the schemes to do the right thing.
  174. $result = '';
  175. if (!is_null($this->scheme)) $result .= $this->scheme . ':';
  176. if (!is_null($authority)) $result .= '//' . $authority;
  177. $result .= $this->path;
  178. if (!is_null($this->query)) $result .= '?' . $this->query;
  179. if (!is_null($this->fragment)) $result .= '#' . $this->fragment;
  180. return $result;
  181. }
  182. /**
  183. * Returns true if this URL might be considered a 'local' URL given
  184. * the current context. This is true when the host is null, or
  185. * when it matches the host supplied to the configuration.
  186. *
  187. * Note that this does not do any scheme checking, so it is mostly
  188. * only appropriate for metadata that doesn't care about protocol
  189. * security. isBenign is probably what you actually want.
  190. */
  191. public function isLocal($config, $context) {
  192. if ($this->host === null) return true;
  193. $uri_def = $config->getDefinition('URI');
  194. if ($uri_def->host === $this->host) return true;
  195. return false;
  196. }
  197. /**
  198. * Returns true if this URL should be considered a 'benign' URL,
  199. * that is:
  200. *
  201. * - It is a local URL (isLocal), and
  202. * - It has a equal or better level of security
  203. */
  204. public function isBenign($config, $context) {
  205. if (!$this->isLocal($config, $context)) return false;
  206. $scheme_obj = $this->getSchemeObj($config, $context);
  207. if (!$scheme_obj) return false; // conservative approach
  208. $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
  209. if ($current_scheme_obj->secure) {
  210. if (!$scheme_obj->secure) {
  211. return false;
  212. }
  213. }
  214. return true;
  215. }
  216. }
  217. // vim: et sw=4 sts=4