Host.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. <?php
  /**
   * Validates the host component of a URI.
   *
   * Accepted forms are: the empty string, a bracketed IPv6 literal, anything
   * the IPv4 sub-validator accepts, and an ASCII domain name. When the
   * Core.EnableIDNA directive is enabled, labels containing bytes above 'z'
   * are Punycode-encoded through Net_IDNA2 and the result is checked against
   * the same domain-name grammar.
   */
  class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
  {
      /**
       * Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
       */
      protected $ipv4;

      /**
       * Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
       */
      protected $ipv6;

      /**
       * Creates the IPv4 and IPv6 sub-validators used by validate().
       */
      public function __construct()
      {
          $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
          $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
      }

      /**
       * Validates a host string.
       *
       * @param string $string  Host to validate.
       * @param mixed  $config  Configuration object; queried with
       *                        get('Core.EnableIDNA') and handed on to the
       *                        sub-validators.
       * @param mixed  $context Context object, handed on to the sub-validators.
       * @return string|bool    The empty string for an empty host, the
       *                        re-bracketed IPv6 result, the IPv4
       *                        sub-validator's result, the (possibly
       *                        Punycode-encoded) host name, or false when
       *                        nothing matched.
       */
      public function validate($string, $config, $context)
      {
          // empty hostname is OK; it's usually semantically equivalent:
          // the default host as defined by a URI scheme is used:
          //
          //      If the URI scheme defines a default for host, then that
          //      default applies when the host subcomponent is undefined
          //      or when the registered name is empty (zero length).
          if ($string === '') {
              return '';
          }

          $length = strlen($string);
          if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
              // IPv6 literal: validate the part between the brackets and
              // put the brackets back around whatever the sub-validator returns.
              $ip = substr($string, 1, $length - 2);
              $valid = $this->ipv6->validate($ip, $config, $context);
              if ($valid === false) {
                  return false;
              }
              return '[' . $valid . ']';
          }

          // need to do checks on unusual encodings too
          $ipv4 = $this->ipv4->validate($string, $config, $context);
          if ($ipv4 !== false) {
              return $ipv4;
          }

          // A regular domain name.

          // This doesn't match I18N domain names, but we don't have proper IRI
          // support, so force users to insert Punycode.

          // The productions describing this are:
          $a   = '[a-z]';      // alpha
          $an  = '[a-z0-9]';   // alphanum
          $and = '[a-z0-9-]';  // alphanum | "-"
          // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
          $domainlabel = "{$an}(?:{$and}*{$an})?";
          // toplabel    = alpha | alpha *( alphanum | "-" ) alphanum
          $toplabel = "{$a}(?:{$and}*{$an})?";
          // hostname    = *( domainlabel "." ) toplabel [ "." ]
          //
          // The D modifier makes "$" match only at the very end of the subject.
          // Without it "$" also matches just before a trailing newline, so a
          // host like "example.com\n" would be accepted and returned verbatim.
          $hostname = "/^(?:{$domainlabel}\.)*{$toplabel}\.?$/iD";

          if (preg_match($hostname, $string)) {
              return $string;
          }

          // If we have Net_IDNA2 support, we can support IRIs by
          // punycoding them. (Punycode is the most portable form to emit;
          // passing the raw name through would rely on whatever consumes the
          // URI handling internationalized host names itself.)
          if ($config->get('Core.EnableIDNA')) {
              $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
              // we need to encode each period separately
              $parts = explode('.', $string);
              try {
                  $new_parts = array();
                  foreach ($parts as $part) {
                      // Only labels containing a byte above 'z' (0x7a) are
                      // sent to the encoder; the others are kept untouched.
                      $encodable = false;
                      for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                          if (ord($part[$i]) > 0x7a) {
                              $encodable = true;
                              break;
                          }
                      }
                      $new_parts[] = $encodable ? $idna->encode($part) : $part;
                  }
                  $string = implode('.', $new_parts);
                  // The encoded name must still satisfy the hostname grammar.
                  if (preg_match($hostname, $string)) {
                      return $string;
                  }
              } catch (Exception $e) {
                  // XXX error reporting
                  // Deliberate best effort: an encoding failure falls through
                  // to the "invalid host" result below.
              }
          }

          return false;
      }
  }
  87. // vim: et sw=4 sts=4