AttrDef.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. <?php
  2. /**
  3. * Base class for all validating attribute definitions.
  4. *
  5. * This family of classes forms the core for not only HTML attribute validation,
  6. * but also any sort of string that needs to be validated or cleaned (which
  7. * means CSS properties and composite definitions are defined here too).
  8. * Besides defining (through code) what precisely makes the string valid,
  9. * subclasses are also responsible for cleaning the code if possible.
  10. */
  11. abstract class HTMLPurifier_AttrDef
  12. {
  13. /**
  14. * Tells us whether or not an HTML attribute is minimized. Has no
  15. * meaning in other contexts.
  16. */
  17. public $minimized = false;
  18. /**
  19. * Tells us whether or not an HTML attribute is required. Has no
  20. * meaning in other contexts
  21. */
  22. public $required = false;
  23. /**
  24. * Validates and cleans passed string according to a definition.
  25. *
  26. * @param $string String to be validated and cleaned.
  27. * @param $config Mandatory HTMLPurifier_Config object.
  28. * @param $context Mandatory HTMLPurifier_AttrContext object.
  29. */
  30. abstract public function validate($string, $config, $context);
  31. /**
  32. * Convenience method that parses a string as if it were CDATA.
  33. *
  34. * This method process a string in the manner specified at
  35. * <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
  36. * leading and trailing whitespace, ignoring line feeds, and replacing
  37. * carriage returns and tabs with spaces. While most useful for HTML
  38. * attributes specified as CDATA, it can also be applied to most CSS
  39. * values.
  40. *
  41. * @note This method is not entirely standards compliant, as trim() removes
  42. * more types of whitespace than specified in the spec. In practice,
  43. * this is rarely a problem, as those extra characters usually have
  44. * already been removed by HTMLPurifier_Encoder.
  45. *
  46. * @warning This processing is inconsistent with XML's whitespace handling
  47. * as specified by section 3.3.3 and referenced XHTML 1.0 section
  48. * 4.7. However, note that we are NOT necessarily
  49. * parsing XML, thus, this behavior may still be correct. We
  50. * assume that newlines have been normalized.
  51. */
  52. public function parseCDATA($string) {
  53. $string = trim($string);
  54. $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
  55. return $string;
  56. }
  57. /**
  58. * Factory method for creating this class from a string.
  59. * @param $string String construction info
  60. * @return Created AttrDef object corresponding to $string
  61. */
  62. public function make($string) {
  63. // default implementation, return a flyweight of this object.
  64. // If $string has an effect on the returned object (i.e. you
  65. // need to overload this method), it is best
  66. // to clone or instantiate new copies. (Instantiation is safer.)
  67. return $this;
  68. }
  69. /**
  70. * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
  71. * properly. THIS IS A HACK!
  72. */
  73. protected function mungeRgb($string) {
  74. return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
  75. }
  76. /**
  77. * Parses a possibly escaped CSS string and returns the "pure"
  78. * version of it.
  79. */
  80. protected function expandCSSEscape($string) {
  81. // flexibly parse it
  82. $ret = '';
  83. for ($i = 0, $c = strlen($string); $i < $c; $i++) {
  84. if ($string[$i] === '\\') {
  85. $i++;
  86. if ($i >= $c) {
  87. $ret .= '\\';
  88. break;
  89. }
  90. if (ctype_xdigit($string[$i])) {
  91. $code = $string[$i];
  92. for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
  93. if (!ctype_xdigit($string[$i])) break;
  94. $code .= $string[$i];
  95. }
  96. // We have to be extremely careful when adding
  97. // new characters, to make sure we're not breaking
  98. // the encoding.
  99. $char = HTMLPurifier_Encoder::unichr(hexdec($code));
  100. if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
  101. $ret .= $char;
  102. if ($i < $c && trim($string[$i]) !== '') $i--;
  103. continue;
  104. }
  105. if ($string[$i] === "\n") continue;
  106. }
  107. $ret .= $string[$i];
  108. }
  109. return $ret;
  110. }
  111. }
  112. // vim: et sw=4 sts=4