Tidy.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. <?php
  2. /**
  3. * Abstract class for a set of proprietary modules that clean up (tidy)
  4. * poorly written HTML.
  5. * @todo Figure out how to protect some of these methods/properties
  6. */
  class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
  {

      /**
       * List of supported levels, in the order getFixesForLevel() walks
       * them. Index zero is a special case "no fixes" level.
       */
      public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

      /**
       * Default level to place all fixes in. Disabled (null) by default,
       * in which case makeFixesForLevel() leaves $fixesForLevel untouched.
       */
      public $defaultLevel = null;

      /**
       * Lists of fixes used by getFixesForLevel(). Format is:
       *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
       */
      public $fixesForLevel = array(
          'light'  => array(),
          'medium' => array(),
          'heavy'  => array()
      );

      /**
       * Lazy load constructs the module by determining the necessary
       * fixes to create and then delegating to the populate() function.
       * @param $config Configuration object; the HTML.TidyLevel, HTML.TidyAdd
       *        and HTML.TidyRemove directives are read from it. The latter
       *        two are used as lookup tables keyed by fix name.
       * @todo Wildcard matching and error reporting when an added or
       *       subtracted fix has no effect.
       */
      public function setup($config) {
          // create fixes, initialize fixesForLevel
          $fixes = $this->makeFixes();
          if ($fixes === null) {
              // a makeFixes() implementation that returns nothing means
              // "no fixes"; normalize so the array operations below (and
              // array_keys() in makeFixesForLevel()) never receive null
              $fixes = array();
          }
          $this->makeFixesForLevel($fixes);

          // figure out which fixes to use
          $level = $config->get('HTML.TidyLevel');
          $fixes_lookup = $this->getFixesForLevel($level);

          // get custom fix declarations: these need namespace processing
          $add_fixes    = $config->get('HTML.TidyAdd');
          $remove_fixes = $config->get('HTML.TidyRemove');

          // drop every fix that was explicitly removed, or that is neither
          // explicitly added nor part of the selected level
          foreach ($fixes as $name => $fix) {
              // needs to be refactored a little to implement globbing
              if (
                  isset($remove_fixes[$name]) ||
                  (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
              ) {
                  unset($fixes[$name]);
              }
          }

          // populate this module with necessary fixes
          $this->populate($fixes);
      }

      /**
       * Retrieves all fixes per a level, returning fixes for that specific
       * level as well as all levels below it.
       * @param $level String level identifier, see $levels for valid values
       * @return Lookup table of fixes (fix name => true). Empty for the
       *         "no fixes" level and, after an E_USER_WARNING, for a level
       *         that is not listed in $levels.
       */
      public function getFixesForLevel($level) {
          // loose comparison is retained so existing callers see no change
          if ($level == $this->levels[0]) {
              return array();
          }

          // collect every level from index 1 up to and including the
          // requested one
          $activated_levels = array();
          $found = false;
          for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
              $activated_levels[] = $this->levels[$i];
              if ($this->levels[$i] == $level) {
                  $found = true;
                  break;
              }
          }
          if (!$found) {
              trigger_error(
                  'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                  E_USER_WARNING
              );
              return array();
          }

          // flatten the per-level lists into a single lookup table
          $ret = array();
          foreach ($activated_levels as $activated_level) {
              foreach ($this->fixesForLevel[$activated_level] as $fix) {
                  $ret[$fix] = true;
              }
          }
          return $ret;
      }

      /**
       * Dynamically populates the $fixesForLevel member variable using
       * the fixes array. It may be custom overloaded, used in conjunction
       * with $defaultLevel, or not used at all.
       * @param $fixes Associative array of fix name to fix implementation;
       *        only the names (keys) are used here
       */
      public function makeFixesForLevel($fixes) {
          // nothing to do unless a default level was chosen
          if (!isset($this->defaultLevel)) {
              return;
          }
          if (!isset($this->fixesForLevel[$this->defaultLevel])) {
              trigger_error(
                  'Default level ' . $this->defaultLevel . ' does not exist',
                  E_USER_ERROR
              );
              return;
          }
          // every fix goes into the default level
          $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
      }

      /**
       * Populates the module with transforms and other special-case code
       * based on a list of fixes passed to it
       * @param $fixes Associative array of fix name to fix implementation
       *        to activate; each name is parsed with getFixType()
       */
      public function populate($fixes) {
          foreach ($fixes as $name => $fix) {
              // determine what the fix is for
              list($type, $params) = $this->getFixType($name);
              switch ($type) {
                  case 'attr_transform_pre':
                  case 'attr_transform_post':
                      $attr = $params['attr'];
                      if (isset($params['element'])) {
                          // element-specific transform: stored on the
                          // element definition under the $type property
                          $e = $this->fetchOrAddBlankElement($params['element']);
                      } else {
                          // no element given: stored on the module itself
                          // under the info_-prefixed property
                          $type = "info_$type";
                          $e = $this;
                      }
                      // PHP does some weird parsing when I do
                      // $e->$type[$attr], so I have to assign a ref.
                      $f =& $e->$type;
                      $f[$attr] = $fix;
                      break;
                  case 'tag_transform':
                      $this->info_tag_transform[$params['element']] = $fix;
                      break;
                  case 'child':
                  case 'content_model_type':
                      // the fix replaces the property named after $type
                      // on the element definition
                      $e = $this->fetchOrAddBlankElement($params['element']);
                      $e->$type = $fix;
                      break;
                  default:
                      trigger_error("Fix type $type not supported", E_USER_ERROR);
                      break;
              }
          }
      }

      /**
       * Returns the entry stored in $this->info for the given element,
       * creating one with addBlankElement() when the entry is missing
       * or empty.
       * @param $element String element name
       * @return Whatever $this->info[$element] holds, or the return value
       *         of addBlankElement()
       */
      private function fetchOrAddBlankElement($element) {
          if (empty($this->info[$element])) {
              return $this->addBlankElement($element);
          }
          return $this->info[$element];
      }

      /**
       * Parses a fix name and determines what kind of fix it is, as well
       * as other information defined by the fix.
       *
       * The name is split as element@attr#property, where every part is
       * optional:
       *  - with an @attr part the type is 'attr_transform_' . property
       *    (property defaults to 'pre');
       *  - with no @attr and no #property the type is 'tag_transform';
       *  - otherwise the type is the property itself.
       *
       * @param $name String name of fix
       * @return array(string $fix_type, array $fix_parameters)
       * @note $fix_parameters is type dependent, see populate() for usage
       *       of these parameters. It may contain the keys 'element'
       *       (omitted when the element part is empty) and 'attr'.
       */
      public function getFixType($name) {
          // parse it
          $property = $attr = null;
          if (strpos($name, '#') !== false) {
              list($name, $property) = explode('#', $name);
          }
          if (strpos($name, '@') !== false) {
              list($name, $attr) = explode('@', $name);
          }

          // figure out the parameters
          $params = array();
          if ($name !== '') {
              $params['element'] = $name;
          }

          // special case: attribute transform
          if (!is_null($attr)) {
              $params['attr'] = $attr;
              if (is_null($property)) {
                  $property = 'pre';
              }
              return array('attr_transform_' . $property, $params);
          }

          // special case: tag transform
          if (is_null($property)) {
              return array('tag_transform', $params);
          }

          return array($property, $params);
      }

      /**
       * Defines all fixes the module will perform in a compact
       * associative array of fix name to fix implementation.
       * @return Associative array of fix name to fix implementation. This
       *         base implementation defines no fixes and returns an empty
       *         array; subclasses override it.
       */
      public function makeFixes() {
          return array();
      }

  }
  184. // vim: et sw=4 sts=4