123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- <?php
- /**
- * Abstract class for a set of proprietary modules that clean up (tidy)
- * poorly written HTML.
- * @todo Figure out how to protect some of these methods/properties
- */
- class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
- {
- /**
- * List of supported levels. Index zero is a special case "no fixes"
- * level.
- */
- public $levels = array(0 => 'none', 'light', 'medium', 'heavy');
- /**
- * Default level to place all fixes in. Disabled by default
- */
- public $defaultLevel = null;
- /**
- * Lists of fixes used by getFixesForLevel(). Format is:
- * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
- */
- public $fixesForLevel = array(
- 'light' => array(),
- 'medium' => array(),
- 'heavy' => array()
- );
- /**
- * Lazy load constructs the module by determining the necessary
- * fixes to create and then delegating to the populate() function.
- * @todo Wildcard matching and error reporting when an added or
- * subtracted fix has no effect.
- */
- public function setup($config) {
- // create fixes, initialize fixesForLevel
- $fixes = $this->makeFixes();
- $this->makeFixesForLevel($fixes);
- // figure out which fixes to use
- $level = $config->get('HTML.TidyLevel');
- $fixes_lookup = $this->getFixesForLevel($level);
- // get custom fix declarations: these need namespace processing
- $add_fixes = $config->get('HTML.TidyAdd');
- $remove_fixes = $config->get('HTML.TidyRemove');
- foreach ($fixes as $name => $fix) {
- // needs to be refactored a little to implement globbing
- if (
- isset($remove_fixes[$name]) ||
- (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name]))
- ) {
- unset($fixes[$name]);
- }
- }
- // populate this module with necessary fixes
- $this->populate($fixes);
- }
- /**
- * Retrieves all fixes per a level, returning fixes for that specific
- * level as well as all levels below it.
- * @param $level String level identifier, see $levels for valid values
- * @return Lookup up table of fixes
- */
- public function getFixesForLevel($level) {
- if ($level == $this->levels[0]) {
- return array();
- }
- $activated_levels = array();
- for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
- $activated_levels[] = $this->levels[$i];
- if ($this->levels[$i] == $level) break;
- }
- if ($i == $c) {
- trigger_error(
- 'Tidy level ' . htmlspecialchars($level) . ' not recognized',
- E_USER_WARNING
- );
- return array();
- }
- $ret = array();
- foreach ($activated_levels as $level) {
- foreach ($this->fixesForLevel[$level] as $fix) {
- $ret[$fix] = true;
- }
- }
- return $ret;
- }
- /**
- * Dynamically populates the $fixesForLevel member variable using
- * the fixes array. It may be custom overloaded, used in conjunction
- * with $defaultLevel, or not used at all.
- */
- public function makeFixesForLevel($fixes) {
- if (!isset($this->defaultLevel)) return;
- if (!isset($this->fixesForLevel[$this->defaultLevel])) {
- trigger_error(
- 'Default level ' . $this->defaultLevel . ' does not exist',
- E_USER_ERROR
- );
- return;
- }
- $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
- }
- /**
- * Populates the module with transforms and other special-case code
- * based on a list of fixes passed to it
- * @param $lookup Lookup table of fixes to activate
- */
- public function populate($fixes) {
- foreach ($fixes as $name => $fix) {
- // determine what the fix is for
- list($type, $params) = $this->getFixType($name);
- switch ($type) {
- case 'attr_transform_pre':
- case 'attr_transform_post':
- $attr = $params['attr'];
- if (isset($params['element'])) {
- $element = $params['element'];
- if (empty($this->info[$element])) {
- $e = $this->addBlankElement($element);
- } else {
- $e = $this->info[$element];
- }
- } else {
- $type = "info_$type";
- $e = $this;
- }
- // PHP does some weird parsing when I do
- // $e->$type[$attr], so I have to assign a ref.
- $f =& $e->$type;
- $f[$attr] = $fix;
- break;
- case 'tag_transform':
- $this->info_tag_transform[$params['element']] = $fix;
- break;
- case 'child':
- case 'content_model_type':
- $element = $params['element'];
- if (empty($this->info[$element])) {
- $e = $this->addBlankElement($element);
- } else {
- $e = $this->info[$element];
- }
- $e->$type = $fix;
- break;
- default:
- trigger_error("Fix type $type not supported", E_USER_ERROR);
- break;
- }
- }
- }
- /**
- * Parses a fix name and determines what kind of fix it is, as well
- * as other information defined by the fix
- * @param $name String name of fix
- * @return array(string $fix_type, array $fix_parameters)
- * @note $fix_parameters is type dependant, see populate() for usage
- * of these parameters
- */
- public function getFixType($name) {
- // parse it
- $property = $attr = null;
- if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name);
- if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name);
- // figure out the parameters
- $params = array();
- if ($name !== '') $params['element'] = $name;
- if (!is_null($attr)) $params['attr'] = $attr;
- // special case: attribute transform
- if (!is_null($attr)) {
- if (is_null($property)) $property = 'pre';
- $type = 'attr_transform_' . $property;
- return array($type, $params);
- }
- // special case: tag transform
- if (is_null($property)) {
- return array('tag_transform', $params);
- }
- return array($property, $params);
- }
- /**
- * Defines all fixes the module will perform in a compact
- * associative array of fix name to fix implementation.
- */
- public function makeFixes() {}
- }
- // vim: et sw=4 sts=4
|