HTMLModuleManager.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. <?php
  2. class HTMLPurifier_HTMLModuleManager
  3. {
  4. /**
  5. * Instance of HTMLPurifier_DoctypeRegistry
  6. */
  7. public $doctypes;
  8. /**
  9. * Instance of current doctype
  10. */
  11. public $doctype;
  12. /**
  13. * Instance of HTMLPurifier_AttrTypes
  14. */
  15. public $attrTypes;
  16. /**
  17. * Active instances of modules for the specified doctype are
  18. * indexed, by name, in this array.
  19. */
  20. public $modules = array();
  21. /**
  22. * Array of recognized HTMLPurifier_Module instances, indexed by
  23. * module's class name. This array is usually lazy loaded, but a
  24. * user can overload a module by pre-emptively registering it.
  25. */
  26. public $registeredModules = array();
  27. /**
  28. * List of extra modules that were added by the user using addModule().
  29. * These get unconditionally merged into the current doctype, whatever
  30. * it may be.
  31. */
  32. public $userModules = array();
  33. /**
  34. * Associative array of element name to list of modules that have
  35. * definitions for the element; this array is dynamically filled.
  36. */
  37. public $elementLookup = array();
  38. /** List of prefixes we should use for registering small names */
  39. public $prefixes = array('HTMLPurifier_HTMLModule_');
  40. public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
  41. public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
  42. /** If set to true, unsafe elements and attributes will be allowed */
  43. public $trusted = false;
  44. public function __construct() {
  45. // editable internal objects
  46. $this->attrTypes = new HTMLPurifier_AttrTypes();
  47. $this->doctypes = new HTMLPurifier_DoctypeRegistry();
  48. // setup basic modules
  49. $common = array(
  50. 'CommonAttributes', 'Text', 'Hypertext', 'List',
  51. 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
  52. 'StyleAttribute',
  53. // Unsafe:
  54. 'Scripting', 'Object', 'Forms',
  55. // Sorta legacy, but present in strict:
  56. 'Name',
  57. );
  58. $transitional = array('Legacy', 'Target', 'Iframe');
  59. $xml = array('XMLCommonAttributes');
  60. $non_xml = array('NonXMLCommonAttributes');
  61. // setup basic doctypes
  62. $this->doctypes->register(
  63. 'HTML 4.01 Transitional', false,
  64. array_merge($common, $transitional, $non_xml),
  65. array('Tidy_Transitional', 'Tidy_Proprietary'),
  66. array(),
  67. '-//W3C//DTD HTML 4.01 Transitional//EN',
  68. 'http://www.w3.org/TR/html4/loose.dtd'
  69. );
  70. $this->doctypes->register(
  71. 'HTML 4.01 Strict', false,
  72. array_merge($common, $non_xml),
  73. array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
  74. array(),
  75. '-//W3C//DTD HTML 4.01//EN',
  76. 'http://www.w3.org/TR/html4/strict.dtd'
  77. );
  78. $this->doctypes->register(
  79. 'XHTML 1.0 Transitional', true,
  80. array_merge($common, $transitional, $xml, $non_xml),
  81. array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
  82. array(),
  83. '-//W3C//DTD XHTML 1.0 Transitional//EN',
  84. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
  85. );
  86. $this->doctypes->register(
  87. 'XHTML 1.0 Strict', true,
  88. array_merge($common, $xml, $non_xml),
  89. array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
  90. array(),
  91. '-//W3C//DTD XHTML 1.0 Strict//EN',
  92. 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
  93. );
  94. $this->doctypes->register(
  95. 'XHTML 1.1', true,
  96. // Iframe is a real XHTML 1.1 module, despite being
  97. // "transitional"!
  98. array_merge($common, $xml, array('Ruby', 'Iframe')),
  99. array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
  100. array(),
  101. '-//W3C//DTD XHTML 1.1//EN',
  102. 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
  103. );
  104. }
  105. /**
  106. * Registers a module to the recognized module list, useful for
  107. * overloading pre-existing modules.
  108. * @param $module Mixed: string module name, with or without
  109. * HTMLPurifier_HTMLModule prefix, or instance of
  110. * subclass of HTMLPurifier_HTMLModule.
  111. * @param $overload Boolean whether or not to overload previous modules.
  112. * If this is not set, and you do overload a module,
  113. * HTML Purifier will complain with a warning.
  114. * @note This function will not call autoload, you must instantiate
  115. * (and thus invoke) autoload outside the method.
  116. * @note If a string is passed as a module name, different variants
  117. * will be tested in this order:
  118. * - Check for HTMLPurifier_HTMLModule_$name
  119. * - Check all prefixes with $name in order they were added
  120. * - Check for literal object name
  121. * - Throw fatal error
  122. * If your object name collides with an internal class, specify
  123. * your module manually. All modules must have been included
  124. * externally: registerModule will not perform inclusions for you!
  125. */
  126. public function registerModule($module, $overload = false) {
  127. if (is_string($module)) {
  128. // attempt to load the module
  129. $original_module = $module;
  130. $ok = false;
  131. foreach ($this->prefixes as $prefix) {
  132. $module = $prefix . $original_module;
  133. if (class_exists($module)) {
  134. $ok = true;
  135. break;
  136. }
  137. }
  138. if (!$ok) {
  139. $module = $original_module;
  140. if (!class_exists($module)) {
  141. trigger_error($original_module . ' module does not exist',
  142. E_USER_ERROR);
  143. return;
  144. }
  145. }
  146. $module = new $module();
  147. }
  148. if (empty($module->name)) {
  149. trigger_error('Module instance of ' . get_class($module) . ' must have name');
  150. return;
  151. }
  152. if (!$overload && isset($this->registeredModules[$module->name])) {
  153. trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
  154. }
  155. $this->registeredModules[$module->name] = $module;
  156. }
  157. /**
  158. * Adds a module to the current doctype by first registering it,
  159. * and then tacking it on to the active doctype
  160. */
  161. public function addModule($module) {
  162. $this->registerModule($module);
  163. if (is_object($module)) $module = $module->name;
  164. $this->userModules[] = $module;
  165. }
  166. /**
  167. * Adds a class prefix that registerModule() will use to resolve a
  168. * string name to a concrete class
  169. */
  170. public function addPrefix($prefix) {
  171. $this->prefixes[] = $prefix;
  172. }
  173. /**
  174. * Performs processing on modules, after being called you may
  175. * use getElement() and getElements()
  176. * @param $config Instance of HTMLPurifier_Config
  177. */
  178. public function setup($config) {
  179. $this->trusted = $config->get('HTML.Trusted');
  180. // generate
  181. $this->doctype = $this->doctypes->make($config);
  182. $modules = $this->doctype->modules;
  183. // take out the default modules that aren't allowed
  184. $lookup = $config->get('HTML.AllowedModules');
  185. $special_cases = $config->get('HTML.CoreModules');
  186. if (is_array($lookup)) {
  187. foreach ($modules as $k => $m) {
  188. if (isset($special_cases[$m])) continue;
  189. if (!isset($lookup[$m])) unset($modules[$k]);
  190. }
  191. }
  192. // custom modules
  193. if ($config->get('HTML.Proprietary')) {
  194. $modules[] = 'Proprietary';
  195. }
  196. if ($config->get('HTML.SafeObject')) {
  197. $modules[] = 'SafeObject';
  198. }
  199. if ($config->get('HTML.SafeEmbed')) {
  200. $modules[] = 'SafeEmbed';
  201. }
  202. if ($config->get('HTML.SafeScripting') !== array()) {
  203. $modules[] = 'SafeScripting';
  204. }
  205. if ($config->get('HTML.Nofollow')) {
  206. $modules[] = 'Nofollow';
  207. }
  208. if ($config->get('HTML.TargetBlank')) {
  209. $modules[] = 'TargetBlank';
  210. }
  211. // merge in custom modules
  212. $modules = array_merge($modules, $this->userModules);
  213. foreach ($modules as $module) {
  214. $this->processModule($module);
  215. $this->modules[$module]->setup($config);
  216. }
  217. foreach ($this->doctype->tidyModules as $module) {
  218. $this->processModule($module);
  219. $this->modules[$module]->setup($config);
  220. }
  221. // prepare any injectors
  222. foreach ($this->modules as $module) {
  223. $n = array();
  224. foreach ($module->info_injector as $i => $injector) {
  225. if (!is_object($injector)) {
  226. $class = "HTMLPurifier_Injector_$injector";
  227. $injector = new $class;
  228. }
  229. $n[$injector->name] = $injector;
  230. }
  231. $module->info_injector = $n;
  232. }
  233. // setup lookup table based on all valid modules
  234. foreach ($this->modules as $module) {
  235. foreach ($module->info as $name => $def) {
  236. if (!isset($this->elementLookup[$name])) {
  237. $this->elementLookup[$name] = array();
  238. }
  239. $this->elementLookup[$name][] = $module->name;
  240. }
  241. }
  242. // note the different choice
  243. $this->contentSets = new HTMLPurifier_ContentSets(
  244. // content set assembly deals with all possible modules,
  245. // not just ones deemed to be "safe"
  246. $this->modules
  247. );
  248. $this->attrCollections = new HTMLPurifier_AttrCollections(
  249. $this->attrTypes,
  250. // there is no way to directly disable a global attribute,
  251. // but using AllowedAttributes or simply not including
  252. // the module in your custom doctype should be sufficient
  253. $this->modules
  254. );
  255. }
  256. /**
  257. * Takes a module and adds it to the active module collection,
  258. * registering it if necessary.
  259. */
  260. public function processModule($module) {
  261. if (!isset($this->registeredModules[$module]) || is_object($module)) {
  262. $this->registerModule($module);
  263. }
  264. $this->modules[$module] = $this->registeredModules[$module];
  265. }
  266. /**
  267. * Retrieves merged element definitions.
  268. * @return Array of HTMLPurifier_ElementDef
  269. */
  270. public function getElements() {
  271. $elements = array();
  272. foreach ($this->modules as $module) {
  273. if (!$this->trusted && !$module->safe) continue;
  274. foreach ($module->info as $name => $v) {
  275. if (isset($elements[$name])) continue;
  276. $elements[$name] = $this->getElement($name);
  277. }
  278. }
  279. // remove dud elements, this happens when an element that
  280. // appeared to be safe actually wasn't
  281. foreach ($elements as $n => $v) {
  282. if ($v === false) unset($elements[$n]);
  283. }
  284. return $elements;
  285. }
  286. /**
  287. * Retrieves a single merged element definition
  288. * @param $name Name of element
  289. * @param $trusted Boolean trusted overriding parameter: set to true
  290. * if you want the full version of an element
  291. * @return Merged HTMLPurifier_ElementDef
  292. * @note You may notice that modules are getting iterated over twice (once
  293. * in getElements() and once here). This
  294. * is because
  295. */
  296. public function getElement($name, $trusted = null) {
  297. if (!isset($this->elementLookup[$name])) {
  298. return false;
  299. }
  300. // setup global state variables
  301. $def = false;
  302. if ($trusted === null) $trusted = $this->trusted;
  303. // iterate through each module that has registered itself to this
  304. // element
  305. foreach($this->elementLookup[$name] as $module_name) {
  306. $module = $this->modules[$module_name];
  307. // refuse to create/merge from a module that is deemed unsafe--
  308. // pretend the module doesn't exist--when trusted mode is not on.
  309. if (!$trusted && !$module->safe) {
  310. continue;
  311. }
  312. // clone is used because, ideally speaking, the original
  313. // definition should not be modified. Usually, this will
  314. // make no difference, but for consistency's sake
  315. $new_def = clone $module->info[$name];
  316. if (!$def && $new_def->standalone) {
  317. $def = $new_def;
  318. } elseif ($def) {
  319. // This will occur even if $new_def is standalone. In practice,
  320. // this will usually result in a full replacement.
  321. $def->mergeIn($new_def);
  322. } else {
  323. // :TODO:
  324. // non-standalone definitions that don't have a standalone
  325. // to merge into could be deferred to the end
  326. // HOWEVER, it is perfectly valid for a non-standalone
  327. // definition to lack a standalone definition, even
  328. // after all processing: this allows us to safely
  329. // specify extra attributes for elements that may not be
  330. // enabled all in one place. In particular, this might
  331. // be the case for trusted elements. WARNING: care must
  332. // be taken that the /extra/ definitions are all safe.
  333. continue;
  334. }
  335. // attribute value expansions
  336. $this->attrCollections->performInclusions($def->attr);
  337. $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
  338. // descendants_are_inline, for ChildDef_Chameleon
  339. if (is_string($def->content_model) &&
  340. strpos($def->content_model, 'Inline') !== false) {
  341. if ($name != 'del' && $name != 'ins') {
  342. // this is for you, ins/del
  343. $def->descendants_are_inline = true;
  344. }
  345. }
  346. $this->contentSets->generateChildDef($def, $module);
  347. }
  348. // This can occur if there is a blank definition, but no base to
  349. // mix it in with
  350. if (!$def) return false;
  351. // add information on required attributes
  352. foreach ($def->attr as $attr_name => $attr_def) {
  353. if ($attr_def->required) {
  354. $def->required_attr[] = $attr_name;
  355. }
  356. }
  357. return $def;
  358. }
  359. }
// vim: et sw=4 sts=4