HTMLModule.php 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. <?php
  2. /**
  3. * Represents an XHTML 1.1 module, with information on elements, tags
  4. * and attributes.
  5. * @note Even though this is technically XHTML 1.1, it is also used for
  6. * regular HTML parsing. We are using modulization as a convenient
  7. * way to represent the internals of HTMLDefinition, and our
  8. * implementation is by no means conforming and does not directly
  9. * use the normative DTDs or XML schemas.
  10. * @note The public variables in a module should almost directly
  11. * correspond to the variables in HTMLPurifier_HTMLDefinition.
  12. * However, the prefix info carries no special meaning in these
  13. * objects (include it anyway if that's the correspondence though).
  14. * @todo Consider making some member functions protected
  15. */
  16. class HTMLPurifier_HTMLModule
  17. {
  18. // -- Overloadable ----------------------------------------------------
  19. /**
  20. * Short unique string identifier of the module
  21. */
  22. public $name;
  23. /**
  24. * Informally, a list of elements this module changes. Not used in
  25. * any significant way.
  26. */
  27. public $elements = array();
  28. /**
  29. * Associative array of element names to element definitions.
  30. * Some definitions may be incomplete, to be merged in later
  31. * with the full definition.
  32. */
  33. public $info = array();
  34. /**
  35. * Associative array of content set names to content set additions.
  36. * This is commonly used to, say, add an A element to the Inline
  37. * content set. This corresponds to an internal variable $content_sets
  38. * and NOT info_content_sets member variable of HTMLDefinition.
  39. */
  40. public $content_sets = array();
  41. /**
  42. * Associative array of attribute collection names to attribute
  43. * collection additions. More rarely used for adding attributes to
  44. * the global collections. Example is the StyleAttribute module adding
  45. * the style attribute to the Core. Corresponds to HTMLDefinition's
  46. * attr_collections->info, since the object's data is only info,
  47. * with extra behavior associated with it.
  48. */
  49. public $attr_collections = array();
  50. /**
  51. * Associative array of deprecated tag name to HTMLPurifier_TagTransform
  52. */
  53. public $info_tag_transform = array();
  54. /**
  55. * List of HTMLPurifier_AttrTransform to be performed before validation.
  56. */
  57. public $info_attr_transform_pre = array();
  58. /**
  59. * List of HTMLPurifier_AttrTransform to be performed after validation.
  60. */
  61. public $info_attr_transform_post = array();
  62. /**
  63. * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
  64. * An injector will only be invoked if all of it's pre-requisites are met;
  65. * if an injector fails setup, there will be no error; it will simply be
  66. * silently disabled.
  67. */
  68. public $info_injector = array();
  69. /**
  70. * Boolean flag that indicates whether or not getChildDef is implemented.
  71. * For optimization reasons: may save a call to a function. Be sure
  72. * to set it if you do implement getChildDef(), otherwise it will have
  73. * no effect!
  74. */
  75. public $defines_child_def = false;
  76. /**
  77. * Boolean flag whether or not this module is safe. If it is not safe, all
  78. * of its members are unsafe. Modules are safe by default (this might be
  79. * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
  80. * which is based off of safe HTML, to explicitly say, "This is safe," even
  81. * though there are modules which are "unsafe")
  82. *
  83. * @note Previously, safety could be applied at an element level granularity.
  84. * We've removed this ability, so in order to add "unsafe" elements
  85. * or attributes, a dedicated module with this property set to false
  86. * must be used.
  87. */
  88. public $safe = true;
  89. /**
  90. * Retrieves a proper HTMLPurifier_ChildDef subclass based on
  91. * content_model and content_model_type member variables of
  92. * the HTMLPurifier_ElementDef class. There is a similar function
  93. * in HTMLPurifier_HTMLDefinition.
  94. * @param $def HTMLPurifier_ElementDef instance
  95. * @return HTMLPurifier_ChildDef subclass
  96. */
  97. public function getChildDef($def) {return false;}
  98. // -- Convenience -----------------------------------------------------
  99. /**
  100. * Convenience function that sets up a new element
  101. * @param $element Name of element to add
  102. * @param $type What content set should element be registered to?
  103. * Set as false to skip this step.
  104. * @param $contents Allowed children in form of:
  105. * "$content_model_type: $content_model"
  106. * @param $attr_includes What attribute collections to register to
  107. * element?
  108. * @param $attr What unique attributes does the element define?
  109. * @note See ElementDef for in-depth descriptions of these parameters.
  110. * @return Created element definition object, so you
  111. * can set advanced parameters
  112. */
  113. public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
  114. $this->elements[] = $element;
  115. // parse content_model
  116. list($content_model_type, $content_model) = $this->parseContents($contents);
  117. // merge in attribute inclusions
  118. $this->mergeInAttrIncludes($attr, $attr_includes);
  119. // add element to content sets
  120. if ($type) $this->addElementToContentSet($element, $type);
  121. // create element
  122. $this->info[$element] = HTMLPurifier_ElementDef::create(
  123. $content_model, $content_model_type, $attr
  124. );
  125. // literal object $contents means direct child manipulation
  126. if (!is_string($contents)) $this->info[$element]->child = $contents;
  127. return $this->info[$element];
  128. }
  129. /**
  130. * Convenience function that creates a totally blank, non-standalone
  131. * element.
  132. * @param $element Name of element to create
  133. * @return Created element
  134. */
  135. public function addBlankElement($element) {
  136. if (!isset($this->info[$element])) {
  137. $this->elements[] = $element;
  138. $this->info[$element] = new HTMLPurifier_ElementDef();
  139. $this->info[$element]->standalone = false;
  140. } else {
  141. trigger_error("Definition for $element already exists in module, cannot redefine");
  142. }
  143. return $this->info[$element];
  144. }
  145. /**
  146. * Convenience function that registers an element to a content set
  147. * @param Element to register
  148. * @param Name content set (warning: case sensitive, usually upper-case
  149. * first letter)
  150. */
  151. public function addElementToContentSet($element, $type) {
  152. if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
  153. else $this->content_sets[$type] .= ' | ';
  154. $this->content_sets[$type] .= $element;
  155. }
  156. /**
  157. * Convenience function that transforms single-string contents
  158. * into separate content model and content model type
  159. * @param $contents Allowed children in form of:
  160. * "$content_model_type: $content_model"
  161. * @note If contents is an object, an array of two nulls will be
  162. * returned, and the callee needs to take the original $contents
  163. * and use it directly.
  164. */
  165. public function parseContents($contents) {
  166. if (!is_string($contents)) return array(null, null); // defer
  167. switch ($contents) {
  168. // check for shorthand content model forms
  169. case 'Empty':
  170. return array('empty', '');
  171. case 'Inline':
  172. return array('optional', 'Inline | #PCDATA');
  173. case 'Flow':
  174. return array('optional', 'Flow | #PCDATA');
  175. }
  176. list($content_model_type, $content_model) = explode(':', $contents);
  177. $content_model_type = strtolower(trim($content_model_type));
  178. $content_model = trim($content_model);
  179. return array($content_model_type, $content_model);
  180. }
  181. /**
  182. * Convenience function that merges a list of attribute includes into
  183. * an attribute array.
  184. * @param $attr Reference to attr array to modify
  185. * @param $attr_includes Array of includes / string include to merge in
  186. */
  187. public function mergeInAttrIncludes(&$attr, $attr_includes) {
  188. if (!is_array($attr_includes)) {
  189. if (empty($attr_includes)) $attr_includes = array();
  190. else $attr_includes = array($attr_includes);
  191. }
  192. $attr[0] = $attr_includes;
  193. }
  194. /**
  195. * Convenience function that generates a lookup table with boolean
  196. * true as value.
  197. * @param $list List of values to turn into a lookup
  198. * @note You can also pass an arbitrary number of arguments in
  199. * place of the regular argument
  200. * @return Lookup array equivalent of list
  201. */
  202. public function makeLookup($list) {
  203. if (is_string($list)) $list = func_get_args();
  204. $ret = array();
  205. foreach ($list as $value) {
  206. if (is_null($value)) continue;
  207. $ret[$value] = true;
  208. }
  209. return $ret;
  210. }
  211. /**
  212. * Lazy load construction of the module after determining whether
  213. * or not it's needed, and also when a finalized configuration object
  214. * is available.
  215. * @param $config Instance of HTMLPurifier_Config
  216. */
  217. public function setup($config) {}
  218. }
  219. // vim: et sw=4 sts=4