<?php
  /**
   * Injects tokens into the document while parsing for well-formedness.
   * This enables "formatter-like" functionality such as auto-paragraphing,
   * smiley-ification and linkification to take place.
   *
   * A note on how handlers create changes: this is done by assigning a new
   * value to the $token reference. These values can take a variety of forms and
   * are best described in the HTMLPurifier_Strategy_MakeWellFormed->processToken()
   * documentation.
   *
   * @todo Allow injectors to request a re-run on their output. This
   *       would help if an operation is recursive.
   */
  abstract class HTMLPurifier_Injector
  {
      /**
       * Advisory name of injector, this is for friendly error messages
       */
      public $name;

      /**
       * Instance of HTMLPurifier_HTMLDefinition; assigned by prepare().
       */
      protected $htmlDefinition;

      /**
       * Reference to CurrentNesting variable in Context. This is an array
       * list of tokens that we are currently "inside". It stays unset (null)
       * until prepare() has completed successfully.
       */
      protected $currentNesting;

      /**
       * Reference to InputTokens variable in Context. This is an array
       * list of the input tokens that are being processed.
       */
      protected $inputTokens;

      /**
       * Reference to InputIndex variable in Context. This is an integer
       * array index for $this->inputTokens that indicates what token
       * is currently being processed.
       */
      protected $inputIndex;

      /**
       * Array of elements and attributes this injector creates and therefore
       * need to be allowed by the definition. Takes form of
       * array('element' => array('attr', 'attr2'), 'element2')
       */
      public $needed = array();

      /**
       * Index of inputTokens to rewind to, or false when no rewind is pending.
       */
      protected $rewind = false;

      /**
       * Rewind to a spot to re-perform processing. This is useful if you
       * deleted a node, and now need to see if this change affected any
       * earlier nodes. Rewinding does not affect other injectors, and can
       * result in infinite loops if not used carefully.
       * @warning HTML Purifier will prevent you from fast-forwarding with this
       *          function.
       * @param $index Index of inputTokens to rewind to
       */
      public function rewind($index)
      {
          $this->rewind = $index;
      }

      /**
       * Retrieves rewind, and then unsets it.
       * @return The pending rewind index, or false if none was requested
       */
      public function getRewind()
      {
          $pending = $this->rewind;
          $this->rewind = false;
          return $pending;
      }

      /**
       * Prepares the injector by giving it the config and context objects:
       * this allows references to important variables to be made within
       * the injector. This function also checks if the HTML environment
       * will work with the Injector (see checkNeeded()).
       * @param $config Instance of HTMLPurifier_Config
       * @param $context Instance of HTMLPurifier_Context
       * @return Boolean false if success, string of missing needed element/attribute if failure
       */
      public function prepare($config, $context)
      {
          $this->htmlDefinition = $config->getHTMLDefinition();
          // Even though this might fail, some unit tests ignore this and
          // still test checkNeeded, so be careful. Maybe get rid of that
          // dependency.
          $missing = $this->checkNeeded($config);
          if ($missing !== false) {
              // The context references below are deliberately left unbound
              // when the environment cannot support this injector.
              return $missing;
          }
          $this->currentNesting =& $context->get('CurrentNesting');
          $this->inputTokens    =& $context->get('InputTokens');
          $this->inputIndex     =& $context->get('InputIndex');
          return false;
      }

      /**
       * This function checks if the HTML environment
       * will work with the Injector: if p tags are not allowed, the
       * Auto-Paragraphing injector should not be enabled.
       * @param $config Instance of HTMLPurifier_Config
       * @return Boolean false if success, string of missing needed element/attribute if failure
       */
      public function checkNeeded($config)
      {
          $def = $config->getHTMLDefinition();
          foreach ($this->needed as $element => $attributes) {
              // A bare 'element' entry has an integer key and the element
              // name as its value; it carries no attribute requirements.
              if (is_int($element)) {
                  $element = $attributes;
              }
              if (!isset($def->info[$element])) {
                  return $element;
              }
              if (!is_array($attributes)) {
                  continue;
              }
              foreach ($attributes as $name) {
                  if (!isset($def->info[$element]->attr[$name])) {
                      return "$element.$name";
                  }
              }
          }
          return false;
      }

      /**
       * Tests if the context node allows a certain element
       * @param $name Name of element to test for
       * @return True if element is allowed, false if it is not
       */
      public function allowsElement($name)
      {
          $hasNesting = !empty($this->currentNesting);
          if ($hasNesting) {
              // Peek at the innermost open element without modifying the
              // nesting stack, which is shared by reference with the context.
              $parent_token = end($this->currentNesting);
              $parent = $this->htmlDefinition->info[$parent_token->name];
          } else {
              $parent = $this->htmlDefinition->info_parent_def;
          }
          if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
              return false;
          }
          // Check the remaining ancestors for exclusions. This is skipped when
          // there is no nesting: count() on a null stack raises a TypeError on
          // PHP 8 (and a warning on PHP 7.2+).
          if ($hasNesting) {
              for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
                  $node = $this->currentNesting[$i];
                  $def  = $this->htmlDefinition->info[$node->name];
                  if (isset($def->excludes[$name])) {
                      return false;
                  }
              }
          }
          return true;
      }

      /**
       * Iterator function, which starts with the next token and continues until
       * you reach the end of the input tokens.
       * @warning Please prevent previous references from interfering with this
       *          function by setting $i = null beforehand!
       * @param &$i Current integer index variable for inputTokens
       * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
       * @return Boolean true if $current was set, false if there is no token at the new index
       */
      protected function forward(&$i, &$current)
      {
          if ($i === null) {
              $i = $this->inputIndex + 1;
          } else {
              $i++;
          }
          if (!isset($this->inputTokens[$i])) {
              return false;
          }
          $current = $this->inputTokens[$i];
          return true;
      }

      /**
       * Similar to forward(), but accepts a third parameter $nesting (which
       * should be initialized at 0; null is treated as 0) and stops when we
       * hit the end tag for the node $this->inputIndex starts in.
       * @param &$i Current integer index variable for inputTokens
       * @param &$current Current token variable
       * @param &$nesting Depth counter of start tokens seen and not yet closed
       * @return Boolean false at the end of input or at an end token seen while
       *         $nesting is 0 or less, true otherwise
       */
      protected function forwardUntilEndToken(&$i, &$current, &$nesting)
      {
          if (!$this->forward($i, $current)) {
              return false;
          }
          if ($nesting === null) {
              $nesting = 0;
          }
          if ($current instanceof HTMLPurifier_Token_Start) {
              $nesting++;
          } elseif ($current instanceof HTMLPurifier_Token_End) {
              if ($nesting <= 0) {
                  // This end token was not opened during the iteration.
                  return false;
              }
              $nesting--;
          }
          return true;
      }

      /**
       * Iterator function, starts with the previous token and continues until
       * you reach the beginning of input tokens.
       * @warning Please prevent previous references from interfering with this
       *          function by setting $i = null beforehand!
       * @param &$i Current integer index variable for inputTokens
       * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
       * @return Boolean true if $current was set, false once $i drops below 0
       */
      protected function backward(&$i, &$current)
      {
          if ($i === null) {
              $i = $this->inputIndex - 1;
          } else {
              $i--;
          }
          if ($i < 0) {
              return false;
          }
          $current = $this->inputTokens[$i];
          return true;
      }

      /**
       * Initializes the iterator at the current position. Use in a do {} while;
       * loop to force the forward() and backward() functions to start at the
       * current location.
       * @warning Please prevent previous references from interfering with this
       *          function by setting $i = null beforehand!
       * @param &$i Current integer index variable for inputTokens
       * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
       */
      protected function current(&$i, &$current)
      {
          if ($i === null) {
              $i = $this->inputIndex;
          }
          $current = $this->inputTokens[$i];
      }

      /**
       * Handler that is called when a text token is processed
       */
      public function handleText(&$token)
      {
      }

      /**
       * Handler that is called when a start or empty token is processed
       */
      public function handleElement(&$token)
      {
      }

      /**
       * Handler that is called when an end token is processed.
       * The default implementation forwards the token to notifyEnd().
       */
      public function handleEnd(&$token)
      {
          $this->notifyEnd($token);
      }

      /**
       * Notifier that is called when an end token is processed
       * @note This differs from handlers in that the token is read-only
       * @deprecated
       */
      public function notifyEnd($token)
      {
      }
  }
  // vim: et sw=4 sts=4