RemoveForeignElements.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. <?php
  /**
   * Strategy that filters a token list down to what the active HTML
   * definition recognizes.
   *
   * Every tag token is first run through the tag transform registered for
   * its name, if there is one. A tag the definition still does not know
   * afterwards is either rendered back to markup and kept as text
   * (Core.EscapeInvalidTags) or dropped. When a dropped start tag names a
   * hidden element, the tokens that follow are skipped until a tag with the
   * same name shows up again. Comments survive only in trusted mode, or as
   * text while inside a recognized hidden element.
   */
  class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
  {

      /**
       * Walks the tokens in order and returns the ones that are kept.
       *
       * @param array  $tokens  Token objects to filter; iterated front to back.
       * @param object $config  Configuration source; read through get() and
       *                        getHTMLDefinition().
       * @param object $context Shared context; 'CurrentToken' is registered for
       *                        the duration of the call and destroyed before
       *                        returning. 'ErrorCollector' is fetched from it
       *                        when Core.CollectErrors is enabled.
       * @return array Surviving tokens, in their original relative order.
       */
      public function execute($tokens, $config, $context)
      {
          $html_def = $config->getHTMLDefinition();
          $printer  = new HTMLPurifier_Generator($config, $context);
          $kept     = array();

          $escape_foreign = $config->get('Core.EscapeInvalidTags');
          $drop_bad_img   = $config->get('Core.RemoveInvalidImg');
          // the trusted flag is consulted here only to decide the fate of comments
          $keep_comments  = $config->get('HTML.Trusted');

          $script_setting = $config->get('Core.RemoveScriptContents');
          $hidden         = $config->get('Core.HiddenElements');
          // a strict true/false in Core.RemoveScriptContents overrides the
          // 'script' entry of the hidden-element list; any other value leaves it be
          if ($script_setting === true) {
              $hidden['script'] = true;
          } elseif ($script_setting === false && isset($hidden['script'])) {
              unset($hidden['script']);
          }

          $validator = new HTMLPurifier_AttrValidator();

          // name of the foreign hidden element whose contents are currently being
          // discarded, or false when nothing is being skipped
          $skip_until = false;
          // name of the recognized hidden element we are inside (its comments
          // become text), or false
          $comment_host = false;

          // NOTE(review): register() presumably binds $token by reference so the
          // context always sees the token in flight -- confirm against the
          // context class before renaming or splitting this variable.
          $token = false;
          $context->register('CurrentToken', $token);

          $errors = false;
          if ($config->get('Core.CollectErrors')) {
              $errors =& $context->get('ErrorCollector');
          }

          foreach ($tokens as $token) {
              $is_tag = !empty($token->is_tag);

              // while skipping, only a tag carrying the awaited name gets through
              if ($skip_until && !($is_tag && $token->name === $skip_until)) {
                  continue;
              }

              // ---- non-tag tokens: comments and text ----
              if (!$is_tag) {
                  if ($token instanceof HTMLPurifier_Token_Comment) {
                      if ($comment_host !== false) {
                          // inside a recognized hidden element: keep the payload as text
                          $token = new HTMLPurifier_Token_Text($token->data);
                      } elseif ($keep_comments) {
                          // keep the comment, but strip trailing hyphens and
                          // collapse every run of hyphens to a single one
                          if ($errors && substr($token->data, -1) === '-') {
                              $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                          }
                          $token->data = rtrim($token->data, '-');
                          if (strpos($token->data, '--') !== false) {
                              // reported once, before the first replacement pass
                              if ($errors) {
                                  $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                              }
                              do {
                                  $token->data = str_replace('--', '-', $token->data);
                              } while (strpos($token->data, '--') !== false);
                          }
                      } else {
                          // untrusted input: comments are dropped
                          if ($errors) {
                              $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                          }
                          continue;
                      }
                  } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                      // neither tag, comment nor text: discard silently
                      continue;
                  }
                  $kept[] = $token;
                  continue;
              }

              // ---- tag tokens ----

              // give a registered transform the first shot at the element
              if (isset($html_def->info_tag_transform[$token->name])) {
                  $source_name = $token->name;
                  $token = $html_def->info_tag_transform[$source_name]->transform($token, $config, $context);
                  if ($errors) {
                      $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $source_name);
                  }
              }

              // element unknown to the definition: escape it or drop it
              if (!isset($html_def->info[$token->name])) {
                  if ($escape_foreign) {
                      if ($errors) {
                          $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                      }
                      // keep the tag's generated markup as a text token
                      $token = new HTMLPurifier_Token_Text(
                          $printer->generateFromToken($token)
                      );
                      $kept[] = $token;
                      continue;
                  }

                  $is_hidden = isset($hidden[$token->name]);
                  if ($is_hidden) {
                      if ($token instanceof HTMLPurifier_Token_Start) {
                          // start discarding the element's children
                          $skip_until = $token->name;
                      } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                          // anything but a start or empty tag ends the skipping;
                          // an empty tag leaves the skip state untouched
                          $skip_until = false;
                      }
                  }
                  if ($errors) {
                      $errors->send(
                          E_ERROR,
                          $is_hidden
                              ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                              : 'Strategy_RemoveForeignElements: Foreign element removed'
                      );
                  }
                  continue;
              }

              // recognized element: start/empty tags must carry every required
              // attribute; 'img' is exempt unless Core.RemoveInvalidImg is on
              $needs_attr_check =
                  ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty)
                  && $html_def->info[$token->name]->required_attr
                  && ($drop_bad_img || $token->name != 'img');

              if ($needs_attr_check) {
                  // validate first, then look at which attributes are still set
                  $validator->validateToken($token, $config, $context);
                  $complete = true;
                  foreach ($html_def->info[$token->name]->required_attr as $attr_name) {
                      if (!isset($token->attr[$attr_name])) {
                          $complete = false;
                          break;
                      }
                  }
                  if (!$complete) {
                      // $attr_name is the first required attribute found missing
                      if ($errors) {
                          $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attr_name);
                      }
                      continue;
                  }
                  // flag the token as having had its attributes validated here
                  $token->armor['ValidateAttributes'] = true;
              }

              // track entry into / exit from a recognized hidden element
              if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                  $comment_host = $token->name;
              } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $comment_host) {
                  $comment_host = false;
              }

              $kept[] = $token;
          }

          // still skipping when input ran out: report the element left open
          if ($errors && $skip_until) {
              $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
          }

          $context->destroy('CurrentToken');
          return $kept;
      }

  }
  // vim: et sw=4 sts=4