<?php
/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes those that are
 * not recognized. If a TagTransform is defined for an element, the element
 * is transformed first, and the transformed element is what gets checked.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
    /**
     * Filters a token list down to the tokens the HTML definition recognizes.
     *
     * For every token, in order:
     *  - tag tokens are first run through a tag transform when one is
     *    registered for their name;
     *  - tags known to the definition are kept, unless a required attribute
     *    is still missing after attribute validation;
     *  - unknown tags are either converted to their textual HTML form
     *    (Core.EscapeInvalidTags) or dropped; dropping the start tag of a
     *    hidden element also drops everything up to the next tag that
     *    carries the same name;
     *  - comments become text while inside a recognized hidden element, are
     *    kept with their hyphens cleaned up when HTML.Trusted is set, and
     *    are removed otherwise;
     *  - text tokens are kept untouched; any other token kind is dropped.
     *
     * While the loop runs, the token being processed is exposed through the
     * context under the 'CurrentToken' key.
     *
     * @param array  $tokens  Tokens to filter.
     * @param object $config  Configuration; must provide get() and getHTMLDefinition().
     * @param object $context Context; must provide register(), get() and destroy().
     * @return array The surviving (possibly transformed) tokens, in order.
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escapeInvalidTags = $config->get('Core.EscapeInvalidTags');
        $removeInvalidImg = $config->get('Core.RemoveInvalidImg');
        // Only consulted to decide whether comments survive.
        $trusted = $config->get('HTML.Trusted');
        $removeScriptContents = $config->get('Core.RemoveScriptContents');
        $hiddenElements = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents, when explicitly true or false, overrides
        // whatever Core.HiddenElements says about <script>.
        if ($removeScriptContents === true) {
            $hiddenElements['script'] = true;
        } elseif ($removeScriptContents === false && isset($hiddenElements['script'])) {
            unset($hiddenElements['script']);
        }

        $attrValidator = new HTMLPurifier_AttrValidator();

        // Tag name whose next occurrence ends the current removal run, or false.
        $skipUntil = false;
        // Name of the recognized hidden element we are inside of, or false.
        $textifyComments = false;

        // $token is the variable handed to the context; the loop below keeps
        // assigning to this same variable.
        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // Inside a removed hidden element: discard everything except a
            // tag bearing that element's name.
            if ($skipUntil && (empty($token->is_tag) || $token->name !== $skipUntil)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // Give a registered tag transform the first go at the element.
                if (isset($definition->info_tag_transform[$token->name])) {
                    $nameBefore = $token->name;
                    $transform = $definition->info_tag_transform[$nameBefore];
                    $token = $transform->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $nameBefore);
                    }
                }

                if (!isset($definition->info[$token->name])) {
                    if (!$escapeInvalidTags) {
                        // Foreign element that is being dropped; a hidden one
                        // also takes its children with it.
                        if (isset($hiddenElements[$token->name])) {
                            if ($token instanceof HTMLPurifier_Token_Start) {
                                $skipUntil = $token->name;
                            } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                                // Not a start or empty tag: stop skipping.
                                $skipUntil = false;
                            }
                            if ($e) {
                                $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                            }
                        } else {
                            if ($e) {
                                $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                            }
                        }
                        continue;
                    }

                    // Foreign element kept as literal text.
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // Recognized element: confirm its required attributes.
                    $opensElement = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;
                    // <img> is only checked when Core.RemoveInvalidImg is on.
                    $imgExempt = $token->name == 'img' && !$removeInvalidImg;

                    if ($opensElement && $definition->info[$token->name]->required_attr && !$imgExempt) {
                        $attrValidator->validateToken($token, $config, $context);

                        $attrMissing = false;
                        foreach ($definition->info[$token->name]->required_attr as $attrName) {
                            if (!isset($token->attr[$attrName])) {
                                $attrMissing = true;
                                break;
                            }
                        }
                        if ($attrMissing) {
                            if ($e) {
                                $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $attrName);
                            }
                            continue;
                        }

                        // Flag the token so its attributes are marked as validated.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track entry into / exit from a recognized hidden element.
                    if (isset($hiddenElements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textifyComments = $token->name;
                    } elseif ($token->name === $textifyComments && $token instanceof HTMLPurifier_Token_End) {
                        $textifyComments = false;
                    }
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textifyComments !== false) {
                    // Comment inside a recognized hidden element becomes text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!$trusted) {
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // Trusted mode: keep the comment but tidy its hyphens.
                    if ($e && substr($token->data, -1) == '-') {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                    }
                    $token->data = rtrim($token->data, '-');

                    if (strpos($token->data, '--') !== false) {
                        // Reported once, however many passes are needed.
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                        do {
                            $token->data = str_replace('--', '-', $token->data);
                        } while (strpos($token->data, '--') !== false);
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Neither tag, comment nor text: drop it.
                continue;
            }

            $kept[] = $token;
        }

        if ($skipUntil && $e) {
            // The removal run never met its closing tag.
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skipUntil);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
// vim: et sw=4 sts=4