Table.php 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. <?php
  2. /**
  3. * Definition for tables. The general idea is to extract out all of the
  4. * essential bits, and then reconstruct it later.
  5. *
  6. * This is a bit confusing, because the DTDs and the W3C
  7. * validators seem to disagree on the appropriate definition. The
  8. * DTD claims:
  9. *
  10. * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
  11. *
  12. * But actually, the HTML4 spec then has this to say:
  13. *
  14. * The TBODY start tag is always required except when the table
  15. * contains only one table body and no table head or foot sections.
  16. * The TBODY end tag may always be safely omitted.
  17. *
  18. * So the DTD is kind of wrong. The validator is, unfortunately, kind
  19. * of on crack.
  20. *
  21. * The definition changed again in XHTML1.1; and in my opinion, this
  22. * formulation makes the most sense.
  23. *
  24. * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
  25. *
  26. * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
  27. * If we encounter a thead, tfoot or tbody, we are placed in the former
  28. * mode, and we *must* wrap any stray tr segments with a tbody. But if
  29. * we don't run into any of them, just have tr tags is OK.
  30. */
  31. class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
  32. {
  33. public $allow_empty = false;
  34. public $type = 'table';
  35. public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
  36. 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
  37. public function __construct() {}
  38. public function validateChildren($tokens_of_children, $config, $context) {
  39. if (empty($tokens_of_children)) return false;
  40. // this ensures that the loop gets run one last time before closing
  41. // up. It's a little bit of a hack, but it works! Just make sure you
  42. // get rid of the token later.
  43. $tokens_of_children[] = false;
  44. // only one of these elements is allowed in a table
  45. $caption = false;
  46. $thead = false;
  47. $tfoot = false;
  48. // as many of these as you want
  49. $cols = array();
  50. $content = array();
  51. $nesting = 0; // current depth so we can determine nodes
  52. $is_collecting = false; // are we globbing together tokens to package
  53. // into one of the collectors?
  54. $collection = array(); // collected nodes
  55. $tag_index = 0; // the first node might be whitespace,
  56. // so this tells us where the start tag is
  57. $tbody_mode = false; // if true, then we need to wrap any stray
  58. // <tr>s with a <tbody>.
  59. foreach ($tokens_of_children as $token) {
  60. $is_child = ($nesting == 0);
  61. if ($token === false) {
  62. // terminating sequence started
  63. } elseif ($token instanceof HTMLPurifier_Token_Start) {
  64. $nesting++;
  65. } elseif ($token instanceof HTMLPurifier_Token_End) {
  66. $nesting--;
  67. }
  68. // handle node collection
  69. if ($is_collecting) {
  70. if ($is_child) {
  71. // okay, let's stash the tokens away
  72. // first token tells us the type of the collection
  73. switch ($collection[$tag_index]->name) {
  74. case 'tbody':
  75. $tbody_mode = true;
  76. case 'tr':
  77. $content[] = $collection;
  78. break;
  79. case 'caption':
  80. if ($caption !== false) break;
  81. $caption = $collection;
  82. break;
  83. case 'thead':
  84. case 'tfoot':
  85. $tbody_mode = true;
  86. // XXX This breaks rendering properties with
  87. // Firefox, which never floats a <thead> to
  88. // the top. Ever. (Our scheme will float the
  89. // first <thead> to the top.) So maybe
  90. // <thead>s that are not first should be
  91. // turned into <tbody>? Very tricky, indeed.
  92. // access the appropriate variable, $thead or $tfoot
  93. $var = $collection[$tag_index]->name;
  94. if ($$var === false) {
  95. $$var = $collection;
  96. } else {
  97. // Oops, there's a second one! What
  98. // should we do? Current behavior is to
  99. // transmutate the first and last entries into
  100. // tbody tags, and then put into content.
  101. // Maybe a better idea is to *attach
  102. // it* to the existing thead or tfoot?
  103. // We don't do this, because Firefox
  104. // doesn't float an extra tfoot to the
  105. // bottom like it does for the first one.
  106. $collection[$tag_index]->name = 'tbody';
  107. $collection[count($collection)-1]->name = 'tbody';
  108. $content[] = $collection;
  109. }
  110. break;
  111. case 'colgroup':
  112. $cols[] = $collection;
  113. break;
  114. }
  115. $collection = array();
  116. $is_collecting = false;
  117. $tag_index = 0;
  118. } else {
  119. // add the node to the collection
  120. $collection[] = $token;
  121. }
  122. }
  123. // terminate
  124. if ($token === false) break;
  125. if ($is_child) {
  126. // determine what we're dealing with
  127. if ($token->name == 'col') {
  128. // the only empty tag in the possie, we can handle it
  129. // immediately
  130. $cols[] = array_merge($collection, array($token));
  131. $collection = array();
  132. $tag_index = 0;
  133. continue;
  134. }
  135. switch($token->name) {
  136. case 'caption':
  137. case 'colgroup':
  138. case 'thead':
  139. case 'tfoot':
  140. case 'tbody':
  141. case 'tr':
  142. $is_collecting = true;
  143. $collection[] = $token;
  144. continue;
  145. default:
  146. if (!empty($token->is_whitespace)) {
  147. $collection[] = $token;
  148. $tag_index++;
  149. }
  150. continue;
  151. }
  152. }
  153. }
  154. if (empty($content)) return false;
  155. $ret = array();
  156. if ($caption !== false) $ret = array_merge($ret, $caption);
  157. if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
  158. if ($thead !== false) $ret = array_merge($ret, $thead);
  159. if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
  160. if ($tbody_mode) {
  161. // a little tricky, since the start of the collection may be
  162. // whitespace
  163. $inside_tbody = false;
  164. foreach ($content as $token_array) {
  165. // find the starting token
  166. foreach ($token_array as $t) {
  167. if ($t->name === 'tr' || $t->name === 'tbody') {
  168. break;
  169. }
  170. } // iterator variable carries over
  171. if ($t->name === 'tr') {
  172. if ($inside_tbody) {
  173. $ret = array_merge($ret, $token_array);
  174. } else {
  175. $ret[] = new HTMLPurifier_Token_Start('tbody');
  176. $ret = array_merge($ret, $token_array);
  177. $inside_tbody = true;
  178. }
  179. } elseif ($t->name === 'tbody') {
  180. if ($inside_tbody) {
  181. $ret[] = new HTMLPurifier_Token_End('tbody');
  182. $inside_tbody = false;
  183. $ret = array_merge($ret, $token_array);
  184. } else {
  185. $ret = array_merge($ret, $token_array);
  186. }
  187. } else {
  188. trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
  189. }
  190. }
  191. if ($inside_tbody) {
  192. $ret[] = new HTMLPurifier_Token_End('tbody');
  193. }
  194. } else {
  195. foreach ($content as $token_array) {
  196. // invariant: everything in here is <tr>s
  197. $ret = array_merge($ret, $token_array);
  198. }
  199. }
  200. if (!empty($collection) && $is_collecting == false){
  201. // grab the trailing space
  202. $ret = array_merge($ret, $collection);
  203. }
  204. array_pop($tokens_of_children); // remove phantom token
  205. return ($ret === $tokens_of_children) ? true : $ret;
  206. }
  207. }
  208. // vim: et sw=4 sts=4