htmlpurifier.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. <?php
  2. /**
  3. * HTML Purifier Phorum Mod. Filter your HTML the Standards-Compliant Way!
  4. *
  5. * This Phorum mod enables users to post raw HTML into Phorum. But never
  6. * fear: with the help of HTML Purifier, this HTML will be beat into
  7. * de-XSSed and standards-compliant form, safe for general consumption.
  8. * It is not recommended, but possible to run this mod in parallel
  9. * with other formatters (in short, please DISABLE the BBcode mod).
  10. *
  11. * For help migrating from your previous markup language to pure HTML
  12. * please check the migrate.bbcode.php file.
  13. *
  14. * If you'd like to use this with a WYSIWYG editor, make sure that
  15. * editor sets $PHORUM['mod_htmlpurifier']['wysiwyg'] to true. Otherwise,
  16. * administrators who need to edit other people's comments may be at
  17. * risk for some nasty attacks.
  18. *
  19. * Tested with Phorum 5.2.11.
  20. */
// Note: Cache data is base64 encoded because Phorum insists on flinging
// it to the user and expecting it to come back unharmed, newlines and
// all, which ain't happening. It's slower, it takes up more space, but
// at least it won't get mutilated.
  25. /**
  26. * Purifies a data array
  27. */
  28. function phorum_htmlpurifier_format($data)
  29. {
  30. $PHORUM = $GLOBALS["PHORUM"];
  31. $purifier =& HTMLPurifier::getInstance();
  32. $cache_serial = $PHORUM['mod_htmlpurifier']['body_cache_serial'];
  33. foreach($data as $message_id => $message){
  34. if(isset($message['body'])) {
  35. if ($message_id) {
  36. // we're dealing with a real message, not a fake, so
  37. // there a number of shortcuts that can be taken
  38. if (isset($message['meta']['htmlpurifier_light'])) {
  39. // format hook was called outside of Phorum's normal
  40. // functions, do the abridged purification
  41. $data[$message_id]['body'] = $purifier->purify($message['body']);
  42. continue;
  43. }
  44. if (!empty($PHORUM['args']['purge'])) {
  45. // purge the cache, must be below the following if
  46. unset($message['meta']['body_cache']);
  47. }
  48. if (
  49. isset($message['meta']['body_cache']) &&
  50. isset($message['meta']['body_cache_serial']) &&
  51. $message['meta']['body_cache_serial'] == $cache_serial
  52. ) {
  53. // cached version is present, bail out early
  54. $data[$message_id]['body'] = base64_decode($message['meta']['body_cache']);
  55. continue;
  56. }
  57. }
  58. // migration might edit this array, that's why it's defined
  59. // so early
  60. $updated_message = array();
  61. // create the $body variable
  62. if (
  63. $message_id && // message must be real to migrate
  64. !isset($message['meta']['body_cache_serial'])
  65. ) {
  66. // perform migration
  67. $fake_data = array();
  68. list($signature, $edit_message) = phorum_htmlpurifier_remove_sig_and_editmessage($message);
  69. $fake_data[$message_id] = $message;
  70. $fake_data = phorum_htmlpurifier_migrate($fake_data);
  71. $body = $fake_data[$message_id]['body'];
  72. $body = str_replace("<phorum break>\n", "\n", $body);
  73. $updated_message['body'] = $body; // save it in
  74. $body .= $signature . $edit_message; // add it back in
  75. } else {
  76. // reverse Phorum's pre-processing
  77. $body = $message['body'];
  78. // order is important
  79. $body = str_replace("<phorum break>\n", "\n", $body);
  80. $body = str_replace(array('&lt;','&gt;','&amp;', '&quot;'), array('<','>','&','"'), $body);
  81. if (!$message_id && defined('PHORUM_CONTROL_CENTER')) {
  82. // we're in control.php, so it was double-escaped
  83. $body = str_replace(array('&lt;','&gt;','&amp;', '&quot;'), array('<','>','&','"'), $body);
  84. }
  85. }
  86. $body = $purifier->purify($body);
  87. // dynamically update the cache (MUST BE DONE HERE!)
  88. // this is inefficient because it's one db call per
  89. // cache miss, but once the cache is in place things are
  90. // a lot zippier.
  91. if ($message_id) { // make sure it's not a fake id
  92. $updated_message['meta'] = $message['meta'];
  93. $updated_message['meta']['body_cache'] = base64_encode($body);
  94. $updated_message['meta']['body_cache_serial'] = $cache_serial;
  95. phorum_db_update_message($message_id, $updated_message);
  96. }
  97. // must not get overloaded until after we cache it, otherwise
  98. // we'll inadvertently change the original text
  99. $data[$message_id]['body'] = $body;
  100. }
  101. }
  102. return $data;
  103. }
  104. // -----------------------------------------------------------------------
  105. // This is fragile code, copied from read.php:596 (Phorum 5.2.6). Please
  106. // keep this code in-sync with Phorum
  107. /**
  108. * Generates a signature based on a message array
  109. */
  110. function phorum_htmlpurifier_generate_sig($row) {
  111. $phorum_sig = '';
  112. if(isset($row["user"]["signature"])
  113. && isset($row['meta']['show_signature']) && $row['meta']['show_signature']==1){
  114. $phorum_sig=trim($row["user"]["signature"]);
  115. if(!empty($phorum_sig)){
  116. $phorum_sig="\n\n$phorum_sig";
  117. }
  118. }
  119. return $phorum_sig;
  120. }
  121. /**
  122. * Generates an edit message based on a message array
  123. */
  124. function phorum_htmlpurifier_generate_editmessage($row) {
  125. $PHORUM = $GLOBALS['PHORUM'];
  126. $editmessage = '';
  127. if(isset($row['meta']['edit_count']) && $row['meta']['edit_count'] > 0) {
  128. $editmessage = str_replace ("%count%", $row['meta']['edit_count'], $PHORUM["DATA"]["LANG"]["EditedMessage"]);
  129. $editmessage = str_replace ("%lastedit%", phorum_date($PHORUM["short_date_time"],$row['meta']['edit_date']), $editmessage);
  130. $editmessage = str_replace ("%lastuser%", $row['meta']['edit_username'], $editmessage);
  131. $editmessage = "\n\n\n\n$editmessage";
  132. }
  133. return $editmessage;
  134. }
  135. // End fragile code
  136. // -----------------------------------------------------------------------
  137. /**
  138. * Removes the signature and edit message from a message
  139. * @param $row Message passed by reference
  140. */
  141. function phorum_htmlpurifier_remove_sig_and_editmessage(&$row) {
  142. $signature = phorum_htmlpurifier_generate_sig($row);
  143. $editmessage = phorum_htmlpurifier_generate_editmessage($row);
  144. $replacements = array();
  145. // we need to remove add <phorum break> as that is the form these
  146. // extra bits are in.
  147. if ($signature) $replacements[str_replace("\n", "<phorum break>\n", $signature)] = '';
  148. if ($editmessage) $replacements[str_replace("\n", "<phorum break>\n", $editmessage)] = '';
  149. $row['body'] = strtr($row['body'], $replacements);
  150. return array($signature, $editmessage);
  151. }
  152. /**
  153. * Indicate that data is fully HTML and not from migration, invalidate
  154. * previous caches
  155. * @note This function could generate the actual cache entries, but
  156. * since there's data missing that must be deferred to the first read
  157. */
  158. function phorum_htmlpurifier_posting($message) {
  159. $PHORUM = $GLOBALS["PHORUM"];
  160. unset($message['meta']['body_cache']); // invalidate the cache
  161. $message['meta']['body_cache_serial'] = $PHORUM['mod_htmlpurifier']['body_cache_serial'];
  162. return $message;
  163. }
  164. /**
  165. * Overload quoting mechanism to prevent default, mail-style quote from happening
  166. */
  167. function phorum_htmlpurifier_quote($array) {
  168. $PHORUM = $GLOBALS["PHORUM"];
  169. $purifier =& HTMLPurifier::getInstance();
  170. $text = $purifier->purify($array[1]);
  171. $source = htmlspecialchars($array[0]);
  172. return "<blockquote cite=\"$source\">\n$text\n</blockquote>";
  173. }
  174. /**
  175. * Ensure that our format hook is processed last. Also, loads the library.
  176. * @credits <http://secretsauce.phorum.org/snippets/make_bbcode_last_formatter.php.txt>
  177. */
  178. function phorum_htmlpurifier_common() {
  179. require_once(dirname(__FILE__).'/htmlpurifier/HTMLPurifier.auto.php');
  180. require(dirname(__FILE__).'/init-config.php');
  181. $config = phorum_htmlpurifier_get_config();
  182. HTMLPurifier::getInstance($config);
  183. // increment revision.txt if you want to invalidate the cache
  184. $GLOBALS['PHORUM']['mod_htmlpurifier']['body_cache_serial'] = $config->getSerial();
  185. // load migration
  186. if (file_exists(dirname(__FILE__) . '/migrate.php')) {
  187. include(dirname(__FILE__) . '/migrate.php');
  188. } else {
  189. echo '<strong>Error:</strong> No migration path specified for HTML Purifier, please check
  190. <tt>modes/htmlpurifier/migrate.bbcode.php</tt> for instructions on
  191. how to migrate from your previous markup language.';
  192. exit;
  193. }
  194. if (!function_exists('phorum_htmlpurifier_migrate')) {
  195. // Dummy function
  196. function phorum_htmlpurifier_migrate($data) {return $data;}
  197. }
  198. }
  199. /**
  200. * Pre-emptively performs purification if it looks like a WYSIWYG editor
  201. * is being used
  202. */
  203. function phorum_htmlpurifier_before_editor($message) {
  204. if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
  205. if (!empty($message['body'])) {
  206. $body = $message['body'];
  207. // de-entity-ize contents
  208. $body = str_replace(array('&lt;','&gt;','&amp;'), array('<','>','&'), $body);
  209. $purifier =& HTMLPurifier::getInstance();
  210. $body = $purifier->purify($body);
  211. // re-entity-ize contents
  212. $body = htmlspecialchars($body, ENT_QUOTES, $GLOBALS['PHORUM']['DATA']['CHARSET']);
  213. $message['body'] = $body;
  214. }
  215. }
  216. return $message;
  217. }
  218. function phorum_htmlpurifier_editor_after_subject() {
  219. // don't show this message if it's a WYSIWYG editor, since it will
  220. // then be handled automatically
  221. if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
  222. $i = $GLOBALS['PHORUM']['DATA']['MODE'];
  223. if ($i == 'quote' || $i == 'edit' || $i == 'moderation') {
  224. ?>
  225. <div>
  226. <p>
  227. <strong>Notice:</strong> HTML has been scrubbed for your safety.
  228. If you would like to see the original, turn off WYSIWYG mode
  229. (consult your administrator for details.)
  230. </p>
  231. </div>
  232. <?php
  233. }
  234. return;
  235. }
  236. if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['suppress_message'])) return;
  237. ?><div class="htmlpurifier-help">
  238. <p>
  239. <strong>HTML input</strong> is enabled. Make sure you escape all HTML and
  240. angled brackets with <code>&amp;lt;</code> and <code>&amp;gt;</code>.
  241. </p><?php
  242. $purifier =& HTMLPurifier::getInstance();
  243. $config = $purifier->config;
  244. if ($config->get('AutoFormat.AutoParagraph')) {
  245. ?><p>
  246. <strong>Auto-paragraphing</strong> is enabled. Double
  247. newlines will be converted to paragraphs; for single
  248. newlines, use the <code>pre</code> tag.
  249. </p><?php
  250. }
  251. $html_definition = $config->getDefinition('HTML');
  252. $allowed = array();
  253. foreach ($html_definition->info as $name => $x) $allowed[] = "<code>$name</code>";
  254. sort($allowed);
  255. $allowed_text = implode(', ', $allowed);
  256. ?><p><strong>Allowed tags:</strong> <?php
  257. echo $allowed_text;
  258. ?>.</p><?php
  259. ?>
  260. </p>
  261. <p>
  262. For inputting literal code such as HTML and PHP for display, use
  263. CDATA tags to auto-escape your angled brackets, and <code>pre</code>
  264. to preserve newlines:
  265. </p>
  266. <pre>&lt;pre&gt;&lt;![CDATA[
  267. <em>Place code here</em>
  268. ]]&gt;&lt;/pre&gt;</pre>
  269. <p>
  270. Power users, you can hide this notice with:
  271. <pre>.htmlpurifier-help {display:none;}</pre>
  272. </p>
  273. </div><?php
  274. }
  275. // vim: et sw=4 sts=4