kses.php 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. <?php
  2. /**
  3. * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
  4. * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
  5. *
  6. * This program is free software and open source software; you can redistribute
  7. * it and/or modify it under the terms of the GNU General Public License as
  8. * published by the Free Software Foundation; either version 3 of the License,
  9. * or (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful, but WITHOUT
  12. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  14. * more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with this program; if not, write to the Free Software Foundation, Inc.,
  18. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
  19. * http://www.gnu.org/licenses/gpl.html
  20. *
  21. * *** CONTACT INFORMATION ***
  22. *
  23. * E-mail: metaur at users dot sourceforge dot net
  24. * Web page: http://sourceforge.net/projects/kses
  25. * Paper mail: Ulf Harnhammar
  26. * Ymergatan 17 C
  27. * 753 25 Uppsala
  28. * SWEDEN
  29. *
  30. * [kses strips evil scripts!]
  31. *
  32. * @package chamilo.kses
  33. * @copyright Ulf Harnhammar {@link http://sourceforge.net/projects/kses}
  34. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  35. */
  // Whitelist of named HTML entities that survive filtering (checked by
  // kses_named_entities()). Names are case-sensitive. Names containing digits
  // (e.g. "sup2", "frac12") are not listed here.
  $kses_allowedentitynames = array(
      // --- ISO 8859-1 (Latin-1) characters ---
      'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen',
      'brvbar', 'sect', 'uml', 'copy', 'ordf', 'laquo',
      'not', 'shy', 'reg', 'macr', 'deg', 'plusmn',
      'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
      'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde',
      'Auml', 'Aring', 'AElig', 'Ccedil', 'Egrave', 'Eacute',
      'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
      'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
      'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc',
      'Uuml', 'Yacute', 'THORN', 'szlig', 'agrave', 'aacute',
      'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
      'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
      'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute',
      'ocirc', 'otilde', 'ouml', 'divide', 'oslash', 'ugrave',
      'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',

      // --- Markup-significant and internationalization characters ---
      'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
      'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
      'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm',
      'rlm', 'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo',
      'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
      'lsaquo', 'rsaquo', 'euro',

      // --- Greek letters, mathematical and other symbols ---
      'fnof', 'Alpha', 'Beta',
      'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
      'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi',
      'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
      'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta',
      'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
      'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi',
      'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
      'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym',
      'upsih', 'piv', 'bull', 'hellip', 'prime', 'Prime',
      'oline', 'frasl', 'weierp', 'image', 'real', 'trade',
      'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
      'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr',
      'forall', 'part', 'exist', 'empty', 'nabla', 'isin',
      'notin', 'ni', 'prod', 'sum', 'minus', 'lowast',
      'radic', 'prop', 'infin', 'ang', 'and', 'or',
      'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
      'ne', 'equiv', 'le', 'ge', 'sub', 'sup',
      'nsub', 'sube', 'supe', 'oplus', 'otimes', 'perp',
      'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
      'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams',
  );
  /**
   * Filters content and keeps only allowable HTML elements.
   *
   * Entry point of the filter: after this call only the allowed HTML element
   * names, attribute names and attribute values, plus sane HTML entities,
   * remain in $string. Remove any slashes added by PHP's magic quotes before
   * calling this function.
   *
   * The default allowed protocols are 'http', 'https', 'ftp', 'news',
   * 'nntp', 'telnet', 'gopher' and 'mailto'. This covers the common link
   * protocols, except for 'javascript', which should not be allowed for
   * untrusted users.
   *
   * @param string $string Content to filter
   * @param array $allowed_html Allowed elements: element name => (attribute name => checks)
   * @param array $allowed_protocols Allowed URL protocols
   * @return string Filtered content
   */
  function kses($string, $allowed_html, $allowed_protocols = array(
      'http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto',
  ))
  {
      // The pre-processing passes run innermost-first: strip NULs, drop
      // Netscape JS entities, normalize entities, then apply local hooks.
      $prepared = kses_hook(
          kses_normalize_entities(
              kses_js_entities(
                  kses_no_null($string)
              )
          )
      );

      // Element and attribute names are matched in lower case from here on.
      $lowercased_html = kses_array_lc($allowed_html);

      return kses_split($prepared, $lowercased_html, $allowed_protocols);
  }
  /**
   * Extension point: add any kses hooks here.
   *
   * As shipped, no hook is installed and the content is returned unchanged.
   *
   * @param string $string Content passing through the filter
   * @return string The same content
   */
  function kses_hook($string)
  {
      $hooked = $string; // no hooks installed: pass through untouched

      return $hooked;
  }
  /**
   * Reports the version number of kses.
   *
   * @return string KSES version number
   */
  function kses_version()
  {
      $version = '0.2.2';

      return $version;
  }
  /**
   * Searches for HTML tags, no matter how malformed, and filters each one.
   *
   * Comments, tags (closed or not) and stray ">" characters are matched and
   * handed to _kses_split_callback(). The whitelists are published through the
   * globals $pass_allowed_html and $pass_allowed_protocols, because the
   * callback only receives the regex match.
   *
   * @param string $string Content to filter
   * @param array $allowed_html Allowed HTML elements
   * @param array $allowed_protocols Allowed protocols to keep
   * @return string Content with fixed HTML tags
   */
  function kses_split($string, $allowed_html, $allowed_protocols)
  {
      // Hand the whitelists to the callback via global state.
      $GLOBALS['pass_allowed_html'] = $allowed_html;
      $GLOBALS['pass_allowed_protocols'] = $allowed_protocols;

      $tag_pattern = '%((<!--.*?(-->|$))|(<[^>]*(>|$)|>))%';

      return preg_replace_callback($tag_pattern, '_kses_split_callback', $string);
  }
  /**
   * Callback for kses_split().
   *
   * Forwards the matched tag, together with the whitelists that kses_split()
   * stored in global variables, to kses_split2().
   *
   * @access private
   *
   * @param array $match preg_replace_callback() matches array
   * @return string Filtered replacement for the matched text
   */
  function _kses_split_callback( $match )
  {
      global $pass_allowed_html, $pass_allowed_protocols;

      $matched_tag = $match[1];

      return kses_split2( $matched_tag, $pass_allowed_html, $pass_allowed_protocols );
  }
  /**
   * Filters one matched piece of markup on behalf of kses_split().
   *
   * Handles three kinds of input:
   *  - a stray ">" character, returned as "&gt;";
   *  - an HTML comment, whose content is filtered recursively and re-wrapped;
   *  - a tag, which is dropped when seriously malformed (like <:::>) or not
   *    whitelisted, and otherwise split into an element name and an attribute
   *    list that kses_attr() then filters.
   *
   * @access private
   * @uses kses_attr()
   *
   * @param string $string Content to filter
   * @param array $allowed_html Allowed HTML elements
   * @param array $allowed_protocols Allowed protocols to keep
   * @return string Fixed HTML element
   */
  function kses_split2($string, $allowed_html, $allowed_protocols)
  {
      $string = kses_stripslashes($string);

      // Anything not starting with "<" is a stray ">" matched by kses_split().
      if (substr($string, 0, 1) != '<') {
          return '&gt;';
      }

      // HTML comments are allowed, but their content is filtered as well.
      if (preg_match('%^<!--(.*?)(-->)?$%', $string, $matches)) {
          $comment = str_replace(array('<!--', '-->'), '', $matches[1]);

          // Keep filtering until a pass no longer changes the text.
          for (;;) {
              $filtered = kses($comment, $allowed_html, $allowed_protocols);
              if ($comment == $filtered) {
                  break;
              }
              $comment = $filtered;
          }

          if ($comment == '') {
              return '';
          }

          // Prevent multiple dashes inside the comment ...
          $comment = preg_replace('/--+/', '-', $comment);
          // ... and a trailing dash that would form "--->" with the terminator.
          $comment = preg_replace('/-$/', '', $comment);

          return "<!--{$comment}-->";
      }

      // Split the tag into optional closing slash, element name and attributes.
      if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
          return ''; // seriously malformed
      }

      $slash = trim($matches[1]);
      $elem = $matches[2];
      $attrlist = $matches[3];

      if (!isset($allowed_html[strtolower($elem)])) {
          return ''; // element is not on the whitelist
      }

      if ($slash != '') {
          return "<$slash$elem>"; // closing tags never carry attributes
      }

      // $slash is empty here, so the element name alone is passed on.
      return kses_attr($elem, $attrlist, $allowed_html, $allowed_protocols);
  }
  /**
   * Rebuilds an opening tag, keeping only the attributes allowed for it.
   *
   * If no attributes are allowed for the element, they are all dropped.
   * Otherwise kses_hair() splits the attribute list and each attribute is
   * kept only if it is whitelisted and passes its configured checks. "style"
   * attributes that carry checks are additionally run through
   * kses_safecss_filter_attr(). Any remaining "<" or ">" characters are
   * removed, and a closing XHTML slash is preserved when the tag had one.
   *
   * An element that is missing from $allowed_html, or whose entry is not an
   * array, is treated as "no attributes allowed" instead of relying on an
   * error-suppressed count(), which throws a TypeError on PHP 8.
   *
   * @param string $element HTML element/tag
   * @param string $attr HTML attributes from HTML element to closing HTML element tag
   * @param array $allowed_html Allowed HTML elements
   * @param array $allowed_protocols Allowed protocols to keep
   * @return string Sanitized HTML element
   */
  function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
  {
      // Is there a closing XHTML slash at the end of the attributes?
      $xhtml_slash = preg_match('%\s*/\s*$%', $attr) ? ' /' : '';

      // Are any attributes allowed at all for this element?
      $element_key = strtolower($element);
      $allowed_attr = isset($allowed_html[$element_key]) ? $allowed_html[$element_key] : null;
      if (!is_array($allowed_attr) || count($allowed_attr) == 0) {
          return "<$element$xhtml_slash>";
      }

      // Collect the allowed attributes for this element in $attr2.
      $attr2 = '';
      foreach (kses_hair($attr, $allowed_protocols) as $arreach) {
          $attr_key = strtolower($arreach['name']);

          if (!isset($allowed_attr[$attr_key])) {
              continue; // the attribute is not allowed
          }

          $current = $allowed_attr[$attr_key];
          if ($current == '') {
              continue; // the attribute is explicitly disabled
          }

          if (!is_array($current)) {
              // Allowed without any checks.
              $attr2 .= ' '.$arreach['whole'];
              continue;
          }

          // Every configured check must pass.
          $ok = true;
          foreach ($current as $currkey => $currval) {
              if (!kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval)) {
                  $ok = false;
                  break;
              }
          }
          if (!$ok) {
              continue; // no point in filtering CSS for a rejected attribute
          }

          if ($attr_key == 'style') {
              $orig_value = $arreach['value'];
              $value = kses_safecss_filter_attr($orig_value);
              if (empty($value)) {
                  continue; // nothing safe is left of the inline CSS
              }
              $arreach['value'] = $value;
              $arreach['whole'] = str_replace($orig_value, $value, $arreach['whole']);
          }

          $attr2 .= ' '.$arreach['whole'];
      }

      // Remove any "<" or ">" characters
      $attr2 = preg_replace('/[<>]/', '', $attr2);

      return "<$element$attr2$xhtml_slash>";
  }
  /**
   * Builds an attribute list from a string containing attributes.
   *
   * A small state machine walks the attribute string: mode 0 expects an
   * attribute name, mode 1 an equals sign (or whitespace, for a valueless
   * attribute) and mode 2 a value. Unquoted values get double quotes added,
   * and values of URI attributes are passed through kses_bad_protocol().
   * Input that cannot be parsed is skipped with kses_html_error(). If an
   * attribute occurs more than once, only the first occurrence is kept
   * (foo='bar' foo='baz' results in foo='bar').
   *
   * Each entry of the result is keyed by the attribute name as written and
   * holds 'name', 'value', 'whole' (the rebuilt source text) and 'vless'
   * ('y' for valueless attributes, 'n' otherwise).
   *
   * @param string $attr Attribute list from HTML element to closing HTML element tag
   * @param array $allowed_protocols Allowed protocols to keep
   * @return array List of attributes after parsing
   */
  function kses_hair($attr, $allowed_protocols)
  {
      $attrarr = array();
      $mode = 0;
      $attrname = '';
      $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');

      // Accepted value syntaxes, tried in this order: "value", 'value', value.
      // Each entry holds: matching pattern, stripping pattern, quote used when
      // the attribute is rebuilt (unquoted values are given double quotes).
      $value_forms = array(
          array('%^"([^"]*)"(\s+|/?$)%', '/^"[^"]*"(\s+|$)/', '"'),
          array("%^'([^']*)'(\s+|/?$)%", "/^'[^']*'(\s+|$)/", "'"),
          array("%^([^\s\"']+)(\s+|/?$)%", "%^[^\s\"']+(\s+|$)%", '"'),
      );

      // Loop through the whole attribute list
      while (strlen($attr) != 0) {
          $working = 0; // Was the last operation successful?

          if ($mode == 0) {
              // attribute name, href for instance
              if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
                  $attrname = $match[1];
                  $working = $mode = 1;
                  $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
              }
          } elseif ($mode == 1) {
              // equals sign or valueless ("selected")
              if (preg_match('/^\s*=\s*/', $attr)) {
                  $working = 1;
                  $mode = 2;
                  $attr = preg_replace('/^\s*=\s*/', '', $attr);
              } elseif (preg_match('/^\s+/', $attr)) {
                  $working = 1;
                  $mode = 0;
                  if (!array_key_exists($attrname, $attrarr)) {
                      $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
                  }
                  $attr = preg_replace('/^\s+/', '', $attr);
              }
          } elseif ($mode == 2) {
              // attribute value, a URL after href= for instance
              foreach ($value_forms as $form) {
                  list($match_pattern, $strip_pattern, $quote) = $form;
                  if (!preg_match($match_pattern, $attr, $match)) {
                      continue;
                  }

                  $thisval = $match[1];
                  // Only URI-carrying attributes get protocol filtering; CSS in
                  // "style" would otherwise be mistaken for a protocol (MDL-2684).
                  if ( in_array(strtolower($attrname), $uris) ) {
                      $thisval = kses_bad_protocol($thisval, $allowed_protocols);
                  }

                  if (!array_key_exists($attrname, $attrarr)) {
                      $attrarr[$attrname] = array ('name' => $attrname, 'value' => $thisval, 'whole' => $attrname.'='.$quote.$thisval.$quote, 'vless' => 'n');
                  }

                  $working = 1;
                  $mode = 0;
                  $attr = preg_replace($strip_pattern, '', $attr);
                  break;
              }
          }

          if ($working == 0) {
              // not well formed, remove and try again
              $attr = kses_html_error($attr);
              $mode = 0;
          }
      }

      // special case, for when the attribute list ends with a valueless
      // attribute like "selected"
      if ($mode == 1 && !array_key_exists($attrname, $attrarr)) {
          $attrarr[$attrname] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y');
      }

      return $attrarr;
  }
  /**
   * Performs one configured check on an attribute value.
   *
   * Implemented checks (the check name is matched case-insensitively):
   *  - "maxlen": the value is no longer than $checkvalue;
   *  - "minlen": the value is no shorter than $checkvalue;
   *  - "maxval": the value is an integer from 0 and up, of at most six digits
   *    with at most six whitespace characters on either side, and is not
   *    greater than $checkvalue;
   *  - "minval": same format as "maxval", and not smaller than $checkvalue;
   *  - "valueless": with "y"/"Y" the attribute must not have a value (like
   *    <option selected>), with "n"/"N" it must have one (<a href="blah">).
   * Unknown check names always pass.
   *
   * @param string $value Attribute value
   * @param string $vless Whether the value is valueless. Use 'y' or 'n'
   * @param string $checkname What $checkvalue is checking for.
   * @param mixed $checkvalue What constraint the value should pass
   * @return bool Whether check passes
   */
  function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
  {
      $check = strtolower($checkname);
      $small_integer = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

      if ($check === 'maxlen') {
          // Guards against over-long values.
          return !(strlen($value) > $checkvalue);
      }

      if ($check === 'minlen') {
          return !(strlen($value) < $checkvalue);
      }

      if ($check === 'maxval') {
          // Both conditions are evaluated, then combined.
          $well_formed = (bool) preg_match($small_integer, $value);
          $in_range = !($value > $checkvalue);
          return $well_formed && $in_range;
      }

      if ($check === 'minval') {
          $well_formed = (bool) preg_match($small_integer, $value);
          $in_range = !($value < $checkvalue);
          return $well_formed && $in_range;
      }

      if ($check === 'valueless') {
          return strtolower($checkvalue) == $vless;
      }

      return true; // unknown checks never reject a value
  }
  /**
   * Sanitizes a string from bad protocols.
   *
   * Removes all non-allowed protocols from the beginning of $string. The
   * work is repeated until a pass leaves the string unchanged, so an input
   * like "javascript:javascript:alert(57)" cannot slip through.
   *
   * @param string $string Content to filter bad protocols from
   * @param array $allowed_protocols Allowed protocols to keep
   * @return string Filtered content
   */
  function kses_bad_protocol($string, $allowed_protocols)
  {
      $string = kses_no_null($string);
      // Deals with the Opera "feature"; bytes following \xc3-\xcf are left
      // alone so that UTF-8 sequences stay intact (moodle utf8 fix).
      $string = preg_replace('/([^\xc3-\xcf])\xad+/', '\\1', $string);

      // Always run at least one pass, then repeat until nothing changes.
      do {
          $previous = $string;
          $string = kses_bad_protocol_once($previous, $allowed_protocols);
      } while ($string != $previous);

      return $string;
  }
  /**
   * Removes any NULL characters in $string.
   *
   * Two passes run in order: first real NUL bytes are dropped, then the
   * two-character sequence backslash + "0" is removed.
   *
   * @param string $string Content to clean
   * @return string Content without NULL characters
   */
  function kses_no_null($string)
  {
      // preg_replace() applies an array of patterns one after the other.
      $null_patterns = array('/\0+/', '/(\\\\0)+/');

      return preg_replace($null_patterns, '', $string);
  }
  /**
   * Strips slashes from in front of quotes.
   *
   * Changes the character sequence \" to just " and leaves all other slashes
   * alone. According to the original author, the quoting done by
   * preg_replace(//e) seemed to require this.
   *
   * @param string $string String to strip slashes
   * @return string Fixed strings with quoted slashes
   */
  function kses_stripslashes($string)
  {
      $escaped_quote = '%\\\\"%'; // a backslash directly followed by a double quote
      $plain_quote = '"';

      return preg_replace($escaped_quote, $plain_quote, $string);
  }
  /**
   * Returns a copy of a two-level array with all keys changed to lower case.
   *
   * Both the outer keys (element names) and the keys of each inner array
   * (attribute names) are lower-cased; inner values are copied unchanged.
   * Non-array inputs and non-array inner values are cast to arrays.
   *
   * @param array $inarray Unfiltered array
   * @return array Fixed array with all lowercase keys
   */
  function kses_array_lc($inarray)
  {
      $outarray = array();

      foreach ( (array) $inarray as $element => $attributes ) {
          $lowered = array();
          foreach ( (array) $attributes as $attribute => $rule ) {
              $lowered[strtolower($attribute)] = $rule;
          }

          // A later element whose lower-cased name already exists replaces it.
          $outarray[strtolower($element)] = $lowered;
      }

      return $outarray;
  }
  /**
   * Removes the HTML JavaScript entities found in early versions of Netscape 4.
   *
   * These have the form &{...}; an unterminated one is removed up to the end
   * of the string.
   *
   * @param string $string Content to clean
   * @return string Content without JavaScript entities
   */
  function kses_js_entities($string)
  {
      $js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

      return preg_replace($js_entity, '', $string);
  }
  /**
   * Handles parsing errors in kses_hair().
   *
   * Drops everything up to and including the next run of whitespace, treating
   * a quoted or apostrophe-delimited section (closed or not) as a single unit.
   *
   * @param string $string Remaining attribute text that could not be parsed
   * @return string The text that follows the skipped part
   */
  function kses_html_error($string)
  {
      $unparsable_prefix = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

      return preg_replace($unparsable_prefix, '', $string);
  }
  /**
   * Runs one pass of protocol filtering over the beginning of $string.
   *
   * The string is split at the first colon (literal, or written as the
   * numeric entity for ":"). The part in front of it is checked by
   * kses_bad_protocol_once2() and re-joined with the trimmed remainder.
   * Strings without a colon, or with "/?" in front of the first colon, are
   * returned unchanged.
   *
   * @param string $string Content to check for bad protocols
   * @param array $allowed_protocols Allowed protocols
   * @return string Sanitized content
   */
  function kses_bad_protocol_once($string, $allowed_protocols)
  {
      $pieces = preg_split('/:|&#0*58;|&#x0*3a;/i', $string, 2);

      // Nothing to check: no colon at all, or the colon comes after "/?".
      if (!isset($pieces[1]) || preg_match('%/\?%', $pieces[0])) {
          return $string;
      }

      $scheme = kses_bad_protocol_once2($pieces[0], $allowed_protocols);

      return $scheme.trim($pieces[1]);
  }
  /**
   * Checks one candidate URL protocol against the whitelist.
   *
   * The candidate is normalized first: numeric entities are decoded, then
   * whitespace, NULL characters and \xad bytes are removed and the result is
   * lower-cased. An allowed protocol is returned in that normalized form
   * followed by ":"; anything else yields an empty string.
   *
   * @access private
   *
   * @param string $string URI scheme to check against the whitelist
   * @param array $allowed_protocols Allowed protocols
   * @return string Normalized "protocol:" or an empty string
   */
  function kses_bad_protocol_once2($string, $allowed_protocols)
  {
      $scheme = kses_decode_entities($string);
      $scheme = preg_replace('/\s/', '', $scheme);
      $scheme = kses_no_null($scheme);
      $scheme = preg_replace('/\xad+/', '', $scheme); // deals with Opera "feature"
      $scheme = strtolower($scheme);

      foreach ( (array) $allowed_protocols as $one_protocol ) {
          if (strtolower($one_protocol) == $scheme) {
              return "$scheme:";
          }
      }

      return '';
  }
  /**
   * Converts and fixes HTML entities.
   *
   * Every "&" is first escaped to "&amp;"; then whitelisted named entities
   * and valid numeric entities are restored. This converts "AT&T" to the
   * correct "AT&amp;T", "&#00058;" to "&#058;", "&#XYZZY;" to "&amp;#XYZZY;"
   * and so on.
   *
   * @param string $string Content to normalize entities
   * @return string Content with normalized entities
   */
  function kses_normalize_entities($string)
  {
      // Disarm all entities by converting & to &amp;
      $string = str_replace('&', '&amp;', $string);

      // Change back the allowed entities: named, decimal, then hexadecimal.
      $restore_passes = array(
          '/&amp;([A-Za-z]{2,8});/' => 'kses_named_entities',
          '/&amp;#(0*[0-9]{1,7});/' => 'kses_normalize_entities2',
          '/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/' => 'kses_normalize_entities3',
      );
      foreach ($restore_passes as $pattern => $callback) {
          $string = preg_replace_callback($pattern, $callback, $string);
      }

      return $string;
  }
  /**
   * Callback for kses_normalize_entities() regular expression.
   *
   * Restores a named entity only when its case-sensitive name is listed in
   * the global $kses_allowedentitynames; any other name keeps its ampersand
   * escaped.
   *
   * @param array $matches preg_replace_callback() matches array
   * @return string Correctly encoded entity
   */
  function kses_named_entities($matches) {
      global $kses_allowedentitynames;

      if ( empty($matches[1]) ) {
          return '';
      }

      $name = $matches[1];
      if ( in_array($name, $kses_allowedentitynames) ) {
          return "&$name;";
      }

      return "&amp;$name;";
  }
  /**
   * Callback for kses_normalize_entities() regular expression.
   *
   * Handles decimal &#number; entities: a number accepted by
   * kses_valid_unicode() is re-emitted without leading zeroes, left-padded
   * with zeroes to at least three digits; any other number keeps its
   * ampersand escaped.
   *
   * @access private
   *
   * @param array $matches preg_replace_callback() matches array
   * @return string Correctly encoded entity
   */
  function kses_normalize_entities2($matches) {
      if ( empty($matches[1]) ) {
          return '';
      }

      $digits = $matches[1];
      if ( !kses_valid_unicode($digits) ) {
          return "&amp;#$digits;";
      }

      $normalized = str_pad(ltrim($digits, '0'), 3, '0', STR_PAD_LEFT);

      return "&#$normalized;";
  }
  /**
   * Callback for kses_normalize_entities() for regular expression.
   *
   * Handles hexadecimal &#x...; entities: a value accepted by
   * kses_valid_unicode() is re-emitted without leading zeroes; any other
   * value keeps its ampersand escaped.
   *
   * @access private
   *
   * @param array $matches preg_replace_callback() matches array
   * @return string Correctly encoded entity
   */
  function kses_normalize_entities3($matches) {
      if ( empty($matches[1]) ) {
          return '';
      }

      $hexchars = $matches[1];
      if ( kses_valid_unicode(hexdec($hexchars)) ) {
          return '&#x'.ltrim($hexchars,'0').';';
      }

      return "&amp;#x$hexchars;";
  }
  /**
   * Helper function to determine if a Unicode value is valid.
   *
   * Accepted values are tab (0x9), line feed (0xa), carriage return (0xd)
   * and the ranges 0x20-0xd7ff, 0xe000-0xfffd and 0x10000-0x10ffff.
   *
   * @param int $i Unicode value
   * @return bool true if the value was a valid Unicode number
   */
  function kses_valid_unicode($i) {
      // The three permitted control characters.
      if ( $i == 0x9 || $i == 0xa || $i == 0xd ) {
          return true;
      }

      $valid_ranges = array(
          array(0x20, 0xd7ff),
          array(0xe000, 0xfffd),
          array(0x10000, 0x10ffff),
      );
      foreach ($valid_ranges as $range) {
          if ( $i >= $range[0] && $i <= $range[1] ) {
              return true;
          }
      }

      return false;
  }
  /**
   * Converts numeric entities to their character counterparts.
   *
   * Decodes decimal and hexadecimal numeric HTML entities (&#65; and &#x41;).
   * Named entities like &auml; are left alone; they are not needed by the URL
   * protocol whitelisting system.
   *
   * @param string $string Content to change entities
   * @return string Content after decoded entities
   */
  function kses_decode_entities($string)
  {
      $decimal_decoded = preg_replace_callback('/&#([0-9]+);/', '_kses_decode_entities_chr', $string);

      return preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', '_kses_decode_entities_chr_hexdec', $decimal_decoded);
  }
  /**
   * Regex callback for kses_decode_entities(): decimal entities.
   *
   * @param array $match preg match; index 1 holds the decimal digits
   * @return string The character produced by chr() for that number
   */
  function _kses_decode_entities_chr( $match ) {
      $decimal_code = $match[1];

      return chr( $decimal_code );
  }
  /**
   * Regex callback for kses_decode_entities(): hexadecimal entities.
   *
   * @param array $match preg match; index 1 holds the hexadecimal digits
   * @return string The character produced by chr() for that number
   */
  function _kses_decode_entities_chr_hexdec( $match ) {
      $code = hexdec( $match[1] );

      return chr( $code );
  }
  /**
   * Inline CSS filter.
   *
   * Sanitizes the value of a "style" attribute. The whole value is rejected
   * (an empty string is returned) when it contains a backslash, "(", "&",
   * "=", "}" or a comment opener. Otherwise the value is split on ";" and
   * only declarations whose property name is on the whitelist are kept;
   * items without a colon are kept as they are. Kept items are trimmed and
   * re-joined with ";".
   *
   * @param string $css Raw value of a style attribute
   * @return string Filtered CSS, or an empty string if nothing safe remains
   */
  function kses_safecss_filter_attr( $css ) {
      $css = kses_no_null($css);
      $css = str_replace(array("\n","\r","\t"), '', $css);

      // Remove any inline css containing \ ( & } = or comments. The backslash
      // is written four times: PHP's single-quoted string halves it, and PCRE
      // needs the remaining "\\" to mean one literal backslash in the class.
      if ( preg_match( '%[\\\\(&=}]|/\*%', $css ) )
          return '';

      $css_array = explode( ';', trim( $css ) );
      $allowed_attr = array( 'text-align', 'margin', 'color', 'float',
          'border', 'background', 'background-color', 'border-bottom', 'border-bottom-color',
          'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left',
          'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color',
          'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top',
          'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'caption-side',
          'clear', 'cursor', 'direction', 'display', 'font', 'font-family', 'font-size', 'font-style',
          'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin-bottom',
          'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom',
          'padding-left', 'padding-right', 'padding-top', 'text-decoration', 'text-indent', 'vertical-align',
          'width' );

      // An emptied whitelist switches property filtering off.
      if ( empty($allowed_attr) )
          return $css;

      $kept = array();
      foreach ( $css_array as $css_item ) {
          // Trim before the emptiness test so that whitespace-only items do
          // not leave stray ";" separators in the result.
          $css_item = trim( $css_item );
          if ( $css_item === '' )
              continue;

          if ( strpos( $css_item, ':' ) !== false ) {
              // explode() replaces split(), which was removed in PHP 7.
              $parts = explode( ':', $css_item, 2 );
              if ( !in_array( strtolower( trim( $parts[0] ) ), $allowed_attr, true ) )
                  continue; // property is not on the whitelist
          }

          $kept[] = $css_item;
      }

      return implode( ';', $kept );
  }