Lexer.php 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. <?php
  2. require_once '../library/HTMLPurifier.auto.php';
  3. @include_once '../test-settings.php';
  4. // PEAR
  5. require_once 'Benchmark/Timer.php'; // to do the timing
  6. require_once 'Text/Password.php'; // for generating random input
  // Number of tokenization passes each lexer makes per benchmark case.
  // Honors $GLOBALS['HTMLPurifierTest']['Runs'] when it is set; otherwise 2.
  $RUNS = 2;
  if (isset($GLOBALS['HTMLPurifierTest']['Runs'])) {
      $RUNS = $GLOBALS['HTMLPurifierTest']['Runs'];
  }

  // Lexers under test, keyed by the name shown in the report.
  require_once 'HTMLPurifier/Lexer/DirectLex.php';
  $LEXERS = array('DirectLex' => new HTMLPurifier_Lexer_DirectLex());

  // DOMLex is only loaded and registered when running on PHP 5 or later.
  if (version_compare(PHP_VERSION, '5') >= 0) {
      require_once 'HTMLPurifier/Lexer/DOMLex.php';
      $LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
  }
  /**
   * Benchmark timer that renders its markers as one HTML table row.
   *
   * Every marker other than 'Start' and 'Stop' contributes two cells: the
   * marker's elapsed time as a percentage of the first such marker's time,
   * followed by the elapsed time itself.
   */
  class RowTimer extends Benchmark_Timer
  {
      /** HTML-escaped label placed in the first cell of the row. */
      var $name;

      /**
       * PHP 4-style constructor.
       *
       * NOTE(review): PHP 8 no longer treats a method named after its class as
       * a constructor, and this body relies on the parent exposing a
       * Benchmark_Timer() method -- confirm against the targeted PHP and PEAR
       * Benchmark versions.
       *
       * @param string $name Row label; escaped with htmlentities() before storage.
       * @param bool   $auto Forwarded unchanged to Benchmark_Timer().
       */
      function RowTimer($name, $auto = false) {
          $this->name = htmlentities($name);
          $this->Benchmark_Timer($auto);
      }

      /**
       * Builds the <tr> markup for this timer's profiling data.
       *
       * The first marker that is not 'Start' or 'Stop' is the baseline and
       * stays the baseline for the whole row, even when its time is zero.
       * When the baseline is zero no ratio can be computed, so the
       * percentage cells show 0.00 instead of dividing by zero.
       *
       * @return string The complete table row.
       */
      function getOutput() {
          $out = '<tr>';
          $out .= "<td>{$this->name}</td>";

          // null means "no baseline chosen yet"; a real baseline of 0 must not
          // be mistaken for that state.
          $standard = null;
          foreach ($this->getProfiling() as $v) {
              if ($v['name'] == 'Start' || $v['name'] == 'Stop') continue;

              if ($standard === null) $standard = (float) $v['diff'];
              $perc = ($standard != 0.0) ? $v['diff'] * 100 / $standard : 0.0;

              // A negative elapsed time marks an unreliable run; grey it out.
              $bad_run = ($v['diff'] < 0);
              $out .= '<td align="right"'.
                  ($bad_run ? ' style="color:#AAA;"' : '').
                  '>' . number_format($perc, 2, '.', '') .
                  '%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
          }

          $out .= '</tr>';
          return $out;
      }
  }
  /**
   * Echoes the HTML-escaped keys of the global $LEXERS array, joined by ' / '.
   */
  function print_lexers() {
      global $LEXERS;

      $escapedNames = array();
      foreach ($LEXERS as $lexerName => $unused) {
          $escapedNames[] = htmlspecialchars($lexerName);
      }

      echo implode(' / ', $escapedNames);
  }
  /**
   * Runs every lexer in the global $LEXERS over one document and displays
   * the timing row.
   *
   * Each lexer tokenizes the document $RUNS times; a timer marker named
   * after the lexer is set once its passes are done.
   *
   * @param string $name     Label for the row, handed to RowTimer.
   * @param string $document HTML passed to each lexer's tokenizeHTML().
   */
  function do_benchmark($name, $document) {
      global $LEXERS, $RUNS;

      $config  = HTMLPurifier_Config::createDefault();
      $context = new HTMLPurifier_Context();
      $rowTimer = new RowTimer($name);

      $rowTimer->start();
      foreach ($LEXERS as $lexerName => $lexer) {
          $pass = 0;
          while ($pass < $RUNS) {
              $parsed = $lexer->tokenizeHTML($document, $config, $context);
              $pass++;
          }
          // The marker closes the timing window for this lexer.
          $rowTimer->setMarker($lexerName);
      }
      $rowTimer->stop();

      $rowTimer->display();
  }
  ?>
  <html>
  <head>
  <title>Benchmark: <?php print_lexers(); ?></title>
  </head>
  <body>
  <h1>Benchmark: <?php print_lexers(); ?></h1>
  <table border="1">
  <tr><th>Case</th><?php
  // One heading spanning two columns per lexer.
  foreach ($LEXERS as $lexerName => $unused) {
      printf('<th colspan="2">%s</th>', htmlspecialchars($lexerName));
  }
  ?></tr>
  <?php
  // ************************************************************************** //
  // Benchmark cases, one table row each: sample HTML pages, known crashers,
  // then one random document.

  // sample of html pages
  $dir = 'samples/Lexer';
  $dh = opendir($dir);
  // opendir() reports its own warning on failure; skip the sample pages in
  // that case and still run the remaining cases.
  if ($dh !== false) {
      while (false !== ($filename = readdir($dh))) {
          // Keep only names that end in ".html"; comparing the suffix directly
          // also accepts names that contain ".html" earlier in the name.
          if (substr($filename, -5) !== '.html') continue;
          $document = file_get_contents($dir . '/' . $filename);
          // An unreadable file is skipped rather than benchmarked as false.
          if ($document === false) continue;
          do_benchmark("File: $filename", $document);
      }
      closedir($dh);
  }

  // crashers, caused infinite loops before
  $snippets = array();
  $snippets[] = '<a href="foo>';
  $snippets[] = '<a "=>';
  foreach ($snippets as $snippet) {
      do_benchmark($snippet, $snippet);
  }

  // random input
  $random = Text_Password::create(80, 'unpronounceable', 'qwerty <>="\'');
  do_benchmark('Random input', $random);
  ?></table>
  <?php
  // Print the random document, HTML-escaped, below the results table.
  printf(
      '<div>Random input was: <span colspan="4" style="font-family:monospace;">%s</span></div>',
      htmlspecialchars($random)
  );
  ?>
  </body></html>
  <?php
  // vim: et sw=4 sts=4
  111. // vim: et sw=4 sts=4