config-scanner.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. #!/usr/bin/php
  2. <?php
  3. chdir(dirname(__FILE__));
  4. require_once 'common.php';
  5. require_once '../library/HTMLPurifier.auto.php';
  6. assertCli();
  7. if (version_compare(PHP_VERSION, '5.2.2', '<')) {
  8. echo "This script requires PHP 5.2.2 or later, for tokenizer line numbers.";
  9. exit(1);
  10. }
  11. /**
  12. * @file
  13. * Scans HTML Purifier source code for $config tokens and records the
  14. * directive being used; configdoc can use this info later.
  15. *
 * A summary count is printed to the console, and the recorded usages are
 * written to ../configdoc/usage.xml (relative to the library directory),
 * grouped by directive, then file, then line, for our XSLT transform to use.
  18. */
  19. $FS = new FSTools();
  20. chdir(dirname(__FILE__) . '/../library/');
  21. $raw_files = $FS->globr('.', '*.php');
  22. $files = array();
  23. foreach ($raw_files as $file) {
  24. $file = substr($file, 2); // rm leading './'
  25. if (strncmp('standalone/', $file, 11) === 0) continue; // rm generated files
  26. if (substr_count($file, '.') > 1) continue; // rm meta files
  27. $files[] = $file;
  28. }
  29. /**
  30. * Moves the $i cursor to the next non-whitespace token
  31. */
  32. function consumeWhitespace($tokens, &$i) {
  33. do {$i++;} while (is_array($tokens[$i]) && $tokens[$i][0] === T_WHITESPACE);
  34. }
  35. /**
  36. * Tests whether or not a token is a particular type. There are three run-cases:
  37. * - ($token, $expect_token): tests if the token is $expect_token type;
  38. * - ($token, $expect_value): tests if the token is the string $expect_value;
  39. * - ($token, $expect_token, $expect_value): tests if token is $expect_token type, and
  40. * its string representation is $expect_value
  41. */
  42. function testToken($token, $value_or_token, $value = null) {
  43. if (is_null($value)) {
  44. if (is_int($value_or_token)) return is_array($token) && $token[0] === $value_or_token;
  45. else return $token === $value_or_token;
  46. } else {
  47. return is_array($token) && $token[0] === $value_or_token && $token[1] === $value;
  48. }
  49. }
  50. $counter = 0;
  51. $full_counter = 0;
  52. $tracker = array();
  53. foreach ($files as $file) {
  54. $tokens = token_get_all(file_get_contents($file));
  55. $file = str_replace('\\', '/', $file);
  56. for ($i = 0, $c = count($tokens); $i < $c; $i++) {
  57. $ok = false;
  58. // Match $config
  59. if (!$ok && testToken($tokens[$i], T_VARIABLE, '$config')) $ok = true;
  60. // Match $this->config
  61. while (!$ok && testToken($tokens[$i], T_VARIABLE, '$this')) {
  62. consumeWhitespace($tokens, $i);
  63. if (!testToken($tokens[$i], T_OBJECT_OPERATOR)) break;
  64. consumeWhitespace($tokens, $i);
  65. if (testToken($tokens[$i], T_STRING, 'config')) $ok = true;
  66. break;
  67. }
  68. if (!$ok) continue;
  69. $ok = false;
  70. for($i++; $i < $c; $i++) {
  71. if ($tokens[$i] === ',' || $tokens[$i] === ')' || $tokens[$i] === ';') {
  72. break;
  73. }
  74. if (is_string($tokens[$i])) continue;
  75. if ($tokens[$i][0] === T_OBJECT_OPERATOR) {
  76. $ok = true;
  77. break;
  78. }
  79. }
  80. if (!$ok) continue;
  81. $line = $tokens[$i][2];
  82. consumeWhitespace($tokens, $i);
  83. if (!testToken($tokens[$i], T_STRING, 'get')) continue;
  84. consumeWhitespace($tokens, $i);
  85. if (!testToken($tokens[$i], '(')) continue;
  86. $full_counter++;
  87. $matched = false;
  88. do {
  89. // What we currently don't match are batch retrievals, and
  90. // wildcard retrievals. This data might be useful in the future,
  91. // which is why we have a do {} while loop that doesn't actually
  92. // do anything.
  93. consumeWhitespace($tokens, $i);
  94. if (!testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING)) continue;
  95. $id = substr($tokens[$i][1], 1, -1);
  96. $counter++;
  97. $matched = true;
  98. if (!isset($tracker[$id])) $tracker[$id] = array();
  99. if (!isset($tracker[$id][$file])) $tracker[$id][$file] = array();
  100. $tracker[$id][$file][] = $line;
  101. } while (0);
  102. //echo "$file:$line uses $namespace.$directive\n";
  103. }
  104. }
  105. echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n";
  106. echo "Generating XML... ";
  107. $xw = new XMLWriter();
  108. $xw->openURI('../configdoc/usage.xml');
  109. $xw->setIndent(true);
  110. $xw->startDocument('1.0', 'UTF-8');
  111. $xw->startElement('usage');
  112. foreach ($tracker as $id => $files) {
  113. $xw->startElement('directive');
  114. $xw->writeAttribute('id', $id);
  115. foreach ($files as $file => $lines) {
  116. $xw->startElement('file');
  117. $xw->writeAttribute('name', $file);
  118. foreach ($lines as $line) {
  119. $xw->writeElement('line', $line);
  120. }
  121. $xw->endElement();
  122. }
  123. $xw->endElement();
  124. }
  125. $xw->endElement();
  126. $xw->flush();
  127. echo "done!\n";
  128. // vim: et sw=4 sts=4