langstats_file_builder.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. <?php
  2. /* For licensing terms, see /license.txt */
  3. /**
  4. * This script generates a directory based on the English language variables
  5. * but only composed of the 10,000 (can be configured) most frequent words
  6. * used in Chamilo. This implies first using the langstats.php script, which
  7. * in turn implies configuring an additional variable in configuration.php
  8. * (see langstats.php for more info).
  9. * When running this builder script, please make sure that variable
  10. * (language_measure_frequency) is set to 0 in the configuration.php file, otherwise it will take *ages*.
  11. */
  12. /**
  13. * Requires
  14. */
  15. $language_file = array(
  16. 'accessibility',
  17. 'admin',
  18. 'agenda',
  19. 'announcements',
  20. 'blog',
  21. 'chat',
  22. 'course_description',
  23. 'course_home',
  24. 'course_info',
  25. 'coursebackup',
  26. 'courses',
  27. 'create_course',
  28. 'document',
  29. 'dropbox',
  30. 'exercice',
  31. 'external_module',
  32. 'forum',
  33. 'glossary',
  34. 'gradebook',
  35. 'group',
  36. 'help',
  37. 'hotspot',
  38. 'import',
  39. 'index',
  40. 'install',
  41. 'learnpath',
  42. 'link',
  43. 'md_document',
  44. 'md_link',
  45. 'md_mix',
  46. 'md_scorm',
  47. 'messages',
  48. 'myagenda',
  49. 'notebook',
  50. 'notification',
  51. 'registration',
  52. 'reportlib',
  53. 'reservation',
  54. 'resourcelinker',
  55. 'scorm',
  56. 'scormbuilder',
  57. 'scormdocument',
  58. 'shibboleth',
  59. 'slideshow',
  60. 'survey',
  61. 'tracking',
  62. 'trad4all',
  63. 'userInfo',
  64. 'videoconf',
  65. 'wiki',
  66. 'work',
  67. );
  68. require_once '../../inc/global.inc.php';
  69. require_once 'langstats.class.php';
  70. global $_configuration;
  71. $_configuration['language_measure_frequency'] = 0;
  72. $langstats = new langstats();
  73. $orig_lang = 'english';
  74. /**
  75. * Init
  76. */
  77. $words_limit = 10000; //change this if you want more words
  78. $terms_limit = 3000; //change this if you think you'll need more terms
  79. $terms = $langstats->get_popular_terms($terms_limit);
  80. $words_counter = 0;
  81. $i = 0;
  82. $terms_in_limit = array();
  83. $lang_dir = api_get_path(SYS_LANG_PATH);
  84. $arch_dir = api_get_path(SYS_ARCHIVE_PATH);
  85. /**
  86. * Code run
  87. */
  88. foreach ($terms as $row) {
  89. if ($words_counter > 10000) { break; }
  90. $words = str_word_count(get_lang($row['term_name'],null,$orig_lang));
  91. $words_counter += $words;
  92. $terms_in_limit[$row['term_name']] = $i;
  93. //echo "Term <b>".$row['term_name']."</b> is <b>'".get_lang($row['term_name'],null,$orig_lang)."'</b> which means $words words<br /><br />\n";
  94. //if ($words_counter%1000 >= 0) {
  95. //echo "Reached $words_counter words at term $i (".$row['term_name']." used ".$row['term_count']." times)...<br />\n";
  96. //}
  97. $i++;
  98. }
  99. //echo $words_counter.'<br />';
  100. echo "Reached ".count($terms_in_limit)." terms for the $words_counter most-used words<br /><br />\n";
  101. echo "Scanning English files, trying to find these terms...<br />\n";
  102. if (!is_dir($arch_dir.'/langstats')) {
  103. mkdir($arch_dir.'/langstats');
  104. mkdir($arch_dir.'/langstats/'.$orig_lang);
  105. }
  106. $list_files = scandir($lang_dir.'/'.$orig_lang);
  107. $j = 1;
  108. $terms_found = array();
  109. $words_found = 0;
  110. $global_var = array(); //keep the combination of all vars
  111. $terms_in_limit = array_flip($terms_in_limit);
  112. foreach ($list_files as $file) {
  113. if (substr($file,0,1) == '.') {continue;}
  114. //echo "'".substr($file,0,-8)."',<br />"; //print in a PHP array format
  115. $vars = file($lang_dir.'/'.$orig_lang.'/'.$file);
  116. $local_var = array();
  117. $file_string = '<?php'."\n";
  118. foreach ($vars as $line) {
  119. $var = array();
  120. $res = preg_match('/^(\$\w*)/',$line,$var);
  121. if ($res>0) {
  122. //echo $var[1]."<br />";
  123. if (in_array(substr($var[1],1),$terms_in_limit)) {
  124. //echo "Var ".$var[1]." was in the limit<br />";
  125. $local_var[$var[1]] = $line;
  126. $file_string .= $line;
  127. $terms_found[] = substr($var[1],1); //e.g. store Tools
  128. $words_found += str_word_count(get_lang($var[1],null,$orig_lang));
  129. } elseif (in_array(substr($var[1],5),$terms_in_limit)) {
  130. //echo "Var ".$var[1]." was in the limit<br />";
  131. $local_var[$var[1]] = $line;
  132. $file_string .= $line;
  133. $terms_found[] = substr($var[1],5); //e.g. store langTools
  134. $words_found += str_word_count(get_lang(substr($var[1],5),null,$orig_lang));
  135. } //else do not care
  136. }
  137. }
  138. echo "Writing ".$arch_dir.'/langstats/'.$orig_lang.'/'.$file."<br />\n";
  139. file_put_contents($arch_dir.'/langstats/'.$orig_lang.'/'.$file,$file_string);
  140. $global_var += $local_var;
  141. };
  142. $terms_diff = count($global_var)-count($terms_in_limit);
  143. echo count($global_var)." terms found in English files (summing up to $words_found words). Some terms ($terms_diff in this case) might have appeared in two different files<br />";
  144. /**
  145. * Display results
  146. */
  147. echo "Difference between filtered and found in English:<br />";
  148. //print_r($terms_found);
  149. echo "<pre>".print_r(array_diff($terms_in_limit,$terms_found),1)."</pre>";
  150. echo "#";