123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
<?php
/**
 * Builds reduced copies of the English language files that contain only the
 * most-used terms.
 *
 * Steps:
 *  1. Ask langstats for the most popular terms (at most $terms_limit) and keep
 *     them, in order, until the running English word count passes $words_limit.
 *  2. Scan every file of the English language directory and, for each variable
 *     definition whose name is one of the kept terms, copy the line into a file
 *     of the same name under <archive>/langstats/<language>/.
 *  3. Echo (as HTML) a summary and the list of kept terms that were not found.
 */
require_once '../../inc/global.inc.php';
require_once 'langstats.class.php';
global $_configuration;
// NOTE(review): presumably switches off usage measurement so the get_lang()
// calls made by this script do not alter the statistics - confirm.
$_configuration['language_measure_frequency'] = 0;
$langstats = new langstats();
$orig_lang = 'english';
// Stop collecting terms once the counted words exceed this number.
$words_limit = 10000;
// Maximum number of terms requested from langstats.
$terms_limit = 3000;
$terms = $langstats->get_popular_terms($terms_limit);
$words_counter = 0;
$i = 0;
// Maps term name => position in the popularity list.
$terms_in_limit = [];
$lang_dir = api_get_path(SYS_LANG_PATH);
$arch_dir = api_get_path(SYS_ARCHIVE_PATH);
foreach ($terms as $row) {
    // The limit is checked before adding, so the term that crosses it is kept.
    if ($words_counter > $words_limit) {
        break;
    }
    $words_counter += str_word_count(get_lang($row['term_name'], null, $orig_lang));
    $terms_in_limit[$row['term_name']] = $i;
    $i++;
}
echo "Reached ".count($terms_in_limit)." terms for the $words_counter most-used words<br /><br />\n";
echo "Scanning English files, trying to find these terms...<br />\n";

// Create the whole output path in one go, so that a pre-existing "langstats"
// directory without the language sub-directory is handled as well.
$output_dir = $arch_dir.'/langstats/'.$orig_lang;
if (!is_dir($output_dir)) {
    mkdir($output_dir, 0777, true);
}

$source_dir = $lang_dir.'/'.$orig_lang;
$list_files = scandir($source_dir);
if ($list_files === false) {
    $list_files = [];
}
$terms_found = [];
$words_found = 0;
$global_var = [];
// Keep the name-keyed map for constant-time membership tests; the flipped
// version (position => term name) is what the final report works on.
$term_lookup = $terms_in_limit;
$terms_in_limit = array_flip($terms_in_limit);
foreach ($list_files as $file) {
    // Skip ".", ".." and hidden entries.
    if (substr($file, 0, 1) == '.') {
        continue;
    }
    // Only regular files can be read line by line.
    if (!is_file($source_dir.'/'.$file)) {
        continue;
    }
    $vars = file($source_dir.'/'.$file);
    if ($vars === false) {
        continue;
    }
    $local_var = [];
    $file_string = '<?php'."\n";
    foreach ($vars as $line) {
        $var = [];
        // Only lines that start with a variable ("$Something") are of interest.
        if (preg_match('/^(\$\w*)/', $line, $var) !== 1) {
            continue;
        }
        // Variable name without the leading "$".
        $plain_name = (string) substr($var[1], 1);
        // Variable name without its first five characters
        // (presumably a "$lang" prefix, e.g. "$langTuesday" - confirm).
        $short_name = (string) substr($var[1], 5);
        if (isset($term_lookup[$plain_name])) {
            $term = $plain_name;
        } elseif (isset($term_lookup[$short_name])) {
            $term = $short_name;
        } else {
            // Not one of the selected terms.
            continue;
        }
        $local_var[$var[1]] = $line;
        $file_string .= $line;
        $terms_found[] = $term;
        // Look the term up by its bare name (no "$"), in both cases.
        $words_found += str_word_count(get_lang($term, null, $orig_lang));
    }
    echo "Writing ".$output_dir.'/'.$file."<br />\n";
    file_put_contents($output_dir.'/'.$file, $file_string);
    // Array union: a variable already seen in an earlier file is not overwritten.
    $global_var += $local_var;
}
$terms_diff = count($global_var) - count($terms_in_limit);
echo count(
    $global_var
)." terms found in English files (summing up to $words_found words). Some terms ($terms_diff in this case) might have appeared in two different files<br />";
echo "Difference between filtered and found in English:<br />";
echo "<pre>".print_r(array_diff($terms_in_limit, $terms_found), true)."</pre>";
echo "#";
|