You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
3.8 KiB
101 lines
3.8 KiB
<?php
|
|
/* For licensing terms, see /license.txt */
|
|
/**
|
|
* This script generates a directory based on the English language variables
|
|
* but only composed of the 10,000 (can be configured) most frequent words
|
|
* used in Chamilo. This implies first using the langstats.php script, which
|
|
* in turn implies configuring an additional variable in configuration.php
|
|
* (see langstats.php for more info).
|
|
* When running the language_builder, please make sure this parameter is
|
|
* set to 0 in the configuration.php file, otherwise it will take *ages*.
|
|
*/
|
|
/**
|
|
* Requires
|
|
*/
|
|
require_once '../../inc/global.inc.php';
|
|
require_once 'langstats.class.php';
|
|
global $_configuration;
// The file header warns that the run takes ages unless this setting is 0,
// so force it off for the duration of this script.
$_configuration['language_measure_frequency'] = 0;
$langstats = new langstats();
$orig_lang = 'english';

/**
 * Init
 */
$words_limit = 10000; //change this if you want more words
$terms_limit = 3000; //change this if you think you'll need more terms
$terms = $langstats->get_popular_terms($terms_limit);
$words_counter = 0;
$i = 0;
// Maps term name => position of the term in the list returned above.
$terms_in_limit = array();
$lang_dir = api_get_path(SYS_LANG_PATH);
$arch_dir = api_get_path(SYS_ARCHIVE_PATH);

/**
 * Code run
 */
// Accumulate terms (in the order get_popular_terms() returned them --
// presumably most-used first, TODO confirm in langstats.class.php) until
// the English translations collected so far exceed the configured word budget.
foreach ($terms as $row) {
    // Honour the configurable limit instead of a hard-coded 10000.
    // The check happens before adding, so the last accepted term may push
    // the total slightly above $words_limit.
    if ($words_counter > $words_limit) {
        break;
    }
    $words = str_word_count(get_lang($row['term_name'], null, $orig_lang));
    $words_counter += $words;
    $terms_in_limit[$row['term_name']] = $i;
    $i++;
}
|
|
//echo $words_counter.'<br />';
|
|
|
|
echo "Reached ".count($terms_in_limit)." terms for the $words_counter most-used words<br /><br />\n";

echo "Scanning English files, trying to find these terms...<br />\n";
// Make sure the per-language output directory exists. Checking the full
// path (and creating it recursively) also covers the case where
// "langstats" already exists but the language sub-directory does not.
$langstats_lang_dir = $arch_dir.'/langstats/'.$orig_lang;
if (!is_dir($langstats_lang_dir)) {
    // 0777 is mkdir()'s default mode; it is spelled out only because the
    // recursive flag is the third parameter.
    mkdir($langstats_lang_dir, 0777, true);
}
|
|
// Scan every English language file and write, under the archive directory,
// a reduced copy containing only the variables whose term is in the
// selected list.
$source_dir = $lang_dir.'/'.$orig_lang;
$target_dir = $arch_dir.'/langstats/'.$orig_lang;
$list_files = scandir($source_dir);
if ($list_files === false) {
    // Unreadable source directory: nothing to scan.
    $list_files = array();
}
$j = 1;
$terms_found = array();
$words_found = 0;
$global_var = array(); //keep the combination of all vars
// Keep the name-keyed map for constant-time membership tests, then flip
// $terms_in_limit to position => name, which is the shape used from here on.
$term_lookup = $terms_in_limit;
$terms_in_limit = array_flip($terms_in_limit);
foreach ($list_files as $file) {
    // Skip ".", "..", hidden entries and anything that is not a regular file.
    if (substr($file, 0, 1) == '.' || !is_file($source_dir.'/'.$file)) {
        continue;
    }
    $vars = file($source_dir.'/'.$file);
    if ($vars === false) {
        continue;
    }
    $local_var = array();
    $file_string = '<?php'."\n";
    foreach ($vars as $line) {
        // Only lines that start with a PHP variable are candidates.
        // NOTE(review): only this first line is copied; an assignment that
        // spans several source lines would be truncated -- confirm the
        // language files never do that.
        $var = array();
        if (!preg_match('/^(\$\w*)/', $line, $var)) {
            continue;
        }
        // Variable name without the leading "$".
        $name = (string) substr($var[1], 1);
        if (isset($term_lookup[$name])) {
            // e.g. $Tools => term "Tools"
            $term = $name;
        } elseif (strpos($name, 'lang') === 0 && isset($term_lookup[(string) substr($name, 4)])) {
            // e.g. $langTools => term "Tools"; the prefix is verified so an
            // unrelated four-character prefix cannot produce a false match.
            $term = (string) substr($name, 4);
        } else {
            // Not one of the selected terms: do not care.
            continue;
        }
        $local_var[$var[1]] = $line;
        $file_string .= $line;
        $terms_found[] = $term;
        // Count words using the bare term name (no "$", no "lang" prefix).
        $words_found += str_word_count(get_lang($term, null, $orig_lang));
    }
    echo "Writing ".$target_dir.'/'.$file."<br />\n";
    file_put_contents($target_dir.'/'.$file, $file_string);
    // Array union: a variable already collected from an earlier file wins.
    $global_var += $local_var;
}
|
|
// Number of distinct variables collected versus number of selected terms.
$found_total = count($global_var);
$terms_diff = $found_total - count($terms_in_limit);
echo $found_total." terms found in English files (summing up to $words_found words). Some terms ($terms_diff in this case) might have appeared in two different files<br />";

/**
 * Display results
 */
// List the selected terms for which no variable was found in any English file.
$missing_terms = array_diff($terms_in_limit, $terms_found);
echo "Difference between filtered and found in English:<br />";
echo "<pre>".print_r($missing_terms, true)."</pre>";
echo "#";
|
|
|