Chamilo is a learning management system focused on ease of use and accessibility
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
chamilo-lms/main/metadata/md_phpdig.php

225 lines
8.2 KiB

<?php /* <!-- Dokeos metadata/md_phpdig.php -->
<!-- 2005/03/24 -->
<!-- Copyright (C) 2005 rene.haentjens@UGent.be - see metadata/md_funcs.php -->
*/
/**
==============================================================================
* Dokeos Metadata: PhpDig connection
*
* If PhpDig 1.8.3 is installed in a Dokeos course site, then MD items
* can be indexed for search (via PhpDig's search screen search.php).
*
* The functions below inject the words of metadata/indexabletext directly
* into PhpDig's tables. Affected tables:
*
* keywords: key_id, twoletters, keyword (lowercase, accents removed)
*
* sites: site_id, site_url (e.g. http://xx.yy.zz/), upddate, ...
*
* spider: spider_id, site_id, upddate, num_words, first_words,
* path (e.g. uu/vv/ww/), file (e.g. index.php?sid=xxx), ...
*
* engine: spider_id, key_id, weight
*
* Most of the function code is a simplified version of real PhpDig code
* released under the GNU GPL V2, see www.phpdig.net.
*
* @package dokeos.metadata
==============================================================================
*/
// PHPDIG CONNECTION ---------------------------------------------------------->
// Build the paths of the PhpDig include files inside the current course folder.
// NOTE(review): the file header mentions PhpDig 1.8.3 while this path uses
// phpdig-1.8.6 - confirm which version is actually meant.
$phpDigInc = get_course_path() . $_course['path'] . '/phpdig-1.8.6/includes/';
$phpDigIncCn = $phpDigInc. 'connect.php'; // to connect to PhpDig's database
$phpDigIncCw = $phpDigInc. 'common_words.txt'; // stopwords
// if (!file_exists($phpDigIncCn)) return(); doesn't seem to work properly...
// Load the stopword file, when it exists and is readable, into $common_words
// as trimmed word => 1, so that index_words() can test membership with isset().
// This code does not assign $common_words at all when the file is missing.
// NOTE(review): each() is deprecated since PHP 7.2 and removed in PHP 8.
if (file_exists($phpDigIncCw))
if (is_array($lines = @file($phpDigIncCw)))
while (list($id,$word) = each($lines))
$common_words[trim($word)] = 1;
// Maximum length (as measured by strlen) of the summary text kept by
// get_first_words() before "..." is appended.
define('SUMMARY_DISPLAY_LENGTH', 700);
//define('PHPDIG_ENCODING', 'iso-8859-1');
// Encoding name taken from the global $charset, lower-cased.
define('PHPDIG_ENCODING', strtolower($charset));
// Words of at most SMALL_WORDS_SIZE characters are stripped by
// phpdigEpureText() and skipped by index_words(); words longer than
// MAX_WORDS_SIZE are skipped by index_words().
define('SMALL_WORDS_SIZE', 2);
define('MAX_WORDS_SIZE',50);
// Body of the regex character class that defines "word characters".
// NOTE(review): the non-ASCII characters in this literal and in the accent
// table below show up as replacement-character residue in this copy;
// presumably the original file is ISO-8859-1 - verify the encoding before
// editing any of these strings.
define('WORDS_CHARS_LATIN1', '[:alnum:]<EFBFBD><EFBFBD>ߵ');
// Build two parallel strings for strtr(): for every base letter, each single
// byte of its list of accented variants is appended to $letterswith and the
// base letter itself to $letterswithout. chunk_split()/explode() is used to
// split the variant list into one-byte pieces.
// NOTE(review): $letterswith and $letterswithout are appended to without
// being initialised in this block.
foreach (array( 'A'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'a'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'O'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'o'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>',
'E'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'e'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'C'=>'<EFBFBD>', 'c'=>'<EFBFBD>', 'I'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>',
'i'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'U'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'u'=>'<EFBFBD><EFBFBD><EFBFBD><EFBFBD>', 'Y'=>'<EFBFBD>', 'y'=>'<EFBFBD><EFBFBD>',
'N'=>'<EFBFBD>', 'n'=>'<EFBFBD>') as $without => $allwith)
foreach (explode('!', chunk_split($allwith, 1, '!')) as $with)
if ($with) // because last one will be empty!
{
$letterswithout .= $without; $letterswith .= $with;
}
define('LETTERS_WITH_ACCENTS', $letterswith);
define('SAME_WITHOUT_ACCENTS', $letterswithout);
// Sanity check: strtr() in phpdigStripAccents() needs both strings to have
// the same length; give_up() (defined elsewhere) is called when they differ.
(strlen(LETTERS_WITH_ACCENTS) == strlen(SAME_WITHOUT_ACCENTS))
or give_up('LETTERS_WITH_ACCENTS problem in md_phpdig.php');
/**
 * Return the PhpDig site_id for a URL, registering the site when needed.
 *
 * If exactly one row of the "sites" table has this site_url, its id is
 * returned. In every other case (no row, or more than one) a new "sites" row
 * is inserted, followed by a "site_page" row with num_page 0 for it, and the
 * id reported by mysql_insert_id() is returned.
 *
 * @param string $url value looked up in, or stored into, sites.site_url
 * @return int id of the existing or newly inserted site
 */
function find_site($url)
{
    $site_url = "site_url = '" . addslashes($url) . "'";

    $result = api_sql_query("SELECT site_id FROM " . PHPDIG_DB_PREFIX .
        "sites WHERE " . $site_url, __FILE__, __LINE__); // find site
    if (mysql_num_rows($result) == 1)
    {
        $row = mysql_fetch_array($result);
        return (int) $row['site_id'];
    }

    api_sql_query("INSERT INTO " . PHPDIG_DB_PREFIX .
        "sites SET " . $site_url, __FILE__, __LINE__); // new site
    $site_id = mysql_insert_id();

    // Pass file and line here as well, like every other query in this file
    // (presumably api_sql_query() uses them to locate a failing query).
    api_sql_query("INSERT INTO " . PHPDIG_DB_PREFIX .
        "site_page (site_id,num_page) VALUES ('$site_id', '0')",
        __FILE__, __LINE__);

    return $site_id;
}
/**
 * Remove indexed page(s) of a site from PhpDig's "engine" and "spider" tables.
 *
 * The site is resolved with find_site(). Pages are selected by exact path and,
 * when $file is non-empty, by a file name starting with $file. For every
 * selected page its "engine" rows are deleted, then the "spider" rows
 * themselves. A one-line HTML report with the affected-row counts is echoed.
 *
 * @param string $url  site URL, passed to find_site()
 * @param string $path page path, matched exactly
 * @param string $file optional file-name prefix; '_' and '%' in it are escaped
 *                     so that they are not treated as LIKE wildcards
 * @return int site_id of the site
 */
function remove_engine_entries($url, $path, $file = '')
{
    global $charset;

    $and_path = " AND path = '" . addslashes($path) . "'";
    if ($file) $and_path .= " AND file LIKE '" . addslashes(
        str_replace(array('_', '%'), array('\_', '\%'), $file)) . "%'";

    $site_id = find_site($url);

    $result = api_sql_query("SELECT spider_id FROM " . PHPDIG_DB_PREFIX .
        "spider WHERE site_id=" . $site_id . $and_path,
        __FILE__, __LINE__); // find page(s)

    // Per-page counts of deleted keyword references, e.g. " +12 +7".
    // Initialised up front so that neither the .= below nor the echo touches
    // an undefined variable when no page matches.
    $aff = '';
    while ($row = mysql_fetch_array($result))
    {
        api_sql_query("DELETE FROM " . PHPDIG_DB_PREFIX .
            "engine WHERE spider_id=" . (int)$row['spider_id'],
            __FILE__, __LINE__); // delete all references to keywords
        $aff .= ' +' . mysql_affected_rows();
    }

    api_sql_query("DELETE FROM " . PHPDIG_DB_PREFIX .
        "spider WHERE site_id=" . $site_id . $and_path,
        __FILE__, __LINE__); // delete page

    // mysql_affected_rows() here reports the spider rows just deleted
    echo htmlspecialchars($url . $path . $file, ENT_QUOTES, $charset), ' (site_id ',
        $site_id, '): ', mysql_affected_rows(), $aff,
        ' pages + word references removed from index.<br />';

    return $site_id;
}
/**
 * Insert one page and its keywords into PhpDig's tables.
 *
 * A "spider" row is created for the page; then, for every keyword that passes
 * the filters (length above SMALL_WORDS_SIZE and at most MAX_WORDS_SIZE, not a
 * stopword, first character in the allowed set), the keyword is looked up in
 * "keywords" (and inserted when absent) and an "engine" row linking page,
 * keyword and weight is added. A table row summarising the result is echoed.
 *
 * @param int    $site_id     id of the site the page belongs to
 * @param string $path        page path
 * @param string $file        page file name
 * @param string $first_words text stored in spider.first_words
 * @param array  $keywords    keyword => weight
 * @return void
 */
function index_words($site_id, $path, $file, $first_words, $keywords)
{
    // $charset was used below without being declared global, so
    // htmlspecialchars() received an undefined variable; declare it here as
    // remove_engine_entries() does.
    global $common_words, $charset;

    $spider_set_path_etc = "spider SET path='" . addslashes($path) .
        "',file='" . addslashes($file) . "',first_words='" .
        addslashes($first_words) . "',site_id='$site_id'";
    // do not set upddate,md5,num_words,last_modified,filesize
    api_sql_query("INSERT INTO " . PHPDIG_DB_PREFIX . $spider_set_path_etc,
        __FILE__, __LINE__);
    $spider_id = mysql_insert_id();

    $new = 0; // number of keywords that were not yet in the keywords table
    foreach ($keywords as $key => $w)
    {
        if (strlen($key) > SMALL_WORDS_SIZE and strlen($key) <= MAX_WORDS_SIZE and
            !isset($common_words[$key]) and
            ereg('^['.WORDS_CHARS_LATIN1.'#$]', $key))
        {
            $result = api_sql_query("SELECT key_id FROM " . PHPDIG_DB_PREFIX .
                "keywords WHERE keyword = '" . addslashes($key) . "'",
                __FILE__, __LINE__);
            if (mysql_num_rows($result) == 0)
            {
                // unknown keyword: store it with its first two characters
                // (backslashes removed) in the twoletters column
                api_sql_query("INSERT INTO " . PHPDIG_DB_PREFIX .
                    "keywords (keyword,twoletters) VALUES ('" . addslashes($key) .
                    "','" .addslashes(substr(str_replace('\\','',$key),0,2)) ."')",
                    __FILE__, __LINE__);
                $key_id = mysql_insert_id();
                $new++;
            }
            else
            {
                $keyid = mysql_fetch_row($result);
                $key_id = $keyid[0];
            }
            api_sql_query("INSERT INTO " . PHPDIG_DB_PREFIX .
                "engine (spider_id,key_id,weight) VALUES ($spider_id,$key_id,$w)",
                __FILE__, __LINE__);
        }
    }

    // note: the count shown is the number of keywords received, not the
    // number that passed the filters above
    echo '<tr><td>', htmlspecialchars($file, ENT_QUOTES, $charset), '</td><td>(spider_id ',
        $spider_id, '):</td><td align="right">', count($keywords), ' kwds, ',
        $new , ' new</td></tr>', "\n";
}
/**
 * Build the "first words" text of a page: a title line followed by a summary.
 *
 * The title is $path . $file, with everything up to and including the first
 * 'scorm/' removed and everything from the first '&thumb' onwards cut off.
 * The summary is $text with double spaces and line-break characters replaced
 * by a space, shortened to SUMMARY_DISPLAY_LENGTH with "..." appended when it
 * is longer than that.
 *
 * @param string $text text to summarise
 * @param string $path page path
 * @param string $file page file name
 * @return string title, a newline, then the summary
 */
function get_first_words($text, $path, $file)
{
    // title part
    $title = $path . $file;
    $scorm_at = strpos($title, 'scorm/');
    if ($scorm_at !== FALSE)
    {
        $title = substr($title, $scorm_at + 6);
    }
    $thumb_at = strpos($title, '&thumb');
    if ($thumb_at !== FALSE)
    {
        $title = substr($title, 0, $thumb_at);
    }

    // summary part
    $summary = preg_replace("/([ ]{2}|\n|\r|\r\n)/" ," ", $text);
    $summary = (strlen($summary) > SUMMARY_DISPLAY_LENGTH)
        ? substr($summary, 0, SUMMARY_DISPLAY_LENGTH) . "..."
        : $summary;

    return $title . "\n" . $summary;
}
/**
 * Count the words of a text after cleaning it with phpdigEpureText().
 *
 * The cleaned text is split on spaces; the result maps every word to the
 * number of times it occurs.
 *
 * @param string $text raw text
 * @return array word => number of occurrences; an empty array when the
 *               cleaned text contains no word
 */
function get_keywords($text)
{
    // always return an array, also when there is nothing to count
    $nbre_mots = array();

    // Compare against FALSE explicitly: strtok() signals the end with FALSE,
    // whereas a plain truthiness test would also stop at the token "0" and
    // silently drop every word after it.
    $token = strtok(phpdigEpureText($text), ' ');
    while ($token !== FALSE)
    {
        $nbre_mots[$token] = isset($nbre_mots[$token]) ? $nbre_mots[$token] + 1 : 1;
        $token = strtok(' ');
    }

    return $nbre_mots;
}
/**
 * Reduce a text to a space-separated list of lower-case, accent-free words.
 *
 * Steps, in order: lower-casing and accent stripping; replacing runs of
 * disallowed characters by a space; dropping punctuation that trails a word;
 * removing words of at most SMALL_WORDS_SIZE characters; removing runs of
 * dots and leading dots; collapsing whitespace and trimming.
 *
 * @param string $text raw text
 * @return string cleaned text
 */
function phpdigEpureText($text)
{
    $clean = strtolower($text);
    $clean = phpdigStripAccents($clean);
    $clean = strtr($clean, '<EFBFBD><EFBFBD>', '<EFBFBD><EFBFBD>');

    // anything that is neither a word character nor one of the tolerated
    // punctuation marks becomes a space
    // RH: was ' \'._~@#$:&%/;,=-]+', also below
    $clean = ereg_replace('[^'.WORDS_CHARS_LATIN1.' \'._~@#$&%/=-]+',' ',$clean);

    // punctuation between a word character and end of text / whitespace is
    // dropped; \1 and \2 keep what surrounded it
    $clean = ereg_replace('(['.WORDS_CHARS_LATIN1.'])[\'._~@#$&%/=-]+($|[[:space:]]$|[[:space:]]['.WORDS_CHARS_LATIN1.'])','\1\2',$clean);

    if (SMALL_WORDS_SIZE >= 1)
    {
        // Two passes are needed: a match consumes the whitespace behind the
        // short word, so a short word directly following it is not matched
        // in the same pass.
        for ($pass = 0; $pass < 2; $pass++)
        {
            $clean = ereg_replace('[[:space:]][^ ]{1,'.SMALL_WORDS_SIZE.'}[[:space:]]',' ',' '.$clean.' ');
        }
    }

    $clean = ereg_replace('\.{2,}',' ',$clean);           // runs of dots
    $clean = ereg_replace('^[[:space:]]*\.+',' ',$clean); // leading dots
    $clean = ereg_replace("[[:space:]]+"," ",$clean);     // collapse whitespace

    return trim($clean);
}
/**
 * Replace accented letters in a string by their unaccented counterparts.
 *
 * Two characters are first replaced by the two-letter sequence 'ae'; all
 * remaining characters listed in LETTERS_WITH_ACCENTS are then mapped, one to
 * one, onto SAME_WITHOUT_ACCENTS.
 * NOTE(review): the two search characters show up as replacement-character
 * residue in this copy of the file - verify them against the original
 * encoding before editing.
 *
 * @param string $chaine text to convert
 * @return string converted text
 */
function phpdigStripAccents($chaine)
{
    $sans_ligatures = str_replace('<EFBFBD>', 'ae', $chaine);
    $sans_ligatures = str_replace('<EFBFBD>', 'ae', $sans_ligatures);

    return strtr($sans_ligatures, LETTERS_WITH_ACCENTS, SAME_WITHOUT_ACCENTS);
}
?>