parent
3c0cd26774
commit
0b7de49a5a
@ -0,0 +1,9 @@ |
||||
<?php |
||||
include 'xapian/XapianIndexer.class.php'; |
||||
|
||||
/**
 * Dokeos-side name for the search indexer.
 *
 * The class body is intentionally empty: everything is inherited from
 * XapianIndexer, which is declared abstract and therefore needs a concrete
 * subclass such as this one to be instantiated.
 */
class DokeosIndexer extends XapianIndexer
{
}
||||
?> |
||||
@ -0,0 +1,39 @@ |
||||
<?php |
||||
/* |
||||
 * Generic wrapper functions around the search engine API, so that calling code does not have to change if the search engine is ever replaced.
||||
* @package: dokeos.search |
||||
*/ |
||||
require 'xapian/XapianQuery.php'; |
||||
|
||||
/**
 * Run a search query through the search engine.
 *
 * Delegates to xapian_query() without passing a database handle (NULL is
 * given for that argument).
 *
 * @param   string  $query_string   The search string
 * @param   int     $offset         Offset to the first item to retrieve. Optional
 * @param   int     $length         Number of items to retrieve. Optional
 * @return  array   Whatever xapian_query() returns for these arguments
 */
function dokeos_query_query($query_string, $offset=0, $length=10) {
    $search_result = xapian_query($query_string, NULL, $offset, $length);

    return $search_result;
}
||||
|
||||
/**
 * Fetch the list of tags known to the search engine.
 *
 * Delegates to xapian_get_all_terms(), passing only the count.
 *
 * @param   int     $count  Number of terms to retrieve. Optional.
 * @return  array   Whatever xapian_get_all_terms() returns for this count
 */
function dokeos_query_get_tags($count=100) {
    $all_terms = xapian_get_all_terms($count);

    return $all_terms;
}
||||
|
||||
/**
 * Fetch the tags attached to one specific document.
 *
 * Delegates to xapian_get_doc_terms(), passing only the document.
 *
 * @param   mixed   $doc    Document entry, of the class the search engine expects
 * @return  array   Whatever xapian_get_doc_terms() returns for this document
 */
function dokeos_query_tags_for_doc($doc) {
    $doc_terms = xapian_get_doc_terms($doc);

    return $doc_terms;
}
||||
?> |
||||
@ -0,0 +1,110 @@ |
||||
<?php |
||||
/**
 * A unit of content to be handed to the search indexer: a set of key/value
 * data plus a list of flagged terms.
 */
abstract class _IndexableChunk
{

    /* int - declared but not read or written by any method of this class */
    protected $id;

    /* boolean - TRUE when setParent() was given something that is not an
     * IndexableChunk, FALSE when it was given one */
    public $parent;

    /* int - ID of the parent chunk, or -1 when there is none */
    public $parentId;

    /* struct (array)
     * {
     *    string title;     <- file/element name
     *    string content;   <- text to index
     *    string ids;       <- the flags to store, "cidReq:lp_id:path"
     * }
     */
    public $data;

    /**
     * List of terms, each entry being
     * array(
     *    name => string
     *    flag => char
     * )
     */
    public $terms;

    /**
     * Class constructor. Starts with empty 'data' and 'terms' arrays so that
     * both can always be iterated, even when nothing was added.
     */
    public function __construct() {
        $this->data = array();
        $this->terms = array();
    }

    /**
     * Add a value to the indexed item
     * @param   string  Key
     * @param   string  Value
     * @return  void
     */
    public function addValue($key, $value) {
        $this->data[$key] = $value;
    }

    /**
     * Add a term (like xapian definition)
     *
     * The term is silently ignored when the flag is not exactly one
     * character long.
     *
     * @param   string  Term
     * @param   string  Flag (one character)
     * @return  void
     */
    public function addTerm($term, $flag) {
        if (strlen($flag) == 1) {
            $this->terms[] = array('name' => $term, 'flag' => $flag);
        }
    }

    /**
     * Get the ID of the indexed item: the second colon-separated field of
     * the 'ids' element of the data array.
     *
     * @return  mixed   The second field of 'ids' (as the string found there),
     *                  or -1 when 'ids' is missing or has no second field
     */
    public function getId() {
        if (!is_array($this->data) || !isset($this->data['ids']) || !is_string($this->data['ids'])) {
            return -1;
        }

        $ids = explode(':', $this->data['ids']);

        /* we need at least course_id and document_id, else it's broken;
         * explode() always returns at least one element, so the count has to
         * be compared against 2 explicitly */
        if (count($ids) < 2 || $ids[1] === '') {
            return -1;
        }

        return $ids[1];
    }

    /**
     * Sets the parent of the current indexed item
     *
     * When $parent is an IndexableChunk its ID is remembered and the
     * 'parent' flag is set to FALSE; for anything else the parent ID is
     * reset to -1 and the flag is set to TRUE.
     *
     * @param   mixed   A parent object
     * @return  void
     */
    public function setParent($parent) {
        if ($parent instanceof IndexableChunk) {
            $this->parentId = $parent->getId();
            $this->parent = False;
        } else {
            $this->parentId = -1;
            $this->parent = True;
        }
    }

    /**
     * Class destructor. Unsets attributes.
     */
    public function __destruct() {
        unset($this->data);
        unset($this->terms);
        unset($this->parent);
    }
}
||||
|
||||
/**
 * Concrete, extensible counterpart of the abstract _IndexableChunk class.
 *
 * Declares nothing of its own; all attributes and methods are inherited.
 */
class IndexableChunk extends _IndexableChunk {
}
||||
|
||||
?> |
||||
@ -0,0 +1,112 @@ |
||||
<?php |
||||
/**
 * Queue the CSS and JavaScript the search widget needs into the page head.
 *
 * Three entries are appended, in this order: the tag styles, the jQuery
 * script include and the script that copies the ids of the highlighted tags
 * into the #tag_holder field when the #dokeos_search form is submitted.
 *
 * $htmlHeadXtra must be passed by reference and not by value, otherwise
 * this function has no effect and the form will be broken.
 *
 * @param   array   $htmlHeadXtra   A reference to the doc $htmlHeadXtra
 */
function search_widget_prepare(&$htmlHeadXtra) {
    $tag_styles = '
    <style type="text/css">
    .tags {
        display: block;
        margin-top: 20px;
        width: 70%;
    }
    .tag {
        float: left;
        display: block;
        padding: 5px;
        padding-right: 4px;
        padding-left: 4px;
        margin: 3px;
        border: 1px solid #ddd;
    }
    .tag:hover {
        background: #ddd;
        cursor: pointer;
    }
    .lighttagcolor {
        background: #ddd;
    }
    .lighttagcolor:hover {
        background: #fff;
    }

    </style>';

    $jquery_include = '
    <script src="'.api_get_path(WEB_LIBRARY_PATH).'javascript/jquery.js" type="text/javascript"></script>';

    /* double-quoted on purpose: the script itself uses single quotes, and
     * "$(" is not treated as a variable by PHP */
    $submit_script = "
    <script type=\"text/javascript\">
    $(document).ready(function() {
        $('#dokeos_search').submit(function (e) {
            var tags = String();
            $('.lighttagcolor').each(function (b, a) {
                tags = tags.concat(a.id+',');
            });
            $('#tag_holder').val(tags);
            return true;
        });
    });
    </script>";

    $htmlHeadXtra[] = $tag_styles;
    $htmlHeadXtra[] = $jquery_include;
    $htmlHeadXtra[] = $submit_script;
}
||||
|
||||
/**
 * Show the search widget
 *
 * Prints a GET form with a query field, a hidden 'tags' field and one
 * clickable element per tag returned by dokeos_query_get_tags(). Tags listed
 * in the comma-separated 'tags' request parameter are rendered highlighted.
 *
 * The form will post to lp_controller.php by default, you can pass a value to
 * $action to use a custom action.
 * IMPORTANT: you have to call search_widget_prepare() before calling this
 * function or otherwise the form will not behave correctly.
 *
 * @param   string  $action     Just in case your action is not
 *                              lp_controller.php
 * @return  void
 */
function search_widget_show($action="lp_controller.php") {
    require_once api_get_path(LIBRARY_PATH).'/search/DokeosQuery.php';

    /* the tag query may not return an array (xapian_get_all_terms() returns
     * NULL after an error); fall back to showing no tags */
    $dktags = dokeos_query_get_tags();
    if (!is_array($dktags)) {
        $dktags = array();
    }

    $post_tags = array();
    if (isset($_REQUEST['tags']) && is_string($_REQUEST['tags'])) {
        $post_tags = explode(',', $_REQUEST['tags']);
    }
    ?>
<form id="dokeos_search" action="<?php echo htmlspecialchars($action, ENT_QUOTES) ?>" method="get">
    <input type="hidden" name="action" value="search"/>
    <input type="text" name="query" size="40" />
    <input type="submit" id="submit" value="<?php echo get_lang("Search") ?>" />
    <br/>
    <h2><?php echo get_lang("Tags") ?></h2>
    <input type="hidden" name="tags" id="tag_holder" />
    <div class="tags">
<?php
    foreach ($dktags as $dktag) {
        /* drop only the single leading 'T' prefix of the term; trimming the
         * character list 'T ' would also eat T's that belong to the tag */
        $tag = (string) $dktag['name'];
        if ($tag !== '' && $tag[0] === 'T') {
            $tag = (string) substr($tag, 1);
        }
        $tag = trim($tag);
        if ($tag === '') {
            continue;
        }

        $class = 'tag';
        if (in_array($tag, $post_tags, true)) {
            $class .= ' lighttagcolor';
        }

        /* tag names come from the index, so escape them for HTML output; the
         * id stays equal to the tag name because the submit handler collects
         * the ids of the highlighted elements */
        $safe_tag = htmlspecialchars($tag, ENT_QUOTES);
        ?>
        <span class="<?php echo $class ?>" id="<?php echo $safe_tag ?>"><?php echo $safe_tag ?></span>
<?php
    }
    ?>
    </div>
    <script type="text/javascript">
        /* one handler for all tags, bound on the element itself so that tag
         * names never have to be embedded in a jQuery selector */
        $('#dokeos_search .tag').click(function (e) {
            $(this).toggleClass('lighttagcolor');
        });
    </script>
</form>
<br style="clear: both;"/>
<?php
}
||||
?> |
||||
@ -0,0 +1,7 @@ |
||||
<?php |
||||
/* |
||||
* This file is included by the other xapian files, it only describes some |
||||
* constants and 'config'. |
||||
*/ |
||||
|
||||
?> |
||||
@ -0,0 +1,208 @@ |
||||
<?php |
||||
require 'xapian.php'; |
||||
|
||||
/**
 * Abstract helper class wrapping a writable Xapian database: it queues
 * chunks of content, turns them into Xapian documents and lets single
 * documents be fetched, re-tagged or removed.
 */
abstract class XapianIndexer {
    /* XapianWritableDatabase */
    protected $db;
    /* not used by the methods of this class (the parent handling in
     * addChunk() is commented out) */
    protected $parents;
    /* IndexableChunk[] */
    protected $chunks;
    /* XapianTermGenerator */
    public $indexer;
    /* XapianStem */
    public $stemmer;

    /**
     * Class constructor
     */
    function __construct() {
        $this->db = NULL;
        $this->indexer = NULL;
        $this->stemmer = NULL;
        $this->parents = array();
        $this->chunks = array();
    }

    /**
     * Class destructor
     */
    function __destruct() {
        unset($this->db);
        unset($this->stemmer);
    }

    /**
     * Generates a list of languages Xapian manages
     *
     * This method enables the definition of more matches between
     * Dokeos languages and Xapian languages (through hardcoding)
     * @return  array   Array of languages codes -> Xapian languages
     */
    final public function xapian_languages() {
        /* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
        return array(
            'none' => 'none', //don't stem terms
            'da' => 'danish',
            'nl' => 'dutch',
            /* Martin Porter's 2002 revision of his stemmer */
            'en' => 'english',
            /* Lovin's stemmer */
            'lovins' => 'english_lovins',
            /* Porter's stemmer as described in his 1980 paper */
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        );
    }

    /**
     * Connect to the database, and create it if it doesn't exist
     *
     * @param   string  $path   Database directory. Defaults to the 'searchdb/'
     *                          directory under the system path
     * @param   int     $dbMode Xapian open mode. Defaults to
     *                          Xapian::DB_CREATE_OR_OPEN
     * @param   string  $lang   Stemmer language: either a Xapian language
     *                          name or one of the codes listed by
     *                          xapian_languages(); anything else falls back
     *                          to 'english'
     * @return  mixed   The database object, or the integer 1 when Xapian
     *                  raised an exception (the message is displayed)
     */
    function connectDb($path=NULL, $dbMode=NULL, $lang='english') {
        if ($dbMode == NULL) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }

        if ($path == NULL) {
            $path = api_get_path(SYS_PATH).'searchdb/';
        }

        try {
            $this->db = new XapianWritableDatabase($path, $dbMode);
            $this->indexer = new XapianTermGenerator();

            $this->stemmer = new XapianStem($this->resolve_stem_language($lang));
            $this->indexer->set_stemmer($this->stemmer);

            return $this->db;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            return 1;
        }
    }

    /**
     * Translate a language given to connectDb() into a Xapian language name
     *
     * @param   mixed   $lang   Language code or Xapian language name
     * @return  string  A value of xapian_languages(), 'english' by default
     */
    private function resolve_stem_language($lang) {
        $languages = $this->xapian_languages();

        if (is_string($lang)) {
            /* already a Xapian language name */
            if (in_array($lang, $languages, true)) {
                return $lang;
            }
            /* a language code: use the name it maps to */
            if (isset($languages[$lang])) {
                return $languages[$lang];
            }
        }

        return 'english';
    }

    /**
     * Simple getter for the db attribute
     * @return  object  The db attribute
     */
    function getDb() {
        return $this->db;
    }

    /**
     * Add this chunk to the chunk array attribute
     * @param   object  Chunk to index; index() reads its 'terms' and 'data'
     *                  attributes and calls its getId() method
     * @return  void
     */
    function addChunk($chunk) {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data
     *
     * Every queued chunk is written to the database, the database is flushed
     * once at the end and the queue is emptied. Processing stops as soon as
     * a chunk has no usable ID; chunks before it have then already been
     * handed to the database and the queue is left untouched.
     *
     * NOTE(review): the ID is passed to replace_document() exactly as
     * getId() returns it. Xapian overloads that method on document id versus
     * unique term - confirm which one is intended before changing its type.
     *
     * @return  integer Value returned by replace_document() for the last
     *                  chunk, or NULL upon failure (no connection, nothing
     *                  queued, invalid chunk ID or Xapian exception)
     */
    function index() {
        if (!is_object($this->db) || !is_object($this->indexer)) {
            return NULL;
        }
        if (!is_array($this->chunks) || count($this->chunks) == 0) {
            return NULL;
        }

        try {
            $did = NULL;

            foreach ($this->chunks as $chunk) {
                $id = $chunk->getId();
                if (!is_numeric($id) || $id < 0) {
                    return NULL;
                }

                $doc = new XapianDocument();
                $this->indexer->set_document($doc);

                if (is_array($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        /* FIXME: think of getting weight */
                        $doc->add_term($term['flag'] . $term['name'], 1);
                    }
                }

                /* free-form ignoring ids, indexes title and content */
                foreach ($chunk->data as $key => $value) {
                    if ($key !== 'ids') {
                        $this->indexer->index_text($value, 1);
                    }
                }

                $doc->set_data($chunk->data['ids']);

                $did = $this->db->replace_document($id, $doc);
            }

            //write to disk
            $this->db->flush();

            /* everything queued has been written: do not index it again on
             * the next call */
            $this->chunks = array();

            return $did;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            return NULL;
        }
    }

    /**
     * Get a specific document from xapian db
     *
     * Connects with the default settings first when there is no open
     * database yet.
     *
     * @param   int     did     Xapian::docid
     * @return  XapianDocument  The document, or NULL when no database
     *                          connection could be established
     */
    function get_document($did) {
        if (!$this->ensure_connected()) {
            return NULL;
        }
        return $this->db->get_document($did);
    }

    /**
     * Replace all terms of a document in xapian db
     *
     * @param   int     did     Xapian::docid
     * @param   array   terms   New terms of the document
     * @param   string  prefix  Prefix prepended to every term
     * @return  void
     */
    function update_terms($did, $terms, $prefix='T') {
        $doc = $this->get_document($did);
        if (!is_object($doc)) {
            return;
        }

        $doc->clear_terms();
        foreach ($terms as $term) {
            //add directly
            $doc->add_term($prefix.$term, 1);
        }
        $this->db->replace_document($did, $doc);
        $this->db->flush();
    }

    /**
     * Remove a document from xapian db
     *
     * Connects with the default settings first when there is no open
     * database yet.
     *
     * @param   int     did     Xapian::docid
     * @return  void
     */
    function remove_document($did) {
        if (!$this->ensure_connected()) {
            return;
        }
        $this->db->delete_document($did);
        $this->db->flush();
    }

    /**
     * Open the default database unless one is already open
     *
     * @return  boolean TRUE when $this->db holds a database object
     */
    private function ensure_connected() {
        if (!is_object($this->db)) {
            $this->connectDb();
        }
        return is_object($this->db);
    }
}
||||
?> |
||||
@ -0,0 +1,164 @@ |
||||
<?php |
||||
require_once 'xapian.php'; |
||||
|
||||
define('XAPIAN_DB', api_get_path(SYS_PATH).'searchdb/'); |
||||
|
||||
/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian
 *
 * The query string is parsed with an English stemmer and with the boolean
 * prefixes filetype: (F), tag: (T) and courseid: (C). Each non-empty entry of
 * $extra is OR-ed into a subquery which is then AND-ed with the main query.
 *
 * @param   string          $query_string   The search string. This string will
 *                                          be parsed and stemmed automatically.
 * @param   XapianDatabase  $db             Xapian database to connect; the one
 *                                          at XAPIAN_DB is opened when this is
 *                                          not an object
 * @param   int             $start          An integer defining the first
 *                                          document to return
 * @param   int             $length         The number of results to return.
 * @param   array           $extra          An array containing arrays of
 *                                          extra terms to search for.
 * @param   int             $count_type     Which match count to report:
 *                                          1 = lower bound, 2 = upper bound,
 *                                          anything else = best estimate
 * @return  array   array($count, $results), where $results is indexed from 1
 *                  and holds objects with the attributes 'ids', 'score' and
 *                  'terms'; NULL when Xapian raised an exception
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10,
                      $extra = array(), $count_type = 0) {

    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        $enquire = new XapianEnquire($db);
        $query_parser = new XapianQueryParser();
        $stemmer = new XapianStem("english");
        $query_parser->set_stemmer($stemmer);
        $query_parser->set_database($db);
        $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query_parser->add_boolean_prefix('filetype', 'F');
        $query_parser->add_boolean_prefix('tag', 'T');
        $query_parser->add_boolean_prefix('courseid', 'C');
        $query = $query_parser->parse_query($query_string);

        // Build subqueries from $extra array.
        foreach ($extra as $subq) {
            if (!empty($subq)) {
                /* TODO: review if we want to use this constructor
                 * deprecated in C: http://xapian.org/docs/apidoc/html/classXapian_1_1Query.html#f85d155b99f1f2007fe75ffc7a8bd51e
                 * maybe use: Query (Query::op op_, const Query &left, const Query &right) ?
                 */
                $subquery = new XapianQuery(XapianQuery::OP_OR, $subq);
                $query = new XapianQuery(XapianQuery::OP_AND, array($subquery, $query));
            }
        }

        $enquire->set_query($query);
        $matches = $enquire->get_mset((int)$start, (int)$length);

        $results = array();
        $i = $matches->begin();
        $count = 0;
        while (!$i->equals($matches->end())) {
            /* the position is advanced for every match, so result keys
             * start at 1 and skip matches without a document object */
            $count++;
            $document = $i->get_document();
            if (is_object($document)) {
                /* build the entry explicitly: assigning a property on a
                 * not yet existing array element is an error on PHP 8 */
                $result = new stdClass();
                $result->ids = $document->get_data();
                $result->score = $i->get_percent();
                $result->terms = xapian_get_doc_terms($document);
                $results[$count] = $result;
            }
            $i->next();
        }

        /* from here on $count is the number of matches reported by Xapian,
         * no longer the loop position */
        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;

            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;

            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }

        return array($count, $results);
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
||||
|
||||
/**
 * Retrieve a list of the terms stored in the database
 *
 * @param   int             $count  Number of terms to retrieve; 0 means no
 *                                  limit
 * @param   char            $prefix The prefix of the terms to retrieve; an
 *                                  empty value lists every term
 * @param   XapianDatabase  $db     Xapian database to connect; the one at
 *                                  XAPIAN_DB is opened when this is not an
 *                                  object
 * @return  array   List of array('frequency' => ..., 'name' => ...) entries,
 *                  or NULL when Xapian raised an exception
 */
function xapian_get_all_terms($count=0, $prefix='T', $db=NULL) {
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        $termi = !empty($prefix)
            ? $db->allterms_begin($prefix)
            : $db->allterms_begin();

        $terms = array();
        $seen = 0;
        while (!$termi->equals($db->allterms_end())) {
            /* stop once the requested number of terms has been collected */
            $seen++;
            if (!($seen <= $count || $count == 0)) {
                break;
            }

            $terms[] = array(
                'frequency' => $termi->get_termfreq(),
                'name' => $termi->get_term(),
            );
            $termi->next();
        }

        return $terms;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
||||
|
||||
/**
 * Retrieve the terms of a document that start with a given prefix
 *
 * @param   XapianDocument  $doc    Document searched
 * @param   string          $prefix Prefix the term must start with; may be
 *                                  longer than one character, and an empty
 *                                  prefix keeps every term
 * @return  array   List of array('frequency' => ..., 'name' => ...) entries;
 *                  NULL when $doc is not a XapianDocument or when Xapian
 *                  raised an exception
 */
function xapian_get_doc_terms($doc=NULL, $prefix='T') {
    try {
        if (!($doc instanceof XapianDocument)) {
            return NULL;
        }

        $prefix = (string) $prefix;
        $prefix_length = strlen($prefix);

        //TODO: make the filter by prefix on xapian if possible
        //ojwb marvil07: use Document::termlist_begin() and then skip_to(prefix) on the TermIterator
        //ojwb you'll need to check the end condition by hand though
        $terms = array();
        for ($termi=$doc->termlist_begin() ; !$termi->equals($doc->termlist_end()); $termi->next() ) {
            $name = (string) $termi->get_term();

            /* compare the whole prefix rather than the first character, so
             * empty term names and multi-character prefixes are handled */
            if (strncmp($name, $prefix, $prefix_length) !== 0) {
                continue;
            }

            $terms[] = array(
                'frequency' => $termi->get_termfreq(),
                'name' => $name,
            );
        }

        return $terms;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
||||
?> |
||||
@ -0,0 +1,16 @@ |
||||
<?php |
||||
/* |
||||
* This file includes lp_list_search to avoid duplication of code, it |
||||
* bootstraps dokeos api enough to make lp_list_search work. |
||||
*/ |
||||
/* Dokeos bootstrap first: the api_get_path() calls below are made only
 * after this include */
include_once '../main/inc/global.inc.php';

/* libraries loaded before handing over to the search page */
include_once api_get_path(LIBRARY_PATH).'course.lib.php';
include_once api_get_path(LIBRARY_PATH).'debug.lib.inc.php';
include_once api_get_path(LIBRARY_PATH).'system_announcements.lib.php';
include_once api_get_path(LIBRARY_PATH).'groupmanager.lib.php';
include_once api_get_path(LIBRARY_PATH).'usermanager.lib.php';

/* only users who are logged in can proceed */
api_block_anonymous_users();

/* the actual search page */
require '../main/newscorm/lp_list_search.php';
||||
?> |
||||
@ -0,0 +1,2 @@ |
||||
This directory is a placeholder for the search plugin, which allows
Dokeos content to be indexed with the Xapian search engine.
||||
Loading…
Reference in new issue