[svn r16093] Search feature (still not totally complete)

skala
Yannick Warnier 18 years ago
parent 3c0cd26774
commit 0b7de49a5a
  1. 9
      main/inc/lib/search/DokeosIndexer.class.php
  2. 39
      main/inc/lib/search/DokeosQuery.php
  3. 110
      main/inc/lib/search/IndexableChunk.class.php
  4. 112
      main/inc/lib/search/search_widget.php
  5. 7
      main/inc/lib/search/xapian/XapianConsts.php
  6. 208
      main/inc/lib/search/xapian/XapianIndexer.class.php
  7. 164
      main/inc/lib/search/xapian/XapianQuery.php
  8. 16
      main/search/index.php
  9. 2
      searchdb/readme.txt

@ -0,0 +1,9 @@
<?php
include 'xapian/XapianIndexer.class.php';
/**
 * Dokeos-facing indexer class.
 *
 * Adds nothing of its own: it only extends XapianIndexer (loaded by the
 * include above), giving callers a class name that does not mention the
 * underlying search engine.
 */
class DokeosIndexer extends XapianIndexer {
}
?>

@ -0,0 +1,39 @@
<?php
/*
* Script defining generic wrapper functions around the search engine API, so that callers do not need to change if the search engine is ever replaced
* @package: dokeos.search
*/
require 'xapian/XapianQuery.php';
/**
 * Run a search query through the underlying search engine.
 *
 * Forwards to xapian_query() without an explicit database handle (NULL is
 * passed as the database argument).
 *
 * @param string $query_string The search string
 * @param int $offset Offset to the first item to retrieve. Optional
 * @param int $length Number of items to retrieve. Optional
 * @return array Whatever xapian_query() returns for these arguments
 */
function dokeos_query_query($query_string, $offset=0, $length=10) {
    $database = NULL;
    $found = xapian_query($query_string, $database, $offset, $length);
    return $found;
}
/**
 * Fetch tags (terms) from the search engine.
 *
 * Forwards to xapian_get_all_terms(), leaving its other arguments at their
 * defaults.
 *
 * @param int $count Number of terms to retrieve. Optional.
 * @return array Whatever xapian_get_all_terms() returns for this count
 */
function dokeos_query_get_tags($count=100) {
    $tags = xapian_get_all_terms($count);
    return $tags;
}
/**
 * Fetch the tags attached to one specific document.
 *
 * Forwards to xapian_get_doc_terms(), leaving its prefix argument at its
 * default.
 *
 * @param mixed $doc Document entry, of the appropriate class
 * @return array Whatever xapian_get_doc_terms() returns for this document
 */
function dokeos_query_tags_for_doc($doc) {
    $tags = xapian_get_doc_terms($doc);
    return $tags;
}
?>

@ -0,0 +1,110 @@
<?php
/**
 * Base class for a unit of content handed to the indexer.
 *
 * A chunk carries free-form key/value data (see $data) plus a list of
 * prefixed terms (see $terms).
 */
abstract class _IndexableChunk
{
    /* int */
    protected $id;
    /* boolean */
    public $parent;
    /* int */
    public $parentId;
    /* struct (array)
     * {
     * string title; <- file/element name
     * string content; <- text to index
     * string ids; <- the flags to store, "cidReq:lp_id:path"
     * }
     */
    public $data;
    /**
     * List of terms, each one being:
     * array(
     * name => string
     * flag => char
     * )
     */
    public $terms;

    /**
     * Class constructor. Starts with empty 'data' and 'terms' arrays so that
     * both can always be iterated, even when nothing was added.
     */
    public function __construct() {
        $this->data = array();
        $this->terms = array();
    }

    /**
     * Class destructor. Unsets attributes.
     */
    public function __destruct() {
        unset($this->data);
        unset($this->terms);
        unset($this->parent);
    }

    /**
     * Add a value to the indexed item
     * @param string Key
     * @param string Value
     * @return void
     */
    public function addValue($key, $value) {
        $this->data[$key] = $value;
    }

    /**
     * Add a term (like xapian definition).
     * The term is silently ignored unless the flag is exactly one character.
     * @param string Term
     * @param string Flag (one character)
     * @return void
     */
    public function addTerm($term, $flag) {
        if (strlen($flag) == 1) {
            $this->terms[] = array('name' => $term, 'flag' => $flag);
        }
    }

    /**
     * Get the ID of the indexed item: the second ':'-separated segment of
     * the 'ids' element of the data array.
     * @return mixed The ID segment (as the string found in 'ids'), or -1 when
     *               'ids' is missing or has fewer than two segments
     */
    public function getId() {
        $id = -1;
        if (is_array($this->data) && isset($this->data['ids'])) {
            $ids = explode(':', $this->data['ids']);
            /* we need at least course_id and document_id, else it's broken.
             * explode() always yields at least one element, so the count has
             * to be checked against 2, not against 0. */
            if (count($ids) > 1) {
                $id = $ids[1];
            }
        }
        return $id;
    }

    /**
     * Sets the parent of the current indexed item.
     * When given an IndexableChunk, its ID is stored as parentId and the
     * 'parent' flag is set to False; otherwise parentId is -1 and the
     * 'parent' flag is set to True.
     * @param mixed A parent object
     * @return void
     */
    public function setParent($parent) {
        if (is_a($parent, 'IndexableChunk')) {
            $this->parentId = $parent->getId();
            $this->parent = False;
        } else {
            $this->parentId = -1;
            $this->parent = True;
        }
    }
}
/**
 * Concrete, instantiable version of the abstract _IndexableChunk class.
 * It adds no members of its own, which leaves room for later extension.
 */
class IndexableChunk extends _IndexableChunk
{
}
?>

@ -0,0 +1,112 @@
<?php
/**
 * Queue the CSS and JavaScript the search widget needs for the page head.
 *
 * Three entries are appended to $htmlHeadXtra, in this order: the tag
 * stylesheet, the jQuery include, and the submit handler that copies the ids
 * of all highlighted tags into the hidden #tag_holder field.
 *
 * $htmlHeadXtra must be passed by reference, not by value, otherwise this
 * function has no effect and the form will be broken.
 *
 * @param array $htmlHeadXtra A reference to the doc $htmlHeadXtra
 */
function search_widget_prepare(&$htmlHeadXtra) {
    // 1. Styles for the tag cloud
    $tag_styles = '
<style type="text/css">
.tags {
display: block;
margin-top: 20px;
width: 70%;
}
.tag {
float: left;
display: block;
padding: 5px;
padding-right: 4px;
padding-left: 4px;
margin: 3px;
border: 1px solid #ddd;
}
.tag:hover {
background: #ddd;
cursor: pointer;
}
.lighttagcolor {
background: #ddd;
}
.lighttagcolor:hover {
background: #fff;
}
</style>';
    $htmlHeadXtra[] = $tag_styles;

    // 2. jQuery itself, served from the library path
    $jquery_url = api_get_path(WEB_LIBRARY_PATH).'javascript/jquery.js';
    $htmlHeadXtra[] = '
<script src="'.$jquery_url.'" type="text/javascript"></script>';

    // 3. On submit, gather the selected tags (nowdoc: nothing is interpolated)
    $submit_handler = <<<'JS'

<script type="text/javascript">
$(document).ready(function() {
$('#dokeos_search').submit(function (e) {
var tags = String();
$('.lighttagcolor').each(function (b, a) {
tags = tags.concat(a.id+',');
});
$('#tag_holder').val(tags);
return true;
});
});
</script>
JS;
    $htmlHeadXtra[] = $submit_handler;
}
/**
 * Show the search widget
 *
 * Prints a GET form with a free-text query field and one clickable element
 * per known tag. Tags listed in the comma-separated 'tags' request parameter
 * are rendered highlighted (class "lighttagcolor").
 *
 * The form will post to lp_controller.php by default, you can pass a value to
 * $action to use a custom action.
 * IMPORTANT: you have to call search_widget_prepare() before calling this
 * function or otherwise the form will not behave correctly.
 *
 * @param string $action Just in case your action is not
 * lp_controller.php
 * @return void Output is printed directly
 */
function search_widget_show($action="lp_controller.php") {
    require_once api_get_path(LIBRARY_PATH).'/search/DokeosQuery.php';
    $dktags = dokeos_query_get_tags();
    if (!is_array($dktags)) {
        // the tag lookup did not produce a list: show the form without tags
        $dktags = array();
    }
    // always defined, so the loop below never reads an unset variable
    $filter = FALSE;
    $post_tags = array();
    if (isset($_REQUEST['tags']) && is_string($_REQUEST['tags'])) {
        $filter = TRUE;
        $post_tags = explode(',', $_REQUEST['tags']);
    }
?>
<form id="dokeos_search" action="<?php echo htmlspecialchars($action, ENT_QUOTES) ?>"
method="get">
<input type="hidden" name="action" value="search"/>
<input type="text" name="query" size="40" />
<input type="submit" id="submit" value="<?php echo get_lang("Search") ?>" />
<br/>
<h2><?php echo get_lang("Tags") ?></h2>
<input type="hidden" name="tags" id="tag_holder" />
<div class="tags">
<?php
    foreach ($dktags as $tag)
    {
        $name = $tag['name'];
        // Drop only the single leading 'T' term prefix. trim($name, 'T ')
        // would also eat any further 'T' at either end of the tag itself.
        if (strncmp($name, 'T', 1) === 0) {
            $name = (string) substr($name, 1);
        }
        $name = trim($name, ' ');
        if ($name === '') {
            continue;
        }
        $color = "";
        if ($filter && in_array($name, $post_tags, true)) {
            $color = "lighttagcolor";
        }
        // tag names come from the index: escape them before printing
        $safe_name = htmlspecialchars($name, ENT_QUOTES);
?>
<span class="tag <?php echo $color?>" id="<?php echo $safe_name ?>">
<?php echo $safe_name ?></span>
<?php
    }
?>
</div>
<script type="text/javascript">
/* One handler for every tag, bound by class: no tag name is ever
   interpolated into a jQuery selector. */
$('#dokeos_search .tag').click(function (e) {
$(this).toggleClass('lighttagcolor');
});
</script>
</form>
<br style="clear: both;"/>
<?php
}
?>

@ -0,0 +1,7 @@
<?php
/*
* This file is included by the other xapian files, it only describes some
* constants and 'config'.
*/
?>

@ -0,0 +1,208 @@
<?php
require 'xapian.php';
/**
 * Abstract helper class wrapping a writable Xapian database.
 *
 * Typical use: connectDb(), addChunk() one or more chunks, then index().
 */
abstract class XapianIndexer {
    /* XapianWritableDatabase */
    protected $db;
    protected $parents;
    /* IndexableChunk[] */
    protected $chunks;
    /* XapianTermGenerator */
    public $indexer;
    /* XapianStem */
    public $stemmer;

    /**
     * Class constructor. Starts unconnected, with an empty chunk list.
     */
    public function __construct() {
        $this->db = NULL;
        $this->stemmer = NULL;
        $this->indexer = NULL;
        $this->parents = array();
        $this->chunks = array();
    }

    /**
     * Class destructor
     */
    public function __destruct() {
        unset($this->db);
        unset($this->stemmer);
    }

    /**
     * Generates a list of languages Xapian manages
     *
     * This method enables the definition of more matches between
     * Dokeos languages and Xapian languages (through hardcoding)
     * @return array Array of languages codes -> Xapian languages
     */
    public final function xapian_languages() {
        /* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
        return array(
            'none' => 'none', //don't stem terms
            'da' => 'danish',
            'nl' => 'dutch',
            /* Martin Porter's 2002 revision of his stemmer */
            'en' => 'english',
            /* Lovin's stemmer */
            'lovins' => 'english_lovins',
            /* Porter's stemmer as described in his 1980 paper */
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        );
    }

    /**
     * Connect to the database, and create it if it doesn't exist
     *
     * @param string $path   Database directory; defaults to the 'searchdb/'
     *                       directory under SYS_PATH
     * @param int    $dbMode Xapian open mode; defaults to
     *                       Xapian::DB_CREATE_OR_OPEN
     * @param string $lang   Xapian language name for the stemmer; anything
     *                       not among the values of xapian_languages() is
     *                       replaced by 'english'
     * @return mixed The database object, or 1 when an exception was raised
     *               (the exception message is displayed)
     */
    public function connectDb($path=NULL, $dbMode=NULL, $lang='english') {
        if ($dbMode == NULL) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }
        if ($path == NULL) {
            $path = api_get_path(SYS_PATH).'searchdb/';
        }
        try {
            $this->db = new XapianWritableDatabase($path, $dbMode);
            $this->indexer = new XapianTermGenerator();
            if (!in_array($lang, $this->xapian_languages(), true)) {
                $lang = 'english';
            }
            $this->stemmer = new XapianStem($lang);
            $this->indexer->set_stemmer($this->stemmer);
            return $this->db;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            return 1;
        }
    }

    /**
     * Simple getter for the db attribute
     * @return object The db attribute
     */
    public function getDb() {
        return $this->db;
    }

    /**
     * Add this chunk to the chunk array attribute
     * @param IndexableChunk Chunk to index on the next call to index()
     * @return void
     */
    public function addChunk($chunk) {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data
     *
     * Every chunk added through addChunk() is written, in order, and the
     * database is flushed once at the end. Processing stops at the first
     * chunk whose getId() is negative.
     *
     * NOTE(review): on an exception this method still displays the message
     * and terminates the script with exit(1), as it always did.
     *
     * @return integer Xapian document ID returned for the last chunk, or NULL
     *                 when there is nothing to index or a chunk has no valid ID
     */
    public function index() {
        try {
            $did = NULL;
            $written = FALSE;
            foreach ($this->chunks as $chunk) {
                $id = $chunk->getId();
                if ($id < 0) {
                    $did = NULL;
                    break;
                }
                $doc = new XapianDocument();
                $this->indexer->set_document($doc);
                if (is_array($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        /* FIXME: think of getting weight */
                        $doc->add_term($term['flag'] . $term['name'], 1);
                    }
                }
                /* free-form ignoring ids, indexes title and content */
                foreach ($chunk->data as $key => $value) {
                    // strict comparison: a loose one can also match integer
                    // keys such as 0 on older PHP versions
                    if ($key !== 'ids') {
                        $this->indexer->index_text($value, 1);
                    }
                }
                $doc->set_data($chunk->data['ids'], 1);
                // $id is passed through untouched (no cast) so the same
                // replace_document() variant is selected as before.
                $did = $this->db->replace_document($id, $doc);
                $written = TRUE;
            }
            if ($written) {
                //write to disk
                $this->db->flush();
            }
            return $did;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            exit(1);
        }
    }

    /**
     * Get a specific document from xapian db
     *
     * Connects with the default settings first if no connection exists yet.
     *
     * @param int did Xapian::docid
     * @return XapianDocument The document, or NULL when no database
     *                        connection could be established
     */
    public function get_document($did) {
        if (!is_object($this->db)) {
            $this->connectDb();
        }
        if (!is_object($this->db)) {
            return NULL;
        }
        return $this->db->get_document($did);
    }

    /**
     * Replace all terms of a document in xapian db
     *
     * @param int did Xapian::docid
     * @param array terms New terms of the document
     * @param string prefix Prefix prepended to every new term
     * @return void
     */
    public function update_terms($did, $terms, $prefix='T') {
        $doc = $this->get_document($did);
        if (!is_object($doc)) {
            return;
        }
        $doc->clear_terms();
        foreach ($terms as $term) {
            //add directly
            $doc->add_term($prefix.$term, 1);
        }
        $this->db->replace_document($did, $doc);
        $this->db->flush();
    }

    /**
     * Remove a document from xapian db
     *
     * Connects with the default settings first if no connection exists yet.
     *
     * @param int did Xapian::docid
     * @return void
     */
    public function remove_document($did) {
        if (!is_object($this->db)) {
            $this->connectDb();
        }
        if (!is_object($this->db)) {
            return;
        }
        $this->db->delete_document($did);
        $this->db->flush();
    }
}
?>

@ -0,0 +1,164 @@
<?php
require_once 'xapian.php';
define('XAPIAN_DB', api_get_path(SYS_PATH).'searchdb/');
/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian
 *
 * The query string is parsed with an English stemmer and the boolean
 * prefixes filetype: (F), tag: (T) and courseid: (C).
 *
 * @param string $query_string The search string. This string will
 * be parsed and stemmed automatically.
 * @param XapianDatabase $db Xapian database to connect. When it is not an
 * object, the database at XAPIAN_DB is opened.
 * @param int $start An integer defining the first
 * document to return
 * @param int $length The number of results to return.
 * @param array $extra An array containing arrays of
 * extra terms to search for. Each non-empty entry is OR-ed together and
 * then AND-ed with the query built so far.
 * @param int $count_type Which match count to report: 1 = lower bound,
 * 2 = upper bound, anything else = best estimate
 * @return array array($count, $results), where $results is indexed from 1
 * and each entry is an object with 'ids', 'score' and 'terms' properties;
 * NULL when an exception was raised (its message is displayed).
 */
function xapian_query($query_string, $db = NULL, $start = 0, $length = 10,
$extra = array(), $count_type = 0) {
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }
        $enquire = new XapianEnquire($db);
        $query_parser = new XapianQueryParser();
        $stemmer = new XapianStem("english");
        $query_parser->set_stemmer($stemmer);
        $query_parser->set_database($db);
        $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
        $query_parser->add_boolean_prefix('filetype', 'F');
        $query_parser->add_boolean_prefix('tag', 'T');
        $query_parser->add_boolean_prefix('courseid', 'C');
        $query = $query_parser->parse_query($query_string);
        // Build subqueries from $extra array.
        foreach ($extra as $subq) {
            if (!empty($subq)) {
                /* TODO: review if we want to use this constructor
                 * deprecated in C: http://xapian.org/docs/apidoc/html/classXapian_1_1Query.html#f85d155b99f1f2007fe75ffc7a8bd51e
                 * maybe use: Query (Query::op op_, const Query &left, const Query &right) ?
                 */
                $subquery = new XapianQuery(XapianQuery::OP_OR, $subq);
                $query = new XapianQuery(XapianQuery::OP_AND, array($subquery, $query));
            }
        }
        $enquire->set_query($query);
        $matches = $enquire->get_mset((int)$start, (int)$length);
        $results = array();
        $i = $matches->begin();
        $count = 0;
        while (!$i->equals($matches->end())) {
            $count++;
            $document = $i->get_document();
            if (is_object($document)) {
                // Create the result object explicitly: assigning a property
                // on a missing array element is an Error on PHP 8, which the
                // catch (Exception) below would not intercept.
                $result = new stdClass();
                $result->ids = ($document->get_data());
                $result->score = ($i->get_percent());
                $result->terms = xapian_get_doc_terms($document);
                $results[$count] = $result;
            }
            $i->next();
        }
        // From here on $count is the reported match count, no longer the
        // loop position.
        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;
            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;
            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }
        return array($count, $results);
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
/**
 * Retrieve a list of terms from the database
 *
 * Each returned entry is array('frequency' => ..., 'name' => ...).
 *
 * @param int $count Maximum number of terms to retrieve; 0 means no limit
 * @param char $prefix The prefix of the terms to retrieve; when empty, the
 * iteration is not restricted to a prefix
 * @param XapianDatabase $db Xapian database to connect. When it is not an
 * object, the database at XAPIAN_DB is opened.
 * @return array The list of terms, or NULL when an exception was raised
 * (its message is displayed)
 */
function xapian_get_all_terms($count=0, $prefix='T', $db=NULL) {
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }
        $cursor = empty($prefix) ? $db->allterms_begin() : $db->allterms_begin($prefix);
        $collected = array();
        $seen = 0;
        while (!$cursor->equals($db->allterms_end())) {
            $seen++;
            // a zero $count disables the limit
            if ($count != 0 && $seen > $count) {
                break;
            }
            $collected[] = array(
                'frequency' => $cursor->get_termfreq(),
                'name' => $cursor->get_term(),
            );
            $cursor->next();
        }
        return $collected;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
/**
 * Retrieve the terms of a document that start with a given character
 *
 * Each returned entry is array('frequency' => ..., 'name' => ...).
 *
 * @param XapianDocument $doc document searched
 * @param string $prefix Kept terms are those whose first character is
 * identical to this value
 * @return array The matching terms; NULL when $doc is not a XapianDocument
 * or when an exception was raised (its message is displayed)
 */
function xapian_get_doc_terms($doc=NULL, $prefix='T') {
    try {
        if (!($doc instanceof XapianDocument)) {
            return;
        }
        //TODO: let xapian do the prefix filtering if possible. Suggested by
        //ojwb: use Document::termlist_begin(), then skip_to(prefix) on the
        //TermIterator, checking the end condition by hand.
        $matching = array();
        $cursor = $doc->termlist_begin();
        while (!$cursor->equals($doc->termlist_end())) {
            $entry = array(
                'frequency' => $cursor->get_termfreq(),
                'name' => $cursor->get_term(),
            );
            if ($entry['name'][0] === $prefix) {
                $matching[] = $entry;
            }
            $cursor->next();
        }
        return $matching;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
?>

@ -0,0 +1,16 @@
<?php
/*
* This file includes lp_list_search to avoid duplication of code, it
* bootstraps dokeos api enough to make lp_list_search work.
*/
// Load the global include first: the api_get_path() calls below are made
// right after it, so it presumably provides that function - confirm.
// NOTE(review): both '../main/...' paths in this script are relative;
// whether they resolve depends on where this file is served from - verify
// against the file's final location.
include_once ('../main/inc/global.inc.php');
// Libraries loaded up front for the search page included at the end
include_once (api_get_path(LIBRARY_PATH).'course.lib.php');
include_once (api_get_path(LIBRARY_PATH).'debug.lib.inc.php');
include_once (api_get_path(LIBRARY_PATH).'system_announcements.lib.php');
include_once (api_get_path(LIBRARY_PATH).'groupmanager.lib.php');
include_once (api_get_path(LIBRARY_PATH).'usermanager.lib.php');
api_block_anonymous_users(); // only users who are logged in can proceed
// Hand over to the shared search page; 'require' makes a missing file fatal
require '../main/newscorm/lp_list_search.php';
?>

@ -0,0 +1,2 @@
This directory is a placeholder for the search plugin, which allows the
indexation of Dokeos contents through the use of the Xapian search engine.
Loading…
Cancel
Save