diff --git a/main/inc/lib/search/DokeosIndexer.class.php b/main/inc/lib/search/DokeosIndexer.class.php new file mode 100644 index 0000000000..f2ecd80352 --- /dev/null +++ b/main/inc/lib/search/DokeosIndexer.class.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/main/inc/lib/search/DokeosQuery.php b/main/inc/lib/search/DokeosQuery.php new file mode 100644 index 0000000000..c80077fa95 --- /dev/null +++ b/main/inc/lib/search/DokeosQuery.php @@ -0,0 +1,39 @@ + \ No newline at end of file diff --git a/main/inc/lib/search/IndexableChunk.class.php b/main/inc/lib/search/IndexableChunk.class.php new file mode 100644 index 0000000000..c49a4cceaf --- /dev/null +++ b/main/inc/lib/search/IndexableChunk.class.php @@ -0,0 +1,110 @@ + string + * flag => char + * ) + */
    public $terms;

    /**
     * Add a value to the indexed item.
     * The value is stored in the 'data' array under the given key and
     * replaces any value previously stored under that key.
     * @param string Key
     * @param string Value
     * @return void
     */
    function addValue($key, $value) {
        $this->data[$key] = $value;
    }

    /**
     * Add a term (like xapian definition).
     * The term is appended to the 'terms' list as array('name' => ..., 'flag' => ...).
     * A flag that is not exactly one byte long is silently ignored.
     * @param string Term
     * @param string Flag (one character)
     * @return void
     */
    function addTerm($term, $flag) {
        if (strlen($flag) != 1) {
            return;
        }

        $this->terms[] = array('name' => $term, 'flag' => $flag);
    }

    /**
     * Get the ID from an indexed item.
     * The 'ids' entry of the 'data' array is split on ':' and its second
     * element is returned (the comment in the original code names the
     * elements course_id and document_id).
     * @return mixed Second element of 'ids', or -1 when 'data' is not an
     *               array, has no 'ids' entry, or 'ids' has fewer than two
     *               ':'-separated elements
     */
    function getId() {
        if (!is_array($this->data) || !isset($this->data['ids'])) {
            return -1;
        }

        $ids = explode(':', $this->data['ids']);

        /* we need at least course_id and document_id, else it's broken.
         * explode() always returns at least one element, so the count has
         * to be compared against 2 before reading index 1. */
        if (count($ids) < 2) {
            return -1;
        }

        return $ids[1];
    }

    /**
     * Sets the parent of the current indexed item.
     * When $parent is an IndexableChunk, its ID is recorded in 'parentId'
     * and the 'parent' flag is set to False; for any other value 'parentId'
     * becomes -1 and the 'parent' flag is set to True.
     * @param mixed A parent object
     * @return void
     */
    function setParent($parent) {
        $has_parent = is_a($parent, 'IndexableChunk');

        $this->parentId = $has_parent ? $parent->getId() : -1;
        $this->parent = !$has_parent;
    }

    /**
     * Class constructor.
Just generates an empty 'data' array attribute and an empty 'terms' list
     */
    function __construct() {
        $this->data = array();
        // Start with an empty list so code that iterates over the terms of a
        // chunk that never received addTerm() does not iterate over NULL.
        $this->terms = array();
    }

    /**
     * Class destructor. Unsets the 'data', 'terms' and 'parent' attributes.
     */
    function __destruct() {
        unset($this->data);
        unset($this->terms);
        unset($this->parent);
    }
}

/**
 * Extension of the _IndexableChunk class to make IndexableChunk extensible.
 * It adds no members of its own.
 */
class IndexableChunk extends _IndexableChunk
{
}

?> \ No newline at end of file diff --git a/main/inc/lib/search/search_widget.php b/main/inc/lib/search/search_widget.php new file mode 100644 index 0000000000..8fc51ba9de --- /dev/null +++ b/main/inc/lib/search/search_widget.php @@ -0,0 +1,112 @@ + + .tags { + display: block; + margin-top: 20px; + width: 70%; + } + .tag { + float: left; + display: block; + padding: 5px; + padding-right: 4px; + padding-left: 4px; + margin: 3px; + border: 1px solid #ddd; + } + .tag:hover { + background: #ddd; + cursor: pointer; + } + .lighttagcolor { + background: #ddd; + } + .lighttagcolor:hover { + background: #fff; + } + + '; + $htmlHeadXtra[] = ' + '; + $htmlHeadXtra[] = " + "; +} + +/** + * Show the search widget + * + * The form will post to lp_controller.php by default, you can pass a value to + * $action to use a custom action. + * IMPORTANT: you have to call search_widget_prepare() before calling this + * function or otherwise the form will not behave correctly. + * + * @param string $action Just in case your action is not + * lp_controller.php + */ +function search_widget_show($action="lp_controller.php") { + require_once api_get_path(LIBRARY_PATH).'/search/DokeosQuery.php'; + $dktags = dokeos_query_get_tags(); + + $post_tags = array(); + + if (isset($_REQUEST['tags'])) { + $filter = TRUE; + $post_tags = explode(',', $_REQUEST['tags']); + } +?> + +
+ \ No newline at end of file diff --git a/main/inc/lib/search/xapian/XapianConsts.php b/main/inc/lib/search/xapian/XapianConsts.php new file mode 100644 index 0000000000..d1fa23c4dd --- /dev/null +++ b/main/inc/lib/search/xapian/XapianConsts.php @@ -0,0 +1,7 @@ + diff --git a/main/inc/lib/search/xapian/XapianIndexer.class.php b/main/inc/lib/search/xapian/XapianIndexer.class.php new file mode 100644 index 0000000000..53a48ec6c1 --- /dev/null +++ b/main/inc/lib/search/xapian/XapianIndexer.class.php @@ -0,0 +1,208 @@ + Xapian languages + */
    public final function xapian_languages() {
        /* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
        return array(
            /* don't stem terms */
            'none' => 'none',
            'da' => 'danish',
            'nl' => 'dutch',
            /* Martin Porter's 2002 revision of his stemmer */
            'en' => 'english',
            /* Lovin's stemmer */
            'lovins' => 'english_lovins',
            /* Porter's stemmer as described in his 1980 paper */
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        );
    }

    /**
     * Connect to the database, and create it if it doesn't exist.
     *
     * Also creates the term generator and attaches a stemmer to it.
     *
     * @param string Path of the Xapian database; defaults to the 'searchdb/'
     *               directory under api_get_path(SYS_PATH)
     * @param int    Xapian open mode; defaults to Xapian::DB_CREATE_OR_OPEN
     * @param string Xapian language name (one of the values returned by
     *               xapian_languages()); anything else falls back to 'english'
     * @return mixed The XapianWritableDatabase on success; on failure the
     *               exception message is displayed and the integer 1 is returned
     */
    function connectDb($path=NULL, $dbMode=NULL, $lang='english') {
        if ($dbMode == NULL) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }

        if ($path == NULL) {
            $path = api_get_path(SYS_PATH).'searchdb/';
        }

        try {
            $this->db = new XapianWritableDatabase($path, $dbMode);
            $this->indexer = new XapianTermGenerator();

            // Strict comparison: a loose in_array() lets TRUE (and integer 0
            // before PHP 8) match a language name and reach XapianStem.
            if (!in_array($lang, $this->xapian_languages(), true)) {
                $lang = 'english';
            }

            $this->stemmer = new XapianStem($lang);
            $this->indexer->set_stemmer($this->stemmer);

            return $this->db;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            return 1;
        }
    }

    /**
     * Simple getter for the db attribute
     * @return object The db attribute (NULL until connectDb() has succeeded)
     */
    function getDb() {
        return $this->db;
    }

    /**
     *
Add this chunk to the chunk array attribute
     * @param object Chunk to index; index() reads its 'terms' and 'data'
     *               attributes and calls its getId() method
     * @return void
     */
    function addChunk($chunk) {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data.
     *
     * Every chunk queued with addChunk() is turned into a Xapian document:
     * its terms are added as flag-prefixed terms, every 'data' value except
     * 'ids' is indexed as free text, and 'ids' is stored as the document
     * data. The document replaces the one stored under the chunk ID and the
     * database is flushed after each chunk.
     *
     * On a Xapian exception the message is displayed and the script exits
     * with status 1.
     *
     * @return integer Value returned by replace_document() for the last
     *                 chunk, or NULL when there is nothing to index or a
     *                 chunk has no valid ID (indexing stops at that chunk)
     */
    function index() {
        if (empty($this->chunks)) {
            return NULL;
        }

        try {
            $did = NULL;

            // The original returned from inside this loop, so only the first
            // queued chunk was ever written; process the whole queue instead.
            foreach ($this->chunks as $chunk) {
                $id = $chunk->getId();
                if ($id < 0) {
                    return NULL;
                }

                $doc = new XapianDocument();
                $this->indexer->set_document($doc);

                // A chunk that never received a term may carry a non-array here.
                if (is_array($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        /* FIXME: think of getting weight */
                        $doc->add_term($term['flag'] . $term['name'], 1);
                    }
                }

                /* free-form ignoring ids, indexes title and content */
                foreach ($chunk->data as $key => $value) {
                    // Strict test: before PHP 8 an integer key 0 compares
                    // loosely equal to 'ids' and would be skipped by mistake.
                    if ($key !== 'ids') {
                        $this->indexer->index_text($value, 1);
                    }
                }

                $doc->set_data($chunk->data['ids'], 1);

                $did = $this->db->replace_document($id, $doc);

                //write to disk
                $this->db->flush();
            }

            return $did;
        }
        catch (Exception $e) {
            Display::display_error_message($e->getMessage());
            exit(1);
        }
    }

    /**
     * Get a specific document from xapian db.
     * Connects with the connectDb() defaults when no database is open yet.
     *
     * @param int did Xapian::docid
     * @return XapianDocument
     */
    function get_document($did) {
        // The original tested an undefined local $path, which is always NULL,
        // so it reconnected on every call.
        if ($this->db == NULL) {
            $this->connectDb();
        }
        return $this->db->get_document($did);
    }

    /**
     * Replace all terms of a document in xapian db.
     * Existing terms are cleared, each new term is added with the given
     * prefix, and the document is written back and flushed.
     *
     * @param int did Xapian::docid
     * @param array terms New terms of the document
     * @param string prefix Prefix prepended to every term (defaults to 'T')
     * @return void
     */
    function update_terms($did, $terms, $prefix='T') {
        $doc = $this->get_document($did);
        $doc->clear_terms();

        foreach ($terms as $term) {
            //add directly
            $doc->add_term($prefix.$term, 1);
        }

        $this->db->replace_document($did, $doc);
        $this->db->flush();
    }

    /**
     * Remove a document from xapian db
     *
@param int did Xapian::docid
     * @return void
     */
    function remove_document($did) {
        // The original tested an undefined local $path, which is always NULL,
        // so it reconnected on every call; connect only when no database is open.
        if ($this->db == NULL) {
            $this->connectDb();
        }
        $this->db->delete_document($did);
        $this->db->flush();
    }

    /**
     * Class constructor.
     * Starts with no database, stemmer or term generator and an empty
     * chunk queue.
     */
    function __construct() {
        $this->db = NULL;
        $this->stemmer = NULL;
        // connectDb() assigns the term generator and addChunk() appends to the
        // queue; give both a defined initial value.
        $this->indexer = NULL;
        $this->chunks = array();
    }

    /**
     * Class destructor. Unsets the 'db' and 'stemmer' attributes.
     */
    function __destruct() {
        unset($this->db);
        unset($this->stemmer);
    }
}
?> diff --git a/main/inc/lib/search/xapian/XapianQuery.php b/main/inc/lib/search/xapian/XapianQuery.php new file mode 100644 index 0000000000..c17feffbab --- /dev/null +++ b/main/inc/lib/search/xapian/XapianQuery.php @@ -0,0 +1,164 @@ +set_stemmer($stemmer); + $query_parser->set_database($db); + $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME); + $query_parser->add_boolean_prefix('filetype', 'F'); + $query_parser->add_boolean_prefix('tag', 'T'); + $query_parser->add_boolean_prefix('courseid', 'C'); + $query = $query_parser->parse_query($query_string); + + // Build subqueries from $extra array. + foreach ($extra as $subq) { + if (!empty($subq)) { + /* TODO: review if we want to use this constructor + * deprecated in C: http://xapian.org/docs/apidoc/html/classXapian_1_1Query.html#f85d155b99f1f2007fe75ffc7a8bd51e + * maybe use: Query (Query::op op_, const Query &left, const Query &right) ? 
+ */ + $subquery = new XapianQuery(XapianQuery::OP_OR, $subq); + $query = new XapianQuery(XapianQuery::OP_AND, array($subquery, $query)); + } + } + + $enquire->set_query($query); + $matches = $enquire->get_mset((int)$start, (int)$length); + + $results = array(); + $i = $matches->begin(); + $count = 0; + while (!$i->equals($matches->end())) { + $count++; + $document = $i->get_document(); + if (is_object($document)) { + $results[$count]->ids = ($document->get_data()); + $results[$count]->score = ($i->get_percent()); + $results[$count]->terms = xapian_get_doc_terms($document); + } + $i->next(); + } + + switch ($count_type) { + case 1: // Lower bound + $count = $matches->get_matches_lower_bound(); + break; + + case 2: // Upper bound + $count = $matches->get_matches_upper_bound(); + break; + + case 0: // Best estimate + default: + $count = $matches->get_matches_estimated(); + break; + } + + return array($count, $results); + } + catch (Exception $e) { + Display::display_error_message('xapian error message: '. $e->getMessage()); + return NULL; + } +}

/**
 * Retrieve a list of db terms
 *
 * @param int $count Maximum number of terms to retrieve; 0 retrieves all of them
 * @param char $prefix The prefix of the terms to retrieve; an empty value
 *                     retrieves terms regardless of prefix
 * @param XapianDatabase $db Xapian database to read; when it is not an
 *                           object, the database at XAPIAN_DB is opened
 * @return array List of array('frequency' => ..., 'name' => ...) entries,
 *               or NULL when Xapian raises an exception (the message is displayed)
 */
function xapian_get_all_terms($count=0, $prefix='T', $db=NULL) {
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        if (!empty($prefix)) {
            $termi = $db->allterms_begin($prefix);
        }
        else {
            $termi = $db->allterms_begin();
        }

        $terms = array();
        $end = $db->allterms_end();
        while (!$termi->equals($end)) {
            // A count of 0 means "no limit"
            if ($count != 0 && count($terms) >= $count) {
                break;
            }

            $terms[] = array(
                'frequency' => $termi->get_termfreq(),
                'name' => $termi->get_term(),
            );
            $termi->next();
        }

        return $terms;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}

/**
 * Retrieve all terms of a document that start with a given prefix
 *
 * @param XapianDocument $doc Document searched
 * @param string $prefix Prefix the returned terms must start with; it may be
 *                       longer than one character, and an empty prefix
 *                       matches every term
 * @return array List of array('frequency' => ..., 'name' => ...) entries;
 *               NULL when $doc is not a XapianDocument or when Xapian raises
 *               an exception (the message is displayed)
 */
function xapian_get_doc_terms($doc=NULL, $prefix='T') {
    try {
        if (!is_a($doc, 'XapianDocument')) {
            return NULL;
        }

        $prefix = (string) $prefix;
        $prefix_length = strlen($prefix);

        //TODO: make the filter by prefix on xapian if possible
        //ojwb marvil07: use Document::termlist_begin() and then skip_to(prefix) on the TermIterator
        //ojwb you'll need to check the end condition by hand though
        $terms = array();
        $end = $doc->termlist_end();
        for ($termi = $doc->termlist_begin(); !$termi->equals($end); $termi->next()) {
            $name = $termi->get_term();

            // Compare the whole prefix instead of only the first byte of the
            // term: this is safe on an empty term name and also works for
            // prefixes longer than one character.
            if (strncmp($name, $prefix, $prefix_length) !== 0) {
                continue;
            }

            $terms[] = array(
                'frequency' => $termi->get_termfreq(),
                'name' => $name,
            );
        }

        return $terms;
    }
    catch (Exception $e) {
        Display::display_error_message('xapian error message: '. $e->getMessage());
        return NULL;
    }
}
?> diff --git a/main/search/index.php b/main/search/index.php new file mode 100644 index 0000000000..108c8b75eb --- /dev/null +++ b/main/search/index.php @@ -0,0 +1,16 @@ + diff --git a/searchdb/readme.txt b/searchdb/readme.txt new file mode 100644 index 0000000000..0209b82b28 --- /dev/null +++ b/searchdb/readme.txt @@ -0,0 +1,2 @@ +This directory is a placeholder for the search plugin, which allows the +indexation of Dokeos contents through the use of the Xapian search engine. \ No newline at end of file