Chamilo is a learning management system focused on ease of use and accessibility
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
chamilo-lms/main/inc/lib/search/xapian/XapianIndexer.class.php

318 lines
8.5 KiB

<?php
/* For licensing terms, see /license.txt */
/**
* @package chamilo.include.search
*/
require_once 'xapian.php';
require_once dirname(__FILE__) . '/../IndexableChunk.class.php';
/**
* Abstract helper class
* @package chamilo.include.search
*/
abstract class XapianIndexer
{
/* XapianWritableDatabase */
protected $db;
/* IndexableChunk[] */
protected $chunks;
/* XapianTermGenerator */
public $indexer;
/* XapianStem */
public $stemmer;
/**
* Class contructor
*/
function __construct()
{
$this->db = null;
$this->stemmer = null;
}
/**
* Generates a list of languages Xapian manages
*
* This method enables the definition of more matches between
* Chamilo languages and Xapian languages (through hardcoding)
* @return array Array of languages codes -> Xapian languages
*/
public final function xapian_languages()
{
/* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
return array(
'none' => 'none', //don't stem terms
'da' => 'danish',
'nl' => 'dutch',
/* Martin Porter's 2002 revision of his stemmer */
'en' => 'english',
/* Lovin's stemmer */
'lovins' => 'english_lovins',
/* Porter's stemmer as described in his 1980 paper */
'porter' => 'english_porter',
'fi' => 'finnish',
'fr' => 'french',
'de' => 'german',
'it' => 'italian',
'no' => 'norwegian',
'pt' => 'portuguese',
'ru' => 'russian',
'es' => 'spanish',
'sv' => 'swedish',
);
}
/**
* Connect to the database, and create it if it doesn't exist
*/
function connectDb($path = null, $dbMode = null, $lang = 'english')
{
if ($this->db != null)
return $this->db;
if ($dbMode == null)
$dbMode = Xapian::DB_CREATE_OR_OPEN;
if ($path == null)
$path = api_get_path(SYS_UPLOAD_PATH).'plugins/xapian/searchdb/';
try {
$this->db = new XapianWritableDatabase($path, $dbMode);
$this->indexer = new XapianTermGenerator();
if (!in_array($lang, $this->xapian_languages())) {
$lang = 'english';
}
$this->stemmer = new XapianStem($lang);
$this->indexer->set_stemmer($this->stemmer);
return $this->db;
} catch (Exception $e) {
Display::display_error_message($e->getMessage());
return 1;
}
}
/**
* Simple getter for the db attribute
* @return object The db attribute
*/
function getDb()
{
return $this->db;
}
/**
* Add this chunk to the chunk array attribute
* @param string Chunk of text
* @return void
*/
function addChunk($chunk)
{
$this->chunks[] = $chunk;
}
/**
* Actually index the current data
*
* @return integer New Xapian document ID or null upon failure
*/
function index()
{
try {
if (!empty($this->chunks)) {
foreach ($this->chunks as $chunk) {
$doc = new XapianDocument();
$this->indexer->set_document($doc);
if (!empty($chunk->terms)) {
foreach ($chunk->terms as $term) {
/* FIXME: think of getting weight */
$doc->add_term($term['flag'] . $term['name'], 1);
}
}
// free-form index all data array (title, content, etc)
if (!empty($chunk->data)) {
foreach ($chunk->data as $key => $value) {
$this->indexer->index_text($value, 1);
}
}
$doc->set_data($chunk->xapian_data, 1);
$did = $this->db->add_document($doc);
//write to disk
$this->db->flush();
return $did;
}
}
} catch (Exception $e) {
Display::display_error_message($e->getMessage());
exit(1);
}
}
/**
* Get a specific document from xapian db
*
* @param int did Xapian::docid
* @return mixed XapianDocument, or false on error
*/
function get_document($did)
{
if ($this->db == null) {
$this->connectDb();
}
try {
$docid = $this->db->get_document($did);
} catch (Exception $e) {
//Display::display_error_message($e->getMessage());
return false;
}
return $docid;
}
/**
* Get document data on a xapian document
*
* @param XapianDocument $doc xapian document to push into the db
* @return mixed xapian document data or FALSE if error
*/
function get_document_data($doc)
{
if ($this->db == null) {
$this->connectDb();
}
try {
if (!is_a($doc, 'XapianDocument')) {
return FALSE;
}
$doc_data = $doc->get_data();
return $doc_data;
} catch (Exception $e) {
//Display::display_error_message($e->getMessage());
return false;
}
}
/**
* Replace all terms of a document in xapian db
*
* @param int $did Xapian::docid
* @param array $terms New terms of the document
* @param string $prefix Prefix used to categorize the doc (usually 'T' for title, 'A' for author)
* @return boolean false on error
*/
function update_terms($did, $terms, $prefix)
{
$doc = $this->get_document($did);
if ($doc === false) {
return false;
}
$doc->clear_terms();
foreach ($terms as $term) {
//add directly
$doc->add_term($prefix . $term, 1);
}
$this->db->replace_document($did, $doc);
$this->db->flush();
return true;
}
/**
* Remove a document from xapian db
*
* @param int did Xapian::docid
*/
function remove_document($did)
{
if ($this->db == null) {
$this->connectDb();
}
if (is_numeric($did) && $did > 0) {
$doc = $this->get_document($did);
if ($doc !== FALSE) {
$this->db->delete_document($did);
$this->db->flush();
}
}
}
/**
* Adds a term to the document specified
*
* @param string $term The term to add
* @param XapianDocument $doc The xapian document where to add the term
* @return mixed XapianDocument, or false on error
*/
function add_term_to_doc($term, $doc)
{
if (!is_a($doc, 'XapianDocument')) {
return FALSE;
}
try {
$doc->add_term($term);
} catch (Exception $e) {
Display::display_error_message($e->getMessage());
return 1;
}
}
/**
* Remove a term from the document specified
*
* @param string $term The term to add
* @param XapianDocument $doc The xapian document where to add the term
* @return mixed XapianDocument, or false on error
*/
function remove_term_from_doc($term, $doc)
{
if (!is_a($doc, 'XapianDocument')) {
return FALSE;
}
try {
$doc->remove_term($term);
} catch (Exception $e) {
Display::display_error_message($e->getMessage());
return 1;
}
}
/**
* Replace a document in the actual db
*
* @param XapianDocument $doc xapian document to push into the db
* @param Xapian::docid $did xapian document id of the document to replace
*/
function replace_document($doc, $did)
{
if (!is_a($doc, 'XapianDocument')) {
return FALSE;
}
if ($this->db == null) {
$this->connectDb();
}
try {
$this->getDb()->replace_document((int) $did, $doc);
$this->getDb()->flush();
} catch (Exception $e) {
Display::display_error_message($e->getMessage());
return 1;
}
}
/**
* Class destructor
*/
function __destruct()
{
unset($this->db);
unset($this->stemmer);
}
}