123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- <?php
- /* For licensing terms, see /license.txt */
- /**
- * @package chamilo.include.search
- */
- require_once 'xapian.php';
- require_once __DIR__.'/../IndexableChunk.class.php';
/**
 * Abstract helper class wrapping a writable Xapian database.
 *
 * It queues IndexableChunk objects (addChunk()), turns them into Xapian
 * documents (index()) and offers small helpers to read, update, replace and
 * delete documents. Failures reported by Xapian are printed through
 * Display::return_message() and signalled through the return value.
 *
 * @package chamilo.include.search
 */
abstract class XapianIndexer
{
    /* XapianWritableDatabase, or null while not connected */
    protected $db;
    /* IndexableChunk[] queue of chunks waiting for index() */
    protected $chunks;
    /* XapianTermGenerator, or null while not connected */
    public $indexer;
    /* XapianStem, or null while not connected */
    public $stemmer;

    /**
     * Class constructor: starts disconnected, with an empty chunk queue
     */
    public function __construct()
    {
        $this->db = null;
        $this->chunks = array();
        $this->indexer = null;
        $this->stemmer = null;
    }

    /**
     * Generates a list of languages Xapian manages
     *
     * This method enables the definition of more matches between
     * Chamilo languages and Xapian languages (through hardcoding)
     * @return array Array of languages codes -> Xapian languages
     */
    final public function xapian_languages()
    {
        /* http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html */
        return array(
            'none' => 'none', //don't stem terms
            'da' => 'danish',
            'nl' => 'dutch',
            /* Martin Porter's 2002 revision of his stemmer */
            'en' => 'english',
            /* Lovin's stemmer */
            'lovins' => 'english_lovins',
            /* Porter's stemmer as described in his 1980 paper */
            'porter' => 'english_porter',
            'fi' => 'finnish',
            'fr' => 'french',
            'de' => 'german',
            'it' => 'italian',
            'no' => 'norwegian',
            'pt' => 'portuguese',
            'ru' => 'russian',
            'es' => 'spanish',
            'sv' => 'swedish',
        );
    }

    /**
     * Connect to the database, and create it if it doesn't exist
     *
     * An already opened connection is reused and returned as is.
     *
     * @param string|null $path   Database directory; defaults to the
     *                            'plugins/xapian/searchdb/' folder under SYS_UPLOAD_PATH
     * @param int|null    $dbMode Xapian opening mode; defaults to Xapian::DB_CREATE_OR_OPEN
     * @param string      $lang   Xapian stemmer language name (one of the values of
     *                            xapian_languages()); anything else falls back to 'english'
     * @return mixed The XapianWritableDatabase on success, or the integer 1 when Xapian
     *               raised an exception (the message is echoed)
     */
    public function connectDb($path = null, $dbMode = null, $lang = 'english')
    {
        if ($this->db !== null) {
            return $this->db;
        }
        if ($dbMode === null) {
            $dbMode = Xapian::DB_CREATE_OR_OPEN;
        }
        if ($path === null || $path === '') {
            $path = api_get_path(SYS_UPLOAD_PATH).'plugins/xapian/searchdb/';
        }
        // Strict comparison: a loose in_array() lets values such as 0 or true
        // "match" a language name and then reach the XapianStem constructor.
        if (!in_array($lang, $this->xapian_languages(), true)) {
            $lang = 'english';
        }

        try {
            // Build everything in locals first so that a failure half-way
            // does not leave the object with a database but no indexer.
            $db = new XapianWritableDatabase($path, $dbMode);
            $indexer = new XapianTermGenerator();
            $stemmer = new XapianStem($lang);
            $indexer->set_stemmer($stemmer);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }

        $this->db = $db;
        $this->indexer = $indexer;
        $this->stemmer = $stemmer;

        return $this->db;
    }

    /**
     * Simple getter for the db attribute
     * @return object The db attribute (null while not connected)
     */
    public function getDb()
    {
        return $this->db;
    }

    /**
     * Add this chunk to the chunk array attribute
     * @param IndexableChunk $chunk Chunk to queue; index() reads its terms, data
     *                              and xapian_data properties
     * @return void
     */
    public function addChunk($chunk)
    {
        $this->chunks[] = $chunk;
    }

    /**
     * Actually index the current data
     *
     * Every queued chunk becomes one Xapian document. The queue is emptied
     * afterwards (whether or not indexing succeeded) so that a later call on
     * the same indexer never submits the same chunk twice.
     *
     * @return integer|null Xapian document ID of the first queued chunk, or null
     *                      when there was nothing to index or upon failure
     */
    public function index()
    {
        if (empty($this->chunks)) {
            return null;
        }
        if ($this->db === null) {
            $this->connectDb();
        }
        if ($this->db === null || $this->indexer === null) {
            // connectDb() has already reported why the database is unavailable
            return null;
        }

        $queued = $this->chunks;
        $this->chunks = array();
        $firstDid = null;

        try {
            foreach ($queued as $chunk) {
                $doc = new XapianDocument();
                $this->indexer->set_document($doc);
                if (!empty($chunk->terms)) {
                    foreach ($chunk->terms as $term) {
                        /* FIXME: think of getting weight */
                        $doc->add_term($term['flag'].$term['name'], 1);
                    }
                }
                // free-form index all data array (title, content, etc)
                if (!empty($chunk->data)) {
                    foreach ($chunk->data as $value) {
                        $this->indexer->index_text($value, 1);
                    }
                }
                $doc->set_data($chunk->xapian_data);
                $did = $this->db->add_document($doc);
                if ($firstDid === null) {
                    $firstDid = $did;
                }
            }
            //write to disk, once for the whole batch
            $this->db->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return null;
        }

        return $firstDid;
    }

    /**
     * Get a specific document from xapian db
     *
     * Connects with the default settings when no connection is open yet.
     *
     * @param int $did Xapian::docid
     * @return mixed XapianDocument, or false on error
     */
    public function get_document($did)
    {
        if ($this->db === null) {
            $this->connectDb();
        }
        if ($this->db === null) {
            // Connection failed: calling a method on null would be a fatal
            // Error, which the catch (Exception) below does not intercept.
            return false;
        }
        try {
            return $this->db->get_document($did);
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Get document data on a xapian document
     *
     * The data is read from the document object itself, so no database
     * connection is opened here.
     *
     * @param XapianDocument $doc xapian document to read the data from
     * @return mixed xapian document data or FALSE if error
     */
    public function get_document_data($doc)
    {
        if (!($doc instanceof XapianDocument)) {
            return false;
        }
        try {
            return $doc->get_data();
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Replace all terms of a document in xapian db
     *
     * All existing terms of the document are cleared, then each new term is
     * added with the given prefix.
     *
     * @param int $did Xapian::docid
     * @param array $terms New terms of the document
     * @param string $prefix Prefix used to categorize the doc (usually 'T' for title, 'A' for author)
     * @return boolean true on success, false on error
     */
    public function update_terms($did, $terms, $prefix)
    {
        $doc = $this->get_document($did);
        if ($doc === false) {
            return false;
        }
        if (!is_array($terms) && !($terms instanceof Traversable)) {
            // Nothing iterable was given: treat it as "no terms"
            $terms = array();
        }
        try {
            $doc->clear_terms();
            foreach ($terms as $term) {
                //add directly
                $doc->add_term($prefix.$term, 1);
            }
            $this->db->replace_document($did, $doc);
            $this->db->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return true;
    }

    /**
     * Remove a document from xapian db
     *
     * Nothing happens when the id is not a positive number or when the
     * document cannot be fetched. Exceptions raised by Xapian while deleting
     * or flushing are not caught here.
     *
     * @param int $did Xapian::docid
     */
    public function remove_document($did)
    {
        if (!is_numeric($did)) {
            return;
        }
        $docId = (int) $did;
        if ($docId <= 0) {
            return;
        }
        // get_document() opens the connection on demand and returns false
        // when the database is unavailable or the document does not exist.
        if ($this->get_document($docId) === false) {
            return;
        }
        $this->db->delete_document($docId);
        $this->db->flush();
    }

    /**
     * Adds a term to the document specified
     *
     * Only the in-memory document is changed; use replace_document() to
     * store it.
     *
     * @param string $term The term to add
     * @param XapianDocument $doc The xapian document where to add the term
     * @return mixed XapianDocument, or false on error
     */
    public function add_term_to_doc($term, $doc)
    {
        if (!($doc instanceof XapianDocument)) {
            return false;
        }
        try {
            $doc->add_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return $doc;
    }

    /**
     * Remove a term from the document specified
     *
     * Only the in-memory document is changed; use replace_document() to
     * store it.
     *
     * @param string $term The term to remove
     * @param XapianDocument $doc The xapian document to remove the term from
     * @return mixed XapianDocument, or false on error
     */
    public function remove_term_from_doc($term, $doc)
    {
        if (!($doc instanceof XapianDocument)) {
            return false;
        }
        try {
            $doc->remove_term($term);
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return false;
        }

        return $doc;
    }

    /**
     * Replace a document in the actual db
     *
     * Connects with the default settings when no connection is open yet.
     * Return values are kept as they have always been for existing callers.
     *
     * @param XapianDocument $doc xapian document to push into the db
     * @param Xapian::docid $did xapian document id of the document to replace
     * @return mixed false when $doc is not a XapianDocument, 1 when the database
     *               is unavailable or Xapian raised an exception, nothing (null)
     *               on success
     */
    public function replace_document($doc, $did)
    {
        if (!($doc instanceof XapianDocument)) {
            return false;
        }
        if ($this->db === null) {
            $this->connectDb();
        }
        $db = $this->getDb();
        if ($db === null) {
            // connectDb() has already echoed the reason
            return 1;
        }
        try {
            $db->replace_document((int) $did, $doc);
            $db->flush();
        } catch (Exception $e) {
            echo Display::return_message($e->getMessage(), 'error');

            return 1;
        }
    }

    /**
     * Class destructor: drops the references to the database and the stemmer
     */
    public function __destruct()
    {
        unset($this->db);
        unset($this->stemmer);
    }
}
|