XapianQuery.php 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. <?php
  2. /* For licensing terms, see /license.txt */
  3. /**
  4. * @package chamilo.include.search
  5. */
  6. /**
  7. * Code
  8. */
  9. require_once 'xapian.php';
  10. require_once dirname(__FILE__) . '/../IndexableChunk.class.php';
  11. //TODO: think another way without including specific fields here
  12. require_once api_get_path(LIBRARY_PATH) . 'specific_fields_manager.lib.php';
  13. define('XAPIAN_DB', api_get_path(SYS_PATH) . 'searchdb/');
  /**
   * Runs a search against the Xapian index and collects the matching documents.
   *
   * When $query_string is not empty it is parsed (English stemmer, 'courseid'
   * and 'toolid' registered as boolean prefixes) and AND-ed with every query
   * built from $extra. When it is empty, the $extra queries alone are OR-ed
   * together. Based on drupal-xapian.
   *
   * @param string         $query_string The search string; parsed and stemmed automatically.
   * @param XapianDatabase $db           Database to search; when this is not an object,
   *                                     the database located at XAPIAN_DB is opened.
   * @param int            $start        Offset of the first match to return.
   * @param int            $length       Maximum number of matches to return.
   * @param array          $extra        Extra terms to search for; every non-empty entry
   *                                     is wrapped in its own XapianQuery.
   * @param int            $count_type   Which total to report: 1 = lower bound,
   *                                     2 = upper bound, anything else = best estimate.
   * @return array|null array($count, $results): $count is the total selected by
   *                    $count_type, $results is keyed by the 1-based position of the
   *                    match and holds 'courseid', 'toolid', one 'sf-<code>' entry per
   *                    specific field, 'xapian_data' and 'score'. NULL is returned
   *                    after an exception has been reported through display_xapian_error().
   */
  function xapian_query($query_string, $db = NULL, $start = 0, $length = 10, $extra = array(), $count_type = 0) {
      try {
          if (!is_object($db)) {
              $db = new XapianDatabase(XAPIAN_DB);
          }

          // Build subqueries from $extra array. Now only used by tags search filter on search widget
          $subqueries = array();
          foreach ($extra as $subquery) {
              if (!empty($subquery)) {
                  $subqueries[] = new XapianQuery($subquery);
              }
          }

          $enquire = new XapianEnquire($db);
          if (!empty($query_string)) {
              $query_parser = new XapianQueryParser();
              //TODO: choose stemmer
              $stemmer = new XapianStem("english");
              $query_parser->set_stemmer($stemmer);
              $query_parser->set_database($db);
              $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
              $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
              $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
              $parsed_query = $query_parser->parse_query($query_string);
              // Every extra filter must match in addition to the parsed text query.
              $final_array = array_merge($subqueries, array($parsed_query));
              $query = new XapianQuery(XapianQuery::OP_AND, $final_array);
          } else {
              // No free text: any of the extra filters is enough for a match.
              $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
          }
          $enquire->set_query($query);
          $matches = $enquire->get_mset((int) $start, (int) $length);

          $specific_fields = get_specific_field_list();
          $results = array();
          // $count doubles as the 1-based key of each result while iterating;
          // it is replaced by the requested total once the loop is done.
          $count = 0;
          $i = $matches->begin();
          while (!$i->equals($matches->end())) {
              $count++;
              $document = $i->get_document();
              if (is_object($document)) {
                  // xapian_get_doc_terms() may return an empty array or NULL, so the
                  // first term is only read when it exists. substr() then receives ''
                  // and yields the same value the unguarded lookup produced, without
                  // the undefined offset / null access notices.
                  $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
                  $courseid_name = isset($courseid_terms[0]['name']) ? $courseid_terms[0]['name'] : '';
                  // substr(..., 1) drops the one-character term prefix.
                  $results[$count]['courseid'] = substr($courseid_name, 1);

                  $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
                  $toolid_name = isset($toolid_terms[0]['name']) ? $toolid_terms[0]['name'] : '';
                  $results[$count]['toolid'] = substr($toolid_name, 1);

                  // process each specific field prefix
                  foreach ($specific_fields as $specific_field) {
                      $results[$count]['sf-' . $specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
                  }

                  // rest of data
                  // NOTE(review): unserialize() is only safe while the index contains
                  // nothing but data serialized by the application itself.
                  $results[$count]['xapian_data'] = unserialize($document->get_data());
                  $results[$count]['score'] = ($i->get_percent());
              }
              $i->next();
          }

          switch ($count_type) {
              case 1: // Lower bound
                  $count = $matches->get_matches_lower_bound();
                  break;
              case 2: // Upper bound
                  $count = $matches->get_matches_upper_bound();
                  break;
              case 0: // Best estimate
              default:
                  $count = $matches->get_matches_estimated();
                  break;
          }

          return array($count, $results);
      } catch (Exception $e) {
          display_xapian_error($e->getMessage());
          return NULL;
      }
  }
  /**
   * Builds a query made of one single term.
   *
   * @param mixed $term Value handed unchanged to the XapianQuery constructor
   *                    (presumably an already prefixed term string - confirm with callers)
   * @return XapianQuery The query wrapping $term
   */
  function xapian_get_boolean_query($term) {
      $boolean_query = new XapianQuery($term);

      return $boolean_query;
  }
  /**
   * Retrieve a list of terms stored in the database.
   *
   * @param int            $count  Maximum number of terms to retrieve; 0 means no limit
   * @param string|null    $prefix Passed to XapianDatabase::allterms_begin() to restrict
   *                               the listing; when empty, the unrestricted listing is used
   * @param XapianDatabase $db     Xapian database to read; when this is not an object,
   *                               the database located at XAPIAN_DB is opened
   * @return array|null List of array('frequency' => ..., 'name' => ...) entries, or NULL
   *                    after an exception has been reported through display_xapian_error()
   */
  function xapian_get_all_terms($count = 0, $prefix = NULL, $db = NULL) {
      // $prefix now has a default: an optional parameter ($count) placed before a
      // required one is deprecated since PHP 8.0 and made the default unusable.
      try {
          if (!is_object($db)) {
              $db = new XapianDatabase(XAPIAN_DB);
          }

          if (!empty($prefix)) {
              $termi = $db->allterms_begin($prefix);
          } else {
              $termi = $db->allterms_begin();
          }
          // The end marker is the same for the whole loop, so it is fetched once
          // instead of once per iteration.
          $end = $db->allterms_end();

          $terms = array();
          $i = 0;
          // ++$i is only evaluated while the iterator is not exhausted, so it
          // counts the terms visited so far; $count == 0 disables the limit.
          for (; !$termi->equals($end) && (++$i <= $count || $count == 0); $termi->next()) {
              $terms[] = array(
                  'frequency' => $termi->get_termfreq(),
                  'name' => $termi->get_term(),
              );
          }

          return $terms;
      } catch (Exception $e) {
          display_xapian_error($e->getMessage());
          return NULL;
      }
  }
  /**
   * Retrieve the terms of a document, optionally restricted to one prefix.
   *
   * @param XapianDocument $doc    Document searched
   * @param string|null    $prefix Only terms whose name starts with this prefix are
   *                               returned (the prefix may be longer than one
   *                               character); an empty prefix returns every term
   * @return array|null List of array('frequency' => ..., 'name' => ...) entries.
   *                    NULL when $doc is not a XapianDocument, or after an exception
   *                    has been reported through display_xapian_error()
   */
  function xapian_get_doc_terms($doc = NULL, $prefix = NULL) {
      // $prefix now has a default: an optional parameter ($doc) placed before a
      // required one is deprecated since PHP 8.0.
      try {
          if (!is_a($doc, 'XapianDocument')) {
              return NULL;
          }

          $prefix = (string) $prefix;
          $prefix_length = strlen($prefix);

          //TODO: make the filter by prefix on xapian if possible
          //ojwb marvil07: use Document::termlist_begin() and then skip_to(prefix) on the TermIterator
          //ojwb you'll need to check the end condition by hand though
          // The end marker is the same for the whole loop, so it is fetched once.
          $end = $doc->termlist_end();
          $terms = array();
          for ($termi = $doc->termlist_begin(); !$termi->equals($end); $termi->next()) {
              $name = $termi->get_term();
              // Compare the whole prefix, not just the first byte of the term:
              // a prefix longer than one character could never match otherwise.
              if ($prefix_length === 0 || strncmp($name, $prefix, $prefix_length) === 0) {
                  $terms[] = array(
                      'frequency' => $termi->get_termfreq(),
                      'name' => $name,
                  );
              }
          }

          return $terms;
      } catch (Exception $e) {
          display_xapian_error($e->getMessage());
          return NULL;
      }
  }
  /**
   * Combine xapian queries into a single query.
   *
   * The operator is given by name so that callers do not need the XapianQuery
   * constants (and therefore xapian.php) themselves.
   *
   * @param XapianQuery|array $query1 One query or a list of queries
   * @param XapianQuery|array $query2 One query or a list of queries appended to
   *                                  $query1; when NULL only $query1 is joined
   * @param string            $op     'and' joins with OP_AND; 'or' and any other
   *                                  value join with OP_OR
   * @return XapianQuery query joined
   */
  function xapian_join_queries($query1, $query2 = NULL, $op = 'or') {
      // Loose comparisons, with 'or' taking precedence over 'and', reproduce the
      // matching order of a switch over the cases 'or', 'and', default.
      $operator = XapianQuery::OP_OR;
      if ($op != 'or' && $op == 'and') {
          $operator = XapianQuery::OP_AND;
      }

      // A single query is treated as a one-element list.
      $operands = is_array($query1) ? $query1 : array($query1);
      if (!is_null($query2)) {
          $second = is_array($query2) ? $query2 : array($query2);
          $operands = array_merge($operands, $second);
      }

      return new XapianQuery($operator, $operands);
  }
  /**
   * Displays the translated error message matching a xapian error message.
   *
   * The text before the first ':' of $xapian_error_message is taken as the error
   * type and mapped to a language variable; unknown types fall back to
   * 'SearchOtherXapianError'.
   *
   * @author Isaac flores paz <florespaz@bidsoftperu.com>
   * @param string $xapian_error_message The xapian error message
   * @return void The message is shown through Display::display_error_message();
   *              nothing is returned
   */
  function display_xapian_error($xapian_error_message) {
      // Error type reported by xapian => language variable holding its translation.
      $lang_variables = array(
          'DatabaseOpeningError' => 'SearchDatabaseOpeningError',
          'DatabaseVersionError' => 'SearchDatabaseVersionError',
          'DatabaseModifiedError' => 'SearchDatabaseModifiedError',
          'DatabaseLockError' => 'SearchDatabaseLockError',
          'DatabaseCreateError' => 'SearchDatabaseCreateError',
          'DatabaseCorruptError' => 'SearchDatabaseCorruptError',
          'NetworkTimeoutError' => 'SearchNetworkTimeoutError',
      );

      $parts = explode(':', $xapian_error_message);
      $error_type = $parts[0];

      $lang_variable = 'SearchOtherXapianError';
      if (isset($lang_variables[$error_type])) {
          $lang_variable = $lang_variables[$error_type];
      }

      $translated_error = get_lang($lang_variable);
      Display::display_error_message(get_lang('Error') . ' : ' . $translated_error);
  }