openoffice_text.class.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. <?php
  2. /* For licensing terms, see /license.txt */
  3. /**
  4. * Defines the OpenofficeText class, which is meant as a conversion
  5. * tool from Office text documents (.doc, .sxw, .odt, .docx) to
  6. * learning paths.
  7. *
  8. * @package chamilo.learnpath
  9. *
  10. * @author Eric Marguin <eric.marguin@dokeos.com>
  11. * @license GNU/GPL
  12. */
  13. /**
  14. * Defines the "OpenofficeText" child of class "learnpath".
  15. */
  16. require_once 'openoffice_document.class.php';
  17. if (api_get_setting('search_enabled') == 'true') {
  18. require_once api_get_path(LIBRARY_PATH).'specific_fields_manager.lib.php';
  19. }
  20. /**
  21. * @package chamilo.learnpath.OpenofficeDocument
  22. */
  23. class OpenofficeText extends OpenofficeDocument
  24. {
  25. public $split_steps;
  26. /**
  27. * Class constructor. Calls the parent class and initialises the local attribute split_steps.
  28. *
  29. * @param bool Whether to split steps (true) or make one large page (false)
  30. * @param string Course code
  31. * @param int Resource ID
  32. * @param int Creator user id
  33. */
  34. public function __construct(
  35. $split_steps = false,
  36. $course_code = null,
  37. $resource_id = null,
  38. $user_id = null
  39. ) {
  40. $this->split_steps = $split_steps;
  41. parent::__construct($course_code, $resource_id, $user_id);
  42. }
  43. /**
  44. * Gets html pages and compose them into a learning path.
  45. *
  46. * @param array The files that will compose the generated learning path. Unused so far.
  47. *
  48. * @return bool False if file does not exit. Nothing otherwise.
  49. */
  50. public function make_lp($files = [])
  51. {
  52. $_course = api_get_course_info();
  53. // We get a content where ||page_break|| indicates where the page is broken.
  54. if (!file_exists($this->base_work_dir.'/'.$this->created_dir.'/'.$this->file_name.'.html')) {
  55. return false;
  56. }
  57. $content = file_get_contents($this->base_work_dir.'/'.$this->created_dir.'/'.$this->file_name.'.html');
  58. unlink($this->base_work_dir.'/'.$this->file_path);
  59. unlink($this->base_work_dir.'/'.$this->created_dir.'/'.$this->file_name.'.html');
  60. // The file is utf8 encoded and it seems to make problems with special quotes.
  61. // Then we htmlentities that, we replace these quotes and html_entity_decode that in good charset.
  62. $charset = api_get_system_encoding();
  63. $content = api_htmlentities($content, ENT_COMPAT, $this->original_charset);
  64. $content = str_replace('&rsquo;', '\'', $content);
  65. $content = api_convert_encoding($content, $charset, $this->original_charset);
  66. $content = str_replace($this->original_charset, $charset, $content);
  67. $content = api_html_entity_decode($content, ENT_COMPAT, $charset);
  68. // Set the path to pictures to absolute (so that it can be modified in fckeditor).
  69. $content = preg_replace(
  70. "|src=\"([^\"]*)|i",
  71. "src=\"".api_get_path(REL_COURSE_PATH).$_course['path'].'/document'.$this->created_dir."/\\1",
  72. $content
  73. );
  74. list($header, $body) = explode('<BODY', $content);
  75. $body = '<BODY'.$body;
  76. // Remove font-family styles.
  77. $header = preg_replace("|font\-family[^;]*;|i", '', $header);
  78. // Chamilo styles.
  79. $my_style = api_get_setting('stylesheets');
  80. if (empty($my_style)) {
  81. $my_style = 'chamilo';
  82. }
  83. $style_to_import = "<style type=\"text/css\">\r\n";
  84. $style_to_import .= '@import "'.api_get_path(WEB_CODE_PATH).'css/'.$my_style.'/default.css";'."\n";
  85. $style_to_import .= "</style>\r\n";
  86. $header = preg_replace("|</head>|i", "\r\n$style_to_import\r\n\\0", $header);
  87. // Line break before and after picture.
  88. $header = str_replace('p {', 'p {clear:both;', $header);
  89. $header = str_replace('absolute', 'relative', $header);
  90. switch ($this->split_steps) {
  91. case 'per_page':
  92. $this->dealPerPage($header, $body);
  93. break;
  94. case 'per_chapter':
  95. $this->dealPerChapter($header, $body);
  96. break;
  97. }
  98. }
  99. /**
  100. * Manages dir/chapter splitting.
  101. *
  102. * @param string Chapter header
  103. * @param string Content
  104. */
  105. public function dealPerChapter($header, $content)
  106. {
  107. $_course = api_get_course_info();
  108. $content = str_replace('||page_break||', '', $content);
  109. // Get all the h1.
  110. preg_match_all("|<h1[^>]*>([^(h1)+]*)</h1>|is", $content, $matches_temp);
  111. // Empty the fake chapters.
  112. $new_index = 0;
  113. for ($i = 0; $i < count($matches_temp[0]); $i++) {
  114. if (trim($matches_temp[1][$i]) !== '') {
  115. $matches[0][$new_index] = $matches_temp[0][$i];
  116. $matches[1][$new_index] = $matches_temp[1][$i];
  117. $new_index++;
  118. }
  119. }
  120. // Add intro item.
  121. $intro_content = substr($content, 0, strpos($content, $matches[0][0]));
  122. $items_to_create[get_lang('Introduction')] = $intro_content;
  123. for ($i = 0; $i < count($matches[0]); $i++) {
  124. if (empty($matches[1][$i])) {
  125. continue;
  126. }
  127. $content = strstr($content, $matches[0][$i]);
  128. if ($i + 1 !== count($matches[0])) {
  129. $dir_content = substr($content, 0, strpos($content, $matches[0][$i + 1]));
  130. } else {
  131. $dir_content = $content;
  132. }
  133. $items_to_create[$matches[1][$i]] = $dir_content;
  134. }
  135. $i = 0;
  136. foreach ($items_to_create as $item_title => $item_content) {
  137. $i++;
  138. $page_content = $this->format_page_content($header, $item_content);
  139. $html_file = $this->created_dir.'-'.$i.'.html';
  140. $handle = fopen($this->base_work_dir.$this->created_dir.'/'.$html_file, 'w+');
  141. fwrite($handle, $page_content);
  142. fclose($handle);
  143. $document_id = add_document(
  144. $_course,
  145. $this->created_dir.'/'.$html_file,
  146. 'file',
  147. filesize(
  148. $this->base_work_dir.$this->created_dir.'/'.$html_file
  149. ),
  150. $html_file
  151. );
  152. if ($document_id) {
  153. // Put the document in item_property update.
  154. api_item_property_update(
  155. $_course,
  156. TOOL_DOCUMENT,
  157. $document_id,
  158. 'DocumentAdded',
  159. api_get_user_id(),
  160. 0,
  161. 0,
  162. null,
  163. null,
  164. api_get_session_id()
  165. );
  166. $infos = pathinfo($this->filepath);
  167. $slide_name = strip_tags(nl2br($item_title));
  168. $slide_name = str_replace(["\r\n", "\r", "\n"], '', $slide_name);
  169. $slide_name = html_entity_decode($slide_name);
  170. $previous = learnpath::add_item(
  171. 0,
  172. $previous,
  173. 'document',
  174. $document_id,
  175. $slide_name,
  176. ''
  177. );
  178. if ($this->first_item == 0) {
  179. $this->first_item = $previous;
  180. }
  181. }
  182. }
  183. }
  184. /**
  185. * Manages page splitting.
  186. *
  187. * @param string Page header
  188. * @param string Page body
  189. */
  190. public function dealPerPage($header, $body)
  191. {
  192. $_course = api_get_course_info();
  193. // Split document to pages.
  194. $pages = explode('||page_break||', $body);
  195. $first_item = 0;
  196. foreach ($pages as $key => $page_content) {
  197. // For every pages, we create a new file.
  198. $key++;
  199. $page_content = $this->format_page_content($header, $page_content, $this->base_work_dir.$this->created_dir);
  200. $html_file = $this->created_dir.'-'.$key.'.html';
  201. $handle = fopen($this->base_work_dir.$this->created_dir.'/'.$html_file, 'w+');
  202. fwrite($handle, $page_content);
  203. fclose($handle);
  204. $document_id = add_document(
  205. $_course,
  206. $this->created_dir.$html_file,
  207. 'file',
  208. filesize($this->base_work_dir.$this->created_dir.$html_file),
  209. $html_file
  210. );
  211. $slide_name = '';
  212. if ($document_id) {
  213. // Put the document in item_property update.
  214. api_item_property_update(
  215. $_course,
  216. TOOL_DOCUMENT,
  217. $document_id,
  218. 'DocumentAdded',
  219. api_get_user_id(),
  220. 0,
  221. 0,
  222. null,
  223. null,
  224. api_get_session_id()
  225. );
  226. $infos = pathinfo($this->filepath);
  227. $slide_name = 'Page '.str_repeat('0', 2 - strlen($key)).$key;
  228. $previous = learnpath::add_item(0, $previous, 'document', $document_id, $slide_name, '');
  229. if ($this->first_item == 0) {
  230. $this->first_item = $previous;
  231. }
  232. // Code for text indexing.
  233. if (api_get_setting('search_enabled') == 'true') {
  234. if (isset($_POST['index_document']) && $_POST['index_document']) {
  235. //echo Display::return_message(print_r($_POST));
  236. $di = new ChamiloIndexer();
  237. isset($_POST['language']) ? $lang = Database::escape_string(
  238. $_POST['language']
  239. ) : $lang = 'english';
  240. $di->connectDb(null, null, $lang);
  241. $ic_slide = new IndexableChunk();
  242. $ic_slide->addValue('title', $slide_name);
  243. $specific_fields = get_specific_field_list();
  244. $all_specific_terms = '';
  245. foreach ($specific_fields as $specific_field) {
  246. if (isset($_REQUEST[$specific_field['code']])) {
  247. $sterms = trim($_REQUEST[$specific_field['code']]);
  248. $all_specific_terms .= ' '.$sterms;
  249. if (!empty($sterms)) {
  250. $sterms = explode(',', $sterms);
  251. foreach ($sterms as $sterm) {
  252. $ic_slide->addTerm(trim($sterm), $specific_field['code']);
  253. }
  254. }
  255. }
  256. }
  257. $page_content = $all_specific_terms.' '.$page_content;
  258. $ic_slide->addValue('content', $page_content);
  259. // Add a comment to say terms separated by commas.
  260. $courseid = api_get_course_id();
  261. $ic_slide->addCourseId($courseid);
  262. $ic_slide->addToolId(TOOL_LEARNPATH);
  263. $lp_id = $this->lp_id;
  264. $xapian_data = [
  265. SE_COURSE_ID => $courseid,
  266. SE_TOOL_ID => TOOL_LEARNPATH,
  267. SE_DATA => ['lp_id' => $lp_id, 'lp_item' => $previous, 'document_id' => $document_id],
  268. SE_USER => (int) api_get_user_id(),
  269. ];
  270. $ic_slide->xapian_data = serialize($xapian_data);
  271. $di->addChunk($ic_slide);
  272. // Index and return search engine document id.
  273. $did = $di->index();
  274. if ($did) {
  275. // Save it to db.
  276. $tbl_se_ref = Database::get_main_table(TABLE_MAIN_SEARCH_ENGINE_REF);
  277. $sql = 'INSERT INTO %s (id, course_code, tool_id, ref_id_high_level, ref_id_second_level, search_did)
  278. VALUES (NULL , \'%s\', \'%s\', %s, %s, %s)';
  279. $sql = sprintf(
  280. $sql,
  281. $tbl_se_ref,
  282. api_get_course_id(),
  283. TOOL_LEARNPATH,
  284. $lp_id,
  285. $previous,
  286. $did
  287. );
  288. Database::query($sql);
  289. }
  290. }
  291. }
  292. }
  293. }
  294. }
  295. /**
  296. * Returns additional Java command parameters.
  297. *
  298. * @return string The additional parameters to be used in the Java call
  299. */
  300. public function add_command_parameters()
  301. {
  302. return ' -d woogie "'.$this->base_work_dir.'/'.$this->file_path.'" "'.$this->base_work_dir.$this->created_dir.'/'.$this->file_name.'.html"';
  303. }
  304. /**
  305. * Formats a page content by reorganising the HTML code a little.
  306. *
  307. * @param string Page header
  308. * @param string Page content
  309. *
  310. * @return string Formatted page content
  311. */
  312. public function format_page_content($header, $content)
  313. {
  314. // Limit the width of the doc.
  315. list($max_width, $max_height) = explode('x', api_get_setting('service_ppt2lp', 'size'));
  316. $content = preg_replace("|<body[^>]*>|i", "\\0\r\n<div style=\"width:".$max_width."\">", $content, -1, $count);
  317. if ($count < 1) {
  318. $content = '<body><div style="width:'.$max_width.'">'.$content;
  319. }
  320. $content = preg_replace('|</body>|i', '</div>\\0', $content, -1, $count);
  321. if ($count < 1) {
  322. $content = $content.'</div></body>';
  323. }
  324. // Add the headers.
  325. $content = $header.$content;
  326. // Resize all the picture to the max_width-10
  327. preg_match_all("|<img[^src]*src=\"([^\"]*)\"[^>]*>|i", $content, $images);
  328. foreach ($images[1] as $key => $image) {
  329. // Check if the <img tag soon has a width attribute.
  330. $defined_width = preg_match("|width=([^\s]*)|i", $images[0][$key], $img_width);
  331. $img_width = $img_width[1];
  332. if (!$defined_width) {
  333. list($img_width, $img_height, $type) = getimagesize($this->base_work_dir.$this->created_dir.'/'.$image);
  334. $new_width = $max_width - 10;
  335. if ($img_width > $new_width) {
  336. $picture_resized = str_ireplace('<img', '<img width="'.$new_width.'" ', $images[0][$key]);
  337. $content = str_replace($images[0][$key], $picture_resized, $content);
  338. }
  339. } elseif ($img_width > $max_width - 10) {
  340. $picture_resized = str_ireplace('width='.$img_width, 'width="'.($max_width - 10).'"', $images[0][$key]);
  341. $content = str_replace($images[0][$key], $picture_resized, $content);
  342. }
  343. }
  344. return $content;
  345. }
  346. /**
  347. * Add documents to the visioconference (to be implemented).
  348. */
  349. public function add_docs_to_visio()
  350. {
  351. }
  352. }