internationalization_internal.lib.php 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. <?php
  2. /* For licensing terms, see /license.txt */
  3. /**
  4. * File: internationalization_internal.lib.php
  5. * Main API extension library for Chamilo 1.8.7 LMS,
  6. * contains functions for internal use only.
  7. * License: GNU General Public License Version 3 (Free Software Foundation)
  8. * @author Ivan Tcholakov, <ivantcholakov@gmail.com>, 2009, 2010
  9. * @author More authors, mentioned in the corresponding fragments of this source
  10. *
  11. * Note: All functions and data structures here are not to be used directly.
  12. * See the file internationalization.lib.php which contains the "public" API.
  13. * @package chamilo.library
  14. */
  15. /**
  16. * Global variables used by some callback functions
  17. */
  18. $_api_encoding = null; // Read through "global" by _api_array_utf8_decode() and _api_cmp(); presumably assigned by the public API before those callbacks are used - TODO confirm.
  19. $_api_collator = null; // Read through "global" by _api_cmp() and handed to collator_compare(); presumably assigned by the public API before sorting - TODO confirm.
  /**
   * Lists the languages whose first non-UTF-8 encoding is a Latin 1 compatible one.
   * Appendix to "Language support"
   * The list is computed on the first call only and kept in a static variable afterwards.
   * @return array The keys of the table returned by _api_non_utf8_encodings() whose first listed encoding passes api_is_latin1().
   */
  function _api_get_latin1_compatible_languages() {
      static $latin1_languages;
      // Serve the memoized list when it has already been built.
      if (isset($latin1_languages)) {
          return $latin1_languages;
      }
      $latin1_languages = array();
      $encodings_by_language = & _api_non_utf8_encodings();
      foreach ($encodings_by_language as $language => $language_encodings) {
          // Only the first encoding of every language is examined.
          if (!api_is_latin1($language_encodings[0])) {
              continue;
          }
          $latin1_languages[] = $language;
      }
      return $latin1_languages;
  }
  39. /**
  40. * Appendix to "Language recognition"
  41. * Based on the publication:
  42. * W. B. Cavnar and J. M. Trenkle. N-gram-based text categorization.
  43. * Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis
  44. * and Information Retrieval, 1994.
  45. * @link http://citeseer.ist.psu.edu/cache/papers/cs/810/http:zSzzSzwww.info.unicaen.frzSz~giguetzSzclassifzSzcavnar_trenkle_ngram.pdf/n-gram-based-text.pdf
  46. */
  /**
   * Generates a statistical, n-gram based language profile from the given text.
   * @param string $string The input text. It is passed through api_utf8_encode($string, $encoding) before processing. Practically it should be at least 3000 characters long; about 40000 characters give increased accuracy.
   * @param string $encoding The encoding handed to api_utf8_encode() for the input and to api_utf8_decode() for every produced n-gram.
   * @param int $n_grams_max (optional) The maximal number of n-grams in the returned array.
   * @param int $n_max (optional) The maximal number of characters that an n-gram may contain.
   * @return array An array that contains the constructed n-grams, sorted in descending order by their frequencies. The frequencies themselves are not returned. An empty array is returned for empty input or when the text cannot be split into words.
   */
  function &_api_generate_n_grams(&$string, $encoding, $n_grams_max = 350, $n_max = 4) {
      // This function returns by reference, so every return statement has to return a variable,
      // otherwise PHP raises "Only variable references should be returned by reference".
      $result = array();
      if (empty($string)) {
          return $result;
      }
      // We construct only lowercase n-grams if it is applicable for the given language.
      // Removing all punctuation and some other non-letter characters. Apostrophe characters stay.
      // Splitting the sample text into separate words.
      $words = preg_split('/_/u', preg_replace('/[\x00-\x1F\x20-\x26\x28-\x3E\?@\x5B-\x60{|}~\x7F]/u', '_', ' '.api_strtolower(api_utf8_encode($string, $encoding), 'UTF-8').' '), -1, PREG_SPLIT_NO_EMPTY);
      if (!is_array($words)) {
          // The PCRE functions fail (for example on malformed UTF-8); there is nothing to analyse then.
          return $result;
      }
      $prefix = '_'; // Beginning of a word.
      $suffix = str_repeat('_', $n_max); // End of a word. Only the last '_' stays.
      $n_grams = array(); // Maps every constructed n-gram to its frequency.
      foreach ($words as $word) {
          // Number of starting positions: the leading '_' plus every character of the word.
          $k = api_strlen($word, 'UTF-8') + 1;
          $word = $prefix.$word.$suffix;
          for ($n = 1; $n <= $n_max; $n++) {
              for ($i = 0; $i < $k; $i++) {
                  $n_gram = api_utf8_decode(api_substr($word, $i, $n, 'UTF-8'), $encoding);
                  if (isset($n_grams[$n_gram])) {
                      $n_grams[$n_gram]++;
                  } else {
                      $n_grams[$n_gram] = 1;
                  }
              }
          }
      }
      // Sorting the n-grams in descending order by their frequencies.
      arsort($n_grams);
      // Reducing the number of the n-grams; only the n-grams (the keys) are returned.
      $result = array_keys(array_slice($n_grams, 0, $n_grams_max));
      return $result;
  }
  /**
   * Compares the n-grams of an input text with the stored language profiles.
   * The profiles are read once from the directory internationalization_database/language_detection/language_profiles/
   * (one .txt file per language, one n-gram per line) and are kept in a static variable afterwards.
   *
   * The value $max_delta = 80000 is good enough for speed and detection accuracy.
   * If you set the value of $max_delta too low, no language will be recognized.
   * $max_delta = 400 * 350 = 140000 is the best detection with lowest speed.
   *
   * @param array $n_grams The n-grams of the input text, ordered by rank (see _api_generate_n_grams()).
   * @param string $encoding The encoding of the input n-grams; only profiles with an equal encoding are compared.
   * @param int $max_delta (optional) The distance above which a language is abandoned.
   * @return array An array 'language:encoding' => distance, sorted in ascending order by distance; an empty array when nothing matches.
   */
  function & _api_compare_n_grams(&$n_grams, $encoding, $max_delta = LANGUAGE_DETECT_MAX_DELTA) {
      static $language_profiles;
      // This function returns by reference, so a variable (not a literal) is returned on every path.
      $result = array();
      if (!isset($language_profiles)) {
          // Initialized in advance, so sorting and iterating stay valid even when no profile can be read.
          $language_profiles = array();
          // Reading the language profile files from the internationalization database.
          $exceptions = array('.', '..', 'CVS', '.htaccess', '.svn', '_svn', 'index.html');
          $path = str_replace("\\", '/', dirname(__FILE__).'/internationalization_database/language_detection/language_profiles/');
          $non_utf8_encodings = & _api_non_utf8_encodings();
          if (is_dir($path) && ($handle = @opendir($path))) {
              while (($dir_entry = @readdir($handle)) !== false) {
                  if (api_in_array_nocase($dir_entry, $exceptions)) {
                      continue;
                  }
                  if (strpos($dir_entry, '.txt') === false) {
                      continue;
                  }
                  // $path already ends with a slash.
                  $dir_entry_full_path = $path.$dir_entry;
                  if (@filetype($dir_entry_full_path) == 'dir') {
                      continue;
                  }
                  $data = @file_get_contents($dir_entry_full_path);
                  if ($data === false) {
                      continue;
                  }
                  $language = basename($dir_entry_full_path, '.txt');
                  $encodings = array('UTF-8');
                  if (!empty($non_utf8_encodings[$language])) {
                      $encodings = array_merge($encodings, $non_utf8_encodings[$language]);
                  }
                  // One profile is stored for every encoding that is known for the language.
                  foreach ($encodings as $enc) {
                      $data_enc = api_utf8_decode($data, $enc);
                      if (empty($data_enc)) {
                          continue;
                      }
                      $key = $language.':'.$enc;
                      // After flipping, every n-gram points to its rank (its line index) in the profile.
                      $language_profiles[$key]['data'] = array_flip(explode("\n", $data_enc));
                      $language_profiles[$key]['language'] = $language;
                      $language_profiles[$key]['encoding'] = $enc;
                  }
              }
              // The handle is closed only when it has been opened successfully.
              closedir($handle);
          }
          ksort($language_profiles);
      }
      if (!is_array($n_grams) || empty($n_grams)) {
          return $result;
      }
      // Comparison between the input n-grams and the language profiles.
      // Iteration is by value: nothing is modified, and the caller's array elements are not turned into references.
      foreach ($language_profiles as $key => $language_profile) {
          if (!api_is_language_supported($language_profile['language']) || !api_equal_encodings($encoding, $language_profile['encoding'])) {
              continue;
          }
          $delta = 0; // This is a summary measurement for matching between the input text and the current language profile.
          // Searching each n-gram from the input text into the language profile.
          foreach ($n_grams as $rank => $n_gram) {
              if (isset($language_profile['data'][$n_gram])) {
                  // The n-gram has been found, the difference between places in both
                  // arrays is calculated (so called delta-points are adopted for
                  // measuring distances between n-gram ranks).
                  $delta += abs($rank - $language_profile['data'][$n_gram]);
              } else {
                  // The n-gram has not been found in the profile. We add then
                  // a large enough "distance" in delta-points.
                  $delta += 400;
              }
              // Abort: This language already differs too much.
              if ($delta > $max_delta) {
                  break;
              }
          }
          // Include only non-aborted languages in result array.
          if ($delta < ($max_delta - 400)) {
              $result[$key] = $delta;
          }
      }
      asort($result);
      return $result;
  }
  165. /**
  166. * Appendix to "Name order conventions"
  167. */
  /**
   * Returns the person name convention for a given language.
   * The conventions are read from the global $app['name_order_conventions'].
   * @param string $language The input language. It is normalized with api_purify_language_id() before the lookup.
   * @param string $type The type of the requested convention. It may be 'format' for name order convention or 'sort_by' for name sorting convention.
   * @return mixed For 'format' - a string, '%t %f %l' when the language has no string format defined; for 'sort_by' - a boolean, true when the language has no boolean value defined; null for any other $type.
   */
  function _api_get_person_name_convention($language, $type) {
      global $app;
      $conventions = $app['name_order_conventions'];
      $language = api_purify_language_id($language);
      switch ($type) {
          case 'format':
              // isset() guards the lookup: a language without an entry falls back to the default silently.
              if (isset($conventions[$language]['format']) && is_string($conventions[$language]['format'])) {
                  return $conventions[$language]['format'];
              }
              return '%t %f %l';
          case 'sort_by':
              if (isset($conventions[$language]['sort_by']) && is_bool($conventions[$language]['sort_by'])) {
                  return $conventions[$language]['sort_by'];
              }
              return true;
      }
      return null;
  }
  /**
   * Replaces non-valid formats for person names with the default (English) format.
   * A format is accepted when it is not empty and contains both placeholders %f and %l (checked case-insensitively).
   * @param string $format The input format to be verified.
   * @return string Returns the same format if it is valid, otherwise returns the format '%t %f %l'.
   */
  function _api_validate_person_name_format($format) {
      if (!empty($format) && stripos($format, '%f') !== false && stripos($format, '%l') !== false) {
          return $format;
      }
      return '%t %f %l';
  }
  /**
   * Removes leading, trailing and duplicate whitespace and/or commas in a full person name.
   * Cleaning is needed for the cases when not all parts of the name are available or when the name is constructed using a "dirty" pattern.
   * Examples: ', John' gives 'John'; 'Smith, ' gives 'Smith'; 'Smith, , John' gives 'Smith, John'.
   * @param string $person_name The input person name.
   * @return string Returns cleaned person name.
   */
  function _api_clean_person_name($person_name) {
      // The replacements are applied one after another, in the given order.
      $patterns = array(
          '/\s+/',        // Every run of whitespace becomes a single space.
          '/(?:, )+,/',   // Commas separated by single spaces (left by empty name parts) become one separator.
          '/,+/',         // Adjacent commas become a single comma.
          '/ {2,}/',      // Doubled spaces that the comma clean-up may have produced.
          '/^[ ,]+/',     // All leading spaces and commas, not only the first one.
          '/[ ,]+$/'      // All trailing spaces and commas, not only the last one.
      );
      $replacements = array(' ', ', ', ',', ' ', '', '');
      return preg_replace($patterns, $replacements, $person_name);
  }
  206. /**
  207. * Appendix to "Multibyte string conversion functions"
  208. */
  /**
   * This is a php-implementation of a function that is similar to mb_convert_encoding() from mbstring extension.
   * The function converts a given string from one to another character encoding, using Unicode codepoints
   * as an intermediate form and the conversion tables provided by _api_parse_character_map().
   * The input is returned without conversion when it is empty, when both encodings are equal,
   * or when a conversion table is not known or cannot be loaded.
   * @param string $string The string being converted.
   * @param string $to_encoding The encoding that $string is being converted to. 'HTML-ENTITIES' is delegated to api_htmlentities().
   * @param string $from_encoding The encoding that $string is being converted from. 'HTML-ENTITIES' is delegated to api_html_entity_decode().
   * @return string Returns the converted string.
   */
  function _api_convert_encoding(&$string, $to_encoding, $from_encoding) {
      $str = (string)$string;
      static $character_map = array(); // The conversion tables that have been loaded so far, indexed by table name.
      static $utf8_compatible = array('UTF-8', 'US-ASCII');
      if (empty($str)) {
          return $str;
      }
      $to_encoding = api_refine_encoding_id($to_encoding);
      $from_encoding = api_refine_encoding_id($from_encoding);
      if (api_equal_encodings($to_encoding, $from_encoding)) {
          return $str;
      }
      // HTML entities are not a table-driven conversion, they are served by other functions.
      if ($to_encoding == 'HTML-ENTITIES') {
          return api_htmlentities($str, ENT_QUOTES, $from_encoding);
      }
      if ($from_encoding == 'HTML-ENTITIES') {
          return api_html_entity_decode($str, ENT_QUOTES, $to_encoding);
      }
      $to = _api_get_character_map_name($to_encoding);
      $from = _api_get_character_map_name($from_encoding);
      $both_utf8_compatible = in_array($to, $utf8_compatible) && in_array($from, $utf8_compatible);
      if (empty($to) || empty($from) || $to == $from || $both_utf8_compatible) {
          return $str;
      }
      // Loading the conversion tables on demand, the target table first.
      if (!isset($character_map[$to])) {
          $character_map[$to] = &_api_parse_character_map($to);
      }
      if ($character_map[$to] === false) {
          return $str;
      }
      if (!isset($character_map[$from])) {
          $character_map[$from] = &_api_parse_character_map($from);
      }
      if ($character_map[$from] === false) {
          return $str;
      }
      // First step: the input string is turned into an array of Unicode codepoints.
      if ($from == 'UTF-8') {
          $codepoints = _api_utf8_to_unicode($str);
      } else {
          $codepoints = array();
          $byte_count = api_byte_count($str);
          for ($position = 0; $position < $byte_count; $position++) {
              $byte = ord($str[$position]);
              if ($byte <= 127) {
                  // The lower half of the table is treated as plain ASCII.
                  $codepoints[] = $byte;
                  continue;
              }
              // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
              $codepoints[] = isset($character_map[$from]['local'][$byte]) ? $character_map[$from]['local'][$byte] : 0xFFFD;
          }
      }
      // Second step: the codepoints are written in the target encoding.
      if ($to == 'UTF-8') {
          return _api_utf8_from_unicode($codepoints);
      }
      $converted = '';
      foreach ($codepoints as $codepoint) {
          if ($codepoint <= 127) {
              $converted .= chr($codepoint);
          } elseif (isset($character_map[$to]['unicode'][$codepoint])) {
              $converted .= chr($character_map[$to]['unicode'][$codepoint]);
          } else {
              $converted .= '?'; // Unknown character.
          }
      }
      return $converted;
  }
  /**
   * This function determines the name of the conversion table that corresponds to a given encoding.
   * The names are looked up in the array returned by the file
   * internationalization_database/conversion/character_map_selector.php, which is read on the first call only.
   * @param string $encoding The given encoding identificator, for example 'WINDOWS-1252'.
   * @return string Returns the name of the corresponding conversion table, for the same example - 'CP1252'; an empty string is returned when the encoding is not listed.
   */
  function _api_get_character_map_name($encoding) {
      static $character_map_selector;
      if (!isset($character_map_selector)) {
          $file = dirname(__FILE__).'/internationalization_database/conversion/character_map_selector.php';
          // Without the selector file no encoding can be resolved.
          $character_map_selector = array();
          if (file_exists($file)) {
              $character_map_selector = include ($file);
          }
      }
      if (isset($character_map_selector[$encoding])) {
          return $character_map_selector[$encoding];
      }
      return '';
  }
  /**
   * This function parses a given conversion table (a text file) and creates in the memory
   * two tables for conversion - character set from/to Unicode codepoints.
   * Only the lines of the kind "0xNN 0xNNNN ..." with a local code above 127 are taken into account;
   * empty lines, lines starting with '#' and lines of any other kind are skipped.
   * @param string $name The name of the text file that contains the conversion table, for example 'CP1252' (file CP1252.TXT will be parsed).
   * @return array|bool Returns an array that contains the forward table ('local' => codepoints) and the reverse table ('unicode' => local codes); false is returned when the file is missing or unreadable.
   */
  function &_api_parse_character_map($name) {
      // This function returns by reference, so a variable (not a literal) is returned on every path.
      $result = false;
      $file = dirname(__FILE__).'/internationalization_database/conversion/' . $name . '.TXT';
      if (!file_exists($file)) {
          return $result;
      }
      $text = @file_get_contents($file);
      if ($text === false) {
          return $result;
      }
      $result = array();
      foreach (explode(chr(10), $text) as $line) {
          // Skipping blank lines and comments.
          if (trim($line) === '' || $line[0] == '#') {
              continue;
          }
          $matches = array();
          // A line that is not a mapping must not be read through undefined matches.
          if (!preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $line, $matches)) {
              continue;
          }
          $ord = hexdec(trim($matches[1]));
          if ($ord > 127) {
              $codepoint = hexdec(trim($matches[2]));
              $result['local'][$ord] = $codepoint;
              $result['unicode'][$codepoint] = $ord;
          }
      }
      return $result;
  }
  /**
   * Takes an UTF-8 string and returns an array of integer values representing the Unicode characters.
   * Astral planes are supported ie. the ints in the output can be > 0xFFFF. Occurrences of the BOM are ignored.
   * Surrogates are not allowed.
   * Every malformed sequence (illegal first octet, non-shortest form, surrogate, out-of-range value,
   * interrupted or truncated sequence) is reported as one U+FFFD REPLACEMENT CHARACTER.
   * The octet that interrupts a sequence is not lost, it is decoded as the beginning of the next character.
   * @param string $string The UTF-8 encoded string.
   * @return array Returns an array of unicode code points.
   * @author Henri Sivonen, mailto:hsivonen@iki.fi
   * @link http://hsivonen.iki.fi/php-utf8/
   * @author Ivan Tcholakov, August 2009, adaptation for the Dokeos LMS.
   */
  function _api_utf8_to_unicode(&$string) {
      $str = (string)$string;
      $state = 0;     // cached expected number of octets after the current octet
                      // until the beginning of the next UTF8 character sequence
      $codepoint = 0; // cached Unicode character
      $bytes = 1;     // cached expected number of octets in the current sequence
      $result = array();
      $len = api_byte_count($str);
      for ($i = 0; $i < $len; $i++) {
          $byte = ord($str[$i]);
          if ($state == 0) {
              // When state is zero we expect either a US-ASCII character or a multi-octet sequence.
              if (0 == (0x80 & $byte)) {
                  // US-ASCII, pass straight through.
                  $result[] = $byte;
                  $bytes = 1;
              } elseif (0xC0 == (0xE0 & $byte)) {
                  // First octet of 2 octet sequence.
                  $codepoint = ($byte & 0x1F) << 6;
                  $state = 1;
                  $bytes = 2;
              } elseif (0xE0 == (0xF0 & $byte)) {
                  // First octet of 3 octet sequence.
                  $codepoint = ($byte & 0x0F) << 12;
                  $state = 2;
                  $bytes = 3;
              } elseif (0xF0 == (0xF8 & $byte)) {
                  // First octet of 4 octet sequence.
                  $codepoint = ($byte & 0x07) << 18;
                  $state = 3;
                  $bytes = 4;
              } elseif (0xF8 == (0xFC & $byte)) {
                  // First octet of 5 octet sequence.
                  // This is illegal because the encoded codepoint must be either
                  // (a) not the shortest form or
                  // (b) outside the Unicode range of 0-0x10FFFF.
                  // Rather than trying to resynchronize, we will carry on until the end
                  // of the sequence and let the later error handling code catch it.
                  $codepoint = ($byte & 0x03) << 24;
                  $state = 4;
                  $bytes = 5;
              } elseif (0xFC == (0xFE & $byte)) {
                  // First octet of 6 octet sequence, see comments for 5 octet sequence.
                  $codepoint = ($byte & 1) << 30;
                  $state = 5;
                  $bytes = 6;
              } else {
                  // Current octet is neither in the US-ASCII range nor a legal first octet of a multi-octet sequence.
                  $codepoint = 0;
                  $bytes = 1;
                  $result[] = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
              }
              continue;
          }
          // When state is non-zero, we expect a continuation of the multi-octet sequence.
          if (0x80 != (0xC0 & $byte)) {
              // Incomplete multi-octet sequence: it is replaced as a whole, then the current
              // octet is examined again as the possible beginning of a new character.
              $state = 0;
              $codepoint = 0;
              $bytes = 1;
              $result[] = 0xFFFD;
              $i--;
              continue;
          }
          // Legal continuation.
          $codepoint |= ($byte & 0x0000003F) << (($state - 1) * 6);
          if (0 != --$state) {
              continue;
          }
          // End of the multi-octet sequence. $codepoint now contains the final Unicode codepoint to be output.
          // Check for illegal sequences and codepoints.
          $is_illegal =
              // From Unicode 3.1, non-shortest form is illegal
              ((2 == $bytes) && ($codepoint < 0x0080)) ||
              ((3 == $bytes) && ($codepoint < 0x0800)) ||
              ((4 == $bytes) && ($codepoint < 0x10000)) ||
              (4 < $bytes) ||
              // From Unicode 3.2, surrogate characters are illegal
              (($codepoint & 0xFFFFF800) == 0xD800) ||
              // Codepoints outside the Unicode range are illegal
              ($codepoint > 0x10FFFF);
          if ($is_illegal) {
              $result[] = 0xFFFD;
          } elseif (0xFEFF != $codepoint) {
              // BOM is legal but we don't want to output it
              $result[] = $codepoint;
          }
          // Initialize UTF8 cache
          $codepoint = 0;
          $bytes = 1;
      }
      if ($state != 0) {
          // The input ended in the middle of a multi-octet sequence.
          $result[] = 0xFFFD;
      }
      return $result;
  }
  /**
   * Takes an array of Unicode codepoints and returns a UTF-8 string.
   * Every codepoint is encoded separately by _api_utf8_chr(), the results are concatenated in the given order.
   * @param array $codepoints An array of Unicode codepoints representing a string.
   * @return string Returns a UTF-8 string constructed using the given codepoints.
   */
  function _api_utf8_from_unicode($codepoints) {
      $characters = array_map('_api_utf8_chr', $codepoints);
      return implode('', $characters);
  }
  /**
   * Takes a codepoint and returns its correspondent UTF-8 encoded character.
   * Astral planes are supported, ie the integer input can be > 0xFFFF. Occurrences of the BOM are ignored.
   * Surrogates are not allowed.
   * @param int $codepoint The Unicode codepoint.
   * @return string Returns the corresponding UTF-8 character; an empty string for the BOM (U+FEFF); the encoded U+FFFD REPLACEMENT CHARACTER for negative values, surrogates and values above 0x10FFFF.
   * @author Henri Sivonen, mailto:hsivonen@iki.fi
   * @link http://hsivonen.iki.fi/php-utf8/
   * @author Ivan Tcholakov, 2009, modifications for the Dokeos LMS.
   * @see _api_utf8_from_unicode()
   * This is a UTF-8 aware version of the function chr().
   * @link http://php.net/manual/en/function.chr.php
   */
  function _api_utf8_chr($codepoint) {
      // Values that are not Unicode scalar values: negative numbers, surrogates, out of range.
      // Negative numbers must be caught here, otherwise they would be encoded as a broken 2 byte sequence.
      if ($codepoint < 0 || ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) || $codepoint > 0x10ffff) {
          return "\xEF\xBF\xBD"; // U+FFFD REPLACEMENT CHARACTER is the general substitute character in the Unicode Standard.
      }
      // Byte order mark (skip)
      if ($codepoint == 0xFEFF) {
          return '';
      }
      // ASCII range (including control chars)
      if ($codepoint <= 0x007f) {
          return chr($codepoint);
      }
      // 2 byte sequence
      if ($codepoint <= 0x07ff) {
          return chr(0xc0 | ($codepoint >> 6)) . chr(0x80 | ($codepoint & 0x003f));
      }
      // 3 byte sequence
      if ($codepoint <= 0xffff) {
          return chr(0xe0 | ($codepoint >> 12)) . chr(0x80 | (($codepoint >> 6) & 0x003f)) . chr(0x80 | ($codepoint & 0x003f));
      }
      // 4 byte sequence
      return chr(0xf0 | ($codepoint >> 18)) . chr(0x80 | (($codepoint >> 12) & 0x3f)) . chr(0x80 | (($codepoint >> 6) & 0x3f)) . chr(0x80 | ($codepoint & 0x3f));
  }
  /**
   * Takes the first UTF-8 character in a string and returns its Unicode codepoint.
   * @param string $utf8_character The UTF-8 encoded character.
   * @return int Returns: the codepoint; or 0xFFFD (unknown character) when the input string is empty or when no codepoint can be decoded from it (for example when it contains a BOM only).
   * This is a UTF-8 aware version of the function ord().
   * @link http://php.net/manual/en/function.ord.php
   * Note about a difference with the original function ord(): ord('') returns 0.
   */
  function _api_utf8_ord($utf8_character) {
      if ($utf8_character == '') {
          return 0xFFFD;
      }
      $codepoints = _api_utf8_to_unicode($utf8_character);
      // The decoder may return no codepoints at all, so the first element is read only when it exists.
      return isset($codepoints[0]) ? $codepoints[0] : 0xFFFD;
  }
  /**
   * Makes a html-entity from Unicode codepoint.
   * The entity is always numeric and decimal, of the kind '&#233;'.
   * @param int $codepoint The Unicode codepoint.
   * @return string Returns the corresponding html-entity; or ASCII character if $codepoint < 128.
   */
  function _api_html_entity_from_unicode($codepoint) {
      return $codepoint < 128 ? chr($codepoint) : '&#'.$codepoint.';';
  }
  532. /**
  533. * Appendix to "Common multibyte string functions"
  534. */
  /**
   * The following function reads case folding properties about a given character from a file-based "database".
   * The database consists of one file per Unicode block, located in internationalization_database/casefolding/;
   * a file is included the first time a codepoint from its block is requested.
   * @param int $codepoint The Unicode codepoint that represents a character.
   * @param string $type (optional) The type of initial case to be altered: 'lower' (default) or 'upper'.
   * @return array|null Returns an array with the entries of the database that match the codepoint (it may be empty); null is returned when the codepoint is outside all supported blocks or the file of its block is not available.
   */
  function &_api_utf8_get_letter_case_properties($codepoint, $type = 'lower') {
      // The included files are expected to fill in $config[<block name>]; the name of this variable must stay as it is.
      static $config = array();
      // The supported Unicode blocks: first codepoint, last codepoint, name of the database file.
      // Every block has explicit limits, so a codepoint between two blocks never selects a wrong file.
      static $blocks = array(
          array(0x0080, 0x00ff, '0080_00ff'), // Latin-1 Supplement
          array(0x0100, 0x017f, '0100_017f'), // Latin Extended-A
          array(0x0180, 0x024f, '0180_024F'), // Latin Extended-B
          array(0x0250, 0x02af, '0250_02af'), // IPA Extensions
          array(0x0370, 0x03ff, '0370_03ff'), // Greek and Coptic
          array(0x0400, 0x04ff, '0400_04ff'), // Cyrillic
          array(0x0500, 0x052f, '0500_052f'), // Cyrillic Supplement
          array(0x0530, 0x058f, '0530_058f'), // Armenian
          array(0x1e00, 0x1eff, '1e00_1eff'), // Latin Extended Additional
          array(0x1f00, 0x1fff, '1f00_1fff'), // Greek Extended
          array(0x2100, 0x214f, '2100_214f'), // Letterlike Symbols
          array(0x2150, 0x218f, '2150_218f'), // Number Forms
          array(0x2460, 0x24ff, '2460_24ff'), // Enclosed Alphanumerics
          array(0x2c00, 0x2c5f, '2c00_2c5f'), // Glagolitic
          array(0x2c60, 0x2c7f, '2c60_2c7f'), // Latin Extended-C
          array(0x2c80, 0x2cff, '2c80_2cff'), // Coptic
          array(0xff00, 0xffef, 'ff00_ffef')  // Halfwidth and Fullwidth Forms
      );
      // This function returns by reference, so a variable (not a literal) is returned on every path.
      $result = null;
      $range = false;
      foreach ($blocks as $block) {
          if ($codepoint >= $block[0] && $codepoint <= $block[1]) {
              $range = $block[2];
              break;
          }
      }
      if ($range === false) {
          return $result;
      }
      if (!isset($config[$range])) {
          $file = dirname(__FILE__).'/internationalization_database/casefolding/' . $range . '.php';
          if (file_exists($file)) {
              include $file;
          }
      }
      if (!isset($config[$range])) {
          return $result;
      }
      $result = array();
      foreach ($config[$range] as $properties) {
          // In the database 'lower' is a list of codepoints (its first element is compared), 'upper' is a single codepoint.
          if ($type === 'lower' && $properties[$type][0] === $codepoint) {
              $result[] = $properties;
          } elseif ($type === 'upper' && $properties[$type] === $codepoint) {
              $result[] = $properties;
          }
      }
      return $result;
  }
  /**
   * A callback for serving the function api_ucwords().
   * @param array $matches Input array of matches corresponding to a single word; the whole match (index 0) and the group with index 2 are used.
   * @return string Returns the group with index 2 followed by the left-trimmed whole match, which is passed through api_ucfirst() as UTF-8 text.
   */
  function _api_utf8_ucwords_callback($matches) {
      $word = ltrim($matches[0]);
      return $matches[2] . api_ucfirst($word, 'UTF-8');
  }
  614. /**
  615. * Appendix to "Common string operations with arrays"
  616. */
  /**
   * This callback function converts from UTF-8 to other encoding. It works with strings or arrays of strings.
   * The target encoding is taken from the global variable $_api_encoding.
   * Arrays are processed recursively; values that are neither strings nor arrays are returned untouched.
   * @param mixed $variable The variable to be converted, a string or an array.
   * @return mixed Returns the converted from UTF-8 $variable with the same type, string or array.
   */
  function _api_array_utf8_decode($variable) {
      global $_api_encoding;
      if (is_string($variable)) {
          return api_utf8_decode($variable, $_api_encoding);
      }
      if (!is_array($variable)) {
          return $variable;
      }
      return array_map('_api_array_utf8_decode', $variable);
  }
  632. /**
  633. * Appendix to "String comparison"
  634. */
  /**
   * Returns an instance of Collator class (ICU) created for a specified language.
   * The created collators are kept in a static variable, one per language; uppercase letters are set to sort first.
   * @param string $language (optional) Language identificator: 'english', 'french' ... If it is omitted, the current interface language is assumed.
   * @return object Returns an instance of Collator class that is suitable for common string comparisons; whatever collator_create() returns is passed back when the creation fails.
   */
  function _api_get_collator($language = null) {
      static $collator = array();
      if (empty($language)) {
          $language = api_get_interface_language();
      }
      if (isset($collator[$language])) {
          return $collator[$language];
      }
      $instance = collator_create(_api_get_locale_from_language($language));
      if (is_object($instance)) {
          collator_set_attribute($instance, Collator::CASE_FIRST, Collator::UPPER_FIRST);
      }
      $collator[$language] = $instance;
      return $collator[$language];
  }
  654. /**
  655. * Returns an instance of Collator class (ICU) created for a specified language. This collator treats substrings of digits as numbers.
  656. * @param string $language (optional) Language indentificator. If it is omited, the current interface language is assumed.
  657. * @return object Returns a instance of Collator class that is suitable for alpha-numerical comparisons.
  658. */
  659. function _api_get_alpha_numerical_collator($language = null) {
  660. static $collator = array();
  661. if (empty($language)) {
  662. $language = api_get_interface_language();
  663. }
  664. if (!isset($collator[$language])) {
  665. $locale = _api_get_locale_from_language($language);
  666. $collator[$language] = collator_create($locale);
  667. if (is_object($collator[$language])) {
  668. collator_set_attribute($collator[$language], Collator::CASE_FIRST, Collator::UPPER_FIRST);
  669. collator_set_attribute($collator[$language], Collator::NUMERIC_COLLATION, Collator::ON);
  670. }
  671. }
  672. return $collator[$language];
  673. }
  674. /**
  675. * A string comparison callback function for sorting.
  676. * @param string $string1 The first string.
  677. * @param string $string2 The second string.
  678. * @return int Returns 0 if $string1 = $string2 or if there is an error; 1 if $string1 > $string2; -1 if $string1 < $string2.
  679. */
  680. function _api_cmp($string1, $string2) {
  681. global $_api_collator, $_api_encoding;
  682. $result = collator_compare($_api_collator, api_utf8_encode($string1, $_api_encoding), api_utf8_encode($string2, $_api_encoding));
  683. return $result === false ? 0 : $result;
  684. }
  685. /**
  686. * A reverse string comparison callback function for sorting.
  687. * @param string $string1 The first string.
  688. * @param string $string2 The second string.
  689. * @return int Returns 0 if $string1 = $string2 or if there is an error; 1 if $string1 < $string2; -1 if $string1 > $string2.
  690. */
  691. function _api_rcmp($string1, $string2) {
  692. global $_api_collator, $_api_encoding;
  693. $result = collator_compare($_api_collator, api_utf8_encode($string2, $_api_encoding), api_utf8_encode($string1, $_api_encoding));
  694. return $result === false ? 0 : $result;
  695. }
  696. /**
  697. * A case-insensitive string comparison callback function for sorting.
  698. * @param string $string1 The first string.
  699. * @param string $string2 The second string.
  700. * @return int Returns 0 if $string1 = $string2 or if there is an error; 1 if $string1 > $string2; -1 if $string1 < $string2.
  701. */
  702. function _api_casecmp($string1, $string2) {
  703. global $_api_collator, $_api_encoding;
  704. $result = collator_compare($_api_collator, api_strtolower(api_utf8_encode($string1, $_api_encoding), 'UTF-8'), api_strtolower(api_utf8_encode($string2, $_api_encoding), 'UTF-8'));
  705. return $result === false ? 0 : $result;
  706. }
  707. /**
  708. * A reverse case-insensitive string comparison callback function for sorting.
  709. * @param string $string1 The first string.
  710. * @param string $string2 The second string.
  711. * @return int Returns 0 if $string1 = $string2 or if there is an error; 1 if $string1 < $string2; -1 if $string1 > $string2.
  712. */
  713. function _api_casercmp($string1, $string2) {
  714. global $_api_collator, $_api_encoding;
  715. $result = collator_compare($_api_collator, api_strtolower(api_utf8_encode($string2, $_api_encoding), 'UTF-8'), api_strtolower(api_utf8_encode($string1, $_api_encoding), 'UTF-8'));
  716. return $result === false ? 0 : $result;
  717. }
  718. /**
  719. * A reverse function from php-core function strnatcmp(), performs string comparison in reverse natural (alpha-numerical) order.
  720. * @param string $string1 The first string.
  721. * @param string $string2 The second string.
  722. * @return int Returns 0 if $string1 = $string2; >0 if $string1 < $string2; <0 if $string1 > $string2.
  723. */
  724. function _api_strnatrcmp($string1, $string2) {
  725. return strnatcmp($string2, $string1);
  726. }
  727. /**
  728. * A reverse function from php-core function strnatcasecmp(), performs string comparison in reverse case-insensitive natural (alpha-numerical) order.
  729. * @param string $string1 The first string.
  730. * @param string $string2 The second string.
  731. * @return int Returns 0 if $string1 = $string2; >0 if $string1 < $string2; <0 if $string1 > $string2.
  732. */
  733. function _api_strnatcasercmp($string1, $string2) {
  734. return strnatcasecmp($string2, $string1);
  735. }
  736. /**
  737. * A fuction that translates sorting flag constants from php core to correspondent constants from intl extension.
  738. * @param int $sort_flag (optional) Sorting modifier flag as it is defined for php core. The default value is SORT_REGULAR.
  739. * @return int Retturns the corresponding sorting modifier flag as it is defined in intl php-extension.
  740. */
  741. function _api_get_collator_sort_flag($sort_flag = SORT_REGULAR) {
  742. switch ($sort_flag) {
  743. case SORT_STRING:
  744. case SORT_SORT_LOCALE_STRING:
  745. return Collator::SORT_STRING;
  746. case SORT_NUMERIC:
  747. return Collator::SORT_NUMERIC;
  748. }
  749. return Collator::SORT_REGULAR;
  750. }
  751. /**
  752. * ICU locales (accessible through intl extension).
  753. */
  754. /**
  755. * Returns isocode (see api_get_language_isocode()) which is purified accordingly to
  756. * be used by the php intl extension (ICU library).
  757. * @param string $language (optional) This is the name of the folder containing translations for the corresponding language.
  758. * If $language is omitted, interface language is assumed then.
  759. * @return string The found language locale id or null on error. Examples: bg, en, pt_BR, ...
  760. */
  761. function _api_get_locale_from_language($language = null) {
  762. static $locale = array();
  763. if (empty($language)) {
  764. $language = api_get_interface_language();
  765. }
  766. if (!isset($locale[$language])) {
  767. $locale[$language] = str_replace('-', '_', api_get_language_isocode($language));
  768. }
  769. return $locale[$language];
  770. }
  771. /**
  772. * Sets/gets the default internal value of the locale id (for the intl extension, ICU).
  773. * @param string $locale (optional) The locale id to be set. When it is omitted, the function returns (gets, reads) the default internal value.
  774. * @return mixed When the function sets the default value, it returns TRUE on success or FALSE on error. Otherwise the function returns as string the current default value.
  775. */
  776. function _api_set_default_locale($locale = null) {
  777. static $default_locale = 'en';
  778. if (!empty($locale)) {
  779. $default_locale = $locale;
  780. if (INTL_INSTALLED) {
  781. return @locale_set_default($locale);
  782. }
  783. return true;
  784. } else {
  785. if (INTL_INSTALLED) {
  786. $default_locale = @locale_get_default();
  787. }
  788. }
  789. return $default_locale;
  790. }
  791. /**
  792. * Gets the default internal value of the locale id (for the intl extension, ICU).
  793. * @return string Returns as string the current default value.
  794. */
  795. function api_get_default_locale() {
  796. return _api_set_default_locale();
  797. }
  798. /**
  799. * Appendix to "Encoding management functions"
  800. */
  801. /**
  802. * Returns a table with non-UTF-8 encodings for all system languages.
  803. * @return array Returns an array in the form array('language1' => array('encoding1', encoding2', ...), ...)
  804. * Note: The function api_get_non_utf8_encoding() returns the first encoding from this array that is correspondent to the given language.
  805. */
  806. function & _api_non_utf8_encodings() {
  807. static $encodings;
  808. if (!isset($encodings)) {
  809. $file = dirname(__FILE__).'/internationalization_database/non_utf8_encodings.php';
  810. if (file_exists($file)) {
  811. $encodings = include ($file);
  812. } else {
  813. $encodings = array('english' => array('ISO-8859-15'));
  814. }
  815. }
  816. return $encodings;
  817. }
  818. /**
  819. * Sets/Gets internal character encoding of the common string functions within the PHP mbstring extension.
  820. * @param string $encoding (optional) When this parameter is given, the function sets the internal encoding.
  821. * @return string When $encoding parameter is not given, the function returns the internal encoding.
  822. * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set.
  823. * @link http://php.net/manual/en/function.mb-internal-encoding
  824. */
  825. function _api_mb_internal_encoding($encoding = null) {
  826. static $mb_internal_encoding = null;
  827. if (empty($encoding)) {
  828. if (is_null($mb_internal_encoding)) {
  829. if (MBSTRING_INSTALLED) {
  830. $mb_internal_encoding = @mb_internal_encoding();
  831. } else {
  832. $mb_internal_encoding = 'UTF-8';
  833. }
  834. }
  835. return $mb_internal_encoding;
  836. }
  837. $mb_internal_encoding = $encoding;
  838. if (_api_mb_supports($encoding)) {
  839. return @mb_internal_encoding($encoding);
  840. }
  841. return false;
  842. }
  843. /**
  844. * Sets/Gets internal character encoding of the regular expression functions (ereg-like) within the PHP mbstring extension.
  845. * @param string $encoding (optional) When this parameter is given, the function sets the internal encoding.
  846. * @return string When $encoding parameter is not given, the function returns the internal encoding.
  847. * Note: This function is used in the global initialization script for setting the internal encoding to the platform's character set.
  848. * @link http://php.net/manual/en/function.mb-regex-encoding
  849. */
  850. function _api_mb_regex_encoding($encoding = null) {
  851. static $mb_regex_encoding = null;
  852. if (empty($encoding)) {
  853. if (is_null($mb_regex_encoding)) {
  854. if (MBSTRING_INSTALLED) {
  855. $mb_regex_encoding = @mb_regex_encoding();
  856. } else {
  857. $mb_regex_encoding = 'UTF-8';
  858. }
  859. }
  860. return $mb_regex_encoding;
  861. }
  862. $mb_regex_encoding = $encoding;
  863. if (_api_mb_supports($encoding)) {
  864. return @mb_regex_encoding($encoding);
  865. }
  866. return false;
  867. }
  868. /**
  869. * Retrieves specified internal encoding configuration variable within the PHP iconv extension.
  870. * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'.
  871. * @return mixed The function returns the requested encoding or FALSE on error.
  872. * @link http://php.net/manual/en/function.iconv-get-encoding
  873. */
  874. function _api_iconv_get_encoding($type) {
  875. return _api_iconv_set_encoding($type);
  876. }
  877. /**
  878. * Sets specified internal encoding configuration variables within the PHP iconv extension.
  879. * @param string $type The parameter $type could be: 'iconv_internal_encoding', 'iconv_input_encoding', or 'iconv_output_encoding'.
  880. * @param string $encoding (optional) The desired encoding to be set.
  881. * @return bool Returns TRUE on success, FALSE on error.
  882. * Note: This function is used in the global initialization script for setting these three internal encodings to the platform's character set.
  883. * @link http://php.net/manual/en/function.iconv-set-encoding
  884. */
  885. function _api_iconv_set_encoding($type, $encoding = null) {
  886. static $iconv_internal_encoding = null;
  887. static $iconv_input_encoding = null;
  888. static $iconv_output_encoding = null;
  889. if (!ICONV_INSTALLED) {
  890. return false;
  891. }
  892. switch ($type) {
  893. case 'iconv_internal_encoding':
  894. if (empty($encoding)) {
  895. if (is_null($iconv_internal_encoding)) {
  896. $iconv_internal_encoding = @iconv_get_encoding($type);
  897. }
  898. return $iconv_internal_encoding;
  899. }
  900. if (_api_iconv_supports($encoding)) {
  901. if(@iconv_set_encoding($type, $encoding)) {
  902. $iconv_internal_encoding = $encoding;
  903. return true;
  904. }
  905. return false;
  906. }
  907. return false;
  908. case 'iconv_input_encoding':
  909. if (empty($encoding)) {
  910. if (is_null($iconv_input_encoding)) {
  911. $iconv_input_encoding = @iconv_get_encoding($type);
  912. }
  913. return $iconv_input_encoding;
  914. }
  915. if (_api_iconv_supports($encoding)) {
  916. if(@iconv_set_encoding($type, $encoding)) {
  917. $iconv_input_encoding = $encoding;
  918. return true;
  919. }
  920. return false;
  921. }
  922. return false;
  923. case 'iconv_output_encoding':
  924. if (empty($encoding)) {
  925. if (is_null($iconv_output_encoding)) {
  926. $iconv_output_encoding = @iconv_get_encoding($type);
  927. }
  928. return $iconv_output_encoding;
  929. }
  930. if (_api_iconv_supports($encoding)) {
  931. if(@iconv_set_encoding($type, $encoding)) {
  932. $iconv_output_encoding = $encoding;
  933. return true;
  934. }
  935. return false;
  936. }
  937. return false;
  938. }
  939. return false;
  940. }
  941. /**
  942. * Ckecks whether a given encoding is known to define single-byte characters only.
  943. * The result might be not accurate for unknown by this library encodings. This is not fatal,
  944. * then the library picks up conversions plus Unicode related internal algorithms.
  945. * @param string $encoding A given encoding identificator.
  946. * @return bool TRUE if the encoding is known as single-byte (for ISO-8859-15, WINDOWS-1251, etc.), FALSE otherwise.
  947. */
  948. function _api_is_single_byte_encoding($encoding) {
  949. static $checked = array();
  950. if (!isset($checked[$encoding])) {
  951. $character_map = _api_get_character_map_name(api_refine_encoding_id($encoding));
  952. $checked[$encoding] = (!empty($character_map)
  953. && !in_array($character_map, array('UTF-8', 'HTML-ENTITIES')));
  954. }
  955. return $checked[$encoding];
  956. }
  957. /**
  958. * Checks whether the specified encoding is supported by the PHP mbstring extension.
  959. * @param string $encoding The specified encoding.
  960. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise.
  961. */
  962. function _api_mb_supports($encoding) {
  963. static $supported = array();
  964. if (!isset($supported[$encoding])) {
  965. if (MBSTRING_INSTALLED) {
  966. $supported[$encoding] = api_equal_encodings($encoding, mb_list_encodings(), true);
  967. } else {
  968. $supported[$encoding] = false;
  969. }
  970. }
  971. return $supported[$encoding];
  972. }
  973. /**
  974. * Checks whether the specified encoding is supported by the PHP iconv extension.
  975. * @param string $encoding The specified encoding.
  976. * @return bool Returns TRUE when the specified encoding is supported, FALSE othewise.
  977. */
  978. function _api_iconv_supports($encoding) {
  979. static $supported = array();
  980. if (!isset($supported[$encoding])) {
  981. if (ICONV_INSTALLED) {
  982. $enc = api_refine_encoding_id($encoding);
  983. if ($enc != 'HTML-ENTITIES') {
  984. $test_string = '';
  985. for ($i = 32; $i < 128; $i++) {
  986. $test_string .= chr($i);
  987. }
  988. $supported[$encoding] = (@iconv_strlen($test_string, $enc)) ? true : false;
  989. } else {
  990. $supported[$encoding] = false;
  991. }
  992. } else {
  993. $supported[$encoding] = false;
  994. }
  995. }
  996. return $supported[$encoding];
  997. }
  998. // This function checks whether the function _api_convert_encoding() (the php-
  999. // implementation) is able to convert from/to a given encoding.
  1000. function _api_convert_encoding_supports($encoding) {
  1001. static $supports = array();
  1002. if (!isset($supports[$encoding])) {
  1003. $supports[$encoding] = _api_get_character_map_name(api_refine_encoding_id($encoding)) != '';
  1004. }
  1005. return $supports[$encoding];
  1006. }
  1007. /**
  1008. * Upgrading the PHP5 mbstring extension
  1009. */
  1010. // A multibyte replacement of strchr(). This function exists in PHP 5 >= 5.2.0
  1011. // See http://php.net/manual/en/function.mb-strrchr
  1012. if (MBSTRING_INSTALLED && !function_exists('mb_strchr')) {
  1013. function mb_strchr($haystack, $needle, $part = false, $encoding = null) {
  1014. if (empty($encoding)) {
  1015. $encoding = mb_internal_encoding();
  1016. }
  1017. return mb_strstr($haystack, $needle, $part, $encoding);
  1018. }
  1019. }
  1020. // A multibyte replacement of stripos(). This function exists in PHP 5 >= 5.2.0
  1021. // See http://php.net/manual/en/function.mb-stripos
  1022. if (MBSTRING_INSTALLED && !function_exists('mb_stripos')) {
  1023. function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
  1024. if (empty($encoding)) {
  1025. $encoding = mb_internal_encoding();
  1026. }
  1027. return mb_strpos(mb_strtolower($haystack, $encoding), mb_strtolower($needle, $encoding), $offset, $encoding);
  1028. }
  1029. }
  1030. // A multibyte replacement of stristr(). This function exists in PHP 5 >= 5.2.0
  1031. // See http://php.net/manual/en/function.mb-stristr
  1032. if (MBSTRING_INSTALLED && !function_exists('mb_stristr')) {
  1033. function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
  1034. if (empty($encoding)) {
  1035. $encoding = mb_internal_encoding();
  1036. }
  1037. $pos = mb_strpos(mb_strtolower($haystack, $encoding), mb_strtolower($needle, $encoding), 0, $encoding);
  1038. if ($pos === false) {
  1039. return false;
  1040. }
  1041. if ($part) {
  1042. return mb_substr($haystack, 0, $pos + 1, $encoding);
  1043. }
  1044. return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
  1045. }
  1046. }
  1047. // A multibyte replacement of strrchr(). This function exists in PHP 5 >= 5.2.0
  1048. // See http://php.net/manual/en/function.mb-strrchr
  1049. if (MBSTRING_INSTALLED && !function_exists('mb_strrchr')) {
  1050. function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
  1051. if (empty($encoding)) {
  1052. $encoding = mb_internal_encoding();
  1053. }
  1054. $needle = mb_substr($needle, 0, 1, $encoding);
  1055. $pos = mb_strrpos($haystack, $needle, mb_strlen($haystack, $encoding) - 1, $encoding);
  1056. if ($pos === false) {
  1057. return false;
  1058. }
  1059. if ($part) {
  1060. return mb_substr($haystack, 0, $pos + 1, $encoding);
  1061. }
  1062. return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
  1063. }
  1064. }
  1065. // A multibyte replacement of strstr(). This function exists in PHP 5 >= 5.2.0
  1066. // See http://php.net/manual/en/function.mb-strstr
  1067. if (MBSTRING_INSTALLED && !function_exists('mb_strstr')) {
  1068. function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
  1069. if (empty($encoding)) {
  1070. $encoding = mb_internal_encoding();
  1071. }
  1072. $pos = mb_strpos($haystack, $needle, 0, $encoding);
  1073. if ($pos === false) {
  1074. return false;
  1075. }
  1076. if ($part) {
  1077. return mb_substr($haystack, 0, $pos + 1, $encoding);
  1078. }
  1079. return mb_substr($haystack, $pos, mb_strlen($haystack, $encoding), $encoding);
  1080. }
  1081. }
  1082. /**
  1083. * Returns an array of translated week days and months, short and normal names.
  1084. * @param string $language (optional) Language indentificator. If it is omited, the current interface language is assumed.
  1085. * @return array Returns a multidimensional array with translated week days and months.
  1086. */
  1087. function &_api_get_day_month_names($language = null) {
  1088. static $date_parts = array();
  1089. if (empty($language)) {
  1090. $language = api_get_interface_language();
  1091. }
  1092. if (!isset($date_parts[$language])) {
  1093. $week_day = array('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday');
  1094. $month = array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');
  1095. for ($i = 0; $i < 7; $i++) {
  1096. $date_parts[$language]['days_short'][] = get_lang($week_day[$i].'Short', '', $language);
  1097. $date_parts[$language]['days_long'][] = get_lang($week_day[$i].'Long', '', $language);
  1098. }
  1099. for ($i = 0; $i < 12; $i++) {
  1100. $date_parts[$language]['months_short'][] = get_lang($month[$i].'Short', '', $language);
  1101. $date_parts[$language]['months_long'][] = get_lang($month[$i].'Long', '', $language);
  1102. }
  1103. }
  1104. return $date_parts[$language];
  1105. }