xmd.lib.php 31 KB

  1. <?php /* <!-- xmd.lib.php -->
  2. <!-- XML MiniDom, 2006/12/13 -->
  3. <!-- Copyright (C) 2005 rene.haentjens@UGent.be - see note at end of text -->
  4. <!-- Released under the GNU GPL V2, see http://www.gnu.org/licenses/gpl.html -->
  5. */
  6. /**
  7. * This is the XML Dom library for Dokeos.
  8. * Include/require it in your code to use its functionality.
  9. *
  10. * @author René Haentjens
  11. * @package chamilo.library
  12. */
  13. class xmddoc
  14. {
  15. /* This MiniDom for XML essentially implements an array of elements, each
  16. with a parent, a name, a namespace-URI, attributes & namespace definitions,
  17. and children. Child nodes are a mix of texts and subelements. Parent
  18. and subelements are stored as elementnumbers, the root is element 0.
  19. Parsing is built on James Clark's expat, by default enabled in PHP.
  20. The MiniDom is an alternative to the experimental DOM XML functions.
  21. It is open source PHP and requires no extra libraries.
  22. Restrictions of the MiniDom:
  23. - no two attributes with same name (different namespaces) on one element;
  24. - only 'ISO-8859-1' right now; author will investigate 'UTF-8' later;
  25. - processing instructions & external entities are ignored;
  26. - no distinction between text and cdata child nodes;
  27. - xmd_xml(nonrootelement) may not generate all needed namespace definitions;
  28. - xmd_value, xmd_html_value, xmd_select_xxx, xmd_update, xmd_update_many:
  29. path parameter uses names without namespaces
  30. and supports only a small subset of XPath, with some extensions;
  31. - maximum 11 auto-generated namespace prefixes (can be changed in xmddoc)
  32. Namespace definitions are stored as attributes, with name = 'xmlns...'
  33. e.g. xmlns:xml='http://www.w3.org/XML/1998/namespace'
  34. e.g. xmlns='http://www.imsglobal.org/xsd/imscp_v1p1' (default namespace)
  35. Exposed methods:
  36. new xmddoc(array_of_strings, charset = 'ISO-8859-1'): parse strings
  37. new xmddoc(names, numbers, textstring): restore from cached arrays & string
  38. xmd_add_element(complete_name, parent, attributes_with_complete_names)
  39. complete name = [ URI + ':' + ] name
  40. xmd_set_attribute(element, complete_attribute_name, value) (id. as above)
  41. xmd_add_text(text, element)
  42. xmd_add_text_element(complete_name, text, parent, attributes) =
  43. xmd_add_text(text, xmd_add_element(complete_name, parent, attributes))
  44. xmd_get_element(element) => children, attributes, '?name', '?parent'
  45. xmd_get_ns_uri(element [, attribute_name_without_uri] )
  46. xmd_text(element): combines element and subelements' text nodes
  47. xmd_xml(): generate XML-formatted string (reverse parsing)
  48. xmd_xml(indent_increase, initial_indent, lbr): e.g. ' ', '', "\n"
  49. xmd_value(path): follow path from root, return attribute value or text
  50. e.g. 'manifest/organizations/@default' 'body/p[1]' (1=first, -1=last)
  51. xmd_value(path, parent, fix, function): find value(s) with path from parent,
  52. apply function and decorate with fix = ('pre'=>..., 'in'=>..., 'post')
  53. e.g. 'general/title/*' array('in' => ', ')
  54. extensions to XPath:
  55. - and + for previous and next sibling, e.g. general/title/+/string
  56. -name and +name for sibling with specific name, e.g. item[2]/+item
  57. .. for parent, e.g. general/title/../../technical/format (stops at root)
  58. @* for location (element number within siblings, starts at 1)
  59. @*name for location in siblings with specific name
  60. @. for element name, e.g. organization/*[1]/@.
  61. namespaces are not supported in paths: they use names without URI
  62. xmd_html_value(pathFix, parent, fun): 'path' 'path infix' 'prefix -% path'
  63. 'path infix %- postfix': fun = 'htmlspecialchars' by default
  64. xmd_select_elements(path, parent): find element nodes with path (see above)
  65. xmd_select_single_element (id.) returns -1 or elementnumber
  66. xmd_select_elements_where(path, subpath, value, parent): e.g. '@id', '12'
  67. is like XPath with path[@id='12']; subpath = '.' means text
  68. xmd_select_elements_where_notempty(path, subpath, parent): e.g. '@id'
  69. xmd_select_xxx methods only select elements, not attributes
  70. xmd_remove_element(childelement_number)
  71. xmd_remove_nodes(childelement_numbers_and_strings, parent)
  72. xmd_update(path, text, parent): select single element, then:
  73. text element: replace text by new text
  74. attribute: give attribute new value = text
  75. somepath/!newtag: create new child element containing text
  76. somepath/~: delete single (first or only) element
  77. xmd_update_many(paths, subpath, ...): paths can be path1,path2,...:
  78. for all elements selected by all paths, update with subpath
  79. xmd_copy_foreign_child(fdoc, child, parent):
  80. copies fdoc's child as a new child of parent;
  81. note this method hasn't been tested for all cases (namespaces...)
  82. xmd_cache(): dump xmddoc into names+numbers+textstring for serialization
  83. Order of parameters (if present) for xmd_xxx methods:
  84. name, text, children, path, subPath, value,
  85. parent, fix, fun, attributes (name value)
  86. Properties: (G)lobal to xmddoc or array (one for each xmddoc (E)lement)
  87. e.g. $this->name[0] is the name of the document root element
  88. e.g. $this->names[$this->ns[0]] is its namespace URI
  89. e.g. $this->attributes[0]['title'] is the value of its attribute 'title'
  90. e.g. $this->attributes[0]['xmlns:a'] is the URI for prefix 'a:'
  91. */
  92. var $names; //G array: n => namespace URI (0 => '')
  93. var $numbers; //G array: numeric dump of xmddoc for caching
  94. var $textstring; //G string: string dump of xmddoc for caching
  95. var $error; //G string: empty or parsing error message
  96. var $_nesting; //G array: nested elements while parsing (internal)
  97. var $_ns; //G array: namespace defs for upcoming element (id.)
  98. var $_concat; //G bool: concatenate cData with previous (id.)
  99. var $_nsp; //G array: namespace prefixes in use somewhere (id.)
  100. var $_last; //G int: last used elementnumber (id.)
  101. var $_strings; //G int: number of string child nodes cached (id.)
  102. var $parent; //E int: elementnumber: 0 is root, -1 is parent of root
  103. var $name; //E string: element name, without namespace
  104. var $ns; //E int: index into $names to find namespace URI
  105. var $attributes; //E array: attribute name(without namespace) => value
  106. var $atns; //E array: attribute name(id.) => index into $names
  107. var $children; //E array: elementnumbers and strings (text children)
  108. function xmd_get_element($parent = 0) // for convenience, readonly copy
  109. {
  110. // returns mixed array: children + texts have numeric key,
  111. // other elements are attributes, '?name' and '?parent'
  112. if ($parent < 0 || $parent > $this->_last) return array();
  113. return array_merge($this->children[$parent], $this->attributes[$parent],
  114. array('?name' => $this->name[$parent],
  115. '?parent' => $this->parent[$parent]));
  116. }
  117. function xmd_get_ns_uri($parent = 0, $attName = '')
  118. {
  119. if ($parent < 0 || $parent > $this->_last) return '';
  120. return $attName ? $this->names[$this->atns[$parent][$attName]] :
  121. $this->names[$this->ns[$parent]];
  122. }
  123. function xmd_remove_element($child) // success = TRUE
  124. {
  125. if ($child <= 0 || $child > $this->_last) return FALSE;
  126. $parent = $this->parent[$child];
  127. foreach ($this->children[$parent] as $key => $value)
  128. if ($value === $child)
  129. {
  130. unset($this->children[$parent][$key]); return TRUE;
  131. }
  132. return FALSE;
  133. }
  134. function xmd_remove_nodes($children, $parent = 0) // success = TRUE
  135. {
  136. if ($parent < 0 || $parent > $this->_last) return FALSE;
  137. if (!is_array($children)) $children = array($children);
  138. foreach ($children as $child)
  139. {
  140. $childFound = FALSE;
  141. foreach ($this->children[$parent] as $key => $value)
  142. if ($value === $child)
  143. {
  144. unset($this->children[$parent][$key]);
  145. $childFound = TRUE; break;
  146. }
  147. if (!$childFound) return FALSE;
  148. }
  149. return TRUE;
  150. }
  151. function xmd_update($xmPath, $text = '', $parent = 0) // success = TRUE
  152. {
  153. if ($parent < 0 || $parent > $this->_last ||
  154. !is_string($text) || !is_string($xmPath)) return FALSE;
  155. $m = array();
  156. if (api_ereg('^(.*)([~!@])(.*)$', $xmPath, $m)) // split on ~ or ! or @
  157. {
  158. $xmPath = $m[1]; $op = $m[2]; $name = $m[3];
  159. }
  160. if (($elem = $this->xmd_select_single_element($xmPath, $parent)) == -1)
  161. return FALSE;
  162. if (isset($op))
  163. {
  164. if ($op == '!' && $name)
  165. {
  166. $this->xmd_add_text_element($name, $text, $elem); return TRUE;
  167. }
  168. elseif ($op == '@' && $name)
  169. {
  170. $this->attributes[$elem][$name] = $text; return TRUE;
  171. }
  172. elseif ($op == '~' && !$name)
  173. return $this->xmd_remove_element($elem);
  174. return FALSE;
  175. }
  176. if (($nch = count($this->children[$elem])) > 1) return FALSE;
  177. $this->children[$elem][0] = $text; return TRUE;
  178. }
  179. function xmd_update_many($xmPaths, $subPath = '', $text = '', $parent = 0)
  180. {
  181. $result = TRUE;
  182. foreach (explode(',', $xmPaths) as $xmPath)
  183. foreach ($this->xmd_select_elements($xmPath, $parent) as $elem)
  184. $result &= $this->xmd_update($subPath, $text, $elem);
  185. // '&=' always evaluates rhs, '&&=' skips it if $result is FALSE
  186. return $result;
  187. }
  188. function xmd_copy_foreign_child($fdoc, $fchild = 0, $parent = 0)
  189. {
  190. $my_queue = array($fchild, $parent); // optimization, see below
  191. while (!is_null($fchild = array_shift($my_queue)))
  192. {
  193. $parent = array_shift($my_queue);
  194. if (is_string($fchild))
  195. $this->xmd_add_text($fchild, $parent);
  196. elseif (isset($fdoc->name[$fchild]))
  197. {
  198. $fullname = $fdoc->name[$fchild];
  199. $attribs = array(); $nsdefs = array();
  200. if (($nsn = $fdoc->ns[$fchild]))
  201. $fullname = $fdoc->names[$nsn] . ':' . $fullname;
  202. foreach ($fdoc->attributes[$fchild] as $name => $value)
  203. {
  204. if (($p = strrpos($name, ':')) !== FALSE) // 'xmlns:...'
  205. $nsdefs[$name] = $value;
  206. else
  207. {
  208. if (($nsn = $fdoc->atns[$fchild][$name]))
  209. $name = $fdoc->names[$nsn] . ':' . $name;
  210. $attribs[$name] = $value;
  211. }
  212. }
  213. $child = $this->xmd_add_element($fullname, $parent,
  214. array_merge($attribs, $nsdefs));
  215. foreach ($fdoc->children[$fchild] as $ch)
  216. array_push($my_queue, $ch, $child);
  217. // recursive call was 10 times slower...
  218. }
  219. }
  220. }
  221. function xmd_add_element($name, $parent = 0, $attribs = array())
  222. {
  223. if (!is_string($name) || $name == '') return -1;
  224. if (($p = strrpos($name, ':')) !== FALSE) // URI + ':' + name
  225. if ($p == 0 || $p == api_strlen($name) - 1) return -1;
  226. $child = ($this->_last += 1); $uris = array(); $uri = '';
  227. if ($p)
  228. {
  229. $uri = api_substr($name, 0, $p); $name = api_substr($name, $p + 1);
  230. $uris[] = $uri; // check uris after defining all attributes
  231. }
  232. $this->parent[$child] = $parent; $this->name[$child] = $name;
  233. $this->ns[$child] = $uri ? $this->_lookup($uri) : 0;
  234. $this->children[$child] = array();
  235. $this->attributes[$child] = array(); $this->atns[$child] = array();
  236. foreach ($attribs as $name => $value)
  237. if (($uri = $this->xmd_set_attribute($child, $name, $value, FALSE)))
  238. $uris[] = $uri; // check at end, not immediately
  239. if ($parent >= 0 && $parent <= $this->_last)
  240. $this->children[$parent][] = $child; // link to parent
  241. foreach ($uris as $uri) $this->_nsPfx($child, $uri);
  242. // find prefix (child and upwards) or create new prefix at root
  243. return $child;
  244. }
  245. function xmd_set_attribute($parent, $name, $value, $checkurihaspfx = TRUE)
  246. {
  247. if (!is_string($name) || $name == '') return '';
  248. if (($p = strrpos($name, ':')) !== FALSE) // URI + ':' + name
  249. if ($p == 0 || $p == api_strlen($name) - 1) return '';
  250. $uri = ''; // beware of 'xmlns...', which is a namespace def!
  251. if ($p) if (api_substr($name, 0, 6) != 'xmlns:')
  252. {
  253. $uri = api_substr($name, 0, $p); $name = api_substr($name, $p + 1);
  254. }
  255. $this->attributes[$parent][$name] = $value;
  256. $this->atns[$parent][$name] = $uri ? $this->_lookup($uri) : 0;
  257. if ($checkurihaspfx) if ($uri) $this->_nsPfx($parent, $uri);
  258. if (api_substr($name, 0, 6) == 'xmlns:') // namespace def with prefix
  259. $this->_nsp[api_substr($name, 6)] = $value; // prefix is in use
  260. return $uri;
  261. }
  262. function xmd_add_text($text, $parent = 0) // success = TRUE
  263. {
  264. if ($parent < 0 || $parent > $this->_last || !is_string($text))
  265. return FALSE;
  266. if ($text) $this->children[$parent][] = $text; return TRUE;
  267. }
  268. function xmd_add_text_element($name, $text, $parent = 0, $attribs = array())
  269. {
  270. $this->xmd_add_text($text,
  271. $child = $this->xmd_add_element($name, $parent, $attribs));
  272. return $child;
  273. }
  274. function xmd_text($parent = 0)
  275. {
  276. if ($parent < 0 || $parent > $this->_last) return '';
  277. $text = ''; // assemble text subnodes and text in child elements
  278. foreach ($this->children[$parent] as $child)
  279. $text .= is_string($child) ? $child : $this->xmd_text($child);
  280. return $text;
  281. }
  282. function xmd_xml($increase = ' ', $indent = '', $lbr = "\n", $parent = 0)
  283. {
  284. global $charset;
  285. if ($parent < 0 || $parent > $this->_last) return '';
  286. $uri = $this->names[$this->ns[$parent]];
  287. $pfxc = ($uri == '') ? '' : $this->_nsPfx($parent, $uri);
  288. $dbg = ''; // ($uri == '') ? '' : (' <!-- ' . $uri . ' -->');
  289. $result = $indent . '<' . ($element = $pfxc . $this->name[$parent]);
  290. $atnsp = $this->atns[$parent];
  291. foreach ($this->attributes[$parent] as $name => $value)
  292. {
  293. if (isset($atnsp[$name]))
  294. $atnsn = $atnsp[$name];
  295. elseif (isset($atnsn))
  296. unset($atnsn);
  297. $uri = isset($atnsn) && isset($this->names[$atnsn]) ?
  298. $this->names[$atnsn] : '';
  299. $pfxc = ($uri == '') ? '' : $this->_nsPfx($parent, $uri);
  300. $result .= ' ' . $pfxc . $name
  301. . '="' . htmlspecialchars($value, ENT_QUOTES, $charset) . '"';
  302. }
  303. if (count($this->children[$parent]) == 0)
  304. return $result . ' />' . $dbg;
  305. $result .= '>';
  306. foreach ($this->children[$parent] as $child)
  307. $result .= is_string($child) ? htmlspecialchars($child, ENT_QUOTES, $charset) : ($lbr .
  308. $this->xmd_xml($increase, $indent.$increase, $lbr, $child));
  309. if (!is_string($child)) $result .= $lbr . $indent; // last $child
  310. return $result . '</' . $element . '>' . $dbg;
  311. }
  312. function xmd_value($xmPath, $parent = 0, $fix = array(), $fun = '')
  313. {
  314. // extensions: @*[name] for element position (starts at 1)
  315. // @. for element (tag)name
  316. if ($parent < 0 || $parent > $this->_last || !is_string($xmPath))
  317. return '';
  318. if (($p = strrpos($xmPath, '@')) !== FALSE)
  319. {
  320. $attName = api_substr($xmPath, $p+1); $xmPath = api_substr($xmPath, 0, $p);
  321. }
  322. if (!($elems = $this->xmd_select_elements($xmPath, $parent))) return '';
  323. $result = ''; $fixin = isset($fix['in']) ? $fix['in'] : '';
  324. foreach ($elems as $elem)
  325. {
  326. $value = isset($attName) && api_strlen($attName) >= 1 ?
  327. ($attName == '.' ? $this->name[$elem] :
  328. ($attName{0} == '*' ?
  329. $this->_sibnum($elem, api_substr($attName, 1)) :
  330. $this->attributes[$elem][$attName])) :
  331. $this->xmd_text($elem);
  332. $result .= $fixin . ($fun ? $fun($value) : $value);
  333. }
  334. return (isset($fix['pre']) ? $fix['pre'] : '') .
  335. api_substr($result, api_strlen($fixin)) .
  336. (isset($fix['post']) ? $fix['post'] : '');
  337. }
  338. function xmd_html_value($xmPath, $parent = 0, $fun = 'htmlspecialchars')
  339. {
  340. if (!is_string($xmPath)) return '';
  341. $fix = array();
  342. if (($p = api_strpos($xmPath, ' -% ')) !== FALSE)
  343. {
  344. $fix['pre'] = api_substr($xmPath, 0, $p);
  345. $xmPath = api_substr($xmPath, $p+4);
  346. }
  347. if (($p = api_strpos($xmPath, ' %- ')) !== FALSE)
  348. {
  349. $fix['post'] = api_substr($xmPath, $p+4);
  350. $xmPath = api_substr($xmPath, 0, $p);
  351. }
  352. if (($p = api_strpos($xmPath, ' ')) !== FALSE)
  353. {
  354. $fix['in'] = api_substr($xmPath, $p+1);
  355. $xmPath = api_substr($xmPath, 0, $p);
  356. }
  357. return $this->xmd_value($xmPath, $parent, $fix, $fun);
  358. }
  359. function xmd_select_single_element($xmPath, $parent = 0) // for convenience
  360. {
  361. $elements = $this->xmd_select_elements($xmPath, $parent);
  362. if (count($elements) == 0) return -1;
  363. return $elements[0];
  364. }
  365. function xmd_select_elements_where($xmPath,
  366. $subPath = '.', $value = '', $parent = 0)
  367. {
  368. if (!is_string($subPath)) return array();
  369. $elems = array(); if ($subPath == '.') $subPath = '';
  370. foreach ($this->xmd_select_elements($xmPath, $parent) as $elem)
  371. if ($this->xmd_value($subPath, $elem) == $value) $elems[] = $elem;
  372. return $elems;
  373. }
  374. function xmd_select_elements_where_notempty($xmPath,
  375. $subPath = '.', $parent = 0)
  376. {
  377. if (!is_string($subPath)) return array();
  378. $elems = array(); if ($subPath == '.') $subPath = '';
  379. foreach ($this->xmd_select_elements($xmPath, $parent) as $elem)
  380. if ($this->xmd_value($subPath, $elem)) $elems[] = $elem;
  381. return $elems;
  382. }
  383. function xmd_select_elements($xmPath, $parent = 0)
  384. {
  385. // XPath subset: e1/e2/.../en, also * and e[n] and *[n] (at 1 or -1)
  386. // /*/... starts from root, regardless of $parent
  387. // extensions: e= - or + (previous & next sibling)
  388. // e= -name or +name (sibling of specific name)
  389. // e= .. (stops at root, so too many doesn't matter)
  390. if (api_substr($xmPath, 0, 3) == '/*/')
  391. {
  392. $xmPath = api_substr($xmPath, 3); $parent = 0;
  393. }
  394. if ($parent < 0 || $parent > $this->_last) return array();
  395. while (api_substr($xmPath, 0, 1) == '/') $xmPath = api_substr($xmPath, 1);
  396. while (api_substr($xmPath, -1) == '/') $xmPath = api_substr($xmPath, 0, -1);
  397. if ($xmPath == '' || $xmPath == '.') return array($parent);
  398. if ($xmPath == '..')
  399. {
  400. if ($parent > 0) return array($this->parent[$parent]);
  401. return array($parent);
  402. }
  403. if ($xmPath{0} == '-' || $xmPath{0} == '+')
  404. {
  405. $sib = $this->_sibnum($parent, api_substr($xmPath, 1), $xmPath{0});
  406. if ($sib == -1) return array(); return array($sib);
  407. }
  408. $m = array();
  409. if (api_ereg('^(.+)/([^/]+)$', $xmPath, $m)) // split on last /
  410. {
  411. if (!($set = $this->xmd_select_elements($m[1], $parent)))
  412. return $set; // which is empty array
  413. if (count($set) == 1)
  414. return $this->xmd_select_elements($m[2], $set[0]);
  415. $bigset = array(); $m2 = $m[2];
  416. foreach ($set as $e)
  417. $bigset = array_merge($bigset,
  418. $this->xmd_select_elements($m2, $e));
  419. return $bigset;
  420. }
  421. $xmName = $xmPath; $xmNum = 0; $elems = array();
  422. if (api_ereg('^(.+)\[(-?[0-9]+)\]$', $xmPath, $m))
  423. {
  424. $xmName = $m[1]; $xmNum = (int) $m[2];
  425. }
  426. foreach ($this->children[$parent] as $child) if (!is_string($child))
  427. if ($xmName == '*' || ($this->name[$child]) == $xmName)
  428. $elems[] = $child;
  429. if ($xmNum == 0) return $elems;
  430. $xmNum = ($xmNum > 0) ? $xmNum - 1 : count($elems) + $xmNum;
  431. return ($xmNum < count($elems)) ? array($elems[$xmNum]) : array();
  432. }
  433. // Notes on parsing and caching:
  434. // - parsing 388 KB -> 0.94 sec
  435. // - caching 298 KB <- 1.63 sec: 11387 elements, 5137 string nodes
  436. // - uncache 298 KB -> 0.42 sec
  437. // - $this->children[$n][] in a loop is quicker than a temporary array
  438. // $children[] and copying $this->children[$n] = $children after the loop
  439. // - incremental operator ++$numptr is not quicker than ($numptr += 1)
  440. // - numbers & textstring: more compact with base64_encode(gzcompress())
  441. function xmd_cache() // store all data in numbers+names+textstring
  442. {
  443. $this->numbers = array(); $this->textstring = ''; $this->_strings = 0;
  444. // add all element- and attributenames to names - see below
  445. for ($n = 0; $n <= $this->_last; $n++)
  446. {
  447. $this->numbers[] = count($this->children[$n]);
  448. foreach ($this->children[$n] as $ch)
  449. {
  450. if (is_string($ch))
  451. {
  452. $this->numbers[] = 0; $this->_strings += 1;
  453. $this->numbers[] = strlen($ch); $this->textstring .= $ch; //!!! Here strlen() has not been changed to api_strlen(). To be investigated. Ivan Tcholakov, 29-AUG-2008.
  454. }
  455. else
  456. {
  457. $this->numbers[] = ($ch-$n); // more compact than $ch
  458. }
  459. }
  460. $this->numbers[] = count($this->attributes[$n]);
  461. foreach ($this->attributes[$n] as $name => $value)
  462. {
  463. $this->numbers[] = $this->_lookup($name);
  464. $this->numbers[] = $this->atns[$n][$name];
  465. $this->numbers[] = strlen($value); $this->textstring .= $value; //!!! Here strlen() has not been changed to api_strlen(). To be investigated. Ivan Tcholakov, 29-AUG-2008.
  466. }
  467. $this->numbers[] = $this->_lookup($this->name[$n]);
  468. $this->numbers[] = $this->ns[$n];
  469. $this->numbers[] = $n - $this->parent[$n]; // more compact
  470. }
  471. }
  472. function xmddoc($strings, $charset = null, $textstring = '')
  473. {
  474. if (empty($charset))
  475. {
  476. $charset = api_get_system_encoding();
  477. }
  478. $this->parent = array(); $this->name = array();
  479. $this->ns = array(); $this->attributes = array();
  480. $this->atns = array(); $this->children = array();
  481. $this->error = ''; $this->_nesting = array();
  482. $this->_ns = array(); $this->_last = -1;
  483. $this->_nsp = array();
  484. foreach (explode(',', 'eb,tn,eg,ut,as,ne,jt,ne,ah,en,er') as $pfx)
  485. $this->_nsp[$pfx] = '';
  486. if (is_array($charset)) // new xmddoc($names, $numbers, $textstring)
  487. {
  488. $this->names = $strings; $this->numbers = $charset;
  489. $this->textstring = $textstring; $this->_uncache(); return;
  490. }
  491. $this->names = array(); $this->_lookup(''); // empty ns is number 0
  492. // This is a quick workaround.
  493. // The xml-parser supports only ISO-8859-1, UTF-8 and US-ASCII.
  494. // See http://php.net/manual/en/function.xml-parser-create-ns.php
  495. //$xml_parser = xml_parser_create_ns($charset, ':');
  496. $xml_parser = xml_parser_create_ns(api_is_utf8($charset) ? 'UTF-8' : 'ISO-8859-1', ':');
  497. xml_set_object($xml_parser,$this); // instead of ...,&$this
  498. // See PHP manual: Passing by Reference vs. xml_set_object
  499. xml_set_element_handler($xml_parser, '_startElement', '_endElement');
  500. xml_set_character_data_handler($xml_parser, '_cData');
  501. xml_set_start_namespace_decl_handler($xml_parser, '_startNs');
  502. // xml_set_end_namespace_decl_handler($xml_parser, '_endNs');
  503. // xml_set_default_handler ($xml_parser, '');
  504. xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, FALSE);
  505. if (!is_array($strings)) $strings = array($strings);
  506. if (count($strings) && (api_substr($strings[0], 0, 5) != '<?xml') &&
  507. !xml_parse($xml_parser,
  508. '<?xml version="1.0" encoding="' . $charset . '"?>', FALSE))
  509. {
  510. $this->error = 'Encoding ' . $charset . ': ' .
  511. xml_error_string(xml_get_error_code($xml_parser));
  512. $strings = array();
  513. }
  514. foreach ($strings as $s)
  515. {
  516. if (api_substr($s, -1) != "\n") $s .= "\n";
  517. if (!xml_parse($xml_parser, $s, FALSE))
  518. {
  519. $errCode = xml_get_error_code($xml_parser);
  520. $this->error = 'Line '. xml_get_current_line_number($xml_parser) .
  521. ' (c' . xml_get_current_column_number($xml_parser) .
  522. // ', b' . xml_get_current_byte_index($xml_parser) .
  523. '): error ' . $errCode . '= ' . xml_error_string($errCode);
  524. break; // the error string is English...
  525. }
  526. }
  527. xml_parse($xml_parser, '', TRUE); xml_parser_free($xml_parser);
  528. }
  529. // internal methods
  530. function _sibnum($parent, $name = '', $pmn = 'N') // sibling or number
  531. {
  532. if ($parent <= 0) return -1;
  533. $found = FALSE; $prev = -1; $next = -1; $num = 0;
  534. foreach ($this->children[$this->parent[$parent]] as $child)
  535. {
  536. if (is_string($child)) continue;
  537. $name_ok = $name ? ($this->name[$child] == $name) : TRUE;
  538. if ($found && $name_ok)
  539. {
  540. $next = $child; break;
  541. }
  542. elseif ($parent === $child)
  543. {
  544. $num ++; $found = TRUE;
  545. }
  546. elseif ($name_ok)
  547. {
  548. $num ++; $prev = $child;
  549. }
  550. }
  551. return ($pmn == '-') ? $prev : (($pmn == '+') ? $next : $num);
  552. }
  553. function _uncache() // restore all data from numbers+names+textstring
  554. {
  555. $n = -1; $numptr = -1; $txtptr = 0; $count = count($this->numbers);
  556. $A0 = array(); // believe it or not, this makes the loops quicker!
  557. while (++$numptr < $count)
  558. {
  559. $n++;
  560. if (($countdown = $this->numbers[$numptr]) == 0)
  561. {
  562. $this->children[$n] = $A0;
  563. }
  564. else while (--$countdown >= 0)
  565. {
  566. if (($chc = $this->numbers[++$numptr]) == 0)
  567. {
  568. $this->children[$n][] = api_substr($this->textstring,
  569. $txtptr, ($len = $this->numbers[++$numptr]));
  570. $txtptr += $len;
  571. }
  572. else
  573. {
  574. $this->children[$n][] = $n + $chc;
  575. }
  576. }
  577. if (($countdown = $this->numbers[++$numptr]) == 0)
  578. {
  579. $this->attributes[$n] = $this->atns[$n] = $A0;
  580. }
  581. else while (--$countdown >= 0)
  582. {
  583. $name = $this->names[$this->numbers[++$numptr]];
  584. $this->atns[$n][$name] = $this->numbers[++$numptr];
  585. $this->attributes[$n][$name] = api_substr($this->textstring,
  586. $txtptr, ($len = $this->numbers[++$numptr]));
  587. $txtptr += $len;
  588. }
  589. $this->name[$n] = $this->names[$this->numbers[++$numptr]];
  590. $this->ns[$n] = $this->numbers[++$numptr];
  591. $this->parent[$n] = $n - $this->numbers[++$numptr];
  592. }
  593. $this->_last = $n;
  594. }
  595. function _startElement($parser, $name, $attribs)
  596. {
  597. $level = count($this->_nesting);
  598. $parent = ($level == 0) ? -1 : $this->_nesting[$level-1];
  599. $child = $this->xmd_add_element($name, $parent,
  600. array_merge($attribs, $this->_ns));
  601. $this->_nesting[] = $child; $this->_ns = array();
  602. $this->_concat = FALSE; // see _cData
  603. }
  604. function _endElement($parser, $name)
  605. {
  606. array_pop($this->_nesting); $this->_concat = FALSE;
  607. }
  608. function _cData($parser, $data)
  609. {
  610. if (!ltrim($data)) return; // empty line, or whitespace preceding <tag>
  611. $level = count($this->_nesting);
  612. $parent = ($level == 0) ? -1 : $this->_nesting[$level-1];
  613. if ($parent >= 0)
  614. {
  615. $nc = count($this->children[$parent]);
  616. $pcs = ($nc > 0 && is_string($this->children[$parent][$nc - 1]));
  617. if ($pcs && api_strlen($data) == 1) $this->_concat = TRUE;
  618. // expat parser puts &xx; in a separate cData, try to re-assemble
  619. if ($pcs && $data{0} > '~') $this->_concat = TRUE;
  620. // PHP5 expat breaks before 8-bit characters
  621. if ($this->_concat)
  622. {
  623. $this->children[$parent][$nc - 1] .= $data;
  624. $this->_concat = (api_strlen($data) == 1);
  625. }
  626. else
  627. $this->children[$parent][] = $pcs ? "\n" . $data : $data;
  628. }
  629. }
  630. function _startNs($parser, $pfx, $uri) // called before _startElement
  631. {
  632. $this->_ns['xmlns' . ($pfx ? ':'.$pfx : '')] = $uri;
  633. $this->_nsp[$pfx] = $uri;
  634. }
  635. function _nsPfx($ppar, $uri) // find namespace prefix
  636. {
  637. while ($ppar >= 0)
  638. {
  639. foreach ($this->attributes[$ppar] as $name => $value)
  640. if (api_substr($name, 0, 5) == 'xmlns' && $value == $uri)
  641. {
  642. $pfxc = api_substr($name, 6) . api_substr($name, 5, 1); break 2;
  643. }
  644. $ppar = $this->parent[$ppar];
  645. }
  646. if ($ppar >= 0) return $pfxc; if ($uri == '') return '';
  647. if ($uri == 'http://www.w3.org/XML/1998/namespace') return 'xml:';
  648. foreach($this->_nsp as $pfx => $used) if (!$used) break;
  649. $this->_nsp[$pfx] = $uri; $xmlnspfx = 'xmlns:' . $pfx;
  650. $this->attributes[0][$xmlnspfx] = $uri; $this->atns[0][$xmlnspfx] = 0;
  651. return $pfx . ':';
  652. }
  653. function _lookup($name) // for namespaces + see cache
  654. {
  655. $where = array_search($name, $this->names);
  656. if ($where === FALSE || $where === NULL)
  657. {
  658. $where = count($this->names); $this->names[] = $name;
  659. }
  660. return $where;
  661. }
  662. }
  663. /*
  664. <!--
  665. This program is free software; you can redistribute it and/or
  666. modify it under the terms of the GNU General Public License
  667. as published by the Free Software Foundation; either version 2
  668. of the License, or (at your option) any later version.
  669. This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
  673. -->
  674. */
  675. ?>