MimeDir.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. <?php
  2. namespace Sabre\VObject\Parser;
  3. use
  4. Sabre\VObject\ParseException,
  5. Sabre\VObject\EofException,
  6. Sabre\VObject\Component,
  7. Sabre\VObject\Property,
  8. Sabre\VObject\Component\VCalendar,
  9. Sabre\VObject\Component\VCard;
  10. /**
  11. * MimeDir parser.
  12. *
  13. * This class parses iCalendar/vCard files and returns the root component.
  14. *
  15. * The root is a VCalendar or VCard object holding everything that was parsed.
  16. *
  17. * @copyright Copyright (C) 2007-2014 fruux GmbH. All rights reserved.
  18. * @author Evert Pot (http://evertpot.com/)
  19. * @license http://sabre.io/license/ Modified BSD License
  20. */
  21. class MimeDir extends Parser {
  /**
   * Stream the parser reads its lines from.
   *
   * @var resource
   */
  protected $input;

  /**
   * Root component of the document that is being (or was last) parsed.
   *
   * @var Component
   */
  protected $root;

  /**
   * Parses an iCalendar or vCard document.
   *
   * Accepts either a string or a stream. When null is passed, the input
   * that was previously handed to this parser keeps being used.
   *
   * @param string|resource|null $input
   * @param int|null $options When not null, replaces the current options.
   * @return Component The root component built by parseDocument().
   */
  public function parse($input = null, $options = null) {

      // Forget the result of any previous run before doing anything else.
      $this->root = null;

      if ($input !== null) {
          $this->setInput($input);
      }

      if ($options !== null) {
          $this->options = $options;
      }

      $this->parseDocument();

      return $this->root;

  }
  /**
   * Sets the input buffer. Must be a string or stream.
   *
   * Strings are copied into a php://temp stream so the rest of the parser
   * only ever deals with streams.
   *
   * The line counters are always reset. The look-ahead line kept by
   * readLine() is discarded whenever the underlying stream changes, so a
   * line left over from a previous input can never leak into the new one.
   * Passing the very same stream again keeps the look-ahead line, because
   * that line has already been consumed from the stream.
   *
   * @param resource|string $input
   * @throws \InvalidArgumentException When $input is neither a string nor a stream.
   * @return void
   */
  public function setInput($input) {

      // Resetting the parser
      $this->lineIndex = 0;
      $this->startLine = 0;

      if (is_string($input)) {
          // Converting to a stream.
          $stream = fopen('php://temp', 'r+');
          fwrite($stream, $input);
          rewind($stream);
      } elseif (is_resource($input)) {
          $stream = $input;
      } else {
          throw new \InvalidArgumentException('This parser can only read from strings or streams.');
      }

      if ($stream !== $this->input) {
          // New source: the buffered look-ahead line belongs to the old one.
          $this->lineBuffer = null;
      }

      $this->input = $stream;

  }
  /**
   * Parses an entire document.
   *
   * The first line decides which root component is created: it must be
   * BEGIN:VCALENDAR or BEGIN:VCARD (case-insensitive). Every following line
   * is handed to parseLine() and the result, if any, is added to the root
   * until a line starting with END: is read. The name on that END: line
   * has to match the name of the root component.
   *
   * @throws ParseException When the document does not start with a supported
   *                        component or is closed by the wrong END: line.
   * @return void
   */
  protected function parseDocument() {

      $line = $this->readLine();

      switch (strtoupper($line)) {
          case 'BEGIN:VCALENDAR' :
              // Honour a custom class registered for the root component.
              $class = isset(VCalendar::$componentMap['VCALENDAR'])
                  ? VCalendar::$componentMap['VCALENDAR']
                  : 'Sabre\\VObject\\Component\\VCalendar';
              break;
          case 'BEGIN:VCARD' :
              $class = isset(VCard::$componentMap['VCARD'])
                  ? VCard::$componentMap['VCARD']
                  : 'Sabre\\VObject\\Component\\VCard';
              break;
          default :
              throw new ParseException('This parser only supports VCARD and VCALENDAR files');
      }

      $this->root = new $class(array(), false);

      while (true) {

          // Reading until we hit END:
          $line = $this->readLine();
          if (strtoupper(substr($line, 0, 4)) === 'END:') {
              break;
          }

          $result = $this->parseLine($line);
          if ($result) {
              $this->root->add($result);
          }

      }

      // Whatever follows "END:" must name the component we opened.
      $name = strtoupper(substr($line, 4));
      if ($name !== $this->root->name) {
          throw new ParseException('Invalid MimeDir file. expected: "END:' . $this->root->name . '" got: "END:' . $name . '"');
      }

  }
  /**
   * Turns one unfolded line into a node.
   *
   * A line starting with BEGIN: opens a component; in that case all lines up
   * to the matching END: line are read and parsed recursively, and the
   * finished component is returned. Any other line is passed to
   * readProperty().
   *
   * @param string $line Unfolded line
   * @throws ParseException When a component is closed by an END: line
   *                        carrying a different name.
   * @return Component|Property|false False when the line was ignored.
   */
  protected function parseLine($line) {

      $opensComponent = strtoupper(substr($line, 0, 6)) === 'BEGIN:';

      if (!$opensComponent) {
          // Plain property line; a falsy result means the line was ignored.
          $property = $this->readProperty($line);
          return $property ? $property : false;
      }

      $component = $this->root->createComponent(substr($line, 6), array(), false);

      // Consume the body of the component up to its END: line.
      $line = $this->readLine();
      while (strtoupper(substr($line, 0, 4)) !== 'END:') {

          $child = $this->parseLine($line);
          if ($child) {
              $component->add($child);
          }

          $line = $this->readLine();

      }

      $name = strtoupper(substr($line, 4));
      if ($name !== $component->name) {
          throw new ParseException('Invalid MimeDir file. expected: "END:' . $component->name . '" got: "END:' . $name . '"');
      }

      return $component;

  }
  /**
   * We need to look ahead 1 line every time to see if we need to 'unfold'
   * the next line.
   *
   * If the line we looked at turned out not to be a continuation, it is
   * stored here and returned by the next call to readLine().
   *
   * @var null|string
   */
  protected $lineBuffer;

  /**
   * The real current line number, counted in physical lines read from the
   * stream.
   *
   * @var int
   */
  protected $lineIndex = 0;

  /**
   * In the case of unfolded lines, this property holds the line number for
   * the start of the line.
   *
   * @var int
   */
  protected $startLine = 0;

  /**
   * Contains a 'raw' representation of the current line: continuation lines
   * are joined with "\n " instead of being unfolded.
   *
   * @var string
   */
  protected $rawLine;

  /**
   * Reads a single logical line from the buffer.
   *
   * This method strips line endings, skips blank lines and takes care of
   * unfolding: following lines that start with a space or a tab are
   * appended to the current line without that first character.
   *
   * Every physical line that is read is counted in $lineIndex, including
   * blank lines that are skipped, so $startLine matches the position in
   * the input.
   *
   * @throws EofException When the end of the input is reached before a
   *                      line could be read.
   * @throws ParseException When reading from the stream fails.
   * @return string The unfolded line.
   */
  protected function readLine() {

      if (!is_null($this->lineBuffer)) {
          // The previous call already read (and counted) this line.
          $rawLine = $this->lineBuffer;
          $this->lineBuffer = null;
      } else {
          do {
              $eof = feof($this->input);

              $rawLine = fgets($this->input);

              if ($eof || (feof($this->input) && $rawLine === false)) {
                  throw new EofException('End of document reached prematurely');
              }
              if ($rawLine === false) {
                  throw new ParseException('Error reading from input stream');
              }

              $this->lineIndex++;
              $rawLine = rtrim($rawLine, "\r\n");
          } while ($rawLine === ''); // Skipping empty lines
      }

      $line = $rawLine;

      $this->startLine = $this->lineIndex;

      // Looking ahead for folded lines.
      while (true) {

          $nextLine = fgets($this->input);
          if ($nextLine === false) {
              // Nothing left to look at (end of input or read failure).
              break;
          }

          $this->lineIndex++;
          $nextLine = rtrim($nextLine, "\r\n");

          // Compare against '' explicitly: a line holding just "0" is not
          // blank and must not be dropped.
          if ($nextLine === '') {
              break;
          }

          if ($nextLine[0] === "\t" || $nextLine[0] === " ") {
              $line .= substr($nextLine, 1);
              $rawLine .= "\n " . substr($nextLine, 1);
          } else {
              // Not a continuation: keep it for the next call.
              $this->lineBuffer = $nextLine;
              break;
          }

      }

      $this->rawLine = $rawLine;

      return $line;

  }
  /**
   * Reads a property from an unfolded line.
   *
   * The line is tokenized into a property name, parameter names, parameter
   * values and the property value. Parameters that came without a value are
   * added to the property as nameless parameters. When the property carries
   * ENCODING=QUOTED-PRINTABLE, its value is taken from the raw line via
   * extractQuotedPrintableValue() instead of from the unfolded line.
   *
   * @param string $line Unfolded line
   * @throws ParseException When no property name could be found and
   *                        OPTION_IGNORE_INVALID_LINES is not set.
   * @return Property|false False when an invalid line was ignored.
   */
  protected function readProperty($line) {

      if ($this->options & self::OPTION_FORGIVING) {
          $propNameToken = 'A-Z0-9\-\._\\/';
      } else {
          $propNameToken = 'A-Z0-9\-\.';
      }

      $paramNameToken = 'A-Z0-9\-';
      $safeChar = '^";:,';
      $qSafeChar = '^"';

      $regex = "/
          ^(?P<name> [$propNameToken]+ ) (?=[;:]) # property name
          |
          (?<=:)(?P<propValue> .+)$ # property value
          |
          ;(?P<paramName> [$paramNameToken]+) (?=[=;:]) # parameter name
          |
          (=|,)(?P<paramValue> # parameter value
          (?: [$safeChar]*) |
          \"(?: [$qSafeChar]+)\"
          ) (?=[;:,])
          /xi";

      preg_match_all($regex, $line, $matches, PREG_SET_ORDER);

      $property = array(
          'name' => null,
          'parameters' => array(),
          'value' => null
      );

      $lastParam = null;

      /**
       * Looping through all the tokens.
       *
       * The checks below run from the last named group to the first one.
       * When a later alternative matched, preg_match_all() still reports the
       * earlier groups (as empty strings), while groups after the matching
       * one are left out of the result. Testing in reverse order therefore
       * finds the group that really matched.
       */
      foreach ($matches as $match) {

          if (isset($match['paramValue'])) {
              if ($match['paramValue'] && $match['paramValue'][0] === '"') {
                  $value = substr($match['paramValue'], 1, -1);
              } else {
                  $value = $match['paramValue'];
              }

              $value = $this->unescapeParam($value);

              // On malformed lines a value can show up before any parameter
              // name. PHP would turn the null key into '' anyway; doing it
              // explicitly avoids notices on the lookups below.
              $paramKey = (string)$lastParam;

              if (!isset($property['parameters'][$paramKey])) {
                  $property['parameters'][$paramKey] = $value;
              } elseif (is_array($property['parameters'][$paramKey])) {
                  $property['parameters'][$paramKey][] = $value;
              } else {
                  // Second value for this parameter: switch to a list.
                  $property['parameters'][$paramKey] = array(
                      $property['parameters'][$paramKey],
                      $value
                  );
              }
              continue;
          }

          if (isset($match['paramName'])) {
              $lastParam = strtoupper($match['paramName']);
              if (!isset($property['parameters'][$lastParam])) {
                  $property['parameters'][$lastParam] = null;
              }
              continue;
          }

          if (isset($match['propValue'])) {
              $property['value'] = $match['propValue'];
              continue;
          }

          // Compare against '' rather than relying on truthiness, so a name
          // of "0" is handled like any other token instead of falling
          // through to the exception below.
          if (isset($match['name']) && $match['name'] !== '') {
              $property['name'] = strtoupper($match['name']);
              continue;
          }

          // @codeCoverageIgnoreStart
          throw new \LogicException('This code should not be reachable');
          // @codeCoverageIgnoreEnd

      }

      if (is_null($property['value'])) {
          $property['value'] = '';
      }

      if (!$property['name']) {
          if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
              return false;
          }
          throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
      }

      // vCard 2.1 states that parameters may appear without a name, and only
      // a value. We can deduce the name based on its value.
      //
      // Our parser will get those as parameters without a value instead, so
      // we're filtering these parameters out first.
      $namedParameters = array();
      $namelessParameters = array();

      foreach ($property['parameters'] as $name => $value) {
          if (!is_null($value)) {
              $namedParameters[$name] = $value;
          } else {
              $namelessParameters[] = $name;
          }
      }

      $propObj = $this->root->createProperty($property['name'], null, $namedParameters);

      foreach ($namelessParameters as $namelessParameter) {
          $propObj->add(null, $namelessParameter);
      }

      // The explicit cast keeps strtoupper() from receiving null (or an
      // object) when the parameter is absent.
      if (strtoupper((string)$propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
          $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
      } else {
          $propObj->setRawMimeDirValue($property['value']);
      }

      return $propObj;

  }
  /**
   * Unescapes a property value.
   *
   * vCard 2.1 says:
   *   * Semi-colons must be escaped in some property values, specifically
   *     ADR, ORG and N.
   *   * Semi-colons must be escaped in parameter values, because semi-colons
   *     are also used to separate values.
   *   * No mention of escaping backslashes with another backslash.
   *   * Newlines are not escaped either, instead QUOTED-PRINTABLE is used to
   *     span values over more than 1 line.
   *
   * vCard 3.0 says:
   *   * (rfc2425) Backslashes, newlines (\n or \N) and commas must be
   *     escaped, all the time.
   *   * Commas are used as delimiters in multiple values.
   *   * (rfc2426) Adds to this that the semi-colon MUST also be escaped,
   *     as in some properties semi-colon is used for separators.
   *   * Properties using semi-colons: N, ADR, GEO, ORG
   *   * Both ADR and N's individual parts may be broken up further with a
   *     comma.
   *   * Properties using commas: NICKNAME, CATEGORIES
   *
   * vCard 4.0 (rfc6350) says:
   *   * Commas must be escaped.
   *   * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
   *     delimiter, depending on the property.
   *   * Backslashes must be escaped.
   *   * Newlines must be escaped as either \N or \n.
   *   * Some compound properties may contain multiple parts themselves, so a
   *     comma within a semi-colon delimited property may also be unescaped
   *     to denote multiple parts _within_ the compound property.
   *   * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
   *   * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
   *
   * Even though the spec says that commas must always be escaped, the
   * example for GEO in Section 6.5.2 seems to violate this.
   *
   * iCalendar 2.0 (rfc5545) says:
   *   * Commas or semi-colons may be used as delimiters, depending on the
   *     property.
   *   * Commas, semi-colons, backslashes, newline (\N or \n) are always
   *     escaped, unless they are delimiters.
   *   * Colons shall not be escaped.
   *   * Commas can be considered the 'default delimiter' and is described as
   *     the delimiter in cases where the order of the multiple values is
   *     insignificant.
   *   * Semi-colons are described as the delimiter for 'structured values'.
   *     They are specifically used in REQUEST-STATUS, RRULE, GEO and EXRULE.
   *     EXRULE is deprecated however.
   *
   * What this method does: the escape sequences \\, \N, \n, \; and \, are
   * replaced by a backslash, a newline, a newline, a semi-colon and a comma.
   * Any other text is copied as it is.
   *
   * If the delimiter is empty (null) this method will just return a string.
   * Otherwise the input is split on every unescaped occurrence of the
   * delimiter and an array is returned, always. The delimiter is matched
   * literally, so characters with a meaning in regular expressions are safe
   * to use.
   *
   * @param string $input
   * @param string|null $delimiter
   * @return string|string[]
   */
  static public function unescapeValue($input, $delimiter = ';') {

      $regex = '#  (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
      if ($delimiter) {
          // Quote the delimiter so it can never be read as regex syntax.
          $regex .= ' | (' . preg_quote($delimiter, '#') . ')';
      }
      $regex .= ') #x';

      $matches = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

      // Escape sequence => the text it stands for.
      $escapes = array(
          '\\\\' => '\\',
          '\N'   => "\n",
          '\n'   => "\n",
          '\;'   => ';',
          '\,'   => ',',
      );

      $resultArray = array();
      $result = '';

      foreach ($matches as $match) {

          if (isset($escapes[$match])) {
              $result .= $escapes[$match];
          } elseif ($delimiter && $match === $delimiter) {
              // Unescaped delimiter: close the current part.
              $resultArray[] = $result;
              $result = '';
          } else {
              $result .= $match;
          }

      }

      $resultArray[] = $result;

      return $delimiter ? $resultArray : $result;

  }
  /**
   * Unescapes a parameter value.
   *
   * vCard 2.1:
   *   * Does not mention a mechanism for this. In addition, double quotes
   *     are never used to wrap values.
   *   * This means that parameters can simply not contain colons or
   *     semi-colons.
   *
   * vCard 3.0 (rfc2425, rfc2426):
   *   * Parameters _may_ be surrounded by double quotes.
   *   * If this is not the case, semi-colon, colon and comma may simply not
   *     occur (the comma is used for multiple parameter values though).
   *   * If it is surrounded by double-quotes, it may simply not contain
   *     double-quotes.
   *   * This means that a parameter can in no case encode double-quotes, or
   *     newlines.
   *
   * vCard 4.0 (rfc6350)
   *   * Behavior seems to be identical to vCard 3.0
   *
   * iCalendar 2.0 (rfc5545)
   *   * Behavior seems to be identical to vCard 3.0
   *
   * Parameter escaping mechanism (rfc6868), which is what this method
   * decodes:
   *   * ^n stands for a new-line
   *   * ^^ stands for ^
   *   * ^' stands for "
   *
   * @param string $input
   * @return string
   */
  private function unescapeParam($input) {

      // Character following the caret => decoded character.
      $decoded = array(
          'n'  => "\n",
          '^'  => '^',
          '\'' => '"',
      );

      $replace = function($matches) use ($decoded) {
          // The pattern only lets through characters that are keys above.
          return $decoded[$matches[2]];
      };

      return preg_replace_callback('#(\^(\^|n|\'))#', $replace, $input);

  }
  /**
   * Gets the full quoted printable value.
   *
   * We need a special method for this, because newlines have both a meaning
   * in vCards, and in QuotedPrintable.
   *
   * The value is taken from the raw (not unfolded) current line: everything
   * after the first colon that is not inside a double-quoted parameter
   * value. This method does not do any decoding.
   *
   * @return string
   */
  private function extractQuotedPrintableValue() {

      // We need to parse the raw line again to get the start of the value.
      //
      // We are basically looking for the first colon (:), but we need to
      // skip over the parameters first, as a quoted one may contain a colon.
      $regex = '/^
          (?: [^:"] | "[^"]*" )+  # name and parameters, quoted values are skipped as a whole
          :                       # start of the value we really care about
          (.*)$
          /xs';

      if (preg_match($regex, $this->rawLine, $matches)) {
          $value = $matches[1];
      } else {
          // No match, for instance because of an unbalanced double quote:
          // fall back to everything after the first colon.
          $colon = strpos($this->rawLine, ':');
          $value = $colon === false ? '' : (string)substr($this->rawLine, $colon + 1);
      }

      // Removing the first whitespace character from every line. Kind of
      // like unfolding, but we keep the newline.
      $value = str_replace("\n ", "\n", $value);

      // Microsoft products don't always correctly fold lines, they may be
      // missing a whitespace. So if 'forgiving' is turned on, we will take
      // those as well.
      if ($this->options & self::OPTION_FORGIVING) {
          // A trailing '=' is a quoted-printable soft line break, so the
          // value continues on the next line.
          while (substr($value, -1) === '=') {
              // Reading the line
              $this->readLine();
              // Grabbing the raw form
              $value .= "\n" . $this->rawLine;
          }
      }

      return $value;

  }
  524. }