Diff.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. <?php
  2. /**
  3. * General API for generating and formatting diffs - the differences between
  4. * two sequences of strings.
  5. *
  6. * The original PHP version of this code was written by Geoffrey T. Dairiki
  7. * <dairiki@dairiki.org>, and is used/adapted with his permission.
  8. *
  9. * $Horde: framework/Text_Diff/Diff.php,v 1.11.2.12 2009/01/06 15:23:41 jan Exp $
  10. *
  11. * Copyright 2004 Geoffrey T. Dairiki <dairiki@dairiki.org>
  12. * Copyright 2004-2009 The Horde Project (http://www.horde.org/)
  13. *
  14. * See the enclosed file COPYING for license information (LGPL). If you did
  15. * not receive this file, see http://opensource.org/licenses/lgpl-license.php.
  16. *
  17. * @package Text_Diff
  18. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  19. */
  20. class Text_Diff
  21. {
  22. /**
  23. * Array of changes.
  24. *
  25. * @var array
  26. */
  27. var $_edits;
  28. /**
  29. * Computes diffs between sequences of strings.
  30. *
  31. * @param string $engine Name of the diffing engine to use. 'auto'
  32. * will automatically select the best.
  33. * @param array $params Parameters to pass to the diffing engine.
  34. * Normally an array of two arrays, each
  35. * containing the lines from a file.
  36. */
  37. function Text_Diff($engine, $params)
  38. {
  39. // Backward compatibility workaround.
  40. if (!is_string($engine)) {
  41. $params = array($engine, $params);
  42. $engine = 'auto';
  43. }
  44. if ($engine == 'auto') {
  45. $engine = extension_loaded('xdiff') ? 'xdiff' : 'native';
  46. } else {
  47. $engine = basename($engine);
  48. }
  49. require_once 'Text/Diff/Engine/' . $engine . '.php';
  50. $class = 'Text_Diff_Engine_' . $engine;
  51. $diff_engine = new $class();
  52. $this->_edits = call_user_func_array(array($diff_engine, 'diff'), $params);
  53. }
  54. /**
  55. * Returns the array of differences.
  56. */
  57. function getDiff()
  58. {
  59. return $this->_edits;
  60. }
  61. /**
  62. * returns the number of new (added) lines in a given diff.
  63. *
  64. * @since Text_Diff 1.1.0
  65. * @since Horde 3.2
  66. *
  67. * @return integer The number of new lines
  68. */
  69. function countAddedLines()
  70. {
  71. $count = 0;
  72. foreach ($this->_edits as $edit) {
  73. if (is_a($edit, 'Text_Diff_Op_add') ||
  74. is_a($edit, 'Text_Diff_Op_change')) {
  75. $count += $edit->nfinal();
  76. }
  77. }
  78. return $count;
  79. }
  80. /**
  81. * Returns the number of deleted (removed) lines in a given diff.
  82. *
  83. * @since Text_Diff 1.1.0
  84. * @since Horde 3.2
  85. *
  86. * @return integer The number of deleted lines
  87. */
  88. function countDeletedLines()
  89. {
  90. $count = 0;
  91. foreach ($this->_edits as $edit) {
  92. if (is_a($edit, 'Text_Diff_Op_delete') ||
  93. is_a($edit, 'Text_Diff_Op_change')) {
  94. $count += $edit->norig();
  95. }
  96. }
  97. return $count;
  98. }
  99. /**
  100. * Computes a reversed diff.
  101. *
  102. * Example:
  103. * <code>
  104. * $diff = new Text_Diff($lines1, $lines2);
  105. * $rev = $diff->reverse();
  106. * </code>
  107. *
  108. * @return Text_Diff A Diff object representing the inverse of the
  109. * original diff. Note that we purposely don't return a
  110. * reference here, since this essentially is a clone()
  111. * method.
  112. */
  113. function reverse()
  114. {
  115. if (version_compare(zend_version(), '2', '>')) {
  116. $rev = clone($this);
  117. } else {
  118. $rev = $this;
  119. }
  120. $rev->_edits = array();
  121. foreach ($this->_edits as $edit) {
  122. $rev->_edits[] = $edit->reverse();
  123. }
  124. return $rev;
  125. }
  126. /**
  127. * Checks for an empty diff.
  128. *
  129. * @return boolean True if two sequences were identical.
  130. */
  131. function isEmpty()
  132. {
  133. foreach ($this->_edits as $edit) {
  134. if (!is_a($edit, 'Text_Diff_Op_copy')) {
  135. return false;
  136. }
  137. }
  138. return true;
  139. }
  140. /**
  141. * Computes the length of the Longest Common Subsequence (LCS).
  142. *
  143. * This is mostly for diagnostic purposes.
  144. *
  145. * @return integer The length of the LCS.
  146. */
  147. function lcs()
  148. {
  149. $lcs = 0;
  150. foreach ($this->_edits as $edit) {
  151. if (is_a($edit, 'Text_Diff_Op_copy')) {
  152. $lcs += count($edit->orig);
  153. }
  154. }
  155. return $lcs;
  156. }
  157. /**
  158. * Gets the original set of lines.
  159. *
  160. * This reconstructs the $from_lines parameter passed to the constructor.
  161. *
  162. * @return array The original sequence of strings.
  163. */
  164. function getOriginal()
  165. {
  166. $lines = array();
  167. foreach ($this->_edits as $edit) {
  168. if ($edit->orig) {
  169. array_splice($lines, count($lines), 0, $edit->orig);
  170. }
  171. }
  172. return $lines;
  173. }
  174. /**
  175. * Gets the final set of lines.
  176. *
  177. * This reconstructs the $to_lines parameter passed to the constructor.
  178. *
  179. * @return array The sequence of strings.
  180. */
  181. function getFinal()
  182. {
  183. $lines = array();
  184. foreach ($this->_edits as $edit) {
  185. if ($edit->final) {
  186. array_splice($lines, count($lines), 0, $edit->final);
  187. }
  188. }
  189. return $lines;
  190. }
  191. /**
  192. * Removes trailing newlines from a line of text. This is meant to be used
  193. * with array_walk().
  194. *
  195. * @param string $line The line to trim.
  196. * @param integer $key The index of the line in the array. Not used.
  197. */
  198. function trimNewlines(&$line, $key)
  199. {
  200. $line = str_replace(array("\n", "\r"), '', $line);
  201. }
  202. /**
  203. * Determines the location of the system temporary directory.
  204. *
  205. * @static
  206. *
  207. * @access protected
  208. *
  209. * @return string A directory name which can be used for temp files.
  210. * Returns false if one could not be found.
  211. */
  212. function _getTempDir()
  213. {
  214. $tmp_locations = array('/tmp', '/var/tmp', 'c:\WUTemp', 'c:\temp',
  215. 'c:\windows\temp', 'c:\winnt\temp');
  216. /* Try PHP's upload_tmp_dir directive. */
  217. $tmp = ini_get('upload_tmp_dir');
  218. /* Otherwise, try to determine the TMPDIR environment variable. */
  219. if (!strlen($tmp)) {
  220. $tmp = getenv('TMPDIR');
  221. }
  222. /* If we still cannot determine a value, then cycle through a list of
  223. * preset possibilities. */
  224. while (!strlen($tmp) && count($tmp_locations)) {
  225. $tmp_check = array_shift($tmp_locations);
  226. if (@is_dir($tmp_check)) {
  227. $tmp = $tmp_check;
  228. }
  229. }
  230. /* If it is still empty, we have failed, so return false; otherwise
  231. * return the directory determined. */
  232. return strlen($tmp) ? $tmp : false;
  233. }
  234. /**
  235. * Checks a diff for validity.
  236. *
  237. * This is here only for debugging purposes.
  238. */
  239. function _check($from_lines, $to_lines)
  240. {
  241. if (serialize($from_lines) != serialize($this->getOriginal())) {
  242. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  243. }
  244. if (serialize($to_lines) != serialize($this->getFinal())) {
  245. trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
  246. }
  247. $rev = $this->reverse();
  248. if (serialize($to_lines) != serialize($rev->getOriginal())) {
  249. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  250. }
  251. if (serialize($from_lines) != serialize($rev->getFinal())) {
  252. trigger_error("Reversed final doesn't match", E_USER_ERROR);
  253. }
  254. $prevtype = null;
  255. foreach ($this->_edits as $edit) {
  256. if ($prevtype == get_class($edit)) {
  257. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  258. }
  259. $prevtype = get_class($edit);
  260. }
  261. return true;
  262. }
  263. }
  264. /**
  265. * @package Text_Diff
  266. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  267. */
  268. class Text_MappedDiff extends Text_Diff {
  269. /**
  270. * Computes a diff between sequences of strings.
  271. *
  272. * This can be used to compute things like case-insensitve diffs, or diffs
  273. * which ignore changes in white-space.
  274. *
  275. * @param array $from_lines An array of strings.
  276. * @param array $to_lines An array of strings.
  277. * @param array $mapped_from_lines This array should have the same size
  278. * number of elements as $from_lines. The
  279. * elements in $mapped_from_lines and
  280. * $mapped_to_lines are what is actually
  281. * compared when computing the diff.
  282. * @param array $mapped_to_lines This array should have the same number
  283. * of elements as $to_lines.
  284. */
  285. function Text_MappedDiff($from_lines, $to_lines,
  286. $mapped_from_lines, $mapped_to_lines)
  287. {
  288. assert(count($from_lines) == count($mapped_from_lines));
  289. assert(count($to_lines) == count($mapped_to_lines));
  290. parent::Text_Diff($mapped_from_lines, $mapped_to_lines);
  291. $xi = $yi = 0;
  292. for ($i = 0; $i < count($this->_edits); $i++) {
  293. $orig = &$this->_edits[$i]->orig;
  294. if (is_array($orig)) {
  295. $orig = array_slice($from_lines, $xi, count($orig));
  296. $xi += count($orig);
  297. }
  298. $final = &$this->_edits[$i]->final;
  299. if (is_array($final)) {
  300. $final = array_slice($to_lines, $yi, count($final));
  301. $yi += count($final);
  302. }
  303. }
  304. }
  305. }
  306. /**
  307. * @package Text_Diff
  308. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  309. *
  310. * @access private
  311. */
  312. class Text_Diff_Op {
  313. var $orig;
  314. var $final;
  315. function &reverse()
  316. {
  317. trigger_error('Abstract method', E_USER_ERROR);
  318. }
  319. function norig()
  320. {
  321. return $this->orig ? count($this->orig) : 0;
  322. }
  323. function nfinal()
  324. {
  325. return $this->final ? count($this->final) : 0;
  326. }
  327. }
  328. /**
  329. * @package Text_Diff
  330. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  331. *
  332. * @access private
  333. */
  334. class Text_Diff_Op_copy extends Text_Diff_Op {
  335. function Text_Diff_Op_copy($orig, $final = false)
  336. {
  337. if (!is_array($final)) {
  338. $final = $orig;
  339. }
  340. $this->orig = $orig;
  341. $this->final = $final;
  342. }
  343. function &reverse()
  344. {
  345. $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
  346. return $reverse;
  347. }
  348. }
  349. /**
  350. * @package Text_Diff
  351. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  352. *
  353. * @access private
  354. */
  355. class Text_Diff_Op_delete extends Text_Diff_Op
  356. {
  357. function Text_Diff_Op_delete($lines)
  358. {
  359. $this->orig = $lines;
  360. $this->final = false;
  361. }
  362. function &reverse()
  363. {
  364. $reverse = new Text_Diff_Op_add($this->orig);
  365. return $reverse;
  366. }
  367. }
  368. /**
  369. * @package Text_Diff
  370. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  371. *
  372. * @access private
  373. */
  374. class Text_Diff_Op_add extends Text_Diff_Op {
  375. function Text_Diff_Op_add($lines)
  376. {
  377. $this->final = $lines;
  378. $this->orig = false;
  379. }
  380. function &reverse()
  381. {
  382. $reverse = new Text_Diff_Op_delete($this->final);
  383. return $reverse;
  384. }
  385. }
  386. /**
  387. * @package Text_Diff
  388. * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
  389. *
  390. * @access private
  391. */
  392. class Text_Diff_Op_change extends Text_Diff_Op
  393. {
  394. function Text_Diff_Op_change($orig, $final)
  395. {
  396. $this->orig = $orig;
  397. $this->final = $final;
  398. }
  399. function &reverse()
  400. {
  401. $reverse = new Text_Diff_Op_change($this->final, $this->orig);
  402. return $reverse;
  403. }
  404. }