<?php
/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.0

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

You may contact the author of Snoopy by e-mail at:
monte@ispi.net

Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.com

*************************************************/
  /**
   * Snoopy - the PHP net client.
   *
   * Fetches documents over HTTP through a plain socket (fsockopen) and over
   * HTTPS by executing the cURL binary named in $curl_path. Redirects
   * (Location/URI headers and meta refresh tags) and frame sources are
   * followed up to $maxredirs / $maxframes.
   *
   * Usage: set the public configuration properties, call fetch(), then read
   * $results, $headers, $status, $response_code and $error.
   */
  class Snoopy
  {
      /**** Public variables ****/

      /* user definable vars */

      public $host = "www.php.net";       // host name we are connecting to
      public $port = 80;                  // port we are connecting to
      public $proxy_host = "";            // proxy host to use
      public $proxy_port = "";            // proxy port to use
      public $agent = "Snoopy v1.0";      // agent we masquerade as
      public $referer = "";               // referer info to pass
      public $cookies = array();          // array of cookies to pass
                                          // $cookies["username"]="joe";
      public $rawheaders = array();       // array of raw headers to send
                                          // $rawheaders["Content-type"]="text/html";

      public $maxredirs = 5;              // http redirection depth maximum. 0 = disallow
      public $lastredirectaddr = "";      // contains address of last redirected address
      public $offsiteok = true;           // allows redirection off-site
      public $maxframes = 0;              // frame content depth maximum. 0 = disallow
      public $expandlinks = true;         // expand links to fully qualified URLs.
                                          // this only applies to fetchlinks()
                                          // or submitlinks()
      public $passcookies = true;         // pass set cookies back through redirects
                                          // NOTE: this currently does not respect
                                          // dates, domains or paths.

      public $user = "";                  // user for http authentication
      public $pass = "";                  // password for http authentication

      // http accept types
      public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

      public $results = "";               // where the content is put
                                          // (string, or array of strings once frames are fetched)

      public $error = "";                 // error messages sent here
      public $response_code = "";         // response code returned from server
      public $headers = array();          // headers returned from server sent here
      public $maxlength = 500000;         // max return data length (body)
      public $read_timeout = 0;           // timeout on read operations, in seconds
                                          // supported only since PHP 4 Beta 4
                                          // set to 0 to disallow timeouts
      public $timed_out = false;          // if a read operation timed out
      public $status = 0;                 // http request status

      public $curl_path = "/usr/bin/curl";
                                          // Snoopy will use cURL for fetching
                                          // SSL content if a full system path to
                                          // the cURL binary is supplied here.
                                          // set to false if you do not have
                                          // cURL installed. See http://curl.haxx.se
                                          // for details on installing cURL.
                                          // Snoopy does *not* use the cURL
                                          // library functions built into php,
                                          // as these functions are not stable
                                          // as of this Snoopy release.

      // send Accept-encoding: gzip?
      public $use_gzip = true;

      /**** Private variables ****/

      private $_maxlinelen = 4096;            // max line length (headers)

      private $_httpmethod = "GET";           // default http request method
      private $_httpversion = "HTTP/1.0";     // default http request version
      private $_submit_method = "POST";       // default submit method
      private $_submit_type = "application/x-www-form-urlencoded"; // default submit type
      private $_mime_boundary = "";           // MIME boundary for multipart/form-data submit type
      private $_redirectaddr = false;         // will be set if page fetched is a redirect
      private $_redirectdepth = 0;            // increments on an http redirect
      private $_frameurls = array();          // frame src urls
      private $_framedepth = 0;               // increments on frame depth

      private $_isproxy = false;              // set if using a proxy server
      private $_fp_timeout = 30;              // timeout for socket connection

      /**
       * Fetch the contents of a web page.
       *
       * Only the "http" and "https" schemes are supported. User/password
       * embedded in the URI overwrite $user / $pass. After the request,
       * redirects and frames are followed recursively within the configured
       * limits.
       *
       * @param string $URI the location of the page to fetch
       * @return bool false when the scheme is unsupported, the connection
       *              fails, cURL is unusable/fails, or a read times out
       *              ($error / $status / $timed_out describe the failure);
       *              true otherwise. The content is left in $results.
       */
      public function fetch($URI)
      {
          $URI_PARTS = parse_url($URI);
          if (!is_array($URI_PARTS)) {
              // parse_url() returns false on seriously malformed URLs
              $URI_PARTS = array();
          }

          if (!empty($URI_PARTS["user"])) {
              $this->user = $URI_PARTS["user"];
          }
          if (!empty($URI_PARTS["pass"])) {
              $this->pass = $URI_PARTS["pass"];
          }

          $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";

          switch (strtolower($scheme)) {
              case "http":
                  $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
                  if (!empty($URI_PARTS["port"])) {
                      $this->port = $URI_PARTS["port"];
                  }

                  $fp = false;
                  if (!$this->_connect($fp)) {
                      return false;
                  }

                  // a proxy needs the entire URI, a direct connection only the path
                  $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                  $ok = $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
                  $this->_disconnect($fp);
                  if (!$ok) {
                      return false;
                  }

                  $this->_follow_redirect_and_frames();
                  return true;

              case "https":
                  if (!$this->curl_path || (!is_executable($this->curl_path))) {
                      $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                      return false;
                  }
                  $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
                  if (!empty($URI_PARTS["port"])) {
                      $this->port = $URI_PARTS["port"];
                  }

                  $target = $this->_isproxy ? $URI : $this->_request_path($URI_PARTS);
                  if (!$this->_httpsrequest($target, $URI, $this->_httpmethod)) {
                      // do not follow a stale redirect address after a failed request
                      return false;
                  }

                  $this->_follow_redirect_and_frames();
                  return true;

              default:
                  // not a valid protocol
                  $this->error = 'Invalid protocol "' . $scheme . "\"\n";
                  return false;
          }
      }

      /*======================================================================*\
          Private functions
      \*======================================================================*/

      /**
       * Build the request target (path plus optional query string) for a
       * direct, non-proxy request.
       *
       * @param array $URI_PARTS result of parse_url()
       * @return string path beginning with "/" (defaults to "/" when the URI has no path)
       */
      private function _request_path($URI_PARTS)
      {
          $path = !empty($URI_PARTS["path"]) ? $URI_PARTS["path"] : "/";
          if (isset($URI_PARTS["query"])) {
              $path .= "?" . $URI_PARTS["query"];
          }
          return $path;
      }

      /**
       * Tell whether an absolute URL points at the host of the current request.
       *
       * @param string $address absolute URL to test
       * @return bool true when $address is http(s)://<current host> followed by
       *              a port, path, query, fragment or nothing
       */
      private function _is_onsite($address)
      {
          // the trailing group stops "host.evil.example" from passing as "host"
          $pattern = "~^https?://" . preg_quote($this->host, "~") . '(?:[:/?#]|$)~i';
          return (bool)preg_match($pattern, $address);
      }

      /**
       * After a request: follow a pending redirect, then fetch pending frame
       * URLs, both bounded by $maxredirs / $maxframes. Shared by the http and
       * https branches of fetch().
       *
       * @return void
       */
      private function _follow_redirect_and_frames()
      {
          /* url was redirected, check if we've hit the max depth */
          if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
              // only follow redirect if it's on this site, or offsiteok is true
              if ($this->offsiteok || $this->_is_onsite($this->_redirectaddr)) {
                  $this->_redirectdepth++;
                  $this->lastredirectaddr = $this->_redirectaddr;
                  $this->fetch($this->_redirectaddr);
              }
          }

          if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
              // work on a copy: the recursive fetch() calls refill $_frameurls
              $frameurls = $this->_frameurls;
              $this->_frameurls = array();

              foreach ($frameurls as $frameurl) {
                  if ($this->_framedepth >= $this->maxframes) {
                      break;
                  }
                  $this->fetch($frameurl);
                  $this->_framedepth++;
              }
          }
      }

      /**
       * Strip the hyperlinks from an html document.
       *
       * @param string $document document to strip
       * @return array the href values found (empty array when there are none)
       */
      private function _striplinks($document)
      {
          // ".*?" (lazy) so that every anchor is matched, not just the last one
          preg_match_all(
              "'<\s*a\s+.*?href\s*=\s*         # find <a href=
              ([\"\'])?                        # find single or double quote
              (?(1) (.*?)\\1 | ([^\s\>]+))     # if quote found, match up to next matching
                                               # quote, otherwise match up to next space
              'isx",
              $document,
              $links
          );

          // catenate the non-empty matches from the conditional subpattern:
          // quoted hrefs first (group 2), then unquoted ones (group 3)
          $match = array();
          foreach (array(2, 3) as $group) {
              foreach ($links[$group] as $val) {
                  if (!empty($val)) {
                      $match[] = $val;
                  }
              }
          }

          // return the links
          return $match;
      }

      /**
       * Strip the form elements from an html document.
       *
       * @param string $document document to strip
       * @return string the matched form-related tags joined with CRLF
       */
      private function _stripform($document)
      {
          preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi", $document, $elements);

          // catenate the matches
          return implode("\r\n", $elements[0]);
      }

      /**
       * Strip the text from an html document: removes scripts and tags,
       * collapses whitespace after line breaks and decodes a few common
       * entities.
       *
       * @param string $document document to strip
       * @return string the resulting text
       */
      private function _striptext($document)
      {
          // I didn't use preg eval (//e) since that is only available in PHP 4.0.
          // so, list your entities one by one here. I included some of the
          // more common ones.
          $search = array(
              "'<script[^>]*?>.*?</script>'si",   // strip out javascript
              "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
              "'([\r\n])[\s]+'",                  // strip out white space
              "'&(quote|#34);'i",                 // replace html entities
              "'&(amp|#38);'i",
              "'&(lt|#60);'i",
              "'&(gt|#62);'i",
              "'&(nbsp|#160);'i",
              "'&(iexcl|#161);'i",
              "'&(cent|#162);'i",
              "'&(pound|#163);'i",
              "'&(copy|#169);'i"
          );
          $replace = array(
              "",
              "",
              "\\1",
              "\"",
              "&",
              "<",
              ">",
              " ",
              chr(161),
              chr(162),
              chr(163),
              chr(169)
          );

          return preg_replace($search, $replace, $document);
      }

      /**
       * Expand each link into a fully qualified URL.
       *
       * @param string|array $links the links to qualify
       * @param string       $URI   the full URI to get the base from
       * @return string|array the expanded links (same shape as $links)
       */
      private function _expandlinks($links, $URI)
      {
          // base = URI without query string and without a trailing "name.ext" segment
          preg_match("/^[^\?]+/", $URI, $match);
          $base = isset($match[0]) ? $match[0] : "";
          $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $base);

          $search = array(
              "|^http://" . preg_quote($this->host, "|") . "|i",
              "|^(?!http://)(\/)?(?!mailto:)|i",
              "|/\./|",
              "|/[^\/]+/\.\./|"
          );
          $replace = array(
              "",
              $base . "/",
              "/",
              "/"
          );

          return preg_replace($search, $replace, $links);
      }

      /**
       * Assemble the request header lines common to the socket and cURL paths.
       *
       * @param string $content_type value for Content-type, "" for none
       * @param string $body         request body, used for Content-length
       * @param bool   $for_socket   true for the raw-socket path, which may
       *                             also advertise gzip support
       * @return array header lines without line terminators
       */
      private function _build_headers($content_type, $body, $for_socket)
      {
          $lines = array();

          if (!empty($this->agent)) {
              $lines[] = "User-Agent: " . $this->agent;
          }
          // a Host supplied through $rawheaders wins over the automatic one
          if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
              $lines[] = "Host: " . $this->host;
          }
          if (!empty($this->accept)) {
              $lines[] = "Accept: " . $this->accept;
          }
          if ($for_socket && $this->use_gzip) {
              // make sure PHP was built with --with-zlib
              // and we can handle gzipp'ed data
              if (function_exists('gzinflate')) {
                  $lines[] = "Accept-encoding: gzip";
              } else {
                  trigger_error(
                      "use_gzip is on, but PHP was built without zlib support." .
                      " Requesting file(s) without gzip encoding.",
                      E_USER_NOTICE
                  );
              }
          }
          if (!empty($this->referer)) {
              $lines[] = "Referer: " . $this->referer;
          }
          if (!empty($this->cookies)) {
              if (!is_array($this->cookies)) {
                  $this->cookies = (array)$this->cookies;
              }
              $pairs = array();
              foreach ($this->cookies as $cookieKey => $cookieVal) {
                  $pairs[] = $cookieKey . "=" . urlencode($cookieVal);
              }
              $lines[] = "Cookie: " . implode("; ", $pairs);
          }
          if (!empty($this->rawheaders)) {
              if (!is_array($this->rawheaders)) {
                  $this->rawheaders = (array)$this->rawheaders;
              }
              // foreach (unlike each()) re-sends the raw headers on every request
              foreach ($this->rawheaders as $headerKey => $headerVal) {
                  $lines[] = $headerKey . ": " . $headerVal;
              }
          }
          if (!empty($content_type)) {
              $line = "Content-type: $content_type";
              if ($content_type == "multipart/form-data") {
                  $line .= "; boundary=" . $this->_mime_boundary;
              }
              $lines[] = $line;
          }
          if (!empty($body)) {
              $lines[] = "Content-length: " . strlen($body);
          }
          if (!empty($this->user) || !empty($this->pass)) {
              $lines[] = "Authorization: BASIC " . base64_encode($this->user . ":" . $this->pass);
          }

          return $lines;
      }

      /**
       * Inspect one response header line: record a redirect target from
       * Location:/URI: headers and the status from the HTTP status line.
       *
       * @param string $header raw header line
       * @param string $base   "scheme://host[:port]" prepended to relative redirects
       * @return void
       */
      private function _process_response_header($header, $base)
      {
          // if a header begins with Location: or URI:, set the redirect
          if (preg_match("/^(?:Location|URI):\s*(.*)/i", chop($header), $matches) && $matches[1] !== "") {
              $location = $matches[1];

              // a leading "scheme://" means the hostname is included
              if (!preg_match("~^[a-z][a-z0-9+.\-]*://~i", $location)) {
                  // no host in the path, so prepend (avoiding a double slash)
                  if (!preg_match("|^/|", $location)) {
                      $location = "/" . $location;
                  }
                  $location = $base . $location;
              }
              $this->_redirectaddr = $location;
          }

          if (preg_match("|^HTTP/|", $header)) {
              if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $header, $status)) {
                  $this->status = $status[1];
              }
              $this->response_code = $header;
          }
      }

      /**
       * Post-process a fetched body: detect a meta refresh redirect, collect
       * frame URLs when frames may still be followed, and store the body in
       * $results (appended when framed content is being collected).
       *
       * @param string $results    the fetched body
       * @param string $URI        the full URI that was requested
       * @param string $frame_base "scheme://host" used to qualify frame sources
       * @return void
       */
      private function _store_results($results, $URI, $frame_base)
      {
          // check if there is a a redirect meta tag
          if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
              $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
          }

          // have we hit our frame depth and is there frame src to fetch?
          if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
              // "[]" on a string is a fatal error, so switch to an array first
              if (!is_array($this->results)) {
                  $this->results = array();
              }
              $this->results[] = $results;
              foreach ($match[1] as $frame_src) {
                  $this->_frameurls[] = $this->_expandlinks($frame_src, $frame_base);
              }
          } elseif (is_array($this->results)) {
              // have we already fetched framed content?
              $this->results[] = $results;
          } else {
              // no framed content
              $this->results = $results;
          }
      }

      /**
       * Go get the http data from the server over an open socket.
       *
       * @param string   $url          the request target (path, or full URI via a proxy)
       * @param resource $fp           the current open file pointer
       * @param string   $URI          the full URI
       * @param string   $http_method  request method, e.g. "GET"
       * @param string   $content_type Content-type to send, if any
       * @param string   $body         body contents to send if any (POST)
       * @return bool false when a read timed out ($status is set to -100), true otherwise
       */
      private function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "")
      {
          if ($this->passcookies && $this->_redirectaddr) {
              $this->setcookies();
          }

          $URI_PARTS = parse_url($URI);
          $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";
          if (empty($url)) {
              $url = "/";
          }

          $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
          foreach ($this->_build_headers($content_type, $body, true) as $line) {
              $headers .= $line . "\r\n";
          }
          $headers .= "\r\n";

          // set the read timeout if needed
          if ($this->read_timeout > 0) {
              socket_set_timeout($fp, $this->read_timeout);
          }
          $this->timed_out = false;

          fwrite($fp, $headers . $body, strlen($headers . $body));

          $this->_redirectaddr = false;
          // reset rather than unset(): setcookies() counts on the property existing
          $this->headers = array();

          // content was returned gzip encoded?
          $is_gzipped = false;
          $redirect_base = $scheme . "://" . $this->host . ":" . $this->port;

          while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
              if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
                  $this->status = -100;
                  return false;
              }

              // a blank line ends the header section
              if (preg_match("/^\r?\n$/", $currentHeader)) {
                  break;
              }

              $this->_process_response_header($currentHeader, $redirect_base);

              if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader)) {
                  $is_gzipped = true;
              }

              $this->headers[] = $currentHeader;
          }

          // read until EOF or until more than $maxlength bytes were collected
          $results = "";
          while (($data = fread($fp, $this->maxlength)) !== false && $data !== "") {
              $results .= $data;
              if (strlen($results) > $this->maxlength) {
                  break;
              }
          }

          // gunzip
          if ($is_gzipped && function_exists('gzinflate')) {
              // per http://www.php.net/manual/en/function.gzencode.php
              // (skip the 10-byte gzip header); keep the raw bytes if inflating fails
              $inflated = gzinflate(substr($results, 10));
              if ($inflated !== false) {
                  $results = $inflated;
              }
          }

          if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
              $this->status = -100;
              return false;
          }

          $this->_store_results($results, $URI, $scheme . "://" . $this->host);

          return true;
      }

      /**
       * Go get the https data from the server using the cURL binary.
       *
       * The request line is derived by cURL from $URI; $url and $http_method
       * are accepted for signature parity with _httprequest() but not used.
       *
       * @param string $url          the url to fetch (unused, see above)
       * @param string $URI          the full URI
       * @param string $http_method  request method (unused, see above)
       * @param string $content_type Content-type to send, if any
       * @param string $body         body contents to send if any (POST)
       * @return bool false when the temporary header file cannot be created or
       *              cURL exits non-zero ($error is set), true otherwise
       */
      private function _httpsrequest($url, $URI, $http_method, $content_type = "", $body = "")
      {
          if ($this->passcookies && $this->_redirectaddr) {
              $this->setcookies();
          }

          $URI_PARTS = parse_url($URI);
          $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";

          // every value goes through escapeshellarg(): the URI, headers and body
          // may carry untrusted data and must never be interpreted by the shell
          $cmdline_params = "";
          foreach ($this->_build_headers($content_type, $body, false) as $header) {
              $cmdline_params .= " -H " . escapeshellarg($header);
          }
          // NOTE(review): curl treats a -d value starting with "@" as a file
          // name to read from - confirm callers never pass such bodies.
          if (!empty($body)) {
              $cmdline_params .= " -d " . escapeshellarg($body);
          }
          if ($this->read_timeout > 0) {
              $cmdline_params .= " -m " . escapeshellarg((string)$this->read_timeout);
          }
          # accept self-signed certs
          $cmdline_params .= " -k";

          // unpredictable, exclusively created file for the response headers
          $headerfile = tempnam(sys_get_temp_dir(), "sno");
          if ($headerfile === false) {
              $this->error = "Error: could not create a temporary file for the cURL headers.";
              return false;
          }

          $results = array();
          $return = 0;
          exec(
              escapeshellarg($this->curl_path) . " -D " . escapeshellarg($headerfile) .
              $cmdline_params . " " . escapeshellarg($URI),
              $results,
              $return
          );

          if ($return) {
              unlink($headerfile);
              $this->error = "Error: cURL could not retrieve the document, error $return.";
              return false;
          }

          $results = implode("\r\n", $results);

          $result_headers = file($headerfile);
          unlink($headerfile);
          if (!is_array($result_headers)) {
              $result_headers = array();
          }

          $this->_redirectaddr = false;
          // reset rather than unset(): setcookies() counts on the property existing
          $this->headers = array();

          // relative redirects keep the port only when the URI named one;
          // $this->port defaults to 80, which is wrong for https
          $redirect_base = $scheme . "://" . $this->host;
          if (is_array($URI_PARTS) && !empty($URI_PARTS["port"])) {
              $redirect_base .= ":" . $URI_PARTS["port"];
          }

          foreach ($result_headers as $currentHeader) {
              $this->_process_response_header($currentHeader, $redirect_base);
              $this->headers[] = $currentHeader;
          }

          $this->_store_results($results, $URI, $scheme . "://" . $this->host);

          return true;
      }

      /**
       * Set cookies for a redirection: copies every Set-Cookie name/value pair
       * found in $headers into $cookies. Attributes after the first ";" are
       * ignored.
       *
       * @return void
       */
      public function setcookies()
      {
          if (!is_array($this->headers)) {
              return;
          }

          foreach ($this->headers as $header) {
              // chop() keeps the line terminator out of the captured value
              if (preg_match("/^set-cookie:[\s]*([^=]+)=([^;]+)/i", chop($header), $match)) {
                  // stored decoded, because values are urlencode()d again when sent
                  $this->cookies[$match[1]] = urldecode($match[2]);
              }
          }
      }

      /**
       * Checks whether timeout has occurred on the stream.
       *
       * @param resource $fp file pointer
       * @return bool true (and $timed_out set) when the stream reports a timeout
       */
      private function _check_timeout($fp)
      {
          if ($this->read_timeout > 0) {
              $fp_status = socket_get_status($fp);
              if ($fp_status["timed_out"]) {
                  $this->timed_out = true;
                  return true;
              }
          }
          return false;
      }

      /**
       * Make a socket connection to the target host, or to the proxy when both
       * $proxy_host and $proxy_port are set.
       *
       * @param resource|false $fp receives the opened file pointer
       * @return bool true on success; false on failure, with $status set to
       *              the socket errno and $error to a description
       */
      private function _connect(&$fp)
      {
          $this->_isproxy = (!empty($this->proxy_host) && !empty($this->proxy_port));
          if ($this->_isproxy) {
              $host = $this->proxy_host;
              $port = $this->proxy_port;
          } else {
              $host = $this->host;
              $port = $this->port;
          }

          $this->status = 0;

          $errno = 0;
          $errstr = "";
          $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
          if ($fp) {
              // socket connection succeeded
              return true;
          }

          // socket connection failed
          $this->status = $errno;
          switch ($errno) {
              case -3:
                  $this->error = "socket creation failed (-3)";
                  break;
              case -4:
                  $this->error = "dns lookup failure (-4)";
                  break;
              case -5:
                  $this->error = "connection refused or timed out (-5)";
                  break;
              default:
                  $this->error = "connection failed (" . $errno . ")";
          }
          return false;
      }

      /**
       * Disconnect a socket connection.
       *
       * @param resource $fp file pointer
       * @return bool result of fclose()
       */
      private function _disconnect($fp)
      {
          return fclose($fp);
      }

      /**
       * Prepare post body according to the current submit type
       * (application/x-www-form-urlencoded or multipart/form-data).
       *
       * @param array|mixed $formvars  form variables (name => value or list of values)
       * @param array|mixed $formfiles form upload files (field name => path or list of paths)
       * @return string post body; "" when there is nothing to send
       */
      private function _prepare_post_body($formvars, $formfiles)
      {
          settype($formvars, "array");
          settype($formfiles, "array");

          $postdata = "";
          if (count($formvars) == 0 && count($formfiles) == 0) {
              return $postdata;
          }

          switch ($this->_submit_type) {
              case "application/x-www-form-urlencoded":
                  foreach ($formvars as $key => $val) {
                      if (is_array($val) || is_object($val)) {
                          foreach ($val as $cur_val) {
                              $postdata .= urlencode($key) . "[]=" . urlencode($cur_val) . "&";
                          }
                      } else {
                          $postdata .= urlencode($key) . "=" . urlencode($val) . "&";
                      }
                  }
                  break;

              case "multipart/form-data":
                  $this->_mime_boundary = "Snoopy" . md5(uniqid(microtime()));

                  foreach ($formvars as $key => $val) {
                      if (is_array($val) || is_object($val)) {
                          foreach ($val as $cur_val) {
                              $postdata .= "--" . $this->_mime_boundary . "\r\n";
                              // {$key}[] - braces keep PHP from parsing "[" as an array index
                              $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                              $postdata .= "$cur_val\r\n";
                          }
                      } else {
                          $postdata .= "--" . $this->_mime_boundary . "\r\n";
                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                          $postdata .= "$val\r\n";
                      }
                  }

                  foreach ($formfiles as $field_name => $file_names) {
                      settype($file_names, "array");
                      foreach ($file_names as $file_name) {
                          if (!is_readable($file_name)) {
                              continue;
                          }

                          // binary-safe and also fine for empty files
                          $file_content = file_get_contents($file_name);
                          if ($file_content === false) {
                              continue;
                          }
                          $base_name = basename($file_name);

                          $postdata .= "--" . $this->_mime_boundary . "\r\n";
                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                          $postdata .= "$file_content\r\n";
                      }
                  }
                  $postdata .= "--" . $this->_mime_boundary . "--\r\n";
                  break;
          }

          return $postdata;
      }
  }