772 |
florian |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/*************************************************
|
|
|
4 |
|
|
|
5 |
Snoopy - the PHP net client
|
|
|
6 |
Author: Monte Ohrt <monte@ispi.net>
|
|
|
7 |
Copyright (c): 1999-2000 ispi, all rights reserved
|
|
|
8 |
Version: 1.0
|
|
|
9 |
|
|
|
10 |
* This library is free software; you can redistribute it and/or
|
|
|
11 |
* modify it under the terms of the GNU Lesser General Public
|
|
|
12 |
* License as published by the Free Software Foundation; either
|
|
|
13 |
* version 2.1 of the License, or (at your option) any later version.
|
|
|
14 |
*
|
|
|
15 |
* This library is distributed in the hope that it will be useful,
|
|
|
16 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
17 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
18 |
* Lesser General Public License for more details.
|
|
|
19 |
*
|
|
|
20 |
* You should have received a copy of the GNU Lesser General Public
|
|
|
21 |
* License along with this library; if not, write to the Free Software
|
|
|
22 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
23 |
|
|
|
24 |
You may contact the author of Snoopy by e-mail at:
|
|
|
25 |
monte@ispi.net
|
|
|
26 |
|
|
|
27 |
Or, write to:
|
|
|
28 |
Monte Ohrt
|
|
|
29 |
CTO, ispi
|
|
|
30 |
237 S. 70th suite 220
|
|
|
31 |
Lincoln, NE 68510
|
|
|
32 |
|
|
|
33 |
The latest version of Snoopy can be obtained from:
|
|
|
34 |
http://snoopy.sourceforge.net
|
|
|
35 |
|
|
|
36 |
*************************************************/
|
|
|
37 |
|
|
|
38 |
class Snoopy
|
|
|
39 |
{
|
|
|
40 |
/**** Public variables ****/
|
|
|
41 |
|
|
|
42 |
/* user definable vars */
|
|
|
43 |
|
|
|
44 |
var $host = "www.php.net"; // host name we are connecting to
|
|
|
45 |
var $port = 80; // port we are connecting to
|
|
|
46 |
var $proxy_host = ""; // proxy host to use
|
|
|
47 |
var $proxy_port = ""; // proxy port to use
|
|
|
48 |
var $agent = "Snoopy v1.0"; // agent we masquerade as
|
|
|
49 |
var $referer = ""; // referer info to pass
|
|
|
50 |
var $cookies = array(); // array of cookies to pass
|
|
|
51 |
// $cookies["username"]="joe";
|
|
|
52 |
var $rawheaders = array(); // array of raw headers to send
|
|
|
53 |
// $rawheaders["Content-type"]="text/html";
|
|
|
54 |
|
|
|
55 |
var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
|
|
|
56 |
var $lastredirectaddr = ""; // contains address of last redirected address
|
|
|
57 |
var $offsiteok = true; // allows redirection off-site
|
|
|
58 |
var $maxframes = 0; // frame content depth maximum. 0 = disallow
|
|
|
59 |
var $expandlinks = true; // expand links to fully qualified URLs.
|
|
|
60 |
// this only applies to fetchlinks()
|
|
|
61 |
// or submitlinks()
|
|
|
62 |
var $passcookies = true; // pass set cookies back through redirects
|
|
|
63 |
// NOTE: this currently does not respect
|
|
|
64 |
// dates, domains or paths.
|
|
|
65 |
|
|
|
66 |
var $user = ""; // user for http authentication
|
|
|
67 |
var $pass = ""; // password for http authentication
|
|
|
68 |
|
|
|
69 |
// http accept types
|
|
|
70 |
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
|
|
|
71 |
|
|
|
72 |
var $results = ""; // where the content is put
|
|
|
73 |
|
|
|
74 |
var $error = ""; // error messages sent here
|
|
|
75 |
var $response_code = ""; // response code returned from server
|
|
|
76 |
var $headers = array(); // headers returned from server sent here
|
|
|
77 |
var $maxlength = 500000; // max return data length (body)
|
|
|
78 |
var $read_timeout = 0; // timeout on read operations, in seconds
|
|
|
79 |
// supported only since PHP 4 Beta 4
|
|
|
80 |
// set to 0 to disallow timeouts
|
|
|
81 |
var $timed_out = false; // if a read operation timed out
|
|
|
82 |
var $status = 0; // http request status
|
|
|
83 |
|
|
|
84 |
var $curl_path = "/usr/bin/curl";
|
|
|
85 |
// Snoopy will use cURL for fetching
|
|
|
86 |
// SSL content if a full system path to
|
|
|
87 |
// the cURL binary is supplied here.
|
|
|
88 |
// set to false if you do not have
|
|
|
89 |
// cURL installed. See http://curl.haxx.se
|
|
|
90 |
// for details on installing cURL.
|
|
|
91 |
// Snoopy does *not* use the cURL
|
|
|
92 |
// library functions built into php,
|
|
|
93 |
// as these functions are not stable
|
|
|
94 |
// as of this Snoopy release.
|
|
|
95 |
|
|
|
96 |
// send Accept-encoding: gzip?
|
|
|
97 |
var $use_gzip = true;
|
|
|
98 |
|
|
|
99 |
/**** Private variables ****/
|
|
|
100 |
|
|
|
101 |
var $_maxlinelen = 4096; // max line length (headers)
|
|
|
102 |
|
|
|
103 |
var $_httpmethod = "GET"; // default http request method
|
|
|
104 |
var $_httpversion = "HTTP/1.0"; // default http request version
|
|
|
105 |
var $_submit_method = "POST"; // default submit method
|
|
|
106 |
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
|
|
|
107 |
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
|
|
|
108 |
var $_redirectaddr = false; // will be set if page fetched is a redirect
|
|
|
109 |
var $_redirectdepth = 0; // increments on an http redirect
|
|
|
110 |
var $_frameurls = array(); // frame src urls
|
|
|
111 |
var $_framedepth = 0; // increments on frame depth
|
|
|
112 |
|
|
|
113 |
var $_isproxy = false; // set if using a proxy server
|
|
|
114 |
var $_fp_timeout = 30; // timeout for socket connection
|
|
|
115 |
|
|
|
116 |
/*======================================================================*\
|
|
|
117 |
Function: fetch
|
|
|
118 |
Purpose: fetch the contents of a web page
|
|
|
119 |
(and possibly other protocols in the
|
|
|
120 |
future like ftp, nntp, gopher, etc.)
|
|
|
121 |
Input: $URI the location of the page to fetch
|
|
|
122 |
Output: $this->results the output text from the fetch
|
|
|
123 |
\*======================================================================*/
|
|
|
124 |
|
|
|
125 |
/**
 * Fetch a page over http (socket) or https (external curl binary), then
 * follow any redirect and fetch any frame sources the request discovered.
 *
 * Results are left in $this->results; failures are described in $this->error.
 *
 * @param  string $URI  absolute URI of the page to fetch
 * @return bool         false when the scheme is unsupported, the curl binary
 *                      is unusable, or the socket connection fails; true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();    // parse_url() returns false for seriously malformed URLs

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";

    // request target used when no proxy is involved: path plus optional query
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
    if (isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    switch ($scheme)
    {
        case "http":
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // a proxy needs the entire URI, an origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path || !is_executable($this->curl_path))
            {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme."\"\n";
            return false;
    }

    // url was redirected: follow it unless we have hit the max depth
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // "on this site" means same host over http or https; the trailing
        // group stops "host.evil.example" from passing as "host"
        $onsite = preg_match(
            "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|$)~i",
            $this->_redirectaddr
        );
        if ($onsite || $this->offsiteok)
        {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    // fetch the frame sources collected by the request, up to maxframes
    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
|
|
|
258 |
|
|
|
259 |
|
|
|
260 |
|
|
|
261 |
/*======================================================================*\
|
|
|
262 |
Private functions
|
|
|
263 |
\*======================================================================*/
|
|
|
264 |
|
|
|
265 |
|
|
|
266 |
/*======================================================================*\
|
|
|
267 |
Function: _striplinks
|
|
|
268 |
Purpose: strip the hyperlinks from an html document
|
|
|
269 |
Input: $document document to strip.
|
|
|
270 |
Output: $match an array of the links
|
|
|
271 |
\*======================================================================*/
|
|
|
272 |
|
|
|
273 |
/**
 * Extract the href targets of all <a> tags in an HTML document.
 *
 * Quoted href values are returned first (in document order), followed by
 * unquoted ones (in document order).
 *
 * @param  string $document  HTML to scan
 * @return array             list of link targets; empty array when none are found
 */
function _striplinks($document)
{
    // [^>]*? keeps the search inside the current tag; the former greedy ".*"
    // (with /s) ran to the LAST href in the document, so only one link was found.
    preg_match_all("'<\s*a\s+[^>]*?href\s*=\s*    # find <a href=
                    ([\"\'])?                     # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))  # if quote found, match up to next matching
                                                  # quote, otherwise match up to next space
                    'isx", $document, $links);

    $match = array();

    // catenate the non-empty matches from both arms of the conditional
    // subpattern: group 2 holds quoted values, group 3 unquoted ones
    foreach (array(2, 3) as $group)
    {
        foreach ($links[$group] as $val)
        {
            if ($val !== "")
                $match[] = $val;
        }
    }

    return $match;
}
|
|
|
299 |
|
|
|
300 |
/*======================================================================*\
|
|
|
301 |
Function: _stripform
|
|
|
302 |
Purpose: strip the form elements from an html document
|
|
|
303 |
Input: $document document to strip.
|
|
|
304 |
Output: $match an array of the links
|
|
|
305 |
\*======================================================================*/
|
|
|
306 |
|
|
|
307 |
/**
 * Reduce an HTML document to its form markup.
 *
 * Keeps FORM, INPUT, SELECT, TEXTAREA and OPTION tags (OPTION tags together
 * with the text that follows them) and drops everything else.
 *
 * @param  string $document  HTML to scan
 * @return string            the matched elements joined with CRLF
 */
function _stripform($document)
{
    $form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($form_pattern, $document, $found);

    // index 0 holds the full text of every match
    return implode("\r\n", $found[0]);
}
|
|
|
317 |
|
|
|
318 |
|
|
|
319 |
|
|
|
320 |
/*======================================================================*\
|
|
|
321 |
Function: _striptext
|
|
|
322 |
Purpose: strip the text from an html document
|
|
|
323 |
Input: $document document to strip.
|
|
|
324 |
Output: $text the resulting text
|
|
|
325 |
\*======================================================================*/
|
|
|
326 |
|
|
|
327 |
/**
 * Convert an HTML document to plain text.
 *
 * Removes <script> blocks and all tags, collapses whitespace that follows a
 * line break, and decodes a small fixed set of entities. The replacements
 * run in the order listed below.
 *
 * @param  string $document  HTML to convert
 * @return string            the resulting text
 */
function _striptext($document)
{
    // pattern => replacement, applied top to bottom
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",        // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",        // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",     // strip out white space
        // the entity is &quot; -- the former pattern only knew the
        // misspelling &quote;, which is still accepted here
        "'&(quote?|#34);'i"              => "\"",
        "'&(amp|#38);'i"                 => "&",
        "'&(lt|#60);'i"                  => "<",
        "'&(gt|#62);'i"                  => ">",
        "'&(nbsp|#160);'i"               => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169)
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
|
|
|
364 |
|
|
|
365 |
/*======================================================================*\
|
|
|
366 |
Function: _expandlinks
|
|
|
367 |
Purpose: expand each link into a fully qualified URL
|
|
|
368 |
Input: $links the links to qualify
|
|
|
369 |
$URI the full URI to get the base from
|
|
|
370 |
Output: $expandedLinks the expanded links
|
|
|
371 |
\*======================================================================*/
|
|
|
372 |
|
|
|
373 |
/**
 * Expand links into fully qualified URLs relative to a base URI.
 *
 * @param  mixed  $links  a link, or an array of links, to qualify
 * @param  string $URI    the full URI the base is derived from
 * @return mixed          the expanded link(s), same shape as $links
 */
function _expandlinks($links,$URI)
{
    // everything in front of the query string
    preg_match("/^[^\?]+/", $URI, $uri_head);

    // drop a trailing "/name.ext" component to get the base location
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uri_head[0]);

    $patterns = array();
    $substitutes = array();

    // 1. remove an explicit reference to the current host
    $patterns[]    = "|^http://".preg_quote($this->host)."|i";
    $substitutes[] = "";

    // 2. prefix the base to anything that is neither http:// nor mailto:
    $patterns[]    = "|^(?!http://)(\/)?(?!mailto:)|i";
    $substitutes[] = $base."/";

    // 3. collapse "/./"
    $patterns[]    = "|/\./|";
    $substitutes[] = "/";

    // 4. collapse "/dir/../"
    $patterns[]    = "|/[^\/]+/\.\./|";
    $substitutes[] = "/";

    return preg_replace($patterns, $substitutes, $links);
}
|
|
|
396 |
|
|
|
397 |
/*======================================================================*\
|
|
|
398 |
Function: _httprequest
|
|
|
399 |
Purpose: go get the http data from the server
|
|
|
400 |
Input: $url the url to fetch
|
|
|
401 |
$fp the current open file pointer
|
|
|
402 |
$URI the full URI
|
|
|
403 |
$body body contents to send if any (POST)
|
|
|
404 |
Output:
|
|
|
405 |
\*======================================================================*/
|
|
|
406 |
|
|
|
407 |
/**
 * Send an HTTP request on an open socket and read the response.
 *
 * Populates $this->headers, $this->response_code, $this->status,
 * $this->_redirectaddr (Location/URI header or meta refresh),
 * $this->_frameurls and $this->results.
 *
 * @param  string   $url           request target (path, or full URI via proxy)
 * @param  resource $fp            the open socket
 * @param  string   $URI           the full URI being requested
 * @param  string   $http_method   request method, e.g. "GET" or "POST"
 * @param  string   $content_type  Content-type of $body, if any
 * @param  string   $body          body contents to send, if any (POST)
 * @return bool                    false on read timeout (status is set to -100), else true
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";

    if(empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if($this->use_gzip)
    {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if(function_exists('gzinflate'))
        {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else
        {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                "  Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";

    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }

    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are sent on every request, including redirects and frames
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }

    if(!empty($content_type))
    {
        $headers .= "Content-type: $content_type";
        if($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp, $headers.$body, strlen($headers.$body));

    $this->_redirectaddr = false;
    // keep the property defined (rather than unset) so later readers
    // such as setcookies() can always count() it
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while($currentHeader = fgets($fp, $this->_maxlinelen))
    {
        if($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        // a blank line ends the header section
        if(preg_match("/^\r?\n$/", $currentHeader))
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Header names are case-insensitive and the space after the colon is optional.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i", chop($currentHeader), $matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|", $matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|", $matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|", $currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                $this->status = $status[1];
            $this->response_code = $currentHeader;
        }

        if(preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader))
            $is_gzipped = true;

        $this->headers[] = $currentHeader;
    }

    // read the body; stops once more than maxlength bytes have been collected
    $results = "";
    while(($data = fread($fp, $this->maxlength)) !== false && $data !== "")
    {
        $results .= $data;
        if(strlen($results) > $this->maxlength)
            break;
    }

    // gunzip
    if($is_gzipped)
    {
        // per http://www.php.net/manual/en/function.gzencode.php :
        // skip the fixed 10-byte gzip header, then inflate the raw stream
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        // results becomes a list of pages once frames are involved;
        // appending with [] to a string property is a fatal error
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
|
|
580 |
|
|
|
581 |
/*======================================================================*\
|
|
|
582 |
Function: _httpsrequest
|
|
|
583 |
Purpose: go get the https data from the server using curl
|
|
|
584 |
Input: $url the url to fetch
|
|
|
585 |
$URI the full URI
|
|
|
586 |
$body body contents to send if any (POST)
|
|
|
587 |
Output:
|
|
|
588 |
\*======================================================================*/
|
|
|
589 |
|
|
|
590 |
/**
 * Fetch an https URI by running the external curl binary ($this->curl_path).
 *
 * Populates $this->headers, $this->response_code, $this->status,
 * $this->_redirectaddr, $this->_frameurls and $this->results.
 *
 * @param  string $url           request target; only used to mirror _httprequest
 *                               (curl is always given the full $URI)
 * @param  string $URI           the full URI
 * @param  string $http_method   request method (not passed to curl)
 * @param  string $content_type  Content-type of $body, if any
 * @param  string $body          body contents to send, if any (POST)
 * @return bool                  false when curl fails or no temp file can be created
 */
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";

    if(empty($url))
        $url = "/";

    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;

    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }

    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }

    if(!empty($content_type))
    {
        if($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Every value that reaches the shell is passed through escapeshellarg().
    // Headers, body and URI may carry remote or user-supplied data, and
    // escapeshellcmd() on a hand-quoted string does not stop a value from
    // closing its quotes and injecting further options or commands.
    $cmdline_params = "";
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // tempnam() creates the file itself under an unpredictable name, so
    // another local user cannot pre-create or guess the header dump path
    $headerfile = tempnam("/tmp", "snoopy");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    // accept self-signed certs
    // NOTE(review): -k disables certificate verification for every request;
    // kept because existing callers may depend on it.
    $cmdline_params .= " -k";

    $results = array();
    exec(
        escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),
        $results,
        $return
    );

    if($return)
    {
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // keep the property defined (rather than unset) so later readers
    // such as setcookies() can always count() it
    $this->headers = array();

    foreach($result_headers as $header)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // Header names are case-insensitive and the space after the colon is optional.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i", chop($header), $matches))
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|", $matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|", $matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|", $header))
        {
            $this->response_code = $header;
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $status))
                $this->status = $status[1];
        }

        $this->headers[] = $header;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        // results becomes a list of pages once frames are involved;
        // appending with [] to a string property is a fatal error
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
|
|
731 |
|
|
|
732 |
/*======================================================================*\
|
|
|
733 |
Function: setcookies()
|
|
|
734 |
Purpose: set cookies for a redirection
|
|
|
735 |
\*======================================================================*/
|
|
|
736 |
|
|
|
737 |
/**
 * Copy the cookies set by the last response into $this->cookies so they
 * are passed along on a redirection.
 *
 * Only the name=value pair of each Set-Cookie header is used; dates,
 * domains and paths are ignored.
 */
function setcookies()
{
    // headers may be unset or empty when no response has been parsed yet;
    // count() on a non-array is a TypeError on PHP 8
    if(empty($this->headers) || !is_array($this->headers))
        return;

    foreach($this->headers as $header)
    {
        if(preg_match("/^set-cookie:[\s]*([^=]+)=([^;]+)/i", $header, $match))
        {
            // values are url-encoded again when the Cookie header is built,
            // so store them decoded to avoid double encoding
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
|
|
|
745 |
|
|
|
746 |
|
|
|
747 |
/*======================================================================*\
|
|
|
748 |
Function: _check_timeout
|
|
|
749 |
Purpose: checks whether timeout has occurred
|
|
|
750 |
Input: $fp file pointer
|
|
|
751 |
\*======================================================================*/
|
|
|
752 |
|
|
|
753 |
/**
 * Report whether the last read on the socket timed out.
 *
 * Sets $this->timed_out to true when it did. Always returns false when
 * read timeouts are disabled (read_timeout not greater than 0).
 *
 * @param  resource $fp  file pointer of the socket
 * @return bool
 */
function _check_timeout($fp)
{
    // timeouts disabled: nothing to check
    if (!($this->read_timeout > 0))
        return false;

    $socket_info = socket_get_status($fp);
    if (!$socket_info["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
|
|
|
764 |
|
|
|
765 |
/*======================================================================*\
|
|
|
766 |
Function: _connect
|
|
|
767 |
Purpose: make a socket connection
|
|
|
768 |
Input: $fp file pointer
|
|
|
769 |
\*======================================================================*/
|
|
|
770 |
|
|
|
771 |
/**
 * Open a socket to the target host, or to the proxy when both proxy_host
 * and proxy_port are set (which also sets $this->_isproxy).
 *
 * On failure $this->status holds the fsockopen() error number and
 * $this->error a matching message.
 *
 * @param  resource $fp  receives the file pointer of the opened socket
 * @return bool          true when the connection succeeded
 */
function _connect(&$fp)
{
    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // each case must break: without it every failure fell through to
        // the default branch and the specific message was overwritten
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
            break;
    }
    return false;
}
|
|
|
817 |
/*======================================================================*\
|
|
|
818 |
Function: _disconnect
|
|
|
819 |
Purpose: disconnect a socket connection
|
|
|
820 |
Input: $fp file pointer
|
|
|
821 |
\*======================================================================*/
|
|
|
822 |
|
|
|
823 |
/**
 * Close a socket connection.
 *
 * @param  resource $fp  file pointer to close
 * @return bool          the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
|
|
|
827 |
|
|
|
828 |
|
|
|
829 |
/*======================================================================*\
|
|
|
830 |
Function: _prepare_post_body
|
|
|
831 |
Purpose: Prepare post body according to encoding type
|
|
|
832 |
Input: $formvars - form variables
|
|
|
833 |
$formfiles - form upload files
|
|
|
834 |
Output: post body
|
|
|
835 |
\*======================================================================*/
|
|
|
836 |
|
|
|
837 |
/**
 * Build the request body for a form submission according to
 * $this->_submit_type.
 *
 * For "multipart/form-data" a fresh boundary is generated and stored in
 * $this->_mime_boundary. Upload files that cannot be read are skipped.
 *
 * @param  mixed $formvars   form variables (name => value, or name => list of values)
 * @param  mixed $formfiles  upload files (field name => file name or list of file names)
 * @return string            the post body; "" when there is nothing to send
 *                           or the submit type is not recognised
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    $postdata = "";

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    switch ($this->_submit_type)
    {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val)
            {
                if (is_array($val) || is_object($val))
                {
                    foreach ($val as $cur_val)
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                }
                else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val)
            {
                if (is_array($val) || is_object($val))
                {
                    foreach ($val as $cur_val)
                    {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // plain "[]" suffix: "\[" is not an escape sequence in a
                        // double-quoted string, so the backslashes used to end up
                        // in the field name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                }
                else
                {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names)
            {
                settype($file_names, "array");
                foreach ($file_names as $file_name)
                {
                    if (!is_readable($file_name))
                        continue;

                    // binary-safe and also works for empty files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false)
                        continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
|
|
|
898 |
}
|
|
|
899 |
|
|
|
900 |
?>
|