Merge pull request #624 from julienCXX/pr-curl-http-fetch
Added (and set as default) a cURL-based method for fetching HTTP content
This commit is contained in:
commit
d0d3623172
1 changed files with 150 additions and 10 deletions
|
@ -1,6 +1,7 @@
|
|||
<?php
|
||||
/**
|
||||
* GET an HTTP URL to retrieve its content
|
||||
* Uses the cURL library or a fallback method
|
||||
*
|
||||
* @param string $url URL to get (http://...)
|
||||
* @param int $timeout network timeout (in seconds)
|
||||
|
@ -20,9 +21,13 @@
|
|||
* echo 'There was an error: '.htmlspecialchars($headers[0]);
|
||||
* }
|
||||
*
|
||||
* @see http://php.net/manual/en/function.file-get-contents.php
|
||||
* @see http://php.net/manual/en/function.stream-context-create.php
|
||||
* @see http://php.net/manual/en/function.get-headers.php
|
||||
* @see https://secure.php.net/manual/en/ref.curl.php
|
||||
* @see https://secure.php.net/manual/en/functions.anonymous.php
|
||||
* @see https://secure.php.net/manual/en/function.preg-split.php
|
||||
* @see https://secure.php.net/manual/en/function.explode.php
|
||||
* @see http://stackoverflow.com/q/17641073
|
||||
* @see http://stackoverflow.com/q/9183178
|
||||
* @see http://stackoverflow.com/q/1462720
|
||||
*/
|
||||
function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
|
||||
{
|
||||
|
@ -33,25 +38,160 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
|
|||
return array(array(0 => 'Invalid HTTP Url'), false);
|
||||
}
|
||||
|
||||
$userAgent =
|
||||
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)'
|
||||
. ' Gecko/20100101 Firefox/45.0';
|
||||
$acceptLanguage =
|
||||
substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3';
|
||||
$maxRedirs = 3;
|
||||
|
||||
if (!function_exists('curl_init')) {
|
||||
return get_http_response_fallback(
|
||||
$cleanUrl,
|
||||
$timeout,
|
||||
$maxBytes,
|
||||
$userAgent,
|
||||
$acceptLanguage,
|
||||
$maxRedirs
|
||||
);
|
||||
}
|
||||
|
||||
$ch = curl_init($cleanUrl);
|
||||
if ($ch === false) {
|
||||
return array(array(0 => 'curl_init() error'), false);
|
||||
}
|
||||
|
||||
// General cURL settings
|
||||
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_HEADER, true);
|
||||
curl_setopt(
|
||||
$ch,
|
||||
CURLOPT_HTTPHEADER,
|
||||
array('Accept-Language: ' . $acceptLanguage)
|
||||
);
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $maxRedirs);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
|
||||
|
||||
// Max download size management
|
||||
curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024);
|
||||
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
|
||||
curl_setopt($ch, CURLOPT_PROGRESSFUNCTION,
|
||||
function($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes)
|
||||
{
|
||||
if (version_compare(phpversion(), '5.5', '<')) {
|
||||
// PHP version lower than 5.5
|
||||
// Callback has 4 arguments
|
||||
$downloaded = $arg1;
|
||||
} else {
|
||||
// Callback has 5 arguments
|
||||
$downloaded = $arg2;
|
||||
}
|
||||
// Non-zero return stops downloading
|
||||
return ($downloaded > $maxBytes) ? 1 : 0;
|
||||
}
|
||||
);
|
||||
|
||||
$response = curl_exec($ch);
|
||||
$errorNo = curl_errno($ch);
|
||||
$errorStr = curl_error($ch);
|
||||
$headSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
|
||||
curl_close($ch);
|
||||
|
||||
if ($response === false) {
|
||||
if ($errorNo == CURLE_COULDNT_RESOLVE_HOST) {
|
||||
/*
|
||||
* Workaround to match fallback method behaviour
|
||||
* Removing this would require updating
|
||||
* GetHttpUrlTest::testGetInvalidRemoteUrl()
|
||||
*/
|
||||
return array(false, false);
|
||||
}
|
||||
return array(array(0 => 'curl_exec() error: ' . $errorStr), false);
|
||||
}
|
||||
|
||||
// Formatting output like the fallback method
|
||||
$rawHeaders = substr($response, 0, $headSize);
|
||||
|
||||
// Keep only headers from latest redirection
|
||||
$rawHeadersArrayRedirs = explode("\r\n\r\n", trim($rawHeaders));
|
||||
$rawHeadersLastRedir = end($rawHeadersArrayRedirs);
|
||||
|
||||
$content = substr($response, $headSize);
|
||||
$headers = array();
|
||||
foreach (preg_split('~[\r\n]+~', $rawHeadersLastRedir) as $line) {
|
||||
if (empty($line) or ctype_space($line)) {
|
||||
continue;
|
||||
}
|
||||
$splitLine = explode(': ', $line, 2);
|
||||
if (count($splitLine) > 1) {
|
||||
$key = $splitLine[0];
|
||||
$value = $splitLine[1];
|
||||
if (array_key_exists($key, $headers)) {
|
||||
if (!is_array($headers[$key])) {
|
||||
$headers[$key] = array(0 => $headers[$key]);
|
||||
}
|
||||
$headers[$key][] = $value;
|
||||
} else {
|
||||
$headers[$key] = $value;
|
||||
}
|
||||
} else {
|
||||
$headers[] = $splitLine[0];
|
||||
}
|
||||
}
|
||||
|
||||
return array($headers, $content);
|
||||
}
|
||||
|
||||
/**
|
||||
* GET an HTTP URL to retrieve its content (fallback method)
|
||||
*
|
||||
* @param string $cleanUrl URL to get (http://... valid and in ASCII form)
|
||||
* @param int $timeout network timeout (in seconds)
|
||||
* @param int $maxBytes maximum downloaded bytes
|
||||
* @param string $userAgent "User-Agent" header
|
||||
* @param string $acceptLanguage "Accept-Language" header
|
||||
* @param int $maxRedr maximum amount of redirections followed
|
||||
*
|
||||
* @return array HTTP response headers, downloaded content
|
||||
*
|
||||
* Output format:
|
||||
* [0] = associative array containing HTTP response headers
|
||||
* [1] = URL content (downloaded data)
|
||||
*
|
||||
* @see http://php.net/manual/en/function.file-get-contents.php
|
||||
* @see http://php.net/manual/en/function.stream-context-create.php
|
||||
* @see http://php.net/manual/en/function.get-headers.php
|
||||
*/
|
||||
function get_http_response_fallback(
|
||||
$cleanUrl,
|
||||
$timeout,
|
||||
$maxBytes,
|
||||
$userAgent,
|
||||
$acceptLanguage,
|
||||
$maxRedr
|
||||
) {
|
||||
$options = array(
|
||||
'http' => array(
|
||||
'method' => 'GET',
|
||||
'timeout' => $timeout,
|
||||
'user_agent' => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)'
|
||||
.' Gecko/20100101 Firefox/45.0',
|
||||
'accept_language' => substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3',
|
||||
'user_agent' => $userAgent,
|
||||
'header' => "Accept: */*\r\n"
|
||||
. 'Accept-Language: ' . $acceptLanguage
|
||||
)
|
||||
);
|
||||
|
||||
stream_context_set_default($options);
|
||||
list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
|
||||
list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr);
|
||||
if (! $headers || strpos($headers[0], '200 OK') === false) {
|
||||
$options['http']['request_fulluri'] = true;
|
||||
stream_context_set_default($options);
|
||||
list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
|
||||
list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr);
|
||||
}
|
||||
|
||||
if (! $headers || strpos($headers[0], '200 OK') === false) {
|
||||
if (! $headers) {
|
||||
return array($headers, false);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue