HTTP: move utils to a proper file, add tests
Relates to #333

Modifications:
- move HTTP utils to 'application/HttpUtils.php'
- simplify logic
- replace 'http_parse_headers_shaarli' with the built-in 'get_headers()'
- remove the superfluous '$status' value (already provided by the HTTP headers)
- apply coding conventions
- add test coverage (unit tests only)

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
Commit: 451314eb48 (parent: f5d6b19b73)
3 changed files with 122 additions and 78 deletions
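Note: the caller-facing change throughout index.php is the return format. getHTTP() returned a 3-element array ($httpstatus, $headers, $data), whereas get_http_url() returns only ($headers, $data), with the status line available as $headers[0]. A minimal before/after sketch based on the call sites changed below (the URL and timeout are placeholders, not taken from the commit):

// Before (old helper, removed by this commit):
list($httpstatus, $headers, $data) = getHTTP('http://example.org/', 4);
if (strpos($httpstatus, '200 OK') !== false) {
    echo 'Data type: ' . htmlspecialchars($headers['Content-Type']);
}

// After (new helper in application/HttpUtils.php):
list($headers, $data) = get_http_url('http://example.org/', 4);
if (strpos($headers[0], '200 OK') !== false) {
    echo 'Data type: ' . htmlspecialchars($headers['Content-Type']);
}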
application/HttpUtils.php (new file, 52 lines)
@@ -0,0 +1,52 @@
<?php
/**
 * GET an HTTP URL to retrieve its content
 *
 * @param string $url      URL to get (http://...)
 * @param int    $timeout  network timeout (in seconds)
 * @param int    $maxBytes maximum downloaded bytes (default: 4 MiB)
 *
 * @return array HTTP response headers, downloaded content
 *
 * Output format:
 *  [0] = associative array containing HTTP response headers
 *  [1] = URL content (downloaded data)
 *
 * Example:
 *  list($headers, $data) = get_http_url('http://sebauvage.net/');
 *  if (strpos($headers[0], '200 OK') !== false) {
 *      echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
 *  } else {
 *      echo 'There was an error: '.htmlspecialchars($headers[0]);
 *  }
 *
 * @see http://php.net/manual/en/function.file-get-contents.php
 * @see http://php.net/manual/en/function.stream-context-create.php
 * @see http://php.net/manual/en/function.get-headers.php
 */
function get_http_url($url, $timeout = 30, $maxBytes = 4194304)
{
    $options = array(
        'http' => array(
            'method' => 'GET',
            'timeout' => $timeout,
            'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
                           .' Gecko/20100101 Firefox/23.0'
        )
    );

    $context = stream_context_create($options);

    try {
        // TODO: catch Exception in calling code (thumbnailer)
        $content = file_get_contents($url, false, $context, -1, $maxBytes);
    } catch (Exception $exc) {
        return array(array(0 => 'HTTP Error'), $exc->getMessage());
    }

    if (!$content) {
        return array(array(0 => 'HTTP Error'), '');
    }

    return array(get_headers($url, 1), $content);
}
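Note: get_http_url() relies on PHP's built-in get_headers($url, 1). With the second argument set to 1, it returns the response headers as an associative array whose element 0 is the raw status line, which is why a separate $status value no longer needs to be returned. A caveat not stated in the commit: get_headers() performs its own request, in addition to the file_get_contents() call above. A minimal sketch of the assumed output shape (example.org is a placeholder):

// Assumed shape of get_headers($url, 1) for a successful response:
$headers = get_headers('http://example.org/', 1);
// $headers[0]              => 'HTTP/1.1 200 OK'           (status line)
// $headers['Content-Type'] => 'text/html; charset=UTF-8'  (named header)
if (strpos($headers[0], '200 OK') !== false) {
    echo 'Data type: ' . htmlspecialchars($headers['Content-Type']);
}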
index.php (110 changes)
@@ -59,6 +59,7 @@
 // Shaarli library
 require_once 'application/Cache.php';
 require_once 'application/CachedPage.php';
+require_once 'application/HttpUtils.php';
 require_once 'application/LinkDB.php';
 require_once 'application/TimeZone.php';
 require_once 'application/Url.php';
@@ -209,9 +210,11 @@ function checkUpdate()
     // Get latest version number at most once a day.
     if (!is_file($GLOBALS['config']['UPDATECHECK_FILENAME']) || (filemtime($GLOBALS['config']['UPDATECHECK_FILENAME'])<time()-($GLOBALS['config']['UPDATECHECK_INTERVAL'])))
     {
-        $version=shaarli_version;
-        list($httpstatus,$headers,$data) = getHTTP('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php',2);
-        if (strpos($httpstatus,'200 OK')!==false) $version=str_replace(' */ ?>','',str_replace('<?php /* ','',$data));
+        $version = shaarli_version;
+        list($headers, $data) = get_http_url('https://raw.githubusercontent.com/shaarli/Shaarli/master/shaarli_version.php', 2);
+        if (strpos($headers[0], '200 OK') !== false) {
+            $version = str_replace(' */ ?>', '', str_replace('<?php /* ', '', $data));
+        }
         // If failed, never mind. We don't want to bother the user with that.
         file_put_contents($GLOBALS['config']['UPDATECHECK_FILENAME'],$version); // touch file date
     }
@@ -535,53 +538,6 @@ function linkdate2iso8601($linkdate)
     return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
 }
 
-// Parse HTTP response headers and return an associative array.
-function http_parse_headers_shaarli( $headers )
-{
-    $res=array();
-    foreach($headers as $header)
-    {
-        $i = strpos($header,': ');
-        if ($i!==false)
-        {
-            $key=substr($header,0,$i);
-            $value=substr($header,$i+2,strlen($header)-$i-2);
-            $res[$key]=$value;
-        }
-    }
-    return $res;
-}
-
-/* GET an URL.
-   Input: $url : URL to get (http://...)
-          $timeout : Network timeout (will wait this many seconds for an anwser before giving up).
-   Output: An array. [0] = HTTP status message (e.g. "HTTP/1.1 200 OK") or error message
-                     [1] = associative array containing HTTP response headers (e.g. echo getHTTP($url)[1]['Content-Type'])
-                     [2] = data
-   Example: list($httpstatus,$headers,$data) = getHTTP('http://sebauvage.net/');
-            if (strpos($httpstatus,'200 OK')!==false)
-                echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
-            else
-                echo 'There was an error: '.htmlspecialchars($httpstatus)
-*/
-function getHTTP($url,$timeout=30)
-{
-    try
-    {
-        $options = array('http'=>array('method'=>'GET','timeout' => $timeout, 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0')); // Force network timeout
-        $context = stream_context_create($options);
-        $data=file_get_contents($url,false,$context,-1, 4000000); // We download at most 4 Mb from source.
-        if (!$data) { return array('HTTP Error',array(),''); }
-        $httpStatus=$http_response_header[0]; // e.g. "HTTP/1.1 200 OK"
-        $responseHeaders=http_parse_headers_shaarli($http_response_header);
-        return array($httpStatus,$responseHeaders,$data);
-    }
-    catch (Exception $e) // getHTTP *can* fail silently (we don't care if the title cannot be fetched)
-    {
-        return array($e->getMessage(),'','');
-    }
-}
-
 // Extract title from an HTML document.
 // (Returns an empty string if not found.)
 function html_extract_title($html)
@@ -1516,9 +1472,10 @@ function renderPage()
     $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0);
     // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
     if (empty($title) && strpos($url->getScheme(), 'http') !== false) {
-        list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive.
+        // Short timeout to keep the application responsive
+        list($headers, $data) = get_http_url($url, 4);
         // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
-        if (strpos($status,'200 OK')!==false) {
+        if (strpos($headers[0], '200 OK') !== false) {
             // Look for charset in html header.
             preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
@@ -2186,8 +2143,9 @@ function genThumbnail()
         }
         else // This is a flickr page (html)
         {
-            list($httpstatus,$headers,$data) = getHTTP($url,20); // Get the flickr html page.
-            if (strpos($httpstatus,'200 OK')!==false)
+            // Get the flickr html page.
+            list($headers, $data) = get_http_url($url, 20);
+            if (strpos($headers[0], '200 OK') !== false)
             {
                 // flickr now nicely provides the URL of the thumbnail in each flickr page.
                 preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches);
@@ -2206,9 +2164,9 @@ function genThumbnail()
 
         if ($imageurl!='')
         { // Let's download the image.
-            list($httpstatus,$headers,$data) = getHTTP($imageurl,10); // Image is 240x120, so 10 seconds to download should be enough.
-            if (strpos($httpstatus,'200 OK')!==false)
-            {
+            // Image is 240x120, so 10 seconds to download should be enough.
+            list($headers, $data) = get_http_url($imageurl, 10);
+            if (strpos($headers[0], '200 OK') !== false) {
                 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
                 header('Content-Type: image/jpeg');
                 echo $data;
@@ -2222,15 +2180,13 @@ function genThumbnail()
         // This is more complex: we have to perform a HTTP request, then parse the result.
         // Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
         $vid = substr(parse_url($url,PHP_URL_PATH),1);
-        list($httpstatus,$headers,$data) = getHTTP('https://vimeo.com/api/v2/video/'.escape($vid).'.php',5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
+        if (strpos($headers[0], '200 OK') !== false) {
             $t = unserialize($data);
             $imageurl = $t[0]['thumbnail_medium'];
             // Then we download the image and serve it to our client.
-            list($httpstatus,$headers,$data) = getHTTP($imageurl,10);
-            if (strpos($httpstatus,'200 OK')!==false)
-            {
+            list($headers, $data) = get_http_url($imageurl, 10);
+            if (strpos($headers[0], '200 OK') !== false) {
                 file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
                 header('Content-Type: image/jpeg');
                 echo $data;
@@ -2244,17 +2200,16 @@ function genThumbnail()
         // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
         // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
         // <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
             // Extract the link to the thumbnail
             preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches);
             if (!empty($matches[1]))
             { // Let's download the image.
                 $imageurl=$matches[1];
-                list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough.
-                if (strpos($httpstatus,'200 OK')!==false)
-                {
+                // No control on image size, so wait long enough
+                list($headers, $data) = get_http_url($imageurl, 20);
+                if (strpos($headers[0], '200 OK') !== false) {
                     $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
                     file_put_contents($filepath,$data); // Save image to cache.
                     if (resizeImage($filepath))
@@ -2273,17 +2228,16 @@ function genThumbnail()
         // There is no thumbnail available for xkcd comics, so download the whole image and resize it.
         // http://xkcd.com/327/
         // <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
             // Extract the link to the thumbnail
             preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches);
             if (!empty($matches[1]))
             { // Let's download the image.
                 $imageurl=$matches[1];
-                list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough.
-                if (strpos($httpstatus,'200 OK')!==false)
-                {
+                // No control on image size, so wait long enough
+                list($headers, $data) = get_http_url($imageurl, 20);
+                if (strpos($headers[0], '200 OK') !== false) {
                     $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
                     file_put_contents($filepath,$data); // Save image to cache.
                     if (resizeImage($filepath))
@@ -2300,9 +2254,9 @@ function genThumbnail()
     else
     {
         // For all other domains, we try to download the image and make a thumbnail.
-        list($httpstatus,$headers,$data) = getHTTP($url,30); // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        // We allow 30 seconds max to download (and downloads are limited to 4 Mb)
+        list($headers, $data) = get_http_url($url, 30);
+        if (strpos($headers[0], '200 OK') !== false) {
             $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
             file_put_contents($filepath,$data); // Save image to cache.
             if (resizeImage($filepath))
tests/HttpUtilsTest.php (new file, 38 lines)
@@ -0,0 +1,38 @@
<?php
/**
 * HttpUtils' tests
 */

require_once 'application/HttpUtils.php';

/**
 * Unitary tests for get_http_url()
 */
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
{
    /**
     * Get an invalid local URL
     */
    public function testGetInvalidLocalUrl()
    {
        list($headers, $content) = get_http_url('/non/existent', 1);
        $this->assertEquals('HTTP Error', $headers[0]);
        $this->assertRegexp(
            '/failed to open stream: No such file or directory/',
            $content
        );
    }

    /**
     * Get an invalid remote URL
     */
    public function testGetInvalidRemoteUrl()
    {
        list($headers, $content) = get_http_url('http://non.existent', 1);
        $this->assertEquals('HTTP Error', $headers[0]);
        $this->assertRegexp(
            '/Name or service not known/',
            $content
        );
    }
}
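Note: the commit does not document how to run the new tests. Assuming PHPUnit is installed and the command is run from the repository root, an invocation along these lines should work (an assumption, not part of the commit):

phpunit tests/HttpUtilsTest.php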