From 451314eb48c7d922264adc6eada8a0273b12344c Mon Sep 17 00:00:00 2001
From: VirtualTam <virtualtam@flibidi.net>
Date: Tue, 1 Sep 2015 21:45:06 +0200
Subject: [PATCH] HTTP: move utils to a proper file, add tests

Relates to #333

Modifications:
 - move HTTP utils to 'application/HttpUtils.php'
 - simplify logic
 - replace 'http_parse_headers_shaarli' by built-in 'get_headers()'
 - remove superfluous '$status' parameter (provided by the HTTP headers)
 - apply coding conventions
 - add test coverage (unitary only)

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
---
 application/HttpUtils.php |  52 ++++++++++++++++++
 index.php                 | 110 +++++++++++---------------------
 tests/HttpUtilsTest.php   |  38 +++++++++++++
 3 files changed, 122 insertions(+), 78 deletions(-)
 create mode 100644 application/HttpUtils.php
 create mode 100644 tests/HttpUtilsTest.php

diff --git a/application/HttpUtils.php b/application/HttpUtils.php
new file mode 100644
index 0000000..175333a
--- /dev/null
+++ b/application/HttpUtils.php
@@ -0,0 +1,52 @@
+<?php
+/**
+ * GET an HTTP URL to retrieve its content
+ *
+ * @param string $url      URL to get (http://...)
+ * @param int    $timeout  network timeout (in seconds)
+ * @param int    $maxBytes maximum downloaded bytes
+ *
+ * @return array HTTP response headers, downloaded content
+ *
+ * Output format:
+ *  [0] = associative array containing HTTP response headers
+ *  [1] = URL content (downloaded data)
+ *
+ * Example:
+ *  list($headers, $data) = get_http_url('http://sebsauvage.net/');
+ *  if (strpos($headers[0], '200 OK') !== false) {
+ *      echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
+ *  } else {
+ *      echo 'There was an error: '.htmlspecialchars($headers[0]);
+ *  }
+ *
+ * @see http://php.net/manual/en/function.file-get-contents.php
+ * @see http://php.net/manual/en/function.stream-context-create.php
+ * @see http://php.net/manual/en/function.get-headers.php
+ */
+function get_http_url($url, $timeout = 30, $maxBytes = 4194304)
+{
+    $options = array(
+        'http' => array(
+            'method' => 'GET',
+            'timeout' => $timeout,
+            'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
+                           .' Gecko/20100101 Firefox/23.0'
+        )
+    );
+
+    $context = stream_context_create($options);
+
+    try {
+        // TODO: catch Exception in calling code (thumbnailer)
+        $content = file_get_contents($url, false, $context, -1, $maxBytes);
+    } catch (Exception $exc) {
+        return array(array(0 => 'HTTP Error'), $exc->getMessage());
+    }
+
+    if (!$content) {
+        return array(array(0 => 'HTTP Error'), '');
+    }
+
+    return array(get_headers($url, 1), $content);
+}
diff --git a/index.php b/index.php
index 8863cc2..e39cff3 100755
--- a/index.php
+++ b/index.php
@@ -59,6 +59,7 @@ if (is_file($GLOBALS['config']['CONFIG_FILE'])) {
 // Shaarli library
 require_once 'application/Cache.php';
 require_once 'application/CachedPage.php';
+require_once 'application/HttpUtils.php';
 require_once 'application/LinkDB.php';
 require_once 'application/TimeZone.php';
 require_once 'application/Url.php';
@@ -209,9 +210,11 @@ function checkUpdate()
     // Get latest version number at most once a day.
     if (!is_file($GLOBALS['config']['UPDATECHECK_FILENAME']) || (filemtime($GLOBALS['config']['UPDATECHECK_FILENAME'])<time()-($GLOBALS['config']['UPDATECHECK_INTERVAL'])))
     {
         $version = shaarli_version;
-        list($httpstatus,$headers,$data) = getHTTP($GLOBALS['config']['UPDATECHECK_URL'],2);
-        if (strpos($httpstatus,'200 OK')!==false) $version=str_replace(' */ ?>','',str_replace('<?php /* ','',$data));
+        list($headers, $data) = get_http_url($GLOBALS['config']['UPDATECHECK_URL'], 2);
+        if (strpos($headers[0], '200 OK') !== false) {
+            $version = str_replace(' */ ?>', '', str_replace('<?php /* ', '', $data));
+        }
         // If failed, never mind. We don't want to bother the user with that.
         file_put_contents($GLOBALS['config']['UPDATECHECK_FILENAME'], $version); // touch file date
     }
@@ -329,21 +332,3 @@
-function getHTTP($url,$timeout=30)
-{
-    try
-    {
-        $options = array('http'=>array('method'=>'GET','timeout' => $timeout, 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0')); // Force network timeout
-        $context = stream_context_create($options);
-        $data=file_get_contents($url,false,$context,-1, 4000000); // We download at most 4 Mb from source.
-        if (!$data) { return array('HTTP Error',array(),''); }
-        $httpStatus=$http_response_header[0]; // e.g. "HTTP/1.1 200 OK"
-        $responseHeaders=http_parse_headers_shaarli($http_response_header);
-        return array($httpStatus,$responseHeaders,$data);
-    }
-    catch (Exception $e) // getHTTP *can* fail silently (we don't care if the title cannot be fetched)
-    {
-        return array($e->getMessage(),'','');
-    }
-}
-
 // Extract title from an HTML document.
 // (Returns an empty string if not found.)
 function html_extract_title($html)
@@ -1516,9 +1472,10 @@ function renderPage()
         $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0);
         // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
         if (empty($title) && strpos($url->getScheme(), 'http') !== false) {
-            list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive.
+            // Short timeout to keep the application responsive
+            list($headers, $data) = get_http_url($url, 4);
             // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) in html
-            if (strpos($status,'200 OK')!==false) {
+            if (strpos($headers[0], '200 OK') !== false) {
                 // Look for charset in html header.
                 preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
                 // If found, extract encoding.
@@ -2186,8 +2143,9 @@ function genThumbnail()
         }
         else // This is a flickr page (html)
         {
-            list($httpstatus,$headers,$data) = getHTTP($url,20); // Get the flickr html page.
-            if (strpos($httpstatus,'200 OK')!==false)
+            // Get the flickr html page.
+            list($headers, $data) = get_http_url($url, 20);
+            if (strpos($headers[0], '200 OK') !== false)
             {
                 // flickr now nicely provides the URL of the thumbnail in each flickr page.
                 preg_match('!<link rel="image_src" href="(.+?)"!',$data,$matches);
@@ -2210,17 +2168,16 @@ function genThumbnail()
         // The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
         // http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
         // <link rel="image_src" href="http://images.ted.com/images/ted/[...].jpg" />
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
            // Extract the link to the thumbnail
            preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches);
            if (!empty($matches[1]))
            {   // Let's download the image.
                $imageurl=$matches[1];
-               list($httpstatus,$headers,$data) = getHTTP($imageurl,20); // No control on image size, so wait long enough.
-               if (strpos($httpstatus,'200 OK')!==false)
-               {
+               // No control on image size, so wait long enough
+               list($headers, $data) = get_http_url($imageurl, 20);
+               if (strpos($headers[0], '200 OK') !== false) {
                    $filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
                    file_put_contents($filepath,$data); // Save image to cache.
                    if (resizeImage($filepath))
@@ -2273,17 +2228,16 @@ function genThumbnail()
         // There is no thumbnail available for xkcd comics, so download the whole image and resize it.
         // http://xkcd.com/327/
         // <BLABLA>
-        list($httpstatus,$headers,$data) = getHTTP($url,5);
-        if (strpos($httpstatus,'200 OK')!==false)
-        {
+        list($headers, $data) = get_http_url($url, 5);
+        if (strpos($headers[0], '200 OK') !== false) {
            // Extract the link to the thumbnail
            preg_match('!
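
Note on the new calling convention (a minimal sketch; the URL and the echo
scaffolding below are illustrative only, not part of the patch):

    require_once 'application/HttpUtils.php';

    // get_http_url() replaces getHTTP()'s 3-element return value
    // (status, headers, data) with a 2-element one: the status line is
    // now the first entry of the header array filled by get_headers().
    list($headers, $data) = get_http_url('http://example.com/', 4);

    if (strpos($headers[0], '200 OK') !== false) {
        echo 'Fetched '.strlen($data).' bytes of '.$headers['Content-Type'];
    } else {
        echo 'HTTP error: '.$headers[0];
    }

On failure, get_http_url() reports the pseudo status line 'HTTP Error'
instead of throwing, so a unitary test for the error path could look roughly
like the sketch below (assuming PHPUnit, as used by Shaarli's test suite;
the actual tests/HttpUtilsTest.php may differ):

    require_once 'application/HttpUtils.php';

    class HttpUtilsTest extends PHPUnit_Framework_TestCase
    {
        /**
         * An unreachable URL yields the 'HTTP Error' pseudo status line.
         */
        public function testGetInvalidUrl()
        {
            list($headers, $content) = get_http_url('/non/existent/path', 1);
            $this->assertEquals('HTTP Error', $headers[0]);
        }
    }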