Fixes #410 - Retrieve title fails in multiple cases
* `get_http_url()` renamed to `get_http_response()`. * Use the same HTTP context to retrieve response headers and content. * Follow HTTP 301 and 302 redirections to retrieve the title (default max 3 redirections). * Add `LinkUtils` to extract titles and charset. * Try to retrieve charset from HTTP headers first (new), then HTML content. * Use mb_string to re-encode title if necessary.
This commit is contained in:
parent
c0a50f3663
commit
1557cefbd7
8 changed files with 285 additions and 72 deletions
|
@ -19,7 +19,7 @@ class ApplicationUtils
|
||||||
*/
|
*/
|
||||||
public static function getLatestGitVersionCode($url, $timeout=2)
|
public static function getLatestGitVersionCode($url, $timeout=2)
|
||||||
{
|
{
|
||||||
list($headers, $data) = get_http_url($url, $timeout);
|
list($headers, $data) = get_http_response($url, $timeout);
|
||||||
|
|
||||||
if (strpos($headers[0], '200 OK') === false) {
|
if (strpos($headers[0], '200 OK') === false) {
|
||||||
error_log('Failed to retrieve ' . $url);
|
error_log('Failed to retrieve ' . $url);
|
||||||
|
|
49
application/HttpUtils.php
Normal file → Executable file
49
application/HttpUtils.php
Normal file → Executable file
|
@ -13,7 +13,7 @@
|
||||||
* [1] = URL content (downloaded data)
|
* [1] = URL content (downloaded data)
|
||||||
*
|
*
|
||||||
* Example:
|
* Example:
|
||||||
* list($headers, $data) = get_http_url('http://sebauvage.net/');
|
* list($headers, $data) = get_http_response('http://sebauvage.net/');
|
||||||
* if (strpos($headers[0], '200 OK') !== false) {
|
* if (strpos($headers[0], '200 OK') !== false) {
|
||||||
* echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
|
* echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
|
||||||
* } else {
|
* } else {
|
||||||
|
@ -24,31 +24,66 @@
|
||||||
* @see http://php.net/manual/en/function.stream-context-create.php
|
* @see http://php.net/manual/en/function.stream-context-create.php
|
||||||
* @see http://php.net/manual/en/function.get-headers.php
|
* @see http://php.net/manual/en/function.get-headers.php
|
||||||
*/
|
*/
|
||||||
function get_http_url($url, $timeout = 30, $maxBytes = 4194304)
|
function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
|
||||||
{
|
{
|
||||||
|
$urlObj = new Url($url);
|
||||||
|
if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) {
|
||||||
|
return array(array(0 => 'Invalid HTTP Url'), false);
|
||||||
|
}
|
||||||
|
|
||||||
$options = array(
|
$options = array(
|
||||||
'http' => array(
|
'http' => array(
|
||||||
'method' => 'GET',
|
'method' => 'GET',
|
||||||
'timeout' => $timeout,
|
'timeout' => $timeout,
|
||||||
'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
|
'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)'
|
||||||
.' Gecko/20100101 Firefox/23.0'
|
.' Gecko/20100101 Firefox/23.0',
|
||||||
|
'request_fulluri' => true,
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
$context = stream_context_create($options);
|
$context = stream_context_create($options);
|
||||||
|
stream_context_set_default($options);
|
||||||
|
|
||||||
|
list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup());
|
||||||
|
if (! $headers || strpos($headers[0], '200 OK') === false) {
|
||||||
|
return array($headers, false);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// TODO: catch Exception in calling code (thumbnailer)
|
// TODO: catch Exception in calling code (thumbnailer)
|
||||||
$content = file_get_contents($url, false, $context, -1, $maxBytes);
|
$content = file_get_contents($finalUrl, false, $context, -1, $maxBytes);
|
||||||
} catch (Exception $exc) {
|
} catch (Exception $exc) {
|
||||||
return array(array(0 => 'HTTP Error'), $exc->getMessage());
|
return array(array(0 => 'HTTP Error'), $exc->getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$content) {
|
return array($headers, $content);
|
||||||
return array(array(0 => 'HTTP Error'), '');
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve HTTP headers, following n redirections (temporary and permanent).
|
||||||
|
*
|
||||||
|
* @param string $url initial URL to reach.
|
||||||
|
* @param int $redirectionLimit max redirection follow..
|
||||||
|
*
|
||||||
|
* @return array
|
||||||
|
*/
|
||||||
|
function get_redirected_headers($url, $redirectionLimit = 3)
|
||||||
|
{
|
||||||
|
$headers = get_headers($url, 1);
|
||||||
|
|
||||||
|
// Headers found, redirection found, and limit not reached.
|
||||||
|
if ($redirectionLimit-- > 0
|
||||||
|
&& !empty($headers)
|
||||||
|
&& (strpos($headers[0], '301') !== false || strpos($headers[0], '302') !== false)
|
||||||
|
&& !empty($headers['Location'])) {
|
||||||
|
|
||||||
|
$redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location'];
|
||||||
|
if ($redirection != $url) {
|
||||||
|
return get_redirected_headers($redirection, $redirectionLimit);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return array(get_headers($url, 1), $content);
|
return array($headers, $url);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
79
application/LinkUtils.php
Executable file
79
application/LinkUtils.php
Executable file
|
@ -0,0 +1,79 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract title from an HTML document.
|
||||||
|
*
|
||||||
|
* @param string $html HTML content where to look for a title.
|
||||||
|
*
|
||||||
|
* @return bool|string Extracted title if found, false otherwise.
|
||||||
|
*/
|
||||||
|
function html_extract_title($html)
|
||||||
|
{
|
||||||
|
if (preg_match('!<title>(.*)</title>!is', $html, $matches)) {
|
||||||
|
return trim(str_replace("\n", ' ', $matches[1]));
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine charset from downloaded page.
|
||||||
|
* Priority:
|
||||||
|
* 1. HTTP headers (Content type).
|
||||||
|
* 2. HTML content page (tag <meta charset>).
|
||||||
|
* 3. Use a default charset (default: UTF-8).
|
||||||
|
*
|
||||||
|
* @param array $headers HTTP headers array.
|
||||||
|
* @param string $htmlContent HTML content where to look for charset.
|
||||||
|
* @param string $defaultCharset Default charset to apply if other methods failed.
|
||||||
|
*
|
||||||
|
* @return string Determined charset.
|
||||||
|
*/
|
||||||
|
function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
|
||||||
|
{
|
||||||
|
if ($charset = headers_extract_charset($headers)) {
|
||||||
|
return $charset;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($charset = html_extract_charset($htmlContent)) {
|
||||||
|
return $charset;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $defaultCharset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract charset from HTTP headers if it's defined.
|
||||||
|
*
|
||||||
|
* @param array $headers HTTP headers array.
|
||||||
|
*
|
||||||
|
* @return bool|string Charset string if found (lowercase), false otherwise.
|
||||||
|
*/
|
||||||
|
function headers_extract_charset($headers)
|
||||||
|
{
|
||||||
|
if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {
|
||||||
|
preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);
|
||||||
|
if (! empty($match[1])) {
|
||||||
|
return strtolower(trim($match[1]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract charset HTML content (tag <meta charset>).
|
||||||
|
*
|
||||||
|
* @param string $html HTML content where to look for charset.
|
||||||
|
*
|
||||||
|
* @return bool|string Charset string if found, false otherwise.
|
||||||
|
*/
|
||||||
|
function html_extract_charset($html)
|
||||||
|
{
|
||||||
|
// Get encoding specified in HTML header.
|
||||||
|
preg_match('#<meta .*charset="?([^">/]+)"? */?>#Usi', $html, $enc);
|
||||||
|
if (!empty($enc[1])) {
|
||||||
|
return strtolower($enc[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
11
application/Url.php
Normal file → Executable file
11
application/Url.php
Normal file → Executable file
|
@ -118,7 +118,7 @@ class Url
|
||||||
*/
|
*/
|
||||||
public function __construct($url)
|
public function __construct($url)
|
||||||
{
|
{
|
||||||
$this->parts = parse_url($url);
|
$this->parts = parse_url(trim($url));
|
||||||
|
|
||||||
if (!empty($url) && empty($this->parts['scheme'])) {
|
if (!empty($url) && empty($this->parts['scheme'])) {
|
||||||
$this->parts['scheme'] = 'http';
|
$this->parts['scheme'] = 'http';
|
||||||
|
@ -201,4 +201,13 @@ public function getScheme() {
|
||||||
}
|
}
|
||||||
return $this->parts['scheme'];
|
return $this->parts['scheme'];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test if the Url is an HTTP one.
|
||||||
|
*
|
||||||
|
* @return true is HTTP, false otherwise.
|
||||||
|
*/
|
||||||
|
public function isHttp() {
|
||||||
|
return strpos(strtolower($this->parts['scheme']), 'http') !== false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
85
index.php
85
index.php
|
@ -152,6 +152,7 @@
|
||||||
require_once 'application/HttpUtils.php';
|
require_once 'application/HttpUtils.php';
|
||||||
require_once 'application/LinkDB.php';
|
require_once 'application/LinkDB.php';
|
||||||
require_once 'application/LinkFilter.php';
|
require_once 'application/LinkFilter.php';
|
||||||
|
require_once 'application/LinkUtils.php';
|
||||||
require_once 'application/TimeZone.php';
|
require_once 'application/TimeZone.php';
|
||||||
require_once 'application/Url.php';
|
require_once 'application/Url.php';
|
||||||
require_once 'application/Utils.php';
|
require_once 'application/Utils.php';
|
||||||
|
@ -578,13 +579,6 @@ function linkdate2iso8601($linkdate)
|
||||||
return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
|
return date('c',linkdate2timestamp($linkdate)); // 'c' is for ISO 8601 date format.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract title from an HTML document.
|
|
||||||
// (Returns an empty string if not found.)
|
|
||||||
function html_extract_title($html)
|
|
||||||
{
|
|
||||||
return preg_match('!<title>(.*?)</title>!is', $html, $matches) ? trim(str_replace("\n",' ', $matches[1])) : '' ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------
|
||||||
// Token management for XSRF protection
|
// Token management for XSRF protection
|
||||||
// Token should be used in any form which acts on data (create,update,delete,import...).
|
// Token should be used in any form which acts on data (create,update,delete,import...).
|
||||||
|
@ -1642,7 +1636,7 @@ function renderPage()
|
||||||
|
|
||||||
// -------- User want to post a new link: Display link edit form.
|
// -------- User want to post a new link: Display link edit form.
|
||||||
if (isset($_GET['post'])) {
|
if (isset($_GET['post'])) {
|
||||||
$url = cleanup_url($_GET['post']);
|
$url = cleanup_url(escape($_GET['post']));
|
||||||
|
|
||||||
$link_is_new = false;
|
$link_is_new = false;
|
||||||
// Check if URL is not already in database (in this case, we will edit the existing link)
|
// Check if URL is not already in database (in this case, we will edit the existing link)
|
||||||
|
@ -1660,35 +1654,24 @@ function renderPage()
|
||||||
// If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
|
// If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.)
|
||||||
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
|
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
|
||||||
// Short timeout to keep the application responsive
|
// Short timeout to keep the application responsive
|
||||||
list($headers, $data) = get_http_url($url, 4);
|
list($headers, $content) = get_http_response($url, 4);
|
||||||
// FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
|
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
// Look for charset in html header.
|
// Retrieve charset.
|
||||||
preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
|
$charset = get_charset($headers, $content);
|
||||||
|
// Extract title.
|
||||||
// If found, extract encoding.
|
$title = html_extract_title($content);
|
||||||
if (!empty($meta[0])) {
|
|
||||||
// Get encoding specified in header.
|
|
||||||
preg_match('#charset="?(.*)"#si', $meta[0], $enc);
|
|
||||||
// If charset not found, use utf-8.
|
|
||||||
$html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$html_charset = 'utf-8';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract title
|
|
||||||
$title = html_extract_title($data);
|
|
||||||
if (!empty($title)) {
|
|
||||||
// Re-encode title in utf-8 if necessary.
|
// Re-encode title in utf-8 if necessary.
|
||||||
$title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
|
if (! empty($title) && $charset != 'utf-8') {
|
||||||
|
$title = mb_convert_encoding($title, $charset, 'utf-8');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($url == '') {
|
if ($url == '') {
|
||||||
$url = '?' . smallHash($linkdate);
|
$url = '?' . smallHash($linkdate);
|
||||||
$title = 'Note: ';
|
$title = 'Note: ';
|
||||||
}
|
}
|
||||||
|
|
||||||
$link = array(
|
$link = array(
|
||||||
'linkdate' => $linkdate,
|
'linkdate' => $linkdate,
|
||||||
'title' => $title,
|
'title' => $title,
|
||||||
|
@ -2314,11 +2297,11 @@ function genThumbnail()
|
||||||
else // This is a flickr page (html)
|
else // This is a flickr page (html)
|
||||||
{
|
{
|
||||||
// Get the flickr html page.
|
// Get the flickr html page.
|
||||||
list($headers, $data) = get_http_url($url, 20);
|
list($headers, $content) = get_http_response($url, 20);
|
||||||
if (strpos($headers[0], '200 OK') !== false)
|
if (strpos($headers[0], '200 OK') !== false)
|
||||||
{
|
{
|
||||||
// flickr now nicely provides the URL of the thumbnail in each flickr page.
|
// flickr now nicely provides the URL of the thumbnail in each flickr page.
|
||||||
preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!',$data,$matches);
|
preg_match('!<link rel=\"image_src\" href=\"(.+?)\"!', $content, $matches);
|
||||||
if (!empty($matches[1])) $imageurl=$matches[1];
|
if (!empty($matches[1])) $imageurl=$matches[1];
|
||||||
|
|
||||||
// In albums (and some other pages), the link rel="image_src" is not provided,
|
// In albums (and some other pages), the link rel="image_src" is not provided,
|
||||||
|
@ -2326,7 +2309,7 @@ function genThumbnail()
|
||||||
// <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" />
|
// <meta property="og:image" content="http://farm4.staticflickr.com/3398/3239339068_25d13535ff_z.jpg" />
|
||||||
if ($imageurl=='')
|
if ($imageurl=='')
|
||||||
{
|
{
|
||||||
preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!',$data,$matches);
|
preg_match('!<meta property=\"og:image\" content=\"(.+?)\"!', $content, $matches);
|
||||||
if (!empty($matches[1])) $imageurl=$matches[1];
|
if (!empty($matches[1])) $imageurl=$matches[1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2335,11 +2318,12 @@ function genThumbnail()
|
||||||
if ($imageurl!='')
|
if ($imageurl!='')
|
||||||
{ // Let's download the image.
|
{ // Let's download the image.
|
||||||
// Image is 240x120, so 10 seconds to download should be enough.
|
// Image is 240x120, so 10 seconds to download should be enough.
|
||||||
list($headers, $data) = get_http_url($imageurl, 10);
|
list($headers, $content) = get_http_response($imageurl, 10);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
|
// Save image to cache.
|
||||||
|
file_put_contents($GLOBALS['config']['CACHEDIR'].'/' . $thumbname, $content);
|
||||||
header('Content-Type: image/jpeg');
|
header('Content-Type: image/jpeg');
|
||||||
echo $data;
|
echo $content;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2350,16 +2334,17 @@ function genThumbnail()
|
||||||
// This is more complex: we have to perform a HTTP request, then parse the result.
|
// This is more complex: we have to perform a HTTP request, then parse the result.
|
||||||
// Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
|
// Maybe we should deport this to JavaScript ? Example: http://stackoverflow.com/questions/1361149/get-img-thumbnails-from-vimeo/4285098#4285098
|
||||||
$vid = substr(parse_url($url,PHP_URL_PATH),1);
|
$vid = substr(parse_url($url,PHP_URL_PATH),1);
|
||||||
list($headers, $data) = get_http_url('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
|
list($headers, $content) = get_http_response('https://vimeo.com/api/v2/video/'.escape($vid).'.php', 5);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
$t = unserialize($data);
|
$t = unserialize($content);
|
||||||
$imageurl = $t[0]['thumbnail_medium'];
|
$imageurl = $t[0]['thumbnail_medium'];
|
||||||
// Then we download the image and serve it to our client.
|
// Then we download the image and serve it to our client.
|
||||||
list($headers, $data) = get_http_url($imageurl, 10);
|
list($headers, $content) = get_http_response($imageurl, 10);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
file_put_contents($GLOBALS['config']['CACHEDIR'].'/'.$thumbname,$data); // Save image to cache.
|
// Save image to cache.
|
||||||
|
file_put_contents($GLOBALS['config']['CACHEDIR'] . '/' . $thumbname, $content);
|
||||||
header('Content-Type: image/jpeg');
|
header('Content-Type: image/jpeg');
|
||||||
echo $data;
|
echo $content;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2370,18 +2355,18 @@ function genThumbnail()
|
||||||
// The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
|
// The thumbnail for TED talks is located in the <link rel="image_src" [...]> tag on that page
|
||||||
// http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
|
// http://www.ted.com/talks/mikko_hypponen_fighting_viruses_defending_the_net.html
|
||||||
// <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
|
// <link rel="image_src" href="http://images.ted.com/images/ted/28bced335898ba54d4441809c5b1112ffaf36781_389x292.jpg" />
|
||||||
list($headers, $data) = get_http_url($url, 5);
|
list($headers, $content) = get_http_response($url, 5);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
// Extract the link to the thumbnail
|
// Extract the link to the thumbnail
|
||||||
preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!',$data,$matches);
|
preg_match('!link rel="image_src" href="(http://images.ted.com/images/ted/.+_\d+x\d+\.jpg)"!', $content, $matches);
|
||||||
if (!empty($matches[1]))
|
if (!empty($matches[1]))
|
||||||
{ // Let's download the image.
|
{ // Let's download the image.
|
||||||
$imageurl=$matches[1];
|
$imageurl=$matches[1];
|
||||||
// No control on image size, so wait long enough
|
// No control on image size, so wait long enough
|
||||||
list($headers, $data) = get_http_url($imageurl, 20);
|
list($headers, $content) = get_http_response($imageurl, 20);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
||||||
file_put_contents($filepath,$data); // Save image to cache.
|
file_put_contents($filepath, $content); // Save image to cache.
|
||||||
if (resizeImage($filepath))
|
if (resizeImage($filepath))
|
||||||
{
|
{
|
||||||
header('Content-Type: image/jpeg');
|
header('Content-Type: image/jpeg');
|
||||||
|
@ -2398,18 +2383,19 @@ function genThumbnail()
|
||||||
// There is no thumbnail available for xkcd comics, so download the whole image and resize it.
|
// There is no thumbnail available for xkcd comics, so download the whole image and resize it.
|
||||||
// http://xkcd.com/327/
|
// http://xkcd.com/327/
|
||||||
// <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
|
// <img src="http://imgs.xkcd.com/comics/exploits_of_a_mom.png" title="<BLABLA>" alt="<BLABLA>" />
|
||||||
list($headers, $data) = get_http_url($url, 5);
|
list($headers, $content) = get_http_response($url, 5);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
// Extract the link to the thumbnail
|
// Extract the link to the thumbnail
|
||||||
preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!',$data,$matches);
|
preg_match('!<img src="(http://imgs.xkcd.com/comics/.*)" title="[^s]!', $content, $matches);
|
||||||
if (!empty($matches[1]))
|
if (!empty($matches[1]))
|
||||||
{ // Let's download the image.
|
{ // Let's download the image.
|
||||||
$imageurl=$matches[1];
|
$imageurl=$matches[1];
|
||||||
// No control on image size, so wait long enough
|
// No control on image size, so wait long enough
|
||||||
list($headers, $data) = get_http_url($imageurl, 20);
|
list($headers, $content) = get_http_response($imageurl, 20);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
||||||
file_put_contents($filepath,$data); // Save image to cache.
|
// Save image to cache.
|
||||||
|
file_put_contents($filepath, $content);
|
||||||
if (resizeImage($filepath))
|
if (resizeImage($filepath))
|
||||||
{
|
{
|
||||||
header('Content-Type: image/jpeg');
|
header('Content-Type: image/jpeg');
|
||||||
|
@ -2425,10 +2411,11 @@ function genThumbnail()
|
||||||
{
|
{
|
||||||
// For all other domains, we try to download the image and make a thumbnail.
|
// For all other domains, we try to download the image and make a thumbnail.
|
||||||
// We allow 30 seconds max to download (and downloads are limited to 4 Mb)
|
// We allow 30 seconds max to download (and downloads are limited to 4 Mb)
|
||||||
list($headers, $data) = get_http_url($url, 30);
|
list($headers, $content) = get_http_response($url, 30);
|
||||||
if (strpos($headers[0], '200 OK') !== false) {
|
if (strpos($headers[0], '200 OK') !== false) {
|
||||||
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
$filepath=$GLOBALS['config']['CACHEDIR'].'/'.$thumbname;
|
||||||
file_put_contents($filepath,$data); // Save image to cache.
|
// Save image to cache.
|
||||||
|
file_put_contents($filepath, $content);
|
||||||
if (resizeImage($filepath))
|
if (resizeImage($filepath))
|
||||||
{
|
{
|
||||||
header('Content-Type: image/jpeg');
|
header('Content-Type: image/jpeg');
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
require_once 'application/HttpUtils.php';
|
require_once 'application/HttpUtils.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unitary tests for get_http_url()
|
* Unitary tests for get_http_response()
|
||||||
*/
|
*/
|
||||||
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
||||||
{
|
{
|
||||||
|
@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
||||||
*/
|
*/
|
||||||
public function testGetInvalidLocalUrl()
|
public function testGetInvalidLocalUrl()
|
||||||
{
|
{
|
||||||
list($headers, $content) = get_http_url('/non/existent', 1);
|
// Local
|
||||||
$this->assertEquals('HTTP Error', $headers[0]);
|
list($headers, $content) = get_http_response('/non/existent', 1);
|
||||||
$this->assertRegexp(
|
$this->assertEquals('Invalid HTTP Url', $headers[0]);
|
||||||
'/failed to open stream: No such file or directory/',
|
$this->assertFalse($content);
|
||||||
$content
|
|
||||||
);
|
// Non HTTP
|
||||||
|
list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1);
|
||||||
|
$this->assertEquals('Invalid HTTP Url', $headers[0]);
|
||||||
|
$this->assertFalse($content);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -28,11 +31,8 @@ public function testGetInvalidLocalUrl()
|
||||||
*/
|
*/
|
||||||
public function testGetInvalidRemoteUrl()
|
public function testGetInvalidRemoteUrl()
|
||||||
{
|
{
|
||||||
list($headers, $content) = get_http_url('http://non.existent', 1);
|
list($headers, $content) = @get_http_response('http://non.existent', 1);
|
||||||
$this->assertEquals('HTTP Error', $headers[0]);
|
$this->assertFalse($headers);
|
||||||
$this->assertRegexp(
|
$this->assertFalse($content);
|
||||||
'/Name or service not known/',
|
|
||||||
$content
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
85
tests/LinkUtilsTest.php
Normal file
85
tests/LinkUtilsTest.php
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'application/LinkUtils.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class LinkUtilsTest.
|
||||||
|
*/
|
||||||
|
class LinkUtilsTest extends PHPUnit_Framework_TestCase
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Test html_extract_title() when the title is found.
|
||||||
|
*/
|
||||||
|
public function testHtmlExtractExistentTitle()
|
||||||
|
{
|
||||||
|
$title = 'Read me please.';
|
||||||
|
$html = '<html><meta>stuff</meta><title>'. $title .'</title></html>';
|
||||||
|
$this->assertEquals($title, html_extract_title($html));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test html_extract_title() when the title is not found.
|
||||||
|
*/
|
||||||
|
public function testHtmlExtractNonExistentTitle()
|
||||||
|
{
|
||||||
|
$html = '<html><meta>stuff</meta></html>';
|
||||||
|
$this->assertFalse(html_extract_title($html));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test get_charset() with all priorities.
|
||||||
|
*/
|
||||||
|
public function testGetCharset()
|
||||||
|
{
|
||||||
|
$headers = array('Content-Type' => 'text/html; charset=Headers');
|
||||||
|
$html = '<html><meta>stuff</meta><meta charset="Html"/></html>';
|
||||||
|
$default = 'default';
|
||||||
|
$this->assertEquals('headers', get_charset($headers, $html, $default));
|
||||||
|
$this->assertEquals('html', get_charset(array(), $html, $default));
|
||||||
|
$this->assertEquals($default, get_charset(array(), '', $default));
|
||||||
|
$this->assertEquals('utf-8', get_charset(array(), ''));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test headers_extract_charset() when the charset is found.
|
||||||
|
*/
|
||||||
|
public function testHeadersExtractExistentCharset()
|
||||||
|
{
|
||||||
|
$charset = 'x-MacCroatian';
|
||||||
|
$headers = array('Content-Type' => 'text/html; charset='. $charset);
|
||||||
|
$this->assertEquals(strtolower($charset), headers_extract_charset($headers));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test headers_extract_charset() when the charset is not found.
|
||||||
|
*/
|
||||||
|
public function testHeadersExtractNonExistentCharset()
|
||||||
|
{
|
||||||
|
$headers = array();
|
||||||
|
$this->assertFalse(headers_extract_charset($headers));
|
||||||
|
|
||||||
|
$headers = array('Content-Type' => 'text/html');
|
||||||
|
$this->assertFalse(headers_extract_charset($headers));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test html_extract_charset() when the charset is found.
|
||||||
|
*/
|
||||||
|
public function testHtmlExtractExistentCharset()
|
||||||
|
{
|
||||||
|
$charset = 'x-MacCroatian';
|
||||||
|
$html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>';
|
||||||
|
$this->assertEquals(strtolower($charset), html_extract_charset($html));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test html_extract_charset() when the charset is not found.
|
||||||
|
*/
|
||||||
|
public function testHtmlExtractNonExistentCharset()
|
||||||
|
{
|
||||||
|
$html = '<html><meta>stuff</meta></html>';
|
||||||
|
$this->assertFalse(html_extract_charset($html));
|
||||||
|
$html = '<html><meta>stuff</meta><meta charset=""/></html>';
|
||||||
|
$this->assertFalse(html_extract_charset($html));
|
||||||
|
}
|
||||||
|
}
|
|
@ -156,4 +156,22 @@ function testAddTrailingSlash()
|
||||||
$this->assertEquals($strOn, add_trailing_slash($strOn));
|
$this->assertEquals($strOn, add_trailing_slash($strOn));
|
||||||
$this->assertEquals($strOn, add_trailing_slash($strOff));
|
$this->assertEquals($strOn, add_trailing_slash($strOff));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test valid HTTP url.
|
||||||
|
*/
|
||||||
|
function testUrlIsHttp()
|
||||||
|
{
|
||||||
|
$url = new Url(self::$baseUrl);
|
||||||
|
$this->assertTrue($url->isHttp());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test non HTTP url.
|
||||||
|
*/
|
||||||
|
function testUrlIsNotHttp()
|
||||||
|
{
|
||||||
|
$url = new Url('ftp://save.tld/mysave');
|
||||||
|
$this->assertFalse($url->isHttp());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue