Fixes - Retrieve title fails in multiple cases

* `get_http_url()` renamed to `get_http_response()`.
  * Use the same HTTP context to retrieve response headers and content.
  * Follow HTTP 301 and 302 redirects to retrieve the title (at most 3 redirects by default).
  * Add `LinkUtils` to extract titles and charset.
  * Try to retrieve charset from HTTP headers first (new), then HTML content.
  * Use the `mbstring` extension to re-encode the title if necessary.
This commit is contained in:
ArthurHoaro 2016-01-04 10:45:54 +01:00
parent c0a50f3663
commit 1557cefbd7
8 changed files with 285 additions and 72 deletions

View file

@ -6,7 +6,7 @@
require_once 'application/HttpUtils.php';
/**
* Unitary tests for get_http_url()
* Unit tests for get_http_response()
*/
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
{
@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
*/
public function testGetInvalidLocalUrl()
{
// NOTE(review): the following call and its two assertions use get_http_url(),
// which the commit message says was renamed to get_http_response(). They look
// like the removed side of this diff hunk shown without -/+ markers - confirm
// before treating this body as runnable code.
list($headers, $content) = get_http_url('/non/existent', 1);
$this->assertEquals('HTTP Error', $headers[0]);
$this->assertRegexp(
'/failed to open stream: No such file or directory/',
$content
);
// Local filesystem path: expected to be reported as 'Invalid HTTP Url' in the
// first header entry, with the content set to false.
list($headers, $content) = get_http_response('/non/existent', 1);
$this->assertEquals('Invalid HTTP Url', $headers[0]);
$this->assertFalse($content);
// Non-HTTP scheme (ftp://): expected to be rejected the same way as a local path.
list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1);
$this->assertEquals('Invalid HTTP Url', $headers[0]);
$this->assertFalse($content);
}
/**
@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
*/
public function testGetInvalidRemoteUrl()
{
// NOTE(review): the following call and its two assertions use get_http_url(),
// which the commit message says was renamed to get_http_response(). They look
// like the removed side of this diff hunk shown without -/+ markers - confirm
// before treating this body as runnable code.
list($headers, $content) = get_http_url('http://non.existent', 1);
$this->assertEquals('HTTP Error', $headers[0]);
$this->assertRegexp(
'/Name or service not known/',
$content
);
// Unresolvable host: both the headers and the content are expected to be false.
// The @ operator silences any PHP warning or notice raised by the call itself.
list($headers, $content) = @get_http_response('http://non.existent', 1);
$this->assertFalse($headers);
$this->assertFalse($content);
}
}

85
tests/LinkUtilsTest.php Normal file
View file

@ -0,0 +1,85 @@
<?php
require_once 'application/LinkUtils.php';
/**
* Class LinkUtilsTest.
*/
class LinkUtilsTest extends PHPUnit_Framework_TestCase
{
    /**
     * html_extract_title() is expected to return the text enclosed in the
     * <title> element when one is present.
     */
    public function testHtmlExtractExistentTitle()
    {
        $title = 'Read me please.';
        $html = '<html><meta>stuff</meta><title>'. $title .'</title></html>';
        // assertSame checks type and value, so a loosely-equal result cannot pass.
        $this->assertSame($title, html_extract_title($html));
    }

    /**
     * html_extract_title() is expected to return false when the document
     * has no <title> element.
     */
    public function testHtmlExtractNonExistentTitle()
    {
        $html = '<html><meta>stuff</meta></html>';
        $this->assertFalse(html_extract_title($html));
    }

    /**
     * get_charset() priority order: HTTP headers first, then the HTML meta
     * tag, then the caller-supplied default, and finally 'utf-8' when no
     * default is given.
     */
    public function testGetCharset()
    {
        $headers = array('Content-Type' => 'text/html; charset=Headers');
        $html = '<html><meta>stuff</meta><meta charset="Html"/></html>';
        $default = 'default';

        // Expected values are lowercase although the inputs are mixed case.
        $this->assertSame('headers', get_charset($headers, $html, $default));
        $this->assertSame('html', get_charset(array(), $html, $default));
        $this->assertSame($default, get_charset(array(), '', $default));
        $this->assertSame('utf-8', get_charset(array(), ''));
    }

    /**
     * headers_extract_charset() is expected to return the lowercased charset
     * declared in the Content-Type header.
     */
    public function testHeadersExtractExistentCharset()
    {
        $charset = 'x-MacCroatian';
        $headers = array('Content-Type' => 'text/html; charset='. $charset);
        $this->assertSame(strtolower($charset), headers_extract_charset($headers));
    }

    /**
     * headers_extract_charset() is expected to return false when there is no
     * Content-Type header, or when that header declares no charset.
     */
    public function testHeadersExtractNonExistentCharset()
    {
        $headers = array();
        $this->assertFalse(headers_extract_charset($headers));

        $headers = array('Content-Type' => 'text/html');
        $this->assertFalse(headers_extract_charset($headers));
    }

    /**
     * html_extract_charset() is expected to return the lowercased charset
     * declared in a <meta charset="..."> tag.
     */
    public function testHtmlExtractExistentCharset()
    {
        $charset = 'x-MacCroatian';
        $html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>';
        $this->assertSame(strtolower($charset), html_extract_charset($html));
    }

    /**
     * html_extract_charset() is expected to return false when no meta charset
     * tag exists, or when its value is empty.
     */
    public function testHtmlExtractNonExistentCharset()
    {
        $html = '<html><meta>stuff</meta></html>';
        $this->assertFalse(html_extract_charset($html));

        $html = '<html><meta>stuff</meta><meta charset=""/></html>';
        $this->assertFalse(html_extract_charset($html));
    }
}

View file

@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase
$this->assertEquals($strOn, add_trailing_slash($strOn));
$this->assertEquals($strOn, add_trailing_slash($strOff));
}
/**
* Test valid HTTP url.
*/
public function testUrlIsHttp()
{
    // self::$baseUrl is a fixture declared elsewhere in this class; the test
    // expects Url::isHttp() to report it as an HTTP URL.
    $url = new Url(self::$baseUrl);
    $this->assertTrue($url->isHttp());
}
/**
* Test non HTTP url.
*/
public function testUrlIsNotHttp()
{
    // An ftp:// URL is expected to be rejected by Url::isHttp().
    $url = new Url('ftp://save.tld/mysave');
    $this->assertFalse($url->isHttp());
}
}