Fixes #410 - Retrieve title fails in multiple cases
* Rename `get_http_url()` to `get_http_response()`.
* Use the same HTTP context to retrieve both the response headers and the content.
* Follow HTTP 301 and 302 redirects when retrieving the title (at most 3 redirects by default).
* Add `LinkUtils` to extract the title and the charset.
* Try to retrieve the charset from the HTTP headers first (new), then from the HTML content.
* Use the `mbstring` extension to re-encode the title if necessary.
This commit is contained in:
parent
c0a50f3663
commit
1557cefbd7
8 changed files with 285 additions and 72 deletions
tests
|
@ -6,7 +6,7 @@
|
|||
require_once 'application/HttpUtils.php';
|
||||
|
||||
/**
|
||||
* Unitary tests for get_http_url()
|
||||
* Unitary tests for get_http_response()
|
||||
*/
|
||||
class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
||||
{
|
||||
|
@ -15,12 +15,15 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
|||
*/
|
||||
public function testGetInvalidLocalUrl()
|
||||
{
|
||||
list($headers, $content) = get_http_url('/non/existent', 1);
|
||||
$this->assertEquals('HTTP Error', $headers[0]);
|
||||
$this->assertRegexp(
|
||||
'/failed to open stream: No such file or directory/',
|
||||
$content
|
||||
);
|
||||
// Local
|
||||
list($headers, $content) = get_http_response('/non/existent', 1);
|
||||
$this->assertEquals('Invalid HTTP Url', $headers[0]);
|
||||
$this->assertFalse($content);
|
||||
|
||||
// Non HTTP
|
||||
list($headers, $content) = get_http_response('ftp://save.tld/mysave', 1);
|
||||
$this->assertEquals('Invalid HTTP Url', $headers[0]);
|
||||
$this->assertFalse($content);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -28,11 +31,8 @@ class GetHttpUrlTest extends PHPUnit_Framework_TestCase
|
|||
*/
|
||||
public function testGetInvalidRemoteUrl()
|
||||
{
|
||||
list($headers, $content) = get_http_url('http://non.existent', 1);
|
||||
$this->assertEquals('HTTP Error', $headers[0]);
|
||||
$this->assertRegexp(
|
||||
'/Name or service not known/',
|
||||
$content
|
||||
);
|
||||
list($headers, $content) = @get_http_response('http://non.existent', 1);
|
||||
$this->assertFalse($headers);
|
||||
$this->assertFalse($content);
|
||||
}
|
||||
}
|
||||
|
|
85
tests/LinkUtilsTest.php
Normal file
85
tests/LinkUtilsTest.php
Normal file
|
@ -0,0 +1,85 @@
|
|||
<?php
|
||||
|
||||
require_once 'application/LinkUtils.php';
|
||||
|
||||
/**
|
||||
* Class LinkUtilsTest.
|
||||
*/
|
||||
class LinkUtilsTest extends PHPUnit_Framework_TestCase
|
||||
{
|
||||
/**
|
||||
* Test html_extract_title() when the title is found.
|
||||
*/
|
||||
public function testHtmlExtractExistentTitle()
|
||||
{
|
||||
$title = 'Read me please.';
|
||||
$html = '<html><meta>stuff</meta><title>'. $title .'</title></html>';
|
||||
$this->assertEquals($title, html_extract_title($html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_title() when the title is not found.
|
||||
*/
|
||||
public function testHtmlExtractNonExistentTitle()
|
||||
{
|
||||
$html = '<html><meta>stuff</meta></html>';
|
||||
$this->assertFalse(html_extract_title($html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test get_charset() with all priorities.
|
||||
*/
|
||||
public function testGetCharset()
|
||||
{
|
||||
$headers = array('Content-Type' => 'text/html; charset=Headers');
|
||||
$html = '<html><meta>stuff</meta><meta charset="Html"/></html>';
|
||||
$default = 'default';
|
||||
$this->assertEquals('headers', get_charset($headers, $html, $default));
|
||||
$this->assertEquals('html', get_charset(array(), $html, $default));
|
||||
$this->assertEquals($default, get_charset(array(), '', $default));
|
||||
$this->assertEquals('utf-8', get_charset(array(), ''));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test headers_extract_charset() when the charset is found.
|
||||
*/
|
||||
public function testHeadersExtractExistentCharset()
|
||||
{
|
||||
$charset = 'x-MacCroatian';
|
||||
$headers = array('Content-Type' => 'text/html; charset='. $charset);
|
||||
$this->assertEquals(strtolower($charset), headers_extract_charset($headers));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test headers_extract_charset() when the charset is not found.
|
||||
*/
|
||||
public function testHeadersExtractNonExistentCharset()
|
||||
{
|
||||
$headers = array();
|
||||
$this->assertFalse(headers_extract_charset($headers));
|
||||
|
||||
$headers = array('Content-Type' => 'text/html');
|
||||
$this->assertFalse(headers_extract_charset($headers));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_charset() when the charset is found.
|
||||
*/
|
||||
public function testHtmlExtractExistentCharset()
|
||||
{
|
||||
$charset = 'x-MacCroatian';
|
||||
$html = '<html><meta>stuff2</meta><meta charset="'. $charset .'"/></html>';
|
||||
$this->assertEquals(strtolower($charset), html_extract_charset($html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_charset() when the charset is not found.
|
||||
*/
|
||||
public function testHtmlExtractNonExistentCharset()
|
||||
{
|
||||
$html = '<html><meta>stuff</meta></html>';
|
||||
$this->assertFalse(html_extract_charset($html));
|
||||
$html = '<html><meta>stuff</meta><meta charset=""/></html>';
|
||||
$this->assertFalse(html_extract_charset($html));
|
||||
}
|
||||
}
|
|
@ -156,4 +156,22 @@ class UrlTest extends PHPUnit_Framework_TestCase
|
|||
$this->assertEquals($strOn, add_trailing_slash($strOn));
|
||||
$this->assertEquals($strOn, add_trailing_slash($strOff));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test valid HTTP url.
|
||||
*/
|
||||
function testUrlIsHttp()
|
||||
{
|
||||
$url = new Url(self::$baseUrl);
|
||||
$this->assertTrue($url->isHttp());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test non HTTP url.
|
||||
*/
|
||||
function testUrlIsNotHttp()
|
||||
{
|
||||
$url = new Url('ftp://save.tld/mysave');
|
||||
$this->assertFalse($url->isHttp());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue