Merge pull request #1569 from ArthurHoaro/fix/bad-encoding

Fix warning if the encoding retrieved from external headers is invalid
This commit is contained in:
ArthurHoaro 2020-09-30 11:35:57 +02:00 committed by GitHub
commit c3fca560b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 2 deletions

View file

@ -26,7 +26,7 @@ function html_extract_title($html)
*/
function header_extract_charset($header)
{
preg_match('/charset="?([^; ]+)/i', $header, $match);
preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
if (! empty($match[1])) {
return strtolower(trim($match[1]));
}

View file

@ -69,7 +69,7 @@ class ManageShaareController extends ShaarliAdminController
$retrieveDescription
)
);
if (! empty($title) && strtolower($charset) !== 'utf-8') {
if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
$title = mb_convert_encoding($title, 'utf-8', $charset);
}
}

View file

@ -42,6 +42,19 @@ class LinkUtilsTest extends TestCase
$this->assertEquals(strtolower($charset), header_extract_charset($headers));
}
/**
* Test header_extract_charset() when the charset is found with odd quotes.
*/
public function testHeadersExtractExistentCharsetWithQuotes()
{
    $charset = 'x-MacCroatian';
    // The extractor is expected to hand back the lower-cased charset name.
    $expected = strtolower($charset);

    // Double-quoted value with another attribute glued right after the closing quote.
    $doubleQuoted = 'text/html; charset="' . $charset . '"otherstuff="test"';
    $this->assertEquals($expected, header_extract_charset($doubleQuoted));

    // Same layout, but the charset value is wrapped in single quotes.
    $singleQuoted = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
    $this->assertEquals($expected, header_extract_charset($singleQuoted));
}
/**
* Test header_extract_charset() when the charset is not found.
*/