Fix warning if the encoding retrieved from external headers is invalid

Also fixed the charset regex so that quotes are excluded from the captured value, which makes it support this previously failing header: charset="utf-8"\r\n"
This commit is contained in:
ArthurHoaro 2020-09-26 13:28:38 +02:00
parent d0ae1ba273
commit 1ea09a1b8b
3 changed files with 15 additions and 2 deletions

View file

@ -26,7 +26,7 @@ function html_extract_title($html)
*/
function header_extract_charset($header)
{
preg_match('/charset="?([^; ]+)/i', $header, $match);
preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
if (! empty($match[1])) {
return strtolower(trim($match[1]));
}

View file

@ -69,7 +69,7 @@ class ManageShaareController extends ShaarliAdminController
$retrieveDescription
)
);
if (! empty($title) && strtolower($charset) !== 'utf-8') {
if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
$title = mb_convert_encoding($title, 'utf-8', $charset);
}
}

View file

@ -42,6 +42,19 @@ class LinkUtilsTest extends TestCase
$this->assertEquals(strtolower($charset), header_extract_charset($headers));
}
/**
* Test header_extract_charset() when the charset is wrapped in double or single quotes.
*/
public function testHeadersExtractExistentCharsetWithQuotes()
{
    // Mixed-case charset name; the assertions expect the lowercased form back.
    $charset = 'x-MacCroatian';
    $expected = strtolower($charset);

    // The same header value, once with a double-quoted and once with a
    // single-quoted charset, each immediately followed by another attribute
    // with no separator after the closing quote.
    $quotedHeaders = [
        'text/html; charset="' . $charset . '"otherstuff="test"',
        'text/html; charset=\'' . $charset . '\'otherstuff="test"',
    ];

    foreach ($quotedHeaders as $headers) {
        $this->assertEquals($expected, header_extract_charset($headers));
    }
}
/**
* Test header_extract_charset() when the charset is not found.
*/