Fix warning if the encoding retrieved from external headers is invalid

Also fixed the regex to support this failing header: charset="utf-8"\r\n"
This commit is contained in:
ArthurHoaro 2020-09-26 13:28:38 +02:00
parent d0ae1ba273
commit 1ea09a1b8b
3 changed files with 15 additions and 2 deletions

View file

@ -26,7 +26,7 @@ function html_extract_title($html)
*/ */
function header_extract_charset($header) function header_extract_charset($header)
{ {
preg_match('/charset="?([^; ]+)/i', $header, $match); preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
if (! empty($match[1])) { if (! empty($match[1])) {
return strtolower(trim($match[1])); return strtolower(trim($match[1]));
} }

View file

@ -69,7 +69,7 @@ public function displayCreateForm(Request $request, Response $response): Respons
$retrieveDescription $retrieveDescription
) )
); );
if (! empty($title) && strtolower($charset) !== 'utf-8') { if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
$title = mb_convert_encoding($title, 'utf-8', $charset); $title = mb_convert_encoding($title, 'utf-8', $charset);
} }
} }

View file

@ -42,6 +42,19 @@ public function testHeadersExtractExistentCharset()
$this->assertEquals(strtolower($charset), header_extract_charset($headers)); $this->assertEquals(strtolower($charset), header_extract_charset($headers));
} }
/**
 * header_extract_charset() must return the lowercased charset when its value
 * is wrapped in double or single quotes and is immediately followed by other
 * content, with no separator in between.
 */
public function testHeadersExtractExistentCharsetWithQuotes()
{
    $charset = 'x-MacCroatian';
    $expected = strtolower($charset);

    // Double-quoted value, directly followed by another attribute.
    $doubleQuoted = 'text/html; charset="' . $charset . '"otherstuff="test"';
    $this->assertEquals($expected, header_extract_charset($doubleQuoted));

    // Single-quoted value, same layout.
    $singleQuoted = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
    $this->assertEquals($expected, header_extract_charset($singleQuoted));
}
/** /**
* Test headers_extract_charset() when the charset is not found. * Test headers_extract_charset() when the charset is not found.
*/ */