Merge pull request #1133 from ArthurHoaro/hotfix/title-dl

Title retrieval fixes
This commit is contained in:
ArthurHoaro 2018-05-02 18:28:09 +02:00 committed by GitHub
commit 3e35fc10e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 5 deletions

View file

@ -11,6 +11,7 @@
*/
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
{
$isRedirected = false;
/**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
*
@ -22,16 +23,24 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
*
* @return int|bool length of $data or false if we need to stop the download
*/
return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) {
return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
if (!empty($responseCode) && $responseCode != 200) {
if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true;
return strlen($data);
}
if (!empty($responseCode) && $responseCode !== 200) {
return false;
}
// After a redirection, the content type will keep the previous request value
// until it finds the next content-type header.
if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
}
if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
return false;
}
if (empty($charset)) {
if (!empty($contentType) && empty($charset)) {
$charset = header_extract_charset($contentType);
}
if (empty($charset)) {

View file

@ -1376,8 +1376,8 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
// The callback will fill $charset and $title with data from the downloaded page.
get_http_response(
$url,
$conf->get('general.download_max_size', 4194304),
$conf->get('general.download_timeout', 30),
$conf->get('general.download_max_size', 4194304),
get_curl_download_callback($charset, $title)
);
if (! empty($title) && strtolower($charset) != 'utf-8') {