Merge pull request #1133 from ArthurHoaro/hotfix/title-dl

Title retrieval fixes
This commit is contained in:
ArthurHoaro 2018-05-02 18:28:09 +02:00 committed by GitHub
commit 3e35fc10e5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 5 deletions

View file

@ -11,6 +11,7 @@
*/ */
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo') function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
{ {
$isRedirected = false;
/** /**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
* *
@ -22,16 +23,24 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
* *
* @return int|bool length of $data or false if we need to stop the download * @return int|bool length of $data or false if we need to stop the download
*/ */
return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) { return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
if (!empty($responseCode) && $responseCode != 200) { if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true;
return strlen($data);
}
if (!empty($responseCode) && $responseCode !== 200) {
return false; return false;
} }
$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); // After a redirection, the content type will keep the previous request value
// until it finds the next content-type header.
if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
}
if (!empty($contentType) && strpos($contentType, 'text/html') === false) { if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
return false; return false;
} }
if (empty($charset)) { if (!empty($contentType) && empty($charset)) {
$charset = header_extract_charset($contentType); $charset = header_extract_charset($contentType);
} }
if (empty($charset)) { if (empty($charset)) {

View file

@ -1376,8 +1376,8 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
// The callback will fill $charset and $title with data from the downloaded page. // The callback will fill $charset and $title with data from the downloaded page.
get_http_response( get_http_response(
$url, $url,
$conf->get('general.download_max_size', 4194304),
$conf->get('general.download_timeout', 30), $conf->get('general.download_timeout', 30),
$conf->get('general.download_max_size', 4194304),
get_curl_download_callback($charset, $title) get_curl_download_callback($charset, $title)
); );
if (! empty($title) && strtolower($charset) != 'utf-8') { if (! empty($title) && strtolower($charset) != 'utf-8') {