From 6a4872520cbbc012b5a8358cd50c78844afe8d07 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sat, 8 Jun 2019 13:59:19 +0200 Subject: [PATCH] Automatically retrieve description for new bookmarks If the option is enabled, it will try to find a meta tag containing the page description and keywords, just like we do for the page title. It will either look for regular meta tag or OpenGraph ones. The option is disabled by default. Note that keywords meta tags is mostly not used. In `configure` template, the variable associated with this setting is `$retrieve_description`. Fixes #1302 --- application/bookmark/LinkUtils.php | 85 +++++++++- application/config/ConfigManager.php | 1 + doc/md/Shaarli-configuration.md | 2 + inc/languages/fr/LC_MESSAGES/shaarli.po | 128 ++++++++------- index.php | 5 +- tests/bookmark/LinkUtilsTest.php | 204 ++++++++++++++++++++++-- tpl/default/configure.html | 16 ++ tpl/vintage/configure.html | 8 + 8 files changed, 374 insertions(+), 75 deletions(-) diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index 35a5b29..77eb2d9 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php @@ -7,13 +7,25 @@ use Shaarli\Bookmark\LinkDB; * * @param string $charset to extract from the downloaded page (reference) * @param string $title to extract from the downloaded page (reference) + * @param string $description to extract from the downloaded page (reference) + * @param string $keywords to extract from the downloaded page (reference) + * @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content * @param string $curlGetInfo Optionally overrides curl_getinfo function * * @return Closure */ -function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo') -{ +function get_curl_download_callback( + &$charset, + &$title, + &$description, + &$keywords, + $retrieveDescription, + $curlGetInfo = 'curl_getinfo' +) { $isRedirected = false; + $currentChunk = 0; + $foundChunk = null; + /** * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). * @@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get * * @return int|bool length of $data or false if we need to stop the download */ - return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) { + return function (&$ch, $data) use ( + $retrieveDescription, + $curlGetInfo, + &$charset, + &$title, + &$description, + &$keywords, + &$isRedirected, + &$currentChunk, + &$foundChunk + ) { + $currentChunk++; $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); if (!empty($responseCode) && in_array($responseCode, [301, 302])) { $isRedirected = true; @@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get } if (empty($title)) { $title = html_extract_title($data); + $foundChunk = ! empty($title) ? $currentChunk : $foundChunk; } + if ($retrieveDescription && empty($description)) { + $description = html_extract_tag('description', $data); + $foundChunk = ! empty($description) ? $currentChunk : $foundChunk; + } + if ($retrieveDescription && empty($keywords)) { + $keywords = html_extract_tag('keywords', $data); + if (! empty($keywords)) { + $foundChunk = $currentChunk; + // Keywords use the format tag1, tag2 multiple words, tag + // So we format them to match Shaarli's separator and glue multiple words with '-' + $keywords = implode(' ', array_map(function($keyword) { + return implode('-', preg_split('/\s+/', trim($keyword))); + }, explode(',', $keywords))); + } + } + // We got everything we want, stop the download. - if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) { + // If we already found either the title, description or keywords, + // it's highly unlikely that we'll found the other metas further than + // in the same chunk of data or the next one. So we also stop the download after that. + if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null + && (! $retrieveDescription + || $foundChunk < $currentChunk + || (!empty($title) && !empty($description) && !empty($keywords)) + ) + ) { return false; } @@ -110,6 +158,35 @@ function html_extract_charset($html) return false; } +/** + * Extract meta tag from HTML content in either: + * - OpenGraph: + * - Meta tag: + * + * @param string $tag Name of the tag to retrieve. + * @param string $html HTML content where to look for charset. + * + * @return bool|string Charset string if found, false otherwise. + */ +function html_extract_tag($tag, $html) +{ + $propertiesKey = ['property', 'name', 'itemprop']; + $properties = implode('|', $propertiesKey); + // Try to retrieve OpenGraph image. + $ogRegex = '#]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#'; + // If the attributes are not in the order property => content (e.g. Github) + // New regex to keep this readable... more or less. + $ogRegexReverse = '#]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og)?:'. $tag .'["\'\s/>]#'; + + if (preg_match($ogRegex, $html, $matches) > 0 + || preg_match($ogRegexReverse, $html, $matches) > 0 + ) { + return $matches[1]; + } + + return false; +} + /** * Count private links in given linklist. * diff --git a/application/config/ConfigManager.php b/application/config/ConfigManager.php index 3099392..c95e680 100644 --- a/application/config/ConfigManager.php +++ b/application/config/ConfigManager.php @@ -365,6 +365,7 @@ class ConfigManager $this->setEmpty('general.links_per_page', 20); $this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS); $this->setEmpty('general.default_note_title', 'Note: '); + $this->setEmpty('general.retrieve_description', false); $this->setEmpty('updates.check_updates', false); $this->setEmpty('updates.check_updates_branch', 'stable'); diff --git a/doc/md/Shaarli-configuration.md b/doc/md/Shaarli-configuration.md index a931ab1..664e36d 100644 --- a/doc/md/Shaarli-configuration.md +++ b/doc/md/Shaarli-configuration.md @@ -56,6 +56,8 @@ _These settings should not be edited_ - **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php). - **enabled_plugins**: List of enabled plugins. - **default_note_title**: Default title of a new note. +- **retrieve_description** (boolean): If set to true, for every new links Shaarli will try +to retrieve the description and keywords from the HTML meta tags. ### Security diff --git a/inc/languages/fr/LC_MESSAGES/shaarli.po b/inc/languages/fr/LC_MESSAGES/shaarli.po index c2c73b2..611296f 100644 --- a/inc/languages/fr/LC_MESSAGES/shaarli.po +++ b/inc/languages/fr/LC_MESSAGES/shaarli.po @@ -1,8 +1,8 @@ msgid "" msgstr "" "Project-Id-Version: Shaarli\n" -"POT-Creation-Date: 2019-05-25 16:37+0200\n" -"PO-Revision-Date: 2019-05-25 16:37+0200\n" +"POT-Creation-Date: 2019-07-06 12:14+0200\n" +"PO-Revision-Date: 2019-07-06 12:17+0200\n" "Last-Translator: \n" "Language-Team: Shaarli\n" "Language: fr_FR\n" @@ -252,7 +252,7 @@ msgstr "404 Introuvable" msgid "Couldn't retrieve updater class methods." msgstr "Impossible de récupérer les méthodes de la classe Updater." -#: application/updater/Updater.php:526 index.php:1033 +#: application/updater/Updater.php:526 index.php:1034 msgid "" "You have enabled or changed thumbnails mode. Please synchronize them." @@ -337,8 +337,8 @@ msgid "You are not supposed to change a password on an Open Shaarli." msgstr "" "Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert." -#: index.php:957 index.php:1007 index.php:1092 index.php:1122 index.php:1232 -#: index.php:1279 +#: index.php:957 index.php:1007 index.php:1094 index.php:1124 index.php:1234 +#: index.php:1281 msgid "Wrong token." msgstr "Jeton invalide." @@ -356,64 +356,64 @@ msgstr "Votre mot de passe a été modifié" msgid "Change password" msgstr "Modifier le mot de passe" -#: index.php:1053 +#: index.php:1054 msgid "Configuration was saved." msgstr "La configuration a été sauvegardée." -#: index.php:1076 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24 +#: index.php:1078 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24 msgid "Configure" msgstr "Configurer" -#: index.php:1086 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 +#: index.php:1088 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36 msgid "Manage tags" msgstr "Gérer les tags" -#: index.php:1105 +#: index.php:1107 #, php-format msgid "The tag was removed from %d link." msgid_plural "The tag was removed from %d links." msgstr[0] "Le tag a été supprimé de %d lien." msgstr[1] "Le tag a été supprimé de %d liens." -#: index.php:1106 +#: index.php:1108 #, php-format msgid "The tag was renamed in %d link." msgid_plural "The tag was renamed in %d links." msgstr[0] "Le tag a été renommé dans %d lien." msgstr[1] "Le tag a été renommé dans %d liens." -#: index.php:1113 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 +#: index.php:1115 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 msgid "Shaare a new link" msgstr "Partager un nouveau lien" -#: index.php:1342 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169 +#: index.php:1344 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169 msgid "Edit" msgstr "Modifier" -#: index.php:1342 index.php:1413 +#: index.php:1344 index.php:1416 #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26 msgid "Shaare" msgstr "Shaare" -#: index.php:1382 +#: index.php:1385 msgid "Note: " msgstr "Note : " -#: index.php:1421 +#: index.php:1424 msgid "Invalid link ID provided" msgstr "" -#: index.php:1441 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65 +#: index.php:1444 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65 msgid "Export" msgstr "Exporter" -#: index.php:1503 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83 +#: index.php:1506 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83 msgid "Import" msgstr "Importer" -#: index.php:1513 +#: index.php:1516 #, php-format msgid "" "The file you are trying to upload is probably bigger than what this " @@ -423,20 +423,20 @@ msgstr "" "le serveur web peut accepter (%s). Merci de l'envoyer en parties plus " "légères." -#: index.php:1558 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26 +#: index.php:1561 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22 msgid "Plugin administration" msgstr "Administration des plugins" -#: index.php:1612 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14 +#: index.php:1615 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14 msgid "Thumbnails update" msgstr "Mise à jour des miniatures" -#: index.php:1778 +#: index.php:1781 msgid "Search: " msgstr "Recherche : " -#: index.php:1821 +#: index.php:1824 #, php-format msgid "" "
Sessions do not seem to work correctly on your server.
Make sure the " @@ -455,7 +455,7 @@ msgstr "" "des cookies. Nous vous recommandons d'accéder à votre serveur depuis son " "adresse IP ou un Fully Qualified Domain Name.
" -#: index.php:1831 +#: index.php:1834 msgid "Click to try again." msgstr "Cliquer ici pour réessayer." @@ -592,7 +592,7 @@ msgstr "Mauvaise réponse du hub %s" msgid "Enable PubSubHubbub feed publishing." msgstr "Active la publication de flux vers PubSubHubbub." -#: plugins/qrcode/qrcode.php:73 plugins/wallabag/wallabag.php:68 +#: plugins/qrcode/qrcode.php:72 plugins/wallabag/wallabag.php:68 msgid "For each link, add a QRCode icon." msgstr "Pour chaque lien, ajouter une icône de QRCode." @@ -679,6 +679,34 @@ msgstr "Vous pouvez aussi modifier les tags dans la" msgid "tag list" msgstr "liste des tags" +#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:143 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312 +#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31 +msgid "All" +msgstr "Tous" + +#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:147 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:316 +msgid "Only common media hosts" +msgstr "Seulement les hébergeurs de média connus" + +#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:151 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:320 +msgid "None" +msgstr "Aucune" + +#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:158 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:297 +msgid "You need to enable the extension php-gd to use thumbnails." +msgstr "" +"Vous devez activer l'extension php-gd pour utiliser les " +"miniatures." + +#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:162 +#| msgid "Enable thumbnails" +msgid "Synchonize thumbnails" +msgstr "" + #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29 msgid "title" msgstr "titre" @@ -762,50 +790,41 @@ msgid "Notify me when a new release is ready" msgstr "Me notifier lorsqu'une nouvelle version est disponible" #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247 +msgid "Automatically retrieve description for new bookmarks" +msgstr "Récupérer automatiquement la description" + +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248 +msgid "Shaarli will try to retrieve the description from meta HTML headers" +msgstr "" +"Shaarli essaiera de récupérer la description depuis les balises HTML meta " +"dans les entêtes" + +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263 #: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169 msgid "Enable REST API" msgstr "Activer l'API REST" -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:264 #: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170 msgid "Allow third party software to use Shaarli such as mobile application" msgstr "" "Permet aux applications tierces d'utiliser Shaarli, par exemple les " "applications mobiles" -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:279 msgid "API secret" msgstr "Clé d'API secrète" -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:277 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:293 msgid "Enable thumbnails" msgstr "Activer les miniatures" -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:281 -msgid "You need to enable the extension php-gd to use thumbnails." -msgstr "" -"Vous devez activer l'extension php-gd pour utiliser les " -"miniatures." - -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:285 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:301 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56 msgid "Synchronize thumbnails" msgstr "Synchroniser les miniatures" -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:296 -#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31 -msgid "All" -msgstr "Tous" - -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:300 -msgid "Only common media hosts" -msgstr "Seulement les hébergeurs de média connus" - -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:304 -msgid "None" -msgstr "Aucune" - -#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312 +#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:328 #: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72 #: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139 #: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199 @@ -1149,17 +1168,13 @@ msgstr "Déconnexion" #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150 -#, fuzzy -#| msgid "Public" msgid "Set public" -msgstr "Publics" +msgstr "Rendre public" #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155 -#, fuzzy -#| msgid "Private" msgid "Set private" -msgstr "Privé" +msgstr "Rendre privé" #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187 @@ -1409,11 +1424,6 @@ msgstr "" "Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « " "Ajouter aux favoris »" -#, fuzzy -#~| msgid "Enable thumbnails" -#~ msgid "Synchonize thumbnails" -#~ msgstr "Activer les miniatures" - #~ msgid "" #~ "You need to browse your Shaarli over HTTPS to use this " #~ "functionality." diff --git a/index.php b/index.php index a14616e..957d8d9 100644 --- a/index.php +++ b/index.php @@ -1015,6 +1015,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, $conf->set('general.timezone', $tz); $conf->set('general.title', escape($_POST['title'])); $conf->set('general.header_link', escape($_POST['titleLink'])); + $conf->set('general.retrieve_description', !empty($_POST['retrieveDescription'])); $conf->set('resource.theme', escape($_POST['theme'])); $conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection'])); $conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault'])); @@ -1063,6 +1064,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, ); $PAGE->assign('continents', $continents); $PAGE->assign('cities', $cities); + $PAGE->assign('retrieve_description', $conf->get('general.retrieve_description')); $PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false)); $PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false)); $PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false)); @@ -1364,13 +1366,14 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager, // If this is an HTTP(S) link, we try go get the page to extract // the title (otherwise we will to straight to the edit form.) if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { + $retrieveDescription = $conf->get('general.retrieve_description'); // Short timeout to keep the application responsive // The callback will fill $charset and $title with data from the downloaded page. get_http_response( $url, $conf->get('general.download_timeout', 30), $conf->get('general.download_max_size', 4194304), - get_curl_download_callback($charset, $title) + get_curl_download_callback($charset, $title, $description, $tags, $retrieveDescription) ); if (! empty($title) && strtolower($charset) != 'utf-8') { $title = mb_convert_encoding($title, 'utf-8', $charset); diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php index 25fb304..78cb8f2 100644 --- a/tests/bookmark/LinkUtilsTest.php +++ b/tests/bookmark/LinkUtilsTest.php @@ -2,14 +2,16 @@ namespace Shaarli\Bookmark; +use PHPUnit\Framework\TestCase; use ReferenceLinkDB; +use Shaarli\Config\ConfigManager; require_once 'tests/utils/CurlUtils.php'; /** * Class LinkUtilsTest. */ -class LinkUtilsTest extends \PHPUnit\Framework\TestCase +class LinkUtilsTest extends TestCase { /** * Test html_extract_title() when the title is found. @@ -75,12 +77,57 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase $this->assertFalse(html_extract_charset($html)); } + /** + * Test html_extract_tag() when the tag '; + $this->assertEquals($description, html_extract_tag('description', $html)); + } + + /** + * Test html_extract_tag() when the tag assertFalse(html_extract_tag('description', $html)); + } + + /** + * Test html_extract_tag() when the tag '; + $this->assertEquals($description, html_extract_tag('description', $html)); + } + + /** + * Test html_extract_tag() when the tag '; + $this->assertFalse(html_extract_tag('description', $html)); + } + /** * Test the download callback with valid value */ public function testCurlDownloadCallbackOk() { - $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); + $callback = get_curl_download_callback( + $charset, + $title, + $desc, + $keywords, + false, + 'ut_curl_getinfo_ok' + ); $data = [ 'HTTP/1.1 200 OK', 'Server: GitHub.com', @@ -90,7 +137,9 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase 'end' => 'th=device-width">' . 'Refactoring · GitHub' . '' + . '', ]; foreach ($data as $key => $line) { $ignore = null; @@ -102,6 +151,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase } $this->assertEquals('utf-8', $charset); $this->assertEquals('Refactoring · GitHub', $title); + $this->assertEmpty($desc); + $this->assertEmpty($keywords); } /** @@ -109,13 +160,22 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase */ public function testCurlDownloadCallbackOkNoCharset() { - $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); + $callback = get_curl_download_callback( + $charset, + $title, + $desc, + $keywords, + false, + 'ut_curl_getinfo_no_charset' + ); $data = [ 'HTTP/1.1 200 OK', 'end' => 'th=device-width">' . 'Refactoring · GitHub' . '' + . '', ]; foreach ($data as $key => $line) { $ignore = null; @@ -123,6 +183,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase } $this->assertEmpty($charset); $this->assertEquals('Refactoring · GitHub', $title); + $this->assertEmpty($desc); + $this->assertEmpty($keywords); } /** @@ -130,14 +192,23 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase */ public function testCurlDownloadCallbackOkHtmlCharset() { - $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); + $callback = get_curl_download_callback( + $charset, + $title, + $desc, + $keywords, + false, + 'ut_curl_getinfo_no_charset' + ); $data = [ 'HTTP/1.1 200 OK', '', 'end' => 'th=device-width">' . 'Refactoring · GitHub' . '' + . '', ]; foreach ($data as $key => $line) { $ignore = null; @@ -149,6 +220,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase } $this->assertEquals('utf-8', $charset); $this->assertEquals('Refactoring · GitHub', $title); + $this->assertEmpty($desc); + $this->assertEmpty($keywords); } /** @@ -156,7 +229,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase */ public function testCurlDownloadCallbackOkNoTitle() { - $callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); + $callback = get_curl_download_callback( + $charset, + $title, + $desc, + $keywords, + false, + 'ut_curl_getinfo_ok' + ); $data = [ 'HTTP/1.1 200 OK', 'end' => 'th=device-width">Refactoring · GitHub' + . 'Refactoring · GitHub' + . '' + . '', + ]; + foreach ($data as $key => $line) { + $ignore = null; + $expected = $key !== 'end' ? strlen($line) : false; + $this->assertEquals($expected, $callback($ignore, $line)); + if ($expected === false) { + break; + } + } + $this->assertEquals('utf-8', $charset); + $this->assertEquals('Refactoring · GitHub', $title); + $this->assertEquals('link desc', $desc); + $this->assertEquals('key1 key2', $keywords); + } + + /** + * Test the download callback with valid value, and retrieve_description option enabled, + * but no desc or keyword defined in the page. + */ + public function testCurlDownloadCallbackOkWithDescNotFound() + { + $callback = get_curl_download_callback( + $charset, + $title, + $desc, + $keywords, + true, + 'ut_curl_getinfo_ok' + ); + $data = [ + 'HTTP/1.1 200 OK', + 'Server: GitHub.com', + 'Date: Sat, 28 Oct 2017 12:01:33 GMT', + 'Content-Type: text/html; charset=utf-8', + 'Status: 200 OK', + 'th=device-width">' + . 'Refactoring · GitHub' + . ' +
+
+ +
+
+
+
+ +
+
+
diff --git a/tpl/vintage/configure.html b/tpl/vintage/configure.html index f1892fa..160286a 100644 --- a/tpl/vintage/configure.html +++ b/tpl/vintage/configure.html @@ -106,6 +106,14 @@ + + Automatically retrieve description for new bookmarks: + + + + + Enable REST API