Automatically retrieve description for new bookmarks

If the option is enabled, it will try to find a meta tag containing
the page description and keywords, just like we do for the page title.
It will either look for regular meta tag or OpenGraph ones.

The option is disabled by default.

Note that keywords meta tags is mostly not used.

In `configure` template, the variable associated with this setting
is `$retrieve_description`.

Fixes #1302
This commit is contained in:
ArthurHoaro 2019-06-08 13:59:19 +02:00
parent 5d8a958d5d
commit 6a4872520c
8 changed files with 374 additions and 75 deletions

View file

@ -7,13 +7,25 @@
*
* @param string $charset to extract from the downloaded page (reference)
* @param string $title to extract from the downloaded page (reference)
* @param string $description to extract from the downloaded page (reference)
* @param string $keywords to extract from the downloaded page (reference)
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
* @param string $curlGetInfo Optionally overrides curl_getinfo function
*
* @return Closure
*/
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
{
function get_curl_download_callback(
&$charset,
&$title,
&$description,
&$keywords,
$retrieveDescription,
$curlGetInfo = 'curl_getinfo'
) {
$isRedirected = false;
$currentChunk = 0;
$foundChunk = null;
/**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
*
@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
*
* @return int|bool length of $data or false if we need to stop the download
*/
return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
return function (&$ch, $data) use (
$retrieveDescription,
$curlGetInfo,
&$charset,
&$title,
&$description,
&$keywords,
&$isRedirected,
&$currentChunk,
&$foundChunk
) {
$currentChunk++;
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true;
@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
}
if (empty($title)) {
$title = html_extract_title($data);
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($description)) {
$description = html_extract_tag('description', $data);
$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($keywords)) {
$keywords = html_extract_tag('keywords', $data);
if (! empty($keywords)) {
$foundChunk = $currentChunk;
// Keywords use the format tag1, tag2 multiple words, tag
// So we format them to match Shaarli's separator and glue multiple words with '-'
$keywords = implode(' ', array_map(function($keyword) {
return implode('-', preg_split('/\s+/', trim($keyword)));
}, explode(',', $keywords)));
}
}
// We got everything we want, stop the download.
if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
// If we already found either the title, description or keywords,
// it's highly unlikely that we'll found the other metas further than
// in the same chunk of data or the next one. So we also stop the download after that.
if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
&& (! $retrieveDescription
|| $foundChunk < $currentChunk
|| (!empty($title) && !empty($description) && !empty($keywords))
)
) {
return false;
}
@ -110,6 +158,35 @@ function html_extract_charset($html)
return false;
}
/**
* Extract meta tag from HTML content in either:
* - OpenGraph: <meta property="og:[tag]" ...>
* - Meta tag: <meta name="[tag]" ...>
*
* @param string $tag Name of the tag to retrieve.
* @param string $html HTML content where to look for charset.
*
* @return bool|string Charset string if found, false otherwise.
*/
function html_extract_tag($tag, $html)
{
$propertiesKey = ['property', 'name', 'itemprop'];
$properties = implode('|', $propertiesKey);
// Try to retrieve OpenGraph image.
$ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#';
// If the attributes are not in the order property => content (e.g. Github)
// New regex to keep this readable... more or less.
$ogRegexReverse = '#<meta[^>]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og)?:'. $tag .'["\'\s/>]#';
if (preg_match($ogRegex, $html, $matches) > 0
|| preg_match($ogRegexReverse, $html, $matches) > 0
) {
return $matches[1];
}
return false;
}
/**
* Count private links in given linklist.
*

View file

@ -365,6 +365,7 @@ protected function setDefaultValues()
$this->setEmpty('general.links_per_page', 20);
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
$this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', false);
$this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable');

View file

@ -56,6 +56,8 @@ _These settings should not be edited_
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
- **enabled_plugins**: List of enabled plugins.
- **default_note_title**: Default title of a new note.
- **retrieve_description** (boolean): If set to true, for every new links Shaarli will try
to retrieve the description and keywords from the HTML meta tags.
### Security

View file

@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: Shaarli\n"
"POT-Creation-Date: 2019-05-25 16:37+0200\n"
"PO-Revision-Date: 2019-05-25 16:37+0200\n"
"POT-Creation-Date: 2019-07-06 12:14+0200\n"
"PO-Revision-Date: 2019-07-06 12:17+0200\n"
"Last-Translator: \n"
"Language-Team: Shaarli\n"
"Language: fr_FR\n"
@ -252,7 +252,7 @@ msgstr "404 Introuvable"
msgid "Couldn't retrieve updater class methods."
msgstr "Impossible de récupérer les méthodes de la classe Updater."
#: application/updater/Updater.php:526 index.php:1033
#: application/updater/Updater.php:526 index.php:1034
msgid ""
"You have enabled or changed thumbnails mode. <a href=\"?do=thumbs_update"
"\">Please synchronize them</a>."
@ -337,8 +337,8 @@ msgid "You are not supposed to change a password on an Open Shaarli."
msgstr ""
"Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert."
#: index.php:957 index.php:1007 index.php:1092 index.php:1122 index.php:1232
#: index.php:1279
#: index.php:957 index.php:1007 index.php:1094 index.php:1124 index.php:1234
#: index.php:1281
msgid "Wrong token."
msgstr "Jeton invalide."
@ -356,64 +356,64 @@ msgstr "Votre mot de passe a été modifié"
msgid "Change password"
msgstr "Modifier le mot de passe"
#: index.php:1053
#: index.php:1054
msgid "Configuration was saved."
msgstr "La configuration a été sauvegardée."
#: index.php:1076 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
#: index.php:1078 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
msgid "Configure"
msgstr "Configurer"
#: index.php:1086 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: index.php:1088 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36
msgid "Manage tags"
msgstr "Gérer les tags"
#: index.php:1105
#: index.php:1107
#, php-format
msgid "The tag was removed from %d link."
msgid_plural "The tag was removed from %d links."
msgstr[0] "Le tag a été supprimé de %d lien."
msgstr[1] "Le tag a été supprimé de %d liens."
#: index.php:1106
#: index.php:1108
#, php-format
msgid "The tag was renamed in %d link."
msgid_plural "The tag was renamed in %d links."
msgstr[0] "Le tag a été renommé dans %d lien."
msgstr[1] "Le tag a été renommé dans %d liens."
#: index.php:1113 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: index.php:1115 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
msgid "Shaare a new link"
msgstr "Partager un nouveau lien"
#: index.php:1342 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
#: index.php:1344 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Edit"
msgstr "Modifier"
#: index.php:1342 index.php:1413
#: index.php:1344 index.php:1416
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26
msgid "Shaare"
msgstr "Shaare"
#: index.php:1382
#: index.php:1385
msgid "Note: "
msgstr "Note : "
#: index.php:1421
#: index.php:1424
msgid "Invalid link ID provided"
msgstr ""
#: index.php:1441 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
#: index.php:1444 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
msgid "Export"
msgstr "Exporter"
#: index.php:1503 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
#: index.php:1506 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
msgid "Import"
msgstr "Importer"
#: index.php:1513
#: index.php:1516
#, php-format
msgid ""
"The file you are trying to upload is probably bigger than what this "
@ -423,20 +423,20 @@ msgstr ""
"le serveur web peut accepter (%s). Merci de l'envoyer en parties plus "
"légères."
#: index.php:1558 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: index.php:1561 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22
msgid "Plugin administration"
msgstr "Administration des plugins"
#: index.php:1612 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
#: index.php:1615 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
msgid "Thumbnails update"
msgstr "Mise à jour des miniatures"
#: index.php:1778
#: index.php:1781
msgid "Search: "
msgstr "Recherche : "
#: index.php:1821
#: index.php:1824
#, php-format
msgid ""
"<pre>Sessions do not seem to work correctly on your server.<br>Make sure the "
@ -455,7 +455,7 @@ msgstr ""
"des cookies. Nous vous recommandons d'accéder à votre serveur depuis son "
"adresse IP ou un <em>Fully Qualified Domain Name</em>.<br>"
#: index.php:1831
#: index.php:1834
msgid "Click to try again."
msgstr "Cliquer ici pour réessayer."
@ -592,7 +592,7 @@ msgstr "Mauvaise réponse du hub %s"
msgid "Enable PubSubHubbub feed publishing."
msgstr "Active la publication de flux vers PubSubHubbub."
#: plugins/qrcode/qrcode.php:73 plugins/wallabag/wallabag.php:68
#: plugins/qrcode/qrcode.php:72 plugins/wallabag/wallabag.php:68
msgid "For each link, add a QRCode icon."
msgstr "Pour chaque lien, ajouter une icône de QRCode."
@ -679,6 +679,34 @@ msgstr "Vous pouvez aussi modifier les tags dans la"
msgid "tag list"
msgstr "liste des tags"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:143
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:147
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:316
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:151
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:320
msgid "None"
msgstr "Aucune"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:158
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:297
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:162
#| msgid "Enable thumbnails"
msgid "Synchonize thumbnails"
msgstr ""
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29
msgid "title"
msgstr "titre"
@ -762,50 +790,41 @@ msgid "Notify me when a new release is ready"
msgstr "Me notifier lorsqu'une nouvelle version est disponible"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247
msgid "Automatically retrieve description for new bookmarks"
msgstr "Récupérer automatiquement la description"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
msgid "Shaarli will try to retrieve the description from meta HTML headers"
msgstr ""
"Shaarli essaiera de récupérer la description depuis les balises HTML meta "
"dans les entêtes"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Enable REST API"
msgstr "Activer l'API REST"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:264
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170
msgid "Allow third party software to use Shaarli such as mobile application"
msgstr ""
"Permet aux applications tierces d'utiliser Shaarli, par exemple les "
"applications mobiles"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:279
msgid "API secret"
msgstr "Clé d'API secrète"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:277
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:293
msgid "Enable thumbnails"
msgstr "Activer les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:281
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:285
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:301
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56
msgid "Synchronize thumbnails"
msgstr "Synchroniser les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:296
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:300
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:304
msgid "None"
msgstr "Aucune"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:328
#: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199
@ -1149,17 +1168,13 @@ msgstr "Déconnexion"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150
#, fuzzy
#| msgid "Public"
msgid "Set public"
msgstr "Publics"
msgstr "Rendre public"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155
#, fuzzy
#| msgid "Private"
msgid "Set private"
msgstr "Privé"
msgstr "Rendre privé"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187
@ -1409,11 +1424,6 @@ msgstr ""
"Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « "
"Ajouter aux favoris »"
#, fuzzy
#~| msgid "Enable thumbnails"
#~ msgid "Synchonize thumbnails"
#~ msgstr "Activer les miniatures"
#~ msgid ""
#~ "You need to browse your Shaarli over <strong>HTTPS</strong> to use this "
#~ "functionality."

View file

@ -1015,6 +1015,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
$conf->set('general.timezone', $tz);
$conf->set('general.title', escape($_POST['title']));
$conf->set('general.header_link', escape($_POST['titleLink']));
$conf->set('general.retrieve_description', !empty($_POST['retrieveDescription']));
$conf->set('resource.theme', escape($_POST['theme']));
$conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection']));
$conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault']));
@ -1063,6 +1064,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
);
$PAGE->assign('continents', $continents);
$PAGE->assign('cities', $cities);
$PAGE->assign('retrieve_description', $conf->get('general.retrieve_description'));
$PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false));
$PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false));
$PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false));
@ -1364,13 +1366,14 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
// If this is an HTTP(S) link, we try go get the page to extract
// the title (otherwise we will to straight to the edit form.)
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
$retrieveDescription = $conf->get('general.retrieve_description');
// Short timeout to keep the application responsive
// The callback will fill $charset and $title with data from the downloaded page.
get_http_response(
$url,
$conf->get('general.download_timeout', 30),
$conf->get('general.download_max_size', 4194304),
get_curl_download_callback($charset, $title)
get_curl_download_callback($charset, $title, $description, $tags, $retrieveDescription)
);
if (! empty($title) && strtolower($charset) != 'utf-8') {
$title = mb_convert_encoding($title, 'utf-8', $charset);

View file

@ -2,14 +2,16 @@
namespace Shaarli\Bookmark;
use PHPUnit\Framework\TestCase;
use ReferenceLinkDB;
use Shaarli\Config\ConfigManager;
require_once 'tests/utils/CurlUtils.php';
/**
* Class LinkUtilsTest.
*/
class LinkUtilsTest extends \PHPUnit\Framework\TestCase
class LinkUtilsTest extends TestCase
{
/**
* Test html_extract_title() when the title is found.
@ -75,12 +77,57 @@ public function testHtmlExtractNonExistentCharset()
$this->assertFalse(html_extract_charset($html));
}
/**
* Test html_extract_tag() when the tag <meta name= is found.
*/
public function testHtmlExtractExistentNameTag()
{
$description = 'Bob and Alice share cookies.';
$html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
$this->assertEquals($description, html_extract_tag('description', $html));
}
/**
* Test html_extract_tag() when the tag <meta name= is not found.
*/
public function testHtmlExtractNonExistentNameTag()
{
$html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
$this->assertFalse(html_extract_tag('description', $html));
}
/**
* Test html_extract_tag() when the tag <meta property="og: is found.
*/
public function testHtmlExtractExistentOgTag()
{
$description = 'Bob and Alice share cookies.';
$html = '<html><meta>stuff2</meta><meta property="og:description" content="' . $description . '"/></html>';
$this->assertEquals($description, html_extract_tag('description', $html));
}
/**
* Test html_extract_tag() when the tag <meta property="og: is not found.
*/
public function testHtmlExtractNonExistentOgTag()
{
$html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
$this->assertFalse(html_extract_tag('description', $html));
}
/**
* Test the download callback with valid value
*/
public function testCurlDownloadCallbackOk()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
@ -90,7 +137,9 @@ public function testCurlDownloadCallbackOk()
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -102,6 +151,8 @@ public function testCurlDownloadCallbackOk()
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -109,13 +160,22 @@ public function testCurlDownloadCallbackOk()
*/
public function testCurlDownloadCallbackOkNoCharset()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -123,6 +183,8 @@ public function testCurlDownloadCallbackOkNoCharset()
}
$this->assertEmpty($charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -130,14 +192,23 @@ public function testCurlDownloadCallbackOkNoCharset()
*/
public function testCurlDownloadCallbackOkHtmlCharset()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [
'HTTP/1.1 200 OK',
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -149,6 +220,8 @@ public function testCurlDownloadCallbackOkHtmlCharset()
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -156,7 +229,14 @@ public function testCurlDownloadCallbackOkHtmlCharset()
*/
public function testCurlDownloadCallbackOkNoTitle()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
@ -168,6 +248,8 @@ public function testCurlDownloadCallbackOkNoTitle()
}
$this->assertEquals('utf-8', $charset);
$this->assertEmpty($title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -175,7 +257,14 @@ public function testCurlDownloadCallbackOkNoTitle()
*/
public function testCurlDownloadCallbackInvalidContentType()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
@ -187,7 +276,14 @@ public function testCurlDownloadCallbackInvalidContentType()
*/
public function testCurlDownloadCallbackInvalidResponseCode()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko');
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rc_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
@ -199,13 +295,99 @@ public function testCurlDownloadCallbackInvalidResponseCode()
*/
public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rs_ct_ko');
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rs_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
$this->assertEmpty($title);
}
/**
* Test the download callback with valid value, and retrieve_description option enabled.
*/
public function testCurlDownloadCallbackOkWithDesc()
{
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
true,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
'Content-Type: text/html; charset=utf-8',
'Status: 200 OK',
'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'end' => '<title>ignored</title>'
. '<meta name="description" content="link desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEquals('link desc', $desc);
$this->assertEquals('key1 key2', $keywords);
}
/**
* Test the download callback with valid value, and retrieve_description option enabled,
* but no desc or keyword defined in the page.
*/
public function testCurlDownloadCallbackOkWithDescNotFound()
{
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
true,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
'Content-Type: text/html; charset=utf-8',
'Status: 200 OK',
'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'end' => '<title>ignored</title>',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
* Test count_private.
*/

View file

@ -212,6 +212,22 @@ <h2 class="window-title">{'Configure'|t}</h2>
</div>
</div>
</div>
<div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label">
<label for="retrieveDescription">
<span class="label-name">{'Automatically retrieve description for new bookmarks'|t}</span><br>
<span class="label-desc">{'Shaarli will try to retrieve the description from meta HTML headers'|t}</span>
</label>
</div>
</div>
<div class="pure-u-lg-{$ratioInput} pure-u-{$ratioInputMobile}">
<div class="form-input">
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
</div>
</div>
</div>
<div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label">

View file

@ -106,6 +106,14 @@
<label for="updateCheck">&nbsp;Notify me when a new release is ready</label>
</td>
</tr>
<tr>
<td valign="top"><b>Automatically retrieve description for new bookmarks:</b></td>
<td>
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
<label for="retrieveDescription">&nbsp;Shaarli will try to retrieve the description from meta HTML headers</label>
</td>
</tr>
<tr>
<td valign="top"><b>Enable REST API</b></td>
<td>