Merge pull request #1313 from ArthurHoaro/feature/desc-retrieval

Automatically retrieve description for new bookmarks
This commit is contained in:
ArthurHoaro 2019-07-06 12:34:02 +02:00 committed by GitHub
commit c03c90a13e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 374 additions and 75 deletions

View file

@ -7,13 +7,25 @@
* *
* @param string $charset to extract from the downloaded page (reference) * @param string $charset to extract from the downloaded page (reference)
* @param string $title to extract from the downloaded page (reference) * @param string $title to extract from the downloaded page (reference)
* @param string $description to extract from the downloaded page (reference)
* @param string $keywords to extract from the downloaded page (reference)
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
* @param string $curlGetInfo Optionally overrides curl_getinfo function * @param string $curlGetInfo Optionally overrides curl_getinfo function
* *
* @return Closure * @return Closure
*/ */
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo') function get_curl_download_callback(
{ &$charset,
&$title,
&$description,
&$keywords,
$retrieveDescription,
$curlGetInfo = 'curl_getinfo'
) {
$isRedirected = false; $isRedirected = false;
$currentChunk = 0;
$foundChunk = null;
/** /**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
* *
@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
* *
* @return int|bool length of $data or false if we need to stop the download * @return int|bool length of $data or false if we need to stop the download
*/ */
return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) { return function (&$ch, $data) use (
$retrieveDescription,
$curlGetInfo,
&$charset,
&$title,
&$description,
&$keywords,
&$isRedirected,
&$currentChunk,
&$foundChunk
) {
$currentChunk++;
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
if (!empty($responseCode) && in_array($responseCode, [301, 302])) { if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true; $isRedirected = true;
@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
} }
if (empty($title)) { if (empty($title)) {
$title = html_extract_title($data); $title = html_extract_title($data);
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
} }
if ($retrieveDescription && empty($description)) {
$description = html_extract_tag('description', $data);
$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($keywords)) {
$keywords = html_extract_tag('keywords', $data);
if (! empty($keywords)) {
$foundChunk = $currentChunk;
// Keywords use the format tag1, tag2 multiple words, tag
// So we format them to match Shaarli's separator and glue multiple words with '-'
$keywords = implode(' ', array_map(function($keyword) {
return implode('-', preg_split('/\s+/', trim($keyword)));
}, explode(',', $keywords)));
}
}
// We got everything we want, stop the download. // We got everything we want, stop the download.
if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) { // If we already found either the title, description or keywords,
// it's highly unlikely that we'll find the other metas further than
// in the same chunk of data or the next one. So we also stop the download after that.
if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
&& (! $retrieveDescription
|| $foundChunk < $currentChunk
|| (!empty($title) && !empty($description) && !empty($keywords))
)
) {
return false; return false;
} }
@ -110,6 +158,35 @@ function html_extract_charset($html)
return false; return false;
} }
/**
 * Extract the content of a meta tag from HTML, looking at either:
 *   - OpenGraph: <meta property="og:[tag]" content="...">
 *   - Meta tag:  <meta name="[tag]" content="...">
 *   - Microdata: <meta itemprop="[tag]" content="...">
 *
 * Both attribute orders are supported (identifier before content, or content before identifier),
 * and the "og:" prefix is optional in both cases.
 *
 * @param string $tag  Name of the tag to retrieve (e.g. 'description', 'keywords').
 * @param string $html HTML content where to look for the meta tag.
 *
 * @return bool|string Content of the meta tag if found, false otherwise.
 */
function html_extract_tag($tag, $html)
{
    $properties = implode('|', ['property', 'name', 'itemprop']);
    // The tag name is escaped so that it can never be interpreted as regex syntax.
    $attribute = '(?:' . $properties . ')=["\']?(?:og:)?' . preg_quote($tag, '#');

    // Identifier => content order.
    // A quoted value runs up to its matching quote, so it may contain '/', '>' or the other kind of quote.
    // An unquoted value stops at the first whitespace, quote, '/' or '>'.
    // The branch reset group (?|...) keeps the captured value in group 1 whichever alternative matched.
    $forwardRegex = '#<meta[^>]+' . $attribute . '["\'\s][^>]*content=(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'/>]+))#';

    // If the attributes are not in the order identifier => content (e.g. Github).
    // A quoted, non-empty content value is required here.
    $reverseRegex = '#<meta[^>]+content=(?|"([^"]+)"|\'([^\']+)\')[^>]+' . $attribute . '["\'\s/>]#';

    if (preg_match($forwardRegex, $html, $matches) > 0
        || preg_match($reverseRegex, $html, $matches) > 0
    ) {
        return $matches[1];
    }

    return false;
}
/** /**
* Count private links in given linklist. * Count private links in given linklist.
* *

View file

@ -365,6 +365,7 @@ protected function setDefaultValues()
$this->setEmpty('general.links_per_page', 20); $this->setEmpty('general.links_per_page', 20);
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS); $this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
$this->setEmpty('general.default_note_title', 'Note: '); $this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', false);
$this->setEmpty('updates.check_updates', false); $this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable'); $this->setEmpty('updates.check_updates_branch', 'stable');

View file

@ -56,6 +56,8 @@ _These settings should not be edited_
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php). - **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
- **enabled_plugins**: List of enabled plugins. - **enabled_plugins**: List of enabled plugins.
- **default_note_title**: Default title of a new note. - **default_note_title**: Default title of a new note.
- **retrieve_description** (boolean): If set to true, for every new link Shaarli will try
to retrieve the description and keywords from the HTML meta tags.
### Security ### Security

View file

@ -1,8 +1,8 @@
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: Shaarli\n" "Project-Id-Version: Shaarli\n"
"POT-Creation-Date: 2019-05-25 16:37+0200\n" "POT-Creation-Date: 2019-07-06 12:14+0200\n"
"PO-Revision-Date: 2019-05-25 16:37+0200\n" "PO-Revision-Date: 2019-07-06 12:17+0200\n"
"Last-Translator: \n" "Last-Translator: \n"
"Language-Team: Shaarli\n" "Language-Team: Shaarli\n"
"Language: fr_FR\n" "Language: fr_FR\n"
@ -252,7 +252,7 @@ msgstr "404 Introuvable"
msgid "Couldn't retrieve updater class methods." msgid "Couldn't retrieve updater class methods."
msgstr "Impossible de récupérer les méthodes de la classe Updater." msgstr "Impossible de récupérer les méthodes de la classe Updater."
#: application/updater/Updater.php:526 index.php:1033 #: application/updater/Updater.php:526 index.php:1034
msgid "" msgid ""
"You have enabled or changed thumbnails mode. <a href=\"?do=thumbs_update" "You have enabled or changed thumbnails mode. <a href=\"?do=thumbs_update"
"\">Please synchronize them</a>." "\">Please synchronize them</a>."
@ -337,8 +337,8 @@ msgid "You are not supposed to change a password on an Open Shaarli."
msgstr "" msgstr ""
"Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert." "Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert."
#: index.php:957 index.php:1007 index.php:1092 index.php:1122 index.php:1232 #: index.php:957 index.php:1007 index.php:1094 index.php:1124 index.php:1234
#: index.php:1279 #: index.php:1281
msgid "Wrong token." msgid "Wrong token."
msgstr "Jeton invalide." msgstr "Jeton invalide."
@ -356,64 +356,64 @@ msgstr "Votre mot de passe a été modifié"
msgid "Change password" msgid "Change password"
msgstr "Modifier le mot de passe" msgstr "Modifier le mot de passe"
#: index.php:1053 #: index.php:1054
msgid "Configuration was saved." msgid "Configuration was saved."
msgstr "La configuration a été sauvegardée." msgstr "La configuration a été sauvegardée."
#: index.php:1076 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24 #: index.php:1078 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
msgid "Configure" msgid "Configure"
msgstr "Configurer" msgstr "Configurer"
#: index.php:1086 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 #: index.php:1088 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36
msgid "Manage tags" msgid "Manage tags"
msgstr "Gérer les tags" msgstr "Gérer les tags"
#: index.php:1105 #: index.php:1107
#, php-format #, php-format
msgid "The tag was removed from %d link." msgid "The tag was removed from %d link."
msgid_plural "The tag was removed from %d links." msgid_plural "The tag was removed from %d links."
msgstr[0] "Le tag a été supprimé de %d lien." msgstr[0] "Le tag a été supprimé de %d lien."
msgstr[1] "Le tag a été supprimé de %d liens." msgstr[1] "Le tag a été supprimé de %d liens."
#: index.php:1106 #: index.php:1108
#, php-format #, php-format
msgid "The tag was renamed in %d link." msgid "The tag was renamed in %d link."
msgid_plural "The tag was renamed in %d links." msgid_plural "The tag was renamed in %d links."
msgstr[0] "Le tag a été renommé dans %d lien." msgstr[0] "Le tag a été renommé dans %d lien."
msgstr[1] "Le tag a été renommé dans %d liens." msgstr[1] "Le tag a été renommé dans %d liens."
#: index.php:1113 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13 #: index.php:1115 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
msgid "Shaare a new link" msgid "Shaare a new link"
msgstr "Partager un nouveau lien" msgstr "Partager un nouveau lien"
#: index.php:1342 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169 #: index.php:1344 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Edit" msgid "Edit"
msgstr "Modifier" msgstr "Modifier"
#: index.php:1342 index.php:1413 #: index.php:1344 index.php:1416
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26 #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26
msgid "Shaare" msgid "Shaare"
msgstr "Shaare" msgstr "Shaare"
#: index.php:1382 #: index.php:1385
msgid "Note: " msgid "Note: "
msgstr "Note : " msgstr "Note : "
#: index.php:1421 #: index.php:1424
msgid "Invalid link ID provided" msgid "Invalid link ID provided"
msgstr "" msgstr ""
#: index.php:1441 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65 #: index.php:1444 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
msgid "Export" msgid "Export"
msgstr "Exporter" msgstr "Exporter"
#: index.php:1503 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83 #: index.php:1506 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
msgid "Import" msgid "Import"
msgstr "Importer" msgstr "Importer"
#: index.php:1513 #: index.php:1516
#, php-format #, php-format
msgid "" msgid ""
"The file you are trying to upload is probably bigger than what this " "The file you are trying to upload is probably bigger than what this "
@ -423,20 +423,20 @@ msgstr ""
"le serveur web peut accepter (%s). Merci de l'envoyer en parties plus " "le serveur web peut accepter (%s). Merci de l'envoyer en parties plus "
"légères." "légères."
#: index.php:1558 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26 #: index.php:1561 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22
msgid "Plugin administration" msgid "Plugin administration"
msgstr "Administration des plugins" msgstr "Administration des plugins"
#: index.php:1612 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14 #: index.php:1615 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
msgid "Thumbnails update" msgid "Thumbnails update"
msgstr "Mise à jour des miniatures" msgstr "Mise à jour des miniatures"
#: index.php:1778 #: index.php:1781
msgid "Search: " msgid "Search: "
msgstr "Recherche : " msgstr "Recherche : "
#: index.php:1821 #: index.php:1824
#, php-format #, php-format
msgid "" msgid ""
"<pre>Sessions do not seem to work correctly on your server.<br>Make sure the " "<pre>Sessions do not seem to work correctly on your server.<br>Make sure the "
@ -455,7 +455,7 @@ msgstr ""
"des cookies. Nous vous recommandons d'accéder à votre serveur depuis son " "des cookies. Nous vous recommandons d'accéder à votre serveur depuis son "
"adresse IP ou un <em>Fully Qualified Domain Name</em>.<br>" "adresse IP ou un <em>Fully Qualified Domain Name</em>.<br>"
#: index.php:1831 #: index.php:1834
msgid "Click to try again." msgid "Click to try again."
msgstr "Cliquer ici pour réessayer." msgstr "Cliquer ici pour réessayer."
@ -592,7 +592,7 @@ msgstr "Mauvaise réponse du hub %s"
msgid "Enable PubSubHubbub feed publishing." msgid "Enable PubSubHubbub feed publishing."
msgstr "Active la publication de flux vers PubSubHubbub." msgstr "Active la publication de flux vers PubSubHubbub."
#: plugins/qrcode/qrcode.php:73 plugins/wallabag/wallabag.php:68 #: plugins/qrcode/qrcode.php:72 plugins/wallabag/wallabag.php:68
msgid "For each link, add a QRCode icon." msgid "For each link, add a QRCode icon."
msgstr "Pour chaque lien, ajouter une icône de QRCode." msgstr "Pour chaque lien, ajouter une icône de QRCode."
@ -679,6 +679,34 @@ msgstr "Vous pouvez aussi modifier les tags dans la"
msgid "tag list" msgid "tag list"
msgstr "liste des tags" msgstr "liste des tags"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:143
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:147
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:316
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:151
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:320
msgid "None"
msgstr "Aucune"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:158
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:297
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:162
#| msgid "Enable thumbnails"
msgid "Synchonize thumbnails"
msgstr ""
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29
msgid "title" msgid "title"
msgstr "titre" msgstr "titre"
@ -762,50 +790,41 @@ msgid "Notify me when a new release is ready"
msgstr "Me notifier lorsqu'une nouvelle version est disponible" msgstr "Me notifier lorsqu'une nouvelle version est disponible"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247
msgid "Automatically retrieve description for new bookmarks"
msgstr "Récupérer automatiquement la description"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
msgid "Shaarli will try to retrieve the description from meta HTML headers"
msgstr ""
"Shaarli essaiera de récupérer la description depuis les balises HTML meta "
"dans les entêtes"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169 #: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Enable REST API" msgid "Enable REST API"
msgstr "Activer l'API REST" msgstr "Activer l'API REST"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:264
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170 #: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170
msgid "Allow third party software to use Shaarli such as mobile application" msgid "Allow third party software to use Shaarli such as mobile application"
msgstr "" msgstr ""
"Permet aux applications tierces d'utiliser Shaarli, par exemple les " "Permet aux applications tierces d'utiliser Shaarli, par exemple les "
"applications mobiles" "applications mobiles"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:279
msgid "API secret" msgid "API secret"
msgstr "Clé d'API secrète" msgstr "Clé d'API secrète"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:277 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:293
msgid "Enable thumbnails" msgid "Enable thumbnails"
msgstr "Activer les miniatures" msgstr "Activer les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:281 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:301
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:285
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56 #: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56
msgid "Synchronize thumbnails" msgid "Synchronize thumbnails"
msgstr "Synchroniser les miniatures" msgstr "Synchroniser les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:296 #: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:328
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:300
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:304
msgid "None"
msgstr "Aucune"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72 #: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139 #: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199 #: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199
@ -1149,17 +1168,13 @@ msgstr "Déconnexion"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150 #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150
#, fuzzy
#| msgid "Public"
msgid "Set public" msgid "Set public"
msgstr "Publics" msgstr "Rendre public"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155 #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155
#, fuzzy
#| msgid "Private"
msgid "Set private" msgid "Set private"
msgstr "Privé" msgstr "Rendre privé"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187 #: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187 #: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187
@ -1409,11 +1424,6 @@ msgstr ""
"Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « " "Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « "
"Ajouter aux favoris »" "Ajouter aux favoris »"
#, fuzzy
#~| msgid "Enable thumbnails"
#~ msgid "Synchonize thumbnails"
#~ msgstr "Activer les miniatures"
#~ msgid "" #~ msgid ""
#~ "You need to browse your Shaarli over <strong>HTTPS</strong> to use this " #~ "You need to browse your Shaarli over <strong>HTTPS</strong> to use this "
#~ "functionality." #~ "functionality."

View file

@ -1015,6 +1015,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
$conf->set('general.timezone', $tz); $conf->set('general.timezone', $tz);
$conf->set('general.title', escape($_POST['title'])); $conf->set('general.title', escape($_POST['title']));
$conf->set('general.header_link', escape($_POST['titleLink'])); $conf->set('general.header_link', escape($_POST['titleLink']));
$conf->set('general.retrieve_description', !empty($_POST['retrieveDescription']));
$conf->set('resource.theme', escape($_POST['theme'])); $conf->set('resource.theme', escape($_POST['theme']));
$conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection'])); $conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection']));
$conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault'])); $conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault']));
@ -1063,6 +1064,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
); );
$PAGE->assign('continents', $continents); $PAGE->assign('continents', $continents);
$PAGE->assign('cities', $cities); $PAGE->assign('cities', $cities);
$PAGE->assign('retrieve_description', $conf->get('general.retrieve_description'));
$PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false)); $PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false));
$PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false)); $PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false));
$PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false)); $PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false));
@ -1364,13 +1366,14 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
// If this is an HTTP(S) link, we try go get the page to extract // If this is an HTTP(S) link, we try go get the page to extract
// the title (otherwise we will to straight to the edit form.) // the title (otherwise we will to straight to the edit form.)
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
$retrieveDescription = $conf->get('general.retrieve_description');
// Short timeout to keep the application responsive // Short timeout to keep the application responsive
// The callback will fill $charset and $title with data from the downloaded page. // The callback will fill $charset and $title with data from the downloaded page.
get_http_response( get_http_response(
$url, $url,
$conf->get('general.download_timeout', 30), $conf->get('general.download_timeout', 30),
$conf->get('general.download_max_size', 4194304), $conf->get('general.download_max_size', 4194304),
get_curl_download_callback($charset, $title) get_curl_download_callback($charset, $title, $description, $tags, $retrieveDescription)
); );
if (! empty($title) && strtolower($charset) != 'utf-8') { if (! empty($title) && strtolower($charset) != 'utf-8') {
$title = mb_convert_encoding($title, 'utf-8', $charset); $title = mb_convert_encoding($title, 'utf-8', $charset);

View file

@ -2,14 +2,16 @@
namespace Shaarli\Bookmark; namespace Shaarli\Bookmark;
use PHPUnit\Framework\TestCase;
use ReferenceLinkDB; use ReferenceLinkDB;
use Shaarli\Config\ConfigManager;
require_once 'tests/utils/CurlUtils.php'; require_once 'tests/utils/CurlUtils.php';
/** /**
* Class LinkUtilsTest. * Class LinkUtilsTest.
*/ */
class LinkUtilsTest extends \PHPUnit\Framework\TestCase class LinkUtilsTest extends TestCase
{ {
/** /**
* Test html_extract_title() when the title is found. * Test html_extract_title() when the title is found.
@ -75,12 +77,57 @@ public function testHtmlExtractNonExistentCharset()
$this->assertFalse(html_extract_charset($html)); $this->assertFalse(html_extract_charset($html));
} }
/**
 * html_extract_tag() must return the content of a <meta name="..."> tag when it is present.
 */
public function testHtmlExtractExistentNameTag()
{
    $expected = 'Bob and Alice share cookies.';
    $page = '<html><meta>stuff2</meta><meta name="description" content="' . $expected . '"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertEquals($expected, $extracted);
}
/**
 * html_extract_tag() must return false when no <meta name="..."> tag matches the requested name.
 */
public function testHtmlExtractNonExistentNameTag()
{
    // The only named meta tag in this page is "image", not "description".
    $page = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertFalse($extracted);
}
/**
 * html_extract_tag() must return the content of a <meta property="og:..."> tag when it is present.
 */
public function testHtmlExtractExistentOgTag()
{
    $expected = 'Bob and Alice share cookies.';
    $page = '<html><meta>stuff2</meta><meta property="og:description" content="' . $expected . '"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertEquals($expected, $extracted);
}
/**
 * Test html_extract_tag() when the tag <meta property="og: is not found.
 *
 * The page only exposes an unrelated OpenGraph property (og:image), so looking up
 * 'description' must not match it.
 */
public function testHtmlExtractNonExistentOgTag()
{
    // Use an OpenGraph property here: a name="..." fixture would only duplicate
    // testHtmlExtractNonExistentNameTag() and leave the "og:" path untested.
    $html = '<html><meta>stuff2</meta><meta property="og:image" content="img"/></html>';
    $this->assertFalse(html_extract_tag('description', $html));
}
/** /**
* Test the download callback with valid value * Test the download callback with valid value
*/ */
public function testCurlDownloadCallbackOk() public function testCurlDownloadCallbackOk()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [ $data = [
'HTTP/1.1 200 OK', 'HTTP/1.1 200 OK',
'Server: GitHub.com', 'Server: GitHub.com',
@ -90,7 +137,9 @@ public function testCurlDownloadCallbackOk()
'end' => 'th=device-width">' 'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>' . '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea', . '<link rel="search" type="application/opensea',
'<title>ignored</title>', '<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
]; ];
foreach ($data as $key => $line) { foreach ($data as $key => $line) {
$ignore = null; $ignore = null;
@ -102,6 +151,8 @@ public function testCurlDownloadCallbackOk()
} }
$this->assertEquals('utf-8', $charset); $this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title); $this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
} }
/** /**
@ -109,13 +160,22 @@ public function testCurlDownloadCallbackOk()
*/ */
public function testCurlDownloadCallbackOkNoCharset() public function testCurlDownloadCallbackOkNoCharset()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [ $data = [
'HTTP/1.1 200 OK', 'HTTP/1.1 200 OK',
'end' => 'th=device-width">' 'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>' . '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea', . '<link rel="search" type="application/opensea',
'<title>ignored</title>', '<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
]; ];
foreach ($data as $key => $line) { foreach ($data as $key => $line) {
$ignore = null; $ignore = null;
@ -123,6 +183,8 @@ public function testCurlDownloadCallbackOkNoCharset()
} }
$this->assertEmpty($charset); $this->assertEmpty($charset);
$this->assertEquals('Refactoring · GitHub', $title); $this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
} }
/** /**
@ -130,14 +192,23 @@ public function testCurlDownloadCallbackOkNoCharset()
*/ */
public function testCurlDownloadCallbackOkHtmlCharset() public function testCurlDownloadCallbackOkHtmlCharset()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset'); $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [ $data = [
'HTTP/1.1 200 OK', 'HTTP/1.1 200 OK',
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />', '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
'end' => 'th=device-width">' 'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>' . '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea', . '<link rel="search" type="application/opensea',
'<title>ignored</title>', '<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
]; ];
foreach ($data as $key => $line) { foreach ($data as $key => $line) {
$ignore = null; $ignore = null;
@ -149,6 +220,8 @@ public function testCurlDownloadCallbackOkHtmlCharset()
} }
$this->assertEquals('utf-8', $charset); $this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title); $this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
} }
/** /**
@ -156,7 +229,14 @@ public function testCurlDownloadCallbackOkHtmlCharset()
*/ */
public function testCurlDownloadCallbackOkNoTitle() public function testCurlDownloadCallbackOkNoTitle()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok'); $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [ $data = [
'HTTP/1.1 200 OK', 'HTTP/1.1 200 OK',
'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea', 'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
@ -168,6 +248,8 @@ public function testCurlDownloadCallbackOkNoTitle()
} }
$this->assertEquals('utf-8', $charset); $this->assertEquals('utf-8', $charset);
$this->assertEmpty($title); $this->assertEmpty($title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
} }
/** /**
@ -175,7 +257,14 @@ public function testCurlDownloadCallbackOkNoTitle()
*/ */
public function testCurlDownloadCallbackInvalidContentType() public function testCurlDownloadCallbackInvalidContentType()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko'); $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ct_ko'
);
$ignore = null; $ignore = null;
$this->assertFalse($callback($ignore, '')); $this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset); $this->assertEmpty($charset);
@ -187,7 +276,14 @@ public function testCurlDownloadCallbackInvalidContentType()
*/ */
public function testCurlDownloadCallbackInvalidResponseCode() public function testCurlDownloadCallbackInvalidResponseCode()
{ {
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko'); $callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rc_ko'
);
$ignore = null; $ignore = null;
$this->assertFalse($callback($ignore, '')); $this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset); $this->assertEmpty($charset);
@ -199,13 +295,99 @@ public function testCurlDownloadCallbackInvalidResponseCode()
*/ */
public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
{
    // The curl_getinfo override used here is the 'ut_curl_getinfo_rs_ct_ko' stub
    // (presumably reporting both an invalid response code and an invalid
    // content type -- confirm against the stub definition).
    $callback = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        false,
        'ut_curl_getinfo_rs_ct_ko'
    );

    // The callback must ask cURL to stop the download right away...
    $ignore = null;
    $this->assertFalse($callback($ignore, ''));

    // ...and none of the by-reference outputs may have been filled in.
    $this->assertEmpty($charset);
    $this->assertEmpty($title);
    $this->assertEmpty($desc);
    $this->assertEmpty($keywords);
}
/**
 * Test the download callback with valid values and the retrieve_description option enabled.
 *
 * The response is fed to the callback chunk by chunk. Every chunk except the one keyed
 * 'end' must be accepted (the callback returns the chunk length); the 'end' chunk must
 * make the callback return false, i.e. stop the download. The title, description and
 * keywords found along the way are then checked through the by-reference arguments.
 */
public function testCurlDownloadCallbackOkWithDesc()
{
    $callback = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );
    $data = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        // Last chunk: its <title> must not override the one already found,
        // while the description and keywords meta tags must be picked up.
        'end' => '<title>ignored</title>'
            . '<meta name="description" content="link desc" />'
            . '<meta name="keywords" content="key1,key2" />',
    ];
    foreach ($data as $key => $line) {
        $ignore = null;
        $expected = $key !== 'end' ? strlen($line) : false;
        // Strict comparison: a loose one would let a returned 0 pass for an expected false.
        $this->assertSame($expected, $callback($ignore, $line));
        if ($expected === false) {
            break;
        }
    }
    $this->assertSame('utf-8', $charset);
    $this->assertSame('Refactoring · GitHub', $title);
    $this->assertSame('link desc', $desc);
    // The comma separated keywords are expected back as a space separated list.
    $this->assertSame('key1 key2', $keywords);
}
/**
 * Test the download callback with valid values and the retrieve_description option enabled,
 * but no description or keywords defined in the page.
 *
 * Every chunk except the one keyed 'end' must be accepted (the callback returns the chunk
 * length); the 'end' chunk must make the callback return false, i.e. stop the download,
 * even though neither a description nor keywords were found.
 */
public function testCurlDownloadCallbackOkWithDescNotFound()
{
    $callback = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );
    $data = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        // Last chunk: carries a second <title> only, no description/keywords meta tags.
        'end' => '<title>ignored</title>',
    ];
    foreach ($data as $key => $line) {
        $ignore = null;
        $expected = $key !== 'end' ? strlen($line) : false;
        // Strict comparison: a loose one would let a returned 0 pass for an expected false.
        $this->assertSame($expected, $callback($ignore, $line));
        if ($expected === false) {
            break;
        }
    }
    $this->assertSame('utf-8', $charset);
    // The title from the earlier chunk is kept; the one in the last chunk is ignored.
    $this->assertSame('Refactoring · GitHub', $title);
    // Kept as assertEmpty: the exact "not found" value is not visible from this test.
    $this->assertEmpty($desc);
    $this->assertEmpty($keywords);
}
/** /**
* Test count_private. * Test count_private.
*/ */

View file

@ -212,6 +212,22 @@ <h2 class="window-title">{'Configure'|t}</h2>
</div> </div>
</div> </div>
</div> </div>
<div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label">
<label for="retrieveDescription">
<span class="label-name">{'Automatically retrieve description for new bookmarks'|t}</span><br>
<span class="label-desc">{'Shaarli will try to retrieve the description from meta HTML headers'|t}</span>
</label>
</div>
</div>
<div class="pure-u-lg-{$ratioInput} pure-u-{$ratioInputMobile}">
<div class="form-input">
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
</div>
</div>
</div>
<div class="pure-g"> <div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}"> <div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label"> <div class="form-label">

View file

@ -106,6 +106,14 @@
<label for="updateCheck">&nbsp;Notify me when a new release is ready</label> <label for="updateCheck">&nbsp;Notify me when a new release is ready</label>
</td> </td>
</tr> </tr>
<tr>
<td valign="top"><b>Automatically retrieve description for new bookmarks:</b></td>
<td>
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
<label for="retrieveDescription">&nbsp;Shaarli will try to retrieve the description from meta HTML headers</label>
</td>
</tr>
<tr> <tr>
<td valign="top"><b>Enable REST API</b></td> <td valign="top"><b>Enable REST API</b></td>
<td> <td>