Merge pull request #1313 from ArthurHoaro/feature/desc-retrieval
Automatically retrieve description for new bookmarks
This commit is contained in:
commit
c03c90a13e
8 changed files with 374 additions and 75 deletions
|
@ -7,13 +7,25 @@
|
|||
*
|
||||
* @param string $charset to extract from the downloaded page (reference)
|
||||
* @param string $title to extract from the downloaded page (reference)
|
||||
* @param string $description to extract from the downloaded page (reference)
|
||||
* @param string $keywords to extract from the downloaded page (reference)
|
||||
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
|
||||
* @param string $curlGetInfo Optionally overrides curl_getinfo function
|
||||
*
|
||||
* @return Closure
|
||||
*/
|
||||
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
|
||||
{
|
||||
function get_curl_download_callback(
|
||||
&$charset,
|
||||
&$title,
|
||||
&$description,
|
||||
&$keywords,
|
||||
$retrieveDescription,
|
||||
$curlGetInfo = 'curl_getinfo'
|
||||
) {
|
||||
$isRedirected = false;
|
||||
$currentChunk = 0;
|
||||
$foundChunk = null;
|
||||
|
||||
/**
|
||||
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
|
||||
*
|
||||
|
@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
|
|||
*
|
||||
* @return int|bool length of $data or false if we need to stop the download
|
||||
*/
|
||||
return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
|
||||
return function (&$ch, $data) use (
|
||||
$retrieveDescription,
|
||||
$curlGetInfo,
|
||||
&$charset,
|
||||
&$title,
|
||||
&$description,
|
||||
&$keywords,
|
||||
&$isRedirected,
|
||||
&$currentChunk,
|
||||
&$foundChunk
|
||||
) {
|
||||
$currentChunk++;
|
||||
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
|
||||
if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
|
||||
$isRedirected = true;
|
||||
|
@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
|
|||
}
|
||||
if (empty($title)) {
|
||||
$title = html_extract_title($data);
|
||||
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
|
||||
}
|
||||
if ($retrieveDescription && empty($description)) {
|
||||
$description = html_extract_tag('description', $data);
|
||||
$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
|
||||
}
|
||||
if ($retrieveDescription && empty($keywords)) {
|
||||
$keywords = html_extract_tag('keywords', $data);
|
||||
if (! empty($keywords)) {
|
||||
$foundChunk = $currentChunk;
|
||||
// Keywords use the format tag1, tag2 multiple words, tag
|
||||
// So we format them to match Shaarli's separator and glue multiple words with '-'
|
||||
$keywords = implode(' ', array_map(function($keyword) {
|
||||
return implode('-', preg_split('/\s+/', trim($keyword)));
|
||||
}, explode(',', $keywords)));
|
||||
}
|
||||
}
|
||||
|
||||
// We got everything we want, stop the download.
|
||||
if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
|
||||
// If we already found either the title, description or keywords,
|
||||
// it's highly unlikely that we'll find the other metas further than
|
||||
// in the same chunk of data or the next one. So we also stop the download after that.
|
||||
if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
|
||||
&& (! $retrieveDescription
|
||||
|| $foundChunk < $currentChunk
|
||||
|| (!empty($title) && !empty($description) && !empty($keywords))
|
||||
)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -110,6 +158,35 @@ function html_extract_charset($html)
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Extract the content of a meta tag from HTML content, in either form:
 *   - OpenGraph: <meta property="og:[tag]" content="...">
 *   - Meta tag:  <meta name="[tag]" content="...">
 *
 * The `property`, `name` and `itemprop` attributes are all accepted, and the
 * `content` attribute may be placed either after or before them.
 *
 * @param string $tag  Name of the tag to retrieve (e.g. "description").
 * @param string $html HTML content where to look for the meta tag.
 *
 * @return bool|string Content of the meta tag if found, false otherwise.
 */
function html_extract_tag($tag, $html)
{
    $properties = implode('|', ['property', 'name', 'itemprop']);

    // The tag name is interpolated into the patterns below: neutralise any regex metacharacter.
    $tag = preg_quote($tag, '#');

    // Attributes in the order property => content.
    // A quoted value ends at its *own* closing quote, so an apostrophe inside a
    // double-quoted value (content="Bob's cookies") no longer truncates the result.
    // The last branch keeps support for unquoted values (content=foo/>).
    // (?|...) is a branch-reset group: whichever branch matches, the value is in group 1.
    $ogRegex = '#<meta[^>]+(?:' . $properties . ')=["\']?(?:og:)?' . $tag . '["\'\s][^>]*content='
        . '(?|"([^"]*)"|\'([^\']*)\'|([^"\'\s/>][^"\'/>]*)["\'/>])#';

    // Attributes in the order content => property (e.g. Github).
    // The "og:" prefix is optional as a whole, so plain name="[tag]" is matched here too.
    $ogRegexReverse = '#<meta[^>]+content=(?|"([^"]+)"|\'([^\']+)\')[^>]+(?:' . $properties . ')=["\']?(?:og:)?'
        . $tag . '["\'\s/>]#';

    if (preg_match($ogRegex, $html, $matches) === 1
        || preg_match($ogRegexReverse, $html, $matches) === 1
    ) {
        return $matches[1];
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* Count private links in given linklist.
|
||||
*
|
||||
|
|
|
@ -365,6 +365,7 @@ protected function setDefaultValues()
|
|||
$this->setEmpty('general.links_per_page', 20);
|
||||
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
|
||||
$this->setEmpty('general.default_note_title', 'Note: ');
|
||||
$this->setEmpty('general.retrieve_description', false);
|
||||
|
||||
$this->setEmpty('updates.check_updates', false);
|
||||
$this->setEmpty('updates.check_updates_branch', 'stable');
|
||||
|
|
|
@ -56,6 +56,8 @@ _These settings should not be edited_
|
|||
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
|
||||
- **enabled_plugins**: List of enabled plugins.
|
||||
- **default_note_title**: Default title of a new note.
|
||||
- **retrieve_description** (boolean): If set to true, for every new link Shaarli will try
|
||||
to retrieve the description and keywords from the HTML meta tags.
|
||||
|
||||
### Security
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: Shaarli\n"
|
||||
"POT-Creation-Date: 2019-05-25 16:37+0200\n"
|
||||
"PO-Revision-Date: 2019-05-25 16:37+0200\n"
|
||||
"POT-Creation-Date: 2019-07-06 12:14+0200\n"
|
||||
"PO-Revision-Date: 2019-07-06 12:17+0200\n"
|
||||
"Last-Translator: \n"
|
||||
"Language-Team: Shaarli\n"
|
||||
"Language: fr_FR\n"
|
||||
|
@ -252,7 +252,7 @@ msgstr "404 Introuvable"
|
|||
msgid "Couldn't retrieve updater class methods."
|
||||
msgstr "Impossible de récupérer les méthodes de la classe Updater."
|
||||
|
||||
#: application/updater/Updater.php:526 index.php:1033
|
||||
#: application/updater/Updater.php:526 index.php:1034
|
||||
msgid ""
|
||||
"You have enabled or changed thumbnails mode. <a href=\"?do=thumbs_update"
|
||||
"\">Please synchronize them</a>."
|
||||
|
@ -337,8 +337,8 @@ msgid "You are not supposed to change a password on an Open Shaarli."
|
|||
msgstr ""
|
||||
"Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert."
|
||||
|
||||
#: index.php:957 index.php:1007 index.php:1092 index.php:1122 index.php:1232
|
||||
#: index.php:1279
|
||||
#: index.php:957 index.php:1007 index.php:1094 index.php:1124 index.php:1234
|
||||
#: index.php:1281
|
||||
msgid "Wrong token."
|
||||
msgstr "Jeton invalide."
|
||||
|
||||
|
@ -356,64 +356,64 @@ msgstr "Votre mot de passe a été modifié"
|
|||
msgid "Change password"
|
||||
msgstr "Modifier le mot de passe"
|
||||
|
||||
#: index.php:1053
|
||||
#: index.php:1054
|
||||
msgid "Configuration was saved."
|
||||
msgstr "La configuration a été sauvegardée."
|
||||
|
||||
#: index.php:1076 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
|
||||
#: index.php:1078 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
|
||||
msgid "Configure"
|
||||
msgstr "Configurer"
|
||||
|
||||
#: index.php:1086 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
|
||||
#: index.php:1088 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
|
||||
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36
|
||||
msgid "Manage tags"
|
||||
msgstr "Gérer les tags"
|
||||
|
||||
#: index.php:1105
|
||||
#: index.php:1107
|
||||
#, php-format
|
||||
msgid "The tag was removed from %d link."
|
||||
msgid_plural "The tag was removed from %d links."
|
||||
msgstr[0] "Le tag a été supprimé de %d lien."
|
||||
msgstr[1] "Le tag a été supprimé de %d liens."
|
||||
|
||||
#: index.php:1106
|
||||
#: index.php:1108
|
||||
#, php-format
|
||||
msgid "The tag was renamed in %d link."
|
||||
msgid_plural "The tag was renamed in %d links."
|
||||
msgstr[0] "Le tag a été renommé dans %d lien."
|
||||
msgstr[1] "Le tag a été renommé dans %d liens."
|
||||
|
||||
#: index.php:1113 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
|
||||
#: index.php:1115 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
|
||||
msgid "Shaare a new link"
|
||||
msgstr "Partager un nouveau lien"
|
||||
|
||||
#: index.php:1342 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
|
||||
#: index.php:1344 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
|
||||
msgid "Edit"
|
||||
msgstr "Modifier"
|
||||
|
||||
#: index.php:1342 index.php:1413
|
||||
#: index.php:1344 index.php:1416
|
||||
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
|
||||
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26
|
||||
msgid "Shaare"
|
||||
msgstr "Shaare"
|
||||
|
||||
#: index.php:1382
|
||||
#: index.php:1385
|
||||
msgid "Note: "
|
||||
msgstr "Note : "
|
||||
|
||||
#: index.php:1421
|
||||
#: index.php:1424
|
||||
msgid "Invalid link ID provided"
|
||||
msgstr ""
|
||||
|
||||
#: index.php:1441 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
|
||||
#: index.php:1444 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
|
||||
msgid "Export"
|
||||
msgstr "Exporter"
|
||||
|
||||
#: index.php:1503 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
|
||||
#: index.php:1506 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
|
||||
msgid "Import"
|
||||
msgstr "Importer"
|
||||
|
||||
#: index.php:1513
|
||||
#: index.php:1516
|
||||
#, php-format
|
||||
msgid ""
|
||||
"The file you are trying to upload is probably bigger than what this "
|
||||
|
@ -423,20 +423,20 @@ msgstr ""
|
|||
"le serveur web peut accepter (%s). Merci de l'envoyer en parties plus "
|
||||
"légères."
|
||||
|
||||
#: index.php:1558 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
|
||||
#: index.php:1561 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
|
||||
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22
|
||||
msgid "Plugin administration"
|
||||
msgstr "Administration des plugins"
|
||||
|
||||
#: index.php:1612 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
|
||||
#: index.php:1615 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
|
||||
msgid "Thumbnails update"
|
||||
msgstr "Mise à jour des miniatures"
|
||||
|
||||
#: index.php:1778
|
||||
#: index.php:1781
|
||||
msgid "Search: "
|
||||
msgstr "Recherche : "
|
||||
|
||||
#: index.php:1821
|
||||
#: index.php:1824
|
||||
#, php-format
|
||||
msgid ""
|
||||
"<pre>Sessions do not seem to work correctly on your server.<br>Make sure the "
|
||||
|
@ -455,7 +455,7 @@ msgstr ""
|
|||
"des cookies. Nous vous recommandons d'accéder à votre serveur depuis son "
|
||||
"adresse IP ou un <em>Fully Qualified Domain Name</em>.<br>"
|
||||
|
||||
#: index.php:1831
|
||||
#: index.php:1834
|
||||
msgid "Click to try again."
|
||||
msgstr "Cliquer ici pour réessayer."
|
||||
|
||||
|
@ -592,7 +592,7 @@ msgstr "Mauvaise réponse du hub %s"
|
|||
msgid "Enable PubSubHubbub feed publishing."
|
||||
msgstr "Active la publication de flux vers PubSubHubbub."
|
||||
|
||||
#: plugins/qrcode/qrcode.php:73 plugins/wallabag/wallabag.php:68
|
||||
#: plugins/qrcode/qrcode.php:72 plugins/wallabag/wallabag.php:68
|
||||
msgid "For each link, add a QRCode icon."
|
||||
msgstr "Pour chaque lien, ajouter une icône de QRCode."
|
||||
|
||||
|
@ -679,6 +679,34 @@ msgstr "Vous pouvez aussi modifier les tags dans la"
|
|||
msgid "tag list"
|
||||
msgstr "liste des tags"
|
||||
|
||||
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:143
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
|
||||
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
|
||||
msgid "All"
|
||||
msgstr "Tous"
|
||||
|
||||
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:147
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:316
|
||||
msgid "Only common media hosts"
|
||||
msgstr "Seulement les hébergeurs de média connus"
|
||||
|
||||
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:151
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:320
|
||||
msgid "None"
|
||||
msgstr "Aucune"
|
||||
|
||||
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:158
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:297
|
||||
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
|
||||
msgstr ""
|
||||
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
|
||||
"miniatures."
|
||||
|
||||
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:162
|
||||
#| msgid "Enable thumbnails"
|
||||
msgid "Synchonize thumbnails"
|
||||
msgstr ""
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29
|
||||
msgid "title"
|
||||
msgstr "titre"
|
||||
|
@ -762,50 +790,41 @@ msgid "Notify me when a new release is ready"
|
|||
msgstr "Me notifier lorsqu'une nouvelle version est disponible"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247
|
||||
msgid "Automatically retrieve description for new bookmarks"
|
||||
msgstr "Récupérer automatiquement la description"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
|
||||
msgid "Shaarli will try to retrieve the description from meta HTML headers"
|
||||
msgstr ""
|
||||
"Shaarli essaiera de récupérer la description depuis les balises HTML meta "
|
||||
"dans les entêtes"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
|
||||
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
|
||||
msgid "Enable REST API"
|
||||
msgstr "Activer l'API REST"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:264
|
||||
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170
|
||||
msgid "Allow third party software to use Shaarli such as mobile application"
|
||||
msgstr ""
|
||||
"Permet aux applications tierces d'utiliser Shaarli, par exemple les "
|
||||
"applications mobiles"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:279
|
||||
msgid "API secret"
|
||||
msgstr "Clé d'API secrète"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:277
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:293
|
||||
msgid "Enable thumbnails"
|
||||
msgstr "Activer les miniatures"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:281
|
||||
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
|
||||
msgstr ""
|
||||
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
|
||||
"miniatures."
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:285
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:301
|
||||
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56
|
||||
msgid "Synchronize thumbnails"
|
||||
msgstr "Synchroniser les miniatures"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:296
|
||||
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
|
||||
msgid "All"
|
||||
msgstr "Tous"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:300
|
||||
msgid "Only common media hosts"
|
||||
msgstr "Seulement les hébergeurs de média connus"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:304
|
||||
msgid "None"
|
||||
msgstr "Aucune"
|
||||
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
|
||||
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:328
|
||||
#: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72
|
||||
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139
|
||||
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199
|
||||
|
@ -1149,17 +1168,13 @@ msgstr "Déconnexion"
|
|||
|
||||
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150
|
||||
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150
|
||||
#, fuzzy
|
||||
#| msgid "Public"
|
||||
msgid "Set public"
|
||||
msgstr "Publics"
|
||||
msgstr "Rendre public"
|
||||
|
||||
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155
|
||||
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155
|
||||
#, fuzzy
|
||||
#| msgid "Private"
|
||||
msgid "Set private"
|
||||
msgstr "Privé"
|
||||
msgstr "Rendre privé"
|
||||
|
||||
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187
|
||||
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187
|
||||
|
@ -1409,11 +1424,6 @@ msgstr ""
|
|||
"Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « "
|
||||
"Ajouter aux favoris »"
|
||||
|
||||
#, fuzzy
|
||||
#~| msgid "Enable thumbnails"
|
||||
#~ msgid "Synchonize thumbnails"
|
||||
#~ msgstr "Activer les miniatures"
|
||||
|
||||
#~ msgid ""
|
||||
#~ "You need to browse your Shaarli over <strong>HTTPS</strong> to use this "
|
||||
#~ "functionality."
|
||||
|
|
|
@ -1015,6 +1015,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
|
|||
$conf->set('general.timezone', $tz);
|
||||
$conf->set('general.title', escape($_POST['title']));
|
||||
$conf->set('general.header_link', escape($_POST['titleLink']));
|
||||
$conf->set('general.retrieve_description', !empty($_POST['retrieveDescription']));
|
||||
$conf->set('resource.theme', escape($_POST['theme']));
|
||||
$conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection']));
|
||||
$conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault']));
|
||||
|
@ -1063,6 +1064,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
|
|||
);
|
||||
$PAGE->assign('continents', $continents);
|
||||
$PAGE->assign('cities', $cities);
|
||||
$PAGE->assign('retrieve_description', $conf->get('general.retrieve_description'));
|
||||
$PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false));
|
||||
$PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false));
|
||||
$PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false));
|
||||
|
@ -1364,13 +1366,14 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
|
|||
// If this is an HTTP(S) link, we try to get the page to extract
|
||||
// the title (otherwise we will go straight to the edit form).
|
||||
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
|
||||
$retrieveDescription = $conf->get('general.retrieve_description');
|
||||
// Short timeout to keep the application responsive
|
||||
// The callback will fill $charset and $title (and, when $retrieveDescription is enabled,
// $description and $tags) with data from the downloaded page.
|
||||
get_http_response(
|
||||
$url,
|
||||
$conf->get('general.download_timeout', 30),
|
||||
$conf->get('general.download_max_size', 4194304),
|
||||
get_curl_download_callback($charset, $title)
|
||||
get_curl_download_callback($charset, $title, $description, $tags, $retrieveDescription)
|
||||
);
|
||||
if (! empty($title) && strtolower($charset) != 'utf-8') {
|
||||
$title = mb_convert_encoding($title, 'utf-8', $charset);
|
||||
|
|
|
@ -2,14 +2,16 @@
|
|||
|
||||
namespace Shaarli\Bookmark;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use ReferenceLinkDB;
|
||||
use Shaarli\Config\ConfigManager;
|
||||
|
||||
require_once 'tests/utils/CurlUtils.php';
|
||||
|
||||
/**
|
||||
* Class LinkUtilsTest.
|
||||
*/
|
||||
class LinkUtilsTest extends \PHPUnit\Framework\TestCase
|
||||
class LinkUtilsTest extends TestCase
|
||||
{
|
||||
/**
|
||||
* Test html_extract_title() when the title is found.
|
||||
|
@ -75,12 +77,57 @@ public function testHtmlExtractNonExistentCharset()
|
|||
$this->assertFalse(html_extract_charset($html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_tag() when the tag <meta name= is found.
|
||||
*/
|
||||
public function testHtmlExtractExistentNameTag()
|
||||
{
|
||||
$description = 'Bob and Alice share cookies.';
|
||||
$html = '<html><meta>stuff2</meta><meta name="description" content="' . $description . '"/></html>';
|
||||
$this->assertEquals($description, html_extract_tag('description', $html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_tag() when the tag <meta name= is not found.
|
||||
*/
|
||||
public function testHtmlExtractNonExistentNameTag()
|
||||
{
|
||||
$html = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';
|
||||
$this->assertFalse(html_extract_tag('description', $html));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test html_extract_tag() when the tag <meta property="og: is found.
|
||||
*/
|
||||
public function testHtmlExtractExistentOgTag()
|
||||
{
|
||||
$description = 'Bob and Alice share cookies.';
|
||||
$html = '<html><meta>stuff2</meta><meta property="og:description" content="' . $description . '"/></html>';
|
||||
$this->assertEquals($description, html_extract_tag('description', $html));
|
||||
}
|
||||
|
||||
/**
 * Test html_extract_tag() when the tag <meta property="og: is not found.
 *
 * The page only exposes an unrelated OpenGraph tag (og:image), so looking
 * for the description must return false.
 */
public function testHtmlExtractNonExistentOgTag()
{
    // Use an OpenGraph fixture: with name="image" this test duplicated
    // testHtmlExtractNonExistentNameTag() and never exercised the og: path.
    $html = '<html><meta>stuff2</meta><meta property="og:image" content="img"/></html>';
    $this->assertFalse(html_extract_tag('description', $html));
}
|
||||
|
||||
/**
|
||||
* Test the download callback with valid value
|
||||
*/
|
||||
public function testCurlDownloadCallbackOk()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_ok'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'Server: GitHub.com',
|
||||
|
@ -90,7 +137,9 @@ public function testCurlDownloadCallbackOk()
|
|||
'end' => 'th=device-width">'
|
||||
. '<title>Refactoring · GitHub</title>'
|
||||
. '<link rel="search" type="application/opensea',
|
||||
'<title>ignored</title>',
|
||||
'<title>ignored</title>'
|
||||
. '<meta name="description" content="desc" />'
|
||||
. '<meta name="keywords" content="key1,key2" />',
|
||||
];
|
||||
foreach ($data as $key => $line) {
|
||||
$ignore = null;
|
||||
|
@ -102,6 +151,8 @@ public function testCurlDownloadCallbackOk()
|
|||
}
|
||||
$this->assertEquals('utf-8', $charset);
|
||||
$this->assertEquals('Refactoring · GitHub', $title);
|
||||
$this->assertEmpty($desc);
|
||||
$this->assertEmpty($keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -109,13 +160,22 @@ public function testCurlDownloadCallbackOk()
|
|||
*/
|
||||
public function testCurlDownloadCallbackOkNoCharset()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_no_charset'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'end' => 'th=device-width">'
|
||||
. '<title>Refactoring · GitHub</title>'
|
||||
. '<link rel="search" type="application/opensea',
|
||||
'<title>ignored</title>',
|
||||
'<title>ignored</title>'
|
||||
. '<meta name="description" content="desc" />'
|
||||
. '<meta name="keywords" content="key1,key2" />',
|
||||
];
|
||||
foreach ($data as $key => $line) {
|
||||
$ignore = null;
|
||||
|
@ -123,6 +183,8 @@ public function testCurlDownloadCallbackOkNoCharset()
|
|||
}
|
||||
$this->assertEmpty($charset);
|
||||
$this->assertEquals('Refactoring · GitHub', $title);
|
||||
$this->assertEmpty($desc);
|
||||
$this->assertEmpty($keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -130,14 +192,23 @@ public function testCurlDownloadCallbackOkNoCharset()
|
|||
*/
|
||||
public function testCurlDownloadCallbackOkHtmlCharset()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_no_charset'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
|
||||
'end' => 'th=device-width">'
|
||||
. '<title>Refactoring · GitHub</title>'
|
||||
. '<link rel="search" type="application/opensea',
|
||||
'<title>ignored</title>',
|
||||
'<title>ignored</title>'
|
||||
. '<meta name="description" content="desc" />'
|
||||
. '<meta name="keywords" content="key1,key2" />',
|
||||
];
|
||||
foreach ($data as $key => $line) {
|
||||
$ignore = null;
|
||||
|
@ -149,6 +220,8 @@ public function testCurlDownloadCallbackOkHtmlCharset()
|
|||
}
|
||||
$this->assertEquals('utf-8', $charset);
|
||||
$this->assertEquals('Refactoring · GitHub', $title);
|
||||
$this->assertEmpty($desc);
|
||||
$this->assertEmpty($keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -156,7 +229,14 @@ public function testCurlDownloadCallbackOkHtmlCharset()
|
|||
*/
|
||||
public function testCurlDownloadCallbackOkNoTitle()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_ok'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
|
||||
|
@ -168,6 +248,8 @@ public function testCurlDownloadCallbackOkNoTitle()
|
|||
}
|
||||
$this->assertEquals('utf-8', $charset);
|
||||
$this->assertEmpty($title);
|
||||
$this->assertEmpty($desc);
|
||||
$this->assertEmpty($keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -175,7 +257,14 @@ public function testCurlDownloadCallbackOkNoTitle()
|
|||
*/
|
||||
public function testCurlDownloadCallbackInvalidContentType()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko');
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_ct_ko'
|
||||
);
|
||||
$ignore = null;
|
||||
$this->assertFalse($callback($ignore, ''));
|
||||
$this->assertEmpty($charset);
|
||||
|
@ -187,7 +276,14 @@ public function testCurlDownloadCallbackInvalidContentType()
|
|||
*/
|
||||
public function testCurlDownloadCallbackInvalidResponseCode()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko');
|
||||
$callback = $callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_rc_ko'
|
||||
);
|
||||
$ignore = null;
|
||||
$this->assertFalse($callback($ignore, ''));
|
||||
$this->assertEmpty($charset);
|
||||
|
@ -199,13 +295,99 @@ public function testCurlDownloadCallbackInvalidResponseCode()
|
|||
*/
|
||||
public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
|
||||
{
|
||||
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rs_ct_ko');
|
||||
$callback = $callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
false,
|
||||
'ut_curl_getinfo_rs_ct_ko'
|
||||
);
|
||||
$ignore = null;
|
||||
$this->assertFalse($callback($ignore, ''));
|
||||
$this->assertEmpty($charset);
|
||||
$this->assertEmpty($title);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the download callback with valid value, and retrieve_description option enabled.
|
||||
*/
|
||||
public function testCurlDownloadCallbackOkWithDesc()
|
||||
{
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
true,
|
||||
'ut_curl_getinfo_ok'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'Server: GitHub.com',
|
||||
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
|
||||
'Content-Type: text/html; charset=utf-8',
|
||||
'Status: 200 OK',
|
||||
'th=device-width">'
|
||||
. '<title>Refactoring · GitHub</title>'
|
||||
. '<link rel="search" type="application/opensea',
|
||||
'end' => '<title>ignored</title>'
|
||||
. '<meta name="description" content="link desc" />'
|
||||
. '<meta name="keywords" content="key1,key2" />',
|
||||
];
|
||||
foreach ($data as $key => $line) {
|
||||
$ignore = null;
|
||||
$expected = $key !== 'end' ? strlen($line) : false;
|
||||
$this->assertEquals($expected, $callback($ignore, $line));
|
||||
if ($expected === false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
$this->assertEquals('utf-8', $charset);
|
||||
$this->assertEquals('Refactoring · GitHub', $title);
|
||||
$this->assertEquals('link desc', $desc);
|
||||
$this->assertEquals('key1 key2', $keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the download callback with valid value, and retrieve_description option enabled,
|
||||
* but no desc or keyword defined in the page.
|
||||
*/
|
||||
public function testCurlDownloadCallbackOkWithDescNotFound()
|
||||
{
|
||||
$callback = get_curl_download_callback(
|
||||
$charset,
|
||||
$title,
|
||||
$desc,
|
||||
$keywords,
|
||||
true,
|
||||
'ut_curl_getinfo_ok'
|
||||
);
|
||||
$data = [
|
||||
'HTTP/1.1 200 OK',
|
||||
'Server: GitHub.com',
|
||||
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
|
||||
'Content-Type: text/html; charset=utf-8',
|
||||
'Status: 200 OK',
|
||||
'th=device-width">'
|
||||
. '<title>Refactoring · GitHub</title>'
|
||||
. '<link rel="search" type="application/opensea',
|
||||
'end' => '<title>ignored</title>',
|
||||
];
|
||||
foreach ($data as $key => $line) {
|
||||
$ignore = null;
|
||||
$expected = $key !== 'end' ? strlen($line) : false;
|
||||
$this->assertEquals($expected, $callback($ignore, $line));
|
||||
if ($expected === false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
$this->assertEquals('utf-8', $charset);
|
||||
$this->assertEquals('Refactoring · GitHub', $title);
|
||||
$this->assertEmpty($desc);
|
||||
$this->assertEmpty($keywords);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test count_private.
|
||||
*/
|
||||
|
|
|
@ -212,6 +212,22 @@ <h2 class="window-title">{'Configure'|t}</h2>
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-g">
|
||||
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
|
||||
<div class="form-label">
|
||||
<label for="retrieveDescription">
|
||||
<span class="label-name">{'Automatically retrieve description for new bookmarks'|t}</span><br>
|
||||
<span class="label-desc">{'Shaarli will try to retrieve the description from meta HTML headers'|t}</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-u-lg-{$ratioInput} pure-u-{$ratioInputMobile}">
|
||||
<div class="form-input">
|
||||
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
|
||||
{if="$retrieve_description"}checked{/if}/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="pure-g">
|
||||
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
|
||||
<div class="form-label">
|
||||
|
|
|
@ -106,6 +106,14 @@
|
|||
<label for="updateCheck"> Notify me when a new release is ready</label>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top"><b>Automatically retrieve description for new bookmarks:</b></td>
|
||||
<td>
|
||||
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
|
||||
{if="$retrieve_description"}checked{/if}/>
|
||||
<label for="retrieveDescription"> Shaarli will try to retrieve the description from meta HTML headers</label>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top"><b>Enable REST API</b></td>
|
||||
<td>
|
||||
|
|
Loading…
Reference in a new issue