Merge pull request #1313 from ArthurHoaro/feature/desc-retrieval

Automatically retrieve description for new bookmarks
This commit is contained in:
ArthurHoaro 2019-07-06 12:34:02 +02:00 committed by GitHub
commit c03c90a13e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 374 additions and 75 deletions

View file

@ -7,13 +7,25 @@ use Shaarli\Bookmark\LinkDB;
*
* @param string $charset to extract from the downloaded page (reference)
* @param string $title to extract from the downloaded page (reference)
* @param string $description to extract from the downloaded page (reference)
* @param string $keywords to extract from the downloaded page (reference)
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
* @param string $curlGetInfo Optionally overrides curl_getinfo function
*
* @return Closure
*/
function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
{
function get_curl_download_callback(
&$charset,
&$title,
&$description,
&$keywords,
$retrieveDescription,
$curlGetInfo = 'curl_getinfo'
) {
$isRedirected = false;
$currentChunk = 0;
$foundChunk = null;
/**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
*
@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
*
* @return int|bool length of $data or false if we need to stop the download
*/
return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
return function (&$ch, $data) use (
$retrieveDescription,
$curlGetInfo,
&$charset,
&$title,
&$description,
&$keywords,
&$isRedirected,
&$currentChunk,
&$foundChunk
) {
$currentChunk++;
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true;
@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
}
if (empty($title)) {
$title = html_extract_title($data);
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($description)) {
$description = html_extract_tag('description', $data);
$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($keywords)) {
$keywords = html_extract_tag('keywords', $data);
if (! empty($keywords)) {
$foundChunk = $currentChunk;
// Keywords use the format tag1, tag2 multiple words, tag
// So we format them to match Shaarli's separator and glue multiple words with '-'
$keywords = implode(' ', array_map(function($keyword) {
return implode('-', preg_split('/\s+/', trim($keyword)));
}, explode(',', $keywords)));
}
}
// We got everything we want, stop the download.
if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
// If we already found either the title, description or keywords,
// it's highly unlikely that we'll find the other metas further than
// in the same chunk of data or the next one. So we also stop the download after that.
if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
&& (! $retrieveDescription
|| $foundChunk < $currentChunk
|| (!empty($title) && !empty($description) && !empty($keywords))
)
) {
return false;
}
@ -110,6 +158,35 @@ function html_extract_charset($html)
return false;
}
/**
 * Extract the content of a meta tag from HTML content, declared in either form:
 *   - OpenGraph: <meta property="og:[tag]" content="...">
 *   - Meta tag:  <meta name="[tag]" content="...">
 *
 * The identifying attribute may be `property`, `name` or `itemprop`, and it may
 * appear either before or after the `content` attribute.
 *
 * @param string $tag  Name of the tag to retrieve (matched literally).
 * @param string $html HTML content where to look for the tag.
 *
 * @return bool|string Content of the tag if found, false otherwise.
 */
function html_extract_tag($tag, $html)
{
    $properties = implode('|', ['property', 'name', 'itemprop']);
    // The tag name is embedded in the patterns below: escape it so that regex
    // metacharacters in $tag are matched literally ('#' is the delimiter).
    $tag = preg_quote($tag, '#');

    // Attribute order: property/name => content.
    $ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#';
    // If the attributes are not in the order property => content (e.g. Github).
    // The whole "og:" prefix is optional here too, so that plain
    // <meta content="..." name="[tag]"> tags are found as well.
    $ogRegexReverse = '#<meta[^>]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s/>]#';

    if (preg_match($ogRegex, $html, $matches) > 0
        || preg_match($ogRegexReverse, $html, $matches) > 0
    ) {
        return $matches[1];
    }

    return false;
}
/**
* Count private links in given linklist.
*

View file

@ -365,6 +365,7 @@ class ConfigManager
$this->setEmpty('general.links_per_page', 20);
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
$this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', false);
$this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable');

View file

@ -56,6 +56,8 @@ _These settings should not be edited_
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
- **enabled_plugins**: List of enabled plugins.
- **default_note_title**: Default title of a new note.
- **retrieve_description** (boolean): If set to true, for every new link Shaarli will try
to retrieve the description and keywords from the HTML meta tags.
### Security

View file

@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: Shaarli\n"
"POT-Creation-Date: 2019-05-25 16:37+0200\n"
"PO-Revision-Date: 2019-05-25 16:37+0200\n"
"POT-Creation-Date: 2019-07-06 12:14+0200\n"
"PO-Revision-Date: 2019-07-06 12:17+0200\n"
"Last-Translator: \n"
"Language-Team: Shaarli\n"
"Language: fr_FR\n"
@ -252,7 +252,7 @@ msgstr "404 Introuvable"
msgid "Couldn't retrieve updater class methods."
msgstr "Impossible de récupérer les méthodes de la classe Updater."
#: application/updater/Updater.php:526 index.php:1033
#: application/updater/Updater.php:526 index.php:1034
msgid ""
"You have enabled or changed thumbnails mode. <a href=\"?do=thumbs_update"
"\">Please synchronize them</a>."
@ -337,8 +337,8 @@ msgid "You are not supposed to change a password on an Open Shaarli."
msgstr ""
"Vous n'êtes pas censé modifier le mot de passe d'un Shaarli en mode ouvert."
#: index.php:957 index.php:1007 index.php:1092 index.php:1122 index.php:1232
#: index.php:1279
#: index.php:957 index.php:1007 index.php:1094 index.php:1124 index.php:1234
#: index.php:1281
msgid "Wrong token."
msgstr "Jeton invalide."
@ -356,64 +356,64 @@ msgstr "Votre mot de passe a été modifié"
msgid "Change password"
msgstr "Modifier le mot de passe"
#: index.php:1053
#: index.php:1054
msgid "Configuration was saved."
msgstr "La configuration a été sauvegardée."
#: index.php:1076 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
#: index.php:1078 tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:24
msgid "Configure"
msgstr "Configurer"
#: index.php:1086 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: index.php:1088 tmp/changetag.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:36
msgid "Manage tags"
msgstr "Gérer les tags"
#: index.php:1105
#: index.php:1107
#, php-format
msgid "The tag was removed from %d link."
msgid_plural "The tag was removed from %d links."
msgstr[0] "Le tag a été supprimé de %d lien."
msgstr[1] "Le tag a été supprimé de %d liens."
#: index.php:1106
#: index.php:1108
#, php-format
msgid "The tag was renamed in %d link."
msgid_plural "The tag was renamed in %d links."
msgstr[0] "Le tag a été renommé dans %d lien."
msgstr[1] "Le tag a été renommé dans %d liens."
#: index.php:1113 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
#: index.php:1115 tmp/addlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:13
msgid "Shaare a new link"
msgstr "Partager un nouveau lien"
#: index.php:1342 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
#: index.php:1344 tmp/linklist.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Edit"
msgstr "Modifier"
#: index.php:1342 index.php:1413
#: index.php:1344 index.php:1416
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:26
msgid "Shaare"
msgstr "Shaare"
#: index.php:1382
#: index.php:1385
msgid "Note: "
msgstr "Note : "
#: index.php:1421
#: index.php:1424
msgid "Invalid link ID provided"
msgstr ""
#: index.php:1441 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
#: index.php:1444 tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:65
msgid "Export"
msgstr "Exporter"
#: index.php:1503 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
#: index.php:1506 tmp/import.b91ef64efc3688266305ea9b42e5017e.rtpl.php:83
msgid "Import"
msgstr "Importer"
#: index.php:1513
#: index.php:1516
#, php-format
msgid ""
"The file you are trying to upload is probably bigger than what this "
@ -423,20 +423,20 @@ msgstr ""
"le serveur web peut accepter (%s). Merci de l'envoyer en parties plus "
"légères."
#: index.php:1558 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: index.php:1561 tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:26
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:22
msgid "Plugin administration"
msgstr "Administration des plugins"
#: index.php:1612 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
#: index.php:1615 tmp/thumbnails.b91ef64efc3688266305ea9b42e5017e.rtpl.php:14
msgid "Thumbnails update"
msgstr "Mise à jour des miniatures"
#: index.php:1778
#: index.php:1781
msgid "Search: "
msgstr "Recherche : "
#: index.php:1821
#: index.php:1824
#, php-format
msgid ""
"<pre>Sessions do not seem to work correctly on your server.<br>Make sure the "
@ -455,7 +455,7 @@ msgstr ""
"des cookies. Nous vous recommandons d'accéder à votre serveur depuis son "
"adresse IP ou un <em>Fully Qualified Domain Name</em>.<br>"
#: index.php:1831
#: index.php:1834
msgid "Click to try again."
msgstr "Cliquer ici pour réessayer."
@ -592,7 +592,7 @@ msgstr "Mauvaise réponse du hub %s"
msgid "Enable PubSubHubbub feed publishing."
msgstr "Active la publication de flux vers PubSubHubbub."
#: plugins/qrcode/qrcode.php:73 plugins/wallabag/wallabag.php:68
#: plugins/qrcode/qrcode.php:72 plugins/wallabag/wallabag.php:68
msgid "For each link, add a QRCode icon."
msgstr "Pour chaque lien, ajouter une icône de QRCode."
@ -679,6 +679,34 @@ msgstr "Vous pouvez aussi modifier les tags dans la"
msgid "tag list"
msgstr "liste des tags"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:143
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:147
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:316
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:151
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:320
msgid "None"
msgstr "Aucune"
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:158
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:297
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.90100d2eaf5d3705e14b9b4f78ecddc9.rtpl.php:162
#| msgid "Enable thumbnails"
msgid "Synchonize thumbnails"
msgstr ""
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:29
msgid "title"
msgstr "titre"
@ -762,50 +790,41 @@ msgid "Notify me when a new release is ready"
msgstr "Me notifier lorsqu'une nouvelle version est disponible"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:247
msgid "Automatically retrieve description for new bookmarks"
msgstr "Récupérer automatiquement la description"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
msgid "Shaarli will try to retrieve the description from meta HTML headers"
msgstr ""
"Shaarli essaiera de récupérer la description depuis les balises HTML meta "
"dans les entêtes"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:169
msgid "Enable REST API"
msgstr "Activer l'API REST"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:248
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:264
#: tmp/install.b91ef64efc3688266305ea9b42e5017e.rtpl.php:170
msgid "Allow third party software to use Shaarli such as mobile application"
msgstr ""
"Permet aux applications tierces d'utiliser Shaarli, par exemple les "
"applications mobiles"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:263
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:279
msgid "API secret"
msgstr "Clé d'API secrète"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:277
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:293
msgid "Enable thumbnails"
msgstr "Activer les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:281
msgid "You need to enable the extension <code>php-gd</code> to use thumbnails."
msgstr ""
"Vous devez activer l'extension <code>php-gd</code> pour utiliser les "
"miniatures."
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:285
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:301
#: tmp/tools.b91ef64efc3688266305ea9b42e5017e.rtpl.php:56
msgid "Synchronize thumbnails"
msgstr "Synchroniser les miniatures"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:296
#: tmp/export.b91ef64efc3688266305ea9b42e5017e.rtpl.php:31
msgid "All"
msgstr "Tous"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:300
msgid "Only common media hosts"
msgstr "Seulement les hébergeurs de média connus"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:304
msgid "None"
msgstr "Aucune"
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:312
#: tmp/configure.b91ef64efc3688266305ea9b42e5017e.rtpl.php:328
#: tmp/editlink.b91ef64efc3688266305ea9b42e5017e.rtpl.php:72
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:139
#: tmp/pluginsadmin.b91ef64efc3688266305ea9b42e5017e.rtpl.php:199
@ -1149,17 +1168,13 @@ msgstr "Déconnexion"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:150
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:150
#, fuzzy
#| msgid "Public"
msgid "Set public"
msgstr "Publics"
msgstr "Rendre public"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:155
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:155
#, fuzzy
#| msgid "Private"
msgid "Set private"
msgstr "Privé"
msgstr "Rendre privé"
#: tmp/page.header.b91ef64efc3688266305ea9b42e5017e.rtpl.php:187
#: tmp/page.header.cedf684561d925457130839629000a81.rtpl.php:187
@ -1409,11 +1424,6 @@ msgstr ""
"Glisser ce lien dans votre barre de favoris ou cliquer droit dessus et « "
"Ajouter aux favoris »"
#, fuzzy
#~| msgid "Enable thumbnails"
#~ msgid "Synchonize thumbnails"
#~ msgstr "Activer les miniatures"
#~ msgid ""
#~ "You need to browse your Shaarli over <strong>HTTPS</strong> to use this "
#~ "functionality."

View file

@ -1015,6 +1015,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
$conf->set('general.timezone', $tz);
$conf->set('general.title', escape($_POST['title']));
$conf->set('general.header_link', escape($_POST['titleLink']));
$conf->set('general.retrieve_description', !empty($_POST['retrieveDescription']));
$conf->set('resource.theme', escape($_POST['theme']));
$conf->set('security.session_protection_disabled', !empty($_POST['disablesessionprotection']));
$conf->set('privacy.default_private_links', !empty($_POST['privateLinkByDefault']));
@ -1063,6 +1064,7 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
);
$PAGE->assign('continents', $continents);
$PAGE->assign('cities', $cities);
$PAGE->assign('retrieve_description', $conf->get('general.retrieve_description'));
$PAGE->assign('private_links_default', $conf->get('privacy.default_private_links', false));
$PAGE->assign('session_protection_disabled', $conf->get('security.session_protection_disabled', false));
$PAGE->assign('enable_rss_permalinks', $conf->get('feed.rss_permalinks', false));
@ -1364,13 +1366,14 @@ function renderPage($conf, $pluginManager, $LINKSDB, $history, $sessionManager,
// If this is an HTTP(S) link, we try to get the page to extract
// the title (otherwise we will go straight to the edit form.)
if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) {
$retrieveDescription = $conf->get('general.retrieve_description');
// Short timeout to keep the application responsive
// The callback will fill $charset, $title, $description and $tags with data from the downloaded page.
get_http_response(
$url,
$conf->get('general.download_timeout', 30),
$conf->get('general.download_max_size', 4194304),
get_curl_download_callback($charset, $title)
get_curl_download_callback($charset, $title, $description, $tags, $retrieveDescription)
);
if (! empty($title) && strtolower($charset) != 'utf-8') {
$title = mb_convert_encoding($title, 'utf-8', $charset);

View file

@ -2,14 +2,16 @@
namespace Shaarli\Bookmark;
use PHPUnit\Framework\TestCase;
use ReferenceLinkDB;
use Shaarli\Config\ConfigManager;
require_once 'tests/utils/CurlUtils.php';
/**
* Class LinkUtilsTest.
*/
class LinkUtilsTest extends \PHPUnit\Framework\TestCase
class LinkUtilsTest extends TestCase
{
/**
* Test html_extract_title() when the title is found.
@ -75,12 +77,57 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
$this->assertFalse(html_extract_charset($html));
}
/**
 * html_extract_tag() must return the content attribute of a matching
 * <meta name="..."> tag.
 */
public function testHtmlExtractExistentNameTag()
{
    $expected = 'Bob and Alice share cookies.';
    $page = '<html><meta>stuff2</meta><meta name="description" content="' . $expected . '"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertEquals($expected, $extracted);
}
/**
 * html_extract_tag() must return false when no <meta name="..."> tag
 * carries the requested name.
 */
public function testHtmlExtractNonExistentNameTag()
{
    $page = '<html><meta>stuff2</meta><meta name="image" content="img"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertFalse($extracted);
}
/**
 * html_extract_tag() must return the content attribute of a matching
 * OpenGraph <meta property="og:..."> tag.
 */
public function testHtmlExtractExistentOgTag()
{
    $expected = 'Bob and Alice share cookies.';
    $page = '<html><meta>stuff2</meta><meta property="og:description" content="' . $expected . '"/></html>';

    $extracted = html_extract_tag('description', $page);

    $this->assertEquals($expected, $extracted);
}
/**
 * Test html_extract_tag() when the tag <meta property="og: is not found.
 *
 * The fixture only contains an unrelated OpenGraph tag (og:image), so the
 * lookup for "description" has to go through the og: matching path and fail.
 */
public function testHtmlExtractNonExistentOgTag()
{
    $html = '<html><meta>stuff2</meta><meta property="og:image" content="img"/></html>';
    $this->assertFalse(html_extract_tag('description', $html));
}
/**
* Test the download callback with valid value
*/
public function testCurlDownloadCallbackOk()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
@ -90,7 +137,9 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -102,6 +151,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -109,13 +160,22 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackOkNoCharset()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -123,6 +183,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
}
$this->assertEmpty($charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -130,14 +192,23 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackOkHtmlCharset()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_no_charset');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
);
$data = [
'HTTP/1.1 200 OK',
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
@ -149,6 +220,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -156,7 +229,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackOkNoTitle()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ok');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
@ -168,6 +248,8 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
}
$this->assertEquals('utf-8', $charset);
$this->assertEmpty($title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
}
/**
@ -175,7 +257,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackInvalidContentType()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_ct_ko');
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
@ -187,7 +276,14 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackInvalidResponseCode()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rc_ko');
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rc_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
@ -199,13 +295,99 @@ class LinkUtilsTest extends \PHPUnit\Framework\TestCase
*/
public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
{
$callback = get_curl_download_callback($charset, $title, 'ut_curl_getinfo_rs_ct_ko');
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rs_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
$this->assertEmpty($title);
}
/**
 * Feed the download callback a valid response with the retrieve_description
 * option enabled: title, description and keywords must all be extracted, and
 * the download must be stopped on the chunk keyed 'end'.
 */
public function testCurlDownloadCallbackOkWithDesc()
{
    $chunks = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        'end' => '<title>ignored</title>'
            . '<meta name="description" content="link desc" />'
            . '<meta name="keywords" content="key1,key2" />',
    ];
    $callback = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );

    foreach ($chunks as $index => $chunk) {
        $handle = null;
        // Only the chunk keyed 'end' is expected to abort the download.
        $isLast = $index === 'end';
        $this->assertEquals($isLast ? false : strlen($chunk), $callback($handle, $chunk));
        if ($isLast) {
            break;
        }
    }

    $this->assertEquals('utf-8', $charset);
    $this->assertEquals('Refactoring · GitHub', $title);
    $this->assertEquals('link desc', $desc);
    $this->assertEquals('key1 key2', $keywords);
}
/**
 * Feed the download callback a valid response with the retrieve_description
 * option enabled, but without any description or keywords meta tag in the
 * page: only the charset and the title must be filled.
 */
public function testCurlDownloadCallbackOkWithDescNotFound()
{
    $chunks = [
        'HTTP/1.1 200 OK',
        'Server: GitHub.com',
        'Date: Sat, 28 Oct 2017 12:01:33 GMT',
        'Content-Type: text/html; charset=utf-8',
        'Status: 200 OK',
        'th=device-width">'
            . '<title>Refactoring · GitHub</title>'
            . '<link rel="search" type="application/opensea',
        'end' => '<title>ignored</title>',
    ];
    $callback = get_curl_download_callback(
        $charset,
        $title,
        $desc,
        $keywords,
        true,
        'ut_curl_getinfo_ok'
    );

    foreach ($chunks as $index => $chunk) {
        $handle = null;
        // Only the chunk keyed 'end' is expected to abort the download.
        $isLast = $index === 'end';
        $this->assertEquals($isLast ? false : strlen($chunk), $callback($handle, $chunk));
        if ($isLast) {
            break;
        }
    }

    $this->assertEquals('utf-8', $charset);
    $this->assertEquals('Refactoring · GitHub', $title);
    $this->assertEmpty($desc);
    $this->assertEmpty($keywords);
}
/**
* Test count_private.
*/

View file

@ -212,6 +212,22 @@
</div>
</div>
</div>
<div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label">
<label for="retrieveDescription">
<span class="label-name">{'Automatically retrieve description for new bookmarks'|t}</span><br>
<span class="label-desc">{'Shaarli will try to retrieve the description from meta HTML headers'|t}</span>
</label>
</div>
</div>
<div class="pure-u-lg-{$ratioInput} pure-u-{$ratioInputMobile}">
<div class="form-input">
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
</div>
</div>
</div>
<div class="pure-g">
<div class="pure-u-lg-{$ratioLabel} pure-u-{$ratioLabelMobile}">
<div class="form-label">

View file

@ -106,6 +106,14 @@
<label for="updateCheck">&nbsp;Notify me when a new release is ready</label>
</td>
</tr>
<tr>
<td valign="top"><b>Automatically retrieve description for new bookmarks:</b></td>
<td>
<input type="checkbox" name="retrieveDescription" id="retrieveDescription"
{if="$retrieve_description"}checked{/if}/>
<label for="retrieveDescription">&nbsp;Shaarli will try to retrieve the description from meta HTML headers</label>
</td>
</tr>
<tr>
<td valign="top"><b>Enable REST API</b></td>
<td>