Add a setting to retrieve bookmark metadata asynchronously

- There is a new standalone script (metadata.js) which requests
    a new controller to get bookmark metadata and fill the form async
  - This feature is enabled with the new setting: general.enable_async_metadata
    (enabled by default)
  - general.retrieve_description is now enabled by default
  - A small rotating loader animation has been added to bookmark inputs
    when metadata is being retrieved (default template)
  - Custom JS htmlentities has been removed and the mathiasbynens/he
    library is used instead

Fixes #1563
This commit is contained in:
ArthurHoaro 2020-09-25 13:29:36 +02:00
parent f34554c6c2
commit 4cf3564d28
19 changed files with 447 additions and 75 deletions

View file

@ -175,6 +175,7 @@ translate:
eslint:
@yarn run eslint -c .dev/.eslintrc.js assets/vintage/js/
@yarn run eslint -c .dev/.eslintrc.js assets/default/js/
@yarn run eslint -c .dev/.eslintrc.js assets/common/js/
### Run CSSLint check against Shaarli's SCSS files
sasslint:

View file

@ -366,7 +366,8 @@ protected function setDefaultValues()
$this->setEmpty('general.links_per_page', 20);
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
$this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', false);
$this->setEmpty('general.retrieve_description', true);
$this->setEmpty('general.enable_async_metadata', true);
$this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable');

View file

@ -14,6 +14,7 @@
use Shaarli\Front\Controller\Visitor\ErrorNotFoundController;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -90,6 +91,10 @@ public function build(): ShaarliContainer
);
};
$container['metadataRetriever'] = function (ShaarliContainer $container): MetadataRetriever {
return new MetadataRetriever($container->conf, $container->httpAccess);
};
$container['pageBuilder'] = function (ShaarliContainer $container): PageBuilder {
return new PageBuilder(
$container->conf,

View file

@ -10,6 +10,7 @@
use Shaarli\Formatter\FormatterFactory;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -35,6 +36,7 @@
* @property History $history
* @property HttpAccess $httpAccess
* @property LoginManager $loginManager
* @property MetadataRetriever $metadataRetriever
* @property NetscapeBookmarkUtils $netscapeBookmarkUtils
* @property callable $notFoundHandler Overrides default Slim exception display
* @property PageBuilder $pageBuilder

View file

@ -53,36 +53,22 @@ public function displayCreateForm(Request $request, Response $response): Respons
// If this is an HTTP(S) link, we try go get the page to extract
// the title (otherwise we will to straight to the edit form.)
if (empty($title) && strpos(get_url_scheme($url) ?: '', 'http') !== false) {
$retrieveDescription = $this->container->conf->get('general.retrieve_description');
// Short timeout to keep the application responsive
// The callback will fill $charset and $title with data from the downloaded page.
$this->container->httpAccess->getHttpResponse(
$url,
$this->container->conf->get('general.download_timeout', 30),
$this->container->conf->get('general.download_max_size', 4194304),
$this->container->httpAccess->getCurlDownloadCallback(
$charset,
$title,
$description,
$tags,
$retrieveDescription
)
);
if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
$title = mb_convert_encoding($title, 'utf-8', $charset);
}
if (true !== $this->container->conf->get('general.enable_async_metadata', true)
&& empty($title)
&& strpos(get_url_scheme($url) ?: '', 'http') !== false
) {
$metadata = $this->container->metadataRetriever->retrieve($url);
}
if (empty($url) && empty($title)) {
$title = $this->container->conf->get('general.default_note_title', t('Note: '));
if (empty($url)) {
$metadata['title'] = $this->container->conf->get('general.default_note_title', t('Note: '));
}
$link = [
'title' => $title,
'title' => $title ?? $metadata['title'] ?? '',
'url' => $url ?? '',
'description' => $description ?? '',
'tags' => $tags ?? '',
'description' => $description ?? $metadata['description'] ?? '',
'tags' => $tags ?? $metadata['tags'] ?? '',
'private' => $private,
];
} else {
@ -352,6 +338,8 @@ protected function displayForm(array $link, bool $isNew, Request $request, Respo
'source' => $request->getParam('source') ?? '',
'tags' => $tags,
'default_private_links' => $this->container->conf->get('privacy.default_private_links', false),
'async_metadata' => $this->container->conf->get('general.enable_async_metadata', true),
'retrieve_description' => $this->container->conf->get('general.retrieve_description', false),
]);
$this->executePageHooks('render_editlink', $data, TemplatePage::EDIT_LINK);

View file

@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace Shaarli\Front\Controller\Admin;
use Slim\Http\Request;
use Slim\Http\Response;
/**
 * Admin controller exposing remote bookmark metadata as JSON.
 */
class MetadataController extends ShaarliAdminController
{
    /**
     * GET /admin/metadata?url=<url> - Attempt to retrieve metadata from the provided URL.
     *
     * The URL is read from the `url` request parameter. When it is non-empty and its scheme
     * contains `http`, the response carries whatever the container's metadata retriever
     * returns for it; in every other case an empty JSON array is returned.
     *
     * @param Request  $request  Current request, expected to carry the `url` parameter.
     * @param Response $response Response object used to build the JSON payload.
     *
     * @return Response JSON response.
     */
    public function ajaxRetrieveTitle(Request $request, Response $response): Response
    {
        $url = $request->getParam('url');

        // Only try to extract metadata from URL with HTTP(s) scheme
        $isHttpUrl = !empty($url) && strpos(get_url_scheme($url) ?: '', 'http') !== false;

        $metadata = $isHttpUrl
            ? $this->container->metadataRetriever->retrieve($url)
            : [];

        return $response->withJson($metadata);
    }
}

View file

@ -0,0 +1,68 @@
<?php
declare(strict_types=1);
namespace Shaarli\Http;
use Shaarli\Config\ConfigManager;
/**
 * HTTP Tool used to extract metadata from external URL (title, description, etc.).
 */
class MetadataRetriever
{
    /** @var ConfigManager */
    protected $conf;

    /** @var HttpAccess */
    protected $httpAccess;

    public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
    {
        $this->conf = $conf;
        $this->httpAccess = $httpAccess;
    }

    /**
     * Retrieve metadata for given URL.
     *
     * The values are filled in by the download callback provided by HttpAccess. Any value the
     * callback leaves untouched stays `null`. When a title was found together with a charset
     * other than UTF-8, the title is converted to UTF-8; if the charset is missing or cannot
     * be used for conversion, the title is returned as downloaded.
     *
     * @param string $url Remote URL to fetch.
     *
     * @return array [
     *                  'title' => <remote title>,
     *                  'description' => <remote description>,
     *                  'tags' => <remote keywords>,
     *               ]
     */
    public function retrieve(string $url): array
    {
        $charset = null;
        $title = null;
        $description = null;
        $tags = null;
        $retrieveDescription = $this->conf->get('general.retrieve_description');

        // Short timeout to keep the application responsive
        // The callback will fill $charset and $title with data from the downloaded page.
        $this->httpAccess->getHttpResponse(
            $url,
            $this->conf->get('general.download_timeout', 30),
            $this->conf->get('general.download_max_size', 4194304),
            $this->httpAccess->getCurlDownloadCallback(
                $charset,
                $title,
                $description,
                $tags,
                $retrieveDescription
            )
        );

        // $charset may still be null here (no charset detected): with strict types enabled,
        // strtolower(null) would raise a TypeError, so it is checked before being used.
        if (is_string($title) && !empty($title)
            && is_string($charset) && $charset !== ''
            && strtolower($charset) !== 'utf-8'
        ) {
            $title = $this->convertToUtf8($title, $charset);
        }

        return [
            'title' => $title,
            'description' => $description,
            'tags' => $tags,
        ];
    }

    /**
     * Convert a string to UTF-8, returning it unchanged when the conversion is not possible.
     *
     * The source charset comes from the remote page, so it may name an encoding that mbstring
     * does not know: PHP 8 throws a ValueError in that case, older versions return false.
     *
     * @param string $text    Text to convert.
     * @param string $charset Encoding the text is declared to be in.
     *
     * @return string Converted text, or $text itself if the conversion failed.
     */
    private function convertToUtf8(string $text, string $charset): string
    {
        try {
            $converted = mb_convert_encoding($text, 'utf-8', $charset);
        } catch (\ValueError $e) {
            return $text;
        }

        return is_string($converted) ? $converted : $text;
    }
}

View file

@ -0,0 +1,39 @@
import he from 'he';
/**
 * Stop the loading animation on the given elements by removing their
 * `loading-input` class.
 *
 * @param loaders Collection of elements (may be null or empty, in which case nothing happens).
 */
function clearLoaders(loaders) {
  const hasLoaders = loaders != null && loaders.length > 0;
  if (!hasLoaders) {
    return;
  }

  // Work on a copy of the collection, as the original implementation did.
  const snapshot = [...loaders];
  snapshot.forEach((element) => {
    element.classList.remove('loading-input');
  });
}
/**
 * Asynchronously fill the bookmark form with metadata retrieved by the server.
 *
 * If the title input already has a value, nothing is requested. Otherwise the value of the
 * `lf_url` input is sent to `<base path>/admin/metadata`, and every non-empty string in the
 * JSON response is written to the matching `lf_<key>` input/textarea, provided that field is
 * still empty. Loaders are cleared in every outcome, including request or parsing failures.
 */
(() => {
  const loaders = document.querySelectorAll('.loading-input');

  const inputTitle = document.querySelector('input[name="lf_title"]');
  if (inputTitle != null && inputTitle.value.length > 0) {
    clearLoaders(loaders);
    return;
  }

  const inputUrl = document.querySelector('input[name="lf_url"]');
  const inputBasePath = document.querySelector('input[name="js_base_path"]');
  if (inputUrl == null || inputBasePath == null) {
    // Without these inputs no request can be built: do not leave the loaders spinning.
    clearLoaders(loaders);
    return;
  }

  const url = inputUrl.value;
  const basePath = inputBasePath.value;

  const xhr = new XMLHttpRequest();
  // The URL is a query-string *value*: encodeURIComponent is required so that `&`, `#`, `=`
  // and `?` inside the bookmark URL are escaped instead of being parsed as part of our query.
  xhr.open('GET', `${basePath}/admin/metadata?url=${encodeURIComponent(url)}`, true);
  xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');

  xhr.onload = () => {
    let result = null;
    try {
      result = JSON.parse(xhr.response);
    } catch (e) {
      // Not a JSON payload (e.g. an error page): leave the form untouched.
      result = null;
    }

    if (result !== null && typeof result === 'object') {
      Object.keys(result).forEach((key) => {
        const value = result[key];
        if (typeof value !== 'string' || value.length === 0) {
          return;
        }

        const element = document.querySelector(`input[name="lf_${key}"], textarea[name="lf_${key}"]`);
        // Never overwrite something the user has already typed.
        if (element != null && element.value.length === 0) {
          element.value = he.decode(value);
        }
      });
    }

    clearLoaders(loaders);
  };

  // A failed, aborted or timed out request must also stop the loading animation.
  const stopLoading = () => clearLoaders(loaders);
  xhr.onerror = stopLoading;
  xhr.onabort = stopLoading;
  xhr.ontimeout = stopLoading;

  xhr.send();
})();

View file

@ -1,4 +1,5 @@
import Awesomplete from 'awesomplete';
import he from 'he';
/**
* Find a parent element according to its tag and its attributes
@ -95,15 +96,6 @@ function updateAwesompleteList(selector, tags, instances) {
return instances;
}
/**
* html_entities in JS
*
* @see http://stackoverflow.com/questions/18749591/encode-html-entities-in-javascript
*/
function htmlEntities(str) {
return str.replace(/[\u00A0-\u9999<>&]/gim, (i) => `&#${i.charCodeAt(0)};`);
}
/**
* Add the class 'hidden' to city options not attached to the current selected continent.
*
@ -569,7 +561,7 @@ function init(description) {
input.setAttribute('name', totag);
input.setAttribute('value', totag);
findParent(input, 'div', { class: 'rename-tag-form' }).style.display = 'none';
block.querySelector('a.tag-link').innerHTML = htmlEntities(totag);
block.querySelector('a.tag-link').innerHTML = he.encode(totag);
block
.querySelector('a.tag-link')
.setAttribute('href', `${basePath}/?searchtags=${encodeURIComponent(totag)}`);

View file

@ -1269,6 +1269,57 @@ form {
}
}
.loading-input {
position: relative;
@keyframes around {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
.icon-container {
position: absolute;
right: 60px;
top: calc(50% - 10px);
}
.loader {
position: relative;
height: 20px;
width: 20px;
display: inline-block;
animation: around 5.4s infinite;
&::after,
&::before {
content: "";
background: $form-input-background;
position: absolute;
display: inline-block;
width: 100%;
height: 100%;
border-width: 2px;
border-color: #333 #333 transparent transparent;
border-style: solid;
border-radius: 20px;
box-sizing: border-box;
top: 0;
left: 0;
animation: around 0.7s ease-in-out infinite;
}
&::after {
animation: around 0.7s ease-in-out 0.1s infinite;
background: transparent;
}
}
}
// LOGIN
.login-form-container {
.remember-me {

View file

@ -150,6 +150,7 @@ _These settings should not be edited_
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
- **enabled_plugins**: List of enabled plugins.
- **default_note_title**: Default title of a new note.
- **enable_async_metadata** (boolean): Retrieve external bookmark metadata asynchronously to prevent bookmark creation slowdown.
- **retrieve_description** (boolean): If set to true, for every new Shaare Shaarli will try to retrieve the description and keywords from the HTML meta tags.
- **root_url**: Overrides automatic discovery of Shaarli instance's URL (e.g.) `https://sub.domain.tld/shaarli-folder/`.

View file

@ -129,7 +129,7 @@
$this->post('/plugins', '\Shaarli\Front\Controller\Admin\PluginsController:save');
$this->get('/token', '\Shaarli\Front\Controller\Admin\TokenController:getToken');
$this->get('/thumbnails', '\Shaarli\Front\Controller\Admin\ThumbnailsController:index');
$this->get('/metadata', '\Shaarli\Front\Controller\Admin\MetadataController:ajaxRetrieveTitle');
$this->get('/visibility/{visibility}', '\Shaarli\Front\Controller\Admin\SessionFilterController:visibility');
})->add('\Shaarli\Front\ShaarliAdminMiddleware');

View file

@ -7,6 +7,7 @@
"awesomplete": "^1.1.2",
"blazy": "^1.8.2",
"fork-awesome": "^1.1.7",
"he": "^1.2.0",
"pure-extras": "^1.0.0",
"purecss": "^1.0.0"
},

View file

@ -12,6 +12,7 @@
use Shaarli\Front\Controller\Visitor\ErrorNotFoundController;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -72,6 +73,7 @@ public function testBuildContainer(): void
static::assertInstanceOf(History::class, $container->history);
static::assertInstanceOf(HttpAccess::class, $container->httpAccess);
static::assertInstanceOf(LoginManager::class, $container->loginManager);
static::assertInstanceOf(MetadataRetriever::class, $container->metadataRetriever);
static::assertInstanceOf(NetscapeBookmarkUtils::class, $container->netscapeBookmarkUtils);
static::assertInstanceOf(PageBuilder::class, $container->pageBuilder);
static::assertInstanceOf(PageCacheManager::class, $container->pageCacheManager);

View file

@ -9,6 +9,7 @@
use Shaarli\Front\Controller\Admin\FrontAdminControllerMockHelper;
use Shaarli\Front\Controller\Admin\ManageShaareController;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\TestCase;
use Slim\Http\Request;
use Slim\Http\Response;
@ -25,6 +26,7 @@ public function setUp(): void
$this->createContainer();
$this->container->httpAccess = $this->createMock(HttpAccess::class);
$this->container->metadataRetriever = $this->createMock(MetadataRetriever::class);
$this->controller = new ManageShaareController($this->container);
}
@ -32,7 +34,7 @@ public function setUp(): void
* Test displaying bookmark create form
* Ensure that every step of the standard workflow works properly.
*/
public function testDisplayCreateFormWithUrl(): void
public function testDisplayCreateFormWithUrlAndWithMetadataRetrieval(): void
{
$this->container->environment = [
'HTTP_REFERER' => $referer = 'http://shaarli/subfolder/controller/?searchtag=abc'
@ -53,40 +55,20 @@ public function testDisplayCreateFormWithUrl(): void
});
$response = new Response();
$this->container->httpAccess
->expects(static::once())
->method('getCurlDownloadCallback')
->willReturnCallback(
function (&$charset, &$title, &$description, &$tags) use (
$remoteTitle,
$remoteDesc,
$remoteTags
): callable {
return function () use (
&$charset,
&$title,
&$description,
&$tags,
$remoteTitle,
$remoteDesc,
$remoteTags
): void {
$charset = 'ISO-8859-1';
$title = $remoteTitle;
$description = $remoteDesc;
$tags = $remoteTags;
};
$this->container->conf = $this->createMock(ConfigManager::class);
$this->container->conf->method('get')->willReturnCallback(function (string $param, $default) {
if ($param === 'general.enable_async_metadata') {
return false;
}
)
;
$this->container->httpAccess
->expects(static::once())
->method('getHttpResponse')
->with($expectedUrl, 30, 4194304)
->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void {
$callback();
})
;
return $default;
});
$this->container->metadataRetriever->expects(static::once())->method('retrieve')->willReturn([
'title' => $remoteTitle,
'description' => $remoteDesc,
'tags' => $remoteTags,
]);
$this->container->bookmarkService
->expects(static::once())
@ -127,6 +109,72 @@ function (&$charset, &$title, &$description, &$tags) use (
static::assertSame($tags, $assignedVariables['tags']);
static::assertArrayHasKey('source', $assignedVariables);
static::assertArrayHasKey('default_private_links', $assignedVariables);
static::assertArrayHasKey('async_metadata', $assignedVariables);
static::assertArrayHasKey('retrieve_description', $assignedVariables);
}
/**
* Test displaying bookmark create form without any external metadata retrieval attempt
*/
public function testDisplayCreateFormWithUrlAndWithoutMetadata(): void
{
$this->container->environment = [
'HTTP_REFERER' => $referer = 'http://shaarli/subfolder/controller/?searchtag=abc'
];
$assignedVariables = [];
$this->assignTemplateVars($assignedVariables);
$url = 'http://url.tld/other?part=3&utm_ad=pay#hash';
$expectedUrl = str_replace('&utm_ad=pay', '', $url);
$request = $this->createMock(Request::class);
$request->method('getParam')->willReturnCallback(function (string $key) use ($url): ?string {
return $key === 'post' ? $url : null;
});
$response = new Response();
$this->container->metadataRetriever->expects(static::never())->method('retrieve');
$this->container->bookmarkService
->expects(static::once())
->method('bookmarksCountPerTag')
->willReturn($tags = ['tag1' => 2, 'tag2' => 1])
;
// Make sure that PluginManager hook is triggered
$this->container->pluginManager
->expects(static::at(0))
->method('executeHooks')
->willReturnCallback(function (string $hook, array $data): array {
static::assertSame('render_editlink', $hook);
static::assertSame('', $data['link']['title']);
static::assertSame('', $data['link']['description']);
return $data;
})
;
$result = $this->controller->displayCreateForm($request, $response);
static::assertSame(200, $result->getStatusCode());
static::assertSame('editlink', (string) $result->getBody());
static::assertSame('Shaare - Shaarli', $assignedVariables['pagetitle']);
static::assertSame($expectedUrl, $assignedVariables['link']['url']);
static::assertSame('', $assignedVariables['link']['title']);
static::assertSame('', $assignedVariables['link']['description']);
static::assertSame('', $assignedVariables['link']['tags']);
static::assertFalse($assignedVariables['link']['private']);
static::assertTrue($assignedVariables['link_is_new']);
static::assertSame($referer, $assignedVariables['http_referer']);
static::assertSame($tags, $assignedVariables['tags']);
static::assertArrayHasKey('source', $assignedVariables);
static::assertArrayHasKey('default_private_links', $assignedVariables);
static::assertArrayHasKey('async_metadata', $assignedVariables);
static::assertArrayHasKey('retrieve_description', $assignedVariables);
}
/**

View file

@ -0,0 +1,123 @@
<?php
declare(strict_types=1);
namespace Shaarli\Http;
use PHPUnit\Framework\TestCase;
use Shaarli\Config\ConfigManager;
/**
 * Unit tests for MetadataRetriever, run against mocked configuration and HTTP access.
 */
class MetadataRetrieverTest extends TestCase
{
    /** @var MetadataRetriever Instance under test */
    protected $retriever;

    /** @var ConfigManager Mocked configuration manager */
    protected $conf;

    /** @var HttpAccess Mocked HTTP access layer */
    protected $httpAccess;

    /**
     * Build the mocks and the retriever instance before each test.
     */
    public function setUp(): void
    {
        $this->conf = $this->createMock(ConfigManager::class);
        $this->httpAccess = $this->createMock(HttpAccess::class);

        // Every setting resolves to the default supplied by the caller,
        // or to the setting name itself when that default is null.
        $this->conf->method('get')->willReturnCallback(function (string $param, $default) {
            if ($default === null) {
                return $param;
            }

            return $default;
        });

        $this->retriever = new MetadataRetriever($this->conf, $this->httpAccess);
    }

    /**
     * Test metadata retrieve() with values returned
     */
    public function testFullRetrieval(): void
    {
        $url = 'https://domain.tld/link';
        $remoteTitle = 'Remote Title ';
        $remoteDesc = 'Sometimes the meta description is relevant.';
        $remoteTags = 'abc def';

        // The download callback receives the retriever's variables by reference and fills them.
        $this->httpAccess
            ->expects(static::once())
            ->method('getCurlDownloadCallback')
            ->willReturnCallback(
                function (&$charsetRef, &$titleRef, &$descriptionRef, &$tagsRef) use (
                    $remoteTitle,
                    $remoteDesc,
                    $remoteTags
                ): callable {
                    return function () use (
                        &$charsetRef,
                        &$titleRef,
                        &$descriptionRef,
                        &$tagsRef,
                        $remoteTitle,
                        $remoteDesc,
                        $remoteTags
                    ): void {
                        $charsetRef = 'ISO-8859-1';
                        $titleRef = $remoteTitle;
                        $descriptionRef = $remoteDesc;
                        $tagsRef = $remoteTags;
                    };
                }
            )
        ;

        // The fake HTTP request only triggers the download callback.
        $this->httpAccess
            ->expects(static::once())
            ->method('getHttpResponse')
            ->with($url, 30, 4194304)
            ->willReturnCallback(function ($requestedUrl, $timeout, $maxBytes, $downloadCallback): void {
                $downloadCallback();
            })
        ;

        $expectedResult = [
            'title' => $remoteTitle,
            'description' => $remoteDesc,
            'tags' => $remoteTags,
        ];

        static::assertSame($expectedResult, $this->retriever->retrieve($url));
    }

    /**
     * Test metadata retrieve() without any value
     */
    public function testEmptyRetrieval(): void
    {
        $url = 'https://domain.tld/link';

        // The download callback leaves every referenced variable untouched.
        $this->httpAccess
            ->expects(static::once())
            ->method('getCurlDownloadCallback')
            ->willReturnCallback(
                function (&$charsetRef, &$titleRef, &$descriptionRef, &$tagsRef): callable {
                    return function () use (&$charsetRef, &$titleRef, &$descriptionRef, &$tagsRef): void {
                    };
                }
            )
        ;

        $this->httpAccess
            ->expects(static::once())
            ->method('getHttpResponse')
            ->with($url, 30, 4194304)
            ->willReturnCallback(function ($requestedUrl, $timeout, $maxBytes, $downloadCallback): void {
                $downloadCallback();
            })
        ;

        $expectedResult = [
            'title' => null,
            'description' => null,
            'tags' => null,
        ];

        static::assertSame($expectedResult, $this->retriever->retrieve($url));
    }
}

View file

@ -12,6 +12,8 @@
action="{$base_path}/admin/shaare"
class="page-form pure-u-lg-3-5 pure-u-22-24 page-form page-form-light"
>
{$asyncLoadClass=$link_is_new && $async_metadata && empty($link.title) ? 'loading-input' : ''}
<h2 class="window-title">
{if="!$link_is_new"}{'Edit Shaare'|t}{else}{'New Shaare'|t}{/if}
</h2>
@ -28,21 +30,32 @@ <h2 class="window-title">
<div>
<label for="lf_title">{'Title'|t}</label>
</div>
<div>
<input type="text" name="lf_title" id="lf_title" value="{$link.title}" class="lf_input autofocus">
<div class="{$asyncLoadClass}">
<input type="text" name="lf_title" id="lf_title" value="{$link.title}"
class="lf_input {if="!$async_metadata"}autofocus{/if}"
>
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
<label for="lf_description">{'Description'|t}</label>
</div>
<div>
<div class="{if="$retrieve_description"}{$asyncLoadClass}{/if}">
<textarea name="lf_description" id="lf_description" class="autofocus">{$link.description}</textarea>
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
<label for="lf_tags">{'Tags'|t}</label>
</div>
<div>
<div class="{if="$retrieve_description"}{$asyncLoadClass}{/if}">
<input type="text" name="lf_tags" id="lf_tags" value="{$link.tags}" class="lf_input autofocus"
data-list="{loop="$tags"}{$key}, {/loop}" data-multiple data-autofirst autocomplete="off" >
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
@ -88,5 +101,6 @@ <h2 class="window-title">
</form>
</div>
{include="page.footer"}
{if="$link_is_new && $async_metadata"}<script src="{$asset_path}/js/metadata.min.js?v={$version_hash}#"></script>{/if}
</body>
</html>

View file

@ -20,6 +20,7 @@ module.exports = [
entry: {
thumbnails: './assets/common/js/thumbnails.js',
thumbnails_update: './assets/common/js/thumbnails-update.js',
metadata: './assets/common/js/metadata.js',
pluginsadmin: './assets/default/js/plugins-admin.js',
shaarli: [
'./assets/default/js/base.js',
@ -99,6 +100,7 @@ module.exports = [
].concat(glob.sync('./assets/vintage/img/*')),
markdown: './assets/common/css/markdown.css',
thumbnails: './assets/common/js/thumbnails.js',
metadata: './assets/common/js/metadata.js',
thumbnails_update: './assets/common/js/thumbnails-update.js',
},
output: {

View file

@ -2912,6 +2912,11 @@ hash.js@^1.0.0, hash.js@^1.0.3:
inherits "^2.0.3"
minimalistic-assert "^1.0.1"
he@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==
hmac-drbg@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/hmac-drbg/-/hmac-drbg-1.0.1.tgz#d2745701025a6c775a6c545793ed502fc0c649a1"