Merge pull request #1567 from ArthurHoaro/feature/async-title-retrieval

This commit is contained in:
ArthurHoaro 2020-10-20 10:14:28 +02:00 committed by GitHub
commit 9b3c1270bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 689 additions and 257 deletions

View file

@ -175,6 +175,7 @@ translate:
eslint:
@yarn run eslint -c .dev/.eslintrc.js assets/vintage/js/
@yarn run eslint -c .dev/.eslintrc.js assets/default/js/
@yarn run eslint -c .dev/.eslintrc.js assets/common/js/
### Run CSSLint check against Shaarli's SCSS files
sasslint:

View file

@ -366,7 +366,8 @@ protected function setDefaultValues()
$this->setEmpty('general.links_per_page', 20);
$this->setEmpty('general.enabled_plugins', self::$DEFAULT_PLUGINS);
$this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', false);
$this->setEmpty('general.retrieve_description', true);
$this->setEmpty('general.enable_async_metadata', true);
$this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable');

View file

@ -14,6 +14,7 @@
use Shaarli\Front\Controller\Visitor\ErrorNotFoundController;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -90,6 +91,10 @@ public function build(): ShaarliContainer
);
};
$container['metadataRetriever'] = function (ShaarliContainer $container): MetadataRetriever {
return new MetadataRetriever($container->conf, $container->httpAccess);
};
$container['pageBuilder'] = function (ShaarliContainer $container): PageBuilder {
return new PageBuilder(
$container->conf,

View file

@ -10,6 +10,7 @@
use Shaarli\Formatter\FormatterFactory;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -35,6 +36,7 @@
* @property History $history
* @property HttpAccess $httpAccess
* @property LoginManager $loginManager
* @property MetadataRetriever $metadataRetriever
* @property NetscapeBookmarkUtils $netscapeBookmarkUtils
* @property callable $notFoundHandler Overrides default Slim exception display
* @property PageBuilder $pageBuilder

View file

@ -53,36 +53,22 @@ public function displayCreateForm(Request $request, Response $response): Respons
// If this is an HTTP(S) link, we try go get the page to extract
// the title (otherwise we will to straight to the edit form.)
if (empty($title) && strpos(get_url_scheme($url) ?: '', 'http') !== false) {
$retrieveDescription = $this->container->conf->get('general.retrieve_description');
// Short timeout to keep the application responsive
// The callback will fill $charset and $title with data from the downloaded page.
$this->container->httpAccess->getHttpResponse(
$url,
$this->container->conf->get('general.download_timeout', 30),
$this->container->conf->get('general.download_max_size', 4194304),
$this->container->httpAccess->getCurlDownloadCallback(
$charset,
$title,
$description,
$tags,
$retrieveDescription
)
);
if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
$title = mb_convert_encoding($title, 'utf-8', $charset);
}
if (true !== $this->container->conf->get('general.enable_async_metadata', true)
&& empty($title)
&& strpos(get_url_scheme($url) ?: '', 'http') !== false
) {
$metadata = $this->container->metadataRetriever->retrieve($url);
}
if (empty($url) && empty($title)) {
$title = $this->container->conf->get('general.default_note_title', t('Note: '));
if (empty($url)) {
$metadata['title'] = $this->container->conf->get('general.default_note_title', t('Note: '));
}
$link = [
'title' => $title,
'title' => $title ?? $metadata['title'] ?? '',
'url' => $url ?? '',
'description' => $description ?? '',
'tags' => $tags ?? '',
'description' => $description ?? $metadata['description'] ?? '',
'tags' => $tags ?? $metadata['tags'] ?? '',
'private' => $private,
];
} else {
@ -352,6 +338,8 @@ protected function displayForm(array $link, bool $isNew, Request $request, Respo
'source' => $request->getParam('source') ?? '',
'tags' => $tags,
'default_private_links' => $this->container->conf->get('privacy.default_private_links', false),
'async_metadata' => $this->container->conf->get('general.enable_async_metadata', true),
'retrieve_description' => $this->container->conf->get('general.retrieve_description', false),
]);
$this->executePageHooks('render_editlink', $data, TemplatePage::EDIT_LINK);

View file

@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace Shaarli\Front\Controller\Admin;
use Slim\Http\Request;
use Slim\Http\Response;
/**
* Controller used to retrieve/update bookmark's metadata.
*/
class MetadataController extends ShaarliAdminController
{
/**
* GET /admin/metadata/{url} - Attempt to retrieve the bookmark title from provided URL.
*/
public function ajaxRetrieveTitle(Request $request, Response $response): Response
{
$url = $request->getParam('url');
// Only try to extract metadata from URL with HTTP(s) scheme
if (!empty($url) && strpos(get_url_scheme($url) ?: '', 'http') !== false) {
return $response->withJson($this->container->metadataRetriever->retrieve($url));
}
return $response->withJson([]);
}
}

View file

@ -14,9 +14,14 @@
*/
class HttpAccess
{
public function getHttpResponse($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
{
return get_http_response($url, $timeout, $maxBytes, $curlWriteFunction);
public function getHttpResponse(
$url,
$timeout = 30,
$maxBytes = 4194304,
$curlHeaderFunction = null,
$curlWriteFunction = null
) {
return get_http_response($url, $timeout, $maxBytes, $curlHeaderFunction, $curlWriteFunction);
}
public function getCurlDownloadCallback(
@ -24,16 +29,19 @@ public function getCurlDownloadCallback(
&$title,
&$description,
&$keywords,
$retrieveDescription,
$curlGetInfo = 'curl_getinfo'
$retrieveDescription
) {
return get_curl_download_callback(
$charset,
$title,
$description,
$keywords,
$retrieveDescription,
$curlGetInfo
$retrieveDescription
);
}
public function getCurlHeaderCallback(&$charset, $curlGetInfo = 'curl_getinfo')
{
return get_curl_header_callback($charset, $curlGetInfo);
}
}

View file

@ -6,12 +6,14 @@
* GET an HTTP URL to retrieve its content
* Uses the cURL library or a fallback method
*
* @param string $url URL to get (http://...)
* @param int $timeout network timeout (in seconds)
* @param int $maxBytes maximum downloaded bytes (default: 4 MiB)
* @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
* Can be used to add download conditions on the
* headers (response code, content type, etc.).
* @param string $url URL to get (http://...)
* @param int $timeout network timeout (in seconds)
* @param int $maxBytes maximum downloaded bytes (default: 4 MiB)
* @param callable|string $curlHeaderFunction Optional callback called during the download of headers
* (CURLOPT_HEADERFUNCTION)
* @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
* Can be used to add download conditions on the
* headers (response code, content type, etc.).
*
* @return array HTTP response headers, downloaded content
*
@ -35,8 +37,13 @@
* @see http://stackoverflow.com/q/9183178
* @see http://stackoverflow.com/q/1462720
*/
function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
{
function get_http_response(
$url,
$timeout = 30,
$maxBytes = 4194304,
$curlHeaderFunction = null,
$curlWriteFunction = null
) {
$urlObj = new Url($url);
$cleanUrl = $urlObj->idnToAscii();
@ -70,7 +77,8 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
// General cURL settings
curl_setopt($ch, CURLOPT_AUTOREFERER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_HEADER, true);
// Default header download if the $curlHeaderFunction is not defined
curl_setopt($ch, CURLOPT_HEADER, !is_callable($curlHeaderFunction));
curl_setopt(
$ch,
CURLOPT_HTTPHEADER,
@ -81,25 +89,21 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteF
curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
if (is_callable($curlWriteFunction)) {
curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
}
// Max download size management
curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16);
curl_setopt($ch, CURLOPT_NOPROGRESS, false);
if (is_callable($curlHeaderFunction)) {
curl_setopt($ch, CURLOPT_HEADERFUNCTION, $curlHeaderFunction);
}
if (is_callable($curlWriteFunction)) {
curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
}
curl_setopt(
$ch,
CURLOPT_PROGRESSFUNCTION,
function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes) {
if (version_compare(phpversion(), '5.5', '<')) {
// PHP version lower than 5.5
// Callback has 4 arguments
$downloaded = $arg1;
} else {
// Callback has 5 arguments
$downloaded = $arg2;
}
function ($arg0, $arg1, $arg2, $arg3, $arg4) use ($maxBytes) {
$downloaded = $arg2;
// Non-zero return stops downloading
return ($downloaded > $maxBytes) ? 1 : 0;
}
@ -493,53 +497,22 @@ function is_https($server)
* Get cURL callback function for CURLOPT_WRITEFUNCTION
*
* @param string $charset to extract from the downloaded page (reference)
* @param string $title to extract from the downloaded page (reference)
* @param string $description to extract from the downloaded page (reference)
* @param string $keywords to extract from the downloaded page (reference)
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
* @param string $curlGetInfo Optionally overrides curl_getinfo function
*
* @return Closure
*/
function get_curl_download_callback(
function get_curl_header_callback(
&$charset,
&$title,
&$description,
&$keywords,
$retrieveDescription,
$curlGetInfo = 'curl_getinfo'
) {
$isRedirected = false;
$currentChunk = 0;
$foundChunk = null;
/**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
*
* While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
* Then we extract the title and the charset and stop the download when it's done.
*
* @param resource $ch cURL resource
* @param string $data chunk of data being downloaded
*
* @return int|bool length of $data or false if we need to stop the download
*/
return function (&$ch, $data) use (
$retrieveDescription,
$curlGetInfo,
&$charset,
&$title,
&$description,
&$keywords,
&$isRedirected,
&$currentChunk,
&$foundChunk
) {
$currentChunk++;
return function ($ch, $data) use ($curlGetInfo, &$charset, &$isRedirected) {
$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
$chunkLength = strlen($data);
if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
$isRedirected = true;
return strlen($data);
return $chunkLength;
}
if (!empty($responseCode) && $responseCode !== 200) {
return false;
@ -555,6 +528,56 @@ function get_curl_download_callback(
if (!empty($contentType) && empty($charset)) {
$charset = header_extract_charset($contentType);
}
return $chunkLength;
};
}
/**
* Get cURL callback function for CURLOPT_WRITEFUNCTION
*
* @param string $charset to extract from the downloaded page (reference)
* @param string $title to extract from the downloaded page (reference)
* @param string $description to extract from the downloaded page (reference)
* @param string $keywords to extract from the downloaded page (reference)
* @param bool $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
* @param string $curlGetInfo Optionally overrides curl_getinfo function
*
* @return Closure
*/
function get_curl_download_callback(
&$charset,
&$title,
&$description,
&$keywords,
$retrieveDescription
) {
$currentChunk = 0;
$foundChunk = null;
/**
* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
*
* While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
* Then we extract the title and the charset and stop the download when it's done.
*
* @param resource $ch cURL resource
* @param string $data chunk of data being downloaded
*
* @return int|bool length of $data or false if we need to stop the download
*/
return function ($ch, $data) use (
$retrieveDescription,
&$charset,
&$title,
&$description,
&$keywords,
&$currentChunk,
&$foundChunk
) {
$chunkLength = strlen($data);
$currentChunk++;
if (empty($charset)) {
$charset = html_extract_charset($data);
}
@ -562,6 +585,10 @@ function get_curl_download_callback(
$title = html_extract_title($data);
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
}
if (empty($title)) {
$title = html_extract_tag('title', $data);
$foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
}
if ($retrieveDescription && empty($description)) {
$description = html_extract_tag('description', $data);
$foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
@ -591,6 +618,6 @@ function get_curl_download_callback(
return false;
}
return strlen($data);
return $chunkLength;
};
}

View file

@ -0,0 +1,69 @@
<?php
declare(strict_types=1);
namespace Shaarli\Http;
use Shaarli\Config\ConfigManager;
/**
* HTTP Tool used to extract metadata from external URL (title, description, etc.).
*/
class MetadataRetriever
{
/** @var ConfigManager */
protected $conf;
/** @var HttpAccess */
protected $httpAccess;
public function __construct(ConfigManager $conf, HttpAccess $httpAccess)
{
$this->conf = $conf;
$this->httpAccess = $httpAccess;
}
/**
* Retrieve metadata for given URL.
*
* @return array [
* 'title' => <remote title>,
* 'description' => <remote description>,
* 'tags' => <remote keywords>,
* ]
*/
public function retrieve(string $url): array
{
$charset = null;
$title = null;
$description = null;
$tags = null;
$retrieveDescription = $this->conf->get('general.retrieve_description');
// Short timeout to keep the application responsive
// The callback will fill $charset and $title with data from the downloaded page.
$this->httpAccess->getHttpResponse(
$url,
$this->conf->get('general.download_timeout', 30),
$this->conf->get('general.download_max_size', 4194304),
$this->httpAccess->getCurlHeaderCallback($charset),
$this->httpAccess->getCurlDownloadCallback(
$charset,
$title,
$description,
$tags,
$retrieveDescription
)
);
if (!empty($title) && strtolower($charset) !== 'utf-8') {
$title = mb_convert_encoding($title, 'utf-8', $charset);
}
return [
'title' => $title,
'description' => $description,
'tags' => $tags,
];
}
}

View file

@ -0,0 +1,39 @@
import he from 'he';
function clearLoaders(loaders) {
if (loaders != null && loaders.length > 0) {
[...loaders].forEach((loader) => {
loader.classList.remove('loading-input');
});
}
}
(() => {
const loaders = document.querySelectorAll('.loading-input');
const inputTitle = document.querySelector('input[name="lf_title"]');
if (inputTitle != null && inputTitle.value.length > 0) {
clearLoaders(loaders);
return;
}
const url = document.querySelector('input[name="lf_url"]').value;
const basePath = document.querySelector('input[name="js_base_path"]').value;
const xhr = new XMLHttpRequest();
xhr.open('GET', `${basePath}/admin/metadata?url=${encodeURI(url)}`, true);
xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
xhr.onload = () => {
const result = JSON.parse(xhr.response);
Object.keys(result).forEach((key) => {
if (result[key] !== null && result[key].length) {
const element = document.querySelector(`input[name="lf_${key}"], textarea[name="lf_${key}"]`);
if (element != null && element.value.length === 0) {
element.value = he.decode(result[key]);
}
}
});
clearLoaders(loaders);
};
xhr.send();
})();

View file

@ -1,4 +1,5 @@
import Awesomplete from 'awesomplete';
import he from 'he';
/**
* Find a parent element according to its tag and its attributes
@ -95,15 +96,6 @@ function updateAwesompleteList(selector, tags, instances) {
return instances;
}
/**
* html_entities in JS
*
* @see http://stackoverflow.com/questions/18749591/encode-html-entities-in-javascript
*/
function htmlEntities(str) {
return str.replace(/[\u00A0-\u9999<>&]/gim, (i) => `&#${i.charCodeAt(0)};`);
}
/**
* Add the class 'hidden' to city options not attached to the current selected continent.
*
@ -569,7 +561,7 @@ function init(description) {
input.setAttribute('name', totag);
input.setAttribute('value', totag);
findParent(input, 'div', { class: 'rename-tag-form' }).style.display = 'none';
block.querySelector('a.tag-link').innerHTML = htmlEntities(totag);
block.querySelector('a.tag-link').innerHTML = he.encode(totag);
block
.querySelector('a.tag-link')
.setAttribute('href', `${basePath}/?searchtags=${encodeURIComponent(totag)}`);

View file

@ -1273,6 +1273,57 @@ form {
}
}
.loading-input {
position: relative;
@keyframes around {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
.icon-container {
position: absolute;
right: 60px;
top: calc(50% - 10px);
}
.loader {
position: relative;
height: 20px;
width: 20px;
display: inline-block;
animation: around 5.4s infinite;
&::after,
&::before {
content: "";
background: $form-input-background;
position: absolute;
display: inline-block;
width: 100%;
height: 100%;
border-width: 2px;
border-color: #333 #333 transparent transparent;
border-style: solid;
border-radius: 20px;
box-sizing: border-box;
top: 0;
left: 0;
animation: around 0.7s ease-in-out infinite;
}
&::after {
animation: around 0.7s ease-in-out 0.1s infinite;
background: transparent;
}
}
}
// LOGIN
.login-form-container {
.remember-me {

View file

@ -150,6 +150,7 @@ _These settings should not be edited_
- **timezone**: See [the list of supported timezones](http://php.net/manual/en/timezones.php).
- **enabled_plugins**: List of enabled plugins.
- **default_note_title**: Default title of a new note.
- **enable_async_metadata** (boolean): Retrieve external bookmark metadata asynchronously to prevent bookmark creation slowdown.
- **retrieve_description** (boolean): If set to true, for every new Shaare Shaarli will try to retrieve the description and keywords from the HTML meta tags.
- **root_url**: Overrides automatic discovery of Shaarli instance's URL (e.g.) `https://sub.domain.tld/shaarli-folder/`.

View file

@ -129,7 +129,7 @@
$this->post('/plugins', '\Shaarli\Front\Controller\Admin\PluginsController:save');
$this->get('/token', '\Shaarli\Front\Controller\Admin\TokenController:getToken');
$this->get('/thumbnails', '\Shaarli\Front\Controller\Admin\ThumbnailsController:index');
$this->get('/metadata', '\Shaarli\Front\Controller\Admin\MetadataController:ajaxRetrieveTitle');
$this->get('/visibility/{visibility}', '\Shaarli\Front\Controller\Admin\SessionFilterController:visibility');
})->add('\Shaarli\Front\ShaarliAdminMiddleware');

View file

@ -7,6 +7,7 @@
"awesomplete": "^1.1.2",
"blazy": "^1.8.2",
"fork-awesome": "^1.1.7",
"he": "^1.2.0",
"pure-extras": "^1.0.0",
"purecss": "^1.0.0"
},

View file

@ -216,60 +216,91 @@ public function testHtmlExtractNonExistentOgTag()
}
/**
* Test the download callback with valid value
* Test the header callback with valid value
*/
public function testCurlDownloadCallbackOk()
public function testCurlHeaderCallbackOk(): void
{
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
);
$callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ok');
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
'Content-Type: text/html; charset=utf-8',
'Status: 200 OK',
'end' => 'th=device-width">'
];
foreach ($data as $chunk) {
static::assertIsInt($callback(null, $chunk));
}
static::assertSame('utf-8', $charset);
}
/**
* Test the download callback with valid value
*/
public function testCurlDownloadCallbackOk(): void
{
$charset = 'utf-8';
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false
);
$data = [
'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'<title>ignored</title>'
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
$this->assertEmpty($keywords);
static::assertSame('utf-8', $charset);
static::assertSame('Refactoring · GitHub', $title);
static::assertEmpty($desc);
static::assertEmpty($keywords);
}
/**
* Test the header callback with valid value
*/
public function testCurlHeaderCallbackNoCharset(): void
{
$callback = get_curl_header_callback($charset, 'ut_curl_getinfo_no_charset');
$data = [
'HTTP/1.1 200 OK',
];
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
static::assertFalse($charset);
}
/**
* Test the download callback with valid values and no charset
*/
public function testCurlDownloadCallbackOkNoCharset()
public function testCurlDownloadCallbackOkNoCharset(): void
{
$charset = null;
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
false
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
@ -277,10 +308,11 @@ public function testCurlDownloadCallbackOkNoCharset()
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
$this->assertEquals(strlen($line), $callback($ignore, $line));
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEmpty($charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
@ -290,18 +322,18 @@ public function testCurlDownloadCallbackOkNoCharset()
/**
* Test the download callback with valid values and no charset
*/
public function testCurlDownloadCallbackOkHtmlCharset()
public function testCurlDownloadCallbackOkHtmlCharset(): void
{
$charset = null;
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_no_charset'
false
);
$data = [
'HTTP/1.1 200 OK',
'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
'end' => 'th=device-width">'
. '<title>Refactoring · GitHub</title>'
@ -310,14 +342,10 @@ public function testCurlDownloadCallbackOkHtmlCharset()
. '<meta name="description" content="desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);
@ -327,25 +355,26 @@ public function testCurlDownloadCallbackOkHtmlCharset()
/**
* Test the download callback with valid values and no title
*/
public function testCurlDownloadCallbackOkNoTitle()
public function testCurlDownloadCallbackOkNoTitle(): void
{
$charset = 'utf-8';
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ok'
false
);
$data = [
'HTTP/1.1 200 OK',
'end' => 'th=device-width">Refactoring · GitHub<link rel="search" type="application/opensea',
'ignored',
];
foreach ($data as $key => $line) {
$ignore = null;
$this->assertEquals(strlen($line), $callback($ignore, $line));
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEquals('utf-8', $charset);
$this->assertEmpty($title);
$this->assertEmpty($desc);
@ -353,81 +382,55 @@ public function testCurlDownloadCallbackOkNoTitle()
}
/**
* Test the download callback with an invalid content type.
* Test the header callback with an invalid content type.
*/
public function testCurlDownloadCallbackInvalidContentType()
public function testCurlHeaderCallbackInvalidContentType(): void
{
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
$this->assertEmpty($title);
$callback = get_curl_header_callback($charset, 'ut_curl_getinfo_ct_ko');
$data = [
'HTTP/1.1 200 OK',
];
static::assertFalse($callback(null, $data[0]));
static::assertNull($charset);
}
/**
* Test the download callback with an invalid response code.
* Test the header callback with an invalid response code.
*/
public function testCurlDownloadCallbackInvalidResponseCode()
public function testCurlHeaderCallbackInvalidResponseCode(): void
{
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rc_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
$this->assertEmpty($title);
$callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rc_ko');
static::assertFalse($callback(null, ''));
static::assertNull($charset);
}
/**
* Test the download callback with an invalid content type and response code.
* Test the header callback with an invalid content type and response code.
*/
public function testCurlDownloadCallbackInvalidContentTypeAndResponseCode()
public function testCurlHeaderCallbackInvalidContentTypeAndResponseCode(): void
{
$callback = $callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
false,
'ut_curl_getinfo_rs_ct_ko'
);
$ignore = null;
$this->assertFalse($callback($ignore, ''));
$this->assertEmpty($charset);
$this->assertEmpty($title);
$callback = get_curl_header_callback($charset, 'ut_curl_getinfo_rs_ct_ko');
static::assertFalse($callback(null, ''));
static::assertNull($charset);
}
/**
* Test the download callback with valid value, and retrieve_description option enabled.
*/
public function testCurlDownloadCallbackOkWithDesc()
public function testCurlDownloadCallbackOkWithDesc(): void
{
$charset = 'utf-8';
$callback = get_curl_download_callback(
$charset,
$title,
$desc,
$keywords,
true,
'ut_curl_getinfo_ok'
true
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
'Content-Type: text/html; charset=utf-8',
'Status: 200 OK',
'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
@ -435,14 +438,11 @@ public function testCurlDownloadCallbackOkWithDesc()
. '<meta name="description" content="link desc" />'
. '<meta name="keywords" content="key1,key2" />',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEquals('link desc', $desc);
@ -453,8 +453,9 @@ public function testCurlDownloadCallbackOkWithDesc()
* Test the download callback with valid value, and retrieve_description option enabled,
* but no desc or keyword defined in the page.
*/
public function testCurlDownloadCallbackOkWithDescNotFound()
public function testCurlDownloadCallbackOkWithDescNotFound(): void
{
$charset = 'utf-8';
$callback = get_curl_download_callback(
$charset,
$title,
@ -464,24 +465,16 @@ public function testCurlDownloadCallbackOkWithDescNotFound()
'ut_curl_getinfo_ok'
);
$data = [
'HTTP/1.1 200 OK',
'Server: GitHub.com',
'Date: Sat, 28 Oct 2017 12:01:33 GMT',
'Content-Type: text/html; charset=utf-8',
'Status: 200 OK',
'th=device-width">'
. '<title>Refactoring · GitHub</title>'
. '<link rel="search" type="application/opensea',
'end' => '<title>ignored</title>',
];
foreach ($data as $key => $line) {
$ignore = null;
$expected = $key !== 'end' ? strlen($line) : false;
$this->assertEquals($expected, $callback($ignore, $line));
if ($expected === false) {
break;
}
foreach ($data as $chunk) {
static::assertSame(strlen($chunk), $callback(null, $chunk));
}
$this->assertEquals('utf-8', $charset);
$this->assertEquals('Refactoring · GitHub', $title);
$this->assertEmpty($desc);

View file

@ -12,6 +12,7 @@
use Shaarli\Front\Controller\Visitor\ErrorNotFoundController;
use Shaarli\History;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\Netscape\NetscapeBookmarkUtils;
use Shaarli\Plugin\PluginManager;
use Shaarli\Render\PageBuilder;
@ -72,6 +73,7 @@ public function testBuildContainer(): void
static::assertInstanceOf(History::class, $container->history);
static::assertInstanceOf(HttpAccess::class, $container->httpAccess);
static::assertInstanceOf(LoginManager::class, $container->loginManager);
static::assertInstanceOf(MetadataRetriever::class, $container->metadataRetriever);
static::assertInstanceOf(NetscapeBookmarkUtils::class, $container->netscapeBookmarkUtils);
static::assertInstanceOf(PageBuilder::class, $container->pageBuilder);
static::assertInstanceOf(PageCacheManager::class, $container->pageCacheManager);

View file

@ -9,6 +9,7 @@
use Shaarli\Front\Controller\Admin\FrontAdminControllerMockHelper;
use Shaarli\Front\Controller\Admin\ManageShaareController;
use Shaarli\Http\HttpAccess;
use Shaarli\Http\MetadataRetriever;
use Shaarli\TestCase;
use Slim\Http\Request;
use Slim\Http\Response;
@ -25,6 +26,7 @@ public function setUp(): void
$this->createContainer();
$this->container->httpAccess = $this->createMock(HttpAccess::class);
$this->container->metadataRetriever = $this->createMock(MetadataRetriever::class);
$this->controller = new ManageShaareController($this->container);
}
@ -32,7 +34,7 @@ public function setUp(): void
* Test displaying bookmark create form
* Ensure that every step of the standard workflow works properly.
*/
public function testDisplayCreateFormWithUrl(): void
public function testDisplayCreateFormWithUrlAndWithMetadataRetrieval(): void
{
$this->container->environment = [
'HTTP_REFERER' => $referer = 'http://shaarli/subfolder/controller/?searchtag=abc'
@ -53,40 +55,20 @@ public function testDisplayCreateFormWithUrl(): void
});
$response = new Response();
$this->container->httpAccess
->expects(static::once())
->method('getCurlDownloadCallback')
->willReturnCallback(
function (&$charset, &$title, &$description, &$tags) use (
$remoteTitle,
$remoteDesc,
$remoteTags
): callable {
return function () use (
&$charset,
&$title,
&$description,
&$tags,
$remoteTitle,
$remoteDesc,
$remoteTags
): void {
$charset = 'ISO-8859-1';
$title = $remoteTitle;
$description = $remoteDesc;
$tags = $remoteTags;
};
}
)
;
$this->container->httpAccess
->expects(static::once())
->method('getHttpResponse')
->with($expectedUrl, 30, 4194304)
->willReturnCallback(function($url, $timeout, $maxBytes, $callback): void {
$callback();
})
;
$this->container->conf = $this->createMock(ConfigManager::class);
$this->container->conf->method('get')->willReturnCallback(function (string $param, $default) {
if ($param === 'general.enable_async_metadata') {
return false;
}
return $default;
});
$this->container->metadataRetriever->expects(static::once())->method('retrieve')->willReturn([
'title' => $remoteTitle,
'description' => $remoteDesc,
'tags' => $remoteTags,
]);
$this->container->bookmarkService
->expects(static::once())
@ -127,6 +109,72 @@ function (&$charset, &$title, &$description, &$tags) use (
static::assertSame($tags, $assignedVariables['tags']);
static::assertArrayHasKey('source', $assignedVariables);
static::assertArrayHasKey('default_private_links', $assignedVariables);
static::assertArrayHasKey('async_metadata', $assignedVariables);
static::assertArrayHasKey('retrieve_description', $assignedVariables);
}
/**
* Test displaying bookmark create form without any external metadata retrieval attempt
*/
public function testDisplayCreateFormWithUrlAndWithoutMetadata(): void
{
$this->container->environment = [
'HTTP_REFERER' => $referer = 'http://shaarli/subfolder/controller/?searchtag=abc'
];
$assignedVariables = [];
$this->assignTemplateVars($assignedVariables);
$url = 'http://url.tld/other?part=3&utm_ad=pay#hash';
$expectedUrl = str_replace('&utm_ad=pay', '', $url);
$request = $this->createMock(Request::class);
$request->method('getParam')->willReturnCallback(function (string $key) use ($url): ?string {
return $key === 'post' ? $url : null;
});
$response = new Response();
$this->container->metadataRetriever->expects(static::never())->method('retrieve');
$this->container->bookmarkService
->expects(static::once())
->method('bookmarksCountPerTag')
->willReturn($tags = ['tag1' => 2, 'tag2' => 1])
;
// Make sure that PluginManager hook is triggered
$this->container->pluginManager
->expects(static::at(0))
->method('executeHooks')
->willReturnCallback(function (string $hook, array $data): array {
static::assertSame('render_editlink', $hook);
static::assertSame('', $data['link']['title']);
static::assertSame('', $data['link']['description']);
return $data;
})
;
$result = $this->controller->displayCreateForm($request, $response);
static::assertSame(200, $result->getStatusCode());
static::assertSame('editlink', (string) $result->getBody());
static::assertSame('Shaare - Shaarli', $assignedVariables['pagetitle']);
static::assertSame($expectedUrl, $assignedVariables['link']['url']);
static::assertSame('', $assignedVariables['link']['title']);
static::assertSame('', $assignedVariables['link']['description']);
static::assertSame('', $assignedVariables['link']['tags']);
static::assertFalse($assignedVariables['link']['private']);
static::assertTrue($assignedVariables['link_is_new']);
static::assertSame($referer, $assignedVariables['http_referer']);
static::assertSame($tags, $assignedVariables['tags']);
static::assertArrayHasKey('source', $assignedVariables);
static::assertArrayHasKey('default_private_links', $assignedVariables);
static::assertArrayHasKey('async_metadata', $assignedVariables);
static::assertArrayHasKey('retrieve_description', $assignedVariables);
}
/**

View file

@ -0,0 +1,154 @@
<?php
declare(strict_types=1);
namespace Shaarli\Http;
use PHPUnit\Framework\TestCase;
use Shaarli\Config\ConfigManager;
class MetadataRetrieverTest extends TestCase
{
/** @var MetadataRetriever */
protected $retriever;
/** @var ConfigManager */
protected $conf;
/** @var HttpAccess */
protected $httpAccess;
public function setUp(): void
{
$this->conf = $this->createMock(ConfigManager::class);
$this->httpAccess = $this->createMock(HttpAccess::class);
$this->retriever = new MetadataRetriever($this->conf, $this->httpAccess);
$this->conf->method('get')->willReturnCallback(function (string $param, $default) {
return $default === null ? $param : $default;
});
}
/**
* Test metadata retrieve() with values returned
*/
public function testFullRetrieval(): void
{
$url = 'https://domain.tld/link';
$remoteTitle = 'Remote Title ';
$remoteDesc = 'Sometimes the meta description is relevant.';
$remoteTags = 'abc def';
$remoteCharset = 'utf-8';
$expectedResult = [
'title' => $remoteTitle,
'description' => $remoteDesc,
'tags' => $remoteTags,
];
$this->httpAccess
->expects(static::once())
->method('getCurlHeaderCallback')
->willReturnCallback(
function (&$charset) use (
$remoteCharset
): callable {
return function () use (
&$charset,
$remoteCharset
): void {
$charset = $remoteCharset;
};
}
)
;
$this->httpAccess
->expects(static::once())
->method('getCurlDownloadCallback')
->willReturnCallback(
function (&$charset, &$title, &$description, &$tags) use (
$remoteCharset,
$remoteTitle,
$remoteDesc,
$remoteTags
): callable {
return function () use (
&$charset,
&$title,
&$description,
&$tags,
$remoteCharset,
$remoteTitle,
$remoteDesc,
$remoteTags
): void {
static::assertSame($remoteCharset, $charset);
$title = $remoteTitle;
$description = $remoteDesc;
$tags = $remoteTags;
};
}
)
;
$this->httpAccess
->expects(static::once())
->method('getHttpResponse')
->with($url, 30, 4194304)
->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
$headerCallback();
$dlCallback();
})
;
$result = $this->retriever->retrieve($url);
static::assertSame($expectedResult, $result);
}
/**
* Test metadata retrieve() without any value
*/
public function testEmptyRetrieval(): void
{
$url = 'https://domain.tld/link';
$expectedResult = [
'title' => null,
'description' => null,
'tags' => null,
];
$this->httpAccess
->expects(static::once())
->method('getCurlDownloadCallback')
->willReturnCallback(
function (): callable {
return function (): void {};
}
)
;
$this->httpAccess
->expects(static::once())
->method('getCurlHeaderCallback')
->willReturnCallback(
function (): callable {
return function (): void {};
}
)
;
$this->httpAccess
->expects(static::once())
->method('getHttpResponse')
->with($url, 30, 4194304)
->willReturnCallback(function($url, $timeout, $maxBytes, $headerCallback, $dlCallback): void {
$headerCallback();
$dlCallback();
})
;
$result = $this->retriever->retrieve($url);
static::assertSame($expectedResult, $result);
}
}

View file

@ -12,6 +12,8 @@
action="{$base_path}/admin/shaare"
class="page-form pure-u-lg-3-5 pure-u-22-24 page-form page-form-light"
>
{$asyncLoadClass=$link_is_new && $async_metadata && empty($link.title) ? 'loading-input' : ''}
<h2 class="window-title">
{if="!$link_is_new"}{'Edit Shaare'|t}{else}{'New Shaare'|t}{/if}
</h2>
@ -28,21 +30,32 @@ <h2 class="window-title">
<div>
<label for="lf_title">{'Title'|t}</label>
</div>
<div>
<input type="text" name="lf_title" id="lf_title" value="{$link.title}" class="lf_input autofocus">
<div class="{$asyncLoadClass}">
<input type="text" name="lf_title" id="lf_title" value="{$link.title}"
class="lf_input {if="!$async_metadata"}autofocus{/if}"
>
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
<label for="lf_description">{'Description'|t}</label>
</div>
<div>
<div class="{if="$retrieve_description"}{$asyncLoadClass}{/if}">
<textarea name="lf_description" id="lf_description" class="autofocus">{$link.description}</textarea>
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
<label for="lf_tags">{'Tags'|t}</label>
</div>
<div>
<div class="{if="$retrieve_description"}{$asyncLoadClass}{/if}">
<input type="text" name="lf_tags" id="lf_tags" value="{$link.tags}" class="lf_input autofocus"
data-list="{loop="$tags"}{$key}, {/loop}" data-multiple data-autofirst autocomplete="off" >
<div class="icon-container">
<i class="loader"></i>
</div>
</div>
<div>
@ -88,5 +101,6 @@ <h2 class="window-title">
</form>
</div>
{include="page.footer"}
{if="$link_is_new && $async_metadata"}<script src="{$asset_path}/js/metadata.min.js?v={$version_hash}#"></script>{/if}
</body>
</html>

View file

@ -20,6 +20,7 @@ module.exports = [
entry: {
thumbnails: './assets/common/js/thumbnails.js',
thumbnails_update: './assets/common/js/thumbnails-update.js',
metadata: './assets/common/js/metadata.js',
pluginsadmin: './assets/default/js/plugins-admin.js',
shaarli: [
'./assets/default/js/base.js',
@ -99,6 +100,7 @@ module.exports = [
].concat(glob.sync('./assets/vintage/img/*')),
markdown: './assets/common/css/markdown.css',
thumbnails: './assets/common/js/thumbnails.js',
metadata: './assets/common/js/metadata.js',
thumbnails_update: './assets/common/js/thumbnails-update.js',
},
output: {

View file

@ -2912,6 +2912,11 @@ hash.js@^1.0.0, hash.js@^1.0.3:
inherits "^2.0.3"
minimalistic-assert "^1.0.1"
he@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==
hmac-drbg@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/hmac-drbg/-/hmac-drbg-1.0.1.tgz#d2745701025a6c775a6c545793ed502fc0c649a1"