Support search highlights when matching URL content

DefaultFormatter:
  - format 'a' tag content and not href attribute
  - format hashtags properly
Markdown(Extra)Formatter:
  - Extend Parsedown to format highlight properly: https://github.com/erusev/parsedown/wiki/Tutorial:-Create-Extensions

Fixes #1681
This commit is contained in:
ArthurHoaro 2021-01-19 17:49:19 +01:00
parent ffa39719a1
commit 9ef8555ad2
10 changed files with 173 additions and 14 deletions

View file

@ -1,6 +1,7 @@
<?php
use Shaarli\Bookmark\Bookmark;
use Shaarli\Formatter\BookmarkDefaultFormatter;
/**
* Extract title from an HTML document.
@ -98,7 +99,18 @@ function html_extract_tag($tag, $html)
function text2clickable($text)
{
    $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';

    // Build the anchor with a callback rather than a plain replacement string:
    // the matched URL may contain search-highlight placeholders, which must stay
    // in the visible link text but must never end up in the href attribute.
    $format = static function (array $match): string {
        $href = str_replace(
            [
                BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE,
                BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN,
            ],
            '',
            $match[1]
        );

        return '<a href="' . $href . '">' . $match[1] . '</a>';
    };

    return preg_replace_callback($regex, $format, $text);
}
/**
@ -111,6 +123,9 @@ function text2clickable($text)
*/
function hashtag_autolink($description, $indexUrl = '')
{
    $highlightTokens = [
        BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN,
        BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE,
    ];

    // Search-highlight placeholders may sit inside a hashtag. They are multi-character
    // strings, so they are quoted and matched as alternatives NEXT TO the character
    // class: inside [...] a group such as (?:...) is only a set of literal characters,
    // which would let "(", "?", ":", "|" and ")" become part of a hashtag.
    $tokens = implode('|', array_map(
        static function (string $token): string {
            return preg_quote($token, '/');
        },
        $highlightTokens
    ));

    /*
     * To support unicode: http://stackoverflow.com/a/35498078/1484919
     * \p{Pc} - to match underscore
     * \p{N} - numeric character in any script
     * \p{L} - letter from any language
     * \p{Mn} - non-spacing mark (accents, umlauts, etc)
     */
    $regex = '/(^|\s)#((?:[\p{Pc}\p{N}\p{L}\p{Mn}]|' . $tokens . ')+)/mui';

    $format = static function (array $match) use ($indexUrl, $highlightTokens): string {
        // Placeholders are kept in the visible label, but stripped from href and title.
        $cleanMatch = str_replace($highlightTokens, '', $match[2]);

        return $match[1] . '<a href="' . $indexUrl . './add-tag/' . $cleanMatch . '"' .
            ' title="Hashtag ' . $cleanMatch . '">' .
            '#' . $match[2] .
            '</a>';
    };

    return preg_replace_callback($regex, $format, $description);
}
/**

View file

@ -12,8 +12,8 @@
*/
class BookmarkDefaultFormatter extends BookmarkFormatter
{
protected const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT';
protected const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|';
public const SEARCH_HIGHLIGHT_OPEN = '||O_HIGHLIGHT';
public const SEARCH_HIGHLIGHT_CLOSE = '||C_HIGHLIGHT';
/**
* @inheritdoc

View file

@ -3,6 +3,7 @@
namespace Shaarli\Formatter;
use Shaarli\Config\ConfigManager;
use Shaarli\Formatter\Parsedown\ShaarliParsedownExtra;
/**
* Class BookmarkMarkdownExtraFormatter
@ -18,7 +19,6 @@ class BookmarkMarkdownExtraFormatter extends BookmarkMarkdownFormatter
public function __construct(ConfigManager $conf, bool $isLoggedIn)
{
    parent::__construct($conf, $isLoggedIn);
    // Instantiate the parser once: the Shaarli subclass of ParsedownExtra is the
    // one actually used, so no plain \ParsedownExtra instance is created first.
    $this->parsedown = new ShaarliParsedownExtra();
}
}

View file

@ -3,6 +3,7 @@
namespace Shaarli\Formatter;
use Shaarli\Config\ConfigManager;
use Shaarli\Formatter\Parsedown\ShaarliParsedown;
/**
* Class BookmarkMarkdownFormatter
@ -42,7 +43,7 @@ public function __construct(ConfigManager $conf, bool $isLoggedIn)
{
parent::__construct($conf, $isLoggedIn);
$this->parsedown = new \Parsedown();
$this->parsedown = new ShaarliParsedown();
$this->escape = $conf->get('security.markdown_escape', true);
$this->allowedProtocols = $conf->get('security.allowed_protocols', []);
}
@ -128,6 +129,9 @@ function ($match) use ($allowedProtocols, $indexUrl) {
protected function formatHashTags($description)
{
$indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
$tokens = '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN . ')' .
'(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE . ')'
;
/*
* To support unicode: http://stackoverflow.com/a/35498078/1484919
@ -136,8 +140,15 @@ protected function formatHashTags($description)
* \p{L} - letter from any language
* \p{Mn} - any non marking space (accents, umlauts, etc)
*/
$regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
$replacement = '$1[#$2](' . $indexUrl . './add-tag/$2)';
$regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}' . $tokens . ']+)/mui';
$replacement = function (array $match) use ($indexUrl): string {
$cleanMatch = str_replace(
BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN,
'',
str_replace(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE, '', $match[2])
);
return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $cleanMatch . ')';
};
$descriptionLines = explode(PHP_EOL, $description);
$descriptionOut = '';
@ -156,7 +167,7 @@ protected function formatHashTags($description)
}
if (!$codeBlockOn && !$codeLineOn) {
$descriptionLine = preg_replace($regex, $replacement, $descriptionLine);
$descriptionLine = preg_replace_callback($regex, $replacement, $descriptionLine);
}
$descriptionOut .= $descriptionLine;

View file

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Shaarli\Formatter\Parsedown;
/**
 * Parsedown parser extended with the link handling provided by ShaarliParsedownTrait.
 */
class ShaarliParsedown extends \Parsedown
{
    use ShaarliParsedownTrait;
}

View file

@ -0,0 +1,10 @@
<?php
declare(strict_types=1);
namespace Shaarli\Formatter\Parsedown;
/**
 * ParsedownExtra parser extended with the link handling provided by ShaarliParsedownTrait.
 */
class ShaarliParsedownExtra extends \ParsedownExtra
{
    use ShaarliParsedownTrait;
}

View file

@ -0,0 +1,50 @@
<?php
declare(strict_types=1);
namespace Shaarli\Formatter\Parsedown;
use Shaarli\Formatter\BookmarkDefaultFormatter as Formatter;
/**
 * Link post-processing shared by the Shaarli Parsedown subclasses.
 *
 * Search-highlight placeholder tokens found in a link target are removed from the
 * `href` attribute so the generated URL stays valid.
 */
trait ShaarliParsedownTrait
{
    /**
     * Post-process the result of the parent `inlineLink()` handler.
     *
     * The link text is fully wrapped in highlight tokens when the target was highlighted.
     *
     * @param mixed $excerpt Excerpt forwarded untouched to the parent parser.
     *
     * @return array|null Link element, or null when the parent did not produce one.
     */
    protected function inlineLink($excerpt)
    {
        return $this->shaarliFormatLink(parent::inlineLink($excerpt), true);
    }

    /**
     * Post-process the result of the parent `inlineUrl()` handler.
     *
     * Only the `href` attribute is cleaned; the element text is left as is.
     *
     * @param mixed $excerpt Excerpt forwarded untouched to the parent parser.
     *
     * @return array|null Link element, or null when the parent did not produce one.
     */
    protected function inlineUrl($excerpt)
    {
        return $this->shaarliFormatLink(parent::inlineUrl($excerpt), false);
    }

    /**
     * Remove highlight tokens from a link's `href` and optionally highlight its whole text.
     *
     * @param array|null $link     Link element returned by the parent parser, if any.
     * @param bool       $fullWrap Whether to wrap the element text in open/close tokens
     *                             when the href contained a complete highlight.
     *
     * @return array|null The (possibly updated) link element.
     */
    protected function shaarliFormatLink(?array $link, bool $fullWrap): ?array
    {
        if (!is_array($link) || !isset($link['element']['attributes']['href'])) {
            return $link;
        }

        $href = $link['element']['attributes']['href'];
        if (!is_string($href)) {
            return $link;
        }

        $hasOpen = strpos($href, Formatter::SEARCH_HIGHLIGHT_OPEN) !== false;
        $hasClose = strpos($href, Formatter::SEARCH_HIGHLIGHT_CLOSE) !== false;
        if (!$hasOpen && !$hasClose) {
            return $link;
        }

        // Clean the href as soon as ANY token is present: a highlight crossing the
        // link boundary leaves a single token behind, which would corrupt the URL.
        $link['element']['attributes']['href'] = $this->shaarliRemoveSearchTokens($href);

        // Only a complete highlight inside the href is reported on the link text.
        if ($fullWrap && $hasOpen && $hasClose) {
            $link['element']['text'] = Formatter::SEARCH_HIGHLIGHT_OPEN .
                $link['element']['text'] .
                Formatter::SEARCH_HIGHLIGHT_CLOSE
            ;
        }

        return $link;
    }

    /**
     * Strip both search-highlight tokens from a string.
     *
     * @param string $entry Text possibly containing highlight tokens.
     *
     * @return string The text without any highlight token.
     */
    protected function shaarliRemoveSearchTokens(string $entry): string
    {
        $entry = str_replace(Formatter::SEARCH_HIGHLIGHT_OPEN, '', $entry);
        $entry = str_replace(Formatter::SEARCH_HIGHLIGHT_CLOSE, '', $entry);

        return $entry;
    }
}

View file

@ -33,6 +33,7 @@ public function index(Request $request, Response $response): Response
$formatter = $this->container->formatterFactory->getFormatter();
$formatter->addContextData('base_path', $this->container->basePath);
$formatter->addContextData('index_url', index_url($this->container->environment));
$searchTags = normalize_spaces($request->getParam('searchtags') ?? '');
$searchTerm = escape(normalize_spaces($request->getParam('searchterm') ?? ''));
@ -157,6 +158,7 @@ public function permalink(Request $request, Response $response, array $args): Re
$formatter = $this->container->formatterFactory->getFormatter();
$formatter->addContextData('base_path', $this->container->basePath);
$formatter->addContextData('index_url', index_url($this->container->environment));
$data = array_merge(
$this->initializeTemplateVars(),

View file

@ -211,13 +211,17 @@ public function testFormatDescriptionWithSearchHighlight(): void
$this->formatter = new BookmarkDefaultFormatter($this->conf, false);
$bookmark = new Bookmark();
$bookmark->setDescription('This guide extends and expands on PSR-1, the basic coding standard.');
$bookmark->setDescription(
'This guide extends and expands on PSR-1, the basic coding standard.' . PHP_EOL .
'https://www.php-fig.org/psr/psr-1/'
);
$bookmark->addAdditionalContentEntry(
'search_highlight',
['description' => [
['start' => 0, 'end' => 10], // "This guide"
['start' => 45, 'end' => 50], // basic
['start' => 58, 'end' => 67], // standard.
['start' => 84, 'end' => 87], // fig
]]
);
@ -226,7 +230,10 @@ public function testFormatDescriptionWithSearchHighlight(): void
$this->assertSame(
'<span class="search-highlight">This guide</span> extends and expands on PSR-1, the ' .
'<span class="search-highlight">basic</span> coding ' .
'<span class="search-highlight">standard.</span>',
'<span class="search-highlight">standard.</span><br />' . PHP_EOL .
'<a href="https://www.php-fig.org/psr/psr-1/">' .
'https://www.php-<span class="search-highlight">fig</span>.org/psr/psr-1/' .
'</a>',
$link['description']
);
}

View file

@ -132,6 +132,49 @@ public function testFormatDescription()
$this->assertEquals($description, $link['description']);
}
/**
* Make sure that a description carrying search highlights is properly formatted:
* the expected output is wrapped in the markdown container, highlighted parts are
* rendered as "search-highlight" spans, and link href attributes stay free of them.
*/
public function testFormatDescriptionWithSearchHighlight()
{
// Input covers escaped HTML, a bare URL, a hashtag, a spacing line and a markdown link.
$description = 'This a <strong>description</strong>'. PHP_EOL;
$description .= 'text https://sub.domain.tld?query=here&for=real#hash more text'. PHP_EOL;
$description .= 'Also, there is an #hashtag added'. PHP_EOL;
// NOTE(review): the start/end offsets below are positions inside $description, so they
// depend on the exact whitespace of this line — verify it against the offsets.
$description .= ' A N D KEEP SPACES ! '. PHP_EOL;
$description .= 'And [yet another link](https://other.domain.tld)'. PHP_EOL;
$bookmark = new Bookmark();
$bookmark->setDescription($description);
$bookmark->addAdditionalContentEntry(
'search_highlight',
['description' => [
['start' => 18, 'end' => 26], // cription
['start' => 49, 'end' => 52], // sub
['start' => 84, 'end' => 88], // hash
['start' => 118, 'end' => 123], // hasht
['start' => 203, 'end' => 215], // other.domain
]]
);
$link = $this->formatter->format($bookmark);
// Build the expected HTML line by line.
$description = '<div class="markdown"><p>';
$description .= 'This a &lt;strong&gt;des<span class="search-highlight">cription</span>&lt;/strong&gt;<br />' .
PHP_EOL;
// Bare URL: the href holds the clean URL, the highlights only appear in the link text.
$url = 'https://sub.domain.tld?query=here&amp;for=real#hash';
$highlighted = 'https://<span class="search-highlight">sub</span>.domain.tld';
$highlighted .= '?query=here&amp;for=real#<span class="search-highlight">hash</span>';
$description .= 'text <a href="'. $url .'">'. $highlighted .'</a> more text<br />'. PHP_EOL;
// Hashtag: partial highlight in the label, clean tag name in the href.
$description .= 'Also, there is an <a href="./add-tag/hashtag">#<span class="search-highlight">hasht</span>' .
'ag</a> added<br />'. PHP_EOL;
$description .= 'A N D KEEP SPACES !<br />' . PHP_EOL;
// Markdown link whose target matched: the whole link text is highlighted instead.
$description .= 'And <a href="https://other.domain.tld">' .
'<span class="search-highlight">yet another link</span></a>';
$description .= '</p></div>';
$this->assertEquals($description, $link['description']);
}
/**
* Test formatting URL with an index_url set
* It should prepend relative links.