Hashtag system

* Hashtags are auto-linked with a filter search
  * Supports unicode
  * Compatible with markdown (excluded in code blocks)
This commit is contained in:
ArthurHoaro 2016-05-10 23:18:04 +02:00
parent bb9ca54838
commit 9ccca40189
10 changed files with 271 additions and 104 deletions

View file

@ -409,7 +409,7 @@ You use the community supported version of the original Shaarli project, by Seba
$searchterm = !empty($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';
// Search tags + fullsearch.
if (empty($type) && ! empty($searchtags) && ! empty($searchterm)) {
if (! empty($searchtags) && ! empty($searchterm)) {
$type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT;
$request = array($searchtags, $searchterm);
}

View file

@ -27,6 +27,11 @@ class LinkFilter
*/
public static $FILTER_DAY = 'FILTER_DAY';
/**
* @var string Allowed characters for hashtags (regex syntax).
*/
public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
/**
* @var array all available links.
*/
@ -263,8 +268,10 @@ class LinkFilter
for ($i = 0 ; $i < count($searchtags) && $found; $i++) {
// Exclusive search, quit if tag found.
// Or, tag not found in the link, quit.
if (($searchtags[$i][0] == '-' && in_array(substr($searchtags[$i], 1), $linktags))
|| ($searchtags[$i][0] != '-') && ! in_array($searchtags[$i], $linktags)
if (($searchtags[$i][0] == '-'
&& $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description']))
|| ($searchtags[$i][0] != '-')
&& ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description'])
) {
$found = false;
}
@ -306,6 +313,28 @@ class LinkFilter
return $filtered;
}
/**
 * Check if a tag is found in the taglist, or as a hashtag in the link description.
 *
 * The tag is matched literally: it is escaped before being embedded in the
 * hashtag regex, so tags containing regex metacharacters (e.g. "c++", ".hidden",
 * "a/b") neither break the pattern nor match unrelated text.
 *
 * @param string $tag         Tag to search.
 * @param array  $taglist     List of tags for the current link.
 * @param string $description Link description.
 *
 * @return bool True if found, false otherwise.
 */
protected function searchTagAndHashTag($tag, $taglist, $description)
{
    if (in_array($tag, $taglist)) {
        return true;
    }

    // The tag comes from a search request: quote it (including the '/' delimiter)
    // so it can only ever match itself inside the pattern.
    $quotedTag = preg_quote($tag, '/');

    // A hashtag starts at a line start or after a space, and ends at the first
    // character which is not allowed in a hashtag (or at the end of a line).
    $pattern = '/(^| )#'. $quotedTag .'([^'. self::$HASHTAG_CHARS .']|$)/mui';

    return preg_match($pattern, $description) > 0;
}
/**
* Convert a list of tags (str) to an array. Also
* - handle case sensitivity.

View file

@ -91,5 +91,80 @@ function count_private($links)
foreach ($links as $link) {
$cpt = $link['private'] == true ? $cpt + 1 : $cpt;
}
return $cpt;
}
/**
 * Wrap every URL found in a string into an HTML anchor.
 *
 * Recognised URLs start with http(s)://, ftp://, file://, apt: or magnet:.
 * The anchor text is always the URL as it was written; only the href changes
 * when a redirector is used.
 *
 * @param string $text       input string.
 * @param string $redirector if a redirector is set, the href is the redirector
 *                           followed by the URL-encoded link.
 *
 * @return string returns $text with all links converted to HTML links.
 *
 * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
 */
function text2clickable($text, $redirector = '')
{
    $urlPattern = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';
    $useRedirector = ! empty($redirector);

    return preg_replace_callback(
        $urlPattern,
        function ($found) use ($redirector, $useRedirector) {
            $url = $found[1];
            // With a redirector, the target URL is passed URL-encoded after it.
            $href = $useRedirector ? $redirector . urlencode($url) : $url;
            return '<a href="' . $href .'">'. $url .'</a>';
        },
        $text
    );
}
/**
 * Turn every #hashtag of a description into a link adding that tag to the search.
 *
 * A hashtag is only recognised at the start of a line or after a whitespace
 * character. The leading whitespace is kept in front of the generated anchor.
 *
 * @param string $description Given description.
 * @param string $indexUrl    Root URL, prepended to the "?addtag=" query string.
 *
 * @return string Description with auto-linked hashtags.
 */
function hashtag_autolink($description, $indexUrl = '')
{
    /*
     * Unicode-aware hashtag characters: http://stackoverflow.com/a/35498078/1484919
     *   \p{Pc} - connector punctuation (underscore)
     *   \p{N}  - numeric character in any script
     *   \p{L}  - letter from any language
     *   \p{Mn} - non-spacing mark (accents, umlauts, etc)
     */
    $hashtagPattern = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';

    // $1 is the leading whitespace (or nothing), $2 the hashtag without '#'.
    $anchor = '<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>';

    return preg_replace($hashtagPattern, '$1' . $anchor, $description);
}
/**
 * Keep runs of spaces visible in HTML without relying on <pre>.
 *
 * Any space which directly follows a line start or another space is replaced
 * by &nbsp; (used in descriptions to preserve text formatting).
 *
 * @param string $text input text.
 *
 * @return string formatted text.
 */
function space2nbsp($text)
{
    $collapsibleSpace = '/(^| ) /m';
    $keepPrefixThenNbsp = '$1&nbsp;';

    return preg_replace($collapsibleSpace, $keepPrefixThenNbsp, $text);
}
/**
 * Format Shaarli's description.
 *
 * Applies, in this order: URL auto-linking, hashtag auto-linking,
 * space preservation, and newline to <br> conversion.
 *
 * @param string $description shaare's description.
 * @param string $redirector  if a redirector is set, use it to generate links.
 * @param string $indexUrl    root URL passed to the hashtag auto-linker.
 *
 * @return string formatted description.
 */
function format_description($description, $redirector = '', $indexUrl = '') {
    $formatted = text2clickable($description, $redirector);
    $formatted = hashtag_autolink($formatted, $indexUrl);
    $formatted = space2nbsp($formatted);

    return nl2br($formatted);
}

View file

@ -197,59 +197,6 @@ function is_session_id_valid($sessionId)
return true;
}
/**
 * Convert the URLs contained in a string into clickable HTML links.
 *
 * @param string $text       input string.
 * @param string $redirector if a redirector is set, use it to generate links:
 *                           the href becomes the redirector followed by the URL-encoded link.
 *
 * @return string returns $text with all links converted to HTML links.
 *
 * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
 */
function text2clickable($text, $redirector)
{
    $linkRegex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';

    if (! empty($redirector)) {
        // Redirector is set, urlencode the final URL.
        $wrapWithRedirector = function ($found) use ($redirector) {
            return '<a href="' . $redirector . urlencode($found[1]) .'">'. $found[1] .'</a>';
        };
        return preg_replace_callback($linkRegex, $wrapWithRedirector, $text);
    }

    // No redirector: link straight to the URL.
    return preg_replace($linkRegex, '<a href="$1">$1</a>', $text);
}
/**
 * Insert &nbsp; so that multiple spaces are properly displayed in HTML
 * even in the absence of <pre> (used in descriptions to keep text formatting).
 *
 * A space is converted when it comes right after a line start or another space.
 *
 * @param string $text input text.
 *
 * @return string formatted text.
 */
function space2nbsp($text)
{
    $search = '/(^| ) /m';
    $replace = '$1&nbsp;';
    $formatted = preg_replace($search, $replace, $text);

    return $formatted;
}
/**
 * Format Shaarli's description: auto-link URLs, preserve multiple spaces,
 * then convert newlines to <br>.
 * TODO: Move me to ApplicationUtils when it's ready.
 *
 * @param string $description shaare's description.
 * @param string $redirector  if a redirector is set, use it to generate links.
 *
 * @return string formatted description.
 */
function format_description($description, $redirector = false) {
    $withLinks = text2clickable($description, $redirector);
    $withSpaces = space2nbsp($withLinks);

    return nl2br($withSpaces);
}
/**
* Sniff browser language to set the locale automatically.
* Note that is may not work on your server if the corresponding locale is not installed.
@ -273,4 +220,4 @@ function autoLocale($headerLocale)
}
}
setlocale(LC_ALL, $attempts);
}
}

View file

@ -151,7 +151,44 @@ function hook_markdown_render_editlink($data)
*/
function reverse_text2clickable($description)
{
// NOTE(review): this early return looks like the pre-change implementation shown
// next to the new body in this diff view; as written, everything below it is
// unreachable. Confirm which version the real file contains.
return preg_replace('!<a +href="([^ ]*)">[^ ]+</a>!m', '$1', $description);
// Work line by line so that code blocks and code lines can be tracked.
$descriptionLines = explode(PHP_EOL, $description);
$descriptionOut = '';
$codeBlockOn = false;
$lineCount = 0;
foreach ($descriptionLines as $descriptionLine) {
// Detect line of code
// NOTE(review): the pattern below matches a single leading space; whitespace may
// have been collapsed in this view - confirm against the real file.
$codeLineOn = preg_match('/^ /', $descriptionLine) > 0;
// Detect and toggle block of code
// A line starting with ``` opens a block when none is open...
if (!$codeBlockOn) {
$codeBlockOn = preg_match('/^```/', $descriptionLine) > 0;
}
// ...and closes the block when one is already open.
elseif (preg_match('/^```/', $descriptionLine) > 0) {
$codeBlockOn = false;
}
// Regex fragment matching the title attribute carried by hashtag anchors.
$hashtagTitle = ' title="Hashtag [^"]+"';
// Reverse `inline code` hashtags.
// The anchor is replaced by its text; the surrounding backtick-delimited parts are kept.
$descriptionLine = preg_replace(
'!(`[^`\n]*)<a href="[^ ]*"'. $hashtagTitle .'>([^<]+)</a>([^`\n]*`)!m',
'$1$2$3',
$descriptionLine
);
// Reverse hashtag links if we're in a code block.
// Inside code, only anchors with the hashtag title are replaced by their text;
// outside code the title filter is empty, so only anchors without a title match.
$hashtagFilter = ($codeBlockOn || $codeLineOn) ? $hashtagTitle : '';
$descriptionLine = preg_replace(
'!<a href="[^ ]*"'. $hashtagFilter .'>([^<]+)</a>!m',
'$1',
$descriptionLine
);
$descriptionOut .= $descriptionLine;
// Re-insert the line separator after every line except the last one.
if ($lineCount++ < count($descriptionLines) - 1) {
$descriptionOut .= PHP_EOL;
}
}
return $descriptionOut;
}
/**
@ -226,9 +263,9 @@ function process_markdown($description)
$parsedown = new Parsedown();
$processedDescription = $description;
$processedDescription = reverse_text2clickable($processedDescription);
$processedDescription = reverse_nl2br($processedDescription);
$processedDescription = reverse_space2nbsp($processedDescription);
$processedDescription = reverse_text2clickable($processedDescription);
$processedDescription = unescape($processedDescription);
$processedDescription = $parsedown
->setMarkupEscaped(false)

View file

@ -256,7 +256,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
$link = self::$publicLinkDB->getLinkFromUrl('http://mediagoblin.org/');
$this->assertNotEquals(false, $link);
$this->assertEquals(
$this->assertContains(
'A free software media publishing platform',
$link['description']
);
@ -293,6 +293,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
// The DB contains a link with `sTuff` and another one with `stuff` tag.
// They need to be grouped with the first case found (`sTuff`).
'sTuff' => 2,
'hashtag' => 2,
),
self::$publicLinkDB->allTags()
);
@ -315,6 +316,7 @@ class LinkDBTest extends PHPUnit_Framework_TestCase
'sTuff' => 2,
'-exclude' => 1,
'.hidden' => 1,
'hashtag' => 2,
),
self::$privateLinkDB->allTags()
);

View file

@ -387,4 +387,30 @@ class LinkFilterTest extends PHPUnit_Framework_TestCase
))
);
}
/**
 * Filter links by #hashtag: the tag filter must return the expected
 * number of links for the 'hashtag' and 'private' search terms.
 */
public function testFilterByHashtag()
{
    $hashtagLinks = self::$linkFilter->filter(LinkFilter::$FILTER_TAG, 'hashtag');
    $this->assertEquals(3, count($hashtagLinks));

    // Same filter type, called with the two extra arguments (false, true).
    $privateTagLinks = self::$linkFilter->filter(
        LinkFilter::$FILTER_TAG,
        'private',
        false,
        true
    );
    $this->assertEquals(1, count($privateTagLinks));
}
}

View file

@ -93,4 +93,92 @@ class LinkUtilsTest extends PHPUnit_Framework_TestCase
$refDB = new ReferenceLinkDB();
$this->assertEquals($refDB->countPrivateLinks(), count_private($refDB->getLinks()));
}
/**
 * text2clickable with an empty redirector: the URL is linked to itself.
 */
public function testText2clickableWithoutRedirector()
{
    $input = 'stuff http://hello.there/is=someone#here otherstuff';
    $expected = 'stuff <a href="http://hello.there/is=someone#here">http://hello.there/is=someone#here</a> otherstuff';

    $this->assertEquals($expected, text2clickable($input, ''));
}
/**
 * text2clickable with a redirector: the href is the redirector followed by
 * the URL-encoded link, while the anchor text stays the raw URL.
 */
public function testText2clickableWithRedirector()
{
    $redirector = 'http://redirector.to';
    $input = 'stuff http://hello.there/is=someone#here otherstuff';
    $encodedUrl = urlencode('http://hello.there/is=someone#here');
    $expected = 'stuff <a href="' . $redirector . $encodedUrl
        . '">http://hello.there/is=someone#here</a> otherstuff';

    $this->assertEquals($expected, text2clickable($input, $redirector));
}
/**
 * space2nbsp must convert spaces to &nbsp; on every line of a multi-line text.
 */
public function testSpace2nbsp()
{
    $input = ' Are you thrilled by flags ?'. PHP_EOL .' Really?';
    $expected = '&nbsp; Are you &nbsp; thrilled &nbsp;by flags &nbsp; ?'. PHP_EOL .'&nbsp;Really?';

    $this->assertEquals($expected, space2nbsp($input));
}
/**
 * Test hashtags auto-link.
 *
 * Feeds hashtag_autolink() a multi-line description mixing valid hashtags
 * (ASCII, Cyrillic, Katakana, with underscore) and look-alikes which must not
 * be linked ("# x", "x#y", "\#x"), then checks which links were generated.
 */
public function testHashtagAutolink()
{
$index = 'http://domain.tld/';
// NOTE(review): this is a single-quoted string, so each "\n" below is a literal
// backslash followed by "n", not a newline; the real line breaks come from the
// literal spanning several source lines. Confirm this is intended.
$rawDescription = '#hashtag\n
# nothashtag\n
test#nothashtag #hashtag \#nothashtag\n
test #hashtag #hashtag test #hashtag.test\n
#hashtag #hashtag-nothashtag #hashtag_hashtag\n
What is #ашок anyway?\n
カタカナ #カタカナ」カタカナ\n';
$autolinkedDescription = hashtag_autolink($rawDescription, $index);
// A valid hashtag is linked, and no space-prefixed "#hashtag" is left unlinked.
$this->assertContains($this->getHashtagLink('hashtag', $index), $autolinkedDescription);
$this->assertNotContains(' #hashtag', $autolinkedDescription);
// No anchor text may be "#nothashtag".
$this->assertNotContains('>#nothashtag', $autolinkedDescription);
// Unicode hashtags and underscores are supported.
$this->assertContains($this->getHashtagLink('ашок', $index), $autolinkedDescription);
$this->assertContains($this->getHashtagLink('カタカナ', $index), $autolinkedDescription);
$this->assertContains($this->getHashtagLink('hashtag_hashtag', $index), $autolinkedDescription);
// A dash is not part of a hashtag, so no link is built for the dashed name.
$this->assertNotContains($this->getHashtagLink('hashtag-nothashtag', $index), $autolinkedDescription);
}
/**
 * Hashtag auto-link called without an index URL: the link href
 * starts directly with the "?addtag=" query string.
 */
public function testHashtagAutolinkNoIndex()
{
    $linked = hashtag_autolink('blabla #hashtag x#nothashtag');

    $this->assertContains($this->getHashtagLink('hashtag'), $linked);
    $this->assertNotContains(' #hashtag', $linked);
    $this->assertNotContains('>#nothashtag', $linked);
}
/**
 * Build the HTML link expected for a given hashtag.
 *
 * @param string $hashtag Hashtag name (without the leading '#').
 * @param string $index   Index URL, placed before the "?addtag=" query string.
 *
 * @return string HTML hashtag link.
 */
private function getHashtagLink($hashtag, $index = '')
{
    // '$1' is a plain placeholder here, substituted with str_replace().
    return str_replace(
        '$1',
        $hashtag,
        '<a href="'. $index .'?addtag=$1" title="Hashtag $1">#$1</a>'
    );
}
}

View file

@ -253,41 +253,4 @@ class UtilsTest extends PHPUnit_Framework_TestCase
is_session_id_valid('c0ZqcWF3VFE2NmJBdm1HMVQ0ZHJ3UmZPbTFsNGhkNHI=')
);
}
/**
 * Check text2clickable output when no redirector is configured.
 */
public function testText2clickableWithoutRedirector()
{
    $source = 'stuff http://hello.there/is=someone#here otherstuff';
    $linked = text2clickable($source, '');

    $this->assertEquals(
        'stuff <a href="http://hello.there/is=someone#here">http://hello.there/is=someone#here</a> otherstuff',
        $linked
    );
}
/**
 * Check text2clickable output when a redirector is configured:
 * the link target must be the redirector plus the URL-encoded address.
 */
public function testText2clickableWithRedirector()
{
    $redirector = 'http://redirector.to';
    $source = 'stuff http://hello.there/is=someone#here otherstuff';
    $target = $redirector . urlencode('http://hello.there/is=someone#here');

    $this->assertEquals(
        'stuff <a href="' . $target . '">http://hello.there/is=someone#here</a> otherstuff',
        text2clickable($source, $redirector)
    );
}
/**
 * Check that space2nbsp handles each line of a two-line text.
 */
public function testSpace2nbsp()
{
    $source = ' Are you thrilled by flags ?'. PHP_EOL .' Really?';
    $converted = space2nbsp($source);

    $this->assertEquals(
        '&nbsp; Are you &nbsp; thrilled &nbsp;by flags &nbsp; ?'. PHP_EOL .'&nbsp;Really?',
        $converted
    );
}
}

View file

@ -18,7 +18,7 @@ class ReferenceLinkDB
$this->addLink(
'Link title: @website',
'?WDWyig',
'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.',
'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this. #hashtag',
0,
'20150310_114651',
'sTuff'
@ -27,25 +27,25 @@ class ReferenceLinkDB
$this->addLink(
'Free as in Freedom 2.0 @website',
'https://static.fsf.org/nosvn/faif-2.0.pdf',
'Richard Stallman and the Free Software Revolution. Read this.',
'Richard Stallman and the Free Software Revolution. Read this. #hashtag',
0,
'20150310_114633',
'free gnu software stallman -exclude stuff'
'free gnu software stallman -exclude stuff hashtag'
);
$this->addLink(
'MediaGoblin',
'http://mediagoblin.org/',
'A free software media publishing platform',
'A free software media publishing platform #hashtagOther',
0,
'20130614_184135',
'gnu media web .hidden'
'gnu media web .hidden hashtag'
);
$this->addLink(
'w3c-markup-validator',
'https://dvcs.w3.org/hg/markup-validator/summary',
'Mercurial repository for the W3C Validator',
'Mercurial repository for the W3C Validator #private',
1,
'20141125_084734',
'css html w3c web Mercurial'
@ -54,7 +54,7 @@ class ReferenceLinkDB
$this->addLink(
'UserFriendly - Web Designer',
'http://ars.userfriendly.org/cartoons/?id=20121206',
'Naming conventions...',
'Naming conventions... #private',
0,
'20121206_142300',
'dev cartoon web'