Support text search across link fields.

This commit is contained in:
ArthurHoaro 2016-02-02 19:42:48 +01:00
parent bedd176a54
commit 522b278b03
4 changed files with 75 additions and 37 deletions

View file

@ -138,6 +138,7 @@ private function filterSmallHash($smallHash)
*/ */
private function filterFulltext($searchterms, $privateonly = false) private function filterFulltext($searchterms, $privateonly = false)
{ {
$filtered = array();
$search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
$exactRegex = '/"([^"]+)"/'; $exactRegex = '/"([^"]+)"/';
// Retrieve exact search terms. // Retrieve exact search terms.
@ -169,35 +170,32 @@ private function filterFulltext($searchterms, $privateonly = false)
continue; continue;
} }
// Iterate over searchable link fields. // Concatenate link fields to search across fields.
// Adds a '\' separator for exact search terms.
$content = '';
foreach ($keys as $key) { foreach ($keys as $key) {
// Be optimistic $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
$found = true;
$haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8');
// First, we look for exact term search
for ($i = 0; $i < count($exactSearch) && $found; $i++) {
$found = strpos($haystack, $exactSearch[$i]) !== false;
}
// Iterate over keywords, if keyword is not found,
// no need to check for the others. We want all or nothing.
for ($i = 0; $i < count($andSearch) && $found; $i++) {
$found = strpos($haystack, $andSearch[$i]) !== false;
}
// Exclude terms.
for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
$found = strpos($haystack, $excludeSearch[$i]) === false;
}
// One of the fields of the link matches, no need to check the other.
if ($found) {
break;
}
} }
// Be optimistic
$found = true;
// First, we look for exact term search
for ($i = 0; $i < count($exactSearch) && $found; $i++) {
$found = strpos($content, $exactSearch[$i]) !== false;
}
// Iterate over keywords, if keyword is not found,
// no need to check for the others. We want all or nothing.
for ($i = 0; $i < count($andSearch) && $found; $i++) {
$found = strpos($content, $andSearch[$i]) !== false;
}
// Exclude terms.
for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
$found = strpos($content, $excludeSearch[$i]) === false;
}
if ($found) { if ($found) {
$filtered[$link['linkdate']] = $link; $filtered[$link['linkdate']] = $link;
} }

View file

@ -298,6 +298,7 @@ public function testAllTags()
'w3c' => 1, 'w3c' => 1,
'css' => 1, 'css' => 1,
'Mercurial' => 1, 'Mercurial' => 1,
'stuff' => 2,
'-exclude' => 1, '-exclude' => 1,
'.hidden' => 1, '.hidden' => 1,
), ),

View file

@ -164,6 +164,17 @@ public function testFilterUnknownSmallHash()
); );
} }
/**
 * Full-text search - a term expected to match no link returns zero results.
 */
public function testFilterFullTextNoResult()
{
    // Run the text filter once and keep the result so the assertion reads plainly.
    $matches = self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'azertyuiop');

    $this->assertEquals(0, count($matches));
}
/** /**
* Full-text search - result from a link's URL * Full-text search - result from a link's URL
*/ */
@ -262,28 +273,56 @@ public function testExcludeSearch()
{ {
$this->assertEquals( $this->assertEquals(
1, 1,
count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -software')) count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, 'free -gnu'))
); );
$this->assertEquals( $this->assertEquals(
7, 6,
count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-software')) count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '-revolution'))
); );
} }
/** /**
* Full-text search - test AND, exact terms and exclusion combined. * Full-text search - test AND, exact terms and exclusion combined, across fields.
*/ */
public function testMultiSearch() public function testMultiSearch()
{ {
$this->assertEquals( $this->assertEquals(
2, 2,
count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"Free Software " stallman "read this"')) count(self::$linkFilter->filter(
LinkFilter::$FILTER_TEXT,
'"Free Software " stallman "read this" @website stuff'
))
); );
$this->assertEquals( $this->assertEquals(
1, 1,
count(self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, '"free software " stallman "read this" -beard')) count(self::$linkFilter->filter(
LinkFilter::$FILTER_TEXT,
'"free software " stallman "read this" -beard @website stuff'
))
);
}
/**
 * Full-text search - exact (quoted) terms must not match across fields.
 *
 * Each quoted term below is expected to return zero links, both with and
 * without a space between the two words.
 */
public function testSearchExactTermMultiFieldsKo()
{
    $exactTerms = array(
        '"designer naming"',
        '"designernaming"',
    );

    // Terms are checked in order; the first failing assertion aborts the test.
    foreach ($exactTerms as $term) {
        $found = self::$linkFilter->filter(LinkFilter::$FILTER_TEXT, $term);
        $this->assertEquals(0, count($found));
    }
}

View file

@ -14,21 +14,21 @@ class ReferenceLinkDB
function __construct() function __construct()
{ {
$this->addLink( $this->addLink(
'Free as in Freedom 2.0', 'Free as in Freedom 2.0 @website',
'https://static.fsf.org/nosvn/faif-2.0.pdf', 'https://static.fsf.org/nosvn/faif-2.0.pdf',
'Richard Stallman and the Free Software Revolution. Read this.', 'Richard Stallman and the Free Software Revolution. Read this.',
0, 0,
'20150310_114633', '20150310_114633',
'free gnu software stallman -exclude' 'free gnu software stallman -exclude stuff'
); );
$this->addLink( $this->addLink(
'Note:', 'Link title: @website',
'local', 'local',
'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.', 'Stallman has a beard and is part of the Free Software Foundation (or not). Seriously, read this.',
0, 0,
'20150310_114651', '20150310_114651',
'' 'stuff'
); );
$this->addLink( $this->addLink(