From 341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8 Mon Sep 17 00:00:00 2001 From: Willi Eggeling Date: Sat, 26 Aug 2017 23:05:02 +0200 Subject: [PATCH] wildcard tag search support - when searching for tags you can now include '*' as wildcard placeholder - new search reduces overall overhead when filtering for tags - fixed combination with description tag search ('#' prefix) - tests added --- application/LinkFilter.php | 131 ++++++++++++++++--------- tests/api/controllers/GetLinksTest.php | 83 ++++++++++++++++ 2 files changed, 167 insertions(+), 47 deletions(-) diff --git a/application/LinkFilter.php b/application/LinkFilter.php index 9551952..99ecd1e 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php @@ -249,6 +249,51 @@ class LinkFilter return $filtered; } + /** + * generate a regex fragment out of a tag + * @param string $tag tag to generate a regex from; may start with '-' to negate and contain '*' as a wildcard + * @return string generated regex fragment + */ + private static function tag2regex($tag) + { + $len = strlen($tag); + if(!$len || $tag === "-" || $tag === "*"){ + // nothing to search, return empty regex + return ''; + } + if($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // iterate over string, separating it into placeholder and content + for(; $i < $len; $i++){ + if($tag[$i] === '*'){ + // placeholder found + $regex .= '[^ ]*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if($offset === false){ + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. 
escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + $regex .= '(?:$| ))'; // after the tag may only be a space or the end + return $regex; + } + /** * Returns the list of links associated with a given list of tags * @@ -263,20 +308,32 @@ class LinkFilter */ public function filterTags($tags, $casesensitive = false, $visibility = 'all') { - // Implode if array for clean up. - $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; - if (empty($tags)) { + // get single tags (we may get passed an array, even though the docs say different) + $inputTags = $tags; + if(!is_array($tags)) { + // we got an input string, split tags + $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); + } + + if(!count($inputTags)){ + // no input tags return $this->noFilter($visibility); } - $searchtags = self::tagsStrToArray($tags, $casesensitive); - $filtered = array(); - if (empty($searchtags)) { - return $filtered; + // build regex from all tags + $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + if(!$casesensitive) { + // make regex case insensitive + $re .= 'i'; } + // create resulting array + $filtered = array(); + + // iterate over each link foreach ($this->links as $key => $link) { - // ignore non private links when 'privatonly' is on. + // check level of visibility + // ignore non private links when 'privateonly' is on. if ($visibility !== 'all') { if (! $link['private'] && $visibility === 'private') { continue; @@ -284,25 +341,27 @@ class LinkFilter continue; } } - - $linktags = self::tagsStrToArray($link['tags'], $casesensitive); - - $found = true; - for ($i = 0 ; $i < count($searchtags) && $found; $i++) { - // Exclusive search, quit if tag found. - // Or, tag not found in the link, quit. 
- if (($searchtags[$i][0] == '-' - && $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description'])) - || ($searchtags[$i][0] != '-') - && ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description']) - ) { - $found = false; + $search = $link['tags']; // build search string, start with tags of current link + if(strlen(trim($link['description'])) && strpos($link['description'], '#') !== false){ + // description given and at least one possible tag found + $descTags = array(); + // find all tags in the form of #tag in the description + preg_match_all( + '/(? 0) { - return true; - } - - return false; - } - /** * Convert a list of tags (str) to an array. Also * - handle case sensitivity. diff --git a/tests/api/controllers/GetLinksTest.php b/tests/api/controllers/GetLinksTest.php index 4cb7022..d22ed3b 100644 --- a/tests/api/controllers/GetLinksTest.php +++ b/tests/api/controllers/GetLinksTest.php @@ -367,6 +367,89 @@ class GetLinksTest extends \PHPUnit_Framework_TestCase $this->assertEquals(1, count($data)); $this->assertEquals(41, $data[0]['id']); $this->assertEquals(self::NB_FIELDS_LINK, count($data[0])); + + // wildcard: placeholder at the start + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=*Tuff', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(2, count($data)); + $this->assertEquals(41, $data[0]['id']); + + // wildcard: placeholder at the end + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=c*', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), 
true); + $this->assertEquals(4, count($data)); + $this->assertEquals(6, $data[0]['id']); + + // wildcard: placeholder at the middle + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=w*b', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(4, count($data)); + $this->assertEquals(6, $data[0]['id']); + + // wildcard: match all + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=*', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(9, count($data)); + $this->assertEquals(41, $data[0]['id']); + + // wildcard: optional ('*' does not need to expand) + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=*stuff*', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(2, count($data)); + $this->assertEquals(41, $data[0]['id']); + + // wildcard: exclusions + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=*a*+-*e*', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(1, count($data)); + $this->assertEquals(41, $data[0]['id']); // finds '#hashtag' in descr. 
+ + // wildcard: exclude all + $env = Environment::mock([ + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => 'searchtags=-*', + ]); + $request = Request::createFromEnvironment($env); + $response = $this->controller->getLinks($request, new Response()); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode((string) $response->getBody(), true); + $this->assertEquals(0, count($data)); } /**