Added support for OR (~) and optional AND (+) operators for tag searches.

This commit is contained in:
Keith Carangelo 2021-09-08 15:26:58 -04:00
parent 6e0f92acb4
commit a7d43caccb
4 changed files with 119 additions and 21 deletions

View file

@ -323,7 +323,6 @@ public function filterTags($tags, bool $casesensitive = false, string $visibilit
// we got an input string, split tags // we got an input string, split tags
$inputTags = tags_str2array($inputTags, $tagsSeparator); $inputTags = tags_str2array($inputTags, $tagsSeparator);
} }
if (count($inputTags) === 0) { if (count($inputTags) === 0) {
// no input tags // no input tags
return $this->noFilter($visibility); return $this->noFilter($visibility);
@ -341,7 +340,20 @@ public function filterTags($tags, bool $casesensitive = false, string $visibilit
} }
// build regex from all tags // build regex from all tags
$re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/'; $re_and = implode(array_map([$this, 'tag2regex'], $inputTags));
$re = '/^' . $re_and;
$orTags = array_filter(array_map( function ($tag) {
return startsWith($tag, '~') ? substr( $tag, 1 ) : null;
}, $inputTags ));
$re_or = implode('|', array_map([$this, 'tag2match_term'], $orTags));
if ($re_or) {
$re_or = '(' . $re_or . ')';
$re .= $this->term2match( $re_or, false );
}
$re .= '.*$/';
if (!$casesensitive) { if (!$casesensitive) {
// make regex case insensitive // make regex case insensitive
$re .= 'i'; $re .= 'i';
@ -460,33 +472,49 @@ public static function tagsStrToArray(string $tags, bool $casesensitive): array
/**
 * Generate a regex lookahead fragment out of a single search tag.
 *
 * Leading flags understood by this method:
 *  - '+' is an optional "AND" marker and is stripped before matching;
 *  - '-' negates the tag (a negative lookahead is generated);
 *  - '~' marks an "OR" tag: such tags produce an empty fragment here because
 *    they are treated separately as an 'OR' clause.
 * The tag may contain '*' as a wildcard.
 *
 * @param string $tag tag to generate the regex from
 *
 * @return string generated regex fragment, or '' when there is nothing to search for
 */
protected function tag2regex(string $tag): string
{
    // Compare against '' instead of relying on truthiness: the tag "0" is
    // falsy in PHP but is still a tag that must be searchable.
    if ($tag === '' || $tag === '-' || $tag === '*' || $tag[0] === '~') {
        // nothing to search, return empty regex
        return '';
    }

    // Strip the optional '+' flag. Checking the length (rather than reading
    // $tag[1]) avoids an undefined string offset on a lone "+" and also
    // strips the flag from "+0".
    if ($tag[0] === '+' && strlen($tag) > 1) {
        $tag = substr($tag, 1);
    }

    if ($tag === '-') {
        // "+-" carries no tag name either; bail out before substr() is asked
        // for an offset equal to the string length (false before PHP 8).
        return '';
    }

    $negate = false;
    if ($tag[0] === '-') {
        // query is negated: drop the '-' and remember to negate the lookahead
        $tag = substr($tag, 1);
        $negate = true;
    }

    $term = $this->tag2match_term($tag);

    return $this->term2match($term, $negate);
}
/**
* generate a regex match term fragment out of a tag
*
* @param string $tag to generate the regex from. This function assumes any leading flags ('+', '-', '~') have been stripped. The wildcard flag '*' is expanded by this function and any other regex characters are escaped.
*
* @return string generated regex match term fragment
*/
protected function tag2match_term(string $tag): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$len = strlen($tag);
$term = '';
// iterate over string, separating it into placeholder and content // iterate over string, separating it into placeholder and content
$i = 0; // start at first character
for (; $i < $len; $i++) { for (; $i < $len; $i++) {
if ($tag[$i] === '*') { if ($tag[$i] === '*') {
// placeholder found // placeholder found
$regex .= '[^' . $tagsSeparator . ']*?'; $term .= '[^' . $tagsSeparator . ']*?';
} else { } else {
// regular characters // regular characters
$offset = strpos($tag, '*', $i); $offset = strpos($tag, '*', $i);
@ -497,11 +525,33 @@ protected function tag2regex(string $tag): string
// subtract one, as we want to get before the placeholder or end of string // subtract one, as we want to get before the placeholder or end of string
$offset -= 1; $offset -= 1;
// we got a tag name that we want to search for. escape any regex characters to prevent conflicts. // we got a tag name that we want to search for. escape any regex characters to prevent conflicts.
$regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); $term .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
// move $i on // move $i on
$i = $offset; $i = $offset;
} }
} }
return $term;
}
/**
* generate a regex fragment out of a match term
*
* @param string $term is the match term already generated by tag2match_term
* @param bool $negate if true create a negative lookahead
*
* @return string generated regex fragment
*/
protected function term2match(string $term, bool $negate): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$regex = $negate ? '(?!' : '(?='; // use negative or positive lookahead
// before tag may only be the separator or the beginning
$regex .= '.*(?:^|' . $tagsSeparator . ')';
$regex .= $term;
// after the tag may only be the separator or the end // after the tag may only be the separator or the end
$regex .= '(?:$|' . $tagsSeparator . '))'; $regex .= '(?:$|' . $tagsSeparator . '))';
return $regex; return $regex;

View file

@ -45,8 +45,18 @@ function refreshToken(basePath, callback) {
function createAwesompleteInstance(element, separator, tags = []) { function createAwesompleteInstance(element, separator, tags = []) {
const awesome = new Awesomplete(Awesomplete.$(element)); const awesome = new Awesomplete(Awesomplete.$(element));
// Tags are separated by separator // Tags are separated by separator. Ignore leading search flags
awesome.filter = (text, input) => Awesomplete.FILTER_CONTAINS(text, input.match(new RegExp(`[^${separator}]*$`))[0]); awesome.filter = (text, input) => {
let filter_func = Awesomplete.FILTER_CONTAINS;
let input_flagged = input.replace( /^[\-\~\+]/, '' );
if (input !== input_flagged) {
input = input_flagged;
filter_func = Awesomplete.FILTER_STARTSWITH;
}
return filter_func(text, input.match(new RegExp(`[^${separator}]*$`))[0]);
};
// Insert new selected tag in the input // Insert new selected tag in the input
awesome.replace = (text) => { awesome.replace = (text) => {
const before = awesome.input.value.match(new RegExp(`^.+${separator}+|`))[0]; const before = awesome.input.value.match(new RegExp(`^.+${separator}+|`))[0];

View file

@ -49,19 +49,21 @@ Shaarli can be used as a minimal blog, notepad, pastebin...: While adding or edi
### Search ### Search
- **Plain text search:** Use `Search text` to search in all fields of all Shaares (Title, URL, Description...). Use double-quotes (example `"exact search"`) to search for the exact expression. - **Plain text search:** Use `Search text` to search in all fields of all Shaares (Title, URL, Description...). Use double-quotes (example `"exact search"`) to search for the exact expression.
- **Tags search:** `Filter by tags` allow only displaying Shaares tagged with one or multiple tags (use space to separate tags). - **Tags search:** `Filter by tags` allow only displaying Shaares tagged with one or multiple tags (use space to separate tags). A plus sign `+` is optional and will restrict suggested tags to only those starting with the string (example: `pr` will hint `apron` and `printer` but `+pr` will only hint printer).
- **Hidden tags:** tags starting with a dot `.` (example `.secret`) are private. They can only be seen and searched when logged in. - **Hidden tags:** tags starting with a dot `.` (example `.secret`) are private. They can only be seen and searched when logged in.
- **Exclude text/tags:** Use the `-` operator before a word or tag to exclude Shaares matching this word from search results (`NOT` operator). - **Exclude text/tags:** Use the `-` operator before a word or tag to exclude Shaares matching this word from search results (`NOT` operator).
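- **Optional tags:** Use the `~` operator before multiple tags to search for any one of them (`OR` operator). Optional tags can be combined with regular and excluded tags (example: `~cartoon ~web dev -samba` matches Shaares tagged `dev`, not tagged `samba`, and tagged with at least one of `cartoon` or `web`).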
- **Wildcard tag search:** An asterisk (`*`) can be used as a wildcard and will match any number of characters. Wildcards can appear in the middle of a search term or at the end (example: `pro*in*` will match `programming` and `protein`).
- **Untagged links:** Shaares without tags can be searched by clicking the `untagged` toggle button top left of the Shaares list (only when logged in). - **Untagged links:** Shaares without tags can be searched by clicking the `untagged` toggle button top left of the Shaares list (only when logged in).
Both exclude patterns and exact searches can be combined with normal searches (example `"exact search" term otherterm -notthis "very exact" stuff -notagain`). Only AND (and NOT) search is currrently supported. Both exclude patterns and exact searches can be combined with normal searches (example `"exact search" term otherterm -notthis "very exact" stuff -notagain`). Only AND (and NOT) text search is currently supported.
Active search terms are displayed on top of the link list. To remove terms/tags from the curent search, click the `x` next to any of them, or simply clear text/tag search fields. Active search terms are displayed on top of the link list. To remove terms/tags from the current search, click the `x` next to any of them, or simply clear text/tag search fields.
### Tag cloud ### Tag cloud
The `Tag cloud` page diplays a "cloud" or list view of all tags in your Shaarli (most frequently used tags are displayed with a bigger font size) The `Tag cloud` page displays a "cloud" or list view of all tags in your Shaarli (most frequently used tags are displayed with a bigger font size)
- **Tags list:** click on `Most used` or `Alphabetical` to display tags as a list. You can also edit/delete tags for this page. - **Tags list:** click on `Most used` or `Alphabetical` to display tags as a list. You can also edit/delete tags for this page.
@ -76,7 +78,7 @@ RSS/ATOM feeds feeds are available (in ATOM with `/feed/atom` and RSS with `/fee
- **Filtering RSS feeds:** RSS feeds and picture wall can also be restricted to only return items matching a text/tag search. For example, search for `photography` (text or tags) in Shaarli, then click the `RSS Feed` button. A feed with only matching results is displayed. - **Filtering RSS feeds:** RSS feeds and picture wall can also be restricted to only return items matching a text/tag search. For example, search for `photography` (text or tags) in Shaarli, then click the `RSS Feed` button. A feed with only matching results is displayed.
- Add the `&nb` parameter in feed URLs to specify the number of Shaares you want in a feed (default if not specified: `50`). The keyword `all` is available if you want everything. - Add the `&nb` parameter in feed URLs to specify the number of Shaares you want in a feed (default if not specified: `50`). The keyword `all` is available if you want everything.
- Add the `&permalinks` parameter in feed URLs to point permalinks to the corresponding shaarly entry/link instead of the direct, Shaare URL attribute - Add the `&permalinks` parameter in feed URLs to point permalinks to the corresponding Shaarli entry/link instead of the direct, Shaare URL attribute
![](images/rss-filter-1.png) ![](images/rss-filter-2.png) ![](images/rss-filter-1.png) ![](images/rss-filter-2.png)

View file

@ -124,6 +124,11 @@ public function testFilterOneTag()
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false)) count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false))
); );
$this->assertEquals(
4,
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, '+web', false))
);
$this->assertEquals( $this->assertEquals(
4, 4,
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false, 'all')) count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false, 'all'))
@ -451,6 +456,37 @@ public function testFilterCrossedSearch()
); );
} }
/**
 * Tag search mixing optional ('~', OR) tags with required and excluded tags.
 */
public function testTagFilterOr()
{
    // Number of bookmarks returned by a tag search for the given query string.
    $matches = static function (string $query): int {
        return count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, $query));
    };

    // two optional tags
    $this->assertEquals(5, $matches('~cartoon ~web'));

    // optional tags containing wildcards
    $this->assertEquals(6, $matches('~c*t*n ~st*'));

    // optional tags together with a plain tag
    $this->assertEquals(2, $matches('~cartoon ~web dev'));

    // same query with the '+' flag on the plain tag
    $this->assertEquals(2, $matches('~cartoon ~web +dev'));

    // optional tags together with an excluded tag
    $this->assertEquals(4, $matches('~cartoon ~web -samba'));
}
/** /**
* Filter bookmarks by #hashtag. * Filter bookmarks by #hashtag.
*/ */