Added support for OR (~) and optional AND (+) operators for tag searches.

This commit is contained in:
Keith Carangelo 2021-09-08 15:26:58 -04:00
parent 6e0f92acb4
commit a7d43caccb
4 changed files with 119 additions and 21 deletions

View file

@ -323,7 +323,6 @@ class BookmarkFilter
// we got an input string, split tags
$inputTags = tags_str2array($inputTags, $tagsSeparator);
}
if (count($inputTags) === 0) {
// no input tags
return $this->noFilter($visibility);
@ -341,7 +340,20 @@ class BookmarkFilter
}
// build regex from all tags
$re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/';
$re_and = implode(array_map([$this, 'tag2regex'], $inputTags));
$re = '/^' . $re_and;
$orTags = array_filter(array_map( function ($tag) {
return startsWith($tag, '~') ? substr( $tag, 1 ) : null;
}, $inputTags ));
$re_or = implode('|', array_map([$this, 'tag2match_term'], $orTags));
if ($re_or) {
$re_or = '(' . $re_or . ')';
$re .= $this->term2match( $re_or, false );
}
$re .= '.*$/';
if (!$casesensitive) {
// make regex case insensitive
$re .= 'i';
@ -460,33 +472,49 @@ class BookmarkFilter
/**
* generate a regex fragment out of a tag
*
* @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard
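* @param string $tag tag to generate a regex fragment from. May start with '-' to negate or with an optional '+' (which is ignored), and may contain '*' as a wildcard. Tags starting with '~' are handled separately as an 'OR' clause and yield an empty fragment here.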
*
* @return string generated regex fragment
*/
protected function tag2regex(string $tag): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$len = strlen($tag);
if (!$len || $tag === "-" || $tag === "*") {
if (!$tag || $tag === "-" || $tag === "*" || $tag[0] === "~") {
// nothing to search, return empty regex
return '';
}
$negate = false;
if ($tag[0] === "+" && $tag[1]) {
$tag = substr( $tag, 1 ); // use offset to start after '+' character
}
if ($tag[0] === "-") {
// query is negated
$i = 1; // use offset to start after '-' character
$regex = '(?!'; // create negative lookahead
} else {
$i = 0; // start at first character
$regex = '(?='; // use positive lookahead
$tag = substr( $tag, 1 ); // use offset to start after '-' character
$negate = true;
}
// before tag may only be the separator or the beginning
$regex .= '.*(?:^|' . $tagsSeparator . ')';
$term = $this->tag2match_term( $tag );
return $this->term2match( $term, $negate );
}
/**
* generate a regex match term fragment out of a tag
*
* @param string $tag tag to generate a regex from. This function assumes any leading flags ('+', '-', '~') have already been stripped. The wildcard '*' is expanded by this function and all other regex characters are escaped.
*
* @return string generated regex match term fragment
*/
protected function tag2match_term(string $tag): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$len = strlen($tag);
$term = '';
// iterate over string, separating it into placeholder and content
$i = 0; // start at first character
for (; $i < $len; $i++) {
if ($tag[$i] === '*') {
// placeholder found
$regex .= '[^' . $tagsSeparator . ']*?';
$term .= '[^' . $tagsSeparator . ']*?';
} else {
// regular characters
$offset = strpos($tag, '*', $i);
@ -497,11 +525,33 @@ class BookmarkFilter
// subtract one, as we want to get before the placeholder or end of string
$offset -= 1;
// we got a tag name that we want to search for. escape any regex characters to prevent conflicts.
$regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
$term .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
// move $i on
$i = $offset;
}
}
return $term;
}
/**
* generate a regex fragment out of a match term
*
* @param string $term is the match term already generated by tag2match_term
* @param bool $negate if true create a negative lookahead
*
* @return string generated regex fragment
*/
protected function term2match(string $term, bool $negate): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$regex = $negate ? '(?!' : '(?='; // use negative or positive lookahead
// before tag may only be the separator or the beginning
$regex .= '.*(?:^|' . $tagsSeparator . ')';
$regex .= $term;
// after the tag may only be the separator or the end
$regex .= '(?:$|' . $tagsSeparator . '))';
return $regex;

View file

@ -45,8 +45,18 @@ function refreshToken(basePath, callback) {
function createAwesompleteInstance(element, separator, tags = []) {
const awesome = new Awesomplete(Awesomplete.$(element));
// Tags are separated by separator
awesome.filter = (text, input) => Awesomplete.FILTER_CONTAINS(text, input.match(new RegExp(`[^${separator}]*$`))[0]);
// Tags are separated by separator. Ignore leading search flags
awesome.filter = (text, input) => {
let filter_func = Awesomplete.FILTER_CONTAINS;
let input_flagged = input.replace( /^[\-\~\+]/, '' );
if (input !== input_flagged) {
input = input_flagged;
filter_func = Awesomplete.FILTER_STARTSWITH;
}
return filter_func(text, input.match(new RegExp(`[^${separator}]*$`))[0]);
};
// Insert new selected tag in the input
awesome.replace = (text) => {
const before = awesome.input.value.match(new RegExp(`^.+${separator}+|`))[0];

View file

@ -49,19 +49,21 @@ Shaarli can be used as a minimal blog, notepad, pastebin...: While adding or edi
### Search
- **Plain text search:** Use `Search text` to search in all fields of all Shaares (Title, URL, Description...). Use double-quotes (example `"exact search"`) to search for the exact expression.
- **Tags search:** `Filter by tags` allow only displaying Shaares tagged with one or multiple tags (use space to separate tags).
- **Tags search:** `Filter by tags` allows displaying only Shaares tagged with one or more tags (use spaces to separate tags). A leading plus sign `+` is optional and restricts suggested tags to those starting with the typed string (example: `pr` will suggest `apron` and `printer`, but `+pr` will only suggest `printer`).
- **Hidden tags:** tags starting with a dot `.` (example `.secret`) are private. They can only be seen and searched when logged in.
- **Exclude text/tags:** Use the `-` operator before a word or tag to exclude Shaares matching this word from search results (`NOT` operator).
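- **Optional tags:** Use the `~` operator before multiple tags to search for any one of them (`OR` operator). Tags without `~` in the same search are still required (example: `~cartoon ~web dev` matches Shaares tagged `dev` and at least one of `cartoon` or `web`).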
- **Wildcard tag search:** An asterisk (`*`) can be used as a wildcard and will match any number of characters. Wildcards can appear in the middle of a search term or at the end (example: pro\*in\* will match programming and protein).
- **Untagged links:** Shaares without tags can be searched by clicking the `untagged` toggle button top left of the Shaares list (only when logged in).
Both exclude patterns and exact searches can be combined with normal searches (example `"exact search" term otherterm -notthis "very exact" stuff -notagain`). Only AND (and NOT) search is currrently supported.
Both exclude patterns and exact searches can be combined with normal searches (example `"exact search" term otherterm -notthis "very exact" stuff -notagain`). Only AND (and NOT) text search is currently supported.
Active search terms are displayed on top of the link list. To remove terms/tags from the curent search, click the `x` next to any of them, or simply clear text/tag search fields.
Active search terms are displayed on top of the link list. To remove terms/tags from the current search, click the `x` next to any of them, or simply clear text/tag search fields.
### Tag cloud
The `Tag cloud` page diplays a "cloud" or list view of all tags in your Shaarli (most frequently used tags are displayed with a bigger font size)
The `Tag cloud` page displays a "cloud" or list view of all tags in your Shaarli (most frequently used tags are displayed with a bigger font size)
- **Tags list:** click on `Most used` or `Alphabetical` to display tags as a list. You can also edit/delete tags for this page.
@ -76,7 +78,7 @@ RSS/ATOM feeds feeds are available (in ATOM with `/feed/atom` and RSS with `/fee
- **Filtering RSS feeds:** RSS feeds and picture wall can also be restricted to only return items matching a text/tag search. For example, search for `photography` (text or tags) in Shaarli, then click the `RSS Feed` button. A feed with only matching results is displayed.
- Add the `&nb` parameter in feed URLs to specify the number of Shaares you want in a feed (default if not specified: `50`). The keyword `all` is available if you want everything.
- Add the `&permalinks` parameter in feed URLs to point permalinks to the corresponding shaarly entry/link instead of the direct, Shaare URL attribute
- Add the `&permalinks` parameter in feed URLs to point permalinks to the corresponding Shaarli entry/link instead of the direct, Shaare URL attribute
![](images/rss-filter-1.png) ![](images/rss-filter-2.png)

View file

@ -124,6 +124,11 @@ class BookmarkFilterTest extends TestCase
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false))
);
$this->assertEquals(
4,
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, '+web', false))
);
$this->assertEquals(
4,
count(self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, 'web', false, 'all'))
@ -451,6 +456,37 @@ class BookmarkFilterTest extends TestCase
);
}
/**
 * Tag search using '~'-prefixed tags, which are OR-ed together.
 *
 * Every entry maps a tag query to the number of bookmarks the filter must return.
 * The queries cover plain OR tags, OR tags containing wildcards, and OR tags
 * combined with an additional tag (bare or '+'-prefixed) or an excluded ('-') tag.
 */
public function testTagFilterOr()
{
    $expectedCounts = [
        '~cartoon ~web' => 5,
        '~c*t*n ~st*' => 6,
        '~cartoon ~web dev' => 2,
        '~cartoon ~web +dev' => 2,
        '~cartoon ~web -samba' => 4,
    ];

    // Checked in declaration order, so the first mismatch aborts the test
    // exactly as a sequence of individual assertions would.
    foreach ($expectedCounts as $query => $expected) {
        $matches = self::$linkFilter->filter(BookmarkFilter::$FILTER_TAG, $query);
        $this->assertEquals($expected, count($matches));
    }
}
/**
* Filter bookmarks by #hashtag.
*/