diff --git a/application/bookmark/Bookmark.php b/application/bookmark/Bookmark.php
index fa45d2fc..ea565d1f 100644
--- a/application/bookmark/Bookmark.php
+++ b/application/bookmark/Bookmark.php
@@ -54,6 +54,9 @@ class Bookmark
     /** @var bool True if the bookmark can only be seen while logged in */
     protected $private;
 
+    /** @var mixed[] Any additional content for a bookmark, indexed by key. Currently used for search highlights. */
+    protected $additionalContent = [];
+
     /**
      * Initialize a link from array data. Especially useful to create a Bookmark from former link storage format.
      *
@@ -95,6 +98,8 @@ public function fromArray(array $data): Bookmark
      *   - the URL with the permalink
      *   - the title with the URL
      *
+     * Also remove the 'search_highlight' additional content entry, so that it is never saved in the datastore.
+     *
      * @throws InvalidBookmarkException
      */
     public function validate(): void
@@ -112,6 +117,9 @@ public function validate(): void
         if (empty($this->title)) {
             $this->title = $this->url;
         }
+        if (array_key_exists('search_highlight', $this->additionalContent)) {
+            unset($this->additionalContent['search_highlight']);
+        }
     }
 
     /**
@@ -435,6 +443,44 @@ public function setTagsString(?string $tags): Bookmark
         return $this;
     }
 
+    /**
+     * Get the entire additionalContent array.
+     *
+     * @return mixed[] Every additional content entry, indexed by key.
+     */
+    public function getAdditionalContent(): array
+    {
+        return $this->additionalContent;
+    }
+
+    /**
+     * Set a single entry in additionalContent, by key. An existing entry with the same key is overwritten.
+     *
+     * @param string     $key   Name of the entry.
+     * @param mixed|null $value Any type of value can be set.
+     *
+     * @return $this
+     */
+    public function addAdditionalContentEntry(string $key, $value): self
+    {
+        $this->additionalContent[$key] = $value;
+
+        return $this;
+    }
+
+    /**
+     * Get a single entry in additionalContent, by key.
+     *
+     * @param string     $key     Name of the entry.
+     * @param mixed|null $default Value returned if there is no entry for $key.
+     *
+     * @return mixed|null The entry value (any type, possibly null), or $default if the key is not set.
+     */
+    public function getAdditionalContentEntry(string $key, $default = null)
+    {
+        return array_key_exists($key, $this->additionalContent) ? $this->additionalContent[$key] : $default;
+    }
+
     /**
      * Rename a tag in tags list.
      *
diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php
index 4232f114..c79386ea 100644
--- a/application/bookmark/BookmarkFilter.php
+++ b/application/bookmark/BookmarkFilter.php
@@ -201,7 +201,7 @@ private function filterFulltext(string $searchterms, string $visibility = 'all')
             return $this->noFilter($visibility);
         }
 
-        $filtered = array();
+        $filtered = [];
         $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
         $exactRegex = '/"([^"]+)"/';
         // Retrieve exact search terms.
@@ -213,8 +213,8 @@ private function filterFulltext(string $searchterms, string $visibility = 'all')
         $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
 
         // Filter excluding terms and update andSearch.
-        $excludeSearch = array();
-        $andSearch = array();
+        $excludeSearch = [];
+        $andSearch = [];
         foreach ($explodedSearchAnd as $needle) {
             if ($needle[0] == '-' && strlen($needle) > 1) {
                 $excludeSearch[] = substr($needle, 1);
@@ -234,33 +234,38 @@ private function filterFulltext(string $searchterms, string $visibility = 'all')
                 }
             }
 
-            // Concatenate link fields to search across fields.
-            // Adds a '\' separator for exact search terms.
-            $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
+            $lengths = [];
+            $content = $this->buildFullTextSearchableLink($link, $lengths);
 
             // Be optimistic
             $found = true;
+            $foundPositions = [];
 
             // First, we look for exact term search
-            for ($i = 0; $i < count($exactSearch) && $found; $i++) {
-                $found = strpos($content, $exactSearch[$i]) !== false;
-            }
-
-            // Iterate over keywords, if keyword is not found,
+            // Then iterate over keywords, if keyword is not found,
             // no need to check for the others. We want all or nothing.
-            for ($i = 0; $i < count($andSearch) && $found; $i++) {
-                $found = strpos($content, $andSearch[$i]) !== false;
+            foreach ([$exactSearch, $andSearch] as $search) {
+                for ($i = 0; $i < count($search) && $found !== false; $i++) {
+                    $found = mb_strpos($content, $search[$i]);
+                    if ($found === false) {
+                        break;
+                    }
+
+                    $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])];
+                }
             }
 
             // Exclude terms.
-            for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
+            for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) {
                 $found = strpos($content, $excludeSearch[$i]) === false;
             }
 
-            if ($found) {
+            if ($found !== false) {
+                $link->addAdditionalContentEntry(
+                    'search_highlight',
+                    $this->postProcessFoundPositions($lengths, $foundPositions)
+                );
+
                 $filtered[$id] = $link;
             }
         }
@@ -477,4 +482,74 @@ public static function tagsStrToArray(string $tags, bool $casesensitive): array
 
         return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
     }
+
+    /**
+     * This method finalizes the content of the foundPositions array,
+     * by associating every search result with the bookmark field it starts in,
+     * making sure that there are no overlapping results, etc.
+     *
+     * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark content.
+     * @param array $foundPositions Positions where the search results were found in the aggregated content.
+     *
+     * @return array Updated $foundPositions, by bookmark field, with positions relative to the start of that field.
+     */
+    protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
+    {
+        // Sort results by starting position ASC; <=> also returns 0 for equal positions, as usort() expects.
+        usort($foundPositions, function (array $entryA, array $entryB): int {
+            return $entryA['start'] <=> $entryB['start'];
+        });
+
+        $out = [];
+        $currentMax = -1;
+        foreach ($foundPositions as $foundPosition) {
+            // We do not allow overlapping highlights: skip results starting before the end of the last kept one.
+            if ($foundPosition['start'] < $currentMax) {
+                continue;
+            }
+
+            $currentMax = $foundPosition['end'];
+            foreach ($fieldLengths as $part => $length) {
+                if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) {
+                    continue;
+                }
+
+                $out[$part][] = [
+                    'start' => $foundPosition['start'] - $length['start'],
+                    'end' => $foundPosition['end'] - $length['start'],
+                ];
+                break;
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * Concatenate link fields to search across fields. Adds a '\' separator for exact search terms.
+     * Also populate the $lengths array with starting and ending positions of every bookmark field
+     * inside the concatenated content.
+     *
+     * @param Bookmark $link
+     * @param array    $lengths (by reference)
+     *
+     * @return string Lowercase concatenated fields content.
+     */
+    protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
+    {
+        $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
+        $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
+        $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
+        $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
+
+        $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())];
+        $nextField = $lengths['title']['end'] + 1;
+        $lengths['description'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getDescription())];
+        $nextField = $lengths['description']['end'] + 1;
+        $lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())];
+        $nextField = $lengths['url']['end'] + 1;
+        $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getTagsString())];
+
+        return $content;
+    }
 }
diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php
index 9d4a0fa0..d58a5e39 100644
--- a/application/formatter/BookmarkDefaultFormatter.php
+++ b/application/formatter/BookmarkDefaultFormatter.php
@@ -12,10 +12,13 @@
  */
 class BookmarkDefaultFormatter extends BookmarkFormatter
 {
+    const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT';
+    const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|';
+
     /**
      * @inheritdoc
      */
-    public function formatTitle($bookmark)
+    protected function formatTitle($bookmark)
     {
         return escape($bookmark->getTitle());
     }
@@ -23,10 +26,28 @@ public function formatTitle($bookmark)
     /**
      * @inheritdoc
      */
-    public function formatDescription($bookmark)
+    protected function formatTitleHtml($bookmark)
+    {
+        $title = $this->tokenizeSearchHighlightField(
+            $bookmark->getTitle() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['title'] ?? []
+        );
+
+        return $this->replaceTokens(escape($title));
+    }
+
+    /**
+     * @inheritdoc
+     */
+    protected function formatDescription($bookmark)
     {
         $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
-        return format_description(escape($bookmark->getDescription()), $indexUrl);
+        $description = $this->tokenizeSearchHighlightField(
+            $bookmark->getDescription() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
+        );
+
+        return $this->replaceTokens(format_description(escape($description), $indexUrl));
     }
 
     /**
@@ -40,7 +61,27 @@ protected function formatTagList($bookmark)
     /**
      * @inheritdoc
      */
-    public function formatTagString($bookmark)
+    protected function formatTagListHtml($bookmark)
+    {
+        if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) {
+            return $this->formatTagList($bookmark);
+        }
+
+        $tags = $this->tokenizeSearchHighlightField(
+            $bookmark->getTagsString(),
+            $bookmark->getAdditionalContentEntry('search_highlight')['tags']
+        );
+        $tags = $this->filterTagList(explode(' ', $tags));
+        $tags = escape($tags);
+        $tags = $this->replaceTokensArray($tags);
+
+        return $tags;
+    }
+
+    /**
+     * @inheritdoc
+     */
+    protected function formatTagString($bookmark)
     {
         return implode(' ', $this->formatTagList($bookmark));
     }
@@ -48,7 +89,7 @@ public function formatTagString($bookmark)
     /**
      * @inheritdoc
      */
-    public function formatUrl($bookmark)
+    protected function formatUrl($bookmark)
     {
         if ($bookmark->isNote() && isset($this->contextData['index_url'])) {
             return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/'));
@@ -77,6 +118,19 @@ protected function formatRealUrl($bookmark)
         return escape($bookmark->getUrl());
     }
 
+    /**
+     * @inheritdoc
+     */
+    protected function formatUrlHtml($bookmark)
+    {
+        $url = $this->tokenizeSearchHighlightField(
+            $bookmark->getUrl() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['url'] ?? []
+        );
+
+        return $this->replaceTokens(escape($url));
+    }
+
     /**
      * @inheritdoc
      */
@@ -84,4 +138,72 @@ protected function formatThumbnail($bookmark)
     {
         return escape($bookmark->getThumbnail());
     }
+
+    /**
+     * Insert search highlight tokens in the provided field content, based on a list of search result positions.
+     *
+     * @param string     $fieldContent
+     * @param array|null $positions    List of search results with 'start' and 'end' positions, in ascending order.
+     *
+     * @return string Updated $fieldContent.
+     */
+    protected function tokenizeSearchHighlightField(string $fieldContent, ?array $positions): string
+    {
+        if (empty($positions)) {
+            return $fieldContent;
+        }
+
+        $shift = 0; // Number of characters added by the tokens inserted so far.
+        $tokensLength = mb_strlen(static::SEARCH_HIGHLIGHT_OPEN . static::SEARCH_HIGHLIGHT_CLOSE);
+        foreach ($positions as $position) {
+            $position = [
+                'start' => $position['start'] + $shift,
+                'end' => $position['end'] + $shift,
+            ];
+
+            $content = mb_substr($fieldContent, 0, $position['start']);
+            $content .= static::SEARCH_HIGHLIGHT_OPEN;
+            $content .= mb_substr($fieldContent, $position['start'], $position['end'] - $position['start']);
+            $content .= static::SEARCH_HIGHLIGHT_CLOSE;
+            $content .= mb_substr($fieldContent, $position['end']);
+
+            $fieldContent = $content;
+
+            $shift += $tokensLength;
+        }
+
+        return $fieldContent;
+    }
+
+    /**
+     * Replace search highlight tokens with HTML highlighted span.
+     *
+     * @param string $fieldContent
+     *
+     * @return string updated content.
+     */
+    protected function replaceTokens(string $fieldContent): string
+    {
+        return str_replace(
+            [static::SEARCH_HIGHLIGHT_OPEN, static::SEARCH_HIGHLIGHT_CLOSE],
+            ['<span class="search-highlight">', '</span>'],
+            $fieldContent
+        );
+    }
+
+    /**
+     * Apply replaceTokens to an array of content strings.
+     *
+     * @param string[] $fieldContents
+     *
+     * @return string[] The same entries (keys preserved), with highlight tokens replaced.
+     */
+    protected function replaceTokensArray(array $fieldContents): array
+    {
+        foreach ($fieldContents as $key => $entry) {
+            $fieldContents[$key] = $this->replaceTokens($entry);
+        }
+
+        return $fieldContents;
+    }
 }
diff --git a/application/formatter/BookmarkFormatter.php b/application/formatter/BookmarkFormatter.php
index 0042dafe..e1b7f705 100644
--- a/application/formatter/BookmarkFormatter.php
+++ b/application/formatter/BookmarkFormatter.php
@@ -2,7 +2,7 @@
 
 namespace Shaarli\Formatter;
 
-use DateTime;
+use DateTimeInterface;
 use Shaarli\Bookmark\Bookmark;
 use Shaarli\Config\ConfigManager;
 
@@ -11,6 +11,29 @@
  *
  * Abstract class processing all bookmark attributes through methods designed to be overridden.
  *
+ * List of available formatted fields:
+ *   - id                 ID
+ *   - shorturl           Unique identifier, used in permalinks
+ *   - url                URL, can be altered in some way, e.g. passing through an HTTP reverse proxy
+ *   - real_url           (legacy) same as `url`
+ *   - url_html           URL to be displayed in HTML content (it can contain HTML tags)
+ *   - title              Title
+ *   - title_html         Title to be displayed in HTML content (it can contain HTML tags)
+ *   - description        Description content. It most likely contains HTML tags
+ *   - thumbnail          Thumbnail: path to local cache file, false if there is none, null if hasn't been retrieved
+ *   - taglist            List of tags (array)
+ *   - taglist_urlencoded List of tags (array) URL encoded: it must be used to create a link to a URL containing a tag
+ *   - taglist_html       List of tags (array) to be displayed in HTML content (it can contain HTML tags)
+ *   - tags               Tags separated by a single whitespace
+ *   - tags_urlencoded    Tags separated by a single whitespace, URL encoded: must be used to create a link
+ *   - sticky             Is sticky (bool)
+ *   - private            Is private (bool)
+ *   - class              Additional CSS class
+ *   - created            Creation DateTime
+ *   - updated            Last edit DateTime
+ *   - timestamp          Creation timestamp
+ *   - updated_timestamp  Last edit timestamp
+ *
  * @package Shaarli\Formatter
  */
 abstract class BookmarkFormatter
@@ -55,13 +78,16 @@ public function format($bookmark)
         $out['shorturl'] = $this->formatShortUrl($bookmark);
         $out['url'] = $this->formatUrl($bookmark);
         $out['real_url'] = $this->formatRealUrl($bookmark);
+        $out['url_html'] = $this->formatUrlHtml($bookmark);
         $out['title'] = $this->formatTitle($bookmark);
+        $out['title_html'] = $this->formatTitleHtml($bookmark);
         $out['description'] = $this->formatDescription($bookmark);
         $out['thumbnail'] = $this->formatThumbnail($bookmark);
-        $out['urlencoded_taglist'] = $this->formatUrlEncodedTagList($bookmark);
         $out['taglist'] = $this->formatTagList($bookmark);
-        $out['urlencoded_tags'] = $this->formatUrlEncodedTagString($bookmark);
+        $out['taglist_urlencoded'] = $this->formatTagListUrlEncoded($bookmark);
+        $out['taglist_html'] = $this->formatTagListHtml($bookmark);
         $out['tags'] = $this->formatTagString($bookmark);
+        $out['tags_urlencoded'] = $this->formatTagStringUrlEncoded($bookmark);
         $out['sticky'] = $bookmark->isSticky();
         $out['private'] = $bookmark->isPrivate();
         $out['class'] = $this->formatClass($bookmark);
@@ -69,6 +95,7 @@ public function format($bookmark)
         $out['updated'] = $this->formatUpdated($bookmark);
         $out['timestamp'] = $this->formatCreatedTimestamp($bookmark);
         $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark);
+
         return $out;
     }
 
@@ -135,6 +162,18 @@ protected function formatRealUrl($bookmark)
         return $this->formatUrl($bookmark);
     }
 
+    /**
+     * Format Url Html: to be displayed in HTML content, it can contain HTML tags.
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return string formatted Url HTML
+     */
+    protected function formatUrlHtml($bookmark)
+    {
+        return $this->formatUrl($bookmark);
+    }
+
     /**
      * Format Title
      *
@@ -147,6 +186,18 @@ protected function formatTitle($bookmark)
         return $bookmark->getTitle();
     }
 
+    /**
+     * Format Title HTML: to be displayed in HTML content, it can contain HTML tags.
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return string formatted Title
+     */
+    protected function formatTitleHtml($bookmark)
+    {
+        return $bookmark->getTitle();
+    }
+
     /**
      * Format Description
      *
@@ -190,11 +241,23 @@ protected function formatTagList($bookmark)
      *
      * @return array formatted Tags
      */
-    protected function formatUrlEncodedTagList($bookmark)
+    protected function formatTagListUrlEncoded($bookmark)
     {
         return array_map('urlencode', $this->filterTagList($bookmark->getTags()));
     }
 
+    /**
+     * Format Tags HTML: to be displayed in HTML content, it can contain HTML tags.
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return array formatted Tags; by default the same list as formatTagList()
+     */
+    protected function formatTagListHtml($bookmark)
+    {
+        return $this->formatTagList($bookmark);
+    }
+
     /**
      * Format TagString
      *
@@ -214,9 +277,9 @@ protected function formatTagString($bookmark)
      *
      * @return string formatted TagString
      */
-    protected function formatUrlEncodedTagString($bookmark)
+    protected function formatTagStringUrlEncoded($bookmark)
     {
-        return implode(' ', $this->formatUrlEncodedTagList($bookmark));
+        return implode(' ', $this->formatTagListUrlEncoded($bookmark));
     }
 
     /**
@@ -237,7 +300,7 @@ protected function formatClass($bookmark)
      *
      * @param Bookmark $bookmark instance
      *
-     * @return DateTime instance
+     * @return DateTimeInterface instance
      */
     protected function formatCreated(Bookmark $bookmark)
     {
@@ -249,7 +312,7 @@ protected function formatCreated(Bookmark $bookmark)
      *
      * @param Bookmark $bookmark instance
      *
-     * @return DateTime instance
+     * @return DateTimeInterface instance
      */
     protected function formatUpdated(Bookmark $bookmark)
     {
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php
index 5d244d4c..f7714be9 100644
--- a/application/formatter/BookmarkMarkdownFormatter.php
+++ b/application/formatter/BookmarkMarkdownFormatter.php
@@ -56,7 +56,10 @@ public function formatDescription($bookmark)
             return parent::formatDescription($bookmark);
         }
 
-        $processedDescription = $bookmark->getDescription();
+        $processedDescription = $this->tokenizeSearchHighlightField(
+            $bookmark->getDescription() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
+        );
         $processedDescription = $this->filterProtocols($processedDescription);
         $processedDescription = $this->formatHashTags($processedDescription);
         $processedDescription = $this->reverseEscapedHtml($processedDescription);
@@ -65,6 +68,7 @@ public function formatDescription($bookmark)
             ->setBreaksEnabled(true)
             ->text($processedDescription);
         $processedDescription = $this->sanitizeHtml($processedDescription);
+        $processedDescription = $this->replaceTokens($processedDescription);
 
         if (!empty($processedDescription)) {
             $processedDescription = '