221 lines
7.3 KiB
PHP
221 lines
7.3 KiB
PHP
<?php
|
|
|
|
namespace Shaarli\Formatter;
|
|
|
|
use Shaarli\Config\ConfigManager;
|
|
use Shaarli\Formatter\Parsedown\ShaarliParsedown;
|
|
|
|
/**
|
|
* Class BookmarkMarkdownFormatter
|
|
*
|
|
* Format bookmark description into Markdown format.
|
|
*
|
|
* @package Shaarli\Formatter
|
|
*/
|
|
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
|
|
{
|
|
/**
|
|
* When this tag is present in a bookmark, its description should not be processed with Markdown
|
|
*/
|
|
public const NO_MD_TAG = 'nomarkdown';
|
|
|
|
/** @var \Parsedown instance */
|
|
protected $parsedown;
|
|
|
|
/** @var bool used to escape HTML in Markdown or not.
|
|
* It MUST be set to true for shared instance as HTML content can
|
|
* introduce XSS vulnerabilities.
|
|
*/
|
|
protected $escape;
|
|
|
|
/**
|
|
* @var array List of allowed protocols for links inside bookmark's description.
|
|
*/
|
|
protected $allowedProtocols;
|
|
|
|
/**
|
|
* LinkMarkdownFormatter constructor.
|
|
*
|
|
* @param ConfigManager $conf instance
|
|
* @param bool $isLoggedIn
|
|
*/
|
|
public function __construct(ConfigManager $conf, bool $isLoggedIn)
|
|
{
|
|
parent::__construct($conf, $isLoggedIn);
|
|
|
|
$this->parsedown = new ShaarliParsedown();
|
|
$this->escape = $conf->get('security.markdown_escape', true);
|
|
$this->allowedProtocols = $conf->get('security.allowed_protocols', []);
|
|
}
|
|
|
|
/**
|
|
* @inheritdoc
|
|
*/
|
|
public function formatDescription($bookmark)
|
|
{
|
|
if (in_array(self::NO_MD_TAG, $bookmark->getTags())) {
|
|
return parent::formatDescription($bookmark);
|
|
}
|
|
|
|
$processedDescription = $this->tokenizeSearchHighlightField(
|
|
$bookmark->getDescription() ?? '',
|
|
$bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
|
|
);
|
|
$processedDescription = $this->filterProtocols($processedDescription);
|
|
$processedDescription = $this->formatHashTags($processedDescription);
|
|
$processedDescription = $this->reverseEscapedHtml($processedDescription);
|
|
$processedDescription = $this->parsedown
|
|
->setMarkupEscaped($this->escape)
|
|
->setBreaksEnabled(true)
|
|
->text($processedDescription);
|
|
$processedDescription = $this->sanitizeHtml($processedDescription);
|
|
$processedDescription = $this->replaceTokens($processedDescription);
|
|
|
|
if (!empty($processedDescription)) {
|
|
$processedDescription = '<div class="markdown">' . $processedDescription . '</div>';
|
|
}
|
|
|
|
return $processedDescription;
|
|
}
|
|
|
|
/**
|
|
* Remove the NO markdown tag if it is present
|
|
*
|
|
* @inheritdoc
|
|
*/
|
|
protected function formatTagList($bookmark)
|
|
{
|
|
$out = parent::formatTagList($bookmark);
|
|
if ($this->isLoggedIn === false && ($pos = array_search(self::NO_MD_TAG, $out)) !== false) {
|
|
unset($out[$pos]);
|
|
return array_values($out);
|
|
}
|
|
return $out;
|
|
}
|
|
|
|
/**
|
|
* Replace not whitelisted protocols with http:// in given description.
|
|
* Also adds `index_url` to relative links if it's specified
|
|
*
|
|
* @param string $description input description text.
|
|
*
|
|
* @return string $description without malicious link.
|
|
*/
|
|
protected function filterProtocols($description)
|
|
{
|
|
$allowedProtocols = $this->allowedProtocols;
|
|
$indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
|
|
|
|
return preg_replace_callback(
|
|
'#]\((.*?)\)#is',
|
|
function ($match) use ($allowedProtocols, $indexUrl) {
|
|
$link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
|
|
$link .= whitelist_protocols($match[1], $allowedProtocols);
|
|
return '](' . $link . ')';
|
|
},
|
|
$description
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Replace hashtag in Markdown links format
|
|
* E.g. `#hashtag` becomes `[#hashtag](./add-tag/hashtag)`
|
|
* It includes the index URL if specified.
|
|
*
|
|
* @param string $description
|
|
*
|
|
* @return string
|
|
*/
|
|
protected function formatHashTags($description)
|
|
{
|
|
$indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
|
|
$tokens = '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN . ')' .
|
|
'(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE . ')'
|
|
;
|
|
|
|
/*
|
|
* To support unicode: http://stackoverflow.com/a/35498078/1484919
|
|
* \p{Pc} - to match underscore
|
|
* \p{N} - numeric character in any script
|
|
* \p{L} - letter from any language
|
|
* \p{Mn} - any non marking space (accents, umlauts, etc)
|
|
*/
|
|
$regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}' . $tokens . ']+)/mui';
|
|
$replacement = function (array $match) use ($indexUrl): string {
|
|
$cleanMatch = str_replace(
|
|
BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN,
|
|
'',
|
|
str_replace(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE, '', $match[2])
|
|
);
|
|
return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $cleanMatch . ')';
|
|
};
|
|
|
|
$descriptionLines = explode(PHP_EOL, $description);
|
|
$descriptionOut = '';
|
|
$codeBlockOn = false;
|
|
$lineCount = 0;
|
|
|
|
foreach ($descriptionLines as $descriptionLine) {
|
|
// Detect line of code: starting with 4 spaces,
|
|
// except lists which can start with +/*/- or `2.` after spaces.
|
|
$codeLineOn = preg_match('/^ +(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;
|
|
// Detect and toggle block of code
|
|
if (!$codeBlockOn) {
|
|
$codeBlockOn = preg_match('/^```/', $descriptionLine) > 0;
|
|
} elseif (preg_match('/^```/', $descriptionLine) > 0) {
|
|
$codeBlockOn = false;
|
|
}
|
|
|
|
if (!$codeBlockOn && !$codeLineOn) {
|
|
$descriptionLine = preg_replace_callback($regex, $replacement, $descriptionLine);
|
|
}
|
|
|
|
$descriptionOut .= $descriptionLine;
|
|
if ($lineCount++ < count($descriptionLines) - 1) {
|
|
$descriptionOut .= PHP_EOL;
|
|
}
|
|
}
|
|
|
|
return $descriptionOut;
|
|
}
|
|
|
|
/**
|
|
* Remove dangerous HTML tags (tags, iframe, etc.).
|
|
* Doesn't affect <code> content (already escaped by Parsedown).
|
|
*
|
|
* @param string $description input description text.
|
|
*
|
|
* @return string given string escaped.
|
|
*/
|
|
protected function sanitizeHtml($description)
|
|
{
|
|
$escapeTags = [
|
|
'script',
|
|
'style',
|
|
'link',
|
|
'iframe',
|
|
'frameset',
|
|
'frame',
|
|
];
|
|
foreach ($escapeTags as $tag) {
|
|
$description = preg_replace_callback(
|
|
'#<\s*' . $tag . '[^>]*>(.*</\s*' . $tag . '[^>]*>)?#is',
|
|
function ($match) {
|
|
return escape($match[0]);
|
|
},
|
|
$description
|
|
);
|
|
}
|
|
$description = preg_replace(
|
|
'#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
|
|
'$1',
|
|
$description
|
|
);
|
|
return $description;
|
|
}
|
|
|
|
protected function reverseEscapedHtml($description)
|
|
{
|
|
return unescape($description);
|
|
}
|
|
}
|