Markdown: don't escape content + sanitize sensitive tags
Instead of trying to fix broken content before Markdown parsing, parse it unescaped, then sanitize sensitive tags (script, style, iframe, etc.).
This commit is contained in:
parent
bfec695df1
commit
2925687e1e
3 changed files with 58 additions and 16 deletions
|
@ -62,13 +62,29 @@ function endsWith($haystack, $needle, $case=true)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlspecialchars wrapper
|
* Htmlspecialchars wrapper
|
||||||
|
*
|
||||||
|
* @param string $str the string to escape.
|
||||||
|
*
|
||||||
|
* @return string escaped.
|
||||||
*/
|
*/
|
||||||
function escape($str)
|
function escape($str)
|
||||||
{
|
{
|
||||||
return htmlspecialchars($str, ENT_COMPAT, 'UTF-8', false);
|
return htmlspecialchars($str, ENT_COMPAT, 'UTF-8', false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse the escape function.
|
||||||
|
*
|
||||||
|
* @param string $str the string to unescape.
|
||||||
|
*
|
||||||
|
* @return string unescaped string.
|
||||||
|
*/
|
||||||
|
function unescape($str)
|
||||||
|
{
|
||||||
|
return htmlspecialchars_decode($str);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Link sanitization before templating
|
* Link sanitization before templating
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -117,23 +117,43 @@ function reverse_space2nbsp($description)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove '>' at start of line auto generated by Shaarli core system
|
* Escape dangerous HTML tags (script, style, iframe, etc.) and strip inline on*="..." event attributes.
|
||||||
* to allow markdown blockquotes.
|
* Doesn't affect <code> content (already escaped by Parsedown).
|
||||||
*
|
*
|
||||||
* @param string $description input description text.
|
* @param string $description input description text.
|
||||||
*
|
*
|
||||||
* @return string $description without HTML links.
|
* @return string given string escaped.
|
||||||
*/
|
*/
|
||||||
function reset_quote_tags($description)
|
function sanitize_html($description)
|
||||||
{
|
{
|
||||||
return preg_replace('/^( *)> /m', '$1> ', $description);
|
$escapeTags = array(
|
||||||
|
'script',
|
||||||
|
'style',
|
||||||
|
'link',
|
||||||
|
'iframe',
|
||||||
|
'frameset',
|
||||||
|
'frame',
|
||||||
|
);
|
||||||
|
foreach ($escapeTags as $tag) {
|
||||||
|
$description = preg_replace_callback(
|
||||||
|
'#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
|
||||||
|
function ($match) { return escape($match[0]); },
|
||||||
|
$description);
|
||||||
|
}
|
||||||
|
$description = preg_replace(
|
||||||
|
'#(<[^>]+)on[a-z]*="[^"]*"#is',
|
||||||
|
'$1',
|
||||||
|
$description);
|
||||||
|
return $description;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Render shaare contents through Markdown parser.
|
* Render shaare contents through Markdown parser.
|
||||||
* 1. Remove HTML generated by Shaarli core.
|
* 1. Remove HTML generated by Shaarli core.
|
||||||
* 2. Generate markdown descriptions.
|
* 2. Reverse the escape function.
|
||||||
* 3. Wrap description in 'markdown' CSS class.
|
* 3. Generate markdown descriptions.
|
||||||
|
* 4. Sanitize sensitive HTML tags for security.
|
||||||
|
* 5. Wrap description in 'markdown' CSS class.
|
||||||
*
|
*
|
||||||
* @param string $description input description text.
|
* @param string $description input description text.
|
||||||
*
|
*
|
||||||
|
@ -147,11 +167,12 @@ function process_markdown($description)
|
||||||
$processedDescription = reverse_text2clickable($processedDescription);
|
$processedDescription = reverse_text2clickable($processedDescription);
|
||||||
$processedDescription = reverse_nl2br($processedDescription);
|
$processedDescription = reverse_nl2br($processedDescription);
|
||||||
$processedDescription = reverse_space2nbsp($processedDescription);
|
$processedDescription = reverse_space2nbsp($processedDescription);
|
||||||
$processedDescription = reset_quote_tags($processedDescription);
|
$processedDescription = unescape($processedDescription);
|
||||||
$processedDescription = $parsedown
|
$processedDescription = $parsedown
|
||||||
->setMarkupEscaped(false)
|
->setMarkupEscaped(false)
|
||||||
->setBreaksEnabled(true)
|
->setBreaksEnabled(true)
|
||||||
->text($processedDescription);
|
->text($processedDescription);
|
||||||
|
$processedDescription = sanitize_html($processedDescription);
|
||||||
$processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
|
$processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
|
||||||
|
|
||||||
return $processedDescription;
|
return $processedDescription;
|
||||||
|
|
|
@ -100,13 +100,18 @@ function testReverseSpace2nbsp()
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test reset_quote_tags()
|
* Test sanitize_html().
|
||||||
*/
|
*/
|
||||||
function testResetQuoteTags()
|
function testSanitizeHtml() {
|
||||||
{
|
$input = '< script src="js.js"/>';
|
||||||
$text = '> quote1'. PHP_EOL . ' > quote2 ' . PHP_EOL . 'noquote';
|
$input .= '< script attr>alert(\'xss\');</script>';
|
||||||
$processedText = escape($text);
|
$input .= '<style> * { display: none }</style>';
|
||||||
$reversedText = reset_quote_tags($processedText);
|
$output = escape($input);
|
||||||
$this->assertEquals($text, $reversedText);
|
$input .= '<a href="#" onmouseHover="alert(\'xss\');" attr="tt">link</a>';
|
||||||
|
$output .= '<a href="#" attr="tt">link</a>';
|
||||||
|
$this->assertEquals($output, sanitize_html($input));
|
||||||
|
// Do not touch escaped HTML.
|
||||||
|
$input = escape($input);
|
||||||
|
$this->assertEquals($input, sanitize_html($input));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue