Markdown: don't escape content + sanitize sensitive tags

Instead of trying to fix broken content for Markdown parsing, parse it unescaped, then sanitize sensitive tags such as scripts, etc.
This commit is contained in:
ArthurHoaro 2016-02-19 19:37:13 +01:00
parent bfec695df1
commit 2925687e1e
3 changed files with 58 additions and 16 deletions

View file

@ -62,13 +62,29 @@ function endsWith($haystack, $needle, $case=true)
} }
/** /**
* htmlspecialchars wrapper * Htmlspecialchars wrapper
*
* @param string $str the string to escape.
*
* @return string escaped.
*/ */
function escape($str) function escape($str)
{ {
return htmlspecialchars($str, ENT_COMPAT, 'UTF-8', false); return htmlspecialchars($str, ENT_COMPAT, 'UTF-8', false);
} }
/**
 * Reverse the escape function.
 *
 * escape() encodes with ENT_COMPAT (double quotes only), so the decoding
 * uses the same flag explicitly. Relying on the default flags would make the
 * result depend on the PHP version: since PHP 8.1 the default also decodes
 * single-quote entities such as &#039;, which escape() never produces.
 *
 * @param string $str the string to unescape.
 *
 * @return string unescaped string.
 */
function unescape($str)
{
    return htmlspecialchars_decode($str, ENT_COMPAT);
}
/** /**
* Link sanitization before templating * Link sanitization before templating
*/ */

View file

@ -117,23 +117,43 @@ function reverse_space2nbsp($description)
} }
/** /**
* Remove '>' at start of line auto generated by Shaarli core system * Remove dangerous HTML tags (script, iframe, etc.).
* to allow markdown blockquotes. * Doesn't affect <code> content (already escaped by Parsedown).
* *
* @param string $description input description text. * @param string $description input description text.
* *
* @return string $description without HTML links. * @return string given string escaped.
*/ */
function reset_quote_tags($description) function sanitize_html($description)
{ {
return preg_replace('/^( *)&gt; /m', '$1> ', $description); $escapeTags = array(
'script',
'style',
'link',
'iframe',
'frameset',
'frame',
);
foreach ($escapeTags as $tag) {
$description = preg_replace_callback(
'#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
function ($match) { return escape($match[0]); },
$description);
}
$description = preg_replace(
'#(<[^>]+)on[a-z]*="[^"]*"#is',
'$1',
$description);
return $description;
} }
/** /**
* Render shaare contents through Markdown parser. * Render shaare contents through Markdown parser.
* 1. Remove HTML generated by Shaarli core. * 1. Remove HTML generated by Shaarli core.
* 2. Generate markdown descriptions. * 2. Reverse the escape function.
* 3. Wrap description in 'markdown' CSS class. * 3. Generate markdown descriptions.
* 4. Sanitize sensitive HTML tags for security.
* 5. Wrap description in 'markdown' CSS class.
* *
* @param string $description input description text. * @param string $description input description text.
* *
@ -147,11 +167,12 @@ function process_markdown($description)
$processedDescription = reverse_text2clickable($processedDescription); $processedDescription = reverse_text2clickable($processedDescription);
$processedDescription = reverse_nl2br($processedDescription); $processedDescription = reverse_nl2br($processedDescription);
$processedDescription = reverse_space2nbsp($processedDescription); $processedDescription = reverse_space2nbsp($processedDescription);
$processedDescription = reset_quote_tags($processedDescription); $processedDescription = unescape($processedDescription);
$processedDescription = $parsedown $processedDescription = $parsedown
->setMarkupEscaped(false) ->setMarkupEscaped(false)
->setBreaksEnabled(true) ->setBreaksEnabled(true)
->text($processedDescription); ->text($processedDescription);
$processedDescription = sanitize_html($processedDescription);
$processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
return $processedDescription; return $processedDescription;

View file

@ -100,13 +100,18 @@ function testReverseSpace2nbsp()
} }
/** /**
* Test reset_quote_tags() * Test sanitize_html().
*/ */
function testResetQuoteTags() function testSanitizeHtml() {
{ $input = '< script src="js.js"/>';
$text = '> quote1'. PHP_EOL . ' > quote2 ' . PHP_EOL . 'noquote'; $input .= '< script attr>alert(\'xss\');</script>';
$processedText = escape($text); $input .= '<style> * { display: none }</style>';
$reversedText = reset_quote_tags($processedText); $output = escape($input);
$this->assertEquals($text, $reversedText); $input .= '<a href="#" onmouseHover="alert(\'xss\');" attr="tt">link</a>';
$output .= '<a href="#" attr="tt">link</a>';
$this->assertEquals($output, sanitize_html($input));
// Do not touch escaped HTML.
$input = escape($input);
$this->assertEquals($input, sanitize_html($input));
} }
} }