//' * @return cleaned string, e.g. 'foobar' */
function stripRecursiveHTMLSection($string, $tag_name, $tag_start){
    // Nesting is resolved by counting '<tag' openers against '</tag>' closers
    // inside the candidate section, so the closing tag must be the real one.
    $open_tag = '<' . $tag_name;
    $close_tag = '</' . $tag_name . '>';
    $close_tag_length = strlen($close_tag);

    // $tag_start has to be an opening tag of $tag_name; otherwise the
    // open/close counting below is meaningless and nothing is removed.
    if(strpos($tag_start, $open_tag) !== 0) {
        return $string;
    }

    while(($section_start = strpos($string, $tag_start)) !== false) {
        // Upper bound on how many closing tags are tried for one section.
        $max_recursion = 100;
        $search_offset = $section_start;

        do {
            $max_recursion--;
            $section_end = strpos($string, $close_tag, $search_offset);

            // Unbalanced markup: no (further) closing tag exists. Feeding
            // `false` into the offset arithmetic could yield an empty section
            // and an endless outer loop, so stop and leave the rest untouched.
            if($section_end === false) {
                return $string;
            }

            $search_offset = $section_end + $close_tag_length;
            $section_to_remove = substr(
                $string,
                $section_start,
                $section_end - $section_start + $close_tag_length
            );
            $open_tag_count = substr_count($section_to_remove, $open_tag);
            $close_tag_count = substr_count($section_to_remove, $close_tag);

            // More openers than closers means a nested element was closed,
            // not the section itself: extend up to the next closing tag.
        } while($open_tag_count > $close_tag_count && $max_recursion > 0);

        $string = str_replace($section_to_remove, '', $string);
    }

    return $string;
}

/**
 * Convert Markdown tags into HTML tags. Only a subset of the Markdown syntax is implemented.
 *
 * Handled constructs: images, links, bold (** and __), italic (* and _),
 * separators (six or more dashes), '&#10;' line breaks, plain http(s) URLs
 * and bare 'www.' site names. The result is wrapped in a <div> element.
 *
 * @param $string input string in Markdown format
 * @return output string in HTML format
 */
function markdownToHtml($string) {

    //For more details about how these regex work:
    // https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702
    // Images: https://regex101.com/r/JW9Evr/1
    // Links: https://regex101.com/r/eRGVe7/1
    // Bold: https://regex101.com/r/2p40Y0/1
    // Italic: https://regex101.com/r/xJkET9/1
    // Separator: https://regex101.com/r/ZBEqFP/1
    // Plain URL: https://regex101.com/r/2JHYwb/1
    // Site name: https://regex101.com/r/qIuKYE/1

    // Images must be converted before links: both share the [text](url) shape.
    $string = preg_replace('/\!\[([^\]]+)\]\(([^\) ]+)(?: [^\)]+)?\)/', '<img src="$2" alt="$1" />', $string);
    $string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '<a href="$2">$1</a>', $string);

    // Double markers (bold) are handled before single markers (italic).
    $string = preg_replace('/\*\*(.*)\*\*/U', '<b>$1</b>', $string);
    $string = preg_replace('/\*(.*)\*/U', '<i>$1</i>', $string);
    $string = preg_replace('/__(.*)__/U', '<b>$1</b>', $string);
    $string = preg_replace('/_(.*)_/U', '<i>$1</i>', $string);

    $string = preg_replace('/[-]{6,99}/', '<hr />', $string);
    $string = str_replace('&#10;', '<br />', $string);

    // A trailing space is appended so a URL at the very end of the input still
    // has the following character required by the last capture group.
    // URLs directly preceded by a double quote are already inside an attribute.
    $string = preg_replace('/([^"])(https?:\/\/[^ "<]+)([^"])/', '$1<a href="$2">$2</a>$3', $string . ' ');
    $string = preg_replace('/([^"\/])(www\.[^ "<]+)([^"])/', '$1<a href="http://$2">$2</a>$3', $string . ' ');

    //As the regex are not perfect, we need to fix <i> and </i> that are introduced in URLs
    // Fixup regex <i> : https://regex101.com/r/NTRPf6/1
    // Fixup regex </i> : https://regex101.com/r/aNklRp/1

    // Each pass restores one underscore per attribute, so repeat until a pass
    // performs no replacement.
    $count = 1;
    while($count > 0) {
        $string = preg_replace('/ (src|href)="([^"]+)<i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count);
    }

    $count = 1;
    while($count > 0) {
        $string = preg_replace('/ (src|href)="([^"]+)<\/i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count);
    }

    return '<div>' . trim($string) . '</div>';
}