[core] Use Parsedown for Markdown parsing (#1783)
This commit is contained in:
parent
fe166d0216
commit
ff98efe8dc
7 changed files with 1744 additions and 45 deletions
|
@ -235,7 +235,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8
|
||||||
* [xurxof](https://github.com/xurxof)
|
* [xurxof](https://github.com/xurxof)
|
||||||
* [yardenac](https://github.com/yardenac)
|
* [yardenac](https://github.com/yardenac)
|
||||||
* [ZeNairolf](https://github.com/ZeNairolf)
|
* [ZeNairolf](https://github.com/ZeNairolf)
|
||||||
|
|
||||||
Licenses
|
Licenses
|
||||||
===
|
===
|
||||||
|
|
||||||
|
@ -243,6 +243,7 @@ The source code for RSS-Bridge is [Public Domain](UNLICENSE).
|
||||||
|
|
||||||
RSS-Bridge uses third party libraries with their own license:
|
RSS-Bridge uses third party libraries with their own license:
|
||||||
|
|
||||||
|
* [`Parsedown`](https://github.com/erusev/parsedown) licensed under the [MIT License](http://opensource.org/licenses/MIT)
|
||||||
* [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT)
|
* [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT)
|
||||||
* [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT)
|
* [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,9 @@ class NyaaTorrentsBridge extends BridgeAbstract {
|
||||||
|
|
||||||
//Retrieve data from page contents
|
//Retrieve data from page contents
|
||||||
$item_title = str_replace(' :: Nyaa', '', $item_html->find('title', 0)->plaintext);
|
$item_title = str_replace(' :: Nyaa', '', $item_html->find('title', 0)->plaintext);
|
||||||
$item_desc = str_get_html(markdownToHtml($item_html->find('#torrent-description', 0)->innertext));
|
$item_desc = str_get_html(
|
||||||
|
markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext))
|
||||||
|
);
|
||||||
$item_author = extractFromDelimiters($item_html->outertext, 'href="/user/', '"');
|
$item_author = extractFromDelimiters($item_html->outertext, 'href="/user/', '"');
|
||||||
$item_date = intval(extractFromDelimiters($item_html->outertext, 'data-timestamp="', '"'));
|
$item_date = intval(extractFromDelimiters($item_html->outertext, 'data-timestamp="', '"'));
|
||||||
|
|
||||||
|
|
|
@ -27,16 +27,13 @@ class RainbowSixSiegeBridge extends BridgeAbstract {
|
||||||
$uri = $uri . $jsonItem['button']['buttonUrl'];
|
$uri = $uri . $jsonItem['button']['buttonUrl'];
|
||||||
|
|
||||||
$thumbnail = '<img src="' . $jsonItem['thumbnail']['url'] . '" alt="Thumbnail">';
|
$thumbnail = '<img src="' . $jsonItem['thumbnail']['url'] . '" alt="Thumbnail">';
|
||||||
$content = $thumbnail . '<br />' . $jsonItem['content'];
|
$content = $thumbnail . '<br />' . markdownToHtml($jsonItem['content']);
|
||||||
|
|
||||||
// Line breaks
|
|
||||||
$content = preg_replace("/\r\n|\r|\n/", '<br/>', $content);
|
|
||||||
|
|
||||||
$item = array();
|
$item = array();
|
||||||
$item['uri'] = $uri;
|
$item['uri'] = $uri;
|
||||||
$item['id'] = $jsonItem['id'];
|
$item['id'] = $jsonItem['id'];
|
||||||
$item['title'] = $jsonItem['title'];
|
$item['title'] = $jsonItem['title'];
|
||||||
$item['content'] = markdownToHtml($content);
|
$item['content'] = $content;
|
||||||
$item['timestamp'] = strtotime($jsonItem['date']);
|
$item['timestamp'] = strtotime($jsonItem['date']);
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
|
|
40
lib/html.php
40
lib/html.php
|
@ -195,7 +195,7 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert Markdown into HTML. Only a subset of the Markdown syntax is implemented.
|
* Convert Markdown into HTML with Parsedown.
|
||||||
*
|
*
|
||||||
* @link https://daringfireball.net/projects/markdown/ Markdown
|
* @link https://daringfireball.net/projects/markdown/ Markdown
|
||||||
* @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec
|
* @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec
|
||||||
|
@ -205,40 +205,6 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){
|
||||||
*/
|
*/
|
||||||
function markdownToHtml($string) {
|
function markdownToHtml($string) {
|
||||||
|
|
||||||
//For more details about how these regex work:
|
$Parsedown = new Parsedown();
|
||||||
// https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702
|
return $Parsedown->text($string);
|
||||||
// Images: https://regex101.com/r/JW9Evr/2
|
|
||||||
// Links: https://regex101.com/r/eRGVe7/1
|
|
||||||
// Bold: https://regex101.com/r/2p40Y0/1
|
|
||||||
// Italic: https://regex101.com/r/xJkET9/1
|
|
||||||
// Separator: https://regex101.com/r/ZBEqFP/1
|
|
||||||
// Plain URL: https://regex101.com/r/2JHYwb/1
|
|
||||||
// Site name: https://regex101.com/r/qIuKYE/1
|
|
||||||
|
|
||||||
$string = preg_replace('/\!\[([^\]]*)\]\(([^\) ]+)(?: [^\)]+)?\)/', '<img src="$2" alt="$1" />', $string);
|
|
||||||
$string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '<a href="$2">$1</a>', $string);
|
|
||||||
$string = preg_replace('/\*\*(.*)\*\*/U', '<b>$1</b>', $string);
|
|
||||||
$string = preg_replace('/\*(.*)\*/U', '<i>$1</i>', $string);
|
|
||||||
$string = preg_replace('/__(.*)__/U', '<b>$1</b>', $string);
|
|
||||||
$string = preg_replace('/_(.*)_/U', '<i>$1</i>', $string);
|
|
||||||
$string = preg_replace('/[-]{6,99}/', '<hr />', $string);
|
|
||||||
$string = str_replace(' ', '<br />', $string);
|
|
||||||
$string = preg_replace('/([^"])(https?:\/\/[^ "<]+)([^"])/', '$1<a href="$2">$2</a>$3', $string . ' ');
|
|
||||||
$string = preg_replace('/([^"\/])(www\.[^ "<]+)([^"])/', '$1<a href="http://$2">$2</a>$3', $string . ' ');
|
|
||||||
|
|
||||||
//As the regex are not perfect, we need to fix <i> and </i> that are introduced in URLs
|
|
||||||
// Fixup regex <i>: https://regex101.com/r/NTRPf6/1
|
|
||||||
// Fixup regex </i>: https://regex101.com/r/aNklRp/1
|
|
||||||
|
|
||||||
$count = 1;
|
|
||||||
while($count > 0) {
|
|
||||||
$string = preg_replace('/ (src|href)="([^"]+)<i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count);
|
|
||||||
}
|
|
||||||
|
|
||||||
$count = 1;
|
|
||||||
while($count > 0) {
|
|
||||||
$string = preg_replace('/ (src|href)="([^"]+)<\/i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count);
|
|
||||||
}
|
|
||||||
|
|
||||||
return '<div>' . trim($string) . '</div>';
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,5 +82,6 @@ require_once PATH_LIB . 'contents.php';
|
||||||
|
|
||||||
// Vendor
|
// Vendor
|
||||||
define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */
|
define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */
|
||||||
require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php';
|
require_once PATH_LIB_VENDOR . 'parsedown/Parsedown.php';
|
||||||
require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php';
|
require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php';
|
||||||
|
require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php';
|
||||||
|
|
20
vendor/parsedown/LICENSE.txt
vendored
Normal file
20
vendor/parsedown/LICENSE.txt
vendored
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2013-2018 Emanuil Rusev, erusev.com
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
1712
vendor/parsedown/Parsedown.php
vendored
Normal file
1712
vendor/parsedown/Parsedown.php
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue