From f09dedf16bf952a0f690660bac419c14e0f92c52 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 14:58:03 +0000 Subject: [PATCH 1/8] Corrected PinterestBridge. --- bridges/PinterestBridge.php | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 3bb0530c..7d06fee7 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -51,7 +51,8 @@ class PinterestBridge extends BridgeAbstract{ $this->username = $param['u']; $this->board = $param['b']; - $html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Could not request Pinterest.', 404); + $html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Username and/or board not found', 404); + } else if (isset($param['q'])) { $this->query = $param['q']; @@ -76,13 +77,17 @@ class PinterestBridge extends BridgeAbstract{ if (isset($this->query)) { - $avatar = $div->find('img.creditImg', 0); - $username = $div->find('span.creditName', 0); - $board = $div->find('span.creditTitle', 0); + $avatar = $div->find('div.creditImg', 0)->find('img', 0); + $avatar = $avatar->getAttribute('data-src'); + $avatar = str_replace("\\", "", $avatar); + + + $username = $div->find('div.creditName', 0); + $board = $div->find('div.creditTitle', 0); $item->username =$username->innertext; $item->fullname = $board->innertext; - $item->avatar = $avatar->getAttribute('src'); + $item->avatar = $avatar; $item->content .= '
'.$item->username.''; $item->content .= '
'.$item->fullname; @@ -111,6 +116,6 @@ class PinterestBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 3600; + return 0; } } From 04ec53010c6373f99eec400c5ce010c0d818c7a8 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 15:10:26 +0000 Subject: [PATCH 2/8] Corrected cache time. --- bridges/PinterestBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 7d06fee7..7df03c8c 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -116,6 +116,6 @@ class PinterestBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 0; + return 3600; } } From 036ea43e4fb2cb21de7893ae3a37f8525986a8f9 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 18:17:48 +0000 Subject: [PATCH 3/8] Modified HTMLUtils, corrected few bugs. --- lib/HTMLUtils.php | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 4e6b5e24..cb1bc0b6 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -158,16 +158,16 @@ class HTMLSanitizer { var $onlyKeepText; - public static $DEFAULT_CLEAR_TAGS = ["script", "iframe"]; + public static $DEFAULT_CLEAR_TAGS = ["script", "iframe", "input", "form"]; public static $KEPT_ATTRIBUTES = ["title", "href", "src"]; - const ONLY_TEXT = null; + public static $ONLY_TEXT = []; - function __construct($tags_to_remove = HTMLSanitizer::DEFAULT_CLEAR_TAGS, $kept_attributes = HTMLSanitizer::KEPT_ATTRIBUTES, $only_keep_text = HTMLSanitizer::ONLY_TEXT) { + function __construct($tags_to_remove = null, $kept_attributes = null, $only_keep_text = null) { - $this->tagsToRemove = $tags_to_remove; - $this->keptAttributes = $kept_attributes; - $this->onlyKeepText = $only_keep_text; + $this->tagsToRemove = $tags_to_remove == null ? HTMLSanitizer::$DEFAULT_CLEAR_TAGS : $tags_to_remove; + $this->keptAttributes = $kept_attributes == null ? 
HTMLSanitizer::$KEPT_ATTRIBUTES : $kept_attributes; + $this->onlyKeepText = $only_keep_text == null ? HTMLSanitizer::$ONLY_TEXT : $only_keep_text; } @@ -175,7 +175,7 @@ class HTMLSanitizer { $htmlContent = str_get_html($textToSanitize); - foreach($htmlContent->find('*[!j_ai_pas_trouve_comment_tout_demander]') as $element) { + foreach($htmlContent->find('*[!vive_les_chapeaux]') as $element) { if(in_array($element->tag, $this->onlyKeepText)) { $element->outertext = $element->plaintext; } else if(in_array($element->tag, $this->tagsToRemove)) { @@ -192,10 +192,12 @@ class HTMLSanitizer { } public static function defaultImageSrcTo($content, $server) { foreach($content->find('img') as $image) { - if(strpos($image->src, '/')==0) { + + if(strpos($image->src, "http") == NULL && strpos($image->src, "//") == NULL && strpos($image->src, "data:") == NULL) { $image->src = $server.$image->src; - } + } } + return $content; } } From 1c1bcc20ffb1623fc426b3ad7199155647a5f350 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 18:31:53 +0000 Subject: [PATCH 4/8] Changed the antiselector to be less likely to happen. --- lib/HTMLUtils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index cb1bc0b6..1e2f0f78 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -175,7 +175,7 @@ class HTMLSanitizer { $htmlContent = str_get_html($textToSanitize); - foreach($htmlContent->find('*[!vive_les_chapeaux]') as $element) { + foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) { if(in_array($element->tag, $this->onlyKeepText)) { $element->outertext = $element->plaintext; } else if(in_array($element->tag, $this->tagsToRemove)) { From e064b5fe28d5ee5b6808e0ed2aef14c0d0672653 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 18:41:35 +0000 Subject: [PATCH 5/8] Corrected no content in AcrimedBridge, modified to make it use RSS-Expander. 
--- bridges/AcrimedBridge.php | 52 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index d0f5abfc..bc40aabe 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -1,5 +1,5 @@ update = "2014-05-25"; } + public function collectData(array $param){ - function StripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } - function ExtractContent($url) { - $html2 = file_get_html($url); - $text = $html2->find('div.texte', 0)->innertext; - return $text; - } - $html = file_get_html('http://www.acrimed.org/spip.php?page=backend') or $this->returnError('Could not request Acrimed.', 404); - $limit = 0; + parent::collectExpandableDatas($param, "http://www.acrimed.org/spip.php?page=backend"); - foreach($html->find('item') as $element) { - if($limit < 10) { - $item = new \Item(); - $item->title = StripCDATA($element->find('title', 0)->innertext); - $item->uri = StripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = ExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } } - } + protected function parseRSSItem($newsItem) { + + $hs = new HTMLSanitizer(); + + $namespaces = $newsItem->getNameSpaces(true); + $dc = $newsItem->children($namespaces['dc']); + + $item = new Item(); + $item->uri = trim($newsItem->link); + $item->title = trim($newsItem->title); + $item->timestamp = strtotime($dc->date); + + $articlePage = file_get_html($newsItem->link); + $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); + $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); + + $item->content = $article; + + + return $item; + + } public function getName() { @@ -52,7 +53,6 @@ class AcrimedBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 3600*2; // 2 hours - // return 0; 
// 2 hours + return 0; // 2 hours } } From d92dc71fae41a9e95112c275c0dd1d33b07a36d2 Mon Sep 17 00:00:00 2001 From: Teromene Date: Fri, 26 Feb 2016 18:42:52 +0000 Subject: [PATCH 6/8] Fixed cache time. --- bridges/AcrimedBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index bc40aabe..b432c72a 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -53,6 +53,6 @@ class AcrimedBridge extends RssExpander{ } public function getCacheDuration(){ - return 0; // 2 hours + return 4800; // 2 hours } } From d0defc62833c1b41999d2f8791c44bf6dd2f2940 Mon Sep 17 00:00:00 2001 From: Alexis Degrugillier Date: Sat, 27 Feb 2016 08:36:14 +0100 Subject: [PATCH 7/8] Update youtube bridge Before, the channel id was supposed to be a number. But YouTube changed how they store a channel id. It's no longer a number, it's a string. Now, users can enter a text string instead of a number. See the example with this channel id: UC9fGq2-6FaftcegcIadLf6A --- bridges/YoutubeBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 7147a23e..6869ac00 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -31,10 +31,10 @@ class YoutubeBridge extends BridgeAbstract { $this->parameters['By channel id'] = '[ { - "type" : "number", + "type" : "text", "identifier" : "c", "name" : "channel id", - "exampleValue" : "15", + "exampleValue" : "test", "required" : "required" } ]'; From b0c15c337768164d950933ae50a9c739ccdc5efd Mon Sep 17 00:00:00 2001 From: Albirew Date: Sun, 28 Feb 2016 11:25:56 +0100 Subject: [PATCH 8/8] Alternative for g.etfv.co favicon services The g.etfv.co favicon service seems to have been down for some time. I replaced it with another open-source project: besticon. Source: https://github.com/mat/besticon --- formats/AtomFormat.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/formats/AtomFormat.php b/formats/AtomFormat.php index 74e78db6..303e3866 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -23,7 +23,7 @@ class AtomFormat extends FormatAbstract{ $extraInfos = $this->getExtraInfos(); $title = xml_encode($extraInfos['name']); $uri = $extraInfos['uri']; - $icon = xml_encode('http://g.etfv.co/'. $uri .'?icon.jpg'); + $icon = xml_encode('http://icons.better-idea.org/icon?url='. $uri .'&size=64'); $uri = xml_encode($uri); $entries = '';