diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index d0f5abfc..b432c72a 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -1,5 +1,5 @@ update = "2014-05-25"; } + public function collectData(array $param){ - function StripCDATA($string) { - $string = str_replace('', '', $string); - return $string; - } - function ExtractContent($url) { - $html2 = file_get_html($url); - $text = $html2->find('div.texte', 0)->innertext; - return $text; - } - $html = file_get_html('http://www.acrimed.org/spip.php?page=backend') or $this->returnError('Could not request Acrimed.', 404); - $limit = 0; + parent::collectExpandableDatas($param, "http://www.acrimed.org/spip.php?page=backend"); - foreach($html->find('item') as $element) { - if($limit < 10) { - $item = new \Item(); - $item->title = StripCDATA($element->find('title', 0)->innertext); - $item->uri = StripCDATA($element->find('guid', 0)->plaintext); - $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $item->content = ExtractContent($item->uri); - $this->items[] = $item; - $limit++; - } } - } + protected function parseRSSItem($newsItem) { + + $hs = new HTMLSanitizer(); + + $namespaces = $newsItem->getNameSpaces(true); + $dc = $newsItem->children($namespaces['dc']); + + $item = new Item(); + $item->uri = trim($newsItem->link); + $item->title = trim($newsItem->title); + $item->timestamp = strtotime($dc->date); + + $articlePage = file_get_html($newsItem->link); + $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); + $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); + + $item->content = $article; + + + return $item; + + } public function getName() { @@ -52,7 +53,6 @@ class AcrimedBridge extends BridgeAbstract{ } public function getCacheDuration(){ - return 3600*2; // 2 hours - // return 0; // 2 hours + return 4800; // 2 hours } } diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 3bb0530c..7df03c8c 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -51,7 +51,8 @@ class PinterestBridge extends BridgeAbstract{ $this->username = $param['u']; $this->board = $param['b']; - $html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Could not request Pinterest.', 404); + $html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Username and/or board not found', 404); + } else if (isset($param['q'])) { $this->query = $param['q']; @@ -76,13 +77,17 @@ class PinterestBridge extends BridgeAbstract{ if (isset($this->query)) { - $avatar = $div->find('img.creditImg', 0); - $username = $div->find('span.creditName', 0); - $board = $div->find('span.creditTitle', 0); + $avatar = $div->find('div.creditImg', 0)->find('img', 0); + $avatar = $avatar->getAttribute('data-src'); + $avatar = str_replace("\\", "", $avatar); + + + $username = $div->find('div.creditName', 0); + $board = $div->find('div.creditTitle', 0); $item->username =$username->innertext; $item->fullname = $board->innertext; - $item->avatar = $avatar->getAttribute('src'); + $item->avatar = $avatar; $item->content .= '
'.$item->username.''; $item->content .= '
'.$item->fullname; diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 7147a23e..6869ac00 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -31,10 +31,10 @@ class YoutubeBridge extends BridgeAbstract { $this->parameters['By channel id'] = '[ { - "type" : "number", + "type" : "text", "identifier" : "c", "name" : "channel id", - "exampleValue" : "15", + "exampleValue" : "test", "required" : "required" } ]'; diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 74e78db6..303e3866 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -23,7 +23,7 @@ class AtomFormat extends FormatAbstract{ $extraInfos = $this->getExtraInfos(); $title = xml_encode($extraInfos['name']); $uri = $extraInfos['uri']; - $icon = xml_encode('http://g.etfv.co/'. $uri .'?icon.jpg'); + $icon = xml_encode('http://icons.better-idea.org/icon?url='. $uri .'&size=64'); $uri = xml_encode($uri); $entries = ''; diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 4e6b5e24..1e2f0f78 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -158,16 +158,16 @@ class HTMLSanitizer { var $onlyKeepText; - public static $DEFAULT_CLEAR_TAGS = ["script", "iframe"]; + public static $DEFAULT_CLEAR_TAGS = ["script", "iframe", "input", "form"]; public static $KEPT_ATTRIBUTES = ["title", "href", "src"]; - const ONLY_TEXT = null; + public static $ONLY_TEXT = []; - function __construct($tags_to_remove = HTMLSanitizer::DEFAULT_CLEAR_TAGS, $kept_attributes = HTMLSanitizer::KEPT_ATTRIBUTES, $only_keep_text = HTMLSanitizer::ONLY_TEXT) { + function __construct($tags_to_remove = null, $kept_attributes = null, $only_keep_text = null) { - $this->tagsToRemove = $tags_to_remove; - $this->keptAttributes = $kept_attributes; - $this->onlyKeepText = $only_keep_text; + $this->tagsToRemove = $tags_to_remove == null ? HTMLSanitizer::$DEFAULT_CLEAR_TAGS : $tags_to_remove; + $this->keptAttributes = $kept_attributes == null ? HTMLSanitizer::$KEPT_ATTRIBUTES : $kept_attributes; + $this->onlyKeepText = $only_keep_text == null ? HTMLSanitizer::$ONLY_TEXT : $only_keep_text; } @@ -175,7 +175,7 @@ class HTMLSanitizer { $htmlContent = str_get_html($textToSanitize); - foreach($htmlContent->find('*[!j_ai_pas_trouve_comment_tout_demander]') as $element) { + foreach($htmlContent->find('*[!b38fd2b1fe7f4747d6b1c1254ccd055e]') as $element) { if(in_array($element->tag, $this->onlyKeepText)) { $element->outertext = $element->plaintext; } else if(in_array($element->tag, $this->tagsToRemove)) { @@ -192,10 +192,12 @@ class HTMLSanitizer { } public static function defaultImageSrcTo($content, $server) { foreach($content->find('img') as $image) { - if(strpos($image->src, '/')==0) { + + if(strpos($image->src, "http") == NULL && strpos($image->src, "//") == NULL && strpos($image->src, "data:") == NULL) { $image->src = $server.$image->src; - } + } } + return $content; } }