From 2aa9b8f0265ebad443eba32c8248bb95dd8c0cb9 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sat, 3 Sep 2016 22:17:36 +0200 Subject: [PATCH 01/17] [Bridge] Extend RssExpander to load ATOM formats --- lib/Bridge.php | 53 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 2a83630f..e889d101 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -605,9 +605,16 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $rssContent = simplexml_load_string($content); $this->debugMessage('loaded RSS from ' . $name); - // TODO insert RSS format detection - // For now we always assume RSS 2.0 - $this->collect_RSS_2_0_data($rssContent); + + if(isset($rssContent->channel[0])){ // RSS format + // TODO insert RSS format detection + // For now we always assume RSS 2.0 + $this->collect_RSS_2_0_data($rssContent); + } elseif(isset($rssContent->entry[0])){ // ATOM format + $this->collect_ATOM_data($rssContent); + } else { // Unknown format + $this->returnServerError('The feed format is unknown!'); + } } protected function collect_RSS_2_0_data($rssContent){ @@ -620,6 +627,14 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { } } + protected function collect_ATOM_data($content){ + $this->load_ATOM_feed_data($content); + foreach($content->entry as $item){ + $this->debugMessage('parsing item ' . 
var_export($item, true)); + $this->items[] = $this->parseRSSItem($item); + } + } + protected function RSS_2_0_time_to_timestamp($item){ return DateTime::createFromFormat('D, d M Y H:i:s e', $item->pubDate)->getTimestamp(); } @@ -631,6 +646,38 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->description = trim($rssContent->description); } + protected function load_ATOM_feed_data($content){ + $this->name = $content->title; + + // Find most best link (only one, or first of 'alternate') + if(!isset($content->link)){ + $this->uri = ''; + } elseif (count($content->link) === 1){ + $this->uri = $content->link[0]['href']; + } else { + $this->uri = ''; + foreach($content->link as $link){ + if(strtolower($link['rel']) === 'alternate'){ + $this->uri = $link['rel']; + break; + } + } + } + + if(isset($content->subtitle)) + $this->description = $content->subtitle; + } + + protected function parseATOMItem($feedItem){ + $item = array(); + if(isset($feedItem->id)) $item['uri'] = $feedItem->id; + if(isset($feedItem->title)) $item['title'] = $feedItem->title; + if(isset($feedItem->updated)) $item['timestamp'] = strtotime($feedItem->updated); + if(isset($feedItem->author)) $item['author'] = $feedItem->author->name; + if(isset($feedItem->content)) $item['content'] = $feedItem->content; + return $item; + } + /** * Method should return, from a source RSS item given by lastRSS, one of our Items objects * @param $item the input rss item From 0e5775012ff898074a39f4a9905a243968e16c5a Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 11:01:13 +0200 Subject: [PATCH 02/17] [Bridge] Rename parameter 'name' to 'url' --- lib/Bridge.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index e889d101..ed4a143d 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -591,20 +591,20 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { private $uri; private $description; - public function 
collectExpandableDatas($name){ - if(empty($name)){ - $this->returnServerError('There is no $name for this RSS expander'); + public function collectExpandableDatas($url){ + if(empty($url)){ + $this->returnServerError('There is no $url for this RSS expander'); } - $this->debugMessage('Loading from ' . $name); + $this->debugMessage('Loading from ' . $url); /* Notice we do not use cache here on purpose: * we want a fresh view of the RSS stream each time */ - $content = $this->getContents($name) or $this->returnServerError('Could not request ' . $name); + $content = $this->getContents($url) or $this->returnServerError('Could not request ' . $url); $rssContent = simplexml_load_string($content); - $this->debugMessage('loaded RSS from ' . $name); + $this->debugMessage('loaded RSS from ' . $url); if(isset($rssContent->channel[0])){ // RSS format // TODO insert RSS format detection From f7819658251743e4bbe407d71f485e8a43611c88 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 11:45:14 +0200 Subject: [PATCH 03/17] [Bridge] Add RSS 0.91 parser --- lib/Bridge.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/Bridge.php b/lib/Bridge.php index ed4a143d..f2e65256 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -678,6 +678,16 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { return $item; } + protected function parseRSS_0_9_1_Item($feedItem){ + $item = array(); + if(isset($feedItem->link)) $item['uri'] = $feedItem->link; + if(isset($feedItem->title)) $item['title'] = $feedItem->title; + // rss 0.91 doesn't support timestamps + // rss 0.91 doesn't support authors + if(isset($feedItem->description)) $item['content'] = $feedItem->description; + return $item; + } + /** * Method should return, from a source RSS item given by lastRSS, one of our Items objects * @param $item the input rss item From 51a3a75aaca6c876e16c57cc376f31753cc73ea9 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 12:32:56 +0200 Subject: 
[PATCH 04/17] [Bridge] Add RSS 1.0 and 2.0 parser --- lib/Bridge.php | 51 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index f2e65256..f96e0117 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -606,17 +606,33 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $rssContent = simplexml_load_string($content); $this->debugMessage('loaded RSS from ' . $url); - if(isset($rssContent->channel[0])){ // RSS format - // TODO insert RSS format detection - // For now we always assume RSS 2.0 - $this->collect_RSS_2_0_data($rssContent); - } elseif(isset($rssContent->entry[0])){ // ATOM format + $this->debugMessage('Detecting feed format/version'); + if(isset($rssContent->channel[0])){ + $this->debugMessage('Detected RSS format'); + if(isset($rssContent->item[0])){ + $this->debugMessage('Detected RSS 1.0 format'); + $this->collect_RSS_1_0_data($rssContent); + } else { + $this->debugMessage('Detected RSS 0.9x or 2.0 format'); + $this->collect_RSS_2_0_data($rssContent); + } + } elseif(isset($rssContent->entry[0])){ + $this->debugMessage('Detected ATOM format'); $this->collect_ATOM_data($rssContent); - } else { // Unknown format + } else { + $this->debugMessage('Unknown feed format/version'); $this->returnServerError('The feed format is unknown!'); } } + protected function collect_RSS_1_0_data($rssContent){ + $this->load_RSS_2_0_feed_data($rssContent->channel[0]); + foreach($rssContent->item as $item){ + $this->debugMessage('parsing item ' . var_export($item, true)); + $this->items[] = $this->parseRSSItem($item); + } + } + protected function collect_RSS_2_0_data($rssContent){ $rssContent = $rssContent->channel[0]; $this->debugMessage('RSS content is ===========\n' . var_export($rssContent, true) . 
'==========='); @@ -688,6 +704,29 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { return $item; } + protected function parseRSS_1_0_Item($feedItem){ + // 1.0 adds optional elements around the 0.91 standard + return $this->parseRSS_0_9_1_Item($feedItem); + } + + protected function parseRSS_2_0_Item($feedItem){ + // Primary data is compatible to 0.91 + $item = $this->parseRSS_0_9_1_Item($feedItem); + if(isset($feedItem->pubDate)) $item['timestamp'] = strtotime($feedItem->pubDate); + if(isset($feedItem->author)){ + $item['author'] = $feedItem->author; + } else { + // Feed might use 'dc' namespace + $namespaces = $feedItem->getNamespaces(true); + if(isset($namespaces['dc'])){ + $dc = $feedItem->children($namespaces['dc']); + if(isset($dc->creator)) + $item['author'] = $dc->creator; + } + } + return $item; + } + /** * Method should return, from a source RSS item given by lastRSS, one of our Items objects * @param $item the input rss item From bf20a2f68736a412ac1c2aa5619e01cea2d71edc Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 12:34:56 +0200 Subject: [PATCH 05/17] [Bridge] Remove unnecessary debug message --- lib/Bridge.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index f96e0117..91848440 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -601,10 +601,9 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { /* Notice we do not use cache here on purpose: * we want a fresh view of the RSS stream each time */ - $content = $this->getContents($url) or $this->returnServerError('Could not request ' . $url); - + $content = $this->getContents($url) + or $this->returnServerError('Could not request ' . $url); $rssContent = simplexml_load_string($content); - $this->debugMessage('loaded RSS from ' . 
$url); $this->debugMessage('Detecting feed format/version'); if(isset($rssContent->channel[0])){ From 8fa0b9660fb9e9a3098617fe2731f2f1ef34e0a0 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 12:40:42 +0200 Subject: [PATCH 06/17] [Bridge] Fix ATOM feed uri detection --- lib/Bridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 91848440..b96d33b2 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -664,7 +664,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { protected function load_ATOM_feed_data($content){ $this->name = $content->title; - // Find most best link (only one, or first of 'alternate') + // Find best link (only one, or first of 'alternate') if(!isset($content->link)){ $this->uri = ''; } elseif (count($content->link) === 1){ @@ -673,7 +673,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->uri = ''; foreach($content->link as $link){ if(strtolower($link['rel']) === 'alternate'){ - $this->uri = $link['rel']; + $this->uri = $link['href']; break; } } From 149b64879e78165e6b79882c04aad2c599a69a1b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:05:17 +0200 Subject: [PATCH 07/17] [Bridge] Support 'dc' namespace for RSS 1.0 --- lib/Bridge.php | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index b96d33b2..29683cf8 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -705,7 +705,16 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { protected function parseRSS_1_0_Item($feedItem){ // 1.0 adds optional elements around the 0.91 standard - return $this->parseRSS_0_9_1_Item($feedItem); + $item = $this->parseRSS_0_9_1_Item($feedItem); + + $namespaces = $feedItem->getNamespaces(true); + if(isset($namespaces['dc'])){ + $dc = $feedItem->children($namespaces['dc']); + if(isset($dc->date)) $item['timestamp'] = strtotime($dc->date); + 
if(isset($dc->creator)) $item['author'] = $dc->creator; + } + + return $item; } protected function parseRSS_2_0_Item($feedItem){ @@ -719,8 +728,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $namespaces = $feedItem->getNamespaces(true); if(isset($namespaces['dc'])){ $dc = $feedItem->children($namespaces['dc']); - if(isset($dc->creator)) - $item['author'] = $dc->creator; + if(isset($dc->creator)) $item['author'] = $dc->creator; } } return $item; From 0770ca1ad110c8ac006880f963f4bc04a8e0a39a Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:16:34 +0200 Subject: [PATCH 08/17] [FeedExpanderExample] Add bridge to test behavior --- bridges/FeedExpanderExampleBridge.php | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 bridges/FeedExpanderExampleBridge.php diff --git a/bridges/FeedExpanderExampleBridge.php b/bridges/FeedExpanderExampleBridge.php new file mode 100644 index 00000000..09745658 --- /dev/null +++ b/bridges/FeedExpanderExampleBridge.php @@ -0,0 +1,62 @@ + array( + 'version' => array( + 'name' => 'Version', + 'type' => 'list', + 'required' => true, + 'title' => 'Select your feed format/version', + 'defaultValue' => 'RSS 2.0', + 'values' => array( + 'RSS 0.91' => 'rss_0_9_1', + 'RSS 1.0' => 'rss_1_0', + 'RSS 2.0' => 'rss_2_0', + 'ATOM 1.0' => 'atom_1_0' + ) + ) + ) + ); + + public function collectData(){ + switch($this->getInput('version')){ + case 'rss_0_9_1': + parent::collectExpandableDatas('http://static.userland.com/gems/backend/sampleRss.xml'); + break; + case 'rss_1_0': + parent::collectExpandableDatas('http://feeds.nature.com/nature/rss/current?format=xml'); + break; + case 'rss_2_0': + parent::collectExpandableDatas('http://feeds.rssboard.org/rssboard?format=xml'); + break; + case 'atom_1_0': + parent::collectExpandableDatas('http://segfault.linuxmint.com/feed/atom/'); + break; + default: $this->returnClientError('Unknown version ' . $this->getInput('version') . 
'!'); + } + } + + protected function parseRSSItem($newsItem) { + switch($this->getInput('version')){ + case 'rss_0_9_1': + return $this->parseRSS_0_9_1_Item($newsItem); + break; + case 'rss_1_0': + return $this->parseRSS_1_0_Item($newsItem); + break; + case 'rss_2_0': + return $this->parseRSS_2_0_Item($newsItem); + break; + case 'atom_1_0': + return $this->parseATOMItem($newsItem); + break; + default: $this->returnClientError('Unknown version ' . $this->getInput('version') . '!'); + } + } +} From 1de148bf5d0d6af2f2647dedd27eae0d83286873 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:17:28 +0200 Subject: [PATCH 09/17] [TheOatMeal] Use core parser functions --- bridges/TheOatMealBridge.php | 41 ++++++------------------------------ 1 file changed, 7 insertions(+), 34 deletions(-) diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index dc6804aa..6e80fa8f 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -10,44 +10,17 @@ class TheOatmealBridge extends RssExpander{ $this->collectExpandableDatas('http://feeds.feedburner.com/oatmealfeed'); } - - /** - * Since the oatmeal produces a weird RSS feed, I have to fix it by loading the items separatly from the feed infos - */ - protected function collect_RSS_2_0_data($rssContent) { - $rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); - $rssHeaderContent = $rssContent->channel[0]; - $this->debugMessage("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); - $this->load_RSS_2_0_feed_data($rssHeaderContent); - foreach($rssContent->item as $item) { - $this->debugMessage("parsing item ".var_export($item, true)); - $this->items[] = $this->parseRSSItem($item); - } - } - - protected function parseRSSItem($newsItem) { - $namespaces = $newsItem->getNameSpaces(true); - $dc = $newsItem->children($namespaces['dc']); - $rdf = $newsItem->children($namespaces['rdf']); - $item = array(); - $item['title'] = 
trim($newsItem->title); - $this->debugMessage("browsing Oatmeal item ".var_export($newsItem, true)); - $item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about; - // now load that uri from cache - $this->debugMessage("now loading page ".$item['uri']); + $item = $this->parseRSS_1_0_Item($newsItem); + $articlePage = $this->get_cached($item['uri']); - $content = $articlePage->find('#comic', 0); - if($content==null) { - $content = $articlePage->find('#blog'); - } - $item['content'] = $content->innertext; + if(is_null($content)) // load alternative + $content = $articlePage->find('#blog', 0); + + if(!is_null($content)) + $item['content'] = $content->innertext; - $this->debugMessage("dc content is ".var_export($dc, true)); - $item['author'] = (string) $dc->creator; - $item['timestamp'] = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp(); - $this->debugMessage("writtem by ".$item['author']." on ".$item['timestamp']); return $item; } From 39788485ea6ffabc6f725e63161e89747ac90950 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:26:17 +0200 Subject: [PATCH 10/17] [Bridge] Rename 'parseRSSItem' to 'parseItem' --- lib/Bridge.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 29683cf8..689380a7 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -628,7 +628,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->load_RSS_2_0_feed_data($rssContent->channel[0]); foreach($rssContent->item as $item){ $this->debugMessage('parsing item ' . var_export($item, true)); - $this->items[] = $this->parseRSSItem($item); + $this->items[] = $this->parseItem($item); } } @@ -638,7 +638,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->load_RSS_2_0_feed_data($rssContent); foreach($rssContent->item as $item){ $this->debugMessage('parsing item ' . 
var_export($item, true)); - $this->items[] = $this->parseRSSItem($item); + $this->items[] = $this->parseItem($item); } } @@ -646,7 +646,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->load_ATOM_feed_data($content); foreach($content->entry as $item){ $this->debugMessage('parsing item ' . var_export($item, true)); - $this->items[] = $this->parseRSSItem($item); + $this->items[] = $this->parseItem($item); } } @@ -739,7 +739,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { * @param $item the input rss item * @return a RSS-Bridge Item, with (hopefully) the whole content) */ - abstract protected function parseRSSItem($item); + abstract protected function parseItem($item); public function getURI(){ return $this->uri; From 878db6f96ea61441f419be59ea05ed303722570f Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:26:40 +0200 Subject: [PATCH 11/17] [bridges] Rename 'parseRSSItem' to 'parseItem' --- bridges/AcrimedBridge.php | 2 +- bridges/FeedExpanderExampleBridge.php | 2 +- bridges/FreenewsBridge.php | 2 +- bridges/Les400CulsBridge.php | 2 +- bridges/TheOatMealBridge.php | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 2f77a4cf..58d520ae 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -13,7 +13,7 @@ class AcrimedBridge extends RssExpander{ } - protected function parseRSSItem($newsItem) { + protected function parseItem($newsItem) { $hs = new HTMLSanitizer(); diff --git a/bridges/FeedExpanderExampleBridge.php b/bridges/FeedExpanderExampleBridge.php index 09745658..9d4335e5 100644 --- a/bridges/FeedExpanderExampleBridge.php +++ b/bridges/FeedExpanderExampleBridge.php @@ -42,7 +42,7 @@ class FeedExpanderExampleBridge extends RssExpander { } } - protected function parseRSSItem($newsItem) { + protected function parseItem($newsItem) { switch($this->getInput('version')){ case 'rss_0_9_1': return 
$this->parseRSS_0_9_1_Item($newsItem); diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index 60fd34e2..da1dcfbf 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -11,7 +11,7 @@ class FreenewsBridge extends RssExpander { parent::collectExpandableDatas(FREENEWS_RSS); } - protected function parseRSSItem($newsItem) { + protected function parseItem($newsItem) { $item = array(); $item['title'] = trim($newsItem->title); $this->debugMessage("item has for title \"".$item['title']."\""); diff --git a/bridges/Les400CulsBridge.php b/bridges/Les400CulsBridge.php index ce56537b..53d59f5a 100644 --- a/bridges/Les400CulsBridge.php +++ b/bridges/Les400CulsBridge.php @@ -11,7 +11,7 @@ class Les400CulsBridge extends RssExpander{ $this->collectExpandableDatas(self::URI.'feeds/'); } - protected function parseRSSItem($newsItem) { + protected function parseItem($newsItem) { $item = array(); $item['title'] = trim((string) $newsItem->title); $this->debugMessage("browsing item ".var_export($newsItem, true)); diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index 6e80fa8f..f8039871 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -10,7 +10,7 @@ class TheOatmealBridge extends RssExpander{ $this->collectExpandableDatas('http://feeds.feedburner.com/oatmealfeed'); } - protected function parseRSSItem($newsItem) { + protected function parseItem($newsItem) { $item = $this->parseRSS_1_0_Item($newsItem); $articlePage = $this->get_cached($item['uri']); From 546c0036df342867184a0a87a51458dfd90d09eb Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:28:12 +0200 Subject: [PATCH 12/17] [Bridge] Rename class RssExpander to FeedExpander This class is not RSS-only anymore --- lib/Bridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 689380a7..342c3956 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -585,7 +585,7 @@ 
abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } } -abstract class RssExpander extends HttpCachingBridgeAbstract { +abstract class FeedExpander extends HttpCachingBridgeAbstract { private $name; private $uri; From 88f52196b8dcc4c42f01500de3fe6d9fda4652e0 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:28:55 +0200 Subject: [PATCH 13/17] [bridges] Fix bridges extending RssExpander to FeedExpander --- bridges/AcrimedBridge.php | 2 +- bridges/FeedExpanderExampleBridge.php | 2 +- bridges/FreenewsBridge.php | 2 +- bridges/Les400CulsBridge.php | 2 +- bridges/TheOatMealBridge.php | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 58d520ae..bee4456a 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -1,5 +1,5 @@ Date: Sun, 4 Sep 2016 13:32:58 +0200 Subject: [PATCH 14/17] [Les400Culs] Use internal RSS 2.0 parser --- bridges/Les400CulsBridge.php | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/bridges/Les400CulsBridge.php b/bridges/Les400CulsBridge.php index df59e5be..972909a3 100644 --- a/bridges/Les400CulsBridge.php +++ b/bridges/Les400CulsBridge.php @@ -1,35 +1,19 @@ collectExpandableDatas(self::URI.'feeds/'); + $this->collectExpandableDatas(self::URI . 
'feeds/'); } - protected function parseItem($newsItem) { - $item = array(); - $item['title'] = trim((string) $newsItem->title); - $this->debugMessage("browsing item ".var_export($newsItem, true)); - if(empty($newsItem->guid)) { - $item['uri'] = (string) $newsItem->link; - } else { - $item['uri'] = (string) $newsItem->guid; - } - // now load that uri from cache - $this->debugMessage("now loading page ".$item['uri']); -// $articlePage = $this->get_cached($item['uri']); - -// $content = $articlePage->find('.post-container', 0); - $item['content'] = (string) $newsItem->description; - $item['author'] = (string) $newsItem->author; - $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem); - return $item; + protected function parseItem($newsItem){ + return $this->parseRSS_2_0_Item($newsItem); } + public function getCacheDuration(){ return 7200; // 2h hours } From 778bbd8d851bc7cc54913baec476f170978524d1 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:38:21 +0200 Subject: [PATCH 15/17] [Freenews] Use internal RSS 2.0 parser --- bridges/FreenewsBridge.php | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index 676a7168..dbc46b9e 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -1,34 +1,22 @@ title); - $this->debugMessage("item has for title \"".$item['title']."\""); - if(empty($newsItem->guid)) { - $item['uri'] = (string) $newsItem->link; - } else { - $item['uri'] = (string) $newsItem->guid; - } - // now load that uri from cache - $this->debugMessage("now loading page ".$item['uri']); + $item = $this->parseRSS_2_0_Item($newsItem); + $articlePage = $this->get_cached($item['uri']); - $content = $articlePage->find('.post-container', 0); $item['content'] = $content->innertext; - $item['author'] = $articlePage->find('a[rel=author]', 0)->innertext; - // format should parse 2014-03-25T16:21:20Z. 
But, according to http://stackoverflow.com/a/10478469, it is not that simple - $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem); + return $item; } } From acde8a2cea035d11a6197943c7d726ea4e9ae703 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:46:57 +0200 Subject: [PATCH 16/17] [Bridge] Support 'dc:date' for RSS 2.0 parser --- lib/Bridge.php | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 342c3956..72734df1 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -718,18 +718,21 @@ abstract class FeedExpander extends HttpCachingBridgeAbstract { } protected function parseRSS_2_0_Item($feedItem){ - // Primary data is compatible to 0.91 + // Primary data is compatible to 0.91 with some additional data $item = $this->parseRSS_0_9_1_Item($feedItem); - if(isset($feedItem->pubDate)) $item['timestamp'] = strtotime($feedItem->pubDate); + + $namespaces = $feedItem->getNamespaces(true); + if(isset($namespaces['dc'])) $dc = $feedItem->children($namespaces['dc']); + + if(isset($feedItem->pubDate)){ + $item['timestamp'] = strtotime($feedItem->pubDate); + } elseif(isset($dc->date)){ + $item['timestamp'] = strtotime($dc->date); + } if(isset($feedItem->author)){ $item['author'] = $feedItem->author; - } else { - // Feed might use 'dc' namespace - $namespaces = $feedItem->getNamespaces(true); - if(isset($namespaces['dc'])){ - $dc = $feedItem->children($namespaces['dc']); - if(isset($dc->creator)) $item['author'] = $dc->creator; - } + } elseif(isset($dc->creator)){ + $item['author'] = $dc->creator; } return $item; } From 15f24b3cf4b418633c2028e4318399b67b413130 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 4 Sep 2016 13:47:13 +0200 Subject: [PATCH 17/17] [Acrimed] Use internal RSS 2.0 parser --- bridges/AcrimedBridge.php | 48 ++++++++++++++------------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/bridges/AcrimedBridge.php 
b/bridges/AcrimedBridge.php index bee4456a..54c6f873 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -1,40 +1,26 @@ collectExpandableDatas("http://www.acrimed.org/spip.php?page=backend"); + } - public function collectData(){ + protected function parseItem($newsItem){ + $item = $this->parseRSS_2_0_Item($newsItem); - $this->collectExpandableDatas("http://www.acrimed.org/spip.php?page=backend"); + $hs = new HTMLSanitizer(); + $articlePage = $this->getSimpleHTMLDOM($newsItem->link); + $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); + $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); + $item['content'] = $article; - } - - protected function parseItem($newsItem) { - - $hs = new HTMLSanitizer(); - - $namespaces = $newsItem->getNameSpaces(true); - $dc = $newsItem->children($namespaces['dc']); - - $item = array(); - $item['uri'] = trim($newsItem->link); - $item['title'] = trim($newsItem->title); - $item['timestamp'] = strtotime($dc->date); - - $articlePage = $this->getSimpleHTMLDOM($newsItem->link); - $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); - $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); - - $item['content'] = $article; - - - return $item; - - } + return $item; + } public function getCacheDuration(){ return 4800; // 2 hours