From 9712d052b4a258fdc04fa8e6aaf898b6fdb3603f Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 3 Jul 2015 18:43:15 +0200 Subject: [PATCH 1/4] Implementing TheOatMeal bridge fast with RSSExpander --- bridges/TheOatMealBridge.php | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 bridges/TheOatMealBridge.php diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php new file mode 100644 index 00000000..3e23f762 --- /dev/null +++ b/bridges/TheOatMealBridge.php @@ -0,0 +1,43 @@ +title = trim($newsItem->title); +// $this->message("browsing item ".var_export($newsItem, true)); + if(empty($newsItem->guid)) { + $item->uri = $newsItem->link; + } else { + $item->uri = $newsItem->guid; + } + // now load that uri from cache + $this->message("now loading page ".$item->uri); + $articlePage = str_get_html($this->get_cached($item->uri)); + + $content = $articlePage->find('#comic', 0); + if($content==null) { + $content = $articlePage->find('#blog'); + } + $item->content = $newsItem->description; + $item->name = $newsItem->author; + $item->timestamp = $this->RSS_2_0_time_to_timestamp($newsItem); + return $item; + } + public function getCacheDuration(){ + return 7200; // 2h hours + } +} From 94ffb22fb057e0b07146c366503d0d396bcc8efe Mon Sep 17 00:00:00 2001 From: Nicolas Delsaux Date: Sun, 5 Jul 2015 15:24:06 +0200 Subject: [PATCH 2/4] un bridge The Oatmeal qui marche ... principalement pour les comics locaux (et pas pour explodingkittens) --- bridges/RssExpander.php | 4 ++-- bridges/TheOatMealBridge.php | 39 ++++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/bridges/RssExpander.php b/bridges/RssExpander.php index 07268ea1..e8b2476d 100644 --- a/bridges/RssExpander.php +++ b/bridges/RssExpander.php @@ -16,7 +16,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ if (empty($param['url'])) { $this->returnError('There is no $param[\'url\'] for this RSS expander', 404); } - // $this->message("Loading from ".$param['url']); +// $this->message("Loading from ".$param['url']); // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time $rssContent = simplexml_load_file($param['url']) or $this->returnError('Could not request '.$param['url'], 404); // $this->message("loaded RSS from ".$param['url']); @@ -25,7 +25,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ $this->collect_RSS_2_0_data($rssContent); } - private function collect_RSS_2_0_data($rssContent) { + protected function collect_RSS_2_0_data($rssContent) { $rssContent = $rssContent->channel[0]; // $this->message("RSS content is ===========\n".var_export($rssContent, true)."==========="); $this->load_RSS_2_0_feed_data($rssContent); diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index 3e23f762..527af5f6 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -8,17 +8,33 @@ require_once 'bridges/RssExpander.php'; define("THE_OATMEAL", "http://theoatmeal.com/"); define("RSS", "http://feeds.feedburner.com/oatmealfeed"); -class TheOatmeal extends RssExpander{ +class TheOatmealBridge extends RssExpander{ public function collectData(array $param){ $param['url'] = RSS; parent::collectData($param); } - + + + /** + * Since the oatmeal produces a weird RSS feed, I have to fix it by loading the items separatly from the feed infos + */ + protected function collect_RSS_2_0_data($rssContent) { + $rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); + $rssHeaderContent = $rssContent->channel[0]; +// $this->message("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); + $this->load_RSS_2_0_feed_data($rssHeaderContent); + foreach($rssContent->item as $item) { + $this->message("parsing item ".var_export($item, true)); + $this->items[] = $this->parseRSSItem($item); + } + } + + protected function parseRSSItem($newsItem) { $item = new Item(); - $item->title = trim($newsItem->title); -// $this->message("browsing item ".var_export($newsItem, true)); + $item->title = trim($newsItem->title); + $this->message("browsing Oatmeal item ".var_export($newsItem, true)); if(empty($newsItem->guid)) { $item->uri = $newsItem->link; } else { @@ -32,12 +48,19 @@ class TheOatmeal extends RssExpander{ if($content==null) { $content = $articlePage->find('#blog'); } - $item->content = $newsItem->description; - $item->name = $newsItem->author; - $item->timestamp = $this->RSS_2_0_time_to_timestamp($newsItem); + $item->content = $content->innertext; + + $namespaces = $newsItem->getNameSpaces(true); + + $dc = $newsItem->children($namespaces['dc']); + $this->message("dc content is ".var_export($dc, true)); + $item->name = $dc->creator; + $item->timestamp = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp(); + $this->message("writtem by ".$item->name." on ".$item->timestamp); return $item; } + public function getCacheDuration(){ - return 7200; // 2h hours + return 1; // 2h hours } } From 7b99ad36507bdea61009027bd5b0b75f4b8c0ab2 Mon Sep 17 00:00:00 2001 From: Nicolas Delsaux Date: Sun, 5 Jul 2015 15:27:39 +0200 Subject: [PATCH 3/4] I stupidly forgot to change cache duration ... --- bridges/TheOatMealBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index 527af5f6..da0aab66 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -61,6 +61,6 @@ class TheOatmealBridge extends RssExpander{ } public function getCacheDuration(){ - return 1; // 2h hours + return 7200; // 2h hours } } From 6003daff9dac9dd45c9617420c186b224f7a3102 Mon Sep 17 00:00:00 2001 From: Nicolas Delsaux Date: Sun, 5 Jul 2015 15:51:17 +0200 Subject: [PATCH 4/4] fixed a small bug regarding url processing --- bridges/TheOatMealBridge.php | 12 ++++-------- index.php | 5 +++-- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index da0aab66..413de755 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -32,14 +32,13 @@ class TheOatmealBridge extends RssExpander{ protected function parseRSSItem($newsItem) { + $namespaces = $newsItem->getNameSpaces(true); + $dc = $newsItem->children($namespaces['dc']); + $rdf = $newsItem->children($namespaces['rdf']); $item = new Item(); $item->title = trim($newsItem->title); $this->message("browsing Oatmeal item ".var_export($newsItem, true)); - if(empty($newsItem->guid)) { - $item->uri = $newsItem->link; - } else { - $item->uri = $newsItem->guid; - } + $item->uri=$newsItem->attributes($namespaces['rdf'])->about; // now load that uri from cache $this->message("now loading page ".$item->uri); $articlePage = str_get_html($this->get_cached($item->uri)); @@ -50,9 +49,6 @@ class TheOatmealBridge extends RssExpander{ } $item->content = $content->innertext; - $namespaces = $newsItem->getNameSpaces(true); - - $dc = $newsItem->children($namespaces['dc']); $this->message("dc content is ".var_export($dc, true)); $item->name = $dc->creator; $item->timestamp = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp(); diff --git a/index.php b/index.php index 34c05096..c6b4bbb8 100644 --- a/index.php +++ b/index.php @@ -13,7 +13,8 @@ TODO : date_default_timezone_set('UTC'); error_reporting(0); -//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only. +//ini_set('display_errors','1'); +//error_reporting(E_ALL); // For debugging only. // extensions check if (!extension_loaded('openssl')) @@ -220,7 +221,7 @@ $formats = Format::searchInformation();

RSS-Bridge

-

·Reconnecting the Web·

+

·Reconnecting the Web·