From 01d6859f772374ac699b81f198d00f99b94dcde9 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Mon, 12 Oct 2015 17:13:27 +0200 Subject: [PATCH] [Numerama] update bridge post extraction (issue #158) --- bridges/NumeramaBridge.php | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index a7aef4b5..b5439dc8 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -2,12 +2,12 @@ /** * RssBridgeNumerama * Returns the 5 newest posts from http://www.numerama.com (full text) -* 2014-05-25 * * @name Numerama * @homepage http://www.numerama.com/ * @description Returns the 5 newest posts from Numerama (full text) * @maintainer mitsukarenai +* @update 2015-10-12 */ class NumeramaBridge extends BridgeAbstract{ @@ -18,14 +18,17 @@ class NumeramaBridge extends BridgeAbstract{ $string = str_replace(']]>', '', $string); return $string; } - function NumeramaExtractContent($url) { - $html2 = file_get_html($url); - $text = $html2->find('h2.intro', 0)->innertext; - $text = $text.$html2->find('div.content', 0)->innertext; - $text = strip_tags($text, '

      '); - return $text; - } - $html = file_get_html('http://www.numerama.com/rss/news.rss') or $this->returnError('Could not request Numerama.', 404); + + function NumeramaExtractContent($url) + { + $html2 = file_get_html($url); + $text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block + $text = ''; // add post picture + $text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post + return $text; + } + + $html = file_get_html('http://www.numerama.com/rss/news.rss') or $this->returnError('Could not request Numerama.', 404); $limit = 0; foreach($html->find('item') as $element) {