diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php new file mode 100644 index 00000000..bc868ae8 --- /dev/null +++ b/bridges/DeveloppezDotComBridge.php @@ -0,0 +1,75 @@ +', '', $string); + return $string; + } + + function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly encoded, here was the trick: http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + { + $search = array(chr(145), + chr(146), + chr(147), + chr(148), + chr(151)); + + $replace = array("'", + "'", + '"', + '"', + '-'); + + return str_replace($search, $replace, $string); + } + + function DeveloppezDotComExtractContent($url) { + $articleHTMLContent = file_get_html($url); + $text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); + $text = utf8_encode($text); + return trim($text); + } + + $rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 2) { + $item = new \Item(); + $item->title = DeveloppezDotComStripCDATA($element->find('title', 0)->innertext); + $item->uri = DeveloppezDotComStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $content = DeveloppezDotComExtractContent($item->uri); + $item->content = strlen($content) ? $content : $element->description;//In case of it is a tutorial, we just keep the original description + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'DeveloppezDotCom'; + } + + public function getURI(){ + return 'http://www.developpez.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php new file mode 100644 index 00000000..b2e8a21e --- /dev/null +++ b/bridges/GizmodoFRBridge.php @@ -0,0 +1,57 @@ +find('div.entry-thumbnail', 0)->innertext; + $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; + $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; + foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { + $text = $text.'

link to a iframe (could be a video): '.$element->src.'


'; + } + + $text = strip_tags($text, '

'); + return $text; + } + + $rssFeed = file_get_html('http://www.gizmodo.fr/feed') or $this->returnError('Could not request http://www.gizmodo.fr/feed', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 15) { + $item = new \Item(); + $item->title = $element->find('title', 0)->innertext; + $item->uri = $element->find('guid', 0)->plaintext; + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = GizmodoFRExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'GizmodoFR'; + } + + public function getURI(){ + return 'http://www.gizmodo.fr/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php new file mode 100644 index 00000000..6620b76e --- /dev/null +++ b/bridges/LeJournalDuGeekBridge.php @@ -0,0 +1,69 @@ +', '', $string); + return $string; + } + + function LeJournalDuGeekExtractContent($url) { + $articleHTMLContent = file_get_html($url); + $text = $text.$articleHTMLContent->find('div.post-content', 0)->innertext; + foreach($articleHTMLContent->find('a.more') as $element) { + if ($element->innertext == "Source") { + $text = $text.'

Source : '.$element->href.'

'; + break; + } + } + foreach($articleHTMLContent->find('iframe') as $element) { + if (preg_match("/youtube/i", $element->src)) { + $text = $text.'// An IFRAME to Youtube was included in the article: '.$element->src.'
'; + } + } + + $text = strip_tags($text, '



  • '); + return $text; + } + + $rssFeed = file_get_html('http://www.journaldugeek.com/rss') or $this->returnError('Could not request http://www.journaldugeek.com/rss', 404); + $limit = 0; + + foreach($rssFeed->find('item') as $element) { + if($limit < 15) { + $item = new \Item(); + $item->title = LeJournalDuGeekStripCDATA($element->find('title', 0)->innertext); + $item->uri = LeJournalDuGeekStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = LeJournalDuGeekExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeJournalDuGeek'; + } + + public function getURI(){ + return 'http://www.journaldugeek.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index ad1d785f..ac1ec0fd 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -10,7 +10,7 @@ * @maintainer qwertygc */ class NextInpactBridge extends BridgeAbstract{ - + public function collectData(array $param){ function StripCDATA($string) { @@ -20,7 +20,8 @@ class NextInpactBridge extends BridgeAbstract{ } function ExtractContent($url) { $html2 = file_get_html($url); - $text = $html2->find('div[itemprop=articleBody]', 0)->innertext; + $text = '

    '.$html2->find('div#actu_entete > h2', 0)->innertext.'



    '; + $text = $text.$html2->find('div[itemprop=articleBody]', 0)->innertext; return $text; } $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404); @@ -37,7 +38,7 @@ class NextInpactBridge extends BridgeAbstract{ $limit++; } } - + } public function getName(){ diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index 80070750..0c581971 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,6 +1,6 @@