Merge pull request #153 from ORelio/master

Improve Facebook, NextInpact and WordPress bridges
This commit is contained in:
Mitsu 2015-09-20 17:03:15 +02:00
commit 36ba93eea3
3 changed files with 102 additions and 80 deletions

View file

@ -1,16 +1,16 @@
<?php <?php
/** /**
*
* @name Facebook * @name Facebook
* @homepage http://facebook.com/ * @homepage http://facebook.com/
* @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117 * @description Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117
* @update 03/08/2015 * @update 05/09/2015
* @maintainer teromene * @maintainer teromene
* @use1(u="username") * @use1(u="username")
*/ */
class FacebookBridge extends BridgeAbstract{ class FacebookBridge extends BridgeAbstract{
private $name;
public function collectData(array $param){ public function collectData(array $param){
$html = ''; $html = '';
@ -19,60 +19,74 @@ class FacebookBridge extends BridgeAbstract{
if(!strpos($param['u'], "/")) { if(!strpos($param['u'], "/")) {
$html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); $html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
} else { } else {
$html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
} }
} else { } else {
$this->returnError('You must specify a Facebook username.', 400); $this->returnError('You must specify a Facebook username.', 400);
} }
$element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0);
$element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0);
if(isset($element)) { if(isset($element)) {
$author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
$profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&amp;height=200';
$this->name = $author;
foreach($element->children() as $post) { foreach($element->children() as $post) {
$item = new \Item(); $item = new \Item();
if($post->hasAttribute("data-time")) { if($post->hasAttribute("data-time")) {
//Clean the content of the page //Clean the content of the page and convert relative links into absolute links
$content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', "", $post); $content = preg_replace('/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i', '', $post);
$content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', "", $content); $content = preg_replace('/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i', '', $content);
$content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content); $content = preg_replace('/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content);
$content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', "", $content); $content = preg_replace('/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i', '', $content);
$content = str_replace(' href="/', ' href="https://facebook.com/', $content);
$content = strip_tags($content,"<a><img>"); $content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content);
$content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content);
$content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);
$content = strip_tags($content,'<a><img>');
//Retrieve date of the post
$date = $post->find("abbr")[0]; $date = $post->find("abbr")[0];
if(isset($date) && $date->hasAttribute("data-utime")) { if(isset($date) && $date->hasAttribute('data-utime')) {
$date = $date->getAttribute("data-utime"); $date = $date->getAttribute('data-utime');
} else { } else {
$date = 0; $date = 0;
} }
$item->uri = 'https://facebook.com'.str_replace("&amp;", "&", $post->find("abbr")[0]->parent()->getAttribute("href")); //Build title from username and content
$title = $author;
if (strlen($title) > 24)
$title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...';
$title = $title.' | '.strip_tags($content);
if (strlen($title) > 64)
$title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...';
//Use first image as thumbnail if available, or profile pic fallback
$thumbnail = $post->find('img', 1)->src;
if (strlen($thumbnail) == 0)
$thumbnail = $profilePic;
//Build and add final item
$item->uri = 'https://facebook.com'.str_replace('&amp;', '&', $post->find('abbr')[0]->parent()->getAttribute('href'));
$item->thumbnailUri = $thumbnail;
$item->content = $content; $item->content = $content;
$item->title = $param['u']." | ".strip_tags($content); $item->title = $title;
$item->author = $author;
$item->timestamp = $date; $item->timestamp = $date;
$this->items[] = $item; $this->items[] = $item;
} }
} }
} }
} }
public function getName() { public function getName() {
return 'Facebook Bridge'; return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge';
} }
public function getURI() { public function getURI() {
@ -83,5 +97,3 @@ class FacebookBridge extends BridgeAbstract{
return 300; // 5 minutes return 300; // 5 minutes
} }
} }
?>

View file

@ -4,10 +4,11 @@
* Returns the newest articles * Returns the newest articles
* 2014-05-25 * 2014-05-25
* *
* @name Nextinpact Bridge * @name NextInpact Bridge
* @homepage http://www.nextinpact.com/ * @homepage http://www.nextinpact.com/
* @description Returns the newest articles. * @description Returns the newest articles.
* @maintainer qwertygc * @maintainer qwertygc
* @update 2015-09-05
*/ */
class NextInpactBridge extends BridgeAbstract { class NextInpactBridge extends BridgeAbstract {
@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract {
function ExtractContent($url) { function ExtractContent($url) {
$html2 = file_get_html($url); $html2 = file_get_html($url);
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>' $text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" /></p>' .'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>'; .'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
$premium_article = $html2->find('h2.title_reserve_article', 0)->innertext;
if (strlen($premium_article) > 0)
$text = $text.'<p><em>'.$premium_article.'</em></p>';
return $text; return $text;
} }
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404); $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
$limit = 0; $limit = 0;
foreach($html->find('item') as $element) { foreach($html->find('item') as $element) {

View file

@ -8,7 +8,7 @@
* @homepage https://wordpress.com/ * @homepage https://wordpress.com/
* @description Returns the 3 newest full posts of a Wordpress blog * @description Returns the 3 newest full posts of a Wordpress blog
* @maintainer aledeg * @maintainer aledeg
* @update 2014-05-26 * @update 2015-09-05
* @use1(url="blog URL (required)", name="blog name") * @use1(url="blog URL (required)", name="blog name")
*/ */
class WordPressBridge extends BridgeAbstract { class WordPressBridge extends BridgeAbstract {
@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract {
} }
$html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404); $html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404);
$posts = $html->find('.post'); $posts = $html->find('.post');
if(!empty($posts) ) { if(!empty($posts) ) {
$i=0; $i=0;
foreach ($html->find('.post') as $article) { foreach ($html->find('.post') as $article) {
if($i < 3) { if($i < 3) {
$uri = $article->find('a', 0)->href; $uri = $article->find('a', 0)->href;
$this->items[] = $this->getDetails($uri); $thumbnail = $article->find('img', 0)->src;
$this->items[] = $this->getDetails($uri, $thumbnail);
$i++; $i++;
} }
} }
@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract {
} }
} }
private function getDetails($uri) { private function getDetails($uri, $thumbnail) {
$html = file_get_html($uri) or exit; $html = file_get_html($uri) or exit;
$article = $html->find('.post', 0);
$title = $article->find('h1', 0)->innertext;
if (strlen($title) == 0)
$title = $article->find('h2', 0)->innertext;
$item = new \Item(); $item = new \Item();
$article = $html->find('.post', 0);
$item->uri = $uri; $item->uri = $uri;
$item->title = $article->find('h1', 0)->innertext; $item->title = htmlspecialchars_decode($title);
$item->author = $article->find('a[rel=author]', 0)->innertext;
$item->thumbnailUri = $thumbnail;
$item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext); $item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext);
$item->timestamp = $this->getDate($uri); $item->timestamp = $this->getDate($uri);