diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 351dc39c..109be9bb 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -1,87 +1,99 @@ returnError('No results for this query.', 404); + private $name; + + public function collectData(array $param){ + + $html = ''; + + if(isset($param['u'])) { + if(!strpos($param['u'], "/")) { + $html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } else { + $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } } else { - - $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); - + $this->returnError('You must specify a Facebook username.', 400); } - } else { - $this->returnError('You must specify a Facebook username.', 400); + $element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0); - } + if(isset($element)) { + $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext); + $profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200'; + $this->name = $author; - - $element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0); - - if(isset($element)) { - - foreach($element->children() as $post) { - - $item = new \Item(); - - if($post->hasAttribute("data-time")) { - - //Clean the content of the page - $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', "", $post); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', "", $content); - - $content = strip_tags($content,""); + foreach($element->children() as $post) { + $item = new \Item(); - $date = $post->find("abbr")[0]; - if(isset($date) && $date->hasAttribute("data-utime")) { - $date = $date->getAttribute("data-utime"); - } else { - $date = 0; + if($post->hasAttribute("data-time")) { + + //Clean the content of the page and convert relative links into absolute links + $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', '', $post); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', '', $content); + $content = str_replace(' href="/', ' href="https://facebook.com/', $content); + $content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content); + $content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content); + $content = preg_replace('/<\/a [^>]+>/i', '', $content); + $content = strip_tags($content,''); + + //Retrieve date of the post + $date = $post->find("abbr")[0]; + if(isset($date) && $date->hasAttribute('data-utime')) { + $date = $date->getAttribute('data-utime'); + } else { + $date = 0; + } + + //Build title from username and content + $title = $author; + if (strlen($title) > 24) + $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...'; + $title = $title.' | '.strip_tags($content); + if (strlen($title) > 64) + $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...'; + + //Use first image as thumbnail if available, or profile pic fallback + $thumbnail = $post->find('img', 1)->src; + if (strlen($thumbnail) == 0) + $thumbnail = $profilePic; + + //Build and add final item + $item->uri = 'https://facebook.com'.str_replace('&', '&', $post->find('abbr')[0]->parent()->getAttribute('href')); + $item->thumbnailUri = $thumbnail; + $item->content = $content; + $item->title = $title; + $item->author = $author; + $item->timestamp = $date; + $this->items[] = $item; } - - $item->uri = 'https://facebook.com'.str_replace("&", "&", $post->find("abbr")[0]->parent()->getAttribute("href")); - - $item->content = $content; - $item->title = $param['u']." | ".strip_tags($content); - $item->timestamp = $date; - - $this->items[] = $item; } } + } + public function getName() { + return (isset($this->name) ? $this->name.' - ' : '').'Facebook Bridge'; + } - } + public function getURI() { + return 'http://facebook.com'; + } - public function getName(){ - return 'Facebook Bridge'; - } - - public function getURI(){ - return 'http://facebook.com'; - } - - public function getCacheDuration(){ - return 300; // 5 minutes - } + public function getCacheDuration() { + return 300; // 5 minutes + } } - -?> diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 16126fda..c91a4837 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -4,10 +4,11 @@ * Returns the newest articles * 2014-05-25 * -* @name Nextinpact Bridge +* @name NextInpact Bridge * @homepage http://www.nextinpact.com/ * @description Returns the newest articles. * @maintainer qwertygc +* @update 2015-09-05 */ class NextInpactBridge extends BridgeAbstract { @@ -22,12 +23,15 @@ class NextInpactBridge extends BridgeAbstract { function ExtractContent($url) { $html2 = file_get_html($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' - .'

' + .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; + $premium_article = $html2->find('h2.title_reserve_article', 0)->innertext; + if (strlen($premium_article) > 0) + $text = $text.'

'.$premium_article.'

'; return $text; } - $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request Nextinpact.', 404); + $html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404); $limit = 0; foreach($html->find('item') as $element) { @@ -46,16 +50,16 @@ class NextInpactBridge extends BridgeAbstract { } - public function getName(){ - return 'Nextinpact Bridge'; - } + public function getName() { + return 'Nextinpact Bridge'; + } - public function getURI(){ - return 'http://www.nextinpact.com/'; - } + public function getURI() { + return 'http://www.nextinpact.com/'; + } - public function getCacheDuration(){ - return 3600; // 1 hour + public function getCacheDuration() { + return 3600; // 1 hour // return 0; - } + } } diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 308835db..a2eee25b 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -8,7 +8,7 @@ * @homepage https://wordpress.com/ * @description Returns the 3 newest full posts of a Wordpress blog * @maintainer aledeg - * @update 2014-05-26 + * @update 2015-09-05 * @use1(url="blog URL (required)", name="blog name") */ class WordPressBridge extends BridgeAbstract { @@ -24,14 +24,15 @@ class WordPressBridge extends BridgeAbstract { } $html = file_get_html($this->url) or $this->returnError("Could not request {$this->url}.", 404); + $posts = $html->find('.post'); - $posts = $html->find('.post'); if(!empty($posts) ) { $i=0; foreach ($html->find('.post') as $article) { if($i < 3) { $uri = $article->find('a', 0)->href; - $this->items[] = $this->getDetails($uri); + $thumbnail = $article->find('img', 0)->src; + $this->items[] = $this->getDetails($uri, $thumbnail); $i++; } } @@ -41,14 +42,19 @@ class WordPressBridge extends BridgeAbstract { } } - private function getDetails($uri) { + private function getDetails($uri, $thumbnail) { $html = file_get_html($uri) or exit; + $article = $html->find('.post', 0); + + $title = $article->find('h1', 0)->innertext; + if (strlen($title) == 0) + $title = $article->find('h2', 0)->innertext; $item = new \Item(); - - $article = $html->find('.post', 0); $item->uri = $uri; - $item->title = $article->find('h1', 0)->innertext; + $item->title = htmlspecialchars_decode($title); + $item->author = $article->find('a[rel=author]', 0)->innertext; + $item->thumbnailUri = $thumbnail; $item->content = $this->clearContent($article->find('.entry-content,.entry', 0)->innertext); $item->timestamp = $this->getDate($uri);