From d7436c2d0a5c26fb350fde4f168894e79adc7562 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 5 Sep 2015 14:31:57 +0200 Subject: [PATCH] Improve Facebook Bridge Retrieve author display name from page title Build short readable title using name and content Convert relative links into absolute links Remove attributes from tags (may cause issues) Remove onclick and onmouseover (javascript code) Retrieve url of first image of post as thumbnail Use author avatar as thumbnail if no first image Use display name in feed name: name - Fb bridge Minor code indent fixes and use single quotes --- bridges/FacebookBridge.php | 134 ++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 61 deletions(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 351dc39c..109be9bb 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -1,87 +1,99 @@ returnError('No results for this query.', 404); + private $name; + + public function collectData(array $param){ + + $html = ''; + + if(isset($param['u'])) { + if(!strpos($param['u'], "/")) { + $html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } else { + $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } } else { - - $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); - + $this->returnError('You must specify a Facebook username.', 400); } - } else { - $this->returnError('You must specify a Facebook username.', 400); + $element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0); - } + if(isset($element)) { + $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext); + $profilePic = 'https://graph.facebook.com/'.$param['u'].'/picture?width=200&height=200'; + 
$this->name = $author; - - $element = $html->find("[id^=PagePostsSectionPagelet-]")[0]->children(0)->children(0); - - if(isset($element)) { - - foreach($element->children() as $post) { - - $item = new \Item(); - - if($post->hasAttribute("data-time")) { - - //Clean the content of the page - $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', "", $post); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', "", $content); - $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', "", $content); - - $content = strip_tags($content,""); + foreach($element->children() as $post) { + $item = new \Item(); - $date = $post->find("abbr")[0]; - if(isset($date) && $date->hasAttribute("data-utime")) { - $date = $date->getAttribute("data-utime"); - } else { - $date = 0; + if($post->hasAttribute("data-time")) { + + //Clean the content of the page and convert relative links into absolute links + $content = preg_replace('/(?i)>
]+)>(.+?)div\ class=\"userContent\"/i', '', $post); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div><\/div>
]+)>(.+?)div\ class=\"[^u]+userContent\"/i', '', $content); + $content = preg_replace('/(?i)>
]+)>(.+?)<\/div>/i', '', $content); + $content = str_replace(' href="/', ' href="https://facebook.com/', $content); + $content = preg_replace('/ onmouseover=\"[^"]+\"/i', '', $content); + $content = preg_replace('/ onclick=\"[^"]+\"/i', '', $content); + $content = preg_replace('/<\/a [^>]+>/i', '', $content); + $content = strip_tags($content,''); + + //Retrieve date of the post + $date = $post->find("abbr")[0]; + if(isset($date) && $date->hasAttribute('data-utime')) { + $date = $date->getAttribute('data-utime'); + } else { + $date = 0; + } + + //Build title from username and content + $title = $author; + if (strlen($title) > 24) + $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")).'...'; + $title = $title.' | '.strip_tags($content); + if (strlen($title) > 64) + $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")).'...'; + + //Use first image as thumbnail if available, or profile pic fallback + $thumbnail = $post->find('img', 1)->src; + if (strlen($thumbnail) == 0) + $thumbnail = $profilePic; + + //Build and add final item + $item->uri = 'https://facebook.com'.str_replace('&', '&', $post->find('abbr')[0]->parent()->getAttribute('href')); + $item->thumbnailUri = $thumbnail; + $item->content = $content; + $item->title = $title; + $item->author = $author; + $item->timestamp = $date; + $this->items[] = $item; } - - $item->uri = 'https://facebook.com'.str_replace("&", "&", $post->find("abbr")[0]->parent()->getAttribute("href")); - - $item->content = $content; - $item->title = $param['u']." | ".strip_tags($content); - $item->timestamp = $date; - - $this->items[] = $item; } } + } + public function getName() { + return (isset($this->name) ? $this->name.' 
- ' : '').'Facebook Bridge'; + } - } + public function getURI() { + return 'http://facebook.com'; + } - public function getName(){ - return 'Facebook Bridge'; - } - - public function getURI(){ - return 'http://facebook.com'; - } - - public function getCacheDuration(){ - return 300; // 5 minutes - } + public function getCacheDuration() { + return 300; // 5 minutes + } } - -?>