diff --git a/bridges/FB2Bridge.php b/bridges/FB2Bridge.php index cd4bb47f..be8168f6 100644 --- a/bridges/FB2Bridge.php +++ b/bridges/FB2Bridge.php @@ -65,14 +65,14 @@ class FB2Bridge extends BridgeAbstract { if($this->getInput('u') !== null) { $page = 'https://touch.facebook.com/' . $this->getInput('u'); $cookies = $this->getCookies($page); - $pageID = $this->getPageID($page, $cookies); + $pageInfo = $this->getPageInfos($page, $cookies); - if($pageID === null) { + if($pageInfo['userId'] === null) { echo <<buildContent($fileContent); - $author = $this->getInput('u'); + $author = $pageInfo['username']; foreach($html->find('article') as $content) { @@ -114,13 +111,17 @@ EOD; $content->find('footer', 0)->innertext = ''; } + if($content->find('._5rgu', 0) !== null) { + $content->find('._5rgu', 0)->innertext = ''; + } + // Replace emoticon images by their textual representation (part of the span) foreach($content->find('span[title*="emoticon"]') as $emoticon) { $emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext; } //Remove html nodes, keep only img, links, basic formatting - $content = strip_tags($content, '

'); + //$content = strip_tags($content, '

'); //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); @@ -146,6 +147,11 @@ EOD; // "smile emoticon" back to ASCII emoticons eg ":)" $content = preg_replace_callback('/([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); + //Remove the "...Plus" tag + $content = preg_replace( + '/… (|)","replaceifexists $regex = '/\\"html\\":(\".+\/div>"),"replace/'; preg_match($regex, $pageContent, $result); + return str_get_html(json_decode($result[1])); } @@ -237,8 +208,8 @@ EOD; return substr($cookies, 1); } - //Get the page ID from the Facebook page. - private function getPageID($page, $cookies){ + //Get the page ID and username from the Facebook page. + private function getPageInfos($page, $cookies){ $context = stream_context_create(array( 'http' => array( @@ -254,19 +225,28 @@ EOD; return -1; } + //Get the username + $usernameRegex = '/data-nt=\"FB:TEXT4\">(.*?)<\/div>/m'; + preg_match($usernameRegex, $pageContent, $usernameMatches); + if(count($usernameMatches) > 0) { + $username = $usernameMatches[1]; + } else { + $username = $this->getInput('u'); + } + //Get the page ID if we don't have a captcha $regex = '/page_id=([0-9]*)&/'; preg_match($regex, $pageContent, $matches); if(count($matches) > 0) { - return $matches[1]; + return array('userId' => $matches[1], 'username' => $username); } //Get the page ID if we do have a captcha $regex = '/"pageID":"([0-9]*)"/'; preg_match($regex, $pageContent, $matches); - return $matches[1]; + return array('userId' => $matches[1], 'username' => $username); }