2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
class FB2Bridge extends BridgeAbstract {
2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
// Bridge metadata constants.
// NOTE(review): presumably consumed by BridgeAbstract to describe this bridge - confirm against the base class.
const MAINTAINER = 'teromene';
const NAME = 'Facebook Alternate';
// Base URL; the link-cleaning callback in collectData() prepends it to relative hrefs.
const URI = 'https://www.facebook.com/';
2017-02-08 11:21:59 +00:00
// NOTE(review): unit is presumably seconds; getCacheDuration() returns 60 * 60 * 3 instead - confirm which one applies.
const CACHE_TIMEOUT = 1000;
2017-02-11 16:16:56 +01:00
const DESCRIPTION = 'Input a page title or a profile log. For a profile log,
please insert the parameter as follow : myExamplePage/132621766841117';
2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
// Single parameter set: 'u' is the required value that collectData() reads through getInput('u').
const PARAMETERS = array( array(
'u' => array(
'name' => 'Username',
'required' => true
2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
/**
 * Fetches the posts of the page given by the 'u' input and appends them to $this->items.
 *
 * Steps visible in the body: request the touch.facebook.com page to obtain cookies and
 * the page ID, request the "pages_reaction_units" endpoint for that ID, rebuild the HTML
 * embedded in the response and turn every <article> element into an item with the keys
 * 'uri', 'content', 'title' and 'author'.
 */
public function collectData(){
2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
// Guarded declaration: this function lives inside a method body, so an
// unguarded declaration would raise "Cannot redeclare" on a second call.
if(!function_exists('extractFromDelimiters')) {
    /**
     * Returns the part of $string located between $start and $end.
     *
     * @param string $string Text to search in.
     * @param string $start  Delimiter preceding the wanted section.
     * @param string $end    Delimiter following the wanted section.
     * @return string|false  The section found after the first occurrence of $start and
     *                       before the next occurrence of $end; everything after $start
     *                       when $end does not occur; false when $start does not occur.
     */
    function extractFromDelimiters($string, $start, $end){
        $startPos = strpos($string, $start);
        if($startPos === false) {
            return false;
        }

        $remainder = (string)substr($string, $startPos + strlen($start));

        // Without this check a missing end delimiter produced substr(..., 0, false),
        // i.e. an empty string, silently discarding the whole section.
        $endPos = strpos($remainder, $end);
        if($endPos === false) {
            return $remainder;
        }

        return substr($remainder, 0, $endPos);
    }
}
2017-02-08 11:21:59 +00:00
//Utility function for cleaning a Facebook link
2017-02-11 16:16:56 +01:00
/**
 * preg_replace_callback() handler that rewrites one ` href="..."` attribute.
 *
 * - Links starting with '/' are made absolute using self::URI.
 * - Links going through Facebook's "l.php?u=" redirector are replaced by the
 *   url-decoded target found in the "u" parameter.
 *
 * @param array $matches Match data; index 1 holds the original href value.
 * @return string        The rewritten ` href="..."` attribute, or the original match
 *                       when no href value was captured.
 */
$unescape_fb_link = function($matches){
    if(!is_array($matches) || count($matches) < 2) {
        // Nothing captured: hand back the matched text instead of dropping it
        return (is_array($matches) && isset($matches[0])) ? $matches[0] : '';
    }

    $link = $matches[1];

    if(strpos($link, '/') === 0) {
        // self::URI already ends with '/', so trim it to avoid a double slash.
        // (The previous code also appended a stray '"' to the link here.)
        $link = rtrim(self::URI, '/') . $link;
    }

    if(strpos($link, 'facebook.com/l.php?u=') !== false) {
        $target = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
        // The decoded target is raw text, so escape it before writing it into an attribute
        $link = htmlspecialchars($target, ENT_QUOTES, 'UTF-8');
    }

    return ' href="' . $link . '"';
};
2017-02-08 11:21:59 +00:00
//Utility function for converting facebook emoticons
2017-02-11 16:16:56 +01:00
/**
 * preg_replace_callback() handler mapping Facebook's textual emoticon names
 * (e.g. "smile") back to their ASCII form (e.g. ":)").
 *
 * @param array $matches Match data; index 0 is the full match, the following
 *                       indexes are the captured words.
 * @return string        The emoticon of the first captured word that is a known
 *                       name, otherwise the full match unchanged.
 */
$unescape_fb_emote = function($matches){
    static $emoticonMap = array(
        'smile' => ':)',
        'frown' => ':(',
        'tongue' => ':P',
        'grin' => ':D',
        'gasp' => ':O',
        'wink' => ';)',
        'pacman' => ':<',
        'grumpy' => '>_<',
        'unsure' => ':/',
        'cry' => ':\'(',
        'kiki' => '^_^',
        'glasses' => '8-)',
        'sunglasses' => 'B-)',
        'heart' => '<3',
        'devil' => ']:D',
        'angel' => '0:)',
        'squint' => '-_-',
        'confused' => 'o_O',
        'upset' => 'xD',
        'colonthree' => ':3',
        'like' => '👍'
    );

    // Captured groups start at index 1; index 0 is the whole match
    $total = count($matches);
    for($idx = 1; $idx < $total; ++$idx) {
        $candidate = $matches[$idx];
        if(is_string($candidate) && array_key_exists($candidate, $emoticonMap)) {
            return $emoticonMap[$candidate];
        }
    }

    return $matches[0];
};
2017-07-29 19:28:00 +02:00
// Resolve the numeric page ID for the requested page or profile.
if($this->getInput('u') !== null) {
2017-02-11 16:16:56 +01:00
$page = 'https://touch.facebook.com/' . $this->getInput('u');
2017-02-08 11:21:59 +00:00
// getCookies() performs a first request and returns the cookies set by the response;
// getPageID() then requests the page again with those cookies.
$cookies = $this->getCookies($page);
$pageID = $this->getPageID($page, $cookies);
2017-07-29 19:28:00 +02:00
// getPageID() returns -1 when the page shows a signup button; null means no ID was found.
if($pageID === null) {
2017-02-11 16:16:56 +01:00
echo <<<EOD
Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge
2017-06-15 11:51:11 +01:00
} elseif($pageID == -1) {
echo <<<EOD
This page is not accessible without being logged in.
2017-02-11 16:16:56 +01:00
2017-02-08 11:21:59 +00:00
//Build the string for the first request
2017-02-11 16:16:56 +01:00
$requestString = 'https://touch.facebook.com/pages_reaction_units/more/?page_id='
. $pageID
. '&cursor={"card_id"%3A"videos"%2C"has_next_page"%3Atrue}&surface=mobile_page_home&unit_count=8';
2017-02-08 11:21:59 +00:00
$fileContent = file_get_contents($requestString);
// NOTE(review): $articleIndex and $maxArticle are not read anywhere in the visible code.
$articleIndex = 0;
$maxArticle = 3;
// buildContent() extracts the HTML embedded in the response and parses it into a DOM.
$html = $this->buildContent($fileContent);
$author = $this->getInput('u');
2017-06-15 11:51:11 +01:00
2018-06-29 23:55:33 +02:00
// Shortens $text to roughly $width bytes, preferring a word boundary.
// Falls back to a hard cut when the text has no break point: the previous
// inline code used strpos()'s false as a length and produced just '...'.
$shorten = function($text, $width){
    if(strlen($text) <= $width) {
        return $text;
    }
    $cut = strpos(wordwrap($text, $width), "\n");
    if($cut === false) {
        $cut = $width;
    }
    return substr($text, 0, $cut) . '...';
};

// Turn every <article> element of the response into a feed item
foreach($html->find('article') as $article) {
    $item = array();

    // The permalink is the first link of the post's header container.
    // Skip the article when Facebook's markup does not contain it, instead of
    // failing with a call on null.
    $permalinkBox = $article->find("div[class='_52jc _5qc4 _24u0 _36xo']", 0);
    $permalink = ($permalinkBox !== null) ? $permalinkBox->find('a', 0) : null;
    if($permalink === null) {
        continue;
    }
    $item['uri'] = 'http://touch.facebook.com' . $permalink->getAttribute('href');

    // Blank the post header and footer so that only the post body is left
    foreach(array('header', 'footer') as $tagName) {
        $node = $article->find($tagName, 0);
        if($node !== null) {
            $node->innertext = '';
        }
    }

    //Remove html nodes, keep only img, links, basic formatting
    $content = strip_tags($article, '<a><img><i><u><br><p>');

    //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
    $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);

    //Clean useless html tag properties and fix link closing tags
    foreach(array('id') as $property_name) {
        $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
    }
    $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

    //Convert textual representation of emoticons eg
    // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
    $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);

    $item['content'] = $content;

    // Title: shortened author name followed by the plain-text post, capped at 64 bytes
    $title = $shorten($author, 24) . ' | ' . strip_tags($content);
    $item['title'] = $shorten($title, 64);

    $item['author'] = $author;
    array_push($this->items, $item);
}
// Currently unused. Intended for fetching more than the first 3 elements, which are served on a separate page.
2017-02-11 16:16:56 +01:00
/**
 * Matches a pattern against $string and returns a string assembled with implode().
 *
 * NOTE(review): the pieces passed to both implode() calls are not visible in this
 * view of the file, so the exact pattern and the format of the returned value
 * cannot be documented here - confirm against the full source.
 *
 * @param string $string Text the pattern is applied to.
 * @param mixed  $pageID Not referenced in the visible lines; presumably the Facebook page ID - confirm.
 */
private function computeNextLink($string, $pageID){
2017-02-08 11:21:59 +00:00
2017-02-12 13:58:42 +01:00
$regex = implode(
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2017-02-08 11:21:59 +00:00
// $result receives the captured groups of the pattern built above
preg_match($regex, $string, $result);
2017-02-12 13:58:42 +01:00
return implode(
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2018-06-29 23:55:33 +02:00
2017-02-12 13:58:42 +01:00
2017-02-08 11:21:59 +00:00
//Builds the HTML from the encoded JS that Facebook provides.
2017-02-11 16:16:56 +01:00
/**
 * Builds the HTML from the encoded JS that Facebook provides.
 *
 * The response carries the markup as a JSON string literal between `"html":"` and
 * `","replace`. That literal is JSON-decoded, HTML entities are decoded and the
 * result is parsed with str_get_html().
 *
 * @param string|false $pageContent Raw response body (false when the download failed).
 * @return mixed                    Whatever str_get_html() returns for the decoded markup.
 * @throws \Exception               When the payload is missing or cannot be decoded.
 *                                  Previously these cases ended in an undefined offset
 *                                  and a fatal error in the caller.
 */
private function buildContent($pageContent){
    $regex = '/\\"html\\":\\"(.*?)\\",\\"replace/';

    if(!is_string($pageContent) || preg_match($regex, $pageContent, $result) !== 1) {
        throw new \Exception('Unable to find the HTML payload in the Facebook response');
    }

    // Wrap the captured text in quotes so json_decode() resolves its escape sequences
    $markup = json_decode('"' . $result[1] . '"');
    if(!is_string($markup)) {
        throw new \Exception('Unable to decode the HTML payload of the Facebook response');
    }

    return str_get_html(html_entity_decode($markup));
}
2017-02-11 16:16:56 +01:00
//Builds the cookie from the page, as Facebook sometimes refuses to give
//the page if no cookie is provided.
/**
 * Requests $pageURL once and returns the cookies set by the response, formatted
 * for use in a "Cookie:" request header.
 *
 * @param string $pageURL URL to request.
 * @return string         "name=value" pairs joined with "; ", or an empty string
 *                        when no response headers or no cookies were received.
 */
private function getCookies($pageURL){
    $ctx = stream_context_create(array(
        'http' => array(
            'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
            // Extra request headers belong in 'header'; a bare 'Accept' key is
            // not a known http context option and was never sent.
            'header' => 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        )
    ));

    //First request to get the cookie
    // Only the response headers matter here, the body is discarded.
    file_get_contents($pageURL, false, $ctx);

    // $http_response_header is populated by the HTTP wrapper in the local scope;
    // it stays undefined when the request could not be made at all.
    if(empty($http_response_header)) {
        return '';
    }

    $prefix = 'Set-Cookie:';
    $cookies = array();
    foreach($http_response_header as $hdr) {
        if(stripos($hdr, $prefix) !== 0) {
            continue;
        }
        // Cut after the header name only, so values containing ':' stay intact,
        // then drop the cookie attributes (path, expires, ...) after the first ';'
        $pair = trim(explode(';', substr($hdr, strlen($prefix)))[0]);
        if($pair !== '') {
            $cookies[] = $pair;
        }
    }

    return implode('; ', $cookies);
}
//Get the page ID from the Facebook page.
2017-02-11 16:16:56 +01:00
/**
 * Get the page ID from the Facebook page.
 *
 * @param string $page    URL of the page to request.
 * @param string $cookies Value sent in the "Cookie:" request header.
 * @return string|int|null The ID captured from the page, -1 when the page contains
 *                         'signup-button' (the caller reports this as a page that
 *                         requires login), or null when no ID was found.
 */
private function getPageID($page, $cookies){
    $context = stream_context_create(array(
        'http' => array(
            'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
            'header' => 'Cookie: ' . $cookies
        )
    ));

    $pageContent = file_get_contents($page, false, $context);
    if($pageContent === false) {
        // Download failed: there is nothing to search in
        return null;
    }

    // Strict comparison: a match at offset 0 is still a match
    if(strpos($pageContent, 'signup-button') !== false) {
        return -1;
    }

    //Get the page ID if we don't have a captcha
    if(preg_match('/page_id=([0-9]*)&/', $pageContent, $matches) === 1) {
        return $matches[1];
    }

    //Get the page ID if we do have a captcha
    if(preg_match('/"pageID":"([0-9]*)"/', $pageContent, $matches) === 1) {
        return $matches[1];
    }

    // Explicit null instead of relying on an undefined array offset
    return null;
}
2017-02-11 16:16:56 +01:00
/**
 * Returns the display name of the bridge.
 *
 * @return string 'Facebook Bridge', prefixed with "<name> - " when $this->name is set.
 */
public function getName(){
    $prefix = '';
    if(isset($this->name)) {
        $prefix = $this->name . ' - ';
    }

    return $prefix . 'Facebook Bridge';
}
2017-02-08 11:21:59 +00:00
2017-02-11 16:16:56 +01:00
/**
 * Returns the address reported for this bridge.
 *
 * @return string Always 'http://facebook.com'.
 */
public function getURI(){
    $homepage = 'http://facebook.com';

    return $homepage;
}
2017-02-11 16:16:56 +01:00
/**
 * Returns the cache lifetime used for this bridge.
 *
 * NOTE(review): the unit is presumably seconds, which makes this three hours;
 * an earlier comment claimed "5 minutes", which does not match the value.
 *
 * @return int 10800
 */
public function getCacheDuration(){
    $hours = 3;

    return 60 * 60 * $hours; // 10800
}