2015-08-31 22:59:08 +02:00
< ? php
/**
 * Bridge that scrapes the no-script rendering of a Facebook page or profile
 * timeline and converts every dated post into a feed item.
 *
 * When Facebook answers with a captcha interstitial, the challenge is relayed
 * to the viewer as an HTML form; the answer is posted back to Facebook on the
 * next request using form data kept in the PHP session.
 */
class FacebookBridge extends BridgeAbstract {

    /**
     * Declares the bridge metadata and its single "u" (username) parameter.
     */
    public function loadMetadatas() {
        $this->maintainer = "teromene";
        $this->name = "Facebook";
        $this->uri = "http://www.facebook.com/";
        $this->description = "Input a page title or a profile log. For a profile log, please insert the parameter as follow : myExamplePage/132621766841117";
        $this->update = "31/03/2016";

        $this->parameters[] =
        '[
            {
                "name" : "Username",
                "identifier" : "u",
                "required" : "required"
            }
        ]';
    }

    /**
     * Fetches the timeline and fills $this->items with one item per post.
     *
     * Flow: submit a pending captcha answer if one was posted, otherwise fetch
     * the page; if the response is itself a captcha challenge, relay it to the
     * viewer and stop; otherwise parse the posts.
     *
     * @param array $param Request parameters; the 'u' key holds the page name
     *                     or a "name/id" profile path.
     */
    public function collectData(array $param) {
        $html = null;

        // A captcha answer sent by the viewer takes precedence over a fresh fetch.
        if (isset($_POST['captcha_response'])) {
            $html = $this->submitCaptchaResponse($_POST['captcha_response']);
        }

        // No usable captcha round-trip: retrieve the page contents normally.
        if (is_null($html)) {
            $html = $this->fetchTimelinePage($param);
        }

        // NOTE(review): returnError() is expected to abort the request (the
        // code has always relied on that) - bail out defensively if it did not.
        if (!is_object($html)) {
            return;
        }

        // Facebook may answer with a captcha form instead of the timeline.
        $captcha = $html->find('div.captcha_interstitial', 0);
        if (!is_null($captcha)) {
            $this->relayCaptchaChallenge($captcha);
        }

        // No captcha? We can carry on retrieving page contents :)
        $element = $this->findTimelineContainer($html);
        if (is_null($element)) {
            return;
        }

        $username = isset($param['u']) ? $param['u'] : '';
        $titleNode = $html->find('title#pageTitle', 0);
        $author = is_object($titleNode)
            ? str_replace(' | Facebook', '', $titleNode->innertext)
            : $username;
        $profilePic = 'https://graph.facebook.com/' . $username . '/picture?width=200&height=200';
        $this->name = $author;

        foreach ($element->children() as $post) {
            // Only children carrying an <abbr> (the post date) are treated as posts.
            $abbr = $post->find('abbr', 0);
            if (!is_object($abbr)) {
                continue;
            }

            $content = $this->cleanPostContent($post);

            // Retrieve date of the post
            $date = $abbr->hasAttribute('data-utime') ? $abbr->getAttribute('data-utime') : 0;

            // Build title from username and content
            $title = $this->truncateAtWord($author, 24) . ' | ' . strip_tags($content);
            $title = $this->truncateAtWord($title, 64);

            // Use the image at index 1 as thumbnail if available, or profile pic fallback.
            // NOTE(review): index 0 is skipped, presumably because it is the
            // author's avatar - confirm against the current markup.
            $thumbnail = $post->find('img', 1);
            $thumbnail = is_object($thumbnail) ? $thumbnail->src : $profilePic;

            // Build and add final item
            $item = new \Item();
            $item->uri = 'https://facebook.com' . $abbr->parent()->getAttribute('href');
            $item->thumbnailUri = $thumbnail;
            $item->content = $content;
            $item->title = $title;
            $item->author = $author;
            $item->timestamp = $date;
            $this->items[] = $item;
        }
    }

    /**
     * Drops the captcha answer from the parameters before delegating to the
     * parent implementation.
     *
     * @param array $param Request parameters.
     */
    public function setDatas(array $param) {
        // unset() on a missing key is a no-op, so no isset() check is needed.
        unset($param['captcha_response']);
        parent::setDatas($param);
    }

    /**
     * @return string The feed name, prefixed with the page author once known.
     */
    public function getName() {
        return (isset($this->name) ? $this->name . ' - ' : '') . 'Facebook Bridge';
    }

    /**
     * @return string Base URI of the bridged site.
     */
    public function getURI() {
        return 'http://facebook.com';
    }

    /**
     * @return int Cache lifetime in seconds.
     */
    public function getCacheDuration() {
        return 300; // 5 minutes
    }

    /**
     * Posts the viewer's captcha answer back to Facebook, using the form
     * fields and action saved in the session when the challenge was relayed.
     *
     * @param string $response Raw captcha answer sent by the viewer.
     * @return object|null Parsed response document, or null when no captcha
     *                     form is pending in the session.
     */
    private function submitCaptchaResponse($response) {
        if (session_status() == PHP_SESSION_NONE) {
            session_start();
        }

        $html = null;
        if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
            $captchaAction = $_SESSION['captcha_action'];
            $captchaFields = $_SESSION['captcha_fields'];
            // Keep only alphanumeric characters of the viewer's answer.
            $captchaFields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $response);

            $httpOptions = array(
                'http' => array(
                    'method' => 'POST',
                    'user_agent' => ini_get('user_agent'),
                    'header' => array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captchaAction\r\nCookie: noscript=1\r\n"),
                    'content' => http_build_query($captchaFields),
                ),
            );
            $context = stream_context_create($httpOptions);

            $body = file_get_contents($captchaAction, false, $context);
            if ($body === false) {
                $this->returnError('Failed to submit captcha response back to Facebook', 500);
            }

            // The parser can fail to produce a document; never hand a
            // non-object to the caller, which dereferences the result.
            $html = str_get_html($body);
            if (!is_object($html)) {
                $this->returnError('Facebook returned an unreadable response to the captcha submission', 500);
                $html = null;
            }
        }

        // The saved form is single-use, whether or not it could be submitted.
        unset($_SESSION['captcha_fields'], $_SESSION['captcha_action']);

        return $html;
    }

    /**
     * Downloads the no-script timeline page for the requested user.
     *
     * @param array $param Request parameters; 'u' is required.
     * @return object|null Parsed document, or null when it could not be fetched.
     */
    private function fetchTimelinePage(array $param) {
        if (!isset($param['u'])) {
            $this->returnError('You must specify a Facebook username.', 400);
            return null;
        }

        // strpos() returns 0 for a leading slash, so compare against false
        // explicitly instead of relying on truthiness.
        if (strpos($param['u'], "/") === false) {
            $url = 'https://www.facebook.com/' . urlencode($param['u']) . '?_fb_noscript=1';
        } else {
            $url = 'https://www.facebook.com/pages/' . $param['u'] . '?_fb_noscript=1';
        }

        $html = $this->file_get_html($url);
        if (!$html) {
            $this->returnError('No results for this query.', 404);
            return null;
        }

        return $html;
    }

    /**
     * Saves the captcha form in the session, then shows the challenge to the
     * viewer (proxying the captcha image inline) and terminates the request.
     *
     * @param object $captcha The captcha interstitial node.
     */
    private function relayCaptchaChallenge($captcha) {
        $form = $captcha->find('form', 0);
        $image = $captcha->find('img', 0);
        if (!is_object($form) || !is_object($image)) {
            $this->returnError('Facebook captcha challenge could not be parsed', 500);
            return;
        }

        $imageData = file_get_contents($image->src);
        if ($imageData === false) {
            $this->returnError('Failed to download the Facebook captcha image', 500);
            return;
        }

        // Save form for submitting after getting captcha response
        if (session_status() == PHP_SESSION_NONE) {
            session_start();
        }
        $captchaFields = array();
        foreach ($captcha->find('input, button') as $input) {
            $captchaFields[$input->name] = $input->value;
        }
        $_SESSION['captcha_fields'] = $captchaFields;
        $_SESSION['captcha_action'] = 'https://www.facebook.com' . $form->action;

        // The query string is client-controlled: escape it before echoing it
        // into the form's action attribute.
        $queryString = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : '';
        $queryString = htmlspecialchars($queryString, ENT_QUOTES, 'UTF-8');

        // Show captcha filling form to the viewer, proxying the captcha image
        $img = base64_encode($imageData);
        header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
        header('Content-Type: text/html');
        die('<form method="post" action="?' . $queryString . '">'
            . '<h2>Facebook captcha challenge</h2>'
            . '<p>Unfortunately, rss-bridge cannot fetch the requested page.<br />'
            . 'Facebook wants rss-bridge to resolve the following captcha:</p>'
            . '<p><img src="data:image/png;base64,' . $img . '" /></p>'
            . '<p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />'
            . '<input type="submit" value="Submit!" /></p>'
            . '</form>');
    }

    /**
     * Walks from the timeline column down to the node whose children are the
     * posts: three times first child, then next sibling, then first child.
     *
     * @param object $html Parsed timeline document.
     * @return object|null The posts container, or null if any step is missing.
     */
    private function findTimelineContainer($html) {
        $node = $html->find('#pagelet_timeline_main_column', 0);

        for ($depth = 0; $depth < 3 && is_object($node); $depth++) {
            $node = $node->children(0);
        }
        if (is_object($node)) {
            $node = $node->next_sibling();
        }
        if (is_object($node)) {
            $node = $node->children(0);
        }

        return is_object($node) ? $node : null;
    }

    /**
     * Reduces a post node to light HTML: strips Facebook chrome, keeps only
     * links, images and basic formatting, fixes links and restores emoticons.
     *
     * @param object $post Post node; its string form is used as the markup.
     * @return string Cleaned post content.
     */
    private function cleanPostContent($post) {
        $content = (string) $post;

        // Retrieve post contents: drop the wrappers surrounding the user content
        $wrapperPatterns = array(
            '/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i',
            '/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i',
            '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
            '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
        );
        foreach ($wrapperPatterns as $pattern) {
            $content = preg_replace($pattern, '', $content);
        }

        // Remove html nodes, keep only img, links, basic formatting
        $content = strip_tags($content, '<a><img><i><u>');

        // Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
        $content = preg_replace_callback('/ href=\"([^"]+)\"/i', array($this, 'unescapeFacebookLink'), $content);

        // Clean useless html tag properties and fix link closing tags
        $uselessProperties = array('onmouseover', 'onclick', 'target', 'ajaxify', 'tabindex',
            'class', 'style', 'data-[^=]*', 'aria-[^=]*', 'role', 'rel', 'id');
        foreach ($uselessProperties as $propertyName) {
            $content = preg_replace('/ ' . $propertyName . '=\"[^"]*\"/i', '', $content);
        }
        $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

        // Convert textual representation of emoticons eg "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
        $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', array($this, 'unescapeFacebookEmoticon'), $content);

        return $content;
    }

    /**
     * Returns the part of $string located between $start and $end.
     *
     * @param string $string Text to search.
     * @param string $start  Delimiter preceding the wanted section.
     * @param string $end    Delimiter following the wanted section.
     * @return string|false The section (everything after $start when $end is
     *                      absent), or false when $start is not found.
     */
    private function extractBetween($string, $start, $end) {
        $startPos = strpos($string, $start);
        if ($startPos === false) {
            return false;
        }

        $section = (string) substr($string, $startPos + strlen($start));
        $endPos = strpos($section, $end);

        // Without an end delimiter the section runs to the end of the string.
        return $endPos === false ? $section : substr($section, 0, $endPos);
    }

    /**
     * preg_replace_callback() handler cleaning a Facebook link: relative links
     * become absolute and l.php redirections are replaced by their target.
     *
     * @param array $matches Match data; index 1 holds the href value.
     * @return string The rewritten ` href="..."` attribute.
     */
    private function unescapeFacebookLink($matches) {
        if (!is_array($matches) || count($matches) < 2) {
            // Nothing captured: leave the matched text untouched.
            return (is_array($matches) && isset($matches[0])) ? $matches[0] : '';
        }

        $link = $matches[1];
        if (strpos($link, '/') === 0) {
            $link = 'https://www.facebook.com' . $link;
        }
        if (strpos($link, 'facebook.com/l.php?u=') !== false) {
            $target = urldecode($this->extractBetween($link, 'facebook.com/l.php?u=', '&'));
            // The decoded target is raw text taken from post content: escape
            // it so a quote cannot break out of the href attribute.
            $link = htmlspecialchars($target, ENT_QUOTES, 'UTF-8');
        }

        return ' href="' . $link . '"';
    }

    /**
     * preg_replace_callback() handler converting a textual Facebook emoticon
     * back to its short form.
     *
     * @param array $matches Match data; captured words are tried in order.
     * @return string The emoticon, or the full match when no word is known.
     */
    private function unescapeFacebookEmoticon($matches) {
        static $facebookEmoticons = array(
            'smile' => ':)',
            'frown' => ':(',
            'tongue' => ':P',
            'grin' => ':D',
            'gasp' => ':O',
            'wink' => ';)',
            'pacman' => ':<',
            'grumpy' => '>_<',
            'unsure' => ':/',
            'cry' => ':\'(',
            'kiki' => '^_^',
            'glasses' => '8-)',
            'sunglasses' => 'B-)',
            'heart' => '<3',
            'devil' => ']:D',
            'angel' => '0:)',
            'squint' => '-_-',
            'confused' => 'o_O',
            'upset' => 'xD',
            'colonthree' => ':3',
            'like' => '👍',
        );

        $count = count($matches);
        for ($i = 1; $i < $count; $i++) {
            if (isset($facebookEmoticons[$matches[$i]])) {
                return $facebookEmoticons[$matches[$i]];
            }
        }

        return $matches[0];
    }

    /**
     * Shortens $text to roughly $limit bytes, cutting at the first line break
     * produced by wordwrap() and appending '...'.
     *
     * @param string $text  Text to shorten.
     * @param int    $limit Maximum length before truncation applies.
     * @return string $text unchanged when short enough, else the shortened text.
     */
    private function truncateAtWord($text, $limit) {
        if (strlen($text) <= $limit) {
            return $text;
        }

        $breakAt = strpos(wordwrap($text, $limit), "\n");
        if ($breakAt === false) {
            // A single unbroken word: wordwrap() inserted no line break, so
            // cut hard at the limit without splitting a multi-byte character.
            $head = function_exists('mb_strcut')
                ? mb_strcut($text, 0, $limit, 'UTF-8')
                : substr($text, 0, $limit);
            return $head . '...';
        }

        return substr($text, 0, $breakAt) . '...';
    }
}