From d16d5147363a30efa236007ffcaac520d3b24a08 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 23 Jan 2016 21:53:23 +0100 Subject: [PATCH] [Facebook] Captcha retrieval and response submit This does not solve the captcha issue but allows the viewer to manually solve the captcha by displaying a form and using the response from the viewer. Maybe a first step to automated captcha solving? This process relies on the use of a PHP session for storing captcha details so that the user cannot submit anything other than the response to the captcha. Response is filtered before being forwarded, also. Once captcha is solved we get a page ready to be parsed, as usual. Combined with some kind of OCR, this could automatically solve the captcha, but currently it only automates the process of retrieving the challenge and submitting the response. --- bridges/FacebookBridge.php | 88 ++++++++++++++++++++++++++++++++------ 1 file changed, 76 insertions(+), 12 deletions(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 49c8d26f..7c8a90bd 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -5,7 +5,7 @@ class FacebookBridge extends BridgeAbstract{ $this->maintainer = "teromene"; $this->name = "Facebook"; - $this->uri = "http://facebook.com/"; + $this->uri = "http://www.facebook.com/"; $this->description = "Input a page title or a profile log. 
For a profile log, please insert the parameter as follow : myExamplePage/132621766841117"; $this->update = "23/10/2015"; @@ -19,7 +19,7 @@ class FacebookBridge extends BridgeAbstract{ ]'; } - public function collectData(array $param){ + public function collectData(array $param) { //Extract a string using start and end delimiters function ExtractFromDelimiters($string, $start, $end) { @@ -35,7 +35,7 @@ class FacebookBridge extends BridgeAbstract{ if (is_array($matches) && count($matches) > 1) { $link = $matches[1]; if (strpos($link, '/') === 0) - $link = 'https://facebook.com'.$link.'"'; + $link = 'https://www.facebook.com'.$link.'"'; if (strpos($link, 'facebook.com/l.php?u=') !== false) $link = urldecode(ExtractFromDelimiters($link, 'facebook.com/l.php?u=', '&')); return ' href="'.$link.'"'; @@ -75,18 +75,77 @@ class FacebookBridge extends BridgeAbstract{ return $matches[0]; }; - $html = ''; + $html = null; - if(isset($param['u'])) { - if(!strpos($param['u'], "/")) { - $html = file_get_html('https://facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); - } else { - $html = file_get_html('https://facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + //Handle captcha response sent by the viewer + if (isset($_POST['captcha_response'])) + { + if (session_status() == PHP_SESSION_NONE) + session_start(); + if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) + { + $captcha_action = $_SESSION['captcha_action']; + $captcha_fields = $_SESSION['captcha_fields']; + $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']); + $http_options = array( + 'http' => array( + 'method' => 'POST', + 'user_agent'=> ini_get('user_agent'), + 'header'=>array("Content-type: application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"), + 'content' => http_build_query($captcha_fields), + 
), + ); + $context = stream_context_create($http_options); + $html = file_get_contents($captcha_action, false, $context); + if ($html === FALSE) { $this->returnError('Failed to submit captcha response back to Facebook', 500); } + unset($_SESSION['captcha_fields']); + $html = str_get_html($html); } - } else { - $this->returnError('You must specify a Facebook username.', 400); + unset($_SESSION['captcha_fields']); + unset($_SESSION['captcha_action']); } + //Retrieve page contents + if (is_null($html)) { + if (isset($param['u'])) { + if (!strpos($param['u'], "/")) { + $html = file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } else { + $html = file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + } + } else { + $this->returnError('You must specify a Facebook username.', 400); + } + } + + //Handle captcha form? + $captcha = $html->find('div.captcha_interstitial', 0); + if (!is_null($captcha)) + { + //Save form for submitting after getting captcha response + if (session_status() == PHP_SESSION_NONE) + session_start(); + $captcha_fields = array(); + foreach ($captcha->find('input, button') as $input) + $captcha_fields[$input->name] = $input->value; + $_SESSION['captcha_fields'] = $captcha_fields; + $_SESSION['captcha_action'] = 'https://www.facebook.com'.$captcha->find('form', 0)->action; + + //Show captcha filling form to the viewer, proxying the captcha image + $img = base64_encode(file_get_contents($captcha->find('img', 0)->src)); + header('HTTP/1.1 500 '.Http::getMessageForCode(500)); + header('Content-Type: text/html'); + die('
' + .'

Facebook captcha challenge

' + .'

Unfortunately, rss-bridge cannot fetch the requested page.
' + .'Facebook wants rss-bridge to resolve the following captcha:

' + .'

' + .'

Response: ' + .'

' + .'
'); + } + + //No captcha? We can carry on retrieving page contents :) $element = $html->find('[id^=PagePostsSectionPagelet-]')[0]->children(0)->children(0); if(isset($element)) { @@ -144,7 +203,7 @@ class FacebookBridge extends BridgeAbstract{ $thumbnail = $profilePic; //Build and add final item - $item->uri = 'https://facebook.com'.str_replace('&', '&', $post->find('abbr')[0]->parent()->getAttribute('href')); + $item->uri = 'https://facebook.com'.$post->find('abbr')[0]->parent()->getAttribute('href'); $item->thumbnailUri = $thumbnail; $item->content = $content; $item->title = $title; @@ -154,7 +213,12 @@ class FacebookBridge extends BridgeAbstract{ } } } + } + public function setDatas(array $param){ + if (isset($param['captcha_response'])) + unset($param['captcha_response']); + parent::setDatas($param); } public function getName() {