Unfortunately, rss-bridge cannot fetch the requested page.
+Facebook wants rss-bridge to resolve the following captcha:
Response: +
+ +EOD; + + die($message); + } + + /** + * Checks if a capture response was received and tries to load the contents + * @return mixed null if no capture response was received, simplhtmldom document otherwise + */ + private function handleCaptchaResponse() { if (isset($_POST['captcha_response'])) { if (session_status() == PHP_SESSION_NONE) session_start(); + if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) { $captcha_action = $_SESSION['captcha_action']; $captcha_fields = $_SESSION['captcha_fields']; $captcha_fields['captcha_response'] = preg_replace('/[^a-zA-Z0-9]+/', '', $_POST['captcha_response']); - $header = array("Content-type: -application/x-www-form-urlencoded\r\nReferer: $captcha_action\r\nCookie: noscript=1\r\n"); + $header = array( + 'Content-type: application/x-www-form-urlencoded', + 'Referer: ' . $captcha_action, + 'Cookie: noscript=1' + ); + $opts = array( CURLOPT_POST => 1, CURLOPT_POSTFIELDS => http_build_query($captcha_fields) ); - $html = getContents($captcha_action, $header, $opts); + $html = getSimpleHTMLDOM($captcha_action, $header, $opts) + or returnServerError('Failed to submit captcha response back to Facebook'); - if($html === false) { - returnServerError('Failed to submit captcha response back to Facebook'); - } - unset($_SESSION['captcha_fields']); - $html = str_get_html($html); + return $html; } + unset($_SESSION['captcha_fields']); unset($_SESSION['captcha_action']); } - //Retrieve page contents + return null; + } + + private function collectUserData(){ + + $html = $this->handleCaptchaResponse(); + + // Retrieve page contents if(is_null($html)) { - $header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n"); - // Check if the user provided a fully qualified URL - if (filter_var($this->getInput('u'), FILTER_VALIDATE_URL)) { + $header = array('Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE')); - $urlparts = parse_url($this->getInput('u')); + $html = getSimpleHTMLDOM($this->getURI(), $header) + or returnServerError('No results for this query.'); - if($urlparts['host'] !== parse_url(self::URI)['host']) { - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - } - - if(!array_key_exists('path', $urlparts) - || $urlparts['path'] === '/') { - returnClientError('The URL you provided doesn\'t contain the user name!'); - } - - $user = explode('/', $urlparts['path'])[1]; - - $html = getSimpleHTMLDOM(self::URI . urlencode($user) . '?_fb_noscript=1', $header) - or returnServerError('No results for this query.'); - - } else { - - // First character cannot be a forward slash - if(strpos($this->getInput('u'), '/') === 0) { - returnClientError('Remove leading slash "/" from the username!'); - } - - if(!strpos($this->getInput('u'), '/')) { - $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1', $header) - or returnServerError('No results for this query.'); - } else { - $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1', $header) - or returnServerError('No results for this query.'); - } - - } } - //Handle captcha form? + // Handle captcha form? $captcha = $html->find('div.captcha_interstitial', 0); - if (!is_null($captcha)) { - //Save form for submitting after getting captcha response - if (session_status() == PHP_SESSION_NONE) - session_start(); - $captcha_fields = array(); - foreach ($captcha->find('input, button') as $input) - $captcha_fields[$input->name] = $input->value; - $_SESSION['captcha_fields'] = $captcha_fields; - $_SESSION['captcha_action'] = $captcha->find('form', 0)->action; - //Show captcha filling form to the viewer, proxying the captcha image - $img = base64_encode(getContents($captcha->find('img', 0)->src)); - http_response_code(500); - header('Content-Type: text/html'); - $message = <<Unfortunately, rss-bridge cannot fetch the requested page.
- Facebook wants rss-bridge to resolve the following captcha:
Response: -
- -EOD; - die($message); + if (!is_null($captcha)) { + $this->returnCaptchaMessage($captcha); } - //No captcha? We can carry on retrieving page contents :) - //First, we check wether the page is public or not + // No captcha? We can carry on retrieving page contents :) + // First, we check wether the page is public or not $loginForm = $html->find('._585r', 0); + if($loginForm != null) { returnServerError('You must be logged in to view this page. This is not supported by RSS-Bridge.'); } + $html = defaultLinkTo($html, self::URI); + $element = $html ->find('#pagelet_timeline_main_column')[0] ->children(0) @@ -419,12 +503,9 @@ EOD; if(isset($element)) { - defaultLinkTo($element, self::URI); - $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext); - $profilePic = 'https://graph.facebook.com/' - . $this->getInput('u') - . '/picture?width=200&height=200#.image'; + + $profilePic = $html->find('meta[property="og:image"]', 0)->content; $this->authorName = $author; @@ -480,19 +561,18 @@ EOD; '', $content); - //Remove "SpSonsSoriSsés" + // Remove "SpSonsSoriSsés" $content = preg_replace( '/(?iU)]+ href="#" role="link" [^>}]+>.+<\/a>/iU', '', $content); - //Remove html nodes, keep only img, links, basic formatting + // Remove html nodes, keep only img, links, basic formatting $content = strip_tags($content, ''); - //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection - $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); + $content = $this->unescape_fb_link($content); - //Clean useless html tag properties and fix link closing tags + // Clean useless html tag properties and fix link closing tags foreach (array( 'onmouseover', 'onclick', @@ -505,31 +585,31 @@ EOD; 'aria-[^=]*', 'role', 'rel', - 'id') as $property_name) - $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); + 'id') as $property_name) { + $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); + } + $content = preg_replace('/<\/a [^>]+>/i', '
', $content); - //Convert textual representation of emoticons eg - //"smile emoticon" back to ASCII emoticons eg ":)" - $content = preg_replace_callback( - '/([^ <>]+) ([^<>]+)<\/u><\/i>/i', - $unescape_fb_emote, - $content - ); + $this->unescape_fb_emote($content); - //Retrieve date of the post + // Retrieve date of the post $date = $post->find('abbr')[0]; + if(isset($date) && $date->hasAttribute('data-utime')) { $date = $date->getAttribute('data-utime'); } else { $date = 0; } - //Build title from username and content + // Build title from username and content $title = $author; + if(strlen($title) > 24) $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...'; + $title = $title . ' | ' . strip_tags($content); + if(strlen($title) > 64) $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...'; @@ -545,8 +625,10 @@ EOD; $item['title'] = $title; $item['author'] = $author; $item['timestamp'] = $date; - if(strpos($item['content'], '