Use a new URL for the cursor. Should fix #851.

Remove the "...More" item in the output
Remove the information card data
This commit is contained in:
teromene 2018-10-18 18:07:07 +02:00
parent 16f0ee7104
commit 7596be65f2

View file

@ -65,14 +65,14 @@ class FB2Bridge extends BridgeAbstract {
if($this->getInput('u') !== null) { if($this->getInput('u') !== null) {
$page = 'https://touch.facebook.com/' . $this->getInput('u'); $page = 'https://touch.facebook.com/' . $this->getInput('u');
$cookies = $this->getCookies($page); $cookies = $this->getCookies($page);
$pageID = $this->getPageID($page, $cookies); $pageInfo = $this->getPageInfos($page, $cookies);
if($pageID === null) { if($pageInfo['userId'] === null) {
echo <<<EOD echo <<<EOD
Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge
EOD; EOD;
die(); die();
} elseif($pageID == -1) { } elseif($pageInfo['userId'] == -1) {
echo <<<EOD echo <<<EOD
This page is not accessible without being logged in. This page is not accessible without being logged in.
EOD; EOD;
@ -81,17 +81,14 @@ EOD;
} }
//Build the string for the first request //Build the string for the first request
$requestString = 'https://touch.facebook.com/pages_reaction_units/more/?page_id=' $requestString = 'https://touch.facebook.com/page_content_list_view/more/?page_id='
. $pageID . $pageInfo['userId']
. '&cursor={"card_id"%3A"videos"%2C"has_next_page"%3Atrue}&surface=mobile_page_home&unit_count=8'; . '&start_cursor=1&num_to_fetch=10&surface_type=timeline';
$fileContent = getContents($requestString); $fileContent = getContents($requestString);
$articleIndex = 0;
$maxArticle = 3;
$html = $this->buildContent($fileContent); $html = $this->buildContent($fileContent);
$author = $this->getInput('u'); $author = $pageInfo['username'];
foreach($html->find('article') as $content) { foreach($html->find('article') as $content) {
@ -114,13 +111,17 @@ EOD;
$content->find('footer', 0)->innertext = ''; $content->find('footer', 0)->innertext = '';
} }
if($content->find('._5rgu', 0) !== null) {
$content->find('._5rgu', 0)->innertext = '';
}
// Replace emoticon images by their textual representation (part of the span) // Replace emoticon images by their textual representation (part of the span)
foreach($content->find('span[title*="emoticon"]') as $emoticon) { foreach($content->find('span[title*="emoticon"]') as $emoticon) {
$emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext; $emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext;
} }
//Remove html nodes, keep only img, links, basic formatting //Remove html nodes, keep only img, links, basic formatting
$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4>'); //$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4>');
//Adapt link hrefs: convert relative links into absolute links and bypass external link redirection //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
$content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
@ -146,6 +147,11 @@ EOD;
// "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)" // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
$content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
//Remove the "...More" link to the full story (labelled "Plus" on French pages, presumably - TODO confirm)
$content = preg_replace(
'/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m',
'', $content, 1);
$item['content'] = html_entity_decode($content, ENT_QUOTES); $item['content'] = html_entity_decode($content, ENT_QUOTES);
$title = $author; $title = $author;
@ -165,48 +171,13 @@ EOD;
} }
// Currently not used. Intended for fetching more than the first 3 elements,
// since the following ones are served from another page.
//
// Builds the URL of the next "pages_reaction_units/more" request from the
// timeline cursor found in a previous response.
//
// $string: raw response text that is searched for the "timeline_unit" cursor.
// $pageID: numeric page id, inserted as the page_id query parameter.
// Returns the URL as a string; the cursor parameter is already URL-encoded.
//
// NOTE(review): the preg_match() result is not checked. If the pattern does
// not match, $result is empty and $result[1]..$result[4] are undefined.
private function computeNextLink($string, $pageID){
// The pattern is assembled from pieces. Each double-quoted piece starts with
// eight backslashes in the source, i.e. four in the resulting string, which
// PCRE reads as two literal backslashes followed by "u00253A".
$regex = implode(
'',
array(
'/timeline_unit',
"\\\\\\\\u00253A1",
// Four numeric cursor fields, captured as groups 1 to 4.
"\\\\\\\\u00253A([0-9]*)",
"\\\\\\\\u00253A([0-9]*)",
"\\\\\\\\u00253A([0-9]*)",
"\\\\\\\\u00253A([0-9]*)/"
)
);
preg_match($regex, $string, $result);
// The cursor value below is URL-encoded JSON. Decoded it reads:
// {"timeline_cursor":"timeline_unit:1:<1>:<2>:<3>:<4>",
//  "timeline_section_cursor":{},"has_next_page":true}
return implode(
'',
array(
'https://touch.facebook.com/pages_reaction_units/more/?page_id=',
$pageID,
'&cursor=%7B%22timeline_cursor%22%3A%22timeline_unit%3A1%3A',
$result[1],
'%3A',
$result[2],
'%3A',
$result[3],
'%3A',
$result[4],
'%22%2C%22timeline_section_cursor%22%3A%7B%7D%2C%22',
'has_next_page%22%3Atrue%7D&surface=mobile_page_home&unit_count=3'
)
);
}
//Builds the HTML from the encoded JS that Facebook provides. //Builds the HTML from the encoded JS that Facebook provides.
private function buildContent($pageContent){ private function buildContent($pageContent){
// The html ends with: // The html ends with:
// /div>","replaceifexists // /div>","replaceifexists
$regex = '/\\"html\\":(\".+\/div>"),"replace/'; $regex = '/\\"html\\":(\".+\/div>"),"replace/';
preg_match($regex, $pageContent, $result); preg_match($regex, $pageContent, $result);
return str_get_html(json_decode($result[1])); return str_get_html(json_decode($result[1]));
} }
@ -237,8 +208,8 @@ EOD;
return substr($cookies, 1); return substr($cookies, 1);
} }
//Get the page ID from the Facebook page. //Get the page ID and username from the Facebook page.
private function getPageID($page, $cookies){ private function getPageInfos($page, $cookies){
$context = stream_context_create(array( $context = stream_context_create(array(
'http' => array( 'http' => array(
@ -254,19 +225,28 @@ EOD;
return -1; return -1;
} }
//Get the username
$usernameRegex = '/data-nt=\"FB:TEXT4\">(.*?)<\/div>/m';
preg_match($usernameRegex, $pageContent, $usernameMatches);
if(count($usernameMatches) > 0) {
$username = $usernameMatches[1];
} else {
$username = $this->getInput('u');
}
//Get the page ID if we don't have a captcha //Get the page ID if we don't have a captcha
$regex = '/page_id=([0-9]*)&/'; $regex = '/page_id=([0-9]*)&/';
preg_match($regex, $pageContent, $matches); preg_match($regex, $pageContent, $matches);
if(count($matches) > 0) { if(count($matches) > 0) {
return $matches[1]; return array('userId' => $matches[1], 'username' => $username);
} }
//Get the page ID if we do have a captcha //Get the page ID if we do have a captcha
$regex = '/"pageID":"([0-9]*)"/'; $regex = '/"pageID":"([0-9]*)"/';
preg_match($regex, $pageContent, $matches); preg_match($regex, $pageContent, $matches);
return $matches[1]; return array('userId' => $matches[1], 'username' => $username);
} }