Use a new URL for the cursor. Should fix #851.
Remove the "...More" item in the output. Remove the information card data.
This commit is contained in:
parent
16f0ee7104
commit
7596be65f2
1 changed file with 31 additions and 51 deletions
|
@ -65,14 +65,14 @@ class FB2Bridge extends BridgeAbstract {
|
||||||
if($this->getInput('u') !== null) {
|
if($this->getInput('u') !== null) {
|
||||||
$page = 'https://touch.facebook.com/' . $this->getInput('u');
|
$page = 'https://touch.facebook.com/' . $this->getInput('u');
|
||||||
$cookies = $this->getCookies($page);
|
$cookies = $this->getCookies($page);
|
||||||
$pageID = $this->getPageID($page, $cookies);
|
$pageInfo = $this->getPageInfos($page, $cookies);
|
||||||
|
|
||||||
if($pageID === null) {
|
if($pageInfo['userId'] === null) {
|
||||||
echo <<<EOD
|
echo <<<EOD
|
||||||
Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge
|
Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge
|
||||||
EOD;
|
EOD;
|
||||||
die();
|
die();
|
||||||
} elseif($pageID == -1) {
|
} elseif($pageInfo['userId'] == -1) {
|
||||||
echo <<<EOD
|
echo <<<EOD
|
||||||
This page is not accessible without being logged in.
|
This page is not accessible without being logged in.
|
||||||
EOD;
|
EOD;
|
||||||
|
@ -81,17 +81,14 @@ EOD;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Build the string for the first request
|
//Build the string for the first request
|
||||||
$requestString = 'https://touch.facebook.com/pages_reaction_units/more/?page_id='
|
$requestString = 'https://touch.facebook.com/page_content_list_view/more/?page_id='
|
||||||
. $pageID
|
. $pageInfo['userId']
|
||||||
. '&cursor={"card_id"%3A"videos"%2C"has_next_page"%3Atrue}&surface=mobile_page_home&unit_count=8';
|
. '&start_cursor=1&num_to_fetch=10&surface_type=timeline';
|
||||||
|
|
||||||
$fileContent = getContents($requestString);
|
$fileContent = getContents($requestString);
|
||||||
|
|
||||||
$articleIndex = 0;
|
|
||||||
$maxArticle = 3;
|
|
||||||
|
|
||||||
$html = $this->buildContent($fileContent);
|
$html = $this->buildContent($fileContent);
|
||||||
$author = $this->getInput('u');
|
$author = $pageInfo['username'];
|
||||||
|
|
||||||
foreach($html->find('article') as $content) {
|
foreach($html->find('article') as $content) {
|
||||||
|
|
||||||
|
@ -114,13 +111,17 @@ EOD;
|
||||||
$content->find('footer', 0)->innertext = '';
|
$content->find('footer', 0)->innertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if($content->find('._5rgu', 0) !== null) {
|
||||||
|
$content->find('._5rgu', 0)->innertext = '';
|
||||||
|
}
|
||||||
|
|
||||||
// Replace emoticon images by their textual representation (part of the span)
|
// Replace emoticon images by their textual representation (part of the span)
|
||||||
foreach($content->find('span[title*="emoticon"]') as $emoticon) {
|
foreach($content->find('span[title*="emoticon"]') as $emoticon) {
|
||||||
$emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext;
|
$emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Remove html nodes, keep only img, links, basic formatting
|
//Remove html nodes, keep only img, links, basic formatting
|
||||||
$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4>');
|
//$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4>');
|
||||||
|
|
||||||
//Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
|
//Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
|
||||||
$content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
|
$content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
|
||||||
|
@ -146,6 +147,11 @@ EOD;
|
||||||
// "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
|
// "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
|
||||||
$content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
|
$content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content);
|
||||||
|
|
||||||
|
//Remove the "...Plus" tag
|
||||||
|
$content = preg_replace(
|
||||||
|
'/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m',
|
||||||
|
'', $content, 1);
|
||||||
|
|
||||||
$item['content'] = html_entity_decode($content, ENT_QUOTES);
|
$item['content'] = html_entity_decode($content, ENT_QUOTES);
|
||||||
|
|
||||||
$title = $author;
|
$title = $author;
|
||||||
|
@ -165,48 +171,13 @@ EOD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Currently not used. Is used to get more than only 3 elements, as they appear on another page.
|
|
||||||
private function computeNextLink($string, $pageID){
|
|
||||||
|
|
||||||
$regex = implode(
|
|
||||||
'',
|
|
||||||
array(
|
|
||||||
'/timeline_unit',
|
|
||||||
"\\\\\\\\u00253A1",
|
|
||||||
"\\\\\\\\u00253A([0-9]*)",
|
|
||||||
"\\\\\\\\u00253A([0-9]*)",
|
|
||||||
"\\\\\\\\u00253A([0-9]*)",
|
|
||||||
"\\\\\\\\u00253A([0-9]*)/"
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
preg_match($regex, $string, $result);
|
|
||||||
|
|
||||||
return implode(
|
|
||||||
'',
|
|
||||||
array(
|
|
||||||
'https://touch.facebook.com/pages_reaction_units/more/?page_id=',
|
|
||||||
$pageID,
|
|
||||||
'&cursor=%7B%22timeline_cursor%22%3A%22timeline_unit%3A1%3A',
|
|
||||||
$result[1],
|
|
||||||
'%3A',
|
|
||||||
$result[2],
|
|
||||||
'%3A',
|
|
||||||
$result[3],
|
|
||||||
'%3A',
|
|
||||||
$result[4],
|
|
||||||
'%22%2C%22timeline_section_cursor%22%3A%7B%7D%2C%22',
|
|
||||||
'has_next_page%22%3Atrue%7D&surface=mobile_page_home&unit_count=3'
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
//Builds the HTML from the encoded JS that Facebook provides.
|
//Builds the HTML from the encoded JS that Facebook provides.
|
||||||
private function buildContent($pageContent){
|
private function buildContent($pageContent){
|
||||||
// The html ends with:
|
// The html ends with:
|
||||||
// /div>","replaceifexists
|
// /div>","replaceifexists
|
||||||
$regex = '/\\"html\\":(\".+\/div>"),"replace/';
|
$regex = '/\\"html\\":(\".+\/div>"),"replace/';
|
||||||
preg_match($regex, $pageContent, $result);
|
preg_match($regex, $pageContent, $result);
|
||||||
|
|
||||||
return str_get_html(json_decode($result[1]));
|
return str_get_html(json_decode($result[1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,8 +208,8 @@ EOD;
|
||||||
return substr($cookies, 1);
|
return substr($cookies, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get the page ID from the Facebook page.
|
//Get the page ID and username from the Facebook page.
|
||||||
private function getPageID($page, $cookies){
|
private function getPageInfos($page, $cookies){
|
||||||
|
|
||||||
$context = stream_context_create(array(
|
$context = stream_context_create(array(
|
||||||
'http' => array(
|
'http' => array(
|
||||||
|
@ -254,19 +225,28 @@ EOD;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Get the username
|
||||||
|
$usernameRegex = '/data-nt=\"FB:TEXT4\">(.*?)<\/div>/m';
|
||||||
|
preg_match($usernameRegex, $pageContent, $usernameMatches);
|
||||||
|
if(count($usernameMatches) > 0) {
|
||||||
|
$username = $usernameMatches[1];
|
||||||
|
} else {
|
||||||
|
$username = $this->getInput('u');
|
||||||
|
}
|
||||||
|
|
||||||
//Get the page ID if we don't have a captcha
|
//Get the page ID if we don't have a captcha
|
||||||
$regex = '/page_id=([0-9]*)&/';
|
$regex = '/page_id=([0-9]*)&/';
|
||||||
preg_match($regex, $pageContent, $matches);
|
preg_match($regex, $pageContent, $matches);
|
||||||
|
|
||||||
if(count($matches) > 0) {
|
if(count($matches) > 0) {
|
||||||
return $matches[1];
|
return array('userId' => $matches[1], 'username' => $username);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get the page ID if we do have a captcha
|
//Get the page ID if we do have a captcha
|
||||||
$regex = '/"pageID":"([0-9]*)"/';
|
$regex = '/"pageID":"([0-9]*)"/';
|
||||||
preg_match($regex, $pageContent, $matches);
|
preg_match($regex, $pageContent, $matches);
|
||||||
|
|
||||||
return $matches[1];
|
return array('userId' => $matches[1], 'username' => $username);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue