[FacebookBridge] Use touch.facebook.com for groups (#1817)

This commit is contained in:
Joshua Coales 2020-10-29 03:42:49 +00:00 committed by GitHub
parent 93cdf5e342
commit 6af87b2f32
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -175,7 +175,13 @@ class FacebookBridge extends BridgeAbstract {
$header = array(); $header = array();
} }
$html = getSimpleHTMLDOM($this->getURI(), $header) $touchURI = str_replace(
'https://www.facebook',
'https://touch.facebook',
$this->getURI()
);
$html = getSimpleHTMLDOM($touchURI, $header)
or returnServerError('Failed loading facebook page: ' . $this->getURI()); or returnServerError('Failed loading facebook page: ' . $this->getURI());
if(!$this->isPublicGroup($html)) { if(!$this->isPublicGroup($html)) {
@ -186,19 +192,19 @@ class FacebookBridge extends BridgeAbstract {
$this->groupName = $this->extractGroupName($html); $this->groupName = $this->extractGroupName($html);
$posts = $html->find('div.userContentWrapper') $posts = $html->find('div.story_body_container')
or returnServerError('Failed finding posts!'); or returnServerError('Failed finding posts!');
foreach($posts as $post) { foreach($posts as $post) {
$item = array(); $item = array();
$item['uri'] = $this->extractGroupURI($post); $item['uri'] = $this->extractGroupPostURI($post);
$item['title'] = $this->extractGroupTitle($post); $item['title'] = $this->extractGroupPostTitle($post);
$item['author'] = $this->extractGroupAuthor($post); $item['author'] = $this->extractGroupPostAuthor($post);
$item['content'] = $this->extractGroupContent($post); $item['content'] = $this->extractGroupPostContent($post);
$item['timestamp'] = $this->extractGroupTimestamp($post); $item['timestamp'] = $this->extractGroupPostTimestamp($post);
$item['enclosures'] = $this->extractGroupEnclosures($post); $item['enclosures'] = $this->extractGroupPostEnclosures($post);
$this->items[] = $item; $this->items[] = $item;
@ -232,6 +238,9 @@ class FacebookBridge extends BridgeAbstract {
if (strpos($provided_host, 'm.') === 0) { if (strpos($provided_host, 'm.') === 0) {
$provided_host = substr($provided_host, strlen('m.')); $provided_host = substr($provided_host, strlen('m.'));
} }
if (strpos($provided_host, 'touch.') === 0) {
$provided_host = substr($provided_host, strlen('touch.'));
}
$facebook_host = parse_url(self::URI)['host']; $facebook_host = parse_url(self::URI)['host'];
@ -245,24 +254,26 @@ class FacebookBridge extends BridgeAbstract {
} }
} }
/**
* @param $html simple_html_dom
* @return bool
*/
private function isPublicGroup($html) { private function isPublicGroup($html) {
// Facebook redirects to the groups about page for non-public groups // Facebook touch just presents a login page for non-public groups
$about = $html->find('#pagelet_group_about', 0); $title = $html->find('title', 0);
return $title->plaintext !== 'Log in to Facebook | Facebook';
return !($about);
} }
private function extractGroupName($html) { private function extractGroupName($html) {
$ogtitle = $html->find('meta[property="og:title"]', 0) $ogtitle = $html->find('._de1', 0)
or returnServerError('Unable to find group title!'); or returnServerError('Unable to find group title!');
return html_entity_decode($ogtitle->content, ENT_QUOTES); return html_entity_decode($ogtitle->plaintext, ENT_QUOTES);
} }
private function extractGroupURI($post) { private function extractGroupPostURI($post) {
$elements = $post->find('a') $elements = $post->find('a')
or returnServerError('Unable to find URI!'); or returnServerError('Unable to find URI!');
@ -280,57 +291,70 @@ class FacebookBridge extends BridgeAbstract {
} }
private function extractGroupContent($post) { private function extractGroupPostContent($post) {
$content = $post->find('div.userContent', 0) $content = $post->find('div._5rgt', 0)
or returnServerError('Unable to find user content!'); or returnServerError('Unable to find user content!');
return $content->innertext . $content->next_sibling()->innertext; $context_text = $content->innertext;
if ($content->next_sibling() !== null) {
$context_text .= $content->next_sibling()->innertext;
}
return $context_text;
} }
private function extractGroupTimestamp($post) { private function extractGroupPostTimestamp($post) {
$element = $post->find('abbr[data-utime]', 0) $element = $post->find('abbr', 0)
or returnServerError('Unable to find timestamp!'); or returnServerError('Unable to find timestamp!');
return $element->getAttribute('data-utime'); return $element->plaintext;
} }
private function extractGroupAuthor($post) { private function extractGroupPostAuthor($post) {
$element = $post->find('img', 0) $element = $post->find('h3 a', 0)
or returnServerError('Unable to find author information!'); or returnServerError('Unable to find author information!');
return $element->{'aria-label'}; return $element->plaintext;
} }
private function extractGroupEnclosures($post) { private function extractGroupPostEnclosures($post) {
$elements = $post->find('div.userContent', 0)->next_sibling()->find('img'); $elements = $post->find('span._6qdm');
if ($post->find('div._5rgt', 0)->next_sibling() !== null) {
array_push($elements, ...$post->find('div._5rgt', 0)->next_sibling()->find('i.img'));
}
$enclosures = array(); $enclosures = array();
$background_img_regex = '/background-image: ?url\\((.+?)\\);/';
foreach($elements as $enclosure) { foreach($elements as $enclosure) {
$enclosures[] = $enclosure->src; if(preg_match($background_img_regex, $enclosure, $matches) > 0) {
$bg_img_value = trim(html_entity_decode($matches[1], ENT_QUOTES), "'\"");
$bg_img_url = urldecode(preg_replace('/\\\([0-9a-z]{2}) /', '%$1', $bg_img_value));
$enclosures[] = urldecode($bg_img_url);
}
} }
return empty($enclosures) ? null : $enclosures; return empty($enclosures) ? null : $enclosures;
} }
private function extractGroupTitle($post) { private function extractGroupPostTitle($post) {
$element = $post->find('h5', 0) $element = $post->find('h3', 0)
or returnServerError('Unable to find title!'); or returnServerError('Unable to find title!');
if(strpos($element->plaintext, 'shared') === false) { if(strpos($element->plaintext, 'shared') === false) {
$content = strip_tags($this->extractGroupContent($post)); $content = strip_tags($this->extractGroupPostContent($post));
return $this->extractGroupAuthor($post) return $this->extractGroupPostAuthor($post)
. ' posted: ' . ' posted: '
. substr( . substr(
$content, $content,
@ -558,7 +582,7 @@ EOD;
} }
// No captcha? We can carry on retrieving page contents :) // No captcha? We can carry on retrieving page contents :)
// First, we check wether the page is public or not // First, we check whether the page is public or not
$loginForm = $html->find('._585r', 0); $loginForm = $html->find('._585r', 0);
if($loginForm != null) { if($loginForm != null) {