[FB2Bridge] Add images support
[FB2Bridge] Add basic "cards" support
This commit is contained in:
parent
30bc5179c2
commit
afd5ef0f1d
1 changed files with 36 additions and 10 deletions
|
@ -85,14 +85,12 @@ EOD;
|
||||||
. $pageInfo['userId']
|
. $pageInfo['userId']
|
||||||
. '&start_cursor=1&num_to_fetch=105&surface_type=timeline';
|
. '&start_cursor=1&num_to_fetch=105&surface_type=timeline';
|
||||||
$fileContent = getContents($requestString);
|
$fileContent = getContents($requestString);
|
||||||
|
|
||||||
$html = $this->buildContent($fileContent);
|
$html = $this->buildContent($fileContent);
|
||||||
$author = $pageInfo['username'];
|
$author = $pageInfo['username'];
|
||||||
|
|
||||||
foreach($html->find('article') as $content) {
|
foreach($html->find('article') as $content) {
|
||||||
|
|
||||||
$item = array();
|
$item = array();
|
||||||
|
|
||||||
preg_match('/publish_time\\\":([0-9]+),/', $content->getAttribute('data-store', 0), $match);
|
preg_match('/publish_time\\\":([0-9]+),/', $content->getAttribute('data-store', 0), $match);
|
||||||
if(isset($match[1]))
|
if(isset($match[1]))
|
||||||
$timestamp = $match[1];
|
$timestamp = $match[1];
|
||||||
|
@ -102,6 +100,12 @@ EOD;
|
||||||
$item['uri'] = html_entity_decode('http://touch.facebook.com'
|
$item['uri'] = html_entity_decode('http://touch.facebook.com'
|
||||||
. $content->find("div[class='_52jc _5qc4 _24u0 _36xo']", 0)->find('a', 0)->getAttribute('href'), ENT_QUOTES);
|
. $content->find("div[class='_52jc _5qc4 _24u0 _36xo']", 0)->find('a', 0)->getAttribute('href'), ENT_QUOTES);
|
||||||
|
|
||||||
|
//Decode images
|
||||||
|
$imagecleaned = preg_replace_callback('/<i [^>]* style="[^"]*url\(\'(.*?)\'\).*?><\/i>/m', function ($matches) {
|
||||||
|
return "<img src='" . str_replace(['\\3a ', '\\3d ', '\\26 '], [':', '=', '&'], $matches[1]) . "' />";
|
||||||
|
}, $content);
|
||||||
|
$content = str_get_html($imagecleaned);
|
||||||
|
|
||||||
if($content->find('header', 0) !== null) {
|
if($content->find('header', 0) !== null) {
|
||||||
$content->find('header', 0)->innertext = '';
|
$content->find('header', 0)->innertext = '';
|
||||||
}
|
}
|
||||||
|
@ -110,17 +114,13 @@ EOD;
|
||||||
$content->find('footer', 0)->innertext = '';
|
$content->find('footer', 0)->innertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
if($content->find('._5rgu', 0) !== null) {
|
|
||||||
$content->find('._5rgu', 0)->innertext = '';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Replace emoticon images by their textual representation (part of the span)
|
// Replace emoticon images by their textual representation (part of the span)
|
||||||
foreach($content->find('span[title*="emoticon"]') as $emoticon) {
|
foreach($content->find('span[title*="emoticon"]') as $emoticon) {
|
||||||
$emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext;
|
$emoticon->innertext = $emoticon->find('span[aria-hidden="true"]', 0)->innertext;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Remove html nodes, keep only img, links, basic formatting
|
//Remove html nodes, keep only img, links, basic formatting
|
||||||
//$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4>');
|
$content = strip_tags($content, '<a><img><i><u><br><p><h3><h4><section>');
|
||||||
|
|
||||||
//Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
|
//Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
|
||||||
$content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
|
$content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);
|
||||||
|
@ -133,7 +133,6 @@ EOD;
|
||||||
'ajaxify',
|
'ajaxify',
|
||||||
'tabindex',
|
'tabindex',
|
||||||
'class',
|
'class',
|
||||||
'style',
|
|
||||||
'data-[^=]*',
|
'data-[^=]*',
|
||||||
'aria-[^=]*',
|
'aria-[^=]*',
|
||||||
'role',
|
'role',
|
||||||
|
@ -151,6 +150,30 @@ EOD;
|
||||||
'/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m',
|
'/… (<span>|)<a href="https:\/\/www\.facebook\.com\/story\.php\?story_fbid=.*?<\/a>/m',
|
||||||
'', $content, 1);
|
'', $content, 1);
|
||||||
|
|
||||||
|
//Remove tracking images
|
||||||
|
$content = preg_replace('/<img src=\'.*?safe_image\.php.*?\' \/>/m', '', $content);
|
||||||
|
|
||||||
|
//Remove the double section tags
|
||||||
|
$content = str_replace(['<section><section>', '</section></section>'], ['<section>', '</section>'], $content);
|
||||||
|
|
||||||
|
//If the first section is followed by a sibling element, wrap the section's content in a link to that sibling's href
|
||||||
|
$content = str_get_html($content);
|
||||||
|
$sectionContent = $content->find('section', 0);
|
||||||
|
if($sectionContent != null) {
|
||||||
|
$sectionLink = $sectionContent->nextSibling();
|
||||||
|
if($sectionLink != null) {
|
||||||
|
$fullLink = '<a href="' . $sectionLink->getAttribute('href') . '">' . $sectionContent->innertext . '</a>';
|
||||||
|
$sectionContent->innertext = $fullLink;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//If a link is a direct child of a section, wrap the whole section in a link to that href
|
||||||
|
foreach($content->find('section > a') as $sectionToFix) {
|
||||||
|
$sectionLink = $sectionToFix->getAttribute('href');
|
||||||
|
$section = $sectionToFix->parent();
|
||||||
|
$section->outertext = '<a href="' . $sectionLink . '">' . $section . '</a>';
|
||||||
|
}
|
||||||
|
|
||||||
$item['content'] = html_entity_decode($content, ENT_QUOTES);
|
$item['content'] = html_entity_decode($content, ENT_QUOTES);
|
||||||
|
|
||||||
$title = $author;
|
$title = $author;
|
||||||
|
@ -164,9 +187,10 @@ EOD;
|
||||||
$item['author'] = html_entity_decode($author, ENT_QUOTES);
|
$item['author'] = html_entity_decode($author, ENT_QUOTES);
|
||||||
$item['timestamp'] = html_entity_decode($timestamp, ENT_QUOTES);
|
$item['timestamp'] = html_entity_decode($timestamp, ENT_QUOTES);
|
||||||
|
|
||||||
if($item['timestamp'] != 0)
|
//if($item['timestamp'] != 0)
|
||||||
array_push($this->items, $item);
|
array_push($this->items, $item);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -177,7 +201,9 @@ EOD;
|
||||||
$regex = '/\\"html\\":(\".+\/div>"),"replace/';
|
$regex = '/\\"html\\":(\".+\/div>"),"replace/';
|
||||||
preg_match($regex, $pageContent, $result);
|
preg_match($regex, $pageContent, $result);
|
||||||
|
|
||||||
return str_get_html(json_decode($result[1]));
|
$htmlContent = html_entity_decode(json_decode($result[1]), ENT_QUOTES, 'UTF-8');
|
||||||
|
|
||||||
|
return str_get_html($htmlContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue