[TwitterBridge] Fix the bridge using a brand new API
This commit is contained in:
parent
c4422bdbb5
commit
06891ae35f
3 changed files with 121 additions and 206 deletions
|
@ -2,6 +2,7 @@
|
||||||
class TwitterBridge extends BridgeAbstract {
|
class TwitterBridge extends BridgeAbstract {
|
||||||
const NAME = 'Twitter Bridge';
|
const NAME = 'Twitter Bridge';
|
||||||
const URI = 'https://twitter.com/';
|
const URI = 'https://twitter.com/';
|
||||||
|
const API_URI = 'https://api.twitter.com';
|
||||||
const CACHE_TIMEOUT = 300; // 5min
|
const CACHE_TIMEOUT = 300; // 5min
|
||||||
const DESCRIPTION = 'returns tweets';
|
const DESCRIPTION = 'returns tweets';
|
||||||
const MAINTAINER = 'pmaziere';
|
const MAINTAINER = 'pmaziere';
|
||||||
|
@ -168,6 +169,27 @@ EOD
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the Twitter API URL that matches the current query context.
 *
 * Every URL asks for extended tweets (tweet_mode=extended). The
 * 'By username' and 'By list' contexts resolve the numeric identifier first,
 * through getRestId() and getListId() respectively.
 *
 * @return string API URL for the queried context; an unknown context is
 * reported through returnServerError()
 */
public function getApiURI() {
	$context = $this->queriedContext;

	switch($context) {
	case 'By keyword or hashtag':
		$query = urlencode($this->getInput('q'));
		return self::API_URI . '/2/search/adaptive.json?q=' . $query . '&tweet_mode=extended';

	case 'By username':
		$restId = $this->getRestId($this->getInput('u'));
		return self::API_URI . '/2/timeline/profile/' . $restId . '.json?tweet_mode=extended';

	case 'By list':
		$listId = $this->getListId($this->getInput('user'), $this->getInput('list'));
		return self::API_URI . '/2/timeline/list.json?list_id=' . $listId . '&tweet_mode=extended';

	default:
		returnServerError('Invalid query context !');
	}
}
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$html = '';
|
$html = '';
|
||||||
$page = $this->getURI();
|
$page = $this->getURI();
|
||||||
|
@ -176,14 +198,9 @@ EOD
|
||||||
'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||||
);
|
);
|
||||||
|
|
||||||
if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) {
|
$data = json_decode($this->getApiContents($this->getApiURI()));
|
||||||
$cookies = $this->getCookies($page);
|
|
||||||
$html = getSimpleHTMLDOM($page, array_merge($header, array("Cookie: $cookies")));
|
|
||||||
} else {
|
|
||||||
$html = getSimpleHTMLDOM($page, $header, array(CURLOPT_COOKIEFILE => ''));
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!$html) {
|
if(!$data) {
|
||||||
switch($this->queriedContext) {
|
switch($this->queriedContext) {
|
||||||
case 'By keyword or hashtag':
|
case 'By keyword or hashtag':
|
||||||
returnServerError('No results for this query.');
|
returnServerError('No results for this query.');
|
||||||
|
@ -196,75 +213,33 @@ EOD
|
||||||
|
|
||||||
$hidePictures = $this->getInput('nopic');
|
$hidePictures = $this->getInput('nopic');
|
||||||
|
|
||||||
foreach($html->find('div.js-stream-tweet') as $tweet) {
|
foreach($data->globalObjects->tweets as $tweet) {
|
||||||
|
|
||||||
// Skip retweets?
|
// Skip retweets?
|
||||||
if($this->getInput('noretweet')
|
if($this->getInput('noretweet')
|
||||||
&& $tweet->find('div.context span.js-retweet-text a', 0)) {
|
&& isset($tweet->retweeted_status_id_str)) {
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// remove 'invisible' content
|
|
||||||
foreach($tweet->find('.invisible') as $invisible) {
|
|
||||||
$invisible->outertext = '';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip promoted tweets
|
|
||||||
$heading = $tweet->previousSibling();
|
|
||||||
if(!is_null($heading) &&
|
|
||||||
$heading->getAttribute('class') === 'promoted-tweet-heading'
|
|
||||||
) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$item = array();
|
$item = array();
|
||||||
// extract username and sanitize
|
// extract username and sanitize
|
||||||
$item['username'] = htmlspecialchars_decode($tweet->getAttribute('data-screen-name'), ENT_QUOTES);
|
$user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects);
|
||||||
// extract fullname (pseudonym)
|
|
||||||
$item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES);
|
$item['username'] = $user_info->name;
|
||||||
// get author
|
$item['fullname'] = $user_info->screen_name;
|
||||||
$item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')';
|
$item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')';
|
||||||
if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) {
|
$item['avatar'] = $user_info->profile_image_url_https;
|
||||||
$item['author'] .= ' RT: @' . $rt->plaintext;
|
|
||||||
}
|
$item['id'] = $tweet->id_str;
|
||||||
// get avatar link
|
$item['uri'] = self::URI . $tweet->user_id_str . '/status/' . $item['id'];
|
||||||
$item['avatar'] = $tweet->find('img', 0)->src;
|
|
||||||
// get TweetID
|
|
||||||
$item['id'] = $tweet->getAttribute('data-tweet-id');
|
|
||||||
// get tweet link
|
|
||||||
$item['uri'] = self::URI . substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1);
|
|
||||||
// extract tweet timestamp
|
// extract tweet timestamp
|
||||||
$item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time');
|
$item['timestamp'] = $tweet->created_at;
|
||||||
|
|
||||||
// generate the title
|
// generate the title
|
||||||
$item['title'] = strip_tags($this->fixAnchorSpacing(htmlspecialchars_decode(
|
$item['title'] = $tweet->full_text;
|
||||||
$tweet->find('p.js-tweet-text', 0), ENT_QUOTES), '<a>'));
|
$cleanedTweet = $tweet->full_text;
|
||||||
|
|
||||||
switch($this->queriedContext) {
|
// Add avatar
|
||||||
case 'By list':
|
|
||||||
// Check if filter applies to list (using raw content)
|
|
||||||
if($this->getInput('filter')) {
|
|
||||||
if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) {
|
|
||||||
continue 2; // switch + for-loop!
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
$this->processContentLinks($tweet);
|
|
||||||
$this->processEmojis($tweet);
|
|
||||||
|
|
||||||
// get tweet text
|
|
||||||
$cleanedTweet = str_replace(
|
|
||||||
'href="/',
|
|
||||||
'href="' . self::URI,
|
|
||||||
$tweet->find('p.js-tweet-text', 0)->innertext
|
|
||||||
);
|
|
||||||
|
|
||||||
// fix anchors missing spaces in-between
|
|
||||||
$cleanedTweet = $this->fixAnchorSpacing($cleanedTweet);
|
|
||||||
|
|
||||||
// Add picture to content
|
|
||||||
$picture_html = '';
|
$picture_html = '';
|
||||||
if(!$hidePictures) {
|
if(!$hidePictures) {
|
||||||
$picture_html = <<<EOD
|
$picture_html = <<<EOD
|
||||||
|
@ -278,31 +253,37 @@ EOD
|
||||||
EOD;
|
EOD;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add embedded image to content
|
// Get images
|
||||||
$image_html = '';
|
$image_html = '';
|
||||||
$images = $this->getImageURI($tweet);
|
if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) {
|
||||||
if(!$this->getInput('noimg') && !is_null($images)) {
|
foreach($tweet->extended_entities->media as $media) {
|
||||||
|
$image = $media->media_url_https;
|
||||||
foreach ($images as $image) {
|
$display_image = $media->display_url;
|
||||||
|
|
||||||
// Set image scaling
|
|
||||||
$image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig';
|
|
||||||
$image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb';
|
|
||||||
|
|
||||||
// add enclosures
|
// add enclosures
|
||||||
$item['enclosures'][] = $image_orig;
|
$item['enclosures'][] = $image;
|
||||||
|
|
||||||
$image_html .= <<<EOD
|
$image_html .= <<<EOD
|
||||||
<a href="{$image_orig}">
|
<a href="{$image}">
|
||||||
<img
|
<img
|
||||||
style="align:top; max-width:558px; border:1px solid black;"
|
style="align:top; max-width:558px; border:1px solid black;"
|
||||||
src="{$image_thumb}" />
|
src="{$display_image}" />
|
||||||
</a>
|
</a>
|
||||||
EOD;
|
EOD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// add content
|
switch($this->queriedContext) {
|
||||||
|
case 'By list':
|
||||||
|
// Check if filter applies to list (using raw content)
|
||||||
|
if($this->getInput('filter')) {
|
||||||
|
if(stripos($cleanedTweet, $this->getInput('filter')) === false) {
|
||||||
|
continue 2; // switch + for-loop!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
$item['content'] = <<<EOD
|
$item['content'] = <<<EOD
|
||||||
<div style="display: inline-block; vertical-align: top;">
|
<div style="display: inline-block; vertical-align: top;">
|
||||||
{$picture_html}
|
{$picture_html}
|
||||||
|
@ -315,151 +296,85 @@ EOD;
|
||||||
</div>
|
</div>
|
||||||
EOD;
|
EOD;
|
||||||
|
|
||||||
// add quoted tweet
|
|
||||||
$quotedTweet = $tweet->find('div.QuoteTweet', 0);
|
|
||||||
if($quotedTweet) {
|
|
||||||
// get tweet text
|
|
||||||
$cleanedQuotedTweet = str_replace(
|
|
||||||
'href="/',
|
|
||||||
'href="' . self::URI,
|
|
||||||
$quotedTweet->find('div.tweet-text', 0)->innertext
|
|
||||||
);
|
|
||||||
|
|
||||||
$this->processContentLinks($quotedTweet);
|
|
||||||
$this->processEmojis($quotedTweet);
|
|
||||||
|
|
||||||
// Add embedded image to content
|
|
||||||
$quotedImage_html = '';
|
|
||||||
$quotedImages = $this->getQuotedImageURI($tweet);
|
|
||||||
|
|
||||||
if(!$this->getInput('noimg') && !is_null($quotedImages)) {
|
|
||||||
|
|
||||||
foreach ($quotedImages as $image) {
|
|
||||||
|
|
||||||
// Set image scaling
|
|
||||||
$image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig';
|
|
||||||
$image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb';
|
|
||||||
|
|
||||||
// add enclosures
|
|
||||||
$item['enclosures'][] = $image_orig;
|
|
||||||
|
|
||||||
$quotedImage_html .= <<<EOD
|
|
||||||
<a href="{$image_orig}">
|
|
||||||
<img
|
|
||||||
style="align:top; max-width:558px; border:1px solid black;"
|
|
||||||
src="{$image_thumb}" />
|
|
||||||
</a>
|
|
||||||
EOD;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$item['content'] = <<<EOD
|
|
||||||
{$item['content']}
|
|
||||||
<hr>
|
|
||||||
<div style="display: inline-block; vertical-align: top;">
|
|
||||||
<blockquote>{$cleanedQuotedTweet}</blockquote>
|
|
||||||
</div>
|
|
||||||
<div style="display: block; vertical-align: top;">
|
|
||||||
<blockquote>{$quotedImage_html}</blockquote>
|
|
||||||
</div>
|
|
||||||
EOD;
|
|
||||||
}
|
|
||||||
$item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES);
|
$item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES);
|
||||||
|
|
||||||
// put out
|
// put out
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usort($this->items, array('TwitterBridge', 'compareTweetId'));
|
||||||
}
|
}
|
||||||
|
|
||||||
private function processEmojis($tweet){
|
/**
 * usort() callback that orders feed items by tweet ID, highest ID first.
 *
 * IDs are compared as decimal strings (by length, then digit by digit)
 * instead of through intval(): intval() clamps values above PHP_INT_MAX, so
 * on 32-bit builds large IDs would all compare as equal. Identical IDs
 * yield 0, as usort() expects from a comparator.
 *
 * @param array $tweet1 Feed item carrying its tweet ID under the 'id' key
 * @param array $tweet2 Feed item carrying its tweet ID under the 'id' key
 * @return int 1 when $tweet1 has the lower ID (sorts later), -1 when it has
 * the higher ID, 0 when both IDs are the same
 */
private static function compareTweetId($tweet1, $tweet2) {
	// Leading zeros would distort the length comparison below
	$id1 = ltrim((string)$tweet1['id'], '0');
	$id2 = ltrim((string)$tweet2['id'], '0');

	if($id1 === $id2) {
		return 0;
	}

	$length1 = strlen($id1);
	$length2 = strlen($id2);

	// For non-negative decimal numbers, more digits means a larger value
	if($length1 !== $length2) {
		return $length1 < $length2 ? 1 : -1;
	}

	// Same number of digits: byte order equals numeric order
	return strcmp($id1, $id2) < 0 ? 1 : -1;
}
|
||||||
|
|
||||||
private function processContentLinks($tweet){
|
/**
 * Obtain the API key and the guest token used to query the Twitter API.
 *
 * The guest token is read from the 'gt=' value found in the twitter.com
 * page, and the API key is extracted from the main JavaScript file that
 * page links to. This costs two HTTP requests, so the resulting pair is
 * stored in the configured cache and returned from there on later calls.
 *
 * NOTE(review): a cached pair is returned whatever its age. If guest tokens
 * expire, an age check or refresh is needed here - to be confirmed.
 *
 * @return array Two elements: [0] the API key, [1] the guest token. When
 * either value cannot be extracted the failure is reported through
 * returnServerError() (presumably aborting the request - confirm).
 */
private function getApiKey() {
	$cacheFac = new CacheFactory();
	$cacheFac->setWorkingDir(PATH_LIB_CACHES);
	$cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
	$cache->setScope(get_called_class());
	$cache->setKey(array('api_key'));
	$data = $cache->loadData();

	// Cache hit: no request needed
	if($data !== null) {
		return $data;
	}

	$twitterPage = getContents('https://twitter.com');

	// Only the first occurrence is needed, hence preg_match() and an explicit
	// check instead of indexing into a possibly empty match list
	$jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m';
	if(preg_match($jsMainRegex, $twitterPage, $jsMainMatch) !== 1) {
		returnServerError('Could not find the Twitter main script URL.');
	}
	$jsLink = $jsMainMatch[0];

	// The pattern also matches 'gt=' followed by no digit at all; reject that
	$guestTokenRegex = '/gt=([0-9]*)/m';
	if(preg_match($guestTokenRegex, $twitterPage, $guestTokenMatch) !== 1
	|| $guestTokenMatch[1] === '') {
		returnServerError('Could not find the Twitter guest token.');
	}
	$guestToken = $guestTokenMatch[1];

	$jsContent = getContents($jsLink);
	$apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m';
	if(preg_match($apiKeyRegex, $jsContent, $apiKeyMatch) !== 1) {
		returnServerError('Could not find the Twitter API key.');
	}
	$apiKey = $apiKeyMatch[0];

	$credentials = array($apiKey, $guestToken);
	$cache->saveData($credentials);

	return $credentials;
}
|
||||||
'<a',
|
|
||||||
' <a',
|
/**
 * Fetch an API URL, sending the API key and guest token as request headers.
 *
 * @param string $uri URL to request
 * @return mixed Whatever getContents() returns for that URL
 */
private function getApiContents($uri) {
	$credentials = $this->getApiKey();

	// [0] is the API key, [1] the guest token (see getApiKey())
	$requestHeaders = array();
	$requestHeaders[] = 'authorization: Bearer ' . $credentials[0];
	$requestHeaders[] = 'x-guest-token: ' . $credentials[1];

	return getContents($uri, $requestHeaders);
}
|
||||||
|
|
||||||
private function getImageURI($tweet){
|
/**
 * Resolve a screen name to the 'rest_id' reported by the UserByScreenName
 * GraphQL endpoint.
 *
 * The query variables are serialized with json_encode() so that quotes or
 * backslashes in the user-supplied name cannot produce malformed JSON.
 *
 * @param string $username Screen name to look up; it is lower-cased first
 * @return string The value of data->user->rest_id in the API response. A
 * response without that field is reported through returnServerError().
 */
private function getRestId($username) {
	$variables = json_encode(array(
		'screen_name' => strtolower($username),
		'withHighlightedLabel' => true
	));

	$searchURL = self::API_URI
		. '/graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName?variables='
		. urlencode($variables);

	$searchResult = json_decode($this->getApiContents($searchURL));

	// isset() also covers an undecodable response (json_decode() gave null)
	if(!isset($searchResult->data->user->rest_id)) {
		returnServerError('Requested username can\'t be found.');
	}

	return $searchResult->data->user->rest_id;
}
|
||||||
|
|
||||||
$container = $tweet->find('div.AdaptiveMedia-container', 0);
|
/**
 * Resolve a list (owner screen name + list slug) to the 'id_str' reported
 * by the ListBySlug GraphQL endpoint.
 *
 * The query variables are serialized with json_encode() so that quotes or
 * backslashes in the user-supplied values cannot produce malformed JSON.
 *
 * @param string $username Screen name of the list owner; it is lower-cased first
 * @param string $listName Slug of the list, sent as given
 * @return string The value of data->user_by_screen_name->list->id_str in
 * the API response. A response without that field is reported through
 * returnServerError().
 */
private function getListId($username, $listName) {
	$variables = json_encode(array(
		'screenName' => strtolower($username),
		'listSlug' => $listName,
		'withHighlightedLabel' => false
	));

	$searchURL = self::API_URI
		. '/graphql/ErWsz9cObLel1BF-HjuBlA/ListBySlug?variables='
		. urlencode($variables);

	$searchResult = json_decode($this->getApiContents($searchURL));

	// isset() also covers an undecodable response (json_decode() gave null)
	if(!isset($searchResult->data->user_by_screen_name->list->id_str)) {
		returnServerError('Requested username or list can\'t be found.');
	}

	return $searchResult->data->user_by_screen_name->list->id_str;
}
|
||||||
|
|
||||||
if($container && $container->find('img', 0)) {
|
/**
 * Find the user whose 'id_str' equals the given ID among the users of an
 * API response.
 *
 * IDs are compared as strings with ===, so two numeric-looking IDs are
 * never treated as equal through PHP's loose numeric comparison.
 *
 * @param string $userId ID to look for (collectData() passes a tweet's user_id_str)
 * @param object $apiData Object with an iterable 'users' member
 * (collectData() passes the response's globalObjects)
 * @return object|null The matching user object, or null when no user has that ID
 */
private function getUserInformation($userId, $apiData) {
	$wantedId = (string)$userId;

	foreach($apiData->users as $user) {
		if((string)$user->id_str === $wantedId) {
			return $user;
		}
	}

	// Explicit instead of falling off the end of the function
	return null;
}
|
||||||
|
|
||||||
if (!empty($images)) {
|
|
||||||
return $images;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getQuotedImageURI($tweet){
|
|
||||||
// Find media in tweet
|
|
||||||
$images = array();
|
|
||||||
|
|
||||||
$container = $tweet->find('div.QuoteMedia-container', 0);
|
|
||||||
|
|
||||||
if($container && $container->find('img', 0)) {
|
|
||||||
foreach ($container->find('img') as $img) {
|
|
||||||
$images[] = $img->src;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($images)) {
|
|
||||||
return $images;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private function getCookies($pageURL){
|
|
||||||
|
|
||||||
$ctx = stream_context_create(array(
|
|
||||||
'http' => array(
|
|
||||||
'follow_location' => false
|
|
||||||
)
|
|
||||||
)
|
|
||||||
);
|
|
||||||
$a = file_get_contents($pageURL, 0, $ctx);
|
|
||||||
|
|
||||||
//First request to get the cookie
|
|
||||||
$cookies = '';
|
|
||||||
foreach($http_response_header as $hdr) {
|
|
||||||
if(stripos($hdr, 'Set-Cookie') !== false) {
|
|
||||||
$cLine = explode(':', $hdr)[1];
|
|
||||||
$cLine = explode(';', $cLine)[0];
|
|
||||||
$cookies .= ';' . $cLine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return substr($cookies, 2);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
0
cache/pages/.gitkeep
vendored
0
cache/pages/.gitkeep
vendored
0
cache/server/.gitkeep
vendored
0
cache/server/.gitkeep
vendored
Loading…
Reference in a new issue