0) ? true : false; return $has_tld; }

    /**
     * Remove every space-separated token that looks like a link.
     *
     * A token is dropped when it contains a dot and containsTLD() reports
     * true for it. The remaining tokens are re-joined with single spaces.
     *
     * @param string $url Text to clean (in this file it is called with a tweet title).
     * @return string The text without the tokens recognised as links.
     */
    private function cleaner($url)
    {
        $kept = array();
        foreach (explode(' ', $url) as $token) {
            // Skip the containsTLD() check (a preg_match, per the original
            // note) for tokens that do not contain a dot at all.
            if (strpos($token, '.') !== false && $this->containsTLD($token) === true) {
                continue;
            }
            $kept[] = $token;
        }

        return implode(' ', $kept);
    }

    /**
     * Follow a link to its final destination and strip tracking parameters.
     *
     * (c) Kraoc / urlclean
     * https://github.com/kraoc/Leed-market/blob/master/urlclean/urlclean.plugin.disabled.php
     *
     * When cURL is available the link is requested with redirects enabled and
     * replaced by the effective URL. Afterwards "xtor" and "utm_" parameters
     * are removed and a dangling "?&" or trailing "?" is cleaned up.
     *
     * @param string $link URL to resolve.
     * @return string The resolved and cleaned URL.
     */
    private function resolve_url($link)
    {
        // fallback to crawl to real url (slowest method and unsecure to privacy)
        if (function_exists('curl_init') && !ini_get('safe_mode')) {
            // The handle and the user agent must exist before any option is set.
            $ch = curl_init();
            if ($ch !== false) {
                $ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.16 (KHTML, like Gecko) Chrome/24.0.1304.0 Safari/537.16';
                curl_setopt($ch, CURLOPT_USERAGENT, $ua);
                curl_setopt($ch, CURLOPT_URL, $link);
                curl_setopt($ch, CURLOPT_HEADER, true);
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                // >>> anonymization
                curl_setopt($ch, CURLOPT_COOKIESESSION, true);
                curl_setopt($ch, CURLOPT_REFERER, '');
                // <<< anonymization

                // The response itself is not needed, only the URL we ended up at.
                curl_exec($ch);
                $effective = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
                if (is_string($effective) && $effective !== '') {
                    $link = $effective;
                }
            }
        }

        $link = preg_replace("/[&#?]xtor=(.)+/", "", $link); // remove: xtor
        $link = preg_replace("/utm_([^&#]|(&))+&*/", "", $link); // remove: utm_

        // cleanup end of url
        $link = preg_replace("/\?&/", "", $link);
        if (is_string($link) && substr($link, -1) === '?') {
            $link = substr($link, 0, -1);
        }

        return $link;
    }

    /**
     * Scrape tweets from twitter.com and append one item per tweet to $this->items.
     *
     * Two modes are supported, chosen by the keys present in $param:
     *  - 'q': keyword search
     *  - 'u': timeline (with replies) of the given username
     * If neither key is set, returnError() is called with status 400.
     *
     * NOTE(review): returnError() is defined outside this view; the code below
     * assumes it does not return normally — confirm.
     *
     * @param array $param Request parameters ('q' or 'u').
     */
    public function collectData(array $param)
    {
        $html = '';
        if (isset($param['q'])) {
            /* keyword search mode */
            $html = file_get_html('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnError('No results for this query.', 404);
        } elseif (isset($param['u'])) {
            /* user timeline mode */
            $html = file_get_html('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnError('Requested username can\'t be found.', 404);
        } else {
            $this->returnError('You must specify a keyword (?q=...) or a Twitter username (?u=...).', 400);
        }

        // Matches absolute links inside the generated content (loop-invariant).
        $regex = "/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/";

        foreach ($html->find('div.js-stream-tweet') as $tweet) {
            // find(..., 0) yields null when the element is missing; skip such
            // tweets instead of failing on a null dereference.
            $avatar    = $tweet->find('img', 0);
            $permalink = $tweet->find('a.js-permalink', 0);
            $timestamp = $tweet->find('span.js-short-timestamp', 0);
            $text      = $tweet->find('p.js-tweet-text', 0);
            if ($avatar === null || $permalink === null || $timestamp === null || $text === null) {
                continue;
            }

            $item = new \Item();
            // extract username
            $item->username = $tweet->getAttribute('data-screen-name');
            // extract fullname (pseudonym)
            $item->fullname = $tweet->getAttribute('data-name');
            // get avatar link
            $item->avatar = $avatar->src;
            // get TweetID
            $item->id = $tweet->getAttribute('data-tweet-id');
            // get tweet link
            $item->uri = 'https://twitter.com'.$permalink->getAttribute('href');
            // extract tweet timestamp
            $item->timestamp = $timestamp->getAttribute('data-time');
            // extract plaintext (taken before the links below are rewritten)
            $item->content_simple = str_replace('href="/', 'href="https://twitter.com/', html_entity_decode(strip_tags($text->innertext, '')));

            // processing content links
            foreach ($tweet->find('a') as $link) {
                if ($link->hasAttribute('data-expanded-url')) {
                    $link->href = $link->getAttribute('data-expanded-url');
                }
                $link->removeAttribute('data-expanded-url');
                $link->removeAttribute('data-query-source');
                $link->removeAttribute('rel');
                $link->removeAttribute('class');
                $link->removeAttribute('target');
                $link->removeAttribute('title');
            }

            // get tweet text (read after the link attributes were rewritten)
            $item->content = 'avatar'.$item->username.' '.$item->fullname."\n"
                .str_replace('href="/', 'href="https://twitter.com/', $text->innertext)."\n";

            // generate the title from the plain text; a disabled alternative
            // format was: fullname (@username) | content_simple
            $title = $item->content_simple;
            $title = preg_replace('|https?://www\.[a-z\.0-9]+|i', '', $title); // remove http(s) links
            $title = preg_replace('|www\.[a-z\.0-9]+|i', '', $title); // remove www. links
            $title = $this->cleaner($title); // remove all remaining links
            $item->title = trim($title); // remove extra spaces at beginning and end

            // convert all content links to real ones
            $item->content = preg_replace_callback($regex, function ($url) {
                // $url[0] is the full matched link
                return $this->resolve_url($url[0]);
            }, $item->content);

            // put out
            $this->items[] = $item;
        }
    }

    /**
     * @return string Display name of this bridge.
     */
    public function getName()
    {
        return 'Twitter Bridge Tweaked';
    }

    /**
     * @return string Base URI reported for this bridge.
     */
    public function getURI()
    {
        return 'http://twitter.com';
    }

    /**
     * @return int Cache duration in seconds.
     */
    public function getCacheDuration()
    {
        return 300; // 5 minutes
    }

    /**
     * Username of the first collected item.
     *
     * @return mixed The first item's username, or null when no item was collected.
     */
    public function getUsername()
    {
        return isset($this->items[0]) ? $this->items[0]->username : null;
    }
}