diff --git a/bridges/FierPandaBridge.php b/bridges/FierPandaBridge.php index b6e00744..4cdfa8d9 100644 --- a/bridges/FierPandaBridge.php +++ b/bridges/FierPandaBridge.php @@ -7,9 +7,7 @@ class FierPandaBridge extends BridgeAbstract { public $description = "Returns latest articles from Fier Panda."; public function collectData(){ - $link = 'http://www.fier-panda.fr/'; - - $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Fier Panda.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request Fier Panda.'); foreach($html->find('div.container-content article') as $element) { $item = array(); diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index d407f769..58343f7c 100644 --- a/bridges/FlickrExploreBridge.php +++ b/bridges/FlickrExploreBridge.php @@ -3,11 +3,12 @@ class FlickrExploreBridge extends BridgeAbstract{ public $maintainer = "sebsauvage"; public $name = "Flickr Explore"; - public $uri = "https://www.flickr.com/explore"; + public $uri = "https://www.flickr.com/"; public $description = "Returns the latest interesting images from Flickr"; public function collectData(){ - $html = $this->getSimpleHTMLDOM('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM($this->uri.'explore') + or $this->returnServerError('Could not request Flickr.'); foreach($html->find('.photo-list-photo-view') as $element) { // Get the styles @@ -21,10 +22,17 @@ class FlickrExploreBridge extends BridgeAbstract{ $imageID = reset($imageURIs); // Get the image JSON via Flickr API - $imageJSON = json_decode($this->getContents('https://api.flickr.com/services/rest/?method=flickr.photos.getInfo&api_key=103b574d49bd51f0e18bfe907da44a0f&photo_id='.$imageID.'&format=json&nojsoncallback=1')) or $this->returnServerError('Could not request Flickr.'); // FIXME: Request time too long... + $imageJSON = json_decode($this->getContents( + 'https://api.flickr.com/services/rest/?' + .'method=flickr.photos.getInfo&' + .'api_key=103b574d49bd51f0e18bfe907da44a0f&' + .'photo_id='.$imageID.'&' + .'format=json&' + .'nojsoncallback=1' + )) or $this->returnServerError('Could not request Flickr.'); // FIXME: Request time too long... $item = array(); - $item['uri'] = 'https://flickr.com/photo.gne?id='.$imageID; + $item['uri'] = $this->uri.'photo.gne?id='.$imageID; $item['content'] = ''; // FIXME: Filter javascript ? $item['title'] = $imageJSON->photo->title->_content; $this->items[] = $item; diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php index 1a91c95c..2494567b 100644 --- a/bridges/FlickrTagBridge.php +++ b/bridges/FlickrTagBridge.php @@ -8,32 +8,35 @@ class FlickrTagBridge extends BridgeAbstract{ public $parameters = array( 'By keyword' => array( - 'q'=>array('name'=>'keyword') + 'q'=>array( + 'name'=>'keyword', + 'required'=>true + ) ), 'By username' => array( - 'u'=>array('name'=>'Username') + 'u'=>array( + 'name'=>'Username', + 'required'=>true + ) ), ); public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); - if ($this->getInput('q')) { /* keyword search mode */ - $this->request = $this->getInput('q'); - $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); - } - elseif ($this->getInput('u')) { /* user timeline mode */ - $this->request = $this->getInput('u'); - $html = $this->getSimpleHTMLDOM('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); - } - - else { - $this->returnClientError('You must specify a keyword or a Flickr username.'); + switch($this->queriedContext){ + case 'By keyword': + $html = $this->getSimpleHTMLDOM($this->uri.'search/?q='.urlencode($this->getInput('q')).'&s=rec') + or $this->returnServerError('No results for this query.'); + break; + case 'by username': + $html = $this->getSimpleHTMLDOM($this->uri.'photos/'.urlencode($this->getInput('u')).'/') + or $this->returnServerError('Requested username can\'t be found.'); + break; } foreach($html->find('span.photo_container') as $element) { $item = array(); - $item['uri'] = 'http://flickr.com'.$element->find('a',0)->href; + $item['uri'] = $this->uri.$element->find('a',0)->href; $thumbnailUri = $element->find('img',0)->getAttribute('data-defer-src'); $item['content'] = ''; // FIXME: Filter javascript ? $item['title'] = $element->find('a',0)->title; diff --git a/bridges/FootitoBridge.php b/bridges/FootitoBridge.php index 47518853..b2c40896 100644 --- a/bridges/FootitoBridge.php +++ b/bridges/FootitoBridge.php @@ -7,7 +7,8 @@ class FootitoBridge extends BridgeAbstract{ public $description = "Footito"; public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request Footito.'); foreach($html->find('div.post') as $element) { $item = array(); diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index f1954d77..eddfdde2 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -7,23 +7,26 @@ class FourchanBridge extends BridgeAbstract{ public $description = "Returns posts from the specified thread"; public $parameters = array( array( - 't'=>array('name'=>'Thread URL') + 't'=>array( + 'name'=>'Thread URL', + 'pattern'=>'(https:\/\/)?boards\.4chan\.org\/.*thread\/.*', + 'required'=>true + ) )); public function collectData(){ - if (!$this->getInput('t')) - $this->returnClientError('You must specify the thread URL (?t=...)'); - - $thread = parse_url($this->getInput('t')) or $this->returnClientError('This URL seems malformed, please check it.'); + $thread = parse_url($this->getInput('t')) + or $this->returnClientError('This URL seems malformed, please check it.'); if($thread['host'] !== 'boards.4chan.org') $this->returnClientError('4chan thread URL only.'); if(strpos($thread['path'], 'thread/') === FALSE) $this->returnClientError('You must specify the thread URL.'); - $url = 'https://boards.4chan.org'.$thread['path'].''; - $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request 4chan, thread not found"); + $url = 'https://boards.4chan.org'.$thread['path']; + $html = $this->getSimpleHTMLDOM($url) + or $this->returnServerError("Could not request 4chan, thread not found"); foreach($html->find('div.postContainer') as $element) { $item = array(); diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index cc252b86..3a91467e 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -136,12 +136,9 @@ class FuturaSciencesBridge extends BridgeAbstract { } $url = $this->getURI().'rss/'.$this->getInput('feed').'.xml'; - if (empty($this->getInput('feed'))) - $this->returnClientError('Please select a feed to display.'.$url); - if ($this->getInput('feed') !== preg_replace('/[^a-zA-Z-\/]+/', '', $this->getInput('feed')) || substr_count($this->getInput('feed'), '/') > 1 || strlen($this->getInput('feed') > 64)) - $this->returnClientError('Invalid "feed" parameter.'.$url); - $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); + $html = $this->getSimpleHTMLDOM($url) + or $this->returnServerError('Could not request Futura-Sciences: '.$url); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index eccd090a..bab42be7 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -54,14 +54,19 @@ class GBAtempBridge extends BridgeAbstract { } private function fetch_post_content($uri, $site_url) { - $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $html = $this->getSimpleHTMLDOM($uri); + if(!$html){ + return 'Could not request GBAtemp '.$uri; + } + $content = $html->find('div.messageContent', 0)->innertext; return $this->cleanup_post_content($content, $site_url); } public function collectData(){ - $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request GBAtemp.'); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request GBAtemp.'); switch($this->getInput('type')){ case 'N': @@ -110,7 +115,10 @@ class GBAtempBridge extends BridgeAbstract { } public function getName() { - $type=array_search($this->getInput('type'),$param['type']['values']); + $type=array_search( + $this->getInput('type'), + $this->parameters[$this->queriedContext]['type']['values'] + ); return 'GBAtemp '.$type.' Bridge'; } diff --git a/bridges/GawkerBridge.php b/bridges/GawkerBridge.php deleted file mode 100644 index 69b5c31d..00000000 --- a/bridges/GawkerBridge.php +++ /dev/null @@ -1,68 +0,0 @@ -array( - 'name'=>'site id to put in uri between feeds.gawker.com and /full .. which is obviously not full AT ALL', - 'required'=>true - ) - )); - - - private function toURI($name) { - return RSS_PREFIX.$name.RSS_SUFFIX; - } - - public function collectData(){ - if (empty($this->getInput('site'))) { - trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR); - } else { - $this->name = $this->getInput('site'); - $url = $this->toURI(strtolower($this->getInput('site'))); - } - $this->debugMessage("loading feed from ".$this->getURI()); - parent::collectExpandableDatas($url); - } - - protected function parseRSSItem($newsItem) { - $item = array(); - $item['uri'] = trim($newsItem->link); - $item['title'] = trim($newsItem->title); - $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem); - $this->debugMessage("///////////////////////////////////////////////////////////////////////////////////////\nprocessing item ".var_export($item, true)."\n\n\nbuilt from\n\n\n".var_export($newsItem, true)); - try { - // now load that uri from cache - $this->debugMessage("loading page ".$item['uri']); - $articlePage = $this->get_cached($item['uri']); - if(is_object($articlePage)) { - $content = $articlePage->find('.post-content', 0); - HTMLSanitizer::defaultImageSrcTo($content, $this->getURI()); - $vcard = $articlePage->find('.vcard', 0); - if(is_object($vcard)) { - $authorLink = $vcard->find('a', 0); - $item['author'] = $authorLink->innertext; - // TODO use author link href to fill the feed info - } - $this->debugMessage("item quite loaded : ".var_export($item, true)); - // I set item content as last element, for easier var_export reading - $item['content'] = $content->innertext; - } else { - throw new Exception("cache content for ".$item['uri']." is NOT a Simple DOM parser object !"); - } - } catch(Exception $e) { - $this->debugMessage("obtaining ".$item['uri']." resulted in exception ".$e->getMessage().". Deleting cached page ..."); - // maybe file is incorrect. it should be discarded from cache - $this->remove_from_cache($item['url']); - $item['content'] = $e->getMessage(); - } - return $item; - } -} diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php index 16f3f205..811a905b 100644 --- a/bridges/GelbooruBridge.php +++ b/bridges/GelbooruBridge.php @@ -15,21 +15,15 @@ class GelbooruBridge extends BridgeAbstract{ )); public function collectData(){ - $page = 0; - if ($this->getInput('p')) { - $page = (int)preg_replace("/[^0-9]/",'', $this->getInput('p')); - $page = $page - 1; - $page = $page * 63; - } - if ($this->getInput('t')) { - $tags = urlencode($this->getInput('t')); - } - $html = $this->getSimpleHTMLDOM("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); - + $html = $this->getSimpleHTMLDOM( + $this->uri.'index.php?page=post&s=list&' + .'&pid='.($this->getInput('p')?($this->getInput('p') -1)*63:'') + .'&tags='.urlencode($this->getInput('t')) + ) or $this->returnServerError('Could not request Gelbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = array(); - $item['uri'] = 'http://gelbooru.com/'.$element->find('a', 0)->href; + $item['uri'] = $this->uri.$element->find('a', 0)->href; $item['postid'] = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item['timestamp'] = time(); $thumbnailUri = $element->find('img', 0)->src; diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php index 79665189..95beea58 100644 --- a/bridges/GiphyBridge.php +++ b/bridges/GiphyBridge.php @@ -9,7 +9,10 @@ class GiphyBridge extends BridgeAbstract{ public $description = "Bridge for giphy.com"; public $parameters = array( array( - 's'=>array('name'=>'search tag'), + 's'=>array( + 'name'=>'search tag', + 'required'=>true + ), 'n'=>array( 'name'=>'max number of returned items', 'type'=>'number' @@ -19,16 +22,12 @@ class GiphyBridge extends BridgeAbstract{ public function collectData(){ $html = ''; $base_url = 'http://giphy.com'; - if ($this->getInput('s')) { /* keyword search mode */ - $html = $this->getSimpleHTMLDOM($base_url.'/search/'.urlencode($this->getInput('s').'/')) or $this->returnServerError('No results for this query.'); - } - else { - $this->returnClientError('You must specify a search worf (?s=...).'); - } + $html = $this->getSimpleHTMLDOM($this->uri.'/search/'.urlencode($this->getInput('s').'/')) + or $this->returnServerError('No results for this query.'); $max = GIPHY_LIMIT; if ($this->getInput('n')) { - $max = (integer) $this->getInput('n'); + $max = $this->getInput('n'); } $limit = 0; @@ -38,7 +37,8 @@ class GiphyBridge extends BridgeAbstract{ $node = $entry->first_child(); $href = $node->getAttribute('href'); - $html2 = $this->getSimpleHTMLDOM($base_url . $href) or $this->returnServerError('No results for this query.'); + $html2 = $this->getSimpleHTMLDOM($this->uri . $href) + or $this->returnServerError('No results for this query.'); $figure = $html2->getElementByTagName('figure'); $img = $figure->firstChild(); $caption = $figure->lastChild(); diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 3708956f..480a2446 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -3,7 +3,7 @@ class GithubIssueBridge extends BridgeAbstract{ public $maintainer = 'Pierre Mazière'; public $name = 'Github Issue'; - public $uri = ''; + public $uri = 'https://github.com/'; public $description = 'Returns the issues or comments of an issue of a github project'; public $parameters=array( @@ -29,11 +29,13 @@ class GithubIssueBridge extends BridgeAbstract{ ); public function collectData(){ - $uri = 'https://github.com/'.$this->getInput('u').'/'.$this->getInput('p').'/issues/'.($this->getInput('i')?$this->getInput('i'):''); + $uri = $this->uri.$this->getInput('u').'/'.$this->getInput('p') + .'/issues/'.$this->getInput('i'); $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Github Issue '.$this->getInput('i').' in project '.$this->getInput('u').'/'.$this->getInput('p')); - if($this->getInput('i')){ + switch($this->queriedContext){ + case 'Issue Comments': foreach($html->find('.js-comment-container') as $comment){ $item = array(); @@ -48,19 +50,20 @@ class GithubIssueBridge extends BridgeAbstract{ $this->items[]=$item; } - }else{ + break; + case 'Project Issues': foreach($html->find('.js-active-navigation-container .js-navigation-item') as $issue){ $item=array(); $info=$issue->find('.opened-by',0); $item['author']=$info->find('a',0)->plaintext; $item['timestamp']=strtotime($info->find('relative-time',0)->getAttribute('datetime')); $item['title']=$issue->find('.js-navigation-open',0)->plaintext; - $comments=$issue->firstChild()->firstChild() - ->nextSibling()->nextSibling()->nextSibling()->plaintext; + $comments=$issue->find('.col-5',0)->plaintext; $item['content']='Comments: '.($comments?$comments:'0'); - $item['uri']='https://github.com'.$issue->find('.js-navigation-open',0)->getAttribute('href'); + $item['uri']=$this->uri.$issue->find('.js-navigation-open',0)->getAttribute('href'); $this->items[]=$item; } + break; } } diff --git a/bridges/GitlabCommitsBridge.php b/bridges/GitlabCommitsBridge.php index 93328ed5..f5fe97e2 100644 --- a/bridges/GitlabCommitsBridge.php +++ b/bridges/GitlabCommitsBridge.php @@ -26,16 +26,11 @@ class GitlabCommitsBridge extends BridgeAbstract{ )); public function collectData(){ - $uri = $this->getInput('uri').'/'.$this->getInput('u').'/'.$this->getInput('p').'/commits/'; - if($this->getInput('b')){ - $uri.=$this->getInput('b'); - }else{ - $uri.='master'; - } + $uri = $this->getInput('uri').'/'.$this->getInput('u').'/' + .$this->getInput('p').'/commits/'.$this->getInput('b'); $html = $this->getSimpleHTMLDOM($uri) - or $this->returnServerError('No results for Gitlab Commits of project '.$this->getInput('uri').'/'.$this->getInput('u').'/'.$this->getInput('p')); - + or $this->returnServerError('No results for Gitlab Commits of project '.$uri); foreach($html->find('li.commit') as $commit){ diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php index e79dcd4c..660da8a4 100644 --- a/bridges/GizmodoFRBridge.php +++ b/bridges/GizmodoFRBridge.php @@ -10,6 +10,9 @@ class GizmodoFRBridge extends BridgeAbstract{ function GizmodoFRExtractContent($url) { $articleHTMLContent = $this->getSimpleHTMLDOM($url); + if(!$articleHTMLContent){ + return 'Could not load '.$url; + } $text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; @@ -21,7 +24,8 @@ class GizmodoFRBridge extends BridgeAbstract{ return $text; } - $rssFeed = $this->getSimpleHTMLDOM('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); + $rssFeed = $this->getSimpleHTMLDOM($this->uri.'/feed') + or $this->returnServerError('Could not request '.$this->uri.'/feed'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 9d3ef28b..a55620f2 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -6,7 +6,7 @@ class GooglePlusPostBridge extends BridgeAbstract public $maintainer = "Grummfy"; public $name = "Google Plus Post Bridge"; - public $uri = "http://plus.google.com/"; + public $uri = "https://plus.google.com/"; public $description = "Returns user public post (without API)."; public $parameters = array( array( @@ -16,20 +16,11 @@ class GooglePlusPostBridge extends BridgeAbstract ) )); - const GOOGLE_PLUS_BASE_URL = 'https://plus.google.com/'; - public function collectData() { - $param=$this->parameters[$queriedContext]; - if (!$this->getInput('username')) - { - $this->returnClientError('You must specify a username (?username=...).'); - } - - $this->request = $this->getInput('username'); // get content parsed // $html = $this->getSimpleHTMLDOM(__DIR__ . '/../posts2.html' - $html = $this->getSimpleHTMLDOM(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' + $html = $this->getSimpleHTMLDOM($this->uri . urlencode($this->getInput('username')) . '/posts' // force language , false, stream_context_create(array('http'=> array( 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" @@ -57,7 +48,7 @@ class GooglePlusPostBridge extends BridgeAbstract // $item['title'] = $item['fullname'] = $post->find('header.lea', 0)->plaintext; $item['avatar'] = $post->find('div.ys img', 0)->src; // var_dump((($post->find('a.o-U-s', 0)->getAllAttributes()))); - $item['uri'] = self::GOOGLE_PLUS_BASE_URL . $post->find('a.o-U-s', 0)->href; + $item['uri'] = $this->uri . $post->find('a.o-U-s', 0)->href; $item['timestamp'] = strtotime($post->find('a.o-U-s', 0)->plaintext); $this->items[] = $item; @@ -65,21 +56,21 @@ class GooglePlusPostBridge extends BridgeAbstract $hashtags = array(); foreach($post->find('a.d-s') as $hashtag) { - $hashtags[ trim($hashtag->plaintext) ] = self::GOOGLE_PLUS_BASE_URL . $hashtag->href; + $hashtags[ trim($hashtag->plaintext) ] = $this->uri . $hashtag->href; } $item['content'] = ''; // avatar display - $item['content'] .= '
avatar' . $item['username'] . '
'; $content = $post->find('div.Al', 0); // alter link // $content = $content->innertext; -// $content = str_replace('href="./', 'href="' . self::GOOGLE_PLUS_BASE_URL, $content); -// $content = str_replace('href="photos', 'href="' . self::GOOGLE_PLUS_BASE_URL . 'photos', $content); +// $content = str_replace('href="./', 'href="' . $this->uri, $content); +// $content = str_replace('href="photos', 'href="' . $this->uri . 'photos', $content); // XXX ugly but I don't have any idea how to do a better stuff, str_replace on link doesn't work as expected and ask too many checks foreach($content->find('a') as $link) { @@ -95,7 +86,7 @@ class GooglePlusPostBridge extends BridgeAbstract { $link->href = substr($link->href, 1); } - $link->href = self::GOOGLE_PLUS_BASE_URL . $link->href; + $link->href = $this->uri . $link->href; } } $content = $content->innertext; @@ -116,7 +107,7 @@ class GooglePlusPostBridge extends BridgeAbstract public function getURI() { - return $this->_url ?: 'http://plus.google.com/'; + return $this->_url ?: $this->uri; } public function getCacheDuration() diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 2c2ecac8..fb3d7403 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -9,7 +9,6 @@ */ class GoogleSearchBridge extends BridgeAbstract{ - public $maintainer = "sebsauvage"; public $name = "Google search"; public $uri = "https://www.google.com/"; @@ -26,7 +25,10 @@ class GoogleSearchBridge extends BridgeAbstract{ public function collectData(){ $html = ''; - $html = $this->getSimpleHTMLDOM('https://www.google.com/search?q=' . urlencode($this->getInput('q')) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($this->uri + .'search?q=' . urlencode($this->getInput('q')) + .'&num=100&complete=0&tbs=qdr:y,sbd:1') + or $this->returnServerError('No results for this query.'); $emIsRes = $html->find('div[id=ires]',0); if( !is_null($emIsRes) ){ diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php index a91e5e32..297dc56d 100644 --- a/bridges/GuruMedBridge.php +++ b/bridges/GuruMedBridge.php @@ -13,7 +13,8 @@ class GuruMedBridge extends BridgeAbstract{ } public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); + $html = $this->getSimpleHTMLDOM($this->uri.'feed') + or $this->returnServerError('Could not request Gurumed.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php index 8133c6e8..fde1aaab 100644 --- a/bridges/HDWallpapersBridge.php +++ b/bridges/HDWallpapersBridge.php @@ -1,31 +1,24 @@ array('name'=>'category'), + 'c'=>array( + 'name'=>'category', + 'defaultValue'=>'latest_wallpapers' + ), 'm'=>array('name'=>'max number of wallpapers'), 'r'=>array( 'name'=>'resolution', + 'defaultValue'=>'1920x1200', 'exampleValue'=>'1920x1200, 1680x1050,…' ) )); public function collectData(){ - $html = ''; - $baseUri = 'http://www.hdwallpapers.in'; - - $this->category = $this->getInput('c') ?: 'latest_wallpapers'; // Latest default - $this->resolution = $this->getInput('r') ?: '1920x1200'; // Wide wallpaper default - $category = $this->category; if (strrpos($category, 'wallpapers') !== strlen($category)-strlen('wallpapers')) { $category .= '-desktop-wallpapers'; @@ -36,7 +29,7 @@ class HDWallpapersBridge extends BridgeAbstract { $lastpage = 1; for ($page = 1; $page <= $lastpage; $page++) { - $link = $baseUri.'/'.$category.'/page/'.$page; + $link = $this->uri.'/'.$category.'/page/'.$page; $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { @@ -49,10 +42,10 @@ class HDWallpapersBridge extends BridgeAbstract { $item = array(); // http://www.hdwallpapers.in/download/yosemite_reflections-1680x1050.jpg - $item['uri'] = $baseUri.'/download'.str_replace('wallpapers.html', $this->resolution.'.jpg', $element->href); + $item['uri'] = $this->uri.'/download'.str_replace('wallpapers.html', $this->getInput('r').'.jpg', $element->href); $item['timestamp'] = time(); $item['title'] = $element->find('p', 0)->text(); - $item['content'] = $item['title'].'
'; + $item['content'] = $item['title'].'
'; $this->items[] = $item; $num++; @@ -63,7 +56,7 @@ class HDWallpapersBridge extends BridgeAbstract { } public function getName(){ - return 'HDWallpapers - '.str_replace(['__', '_'], [' & ', ' '], $this->category).' ['.$this->resolution.']'; + return 'HDWallpapers - '.str_replace(['__', '_'], [' & ', ' '], $this->getInput('c')).' ['.$this->getInput('r').']'; } public function getCacheDuration(){ diff --git a/bridges/HentaiHavenBridge.php b/bridges/HentaiHavenBridge.php index 7be61450..e970d00d 100644 --- a/bridges/HentaiHavenBridge.php +++ b/bridges/HentaiHavenBridge.php @@ -7,7 +7,8 @@ class HentaiHavenBridge extends BridgeAbstract{ public $description = "Returns releases from Hentai Haven"; public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request Hentai Haven.'); foreach($html->find('div.zoe-grid') as $element) { $item = array(); $item['uri'] = $element->find('div.brick-content h3 a', 0)->href; diff --git a/bridges/IdenticaBridge.php b/bridges/IdenticaBridge.php index d0896313..f99bde71 100644 --- a/bridges/IdenticaBridge.php +++ b/bridges/IdenticaBridge.php @@ -14,7 +14,6 @@ class IdenticaBridge extends BridgeAbstract{ )); public function collectData(){ - $html = ''; $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Requested username can\'t be found.'); diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 4551e279..bec19b2c 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -46,7 +46,7 @@ class InstagramBridge extends BridgeAbstract{ { $item = array(); - $item['uri'] = "https://instagram.com/p/".$media->code."/"; + $item['uri'] = $this->uri.'/p/'.$media->code.'/'; $item['content'] = ''; if (isset($media->caption)) { @@ -61,7 +61,7 @@ class InstagramBridge extends BridgeAbstract{ } public function getName(){ - return $this->param['u']['value'] .' - Instagram Bridge'; + return $this->getInput('u') .' - Instagram Bridge'; } public function getURI(){ diff --git a/bridges/IsoHuntBridge.php b/bridges/IsoHuntBridge.php index b438940f..f99ceef5 100644 --- a/bridges/IsoHuntBridge.php +++ b/bridges/IsoHuntBridge.php @@ -1,483 +1,471 @@ array( - 'latest_category'=>array( - 'name'=>'Latest category', - 'type'=>'list', - 'required'=>true, - 'title'=>'Select your category', - 'defaultValue'=>'News', - 'values'=>array( - 'Hot Torrents'=>'hot_torrents', - 'News'=>'news', - 'Releases'=>'releases', - 'Torrents'=>'torrents' - ) - ) - ), - - /* - * Get feeds for one of the "torrent" categories - * Make sure to add new categories also to get_torrent_category_index($)! - * Elements are sorted by name ascending! - */ - 'By "Torrent" category' => array( - 'torrent_category'=>array( - 'name'=>'Torrent category', - 'type'=>'list', - 'required'=>true, - 'title'=>'Select your category', - 'defaultValue'=>'Anime', - 'values'=>array( - 'Adult'=>'adult', - 'Anime'=>'anime', - 'Books'=>'books', - 'Games'=>'games', - 'Movies'=>'movies', - 'Music'=>'music', - 'Other'=>'other', - 'Series & TV'=>'series_tv', - 'Software'=>'software' - ) - ), - 'torrent_popularity'=>array( - 'name'=>'Sort by popularity', - 'type'=>'checkbox', - 'title'=>'Activate to receive results by popularity' - ) - ), - - /* - * Get feeds for a specific search request - */ - 'Search torrent by name' => array( - 'search_name'=>array( - 'name'=>'Name', - 'required'=>true, - 'title'=>'Insert your search query', - 'exampleValue'=>'Bridge' - ), - 'search_category'=>array( - 'name'=>'Category', - 'type'=>'list', - 'title'=>'Select your category', - 'defaultValue'=>'All', - 'values'=>array( - 'Adult'=>'adult', - 'All'=>'all', - 'Anime'=>'anime', - 'Books'=>'books', - 'Games'=>'games', - 'Movies'=>'movies', - 'Music'=>'music', - 'Other'=>'other', - 'Series & TV'=>'series_tv', - 'Software'=>'software' - ) - ) + public $parameters = array( + /* + * Get feeds for one of the "latest" categories + * Notice: The categories "News" and "Top Searches" are received from the main page + * Elements are sorted by name ascending! + */ + 'By "Latest" category' => array( + 'latest_category'=>array( + 'name'=>'Latest category', + 'type'=>'list', + 'required'=>true, + 'title'=>'Select your category', + 'defaultValue'=>'news', + 'values'=>array( + 'Hot Torrents'=>'hot_torrents', + 'News'=>'news', + 'Releases'=>'releases', + 'Torrents'=>'torrents' ) - ); + ) + ), - public function collectData(){ - $request_path = '/'; // We'll request the main page by default + /* + * Get feeds for one of the "torrent" categories + * Make sure to add new categories also to get_torrent_category_index($)! + * Elements are sorted by name ascending! + */ + 'By "Torrent" category' => array( + 'torrent_category'=>array( + 'name'=>'Torrent category', + 'type'=>'list', + 'required'=>true, + 'title'=>'Select your category', + 'defaultValue'=>'anime', + 'values'=>array( + 'Adult'=>'adult', + 'Anime'=>'anime', + 'Books'=>'books', + 'Games'=>'games', + 'Movies'=>'movies', + 'Music'=>'music', + 'Other'=>'other', + 'Series & TV'=>'series_tv', + 'Software'=>'software' + ) + ), + 'torrent_popularity'=>array( + 'name'=>'Sort by popularity', + 'type'=>'checkbox', + 'title'=>'Activate to receive results by popularity' + ) + ), - if($this->getInput('latest_category')){ // Requesting one of the latest categories - $this->request_latest_category($this->getInput('latest_category')); - } elseif($this->getInput('torrent_category')){ // Requesting one of the torrent categories - $order_popularity = false; + /* + * Get feeds for a specific search request + */ + 'Search torrent by name' => array( + 'search_name'=>array( + 'name'=>'Name', + 'required'=>true, + 'title'=>'Insert your search query', + 'exampleValue'=>'Bridge' + ), + 'search_category'=>array( + 'name'=>'Category', + 'type'=>'list', + 'title'=>'Select your category', + 'defaultValue'=>'all', + 'values'=>array( + 'Adult'=>'adult', + 'All'=>'all', + 'Anime'=>'anime', + 'Books'=>'books', + 'Games'=>'games', + 'Movies'=>'movies', + 'Music'=>'music', + 'Other'=>'other', + 'Series & TV'=>'series_tv', + 'Software'=>'software' + ) + ) + ) + ); - if($this->getInput('torrent_popularity')) - $order_popularity = $this->getInput('torrent_popularity'); + public function getURI(){ + $uri=$this->uri; + switch($this->queriedContext){ + case 'By "Latest" category': + switch($this->getInput('latest_category')){ + case 'hot_torrents': + $uri .= 'statistic/hot/torrents'; + break; + case 'news': + break; + case 'releases': + $uri .= 'releases.php'; + break; + case 'torrents': + $uri .= 'latest.php'; + break; + } + break; - $this->request_torrent_category($this->getInput('torrent_category'), $order_popularity); - } else if($this->getInput('search_name')){ // Requesting search - if($this->getInput('search_category')) - $this->request_search($this->getInput('search_name'), $this->getInput('search_category')); - else - $this->request_search($this->getInput('search_name')); - } else { - $this->returnClientError('Unknown request!'); - } + case 'By "Torrent" category': + $uri .= $this->build_category_uri( + $this->getInput('torrent_category'), + $this->getInput('torrent_popularity') + ); + break; + + case 'Search torrent by name': + $category=$this->getInput('search_category'); + $uri .= $this->build_category_uri($category); + if($category!=='movies') + $uri .= '&ihq=' . urlencode($this->getInput('search_name')); + break; } - public function getCacheDuration(){ - return 300; // 5 minutes + return $uri; + } + + public function getName(){ + switch($this->queriedContext){ + case 'By "Latest" category': + $categoryName = + array_search( + $this->getInput('latest_category'), + $this->parameters['By "Latest" category']['latest_category']['values'] + ); + $name = 'Latest '.$categoryName.' - ' . $this->name; + break; + + case 'By "Torrent" category': + $categoryName = + array_search( + $this->getInput('torrent_category'), + $this->parameters['By "Torrent" category']['torrent_category']['values'] + ); + $name = 'Category: ' . $categoryName . ' - ' . $this->name; + break; + + case 'Search torrent by name': + $categoryName = + array_search( + $this->getInput('search_category'), + $this->parameters['Search torrent by name']['search_category']['values'] + ); + $name = 'Search: "' . $this->getInput('search_name') . '" in category: ' . $categoryName . ' - ' . $this->name; + break; } -#region Helper functions for "By "Torrent" category" + return $name; + } - private function request_torrent_category($category, $order_popularity){ - $category_name = $this->get_torrent_category_name($category); - $category_index = $this->get_torrent_category_index($category); - $this->name = 'Category: ' . $category_name . ' - ' . $this->name; - $this->uri .= $this->build_category_uri($category_index, $order_popularity); + public function collectData(){ + $html = $this->load_html($this->getURI()); - $html = $this->load_html($this->uri); - - if(strtolower(trim($category)) === 'movies') // This one is special (content wise) - $this->get_movie_torrents($html); - else - $this->get_latest_torrents($html); - } - - private function get_torrent_category_name($category){ - $parameter = $this->parameters['By "Torrent" category']; - $languages = $parameter['torrent_category']['values']; - - foreach($languages as $name=>$value) - if(strtolower(trim($value)) === strtolower(trim($category))) - return $name; - - return 'Unknown category'; - } - - private function get_torrent_category_index($category){ - switch(strtolower(trim($category))){ - case 'anime': return 1; - case 'software' : return 2; - case 'games' : return 3; - case 'adult' : return 4; - case 'movies' : return 5; - case 'music' : return 6; - case 'other' : return 7; - case 'series_tv' : return 8; - case 'books': return 9; - default: return 0; - } - } - -#endregion - - private function request_latest_category($category){ - switch($category){ - case 'hot_torrents': // This is a special case! (that's why return) - $this->name = 'Latest hot torrents - ' . $this->name; - $this->uri .= '/statistic/hot/torrents'; - $html = $this->load_html($this->uri); - $this->get_latest_hot_torrents($html); - return; - case 'news': // This is a special case! (that's why return) - $this->name = 'Latest news - ' . $this->name; - $this->uri .= '/'; - $html = $this->load_html($this->uri); - $this->get_latest_news($html); - return; - case 'releases': - $this->name = 'Latest releases - ' . $this->name; - $this->uri .= '/releases.php'; - break; - case 'torrents': - $this->name = 'Latest torrents - ' . $this->name; - $this->uri .= '/latest.php'; - break; - default: // No category applies - $this->returnClientError('Undefined category: ' . $category . '!'); - } - - $html = $this->load_html($this->uri); + switch($this->queriedContext){ + case 'By "Latest" category': + switch($this->getInput('latest_category')){ + case 'hot_torrents': + $this->get_latest_hot_torrents($html); + break; + case 'news': + $this->get_latest_news($html); + break; + case 'releases': + case 'torrents': $this->get_latest_torrents($html); + break; + } + break; + + case 'By "Torrent" category': + if($this->getInput('torrent_category') === 'movies'){ + // This one is special (content wise) + $this->get_movie_torrents($html); + }else{ + $this->get_latest_torrents($html); + } + break; + + case 'Search torrent by name': + if( $this->getInput('search_category') === 'movies'){ + // This one is special (content wise) + $this->get_movie_torrents($html); + } else { + $this->get_latest_torrents($html); + } + break; + } + } + + public function getCacheDuration(){ + return 300; // 5 minutes + } + + #region Helper functions for "Movie Torrents" + + private function get_movie_torrents($html){ + $container = $html->find('div#w0', 0); + if(!$container) + $this->returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('article'); + if(!$torrents) + $this->returnServerError('Unable to find torrents!'); + + foreach($torrents as $torrent){ + + $anchor = $torrent->find('a', 0); + if(!$anchor) + $this->returnServerError('Unable to find anchor!'); + + $date = $torrent->find('small', 0); + if(!$date) + $this->returnServerError('Unable to find date!'); + + $item = array(); + + $item['uri'] = $this->fix_relative_uri($anchor->href); + $item['title'] = $anchor->title; + // $item['author'] = + $item['timestamp'] = strtotime($date->plaintext); + $item['content'] = $this->fix_relative_uri($torrent->innertext); + + $this->items[] = $item; + } + } + + #endregion + + #region Helper functions for "Latest Hot Torrents" + + private function get_latest_hot_torrents($html){ + $container = $html->find('div#serps', 0); + if(!$container) + $this->returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('tr'); + if(!$torrents) + $this->returnServerError('Unable to find torrents!'); + + // Remove first element (header row) + $torrents = array_slice($torrents, 1); + + foreach($torrents as $torrent){ + + $cell = $torrent->find('td', 0); + if(!$cell) + $this->returnServerError('Unable to find cell!'); + + $element = $cell->find('a', 0); + if(!$element) + $this->returnServerError('Unable to find element!'); + + $item = array(); + + $item['uri'] = $element->href; + $item['title'] = $element->plaintext; + // $item['author'] = + // $item['timestamp'] = + // $item['content'] = + + $this->items[] = $item; + } + } + + #endregion + + #region Helper functions for "Latest News" + + private function get_latest_news($html){ + $container = $html->find('div#postcontainer', 0); + if(!$container) + $this->returnServerError('Unable to find post container!'); + + $posts = $container->find('div.index-post'); + if(!$posts) + $this->returnServerError('Unable to find posts!'); + + foreach($posts as $post){ + $item = array(); + + $item['uri'] = $this->latest_news_extract_uri($post); + $item['title'] = $this->latest_news_extract_title($post); + $item['author'] = $this->latest_news_extract_author($post); + $item['timestamp'] = $this->latest_news_extract_timestamp($post); + $item['content'] = $this->latest_news_extract_content($post); + + $this->items[] = $item; + } + } + + private function latest_news_extract_author($post){ + $author = $post->find('small', 0); + if(!$author) + $this->returnServerError('Unable to find author!'); + + // The author is hidden within a string like: 'Posted by {author} on {date}' + preg_match('/Posted\sby\s(.*)\son/i', $author->innertext, $matches); + + return $matches[1]; + } + + private function latest_news_extract_timestamp($post){ + $date = $post->find('small', 0); + if(!$date) + $this->returnServerError('Unable to find date!'); + + // The date is hidden within a string like: 'Posted by {author} on {date}' + preg_match('/Posted\sby\s.*\son\s(.*)/i', $date->innertext, $matches); + + $timestamp = strtotime($matches[1]); + + // Make sure date is not in the future (dates are given like 'Nov. 20' without year) + if($timestamp > time()){ + $timestamp = strtotime('-1 year', $timestamp); } -#region Helper functions for "Search torrent by name" + return $timestamp; + } - private function request_search($name, $category = 'all'){ - $category_name = $this->get_search_category_name($category); - $category_index = $this->get_search_category_index($category); + private function latest_news_extract_title($post){ + $title = $post->find('a', 0); + if(!$title) + $this->returnServerError('Unable to find title!'); - $this->name = 'Search: "' . $name . '" in category: ' . $category_name . ' - ' . $this->name; - $this->uri .= $this->build_category_uri($category_index); + return $title->plaintext; + } - if(strtolower(trim($category)) === 'movies'){ // This one is special (content wise) - $html = $this->load_html($this->uri); - $this->get_movie_torrents($html); - } else { - $this->uri .= '&ihq=' . urlencode($name); - $html = $this->load_html($this->uri); - $this->get_latest_torrents($html); - } + private function latest_news_extract_uri($post){ + $uri = $post->find('a', 0); + if(!$uri) + $this->returnServerError('Unable to find uri!'); + + return $uri->href; + } + + private function latest_news_extract_content($post){ + $content = $post->find('div', 0); + if(!$content) + $this->returnServerError('Unable to find content!'); + + // Remove

...

(title) + foreach($content->find('h2') as $element){ + $element->outertext = ''; } - private function get_search_category_name($category){ - $parameter = $this->parameters['Search torrent by name']; - $languages = $parameter['search_category']['values']; - - foreach($languages as $name=>$value) - if(strtolower(trim($value)) === strtolower(trim($category))) - return $name; - - return 'Unknown category'; + // Remove ... (author) + foreach($content->find('small') as $element){ + $element->outertext = ''; } - private function get_search_category_index($category){ - switch(strtolower(trim($category))){ - case 'all': return 0; - default: return $this->get_torrent_category_index($category); // Uses the same index - } + return $content->innertext; + } + + #endregion + + #region Helper functions for "Latest Torrents", "Latest Releases" and "Torrent Category" + + private function get_latest_torrents($html){ + $container = $html->find('div#serps', 0); + if(!$container) + $this->returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('tr[data-key]'); + if(!$torrents) + $this->returnServerError('Unable to find torrents!'); + + foreach($torrents as $torrent){ + $item = array(); + + $item['uri'] = $this->latest_torrents_extract_uri($torrent); + $item['title'] = $this->latest_torrents_extract_title($torrent); + $item['author'] = $this->latest_torrents_extract_author($torrent); + $item['timestamp'] = $this->latest_torrents_extract_timestamp($torrent); + $item['content'] = ''; // There is no valuable content + + $this->items[] = $item; + } + } + + private function latest_torrents_extract_title($torrent){ + $cell = $torrent->find('td.title-row', 0); + if(!$cell) + $this->returnServerError('Unable to find title cell!'); + + $title = $cell->find('span', 0); + if(!$title) + $this->returnServerError('Unable to find title!'); + + return $title->plaintext; + } + + private function latest_torrents_extract_uri($torrent){ + $cell = $torrent->find('td.title-row', 0); + if(!$cell) + $this->returnServerError('Unable to find title cell!'); + + $uri = $cell->find('a', 0); + if(!$uri) + $this->returnServerError('Unable to find uri!'); + + return $this->fix_relative_uri($uri->href); + } + + private function latest_torrents_extract_author($torrent){ + $cell = $torrent->find('td.user-row', 0); + if(!$cell) + return; // No author + + $user = $cell->find('a', 0); + if(!$user) + $this->returnServerError('Unable to find user!'); + + return $user->plaintext; + } + + private function latest_torrents_extract_timestamp($torrent){ + $cell = $torrent->find('td.date-row', 0); + if(!$cell) + $this->returnServerError('Unable to find date cell!'); + + return strtotime('-' . $cell->plaintext, time()); + } + + #endregion + + #region Generic helper functions + + private function load_html($uri){ + $html = $this->getSimpleHTMLDOM($uri); + if(!$html) + $this->returnServerError('Unable to load ' . $uri . '!'); + + return $html; + } + + private function fix_relative_uri($uri){ + return preg_replace('/\//i', $this->uri, $uri, 1); + } + + private function build_category_uri($category, $order_popularity = false){ + switch($category){ + case 'anime': $index = 1; break; + case 'software' : $index = 2; break; + case 'games' : $index = 3; break; + case 'adult' : $index = 4; break; + case 'movies' : $index = 5; break; + case 'music' : $index = 6; break; + case 'other' : $index = 7; break; + case 'series_tv' : $index = 8; break; + case 'books': $index = 9; break; + case 'all': + default: $index = 0; break; } -#endregion + return 'torrents/?iht=' . $index . '&ihs=' . ($order_popularity ? 1 : 0) . '&age=0'; + } -#region Helper functions for "Movie Torrents" - - private function get_movie_torrents($html){ - $container = $html->find('div#w0', 0); - if(!$container) - $this->returnServerError('Unable to find torrent container!'); - - $torrents = $container->find('article'); - if(!$torrents) - $this->returnServerError('Unable to find torrents!'); - - foreach($torrents as $torrent){ - - $anchor = $torrent->find('a', 0); - if(!$anchor) - $this->returnServerError('Unable to find anchor!'); - - $date = $torrent->find('small', 0); - if(!$date) - $this->returnServerError('Unable to find date!'); - - $item = array(); - - $item['uri'] = $this->fix_relative_uri($anchor->href); - $item['title'] = $anchor->title; - // $item['author'] = - $item['timestamp'] = strtotime($date->plaintext); - $item['content'] = $this->fix_relative_uri($torrent->innertext); - - $this->items[] = $item; - } - } - -#endregion - -#region Helper functions for "Latest Hot Torrents" - - private function get_latest_hot_torrents($html){ - $container = $html->find('div#serps', 0); - if(!$container) - $this->returnServerError('Unable to find torrent container!'); - - $torrents = $container->find('tr'); - if(!$torrents) - $this->returnServerError('Unable to find torrents!'); - - // Remove first element (header row) - $torrents = array_slice($torrents, 1); - - foreach($torrents as $torrent){ - - $cell = $torrent->find('td', 0); - if(!$cell) - $this->returnServerError('Unable to find cell!'); - - $element = $cell->find('a', 0); - if(!$element) - $this->returnServerError('Unable to find element!'); - - $item = array(); - - $item['uri'] = $element->href; - $item['title'] = $element->plaintext; - // $item['author'] = - // $item['timestamp'] = - // $item['content'] = - - $this->items[] = $item; - } - } - -#endregion - -#region Helper functions for "Latest News" - - private function get_latest_news($html){ - $container = $html->find('div#postcontainer', 0); - if(!$container) - $this->returnServerError('Unable to find post container!'); - - $posts = $container->find('div.index-post'); - if(!$posts) - $this->returnServerError('Unable to find posts!'); - - foreach($posts as $post){ - $item = array(); - - $item['uri'] = $this->latest_news_extract_uri($post); - $item['title'] = $this->latest_news_extract_title($post); - $item['author'] = $this->latest_news_extract_author($post); - $item['timestamp'] = $this->latest_news_extract_timestamp($post); - $item['content'] = $this->latest_news_extract_content($post); - - $this->items[] = $item; - } - } - - private function latest_news_extract_author($post){ - $author = $post->find('small', 0); - if(!$author) - $this->returnServerError('Unable to find author!'); - - // The author is hidden within a string like: 'Posted by {author} on {date}' - preg_match('/Posted\sby\s(.*)\son/i', $author->innertext, $matches); - - return $matches[1]; - } - - private function latest_news_extract_timestamp($post){ - $date = $post->find('small', 0); - if(!$date) - $this->returnServerError('Unable to find date!'); - - // The date is hidden within a string like: 'Posted by {author} on {date}' - preg_match('/Posted\sby\s.*\son\s(.*)/i', $date->innertext, $matches); - - $timestamp = strtotime($matches[1]); - - // Make sure date is not in the future (dates are given like 'Nov. 20' without year) - if($timestamp > time()){ - $timestamp = strtotime('-1 year', $timestamp); - } - - return $timestamp; - } - - private function latest_news_extract_title($post){ - $title = $post->find('a', 0); - if(!$title) - $this->returnServerError('Unable to find title!'); - - return $title->plaintext; - } - - private function latest_news_extract_uri($post){ - $uri = $post->find('a', 0); - if(!$uri) - $this->returnServerError('Unable to find uri!'); - - return $uri->href; - } - - private function latest_news_extract_content($post){ - $content = $post->find('div', 0); - if(!$content) - $this->returnServerError('Unable to find content!'); - - // Remove

...

(title) - foreach($content->find('h2') as $element){ - $element->outertext = ''; - } - - // Remove ... (author) - foreach($content->find('small') as $element){ - $element->outertext = ''; - } - - return $content->innertext; - } - -#endregion - -#region Helper functions for "Latest Torrents", "Latest Releases" and "Torrent Category" - - private function get_latest_torrents($html){ - $container = $html->find('div#serps', 0); - if(!$container) - $this->returnServerError('Unable to find torrent container!'); - - $torrents = $container->find('tr[data-key]'); - if(!$torrents) - $this->returnServerError('Unable to find torrents!'); - - foreach($torrents as $torrent){ - $item = array(); - - $item['uri'] = $this->latest_torrents_extract_uri($torrent); - $item['title'] = $this->latest_torrents_extract_title($torrent); - $item['author'] = $this->latest_torrents_extract_author($torrent); - $item['timestamp'] = $this->latest_torrents_extract_timestamp($torrent); - $item['content'] = ''; // There is no valuable content - - $this->items[] = $item; - } - } - - private function latest_torrents_extract_title($torrent){ - $cell = $torrent->find('td.title-row', 0); - if(!$cell) - $this->returnServerError('Unable to find title cell!'); - - $title = $cell->find('span', 0); - if(!$title) - $this->returnServerError('Unable to find title!'); - - return $title->plaintext; - } - - private function latest_torrents_extract_uri($torrent){ - $cell = $torrent->find('td.title-row', 0); - if(!$cell) - $this->returnServerError('Unable to find title cell!'); - - $uri = $cell->find('a', 0); - if(!$uri) - $this->returnServerError('Unable to find uri!'); - - return $this->fix_relative_uri($uri->href); - } - - private function latest_torrents_extract_author($torrent){ - $cell = $torrent->find('td.user-row', 0); - if(!$cell) - return; // No author - - $user = $cell->find('a', 0); - if(!$user) - $this->returnServerError('Unable to find user!'); - - return $user->plaintext; - } - - private function latest_torrents_extract_timestamp($torrent){ - $cell = $torrent->find('td.date-row', 0); - if(!$cell) - $this->returnServerError('Unable to find date cell!'); - - return strtotime('-' . $cell->plaintext, time()); - } - -#endregion - -#region Generic helper functions - - private function load_html($uri){ - $html = $this->getSimpleHTMLDOM($uri); - if(!$html) - $this->returnServerError('Unable to load ' . $uri . '!'); - - return $html; - } - - private function fix_relative_uri($uri){ - return preg_replace('/\//i', 'https://isohunt.to/', $uri, 1); - } - - private function build_category_uri($index, $order_popularity = false){ - return '/torrents/?iht=' . $index . '&ihs=' . ($order_popularity ? 1 : 0) . '&age=0'; - } - -#endregion + #endregion } diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index 0a722bbc..b02304a9 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -7,12 +7,8 @@ class JapanExpoBridge extends HttpCachingBridgeAbstract { public $description = 'Returns most recent entries from Japan Expo actualités.'; public $parameters = array( array( 'mode'=>array( - 'name'=>'Mode', - 'type'=>'list', - 'values'=>array( - 'Titles only'=>'light', - 'Full Contents'=>'full' - ) + 'name'=>'Show full contents', + 'type'=>'checkbox', ) )); @@ -46,9 +42,9 @@ class JapanExpoBridge extends HttpCachingBridgeAbstract { } }; - $link = 'http://www.japan-expo-paris.com/fr/actualites'; - $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request JapanExpo: '.$link); - $fullcontent = (!empty($this->getInput('mode')) && $this->getInput('mode') == 'full'); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request JapanExpo: '.$this->uri); + $fullcontent = $this->getInput('mode'); $count = 0; foreach ($html->find('a._tile2') as $element) { @@ -60,22 +56,21 @@ class JapanExpoBridge extends HttpCachingBridgeAbstract { $thumbnail = trim($img_search_result[1], "'"); if ($fullcontent) { - if ($count < 5) { - if($this->get_cached_time($url) <= strtotime('-24 hours')) - $this->remove_from_cache($url); - - $article_html = $this->get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url); - $header = $article_html->find('header.pageHeadBox', 0); - $timestamp = strtotime($header->find('time', 0)->datetime); - $title_html = $header->find('div.section', 0)->next_sibling(); - $title = $title_html->plaintext; - $headings = $title_html->next_sibling()->outertext; - $article = $article_html->find('div.content', 0)->innertext; - $article = preg_replace_callback('/]+ style="[^\(]+\(\'([^\']+)\'[^>]+>/i', $convert_article_images, $article); - $content = $headings.$article; - } else { - break; + if ($count >= 5) { + break; } + if($this->get_cached_time($url) <= strtotime('-24 hours')) + $this->remove_from_cache($url); + + $article_html = $this->get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + $header = $article_html->find('header.pageHeadBox', 0); + $timestamp = strtotime($header->find('time', 0)->datetime); + $title_html = $header->find('div.section', 0)->next_sibling(); + $title = $title_html->plaintext; + $headings = $title_html->next_sibling()->outertext; + $article = $article_html->find('div.content', 0)->innertext; + $article = preg_replace_callback('/]+ style="[^\(]+\(\'([^\']+)\'[^>]+>/i', $convert_article_images, $article); + $content = $headings.$article; } else { $date_text = $element->find('span.date', 0)->plaintext; $timestamp = french_pubdate_to_timestamp($date_text); diff --git a/bridges/KonachanBridge.php b/bridges/KonachanBridge.php index ec941efb..50730dc9 100644 --- a/bridges/KonachanBridge.php +++ b/bridges/KonachanBridge.php @@ -9,20 +9,19 @@ class KonachanBridge extends BridgeAbstract{ public $parameters = array( array( 'p'=>array( 'name'=>'page', + 'defaultValue'=>1, 'type'=>'number' ), 't'=>array('name'=>'tags') )); public function collectData(){ - $page = 1;$tags=''; - if ($this->getInput('p')) { - $page = (int)preg_replace("/[^0-9]/",'', $this->getInput('p')); - } - if ($this->getInput('t')) { - $tags = urlencode($this->getInput('t')); - } - $html = $this->getSimpleHTMLDOM("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); + $html = $this->getSimpleHTMLDOM( + $this->uri.'/post?' + .'&page='.$this->getInput('p') + .'&tags='.urlencode($this->getInput('t')) + ) or $this->returnServerError('Could not request Konachan.'); + $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); @@ -31,7 +30,7 @@ class KonachanBridge extends BridgeAbstract{ foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = array(); - $item['uri'] = 'http://konachan.com/post/show/'.$json['id']; + $item['uri'] = $this->uri.'/post/show/'.$json['id']; $item['postid'] = $json['id']; $item['timestamp'] = $json['created_at']; $item['imageUri'] = $json['file_url']; diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php index 29a6a199..aa63c342 100644 --- a/bridges/KununuBridge.php +++ b/bridges/KununuBridge.php @@ -1,8 +1,8 @@ 'Site', 'type'=>'list', 'required'=>true, - 'exampleValue'=>'United States', 'title'=>'Select your site', 'values'=>array( 'Austria'=>'at', @@ -39,47 +38,40 @@ class KununuBridge extends HttpCachingBridgeAbstract { ) ); + private $companyName=''; + + public function getURI(){ + $company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company'))))); + $site=$this->getInput('site'); + $section = ''; + switch($site){ + case 'at': + case 'de': + case 'ch': + $section = 'kommentare'; + break; + case 'us': + $section = 'reviews'; + break; + } + + return $this->uri.$site.'/'.$company.'/'.$section; + } + + function getName(){ + $company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company'))))); + return ($this->companyName?:$company).' - '.$this->name; + } + public function collectData(){ - - // Get Site - $site = strtolower(trim($this->getInput('site'))); - if(!isset($site) || empty($site) || !$this->site_is_valid($site)) - $this->returnClientError('You must specify a valid site (&site=...)!'); - - // Get Company (fixing whitespace and umlauts) - $company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company'))))); - if(!isset($company) || empty($company)) - $this->returnClientError('You must specify a company (&company=...)!'); - - $full = false; // By default we'll load only short article - if($this->getInput('full')) - $full = strtolower(trim($this->getInput('full'))); - - // Get reviews section name (depends on site) - $section = ''; - switch($site){ - case 'at': - case 'de': - case 'ch': - $section = 'kommentare'; - break; - case 'us': - $section = 'reviews'; - break; - default: - $this->returnServerError('The reviews section is not defined for you selection!'); - } - - // Update URI for the content - $this->uri .= "/{$site}/{$company}/{$section}"; + $full = $this->getInput('full'); // Load page - $html = $this->getSimpleHTMLDOM($this->uri); - if($html === false) - $this->returnServerError('Unable to receive data from ' . $this->uri . '!'); - + $html = $this->getSimpleHTMLDOM($this->getURI()); + if(!$html) + $this->returnServerError('Unable to receive data from ' . $this->getURI() . '!'); // Update name for this request - $this->name = $this->extract_company_name($html) . ' - ' . $this->name; + $this->companyName = $this->extract_company_name($html); // Find the section with all the panels (reviews) $section = $html->find('section.kununu-scroll-element', 0); @@ -113,26 +105,11 @@ class KununuBridge extends HttpCachingBridgeAbstract { return 86400; // 1 day } - /** - * Returns true if the given site is part of the parameters list - */ - private function site_is_valid($site){ - $parameter = $this->parameters['global']; - $sites = $parameter['site']['values']; - - $site_names = array(); - - foreach($sites as $name=>$value) - $site_names[] = $value; - - return in_array($site, $site_names); - } - /** * Fixes relative URLs in the given text */ private function fix_url($text){ - return preg_replace('/href=(\'|\")\//i', 'href="https://www.kununu.com/', $text); + return preg_replace('/href=(\'|\")\//i', 'href="'.$this->uri, $text); } /** @@ -207,7 +184,7 @@ class KununuBridge extends HttpCachingBridgeAbstract { if($anchor === false) $this->returnServerError('Cannot find article URI!'); - return 'https://www.kununu.com' . $anchor->href; + return $this->uri . $anchor->href; } /** diff --git a/bridges/LWNprevBridge.php b/bridges/LWNprevBridge.php index 9e64a5cd..c2673621 100644 --- a/bridges/LWNprevBridge.php +++ b/bridges/LWNprevBridge.php @@ -1,9 +1,13 @@ uri.'free/bigpage'; + } private function jumpToNextTag(&$node){ while($node && $node->nodeType===XML_TEXT_NODE){ @@ -28,20 +32,7 @@ class LWNprevBridge extends BridgeAbstract{ public function collectData(){ // Because the LWN page is written in loose HTML and not XHTML, // Simple HTML Dom is not accurate enough for the job - - $uri='https://lwn.net/free/bigpage'; - $context=null; - if(defined('PROXY_URL')) { - $context = array( - 'http' => array( - 'proxy' => PROXY_URL, - 'request_fulluri' => true, - ), - ); - $context = stream_context_create($context); - } - - $content=file_get_contents($uri, false, $context) + $content=$this->getContents($this->getURI()) or $this->returnServerError('No results for LWNprev'); libxml_use_internal_errors(true); @@ -52,13 +43,12 @@ class LWNprevBridge extends BridgeAbstract{ $cat1=''; $cat2=''; - $realURI='https://lwn.net'; foreach($html->getElementsByTagName('a') as $a){ if($a->textContent==='Multi-page format'){ break; } } - $realURI.=$a->getAttribute('href'); + $realURI=$this->uri.$a->getAttribute('href'); $URICounter=0; $edition=$html->getElementsByTagName('h1')->item(0)->textContent; @@ -92,7 +82,7 @@ class LWNprevBridge extends BridgeAbstract{ $h2FirstChild=$h2->firstChild; $this->jumpToNextTag($h2FirstChild); if($h2FirstChild->nodeName==='a'){ - $item['uri']='https://lwn.net'.$h2FirstChild->getAttribute('href'); + $item['uri']=$this->uri.$h2FirstChild->getAttribute('href'); }else{ $item['uri']=$realURI.'#'.$URICounter; } diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php index 3779952e..fab1a71a 100755 --- a/bridges/LeBonCoinBridge.php +++ b/bridges/LeBonCoinBridge.php @@ -3,7 +3,7 @@ class LeBonCoinBridge extends BridgeAbstract{ public $maintainer = "16mhz"; public $name = "LeBonCoin"; - public $uri = "http://www.leboncoin.fr"; + public $uri = "http://www.leboncoin.fr/"; public $description = "Returns most recent results from LeBonCoin for a region, and optionally a category and a keyword ."; public $parameters = array( array( @@ -44,6 +44,7 @@ class LeBonCoinBridge extends BridgeAbstract{ 'name'=>'Catégorie', 'type'=>'list', 'values'=>array( + 'TOUS'=>'', 'EMPLOI'=>'_emploi_', 'VEHICULES'=>array( 'Tous'=>'_vehicules_', @@ -136,14 +137,16 @@ class LeBonCoinBridge extends BridgeAbstract{ public function collectData(){ - $html = ''; - if (empty($this->getInput('c'))) { - $link = 'http://www.leboncoin.fr/annonces/offres/' . $this->getInput('r') . '/?f=a&th=1&q=' . urlencode($this->getInput('k')); - } - else { - $link = 'http://www.leboncoin.fr/' . $this->getInput('c') . '/offres/' . $this->getInput('r') . '/?f=a&th=1&q=' . urlencode($this->getInput('k')); - } - $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LeBonCoin.'); + $category=$this->getInput('c'); + if (empty($category)){ + $category='annonces'; + } + + $html = $this->getSimpleHTMLDOM( + $this->uri.$category.'/offres/' . $this->getInput('r') . '/?' + .'f=a&th=1&' + .'q=' . urlencode($this->getInput('k')) + ) or $this->returnServerError('Could not request LeBonCoin.'); $list = $html->find('.tabsContent', 0); if($list === NULL) { diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index 370a1892..f1b89a1f 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -35,7 +35,8 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } public function collectData(){ - $rssFeed = $this->getSimpleHTMLDOM('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); + $rssFeed = $this->getSimpleHTMLDOM($this->uri.'rss') + or $this->returnServerError('Could not request '.$this->uri.'/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index a1aadab1..b744e76e 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -28,8 +28,9 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { return $article_html; } - $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml'; - $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($this->uri.'rss/rss.xml') + or $this->returnServerError('Could not request LeMondeInformatique: ' + .$this->uri.'rss/rss.xml'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/Les400CulsBridge.php b/bridges/Les400CulsBridge.php index 14f701b8..3fea2fff 100644 --- a/bridges/Les400CulsBridge.php +++ b/bridges/Les400CulsBridge.php @@ -1,17 +1,14 @@ collectExpandableDatas($this->uri.'feeds/'); } protected function parseRSSItem($newsItem) { diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index db40bff4..1d835560 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -7,7 +7,8 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ public $description = "LesJoiesDuCode"; public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request LesJoiesDuCode.'); foreach($html->find('div.blog-post') as $element) { $item = array(); diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 12a178de..4e81e9ce 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -4,12 +4,13 @@ class LichessBridge extends HttpCachingBridgeAbstract { public $maintainer = 'AmauryCarrade'; public $name = 'Lichess Blog'; - public $uri = 'http://lichess.org/blog'; + public $uri = 'http://fr.lichess.org/blog'; public $description = 'Returns the 5 newest posts from the Lichess blog (full text)'; public function collectData() { - $xml_feed = $this->getSimpleHTMLDOM('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); + $xml_feed = $this->getSimpleHTMLDOM($this->uri.'.atom') + or $this->returnServerError('Could not retrieve Lichess blog feed.'); $posts_loaded = 0; foreach($xml_feed->find('entry') as $entry) diff --git a/bridges/LinkedInCompany.php b/bridges/LinkedInCompanyBridge.php similarity index 82% rename from bridges/LinkedInCompany.php rename to bridges/LinkedInCompanyBridge.php index ccdf16c8..e23f9d48 100644 --- a/bridges/LinkedInCompany.php +++ b/bridges/LinkedInCompanyBridge.php @@ -1,5 +1,5 @@ getInput('c'); + $link = $this->uri.'company/'.$this->getInput('c'); - $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LinkedIn.'); + $html = $this->getSimpleHTMLDOM($link) + or $this->returnServerError('Could not request LinkedIn.'); foreach($html->find('//*[@id="my-feed-post"]/li') as $element) { $title = $element->find('span.share-body', 0)->innertext; diff --git a/bridges/LolibooruBridge.php b/bridges/LolibooruBridge.php index c324dcc2..1bcf0376 100644 --- a/bridges/LolibooruBridge.php +++ b/bridges/LolibooruBridge.php @@ -9,20 +9,19 @@ class LolibooruBridge extends BridgeAbstract{ public $parameters = array( array( 'p'=>array( 'name'=>'page', + 'defaultValue'=>1, 'type'=>'number' ), 't'=>array('name'=>'tags') )); public function collectData(){ - $page = 1; $tags = ''; - if ($this->getInput('p')) { - $page = (int)preg_replace("/[^0-9]/",'', $this->getInput('p')); - } - if ($this->getInput('t')) { - $tags = urlencode($this->getInput('t')); - } - $html = $this->getSimpleHTMLDOM("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); + $html = $this->getSimpleHTMLDOM( + $this->uri.'post?' + .'&page='.$this->getInput('p') + .'&tags='.urlencode($this->getInput('t')) + ) or $this->returnServerError('Could not request Lolibooru.'); + $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); @@ -31,7 +30,7 @@ class LolibooruBridge extends BridgeAbstract{ foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = array(); - $item['uri'] = 'http://lolibooru.moe/post/show/'.$json['id']; + $item['uri'] = $this->uri.'post/show/'.$json['id']; $item['postid'] = $json['id']; $item['timestamp'] = $json['created_at']; $item['imageUri'] = $json['file_url']; diff --git a/bridges/MangareaderBridge.php b/bridges/MangareaderBridge.php index a6bc8531..cdf5a70e 100644 --- a/bridges/MangareaderBridge.php +++ b/bridges/MangareaderBridge.php @@ -4,7 +4,7 @@ class MangareaderBridge extends BridgeAbstract{ public $maintainer = "logmanoriginal"; public $name = "Mangareader Bridge"; - public $uri = "http://www.mangareader.net"; + public $uri = "http://www.mangareader.net/"; public $description = "Returns the latest updates, popular mangas or manga updates (new chapters)"; public $parameters = array( @@ -75,45 +75,25 @@ class MangareaderBridge extends BridgeAbstract{ ) ); + private $request=''; + public function collectData(){ - - $this->request = ''; - - $type = "latest"; // can be "latest", "popular" or "path". Default is "latest"! - $path = "latest"; - $limit = MANGAREADER_LIMIT; - - if($this->getInput('category')){ // Get popular updates - $type = "popular"; - $path = "popular"; - if($this->getInput('category') !== "all"){ - $path .= "/" . $this->getInput('category'); - } - } - - if($this->getInput('path')){ // Get manga updates - $type = "path"; - $path = $this->getInput('path'); - } - - if($this->getInput('limit') && $this->getInput('limit') !== ""){ // Get manga updates (optional parameter) - $limit = $this->getInput('limit'); - } - // We'll use the DOM parser for this as it makes navigation easier - $html = $this->getContents("http://www.mangareader.net/" . $path); + $html = $this->getContents($this->getURI()); if(!$html){ $this->returnClientError('Could not receive data for ' . $path . '!'); } + libxml_use_internal_errors(true); $doc = new DomDocument; @$doc->loadHTML($html); + libxml_clear_errors(); // Navigate via XPath $xpath = new DomXPath($doc); - // Build feed based on the context (site updates or manga updates) - if($type === "latest"){ - + $this->request = ''; + switch($this->queriedContext){ + case 'Get latest updates': $this->request = 'Latest updates'; // Query each item (consists of Manga + chapters) @@ -128,7 +108,7 @@ class MangareaderBridge extends BridgeAbstract{ if (isset($manga) && $chapters->length >= 1){ $item = array(); - $item['uri'] = 'http://www.mangareader.net' . htmlspecialchars($manga->getAttribute('href')); + $item['uri'] = $this->uri. htmlspecialchars($manga->getAttribute('href')); $item['title'] = htmlspecialchars($manga->nodeValue); // Add each chapter to the feed @@ -138,16 +118,15 @@ class MangareaderBridge extends BridgeAbstract{ if($item['content'] <> ""){ $item['content'] .= "
"; } - $item['content'] .= "" . htmlspecialchars($chapter->nodeValue) . ""; + $item['content'] .= "" . htmlspecialchars($chapter->nodeValue) . ""; } $this->items[] = $item; } } - } - - if($type === "popular"){ + break; + case 'Get popular mangas': $pagetitle = $xpath->query(".//*[@id='bodyalt']/h1")->item(0)->nodeValue; $this->request = substr($pagetitle, 0, strrpos($pagetitle, " -")); // "Popular mangas for ..." @@ -163,16 +142,20 @@ class MangareaderBridge extends BridgeAbstract{ $item = array(); $item['title'] = htmlspecialchars($xpath->query(".//*[@class='manga_name']//a", $manga)->item(0)->nodeValue); - $item['uri'] = 'http://www.mangareader.net' . $xpath->query(".//*[@class='manga_name']//a", $manga)->item(0)->getAttribute('href'); + $item['uri'] = $this->uri . $xpath->query(".//*[@class='manga_name']//a", $manga)->item(0)->getAttribute('href'); $item['author'] = htmlspecialchars($xpath->query("//*[@class='author_name']", $manga)->item(0)->nodeValue); $item['chaptercount'] = $xpath->query(".//*[@class='chapter_count']", $manga)->item(0)->nodeValue; $item['genre'] = htmlspecialchars($xpath->query(".//*[@class='manga_genre']", $manga)->item(0)->nodeValue); $item['content'] = '' . $item['title'] . '

' . $item['genre'] . '

' . $item['chaptercount'] . '

'; $this->items[] = $item; } - } + break; - if($type === "path") { + case 'Get manga updates': + $limit = $this->getInput('limit'); + if(empty($limit)){ + $limit = MANGAREADER_LIMIT; + } $this->request = $xpath->query(".//*[@id='mangaproperties']//*[@class='aname']")->item(0)->nodeValue; @@ -187,14 +170,15 @@ class MangareaderBridge extends BridgeAbstract{ foreach ($chapters as $chapter){ $item = array(); $item['title'] = htmlspecialchars($xpath->query("td[1]", $chapter)->item(0)->nodeValue); - $item['uri'] = 'http://www.mangareader.net' . $xpath->query("td[1]/a", $chapter)->item(0)->getAttribute('href'); + $item['uri'] = $this->uri . $xpath->query("td[1]/a", $chapter)->item(0)->getAttribute('href'); $item['timestamp'] = strtotime($xpath->query("td[2]", $chapter)->item(0)->nodeValue); array_unshift($this->items, $item); } + break; } // Return some dummy-data if no content available - if(count($this->items) == 0){ + if(empty($this->items)){ $item = array(); $item['content'] = "

No updates available

"; @@ -202,6 +186,25 @@ class MangareaderBridge extends BridgeAbstract{ } } + public function getURI(){ + switch($this->queriedContext){ + case 'Get latest updates': + $path = "latest"; + break; + case 'Get popular mangas': + $path = "popular"; + if($this->getInput('category') !== "all"){ + $path .= "/" . $this->getInput('category'); + } + break; + case 'Get manga updates': + $path = $this->getInput('path'); + break; + } + return $this->uri . $path; + } + + public function getName(){ return (!empty($this->request) ? $this->request . ' - ' : '') . 'Mangareader Bridge'; } diff --git a/bridges/MilbooruBridge.php b/bridges/MilbooruBridge.php index a4534d48..1dc08cfa 100644 --- a/bridges/MilbooruBridge.php +++ b/bridges/MilbooruBridge.php @@ -16,22 +16,16 @@ class MilbooruBridge extends BridgeAbstract{ )); public function collectData(){ - $page = 0;$tags=''; - if ($this->getInput('p')) { - $page = (int)preg_replace("/[^0-9]/",'', $this->getInput('p')); - } - if ($this->getInput('t')) { - $tags = urlencode($this->getInput('t')); - } - $html = $this->getSimpleHTMLDOM("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); - + $html = $this->getSimpleHTMLDOM( + $this->uri.'?q=/post/list/'.urlencode($this->getInput('t')).'/'.$this->getInput('p') + )or $this->returnServerError('Could not request Milbooru.'); foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) { $item = array(); - $item['uri'] = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('a', 0)->href; + $item['uri'] = $this->uri.$element->find('a', 0)->href; $item['postid'] = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); $item['timestamp'] = time(); - $thumbnailUri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('img', 0)->src; + $thumbnailUri = $this->uri.$element->find('img', 0)->src; $item['tags'] = $element->find('a', 0)->getAttribute('data-tags'); $item['title'] = 'Milbooru | '.$item['postid']; $item['content'] = '
Tags: '.$item['tags']; diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index 5123ccd8..2a6afc4e 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -3,11 +3,12 @@ class MondeDiploBridge extends BridgeAbstract{ public $maintainer = "Pitchoule"; public $name = 'Monde Diplomatique'; - public $uri = 'http://www.monde-diplomatique.fr'; + public $uri = 'http://www.monde-diplomatique.fr/'; public $description = "Returns most recent results from MondeDiplo."; public function collectData(){ - $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); + $html = $this->getSimpleHTMLDOM($this->uri) + or $this->returnServerError('Could not request MondeDiplo. for : ' . $this->uri); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index 17a8a44d..5e481956 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -3,9 +3,13 @@ class MsnMondeBridge extends BridgeAbstract{ public $maintainer = "kranack"; public $name = 'MSN Actu Monde'; - public $uri = 'http://www.msn.com/fr-fr/actualite/monde'; + public $uri = 'http://www.msn.com/'; public $description = "Returns the 10 newest posts from MSN Actualités (full text)"; + public function getURI(){ + return $this->uri.'fr-fr/actualite/monde'; + } + private function MsnMondeExtractContent($url, &$item) { $html2 = $this->getSimpleHTMLDOM($url); $item['content'] = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; @@ -13,13 +17,13 @@ class MsnMondeBridge extends BridgeAbstract{ } public function collectData(){ - $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MsnMonde.'); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request MsnMonde.'); $limit = 0; foreach($html->find('.smalla') as $article) { if($limit < 10) { $item = array(); $item['title'] = utf8_decode($article->find('h4', 0)->innertext); - $item['uri'] = "http://www.msn.com" . utf8_decode($article->find('a', 0)->href); + $item['uri'] = $this->uri . utf8_decode($article->find('a', 0)->href); $this->MsnMondeExtractContent($item['uri'], $item); $this->items[] = $item; $limit++; diff --git a/bridges/MspabooruBridge.php b/bridges/MspabooruBridge.php index 21dc76fa..e255f9ee 100644 --- a/bridges/MspabooruBridge.php +++ b/bridges/MspabooruBridge.php @@ -16,21 +16,16 @@ class MspabooruBridge extends BridgeAbstract{ )); public function collectData(){ - $page = 0;$tags=''; - if ($this->getInput('p')) { - $page = (int)preg_replace("/[^0-9]/",'', $this->getInput('p')); - $page = $page - 1; - $page = $page * 50; - } - if ($this->getInput('t')) { - $tags = urlencode($this->getInput('t')); - } - $html = $this->getSimpleHTMLDOM("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); + $html = $this->getSimpleHTMLDOM( + $this->uri.'index.php?page=post&s=list&' + .'&pid='.($this->getInput('p')?($this->getInput('p') -1)*50:'') + .'&tags='.urlencode($this->getInput('t')) + ) or $this->returnServerError('Could not request Mspabooru.'); foreach($html->find('div[class=content] span') as $element) { $item = array(); - $item['uri'] = 'http://mspabooru.com/'.$element->find('a', 0)->href; + $item['uri'] = $this->uri.$element->find('a', 0)->href; $item['postid'] = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item['timestamp'] = time(); $thumbnailUri = $element->find('img', 0)->src; diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index c30eccfc..9b333c8e 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -3,12 +3,12 @@ class NasaApodBridge extends BridgeAbstract{ public $maintainer = "corenting"; public $name = "NASA APOD Bridge"; - public $uri = "http://apod.nasa.gov/apod/astropix.html"; + public $uri = "http://apod.nasa.gov/apod/"; public $description = "Returns the 3 latest NASA APOD pictures and explanations"; public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM($this->uri.'archivepix.html') or $this->returnServerError('Error while downloading the website content'); $list = explode("
", $html->find('b', 0)->innertext); for($i = 0; $i < 3;$i++) @@ -17,7 +17,7 @@ class NasaApodBridge extends BridgeAbstract{ $item = array(); $uri_page = $html->find('a',$i + 3)->href; - $uri = 'http://apod.nasa.gov/apod/'.$uri_page; + $uri = $this->uri.$uri_page; $item['uri'] = $uri; $picture_html = $this->getSimpleHTMLDOM($uri); diff --git a/bridges/NeuviemeArtBridge.php b/bridges/NeuviemeArtBridge.php index ff33fa1d..54261c28 100644 --- a/bridges/NeuviemeArtBridge.php +++ b/bridges/NeuviemeArtBridge.php @@ -16,7 +16,7 @@ class NeuviemeArtBridge extends BridgeAbstract { } return $string; } - $feedUrl = 'http://www.9emeart.fr/9emeart.rss'; + $feedUrl = $this->uri.'9emeart.rss'; $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); $limit = 0; @@ -32,9 +32,9 @@ class NeuviemeArtBridge extends BridgeAbstract { $article_image = $element->find('enclosure', 0)->url; foreach ($article_html->find('img.img_full') as $img) if ($img->alt == $article_title) - $article_image = 'http://www.9emeart.fr'.$img->src; + $article_image = $this->uri.$img->src; $article_content = '

' - .str_replace('src="/', 'src="http://www.9emeart.fr/', $article_html->find('div.newsGenerique_con', 0)->innertext); + .str_replace('src="/', 'src="'.$this->uri, $article_html->find('div.newsGenerique_con', 0)->innertext); $article_content = StripWithDelimiters($article_content, ''); $article_content = StripWithDelimiters($article_content, ''); $article_content = StripWithDelimiters($article_content, ''); diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 1422a315..c5e39edf 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -24,7 +24,7 @@ class NextInpactBridge extends BridgeAbstract { } public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); + $html = $this->getSimpleHTMLDOM($this->uri.'rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php index 9c5f9e39..f7e508e1 100644 --- a/bridges/NextgovBridge.php +++ b/bridges/NextgovBridge.php @@ -44,43 +44,39 @@ class NextgovBridge extends BridgeAbstract { } $category = $this->getInput('category'); - if (empty($category)) - $category = 'all'; - if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32)) - $this->returnClientError('Invalid "category" parameter.'); $url = $this->getURI().'rss/'.$category.'/'; $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Nextgov: '.$url); $limit = 0; foreach ($html->find('item') as $element) { - if ($limit < 10) { - - $article_url = ExtractFromDelimiters($element->innertext, '', ''); - $article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', ''); - $article_title = $element->find('title', 0)->plaintext; - $article_subtitle = $element->find('description', 0)->plaintext; - $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext); - $article_thumbnail = ExtractFromDelimiters($element->innertext, 'getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url); - - $contents = $article->find('div.wysiwyg', 0)->innertext; - $contents = StripWithDelimiters($contents, '
', '
'); - $contents = StripWithDelimiters($contents, ''); //ad outer div - $contents = StripWithDelimiters($contents, ''); - $contents = ($article_thumbnail == '' ? '' : '

') - .'

'.$article_subtitle.'

' - .trim($contents); - - $item = array(); - $item['uri'] = $article_url; - $item['title'] = $article_title; - $item['author'] = $article_author; - $item['timestamp'] = $article_timestamp; - $item['content'] = $contents; - $this->items[] = $item; - $limit++; + if ($limit >= 10) { + break; } - } + $article_url = ExtractFromDelimiters($element->innertext, '', ''); + $article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', ''); + $article_title = $element->find('title', 0)->plaintext; + $article_subtitle = $element->find('description', 0)->plaintext; + $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $article_thumbnail = ExtractFromDelimiters($element->innertext, 'getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url); + + $contents = $article->find('div.wysiwyg', 0)->innertext; + $contents = StripWithDelimiters($contents, '
', '
'); + $contents = StripWithDelimiters($contents, ''); //ad outer div + $contents = StripWithDelimiters($contents, ''); + $contents = ($article_thumbnail == '' ? '' : '

') + .'

'.$article_subtitle.'

' + .trim($contents); + + $item = array(); + $item['uri'] = $article_url; + $item['title'] = $article_title; + $item['author'] = $article_author; + $item['timestamp'] = $article_timestamp; + $item['content'] = $contents; + $this->items[] = $item; + $limit++; + } } } diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index d01ef53c..69dd8ddd 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -9,11 +9,11 @@ class NiceMatinBridge extends BridgeAbstract{ private function NiceMatinExtractContent($url) { $html = $this->getSimpleHTMLDOM($url); if(!$html) - $this->returnServerError('Could not acquire content from url: ' . $url . '!'); + return 'Could not acquire content from url: ' . $url . '!'; $content = $html->find('article', 0); if(!$content) - $this->returnServerError('Could not find \'section\'!'); + return 'Could not find \'section\'!'; $text = preg_replace('#(.*?)#is', '', $content->innertext); $text = strip_tags($text, '

'); @@ -21,25 +21,27 @@ class NiceMatinBridge extends BridgeAbstract{ } public function collectData(){ - $html = $this->getSimpleHTMLDOM('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); + $html = $this->getSimpleHTMLDOM($this->uri.'derniere-minute/rss') + or $this->returnServerError('Could not request NiceMatin.'); $limit = 0; foreach($html->find('item') as $element) { - if($limit < 10) { - // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) - $element_text = $element->outertext; - $element_text = str_replace('', '', $element_text); - $element_text = str_replace('', '', $element_text); - $element = str_get_html($element_text); + if($limit >= 10) { + break; + } + // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) + $element_text = $element->outertext; + $element_text = str_replace('', '', $element_text); + $element_text = str_replace('', '', $element_text); + $element = str_get_html($element_text); - $item = array(); - $item['title'] = $element->find('title', 0)->innertext; - $item['uri'] = $element->find('url', 0)->innertext; - $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext); - $item['content'] = $this->NiceMatinExtractContent($item['uri']); - $this->items[] = $item; - $limit++; - } + $item = array(); + $item['title'] = $element->find('title', 0)->innertext; + $item['uri'] = $element->find('url', 0)->innertext; + $item['timestamp'] = strtotime($element->find('pubDate', 0)->plaintext); + $item['content'] = $this->NiceMatinExtractContent($item['uri']); + $this->items[] = $item; + $limit++; } } } diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php index ad1f9bc8..3e61bec1 100644 --- a/bridges/NovelUpdatesBridge.php +++ b/bridges/NovelUpdatesBridge.php @@ -8,21 +8,23 @@ class NovelUpdatesBridge extends BridgeAbstract{ public $parameters = array( array( 'n'=>array( 'name'=>'Novel URL', + 'patterns'=>'http:\/\/www.novelupdates.com\/.*', 'required'=>true ) )); + private $seriesTitle=''; + public function collectData(){ - if (!$this->getInput('n')) - $this->returnClientError('You must specify the novel URL (/series/...)'); - $thread = parse_url($this->getInput('n')) or $this->returnClientError('This URL seems malformed, please check it.'); + $thread = parse_url($this->getInput('n')) + or $this->returnClientError('This URL seems malformed, please check it.'); if($thread['host'] !== 'www.novelupdates.com') $this->returnClientError('NovelUpdates URL only.'); if(strpos($thread['path'], 'series/') === FALSE) $this->returnClientError('You must specify the novel URL.'); - $url = 'http://www.novelupdates.com'.$thread['path'].''; + $url = $this->uri.$thread['path'].''; $fullhtml = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); - $this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext; + $this->seriesTitle = $fullhtml->find('h4.seriestitle', 0)->plaintext; // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 // forcefully removes tbody $html = $fullhtml->find('table#myTable', 0)->innertext; @@ -35,13 +37,13 @@ class NovelUpdatesBridge extends BridgeAbstract{ $item['title'] = $element->find('td', 2)->find('a', 0)->plaintext; $item['team'] = $element->find('td', 1)->innertext; $item['timestamp'] = strtotime($element->find('td', 0)->plaintext); - $item['content'] = ''.$this->request.' - '.$item['title'].' by '.$item['team'].'
'.$fullhtml->find('div.seriesimg', 0)->innertext.''; + $item['content'] = ''.$this->seriesTitle.' - '.$item['title'].' by '.$item['team'].'
'.$fullhtml->find('div.seriesimg', 0)->innertext.''; $this->items[] = $item; } } public function getName(){ - return (!empty($this->request) ? $this->request.' - ' : '') .'Novel Updates'; + return (!empty($this->seriesTitle) ? $this->seriesTitle.' - ' : '') .'Novel Updates'; } public function getCacheDuration(){ diff --git a/lib/Bridge.php b/lib/Bridge.php index d2c8ada9..c8f5cbac 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -352,6 +352,9 @@ abstract class BridgeAbstract implements BridgeInterface { } // Only keep guessed context parameters values + if(!isset($this->inputs[$this->queriedContext])){ + $this->inputs[$this->queriedContext]=array(); + } $this->inputs=array($this->queriedContext=>$this->inputs[$this->queriedContext]); $this->collectData();