From 3dd63ab6fd99a74f2d84a9fd1eefdde039510b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:11:28 +0200 Subject: [PATCH 01/14] enable proxy usage for individual bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- index.php | 6 +++++- lib/Bridge.php | 3 ++- lib/HTMLUtils.php | 22 +++++++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/index.php b/index.php index 94406256..b26741ca 100644 --- a/index.php +++ b/index.php @@ -100,7 +100,7 @@ try{ // whitelist control if(!Bridge::isWhitelisted($whitelist_selection, $bridge)) { throw new \HttpException('This bridge is not whitelisted', 401); - die; + die; } $cache = Cache::create('FileCache'); @@ -111,6 +111,10 @@ try{ } else { $bridge->setCache($cache); // just add disable cache to your query to disable caching } + if(isset($_REQUEST['_p'])){ + $bridge->useProxy=true; + } + $bridge->setDatas($_REQUEST); $bridge->loadMetadatas(); $bridge->setDatas($_REQUEST); // Data transformation diff --git a/lib/Bridge.php b/lib/Bridge.php index a8732e06..2af6c156 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -21,6 +21,7 @@ abstract class BridgeAbstract implements BridgeInterface{ public $uri = ""; public $description = 'No description provided'; public $maintainer = 'No maintainer'; + public $useProxy = false; public $parameters = array(); /** @@ -111,7 +112,7 @@ abstract class BridgeAbstract implements BridgeInterface{ ), ); - if(defined('PROXY_URL')) { + if(defined('PROXY_URL') && $this->useProxy) { $contextOptions['http']['proxy'] = PROXY_URL; $contextOptions['http']['request_fulluri'] = true; diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index ade0ca42..7547dfc9 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -28,6 +28,12 @@ CARD; $card .= HTMLUtils::getFormHeader($bridgeName); if ($isActive){ + if(defined('PROXY_URL')){ + $idArg = 'arg-' . 
urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; + } + $card .= HTMLUtils::getHelperButtonsFormat($formats); } else { $card .= 'Inactive'; @@ -40,13 +46,13 @@ CARD; if($hasGlobalParameter) $globalParameters = json_decode($bridgeElement->parameters['global'], true); - + foreach($bridgeElement->parameters as $parameterName => $parameter){ $parameter = json_decode($parameter, true); if(!is_numeric($parameterName) && $parameterName == 'global') continue; - + if($hasGlobalParameter) $parameter = array_merge($parameter, $globalParameters); @@ -82,7 +88,7 @@ CARD; $card .= '
' . PHP_EOL; } else if($inputEntry['type'] == 'list') { $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; + } + $card .= HTMLUtils::getHelperButtonsFormat($formats); } else { $card .= 'Inactive'; } - + $card .= '' . PHP_EOL; } @@ -161,7 +173,7 @@ class HTMLSanitizer { $element->outertext = ''; } else { foreach($element->getAllAttributes() as $attributeName => $attribute) { - if(!in_array($attributeName, $this->keptAttributes)) + if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName); } } From 09d4e21bdcc88d060d47d02218efb34d03622bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:18:30 +0200 Subject: [PATCH 02/14] possibility to hide proxy address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- index.php | 2 ++ lib/HTMLUtils.php | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/index.php b/index.php index b26741ca..b21cd4c3 100644 --- a/index.php +++ b/index.php @@ -11,6 +11,8 @@ TODO : */ //define('PROXY_URL', 'tcp://192.168.0.0:28'); +// keep PROXY_NAME empty to display PROXY_URL instead +define('PROXY_NAME','Hidden Proxy Name'); date_default_timezone_set('UTC'); error_reporting(0); diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 7547dfc9..7cf1ec3b 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -31,7 +31,7 @@ CARD; if(defined('PROXY_URL')){ $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); @@ -109,7 +109,7 @@ CARD; if(defined('PROXY_URL')){ $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); From d71674d2ba1fa613f42cd1c168bdbba4601505d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sun, 3 Jul 2016 23:49:43 +0200 Subject: [PATCH 03/14] proxy settings must be the service provider decision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit therefore, the provider decides if the service use a proxy or not, and if users can have the possibility to disable it on a bridge basis. Signed-off-by: Pierre Mazière --- index.php | 8 ++++++-- lib/Bridge.php | 2 +- lib/HTMLUtils.php | 16 ++++++++-------- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/index.php b/index.php index b21cd4c3..778a4b2a 100644 --- a/index.php +++ b/index.php @@ -11,6 +11,8 @@ TODO : */ //define('PROXY_URL', 'tcp://192.168.0.0:28'); +// Set to true if you allow users to disable proxy usage for specific bridges +define('PROXY_BYBRIDGE',false); // keep PROXY_NAME empty to display PROXY_URL instead define('PROXY_NAME','Hidden Proxy Name'); @@ -113,8 +115,10 @@ try{ } else { $bridge->setCache($cache); // just add disable cache to your query to disable caching } - if(isset($_REQUEST['_p'])){ - $bridge->useProxy=true; + if(defined('PROXY_URL') && PROXY_BYBRIDGE && + isset($_REQUEST['_noproxy']) + ){ + $bridge->useProxy=false; } $bridge->setDatas($_REQUEST); $bridge->loadMetadatas(); diff --git a/lib/Bridge.php b/lib/Bridge.php index 2af6c156..664fc93d 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -21,7 +21,7 @@ abstract class BridgeAbstract implements BridgeInterface{ public $uri = ""; public $description = 'No description provided'; public $maintainer = 'No maintainer'; - public $useProxy = false; + public $useProxy = true; public $parameters = array(); /** diff --git a/lib/HTMLUtils.php b/lib/HTMLUtils.php index 7cf1ec3b..1ab77992 100644 --- a/lib/HTMLUtils.php +++ b/lib/HTMLUtils.php @@ -28,10 +28,10 @@ CARD; $card .= 
HTMLUtils::getFormHeader($bridgeName); if ($isActive){ - if(defined('PROXY_URL')){ - $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); - $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxyoff') . '-' . urlencode('_noproxy'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); @@ -106,10 +106,10 @@ CARD; } if ($isActive){ - if(defined('PROXY_URL')){ - $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxy') . '-' . urlencode('_p'); - $card .= '' . PHP_EOL; - $card .= '
' . PHP_EOL; + if(defined('PROXY_URL') && PROXY_BYBRIDGE){ + $idArg = 'arg-' . urlencode($bridgeName) . '-' . urlencode('proxyoff') . '-' . urlencode('_noproxy'); + $card .= '' . PHP_EOL; + $card .= '
' . PHP_EOL; } $card .= HTMLUtils::getHelperButtonsFormat($formats); From f43bbda83e9328832915f017f7a8cf83fcb2f50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:05:01 +0200 Subject: [PATCH 04/14] core: split and rename BridgeAbstract::file_get_html MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to have one method to get the data, potentially using the proxy if defined, and one method to get the Simple DOM HTML object from these data, with a more informative name Signed-off-by: Pierre Mazière --- lib/Bridge.php | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 664fc93d..2daca2af 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -105,7 +105,7 @@ abstract class BridgeAbstract implements BridgeInterface{ return $this; } - protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ + protected function getContents($url,$use_include_path=false,$context=null,$offset=0,$maxlen=null){ $contextOptions = array( 'http' => array( 'user_agent'=>ini_get('user_agent') @@ -125,9 +125,22 @@ abstract class BridgeAbstract implements BridgeInterface{ }; } } - return file_get_html($url,$use_include_path,$context,$offset,$maxLen, - $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText, - $defaultSpanText); + + if(is_null($maxlen)){ + $content=@file_get_contents($url, $use_include_path, $context, $offset); + }else{ + $content=@file_get_contents($url, $use_include_path, $context, $offset,$maxlen); + } + + if($content===false){ + $this->message('Cant\'t download '.$url ); + } + return $content; + } + + protected function getSimpleHTMLDOM($url, $use_include_path = false, $context=null, $offset = 
0, $maxLen=null, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ + $content=$this->getContents($url,$use_include_path,$context,$offset,$maxLen); + return str_get_html($content,$lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText,$defaultSpanText); } } From 3c0d13c1bb72a87f0d671692e205f45f04d94eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:06:35 +0200 Subject: [PATCH 05/14] bridges: use BridgeAbstract::getSimpleHTMLDOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of BridgeAbstract::file_get_html Signed-off-by: Pierre Mazière --- bridges/ABCTabsBridge.php | 4 +- bridges/AcrimedBridge.php | 2 +- bridges/AllocineFRBridge.php | 8 +-- bridges/AnimeUltimeBridge.php | 4 +- bridges/BandcampBridge.php | 2 +- bridges/BastaBridge.php | 8 +-- bridges/BlaguesDeMerdeBridge.php | 4 +- bridges/BooruprojectBridge.php | 14 ++--- bridges/CADBridge.php | 7 ++- bridges/CNETBridge.php | 4 +- bridges/CoinDeskBridge.php | 56 ++++++++++++++++++++ bridges/CollegeDeFranceBridge.php | 2 +- bridges/CommonDreamsBridge.php | 4 +- bridges/CopieDoubleBridge.php | 8 +-- bridges/CourrierInternationalBridge.php | 34 ++++++------ bridges/CpasbienBridge.php | 8 +-- bridges/CryptomeBridge.php | 2 +- bridges/DailymotionBridge.php | 12 ++--- bridges/DanbooruBridge.php | 14 ++--- bridges/DansTonChatBridge.php | 2 +- bridges/DauphineLibereBridge.php | 8 +-- bridges/DeveloppezDotComBridge.php | 6 +-- bridges/DilbertBridge.php | 2 +- bridges/DollbooruBridge.php | 14 ++--- bridges/DuckDuckGoBridge.php | 2 +- bridges/EZTVBridge.php | 4 +- bridges/EliteDangerousGalnetBridge.php | 4 +- bridges/ElsevierBridge.php | 6 +-- bridges/FSBridge.php | 56 ++++++++++++++++++++ bridges/FacebookBridge.php | 6 +-- bridges/FierPandaBridge.php | 2 +- bridges/FlickrExploreBridge.php | 2 +- 
bridges/FlickrTagBridge.php | 8 +-- bridges/FootitoBridge.php | 16 +++--- bridges/FourchanBridge.php | 4 +- bridges/FrandroidBridge.php | 69 +++++++++++++++++++++++++ bridges/FuturaSciencesBridge.php | 4 +- bridges/GBAtempBridge.php | 6 +-- bridges/GelbooruBridge.php | 14 ++--- bridges/GiphyBridge.php | 22 ++++---- bridges/GithubIssueBridge.php | 2 +- bridges/GitlabCommitsBridge.php | 2 +- bridges/GizmodoFRBridge.php | 4 +- bridges/GooglePlusPostBridge.php | 4 +- bridges/GoogleSearchBridge.php | 4 +- bridges/GuruMedBridge.php | 2 +- bridges/HDWallpapersBridge.php | 2 +- bridges/HentaiHavenBridge.php | 2 +- bridges/IdenticaBridge.php | 2 +- bridges/InstagramBridge.php | 18 +++---- bridges/JapanExpoBridge.php | 4 +- bridges/KonachanBridge.php | 14 ++--- bridges/KoreusBridge.php | 4 +- bridges/LeBonCoinBridge.php | 24 ++++----- bridges/LeJournalDuGeekBridge.php | 4 +- bridges/LeMondeInformatiqueBridge.php | 4 +- bridges/LeMotDuJourBridge.php | 55 ++++++++++++++++++++ bridges/LesJoiesDuCodeBridge.php | 16 +++--- bridges/LichessBridge.php | 4 +- bridges/LinkedInCompany.php | 2 +- bridges/LolibooruBridge.php | 14 ++--- bridges/MalikiBridge.php | 60 +++++++++++++++++++++ bridges/MemoLinuxBridge.php | 58 +++++++++++++++++++++ bridges/MilbooruBridge.php | 14 ++--- bridges/MondeDiploBridge.php | 4 +- bridges/MsnMondeBridge.php | 4 +- bridges/MspabooruBridge.php | 14 ++--- bridges/NakedSecurityBridge.php | 6 +-- bridges/NasaApodBridge.php | 4 +- bridges/NeuviemeArtBridge.php | 4 +- bridges/NextInpactBridge.php | 4 +- bridges/NiceMatinBridge.php | 8 +-- bridges/NovelUpdatesBridge.php | 2 +- bridges/NumeramaBridge.php | 1 - bridges/OpenClassroomsBridge.php | 4 +- bridges/OpenTheoryBridge.php | 56 ++++++++++++++++++++ bridges/ParuVenduImmoBridge.php | 18 +++---- bridges/PickyWallpapersBridge.php | 2 +- bridges/PinterestBridge.php | 36 ++++++------- bridges/PlanetLibreBridge.php | 4 +- bridges/ProjectMGameBridge.php | 2 +- bridges/RTBFBridge.php | 2 +- bridges/RaymondBridge.php | 
53 +++++++++++++++++++ bridges/Releases3DSBridge.php | 2 +- bridges/ReporterreBridge.php | 4 +- bridges/Rue89Bridge.php | 2 +- bridges/Rule34Bridge.php | 14 ++--- bridges/Rule34pahealBridge.php | 14 ++--- bridges/SafebooruBridge.php | 14 ++--- bridges/SakugabooruBridge.php | 14 ++--- bridges/ScilogsBridge.php | 54 +++++++++++++++++++ bridges/ScmbBridge.php | 14 ++--- bridges/ScoopItBridge.php | 6 +-- bridges/SegfaultMintBridge.php | 55 ++++++++++++++++++++ bridges/Sexactu.php | 14 ++--- bridges/SiliconBridge.php | 4 +- bridges/StripeAPIChangeLogBridge.php | 2 +- bridges/SuperbWallpapersBridge.php | 4 +- bridges/T411Bridge.php | 4 +- bridges/TagBoardBridge.php | 6 +-- bridges/TbibBridge.php | 14 ++--- bridges/TheCodingLoveBridge.php | 18 +++---- bridges/ThePirateBayBridge.php | 4 +- bridges/TuxboardBridge.php | 64 +++++++++++++++++++++++ bridges/TwitterBridge.php | 20 +++---- bridges/UnsplashBridge.php | 2 +- bridges/ViadeoCompany.php | 2 +- bridges/VineBridge.php | 2 +- bridges/WallpaperStopBridge.php | 2 +- bridges/WhydBridge.php | 8 +-- bridges/WikipediaDEBridge.php | 48 +++++++++++++++++ bridges/WikipediaENBridge.php | 44 ++++++++++++++++ bridges/WikipediaEOBridge.php | 44 ++++++++++++++++ bridges/WikipediaFRBridge.php | 46 +++++++++++++++++ bridges/WordPressBridge.php | 8 +-- bridges/WorldOfTanks.php | 6 +-- bridges/XbooruBridge.php | 14 ++--- bridges/YandereBridge.php | 14 ++--- bridges/YoutubeBridge.php | 14 ++--- bridges/ZatazBridge.php | 4 +- bridges/ZoneTelechargementBridge.php | 2 +- 121 files changed, 1212 insertions(+), 396 deletions(-) create mode 100644 bridges/CoinDeskBridge.php create mode 100644 bridges/FSBridge.php create mode 100644 bridges/FrandroidBridge.php create mode 100644 bridges/LeMotDuJourBridge.php create mode 100644 bridges/MalikiBridge.php create mode 100644 bridges/MemoLinuxBridge.php create mode 100644 bridges/OpenTheoryBridge.php create mode 100644 bridges/RaymondBridge.php create mode 100644 bridges/ScilogsBridge.php create mode 
100644 bridges/SegfaultMintBridge.php create mode 100644 bridges/TuxboardBridge.php create mode 100644 bridges/WikipediaDEBridge.php create mode 100644 bridges/WikipediaENBridge.php create mode 100644 bridges/WikipediaEOBridge.php create mode 100644 bridges/WikipediaFRBridge.php diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php index 0c9bc68a..4fd24872 100644 --- a/bridges/ABCTabsBridge.php +++ b/bridges/ABCTabsBridge.php @@ -15,9 +15,9 @@ class ABCTabsBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnClientError('No results for this query.'); $table = $html->find('table#myTable', 0)->children(1); - + foreach ($table->find('tr') as $tab) { $item = new \Item(); diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 85e93fe7..fce0ce10 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -29,7 +29,7 @@ class AcrimedBridge extends RssExpander{ $item->title = trim($newsItem->title); $item->timestamp = strtotime($dc->date); - $articlePage = $this->file_get_html($newsItem->link); + $articlePage = $this->getSimpleHTMLDOM($newsItem->link); $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index ef017463..baaf4a1b 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -9,7 +9,7 @@ class AllocineFRBridge extends BridgeAbstract{ $this->description = "Bridge for allocine.fr"; $this->update = '2016-08-17'; - $this->parameters[] = + $this->parameters[] = '[ { "name" : "category", @@ -18,7 +18,7 @@ class AllocineFRBridge extends 
BridgeAbstract{ "required" : true, "exampleValue" : "Faux Raccord", "title" : "Select your category", - "values" : + "values" : [ { "name" : "Faux Raccord", @@ -64,12 +64,12 @@ class AllocineFRBridge extends BridgeAbstract{ // Update bridge name to match selection $this->name .= ' : ' . $category; - $html = $this->file_get_html($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError("Could not request {$this->uri}!"); foreach($html->find('figure.media-meta-fig') as $element) { $item = new Item(); - + $title = $element->find('div.titlebar h3.title a', 0); $content = trim($element->innertext); $figCaption = strpos($content, $category); diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index 0f305e3a..7eae61c1 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -67,7 +67,7 @@ class AnimeUltimeBridge extends BridgeAbstract { //Retrive page contents $website = 'http://www.anime-ultime.net/'; $url = $website.'history-0-1/'.$requestFilter; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Anime-Ultime: '.$url); //Relases are sorted by day : process each day individually foreach ($html->find('div.history', 0)->find('h3') as $daySection) { @@ -110,7 +110,7 @@ class AnimeUltimeBridge extends BridgeAbstract { $item->content = $item_description; $this->items[] = $item; $processedOK++; - + //Stop processing once limit is reached if ($processedOK >= 10) return; diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index 1e6c2dcc..71bf36f9 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -26,7 +26,7 @@ class BandcampBridge extends BridgeAbstract{ $html = ''; if (isset($param['tag'])) { $this->request = $param['tag']; - $html = 
$this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify tag (/tag/...)'); diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php index 35c9ae4d..6af218fe 100644 --- a/bridges/BastaBridge.php +++ b/bridges/BastaBridge.php @@ -7,14 +7,14 @@ class BastaBridge extends BridgeAbstract{ $this->description = "Returns the newest articles."; $this->update = '2016-08-17'; } - + public function collectData(array $param){ // Replaces all relative image URLs by absolute URLs. Relative URLs always start with 'local/'! function ReplaceImageUrl($content){ return preg_replace('/src=["\']{1}([^"\']+)/ims', 'src=\'http://www.bastamag.net/$1\'', $content); } - - $html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); + + $html = $this->getSimpleHTMLDOM('http://www.bastamag.net/spip.php?page=backend') or $this->returnServerError('Could not request Bastamag.'); $limit = 0; foreach($html->find('item') as $element) { @@ -34,4 +34,4 @@ class BastaBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/BlaguesDeMerdeBridge.php b/bridges/BlaguesDeMerdeBridge.php index 7fb5d451..be36c0cc 100644 --- a/bridges/BlaguesDeMerdeBridge.php +++ b/bridges/BlaguesDeMerdeBridge.php @@ -12,8 +12,8 @@ class BlaguesDeMerdeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); - + $html = $this->getSimpleHTMLDOM('http://www.blaguesdemerde.fr/') or $this->returnServerError('Could not request BDM.'); + 
foreach($html->find('article.joke_contener') as $element) { $item = new Item(); $temp = $element->find('a'); diff --git a/bridges/BooruprojectBridge.php b/bridges/BooruprojectBridge.php index 17c1982b..b1423db5 100644 --- a/bridges/BooruprojectBridge.php +++ b/bridges/BooruprojectBridge.php @@ -32,29 +32,29 @@ class BooruprojectBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0; $tags = ''; - if (!empty($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (!empty($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 20; } - if (!empty($param['t'])) { - $tags = '&tags='.urlencode($param['t']); + if (!empty($param['t'])) { + $tags = '&tags='.urlencode($param['t']); } if (empty($param['i'])) { $this->returnServerError('Please enter a ***.booru.org instance.'); } - $html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); + $html = $this->getSimpleHTMLDOM("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnServerError('Could not request Booruproject.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://'.$param['i'].'.booru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('id')); $item->timestamp = time(); $item->tags = $element->find('img', 0)->getAttribute('title'); $item->title = 'Booruproject '.$param['i'].' | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php index 17aa807b..d62041a4 100644 --- a/bridges/CADBridge.php +++ b/bridges/CADBridge.php @@ -9,7 +9,7 @@ class CADBridge extends BridgeAbstract{ } private function CADExtractContent($url) { - $html3 = $this->file_get_html($url); + $html3 = $this->getSimpleHTMLDOM($url); // The request might fail due to missing https support or wrong URL if($html3 == false) @@ -27,7 +27,6 @@ class CADBridge extends BridgeAbstract{ default: return 'Daily comic not released yet'; } - $img = implode ($url2[0]); $html3->clear(); unset ($html3); @@ -45,7 +44,7 @@ class CADBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); + $html = $this->getSimpleHTMLDOM('http://cdn2.cad-comic.com/rss.xml') or $this->returnServerError('Could not request CAD.'); $limit = 0; foreach($html->find('item') as $element) { @@ -67,4 +66,4 @@ class CADBridge extends BridgeAbstract{ return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 6fb4d93a..7be650e6 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -51,7 +51,7 @@ class CNETBridge extends BridgeAbstract { $this->topicName = $param['topic']; $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? 
'' : 'topics/'.$this->topicName.'/'); - $html = $this->file_get_html($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); + $html = $this->getSimpleHTMLDOM($pageUrl) or $this->returnServerError('Could not request CNET: '.$pageUrl); $limit = 0; foreach($html->find('div.assetBody') as $element) { @@ -64,7 +64,7 @@ class CNETBridge extends BridgeAbstract { if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request CNET: '.$article_uri); $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '
name = "CoinDesk"; + $this->uri = "http://www.coindesk.com/"; + $this->description = "Returns the 5 newest posts from CoinDesk (full text)"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + + function CoinDeskStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function CoinDeskExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-content', 0)->innertext; + $text = strip_tags($text, '

'); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = CoinDeskStripCDATA($element->find('title', 0)->innertext); + $item->author = $element->find('author', 0)->plaintext; + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = CoinDeskExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'CoinDesk'; + } + + public function getURI(){ + return 'http://www.coindesk.com/'; + } + + public function getCacheDuration(){ + return 1800; // 30min + } +} diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php index e809d6b4..1f79c62c 100644 --- a/bridges/CollegeDeFranceBridge.php +++ b/bridges/CollegeDeFranceBridge.php @@ -33,7 +33,7 @@ class CollegeDeFranceBridge extends BridgeAbstract{ * * */ - $html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); + $html = $this->getSimpleHTMLDOM('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnServerError('Could not request CollegeDeFrance.'); foreach($html->find('a[data-target]') as $element) { $item = new \Item(); $item->title = $element->find('.title', 0)->plaintext; diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index 11778668..9de9b6d6 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -10,7 +10,7 @@ class CommonDreamsBridge extends BridgeAbstract{ } private function CommonDreamsExtractContent($url) { - $html3 = $this->file_get_html($url); + 
$html3 = $this->getSimpleHTMLDOM($url); $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; $html3->clear(); unset ($html3); @@ -25,7 +25,7 @@ class CommonDreamsBridge extends BridgeAbstract{ return $string; } - $html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); + $html = $this->getSimpleHTMLDOM('http://www.commondreams.org/rss.xml') or $this->returnServerError('Could not request CommonDreams.'); $limit = 0; foreach($html->find('item') as $element) { if($limit < 4) { diff --git a/bridges/CopieDoubleBridge.php b/bridges/CopieDoubleBridge.php index cd3b4f68..cae96502 100644 --- a/bridges/CopieDoubleBridge.php +++ b/bridges/CopieDoubleBridge.php @@ -13,9 +13,9 @@ class CopieDoubleBridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); + $html = $this->getSimpleHTMLDOM('http://www.copie-double.com/') or $this->returnServerError('Could not request CopieDouble.'); $table = $html->find('table table', 2); - + foreach($table->find('tr') as $element) { $td = $element->find('td', 0); @@ -23,7 +23,7 @@ class CopieDoubleBridge extends BridgeAbstract{ if($td->class == "couleur_1") { $item = new Item(); - + $title = $td->innertext; $pos = strpos($title, "find("a", 0); $item->uri = "http://www.copie-double.com" . 
$a->href; - + $content = str_replace('src="/', 'src="http://www.copie-double.com/',$element->find("td", 0)->innertext); $content = str_replace('href="/', 'href="http://www.copie-double.com/',$content); $item->content = $content; diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index ab1a2c37..74ae314e 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -12,49 +12,49 @@ class CourrierInternationalBridge extends BridgeAbstract{ } public function collectData(array $param){ - + $html = ''; - $html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); - + $html = $this->getSimpleHTMLDOM('http://www.courrierinternational.com/') or $this->returnServerError('Error.'); + + - $element = $html->find("article"); - $article_count = 1; + $article_count = 1; foreach($element as $article) { - + $item = new \Item(); - + $item->uri = $article->parent->getAttribute("href"); if(strpos($item->uri, "http") === FALSE) { $item->uri = "http://courrierinternational.fr/".$item->uri; } - - $page = $this->file_get_html($item->uri); + + $page = $this->getSimpleHTMLDOM($item->uri); $cleaner = new HTMLSanitizer(); - + $item->content = $cleaner->sanitize($page->find("div.article-text")[0]); $item->title = strip_tags($article->find(".title")[0]); $dateTime = date_parse($page->find("time")[0]); $item->timestamp = mktime( - $dateTime['hour'], - $dateTime['minute'], - $dateTime['second'], - $dateTime['month'], - $dateTime['day'], + $dateTime['hour'], + $dateTime['minute'], + $dateTime['second'], + $dateTime['month'], + $dateTime['day'], $dateTime['year'] ); - + $this->items[] = $item; $article_count ++; if($article_count > 5) break; - + } diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index 93ae21ab..801968c5 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -15,7 +15,7 @@ function content_get_html($contents, 
$maxLen=-1, $lowercase = true, $forceTagsCl } class CpasbienBridge extends HttpCachingBridgeAbstract{ - + private $request; public function loadMetadatas() { @@ -44,7 +44,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $html = ''; if (isset($param['q'])) { /* keyword search mode */ $this->request = str_replace(" ","-",trim($param['q'])); - $html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a keyword (?q=...).'); @@ -53,7 +53,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ foreach ($html->find('#gauche',0)->find('div') as $episode) { if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - + $htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href'))); $item = new \Item(); @@ -65,7 +65,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $item->content = $textefiche->text(); } else { - $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); + $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); } $item->id = $episode->find('a', 0)->getAttribute('href'); diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index 68c8876d..4947de39 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -28,7 +28,7 @@ class CryptomeBridge extends BridgeAbstract{ // If you want HTTPS access instead, uncomment the following line: //$link = 'https://secure.netsolhost.com/cryptome.org/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Cryptome.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Cryptome.'); if (!empty($param['n'])) { 
/* number of documents */ $num = min(max(1, $param['n']+0), $num); } diff --git a/bridges/DailymotionBridge.php b/bridges/DailymotionBridge.php index c7822849..fccaec76 100644 --- a/bridges/DailymotionBridge.php +++ b/bridges/DailymotionBridge.php @@ -47,14 +47,14 @@ class DailymotionBridge extends BridgeAbstract{ function getMetadata($id) { $metadata=array(); - $html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); + $html2 = $this->getSimpleHTMLDOM('http://www.dailymotion.com/video/'.$id) or $this->returnServerError('Could not request Dailymotion.'); $metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content'); $metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') ); $metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content'); $metadata['uri'] = $html2->find('meta[property=og:url]', 0)->getAttribute('content'); return $metadata; - } + } $html = ''; @@ -63,15 +63,15 @@ class DailymotionBridge extends BridgeAbstract{ if (isset($param['u'])) { // user timeline mode $this->request = $param['u']; - $html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['p'])) { // playlist mode $this->request = strtok($param['p'], '_'); - $html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnServerError('Could not request Dailymotion.'); } else if (isset($param['s'])) { // search mode 
- $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $html = $this->getSimpleHTMLDOM('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnServerError('Could not request Dailymotion.'); } else { $this->returnClientError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)'); diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php index e8191930..45ad0293 100644 --- a/bridges/DanbooruBridge.php +++ b/bridges/DanbooruBridge.php @@ -25,23 +25,23 @@ class DanbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); + $html = $this->getSimpleHTMLDOM("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnServerError('Could not request Danbooru.'); foreach($html->find('div[id=posts] article') as $element) { $item = new \Item(); $item->uri = 'http://donmai.us'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-id')); $item->timestamp = time(); $thumbnailUri = 
'http://donmai.us'.$element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Danbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 748190f7..41ee8eca 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -15,7 +15,7 @@ class DansTonChatBridge extends BridgeAbstract{ $html = ''; $link = 'http://danstonchat.com/latest.html'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DansTonChat.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DansTonChat.'); foreach($html->find('div.item') as $element) { $item = new \Item(); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index a91ade9f..95bfe397 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -82,7 +82,7 @@ class DauphineLibereBridge extends BridgeAbstract { } private function ExtractContent($url, $context) { - $html2 = $this->file_get_html($url, false, $context); + $html2 = $this->getSimpleHTMLDOM($url,false,$context); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; @@ -102,10 +102,10 @@ class DauphineLibereBridge extends BridgeAbstract { if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/'.$this->request.'/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } else { - $html = $this->file_get_html('http://www.ledauphine.com/rss', false, $context) or $this->returnServerError('Could not request DauphineLibere.'); + $html = $this->getSimpleHTMLDOM('http://www.ledauphine.com/rss',false,$context) or $this->returnServerError('Could not request DauphineLibere.'); } $limit = 0; @@ 
-126,4 +126,4 @@ class DauphineLibereBridge extends BridgeAbstract { return 3600*2; // 2 hours } } -?> \ No newline at end of file +?> diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 5fd78663..bde3f9a0 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -15,7 +15,7 @@ class DeveloppezDotComBridge extends BridgeAbstract{ return $string; } - // F***ing quotes from Microsoft Word badly encoded, here was the trick: + // F***ing quotes from Microsoft Word badly encoded, here was the trick: // http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php private function convert_smart_quotes($string) { @@ -35,14 +35,14 @@ class DeveloppezDotComBridge extends BridgeAbstract{ } private function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $this->convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); $text = utf8_encode($text); return trim($text); } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.developpez.com/index/rss') or $this->returnServerError('Could not request http://www.developpez.com/index/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php index d94ce9a9..7e50e8db 100644 --- a/bridges/DilbertBridge.php +++ b/bridges/DilbertBridge.php @@ -13,7 +13,7 @@ class DilbertBridge extends BridgeAbstract { public function collectData(array $param) { - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request Dilbert: '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could 
not request Dilbert: '.$this->getURI()); foreach ($html->find('section.comic-item') as $element) { diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php index 76d8a40a..72055850 100644 --- a/bridges/DollbooruBridge.php +++ b/bridges/DollbooruBridge.php @@ -26,25 +26,25 @@ class DollbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); + $html = $this->getSimpleHTMLDOM("http://dollbooru.org/post/list/$tags/$page") or $this->returnServerError('Could not request Dollbooru.'); foreach($html->find('div[class=shm-image-list] a') as $element) { $item = new \Item(); $item->uri = 'http://dollbooru.org'.$element->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://dollbooru.org'.$element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Dollbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php index f09b8693..5398643a 100644 --- a/bridges/DuckDuckGoBridge.php +++ b/bridges/DuckDuckGoBridge.php @@ -22,7 +22,7 @@ class DuckDuckGoBridge extends BridgeAbstract{ $html = ''; $link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request DuckDuckGo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request DuckDuckGo.'); foreach($html->find('div.results_links') as $element) { $item = new \Item(); diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 07a34693..551c8d9b 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -40,11 +40,11 @@ class EZTVBridge extends BridgeAbstract{ $this->returnClientError('You must provide a list of ID (?i=showID1,showID2,...)'); // Loop on show ids - $showList = explode(",",$param['i']); + $showList = explode(",",$param['i']); foreach($showList as $showID){ // Get show page - $html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); + $html = $this->getSimpleHTMLDOM('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnServerError('Could not request EZTV for id "'.$showID.'"'); // Loop on each element that look like an episode entry... 
foreach($html->find('.forum_header_border') as $element) { diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php index e519fc5f..d1b49951 100644 --- a/bridges/EliteDangerousGalnetBridge.php +++ b/bridges/EliteDangerousGalnetBridge.php @@ -13,7 +13,7 @@ class EliteDangerousGalnetBridge extends BridgeAbstract public function collectData(array $param) { - $html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('https://community.elitedangerous.com/galnet') or $this->returnServerError('Error while downloading the website content'); foreach($html->find('div.article') as $element) { $item = new Item(); @@ -40,4 +40,4 @@ class EliteDangerousGalnetBridge extends BridgeAbstract { return 3600 * 2; // 2 hours } -} \ No newline at end of file +} diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index 31ccecc9..ebee2a26 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -33,7 +33,7 @@ class ElsevierBridge extends BridgeAbstract{ $time = $article->find('.article-info', 0); if($time){ $timestring = trim($time->plaintext); - /* + /* The format depends on the age of an article: - Available online 29 July 2016 - July 2016 @@ -63,7 +63,7 @@ class ElsevierBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'http://www.journals.elsevier.com/' . $param['j'] . 
'/recent-articles/'; - $html = file_get_html($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Elsevier journal '.$param['j']); foreach($html->find('.pod-listing') as $article){ $item = new \Item(); @@ -80,4 +80,4 @@ class ElsevierBridge extends BridgeAbstract{ return 43200; // 12h } } -?> \ No newline at end of file +?> diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php new file mode 100644 index 00000000..cd212c3d --- /dev/null +++ b/bridges/FSBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Futurasciences"; + $this->uri = "http://www.futura-sciences.com"; + $this->description = "Returns the 5 newest posts from FS (full text)"; + $this->update = "03/11/2015"; + + } + + public function collectData(array $param){ + + function FS_StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function FS_ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.fiche-actualite', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = FS_StripCDATA($element->find('title', 0)->innertext); + $item->uri = FS_StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FS_ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Futura Sciences'; + } + + public function getURI(){ + return 'http://www.futura-sciences.com/'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git 
a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 37eae8d6..a61aaba2 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -109,9 +109,9 @@ class FacebookBridge extends BridgeAbstract{ if (is_null($html)) { if (isset($param['u'])) { if (!strpos($param['u'], "/")) { - $html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } else { - $html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnServerError('No results for this query.'); } } else { $this->returnClientError('You must specify a Facebook username.'); @@ -155,7 +155,7 @@ class FacebookBridge extends BridgeAbstract{ $this->name = $author; foreach($element->children() as $post) { - + $item = new \Item(); if (count($post->find('abbr')) > 0) { diff --git a/bridges/FierPandaBridge.php b/bridges/FierPandaBridge.php index 9b610fa6..22fd142a 100644 --- a/bridges/FierPandaBridge.php +++ b/bridges/FierPandaBridge.php @@ -14,7 +14,7 @@ Class FierPandaBridge extends BridgeAbstract{ public function collectData(array $param){ $link = 'http://www.fier-panda.fr/'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Fier Panda.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Fier Panda.'); foreach($html->find('div.container-content article') as $element) { $item = new \Item(); diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index 37b619a1..f4443f74 100644 --- a/bridges/FlickrExploreBridge.php +++ 
b/bridges/FlickrExploreBridge.php @@ -12,7 +12,7 @@ class FlickrExploreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('https://www.flickr.com/explore') or $this->returnServerError('Could not request Flickr.'); foreach($html->find('.photo-list-photo-view') as $element) { // Get the styles diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php index 957efbbf..6797d62d 100644 --- a/bridges/FlickrTagBridge.php +++ b/bridges/FlickrTagBridge.php @@ -27,16 +27,16 @@ class FlickrTagBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnServerError('Could not request Flickr.'); if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnServerError('Requested username can\'t be found.'); } - + else { $this->returnClientError('You must specify a keyword or a Flickr username.'); } diff --git a/bridges/FootitoBridge.php 
b/bridges/FootitoBridge.php index e4030a59..35f382cd 100644 --- a/bridges/FootitoBridge.php +++ b/bridges/FootitoBridge.php @@ -12,11 +12,11 @@ class FootitoBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); - + $html = $this->getSimpleHTMLDOM('http://www.footito.fr/') or $this->returnServerError('Could not request Footito.'); + foreach($html->find('div.post') as $element) { $item = new Item(); - + $content = trim($element->innertext); $content = str_replace("content = $content; - + $title = $element->find('.contenu .texte ', 0)->plaintext; $item->title = $title; - + $info = $element->find('div.infos', 0); - + $item->timestamp = strtotime($info->find('time', 0)->datetime); $item->author = $info->find('a.auteur', 0)->plaintext; - + $this->items[] = $item; } } diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index 6c58761a..b0daed20 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -32,7 +32,7 @@ class FourchanBridge extends BridgeAbstract{ $this->returnClientError('You must specify the thread URL.'); $url = 'https://boards.4chan.org'.$thread['path'].''; - $html = $this->file_get_html($url) or $this->returnServerError("Could not request 4chan, thread not found"); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request 4chan, thread not found"); foreach($html->find('div.postContainer') as $element) { $item = new \Item(); @@ -59,7 +59,7 @@ class FourchanBridge extends BridgeAbstract{ } $this->items = array_reverse($this->items); } - + public function getCacheDuration(){ return 300; // 5min } diff --git a/bridges/FrandroidBridge.php b/bridges/FrandroidBridge.php new file mode 100644 index 00000000..82bd62fe --- /dev/null +++ b/bridges/FrandroidBridge.php @@ -0,0 +1,69 @@ +maintainer = "Daiyousei"; + $this->name = "Frandroid"; + $this->uri = 
"http://www.frandroid.com/"; + $this->description = "Returns the RSS feed from Frandroid (full text articles)"; + $this->update = "2015-03-05"; + + } + + public function collectData(array $param) + { + + function FrandroidStripCDATA($string) + { + $string = str_replace('', '', $string); + return $string; + } + function FrandroidExtractContent($url) + { + $html2 = $this->getSimpleHTMLDOM($url); + $html3 = $html2->find('div.post-content', 0); + $html3->find('div.no-sidebar-ad-top', 0)->outertext = ''; + $ret = $html3->find('div.shortcode-container'); + foreach ($ret as $value) { + $value->outertext = ''; + } + + $html3->find('div#hrr-link', 0)->outertext = ''; + $text = $html3->innertext; + $text = strip_tags($text, '

      '); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Frandroid?format=xml') or $this->returnError('Could not request Frandroid.', 404); + $limit = 0; + + foreach ($html->find('item') as $element) { + if ($limit < 5) { + $item = new \Item(); + $item->title = FrandroidStripCDATA($element->find('title', 0)->innertext); + $item->uri = FrandroidStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = FrandroidExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() + { + return 'Frandroid'; + } + + public function getURI() + { + return 'http://www.frandroid.com/'; + } + + public function getCacheDuration() + { + return 300; // 5min + } +} diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 24b5f930..51505626 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -172,13 +172,13 @@ class FuturaSciencesBridge extends BridgeAbstract { $this->returnClientError('Invalid "feed" parameter.'.$url); $url = $this->getURI().'rss/'.$param['feed'].'.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Futura-Sciences: '.$url); $limit = 0; foreach($html->find('item') as $element) { if ($limit < 10) { $article_url = str_replace('#xtor=RSS-8', '', StripCDATA($element->find('guid', 0)->plaintext)); - $article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); + $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Futura-Sciences: '.$article_url); $contents = $article->find('div.content', 0)->innertext; foreach (array( diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 
cc592033..20cd0cc8 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -74,7 +74,7 @@ class GBAtempBridge extends BridgeAbstract { } private function fetch_post_content($uri, $site_url) { - $html = $this->file_get_html($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('Could not request GBAtemp: '.$uri); $content = $html->find('div.messageContent', 0)->innertext; return $this->cleanup_post_content($content, $site_url); } @@ -91,7 +91,7 @@ class GBAtempBridge extends BridgeAbstract { } else $this->returnClientError('The provided type filter is invalid. Expecting N, R, T, or F.'); } else $this->returnClientError('Please provide a type filter. Expecting N, R, T, or F.'); - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request GBAtemp.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request GBAtemp.'); if ($typeFilter == 'N') { foreach ($html->find('li[class=news_item full]') as $newsItem) { @@ -106,7 +106,7 @@ class GBAtempBridge extends BridgeAbstract { foreach ($html->find('li.portal_review') as $reviewItem) { $url = $this->uri.$reviewItem->find('a', 0)->href; $title = $reviewItem->find('span.review_title', 0)->plaintext; - $content = $this->file_get_html($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); + $content = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request GBAtemp: '.$uri); $author = $content->find('a.username', 0)->plaintext; $time = intval($this->ExtractFromDelimiters($content->find('abbr.DateTime', 0)->outertext, 'data-time="', '"')); $intro = '

      '.($content->find('div#review_intro', 0)->plaintext).'

      '; diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php index 2d775128..cc2faa08 100644 --- a/bridges/GelbooruBridge.php +++ b/bridges/GelbooruBridge.php @@ -26,27 +26,27 @@ class GelbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 63; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); + $html = $this->getSimpleHTMLDOM("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Gelbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://gelbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Gelbooru | '.$item->postid; $item->content = '

      Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php index 5605fd19..497c1af2 100644 --- a/bridges/GiphyBridge.php +++ b/bridges/GiphyBridge.php @@ -30,10 +30,10 @@ class GiphyBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; $base_url = 'http://giphy.com'; if (isset($param['s'])) { /* keyword search mode */ - $html = $this->file_get_html($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnServerError('No results for this query.'); } else { $this->returnClientError('You must specify a search worf (?s=...).'); @@ -43,22 +43,22 @@ class GiphyBridge extends BridgeAbstract{ if (isset($param['n'])) { $max = (integer) $param['n']; } - + $limit = 0; $kw = urlencode($param['s']); foreach($html->find('div.hoverable-gif') as $entry) { if($limit < $max) { - $node = $entry->first_child(); - $href = $node->getAttribute('href'); - - $html2 = $this->file_get_html($base_url . $href) or $this->returnServerError('No results for this query.'); + $node = $entry->first_child(); + $href = $node->getAttribute('href'); + + $html2 = $this->getSimpleHTMLDOM($base_url . 
$href) or $this->returnServerError('No results for this query.'); $figure = $html2->getElementByTagName('figure'); $img = $figure->firstChild(); $caption = $figure->lastChild(); - + $item = new \Item(); $item->id = $img->getAttribute('data-gif_id'); - $item->uri = $img->getAttribute('data-bitly_gif_url'); + $item->uri = $img->getAttribute('data-bitly_gif_url'); $item->username = 'Giphy - '.ucfirst($kw); $title = $caption->innertext(); $title = preg_replace('/\s+/', ' ',$title); @@ -74,8 +74,8 @@ class GiphyBridge extends BridgeAbstract{ '' .'' .''; - - $this->items[] = $item; + + $this->items[] = $item; $limit++; } } diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index febbf347..6d228230 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -34,7 +34,7 @@ class GithubIssueBridge extends BridgeAbstract{ public function collectData(array $param){ $uri = 'https://github.com/'.$param['u'].'/'.$param['p'].'/issues/'.$param['i']; - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Github Issue '.$param['i'].' 
in project '.$param['u'].'/'.$param['p']); foreach($html->find('.js-comment-container') as $comment){ diff --git a/bridges/GitlabCommitsBridge.php b/bridges/GitlabCommitsBridge.php index 85c6332b..16862d2c 100644 --- a/bridges/GitlabCommitsBridge.php +++ b/bridges/GitlabCommitsBridge.php @@ -44,7 +44,7 @@ class GitlabCommitsBridge extends BridgeAbstract{ $uri.='master'; } - $html = file_get_html($uri) + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for Gitlab Commits of project '.$param['uri'].'/'.$param['u'].'/'.$param['p']); diff --git a/bridges/GizmodoFRBridge.php b/bridges/GizmodoFRBridge.php index ce0d44eb..0d900f97 100644 --- a/bridges/GizmodoFRBridge.php +++ b/bridges/GizmodoFRBridge.php @@ -14,7 +14,7 @@ class GizmodoFRBridge extends BridgeAbstract{ public function collectData(array $param){ function GizmodoFRExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext; $text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext; @@ -26,7 +26,7 @@ class GizmodoFRBridge extends BridgeAbstract{ return $text; } - $rssFeed = $this->file_get_html('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); + $rssFeed = $this->getSimpleHTMLDOM('http://www.gizmodo.fr/feed') or $this->returnServerError('Could not request http://www.gizmodo.fr/feed'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index 104a13e8..9f1ffdbc 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -33,8 +33,8 @@ class GooglePlusPostBridge extends BridgeAbstract $this->request = $param['username']; // get content parsed -// $html = $this->file_get_html(__DIR__ . 
'/../posts2.html' - $html = $this->file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' +// $html = $this->getSimpleHTMLDOM(__DIR__ . '/../posts2.html' + $html = $this->getSimpleHTMLDOM(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts' // force language , false, stream_context_create(array('http'=> array( 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 0c68a299..e7db5a07 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -35,7 +35,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = $this->file_get_html('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnServerError('No results for this query.'); } else{ $this->returnClientError('You must specify a keyword (?q=...).'); @@ -45,7 +45,7 @@ class GoogleSearchBridge extends BridgeAbstract{ if( !is_null($emIsRes) ){ foreach($emIsRes->find('li[class=g]') as $element) { $item = new Item(); - + // Extract direct URL from google href (eg. /url?q=...) 
$t = $element->find('a[href]',0)->href; $item->uri = ''.$t; diff --git a/bridges/GuruMedBridge.php b/bridges/GuruMedBridge.php index 51c3e8db..f6585ae5 100644 --- a/bridges/GuruMedBridge.php +++ b/bridges/GuruMedBridge.php @@ -16,7 +16,7 @@ class GuruMedBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); + $html = $this->getSimpleHTMLDOM('http://gurumed.org/feed') or $this->returnServerError('Could not request Gurumed.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php index c6cc6b8b..dc4a3bfe 100644 --- a/bridges/HDWallpapersBridge.php +++ b/bridges/HDWallpapersBridge.php @@ -48,7 +48,7 @@ class HDWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$category.'/page/'.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches); diff --git a/bridges/HentaiHavenBridge.php b/bridges/HentaiHavenBridge.php index 5f41a1bd..25934a0a 100644 --- a/bridges/HentaiHavenBridge.php +++ b/bridges/HentaiHavenBridge.php @@ -12,7 +12,7 @@ class HentaiHavenBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); + $html = $this->getSimpleHTMLDOM('http://hentaihaven.org/') or $this->returnServerError('Could not request Hentai Haven.'); foreach($html->find('div.zoe-grid') as $element) { $item = new \Item(); $item->uri = $element->find('div.brick-content h3 a', 0)->href; diff --git a/bridges/IdenticaBridge.php 
b/bridges/IdenticaBridge.php index a2c7d908..23f54a93 100644 --- a/bridges/IdenticaBridge.php +++ b/bridges/IdenticaBridge.php @@ -25,7 +25,7 @@ class IdenticaBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://identi.ca/'.urlencode($this->request)) or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify an Identica username (?u=...).'); diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index e8f53a61..0e4359f7 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -25,35 +25,35 @@ class InstagramBridge extends BridgeAbstract{ $html = ''; if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = $this->file_get_html('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); + $html = $this->getSimpleHTMLDOM('http://instagram.com/'.urlencode($this->request)) or $this->returnServerError('Could not request Instagram.'); } else { $this->returnClientError('You must specify a Instagram username (?u=...).'); } - + $innertext = null; - + foreach($html->find('script') as $script) { if ('' === $script->innertext) { continue; } - + $pos = strpos(trim($script->innertext), 'window._sharedData'); if (0 !== $pos) { continue; } - + $innertext = $script->innertext; break; } $json = trim(substr($innertext, $pos+18), ' =;'); $data = json_decode($json); - - - + + + $userMedia = $data->entry_data->ProfilePage[0]->user->media->nodes; foreach($userMedia as $media) @@ -70,7 +70,7 @@ class InstagramBridge extends BridgeAbstract{ } $item->timestamp = $media->date; $this->items[] = $item; - + } } diff --git a/bridges/JapanExpoBridge.php 
b/bridges/JapanExpoBridge.php index 0b262111..5b9d1031 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -59,7 +59,7 @@ class JapanExpoBridge extends BridgeAbstract{ }; $link = 'http://www.japan-expo-paris.com/fr/actualites'; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request JapanExpo: '.$link); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request JapanExpo: '.$link); $fullcontent = (!empty($param['mode']) && $param['mode'] == 'full'); $count = 0; @@ -73,7 +73,7 @@ class JapanExpoBridge extends BridgeAbstract{ if ($fullcontent) { if ($count < 5) { - $article_html = $this->file_get_html($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + $article_html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request JapanExpo: '.$url); $header = $article_html->find('header.pageHeadBox', 0); $timestamp = strtotime($header->find('time', 0)->datetime); $title_html = $header->find('div.section', 0)->next_sibling(); diff --git a/bridges/KonachanBridge.php b/bridges/KonachanBridge.php index 064a1fd2..9bb08a5e 100644 --- a/bridges/KonachanBridge.php +++ b/bridges/KonachanBridge.php @@ -25,18 +25,18 @@ class KonachanBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); + $html = $this->getSimpleHTMLDOM("http://konachan.com/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Konachan.'); $input_json = explode('Post.register(', $html); 
foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class KonachanBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Konachan | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index f532d058..4c516c64 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -16,14 +16,14 @@ class KoreusBridge extends BridgeAbstract{ } private function KoreusExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('p[class=itemText]', 0)->innertext; $text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text)); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); + $html = $this->getSimpleHTMLDOM('http://feeds.feedburner.com/Koreus-articles') or $this->returnServerError('Could not request Koreus.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php index 6072030c..2fb6b4c9 100755 --- a/bridges/LeBonCoinBridge.php +++ b/bridges/LeBonCoinBridge.php @@ -134,10 +134,10 @@ class LeBonCoinBridge extends BridgeAbstract{ "type" : "list", "values" : [ { "name" : "---- Select ----", "value" : "" }, - + { "name" : "", "value" : "" }, { "name" : "EMPLOI", "value" : "_emploi_" }, - + { "name" : "", "value" : "" }, { "name" : "VEHICULES", "value" : "_vehicules_" }, { "name" : "    Voitures", "value" : "voitures" }, @@ -149,14 +149,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Equipement Caravaning", "value" : "equipement_caravaning" }, { "name" : "    Nautisme", "value" : "nautisme" }, { "name" : "    Equipement Nautisme", "value" : "equipement_nautisme" }, - + { "name" : "", "value" : "" }, { "name" : "IMMOBILIER", "value" : "_immobilier_" }, { "name" : "    Ventes immobilieres", "value" : "ventes_immobilieres" }, { "name" : "    Locations", "value" : "locations" }, { "name" : "    Colocations", "value" : "colocations" }, { 
"name" : "    Bureaux & Commerces", "value" : "bureaux_commerces" }, - + { "name" : "", "value" : "" }, { "name" : "VACANCES", "value" : "_vacances_" }, { "name" : "    Locations gites", "value" : "locations_gites" }, @@ -164,14 +164,14 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Campings", "value" : "campings" }, { "name" : "    Hôtels", "value" : "hotels" }, { "name" : "    Hébergements insolites", "value" : "hebergements_insolites" }, - + { "name" : "", "value" : "" }, { "name" : "MULTIMEDIA", "value" : "_multimedia_" }, { "name" : "    Informatique", "value" : "informatique" }, { "name" : "    Consoles & Jeux vidéo", "value" : "consoles_jeux_video" }, { "name" : "    Image & Son", "value" : "image_son" }, { "name" : "    Téléphonie", "value" : "telephonie" }, - + { "name" : "", "value" : "" }, { "name" : "LOISIRS", "value" : "_loisirs_" }, { "name" : "    DVD / Films", "value" : "dvd_films" }, @@ -184,7 +184,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Collection", "value" : "collection" }, { "name" : "    Jeux & Jouets", "value" : "jeux_jouets" }, { "name" : "    Vins & Gastronomie", "value" : "vins_gastronomie" }, - + { "name" : "", "value" : "" }, { "name" : "MATERIEL PROFESSIONNEL", "value" : "_materiel_professionnel_" }, { "name" : "    Materiel Agricole", "value" : "materiel_agricole" }, @@ -196,7 +196,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Fournitures de Bureau", "value" : "fournitures_de_bureau" }, { "name" : "    Commerces & Marchés", "value" : "commerces_marches" }, { "name" : "    Matériel médical", "value" : "materiel_medical" }, - + { "name" : "", "value" : "" }, { "name" : "SERVICES", "value" : "_services_" }, { "name" : "    Prestations de services", "value" : "prestations_de_services" }, @@ -204,7 +204,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Evénements", "value" : "evenements" }, { "name" : "    Cours particuliers", "value" : "cours_particuliers" }, 
{ "name" : "    Covoiturage", "value" : "covoiturage" }, - + { "name" : "", "value" : "" }, { "name" : "MAISON", "value" : "_maison_" }, { "name" : "    Ameublement", "value" : "ameublement" }, @@ -220,7 +220,7 @@ class LeBonCoinBridge extends BridgeAbstract{ { "name" : "    Montres & Bijoux", "value" : "montres_bijoux" }, { "name" : "    Equipement bébé", "value" : "equipement_bebe" }, { "name" : "    Vêtements bébé", "value" : "vetements_bebe" }, - + { "name" : "", "value" : "" }, { "name" : "AUTRES", "value" : "autres" } ] @@ -239,7 +239,7 @@ class LeBonCoinBridge extends BridgeAbstract{ else { $link = 'http://www.leboncoin.fr/' . $param['c'] . '/offres/' . $param['r'] . '/?f=a&th=1&q=' . urlencode($param['k']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LeBonCoin.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LeBonCoin.'); $list = $html->find('.tabsContent', 0); if($list === NULL) { @@ -275,4 +275,4 @@ class LeBonCoinBridge extends BridgeAbstract{ $this->items[] = $item; } } -} \ No newline at end of file +} diff --git a/bridges/LeJournalDuGeekBridge.php b/bridges/LeJournalDuGeekBridge.php index 24c26853..afbdad37 100644 --- a/bridges/LeJournalDuGeekBridge.php +++ b/bridges/LeJournalDuGeekBridge.php @@ -16,7 +16,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } private function LeJournalDuGeekExtractContent($url) { - $articleHTMLContent = $this->file_get_html($url); + $articleHTMLContent = $this->getSimpleHTMLDOM($url); $text = $articleHTMLContent->find('div.post-content', 0)->innertext; foreach($articleHTMLContent->find('a.more') as $element) { @@ -38,7 +38,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{ } public function collectData(array $param){ - $rssFeed = $this->file_get_html('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); + $rssFeed = 
$this->getSimpleHTMLDOM('http://www.journaldugeek.com/rss') or $this->returnServerError('Could not request http://www.journaldugeek.com/rss'); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 89914b52..d97d1c66 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -34,7 +34,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { } $feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request LeMondeInformatique: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -44,7 +44,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request LeMondeInformatique: '.$article_uri); $article_content = CleanArticle($article_html->find('div#article', 0)->innertext); $article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext; diff --git a/bridges/LeMotDuJourBridge.php b/bridges/LeMotDuJourBridge.php new file mode 100644 index 00000000..f9f12297 --- /dev/null +++ b/bridges/LeMotDuJourBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "LeMotDuJour Bridge"; + $this->uri = "http://www.lemotdujour.com/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function 
StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.single-contenu', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'LeMotDuJour Bridge'; + } + + public function getURI(){ + return 'http://lemotdujour.com/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + // return 0; // 2 hours + } +} diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index ddde42d6..7511cda5 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -12,14 +12,14 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); - + $html = $this->getSimpleHTMLDOM('http://lesjoiesducode.fr/') or $this->returnServerError('Could not request LesJoiesDuCode.'); + foreach($html->find('div.blog-post') as $element) { $item = new Item(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); $url = $temp->href; - + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg @@ -29,21 +29,21 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{ $image->src = $img_src; } $content = 
$temp->innertext; - + $auteur = $temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 8145cf70..7501d514 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -13,7 +13,7 @@ class LichessBridge extends BridgeAbstract public function collectData(array $param) { - $xml_feed = $this->file_get_html('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); + $xml_feed = $this->getSimpleHTMLDOM('http://fr.lichess.org/blog.atom') or $this->returnServerError('Could not retrieve Lichess blog feed.'); $posts_loaded = 0; foreach($xml_feed->find('entry') as $entry) @@ -37,7 +37,7 @@ class LichessBridge extends BridgeAbstract private function retrieve_lichess_post($blog_post_uri) { - $blog_post_html = $this->file_get_html($blog_post_uri); + $blog_post_html = $this->getSimpleHTMLDOM($blog_post_uri); $blog_post_div = $blog_post_html->find('#lichess_blog', 0); $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; diff --git a/bridges/LinkedInCompany.php b/bridges/LinkedInCompany.php index 1f943011..950524aa 100644 --- a/bridges/LinkedInCompany.php +++ b/bridges/LinkedInCompany.php @@ -22,7 +22,7 @@ class LinkedInCompany extends BridgeAbstract{ $html = ''; $link = 'https://www.linkedin.com/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request LinkedIn.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request LinkedIn.'); foreach($html->find('//*[@id="my-feed-post"]/li') as $element) { $title = $element->find('span.share-body', 0)->innertext; diff --git a/bridges/LolibooruBridge.php 
b/bridges/LolibooruBridge.php index 3f4cae15..b10c39d8 100644 --- a/bridges/LolibooruBridge.php +++ b/bridges/LolibooruBridge.php @@ -26,18 +26,18 @@ class LolibooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); + $html = $this->getSimpleHTMLDOM("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Lolibooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -46,7 +46,7 @@ class LolibooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Lolibooru | '.$json['id']; - $item->content = '
      Tags: '.$json['tags']; + $item->content = '
      Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/MalikiBridge.php b/bridges/MalikiBridge.php new file mode 100644 index 00000000..66c8081a --- /dev/null +++ b/bridges/MalikiBridge.php @@ -0,0 +1,60 @@ +maintainer = "mitsukarenai"; + $this->name = "Maliki"; + $this->uri = "http://www.maliki.com/"; + $this->description = "Returns Maliki's newest strips"; + $this->update = "2014-05-30"; + + } + + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM('http://www.maliki.com/') or $this->returnError('Could not request Maliki.', 404); + $count=0; + $latest=1; $latest_title=""; + $latest = $html->find('div.conteneur_page a', 1)->href; + $latest_title = $html->find('div.conteneur_page img', 0)->title; + + function MalikiExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = 'http://www.maliki.com/'.$html2->find('img', 0)->src; + $text = '
      '.$html2->find('div.imageetnews', 0)->plaintext; + return $text; + } + + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$latest; + $item->title = $latest_title; + $item->timestamp = time(); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + + + foreach($html->find('div.boite_strip') as $element) { + if(!empty($element->find('a',0)->href) and $count < 3) { + $item = new \Item(); + $item->uri = 'http://www.maliki.com/'.$element->find('a',0)->href; + $item->title = $element->find('img',0)->title; + $item->timestamp = strtotime(str_replace('/', '-', $element->find('span.stylepetit', 0)->innertext)); + $item->content = MalikiExtractContent($item->uri); + $this->items[] = $item; + $count++; + } + } + } + + public function getName(){ + return 'Maliki'; + } + + public function getURI(){ + return 'http://www.maliki.com/'; + } + + public function getCacheDuration(){ + return 86400*6; // 6 days + } +} diff --git a/bridges/MemoLinuxBridge.php b/bridges/MemoLinuxBridge.php new file mode 100644 index 00000000..1c41f145 --- /dev/null +++ b/bridges/MemoLinuxBridge.php @@ -0,0 +1,58 @@ +maintainer = "qwertygc"; + $this->name = "MemoLinux"; + $this->uri = "http://memo-linux.com/"; + $this->description = "Returns the 10 newest posts from MemoLinux (full text)"; + $this->update = "2015-01-30"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + $text = preg_replace('@]*?>.*?

@si', '', $text); + $text = preg_replace("/getSimpleHTMLDOM('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + } + + public function getName(){ + return 'MemoLinux'; + } + + public function getURI(){ + return 'http://memo-linux.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/MilbooruBridge.php b/bridges/MilbooruBridge.php index 3ead7249..7e5fdaa8 100644 --- a/bridges/MilbooruBridge.php +++ b/bridges/MilbooruBridge.php @@ -25,25 +25,25 @@ class MilbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); + $html = $this->getSimpleHTMLDOM("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnServerError('Could not request Milbooru.'); foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) { $item = new \Item(); $item->uri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); + 
$item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('a', 0)->getAttribute('data-post-id')); $item->timestamp = time(); $thumbnailUri = 'http://sheslostcontrol.net/moe/shimmie/'.$element->find('img', 0)->src; $item->tags = $element->find('a', 0)->getAttribute('data-tags'); $item->title = 'Milbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index d60621c2..3cc0d815 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -9,8 +9,8 @@ class MondeDiploBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); + public function collectData(array $param){ + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MondeDiplo. for : ' . $link); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index f4419fe7..e28c0e3d 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -10,13 +10,13 @@ class MsnMondeBridge extends BridgeAbstract{ } private function MsnMondeExtractContent($url, &$item) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $item->content = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; $item->timestamp = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime); } public function collectData(array $param){ - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request MsnMonde.'); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request MsnMonde.'); $limit = 0; foreach($html->find('.smalla') as $article) { if($limit < 10) { diff --git a/bridges/MspabooruBridge.php b/bridges/MspabooruBridge.php index a67d5666..2e465fd3 100644 --- a/bridges/MspabooruBridge.php +++ b/bridges/MspabooruBridge.php @@ -26,27 +26,27 @@ class MspabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = 
(int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); + $html = $this->getSimpleHTMLDOM("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Mspabooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://mspabooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Mspabooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/NakedSecurityBridge.php b/bridges/NakedSecurityBridge.php index 01e46c19..568e8573 100644 --- a/bridges/NakedSecurityBridge.php +++ b/bridges/NakedSecurityBridge.php @@ -36,7 +36,7 @@ class NakedSecurityBridge extends BridgeAbstract { } $feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request '.$this->getName().': '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -44,7 +44,7 @@ class NakedSecurityBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request '.$this->getName().': '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); @@ -68,4 +68,4 @@ class NakedSecurityBridge extends BridgeAbstract { } } } -} \ No newline at end of file +} diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8bf7d9bd..0e0e5f87 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -13,7 +13,7 @@ class NasaApodBridge extends BridgeAbstract{ public function collectData(array $param) { - $html = $this->file_get_html('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); + $html = $this->getSimpleHTMLDOM('http://apod.nasa.gov/apod/archivepix.html') or $this->returnServerError('Error while downloading the website content'); $list = explode("
", $html->find('b', 0)->innertext); for($i = 0; $i < 3;$i++) @@ -25,7 +25,7 @@ class NasaApodBridge extends BridgeAbstract{ $uri = 'http://apod.nasa.gov/apod/'.$uri_page; $item->uri = $uri; - $picture_html = $this->file_get_html($uri); + $picture_html = $this->getSimpleHTMLDOM($uri); $picture_html_string = $picture_html->innertext; //Extract image and explanation diff --git a/bridges/NeuviemeArtBridge.php b/bridges/NeuviemeArtBridge.php index 96375ec0..b8a0ce8d 100644 --- a/bridges/NeuviemeArtBridge.php +++ b/bridges/NeuviemeArtBridge.php @@ -20,7 +20,7 @@ class NeuviemeArtBridge extends BridgeAbstract { } $feedUrl = 'http://www.9emeart.fr/9emeart.rss'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request 9eme Art: '.$feedUrl); $limit = 0; foreach ($html->find('item') as $element) { @@ -28,7 +28,7 @@ class NeuviemeArtBridge extends BridgeAbstract { //Retrieve article Uri and get that page $article_uri = $element->find('guid', 0)->plaintext; - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request 9eme Art: '.$article_uri); //Build article contents from corresponding elements $article_title = trim($element->find('title', 0)->plaintext); diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 5b121f7d..db74a244 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -16,7 +16,7 @@ class NextInpactBridge extends BridgeAbstract { } private function ExtractContent($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = '

'.$html2->find('span.sub_title', 0)->innertext.'

' .'

-

' .'
'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'
'; @@ -27,7 +27,7 @@ class NextInpactBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); + $html = $this->getSimpleHTMLDOM('http://www.nextinpact.com/rss/news.xml') or $this->returnServerError('Could not request NextInpact.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index 5960ffc5..aaf006c7 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -10,21 +10,21 @@ class NiceMatinBridge extends BridgeAbstract{ } private function NiceMatinExtractContent($url) { - $html = $this->file_get_html($url); + $html = $this->getSimpleHTMLDOM($url); if(!$html) $this->returnServerError('Could not acquire content from url: ' . $url . '!'); - + $content = $html->find('article', 0); if(!$content) $this->returnServerError('Could not find \'section\'!'); - + $text = preg_replace('#(.*?)#is', '', $content->innertext); $text = strip_tags($text, '

'); return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); + $html = $this->getSimpleHTMLDOM('http://www.nicematin.com/derniere-minute/rss') or $this->returnServerError('Could not request NiceMatin.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php index f7dedd13..2a5b9605 100644 --- a/bridges/NovelUpdatesBridge.php +++ b/bridges/NovelUpdatesBridge.php @@ -26,7 +26,7 @@ class NovelUpdatesBridge extends BridgeAbstract{ if(strpos($thread['path'], 'series/') === FALSE) $this->returnClientError('You must specify the novel URL.'); $url = 'http://www.novelupdates.com'.$thread['path'].''; - $fullhtml = $this->file_get_html($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); + $fullhtml = $this->getSimpleHTMLDOM($url) or $this->returnServerError("Could not request NovelUpdates, novel not found"); $this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext; // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 // forcefully removes tbody diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index cc949c2a..f5671b77 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -42,7 +42,6 @@ class NumeramaBridge extends BridgeAbstract{ $limit++; } } - } public function getCacheDuration() { diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php index 4a0c3e0d..4c093ab9 100644 --- a/bridges/OpenClassroomsBridge.php +++ b/bridges/OpenClassroomsBridge.php @@ -64,11 +64,11 @@ class OpenClassroomsBridge extends BridgeAbstract{ { $this->returnServerError('Error: You must chose a category.'); } - + $html = ''; $link = 'https://openclassrooms.com/courses?categories='.$param['u'].'&title=&sort=updatedAt+desc'; - $html = 
$this->file_get_html($link) or $this->returnServerError('Could not request OpenClassrooms.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request OpenClassrooms.'); foreach($html->find('.courseListItem') as $element) { $item = new \Item(); diff --git a/bridges/OpenTheoryBridge.php b/bridges/OpenTheoryBridge.php new file mode 100644 index 00000000..cccfaf36 --- /dev/null +++ b/bridges/OpenTheoryBridge.php @@ -0,0 +1,56 @@ +maintainer = "qwertygc"; + $this->name = "Opentheory"; + $this->uri = "http://open1theory.com"; + $this->description = "Returns the 5 newest posts from OpenTheory (full text)"; + $this->update = "02-08-2014"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'OpenTheory'; + } + + public function getURI(){ + return 'http://open1theory.com/feed'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + // return 0; // 1 hour + } +} diff --git a/bridges/ParuVenduImmoBridge.php b/bridges/ParuVenduImmoBridge.php index 97285a50..65947df7 100644 --- a/bridges/ParuVenduImmoBridge.php +++ b/bridges/ParuVenduImmoBridge.php @@ -47,7 +47,7 @@ 
class ParuVenduImmoBridge extends BridgeAbstract $appartment = '&tbApp=1&tbDup=1&tbChb=1&tbLof=1&tbAtl=1&tbPla=1'; $maison = '&tbMai=1&tbVil=1&tbCha=1&tbPro=1&tbHot=1&tbMou=1&tbFer=1'; $link = $this->uri.'/immobilier/annonceimmofo/liste/listeAnnonces?tt=1'.$appartment.$maison; - + if (isset($param['minarea'])) { $this->request .= ' '.$param['minarea'].' m2'; $link .= '&sur0='.urlencode($param['minarea']); @@ -56,39 +56,39 @@ class ParuVenduImmoBridge extends BridgeAbstract if (isset($param['maxprice'])) { $link .= '&px1='.urlencode($param['maxprice']); } - + if (isset($param['pa'])) { $link .= '&pa='.urlencode($param['pa']); } - + if (isset($param['lo'])) { $this->request .= ' In: '.$param['lo']; $link .= '&lo='.urlencode($param['lo']); } - $html = $this->file_get_html($link) or $this->returnServerError('Could not request paruvendu.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request paruvendu.'); foreach($html->find('div.annonce a') as $element) { - + if (!$element->title) { continue; } - + $img =''; foreach($element->find('span.img img') as $img) { if ($img->original) { $img = ''; } } - + $desc = $element->find('span.desc')[0]->innertext; $desc = str_replace("voir l'annonce", '', $desc); - + $price = $element->find('span.price')[0]->innertext; list($href) = explode('#', $element->href); - + $item = new \Item(); $item->uri = $this->uri.$href; $item->title = $element->title; diff --git a/bridges/PickyWallpapersBridge.php b/bridges/PickyWallpapersBridge.php index 2e44cb22..a4feffec 100644 --- a/bridges/PickyWallpapersBridge.php +++ b/bridges/PickyWallpapersBridge.php @@ -55,7 +55,7 @@ class PickyWallpapersBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->resolution.'/'.$this->category.'/'.(!empty($this->subcategory)?$this->subcategory.'/':'').'page-'.$page.'/'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = 
$this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches); diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 70b41057..1012ea2a 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -38,7 +38,7 @@ class PinterestBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; if (isset($param['u']) || isset($param['b'])) { - + if (empty($param['u'])) { $this->returnClientError('You must specify a Pinterest username (?u=...).'); @@ -48,33 +48,33 @@ class PinterestBridge extends BridgeAbstract{ { $this->returnClientError('You must specify a Pinterest board for this username (?b=...).'); } - + $this->username = $param['u']; $this->board = $param['b']; - $html = $this->file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnServerError('Username and/or board not found'); } else if (isset($param['q'])) { $this->query = $param['q']; - $html = $this->file_get_html($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); + $html = $this->getSimpleHTMLDOM($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnServerError('Could not request Pinterest.'); } - + else { $this->returnClientError('You must specify a Pinterest username and a board name (?u=...&b=...).'); } - - + + foreach($html->find('div.pinWrapper') as $div) { $a = $div->find('a.pinImageWrapper',0); - + $img = $a->find('img', 0); - + $item = new \Item(); $item->uri = $this->getURI().$a->getAttribute('href'); $item->content = ''; - - + + if (isset($this->query)) { $avatar = $div->find('div.creditImg', 0)->find('img', 0); 
@@ -84,25 +84,25 @@ class PinterestBridge extends BridgeAbstract{ $username = $div->find('div.creditName', 0); $board = $div->find('div.creditTitle', 0); - - $item->username =$username->innertext; + + $item->username =$username->innertext; $item->fullname = $board->innertext; $item->avatar = $avatar; - + $item->content .= '
'.$item->username.''; $item->content .= '
'.$item->fullname; } - + $item->title = $img->getAttribute('alt'); - + //$item->timestamp = $media->created_time; $this->items[] = $item; - + } } public function getName(){ - + if (isset($this->query)) { return $this->query .' - Pinterest'; diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php index fc536cd1..28a7a680 100644 --- a/bridges/PlanetLibreBridge.php +++ b/bridges/PlanetLibreBridge.php @@ -10,13 +10,13 @@ class PlanetLibreBridge extends BridgeAbstract{ } private function PlanetLibreExtractContent($url){ - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); $text = $html2->find('div[class="post-text"]', 0)->innertext; return $text; } public function collectData(array $param){ - $html = $this->file_get_html('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); + $html = $this->getSimpleHTMLDOM('http://www.planet-libre.org/') or $this->returnServerError('Could not request PlanetLibre.'); $limit = 0; foreach($html->find('div.post') as $element) { if($limit < 5) { diff --git a/bridges/ProjectMGameBridge.php b/bridges/ProjectMGameBridge.php index 865b38db..080029d3 100644 --- a/bridges/ProjectMGameBridge.php +++ b/bridges/ProjectMGameBridge.php @@ -14,7 +14,7 @@ class ProjectMGameBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); + $html = $this->getSimpleHTMLDOM('http://projectmgame.com/en/') or $this->returnServerError('Error while downloading the Project M homepage'); foreach($html->find('article') as $article) { $item = new \Item(); diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php index 433792e6..490ed7a4 100644 --- a/bridges/RTBFBridge.php +++ b/bridges/RTBFBridge.php @@ -25,7 +25,7 @@ class RTBFBridge extends BridgeAbstract { $count = 0; if (isset($param['c'])) { - $html = 
$this->file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); + $html = $this->getSimpleHTMLDOM('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnServerError('Could not request RTBF.'); foreach($html->find('section[id!=widget-ml-avoiraussi-] .rtbf-media-grid article') as $element) { if($count < $limit) { diff --git a/bridges/RaymondBridge.php b/bridges/RaymondBridge.php new file mode 100644 index 00000000..d2550520 --- /dev/null +++ b/bridges/RaymondBridge.php @@ -0,0 +1,53 @@ +maintainer = "pit-fgfjiudghdf"; + $this->name = "Raymond"; + $this->uri = "http://www.raymond.cc"; + $this->description = "Returns the 3 newest posts from Raymond.cc (full text)"; + $this->update = "2014-05-26"; + + } + + public function collectData(array $param){ + function raymondStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function raymondExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entry-content', 0)->innertext; + $text = preg_replace('/class="ad".*/', '', $text); + $text = strip_tags($text, '

'); + $text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404); + $limit = 0; + foreach($html->find('item') as $element) { + if($limit < 3) { + $item = new \Item(); + $item->title = raymondStripCDATA($element->find('title', 0)->innertext); + $item->uri = raymondStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = raymondExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + public function getName(){ + return 'raymond'; + } + public function getURI(){ + return 'http://www.raymond.cc/blog'; + } + public function getCacheDuration(){ + return 3600*12; // 12 hour + } +} + diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index 8031582d..2a563f68 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -66,7 +66,7 @@ class Releases3DSBridge extends BridgeAbstract { //Retrieve cover art and short desc from IGN? 
$ignResult = false; $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = ''; $ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name); - if ($ignResult = $this->file_get_html($ignSearchUrl)) { + if ($ignResult = $this->getSimpleHTMLDOM($ignSearchUrl)) { $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 8c1f553b..e93cec48 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -10,7 +10,7 @@ class ReporterreBridge extends BridgeAbstract{ } private function ExtractContentReporterre($url) { - $html2 = $this->file_get_html($url); + $html2 = $this->getSimpleHTMLDOM($url); foreach($html2->find('div[style=text-align:justify]') as $e) { $text = $e->outertext; @@ -27,7 +27,7 @@ class ReporterreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); + $html = $this->getSimpleHTMLDOM('http://www.reporterre.net/spip.php?page=backend') or $this->returnServerError('Could not request Reporterre.'); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/Rue89Bridge.php b/bridges/Rue89Bridge.php index 54c86d4e..5dd813cb 100644 --- a/bridges/Rue89Bridge.php +++ b/bridges/Rue89Bridge.php @@ -22,7 +22,7 @@ class Rue89Bridge extends BridgeAbstract{ public function collectData(array $param){ - $html = $this->file_get_html('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); + $html = $this->getSimpleHTMLDOM('http://api.rue89.nouvelobs.com/feed') or $this->returnServerError('Could not request Rue89.'); $limit = 0; foreach($html->find('item') as $element) 
{ diff --git a/bridges/Rule34Bridge.php b/bridges/Rule34Bridge.php index b868d910..b0d5f888 100644 --- a/bridges/Rule34Bridge.php +++ b/bridges/Rule34Bridge.php @@ -25,27 +25,27 @@ class Rule34Bridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); + $html = $this->getSimpleHTMLDOM("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Rule34.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://rule34.xxx/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Rule34 | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/Rule34pahealBridge.php b/bridges/Rule34pahealBridge.php index 77b03191..42014df8 100644 --- a/bridges/Rule34pahealBridge.php +++ b/bridges/Rule34pahealBridge.php @@ -26,25 +26,25 @@ class Rule34pahealBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); + $html = $this->getSimpleHTMLDOM("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnServerError('Could not request Rule34paheal.'); foreach($html->find('div[class=shm-image-list] div[class=shm-thumb]') as $element) { $item = new \Item(); $item->uri = 'http://rule34.paheal.net'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->find('img', 0)->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->getAttribute('data-tags'); $item->title = 'Rule34paheal | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SafebooruBridge.php b/bridges/SafebooruBridge.php index cd0f1a38..b4b45f5c 100644 --- a/bridges/SafebooruBridge.php +++ b/bridges/SafebooruBridge.php @@ -26,27 +26,27 @@ class SafebooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 40; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); + $html = $this->getSimpleHTMLDOM("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Safebooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://safebooru.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Safebooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/SakugabooruBridge.php b/bridges/SakugabooruBridge.php index 8e7e3aad..aea71336 100644 --- a/bridges/SakugabooruBridge.php +++ b/bridges/SakugabooruBridge.php @@ -25,18 +25,18 @@ class SakugabooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); + $html = $this->getSimpleHTMLDOM("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Sakugabooru.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class SakugabooruBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Sakugabooru | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/ScilogsBridge.php b/bridges/ScilogsBridge.php new file mode 100644 index 00000000..7186be41 --- /dev/null +++ b/bridges/ScilogsBridge.php @@ -0,0 +1,54 @@ +maintainer = "qwertygc"; + $this->name = "Scilogs Bridge"; + $this->uri = "http://www.scilogs.fr/"; + $this->description = "Returns the newest articles."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + + function ScilogsStripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ScilogsExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.entrybody', 0)->innertext; + return $text; + } + $html = $this->getSimpleHTMLDOM('http://www.scilogs.fr/?wpmu-feed=posts') or $this->returnError('Could not request Scilogs.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = ScilogsStripCDATA($element->find('title', 0)->innertext); + $item->uri = ScilogsStripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ScilogsExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Scilogs Bridge'; + } + + public function getURI(){ + return 'http://scilogs.fr/'; + } + + public function getCacheDuration(){ + return 3600*2; // 2 hours + } +} diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php index 864d39c8..e2f631fc 100644 --- a/bridges/ScmbBridge.php +++ b/bridges/ScmbBridge.php @@ -10,20 +10,20 @@ class ScmbBridge extends BridgeAbstract{ $this->update = '2016-08-17'; } - + public function collectData(array $param){ $html = ''; - $html = $this->file_get_html('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); - + $html = 
$this->getSimpleHTMLDOM('http://secouchermoinsbete.fr/') or $this->returnServerError('Could not request Se Coucher Moins Bete.'); + foreach($html->find('article') as $article) { $item = new \Item(); $item->uri = 'http://secouchermoinsbete.fr'.$article->find('p.summary a',0)->href; $item->title = $article->find('header h1 a',0)->innertext; - + $article->find('span.read-more',0)->outertext=''; // remove text "En savoir plus" from anecdote content $content = $article->find('p.summary a',0)->innertext; $content =substr($content,0,strlen($content)-17); // remove superfluous spaces at the end - + // get publication date $str_date = $article->find('time',0)->datetime; list($date, $time) = explode(' ', $str_date); @@ -31,8 +31,8 @@ class ScmbBridge extends BridgeAbstract{ list($h, $i) = explode(':', $time); $timestamp = mktime($h,$i,0,$m,$d,$y); $item->timestamp = $timestamp; - - + + $item->content = $content; $this->items[] = $item; } diff --git a/bridges/ScoopItBridge.php b/bridges/ScoopItBridge.php index ba3fc0b5..d7b4ed3d 100644 --- a/bridges/ScoopItBridge.php +++ b/bridges/ScoopItBridge.php @@ -24,9 +24,9 @@ class ScoopItBridge extends BridgeAbstract{ if ($param['u'] != '') { $this->request = $param['u']; $link = 'http://scoop.it/search?q=' .urlencode($this->request); - - $html = $this->file_get_html($link) or $this->returnServerError('Could not request ScoopIt. for : ' . $link); - + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request ScoopIt. for : ' . 
$link); + foreach($html->find('div.post-view') as $element) { $item = new Item(); $item->uri = $element->find('a', 0)->href; diff --git a/bridges/SegfaultMintBridge.php b/bridges/SegfaultMintBridge.php new file mode 100644 index 00000000..b2ede5d0 --- /dev/null +++ b/bridges/SegfaultMintBridge.php @@ -0,0 +1,55 @@ +maintainer = "qwertygc"; + $this->name = "SegfaultMint"; + $this->uri = "http://segfault.linuxmint.com/"; + $this->description = "Returns the 5 newest posts from SegfaultMint (full text)"; + $this->update = "2014-07-05"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('div.post-bodycopy', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + $html = $this->getSimpleHTMLDOM('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = StripCDATA($element->find('guid', 0)->plaintext); + $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName(){ + return 'Segfault Mint'; + } + + public function getURI(){ + return 'http://segfault.linuxmint.com/feed/'; + } + + public function getCacheDuration(){ + return 3600*24; // 24 hours + } +} diff --git a/bridges/Sexactu.php b/bridges/Sexactu.php index 9bb82c9a..2972ede1 100644 --- a/bridges/Sexactu.php +++ b/bridges/Sexactu.php @@ -15,10 +15,10 @@ class Sexactu extends BridgeAbstract{ $find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'novembre', 'décembre'); $replace = 
array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'); - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); foreach($html->find('.content-holder') as $contentHolder) { - // only use first list as second one only contains pages numbers + // only use first list as second one only contains pages numbers $articles = $contentHolder->find('ul', 0); foreach($articles->find('li') as $element) { // if you ask about that method_exists, there seems to be a bug in simple html dom @@ -38,7 +38,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $dateText = $titleTimestamp->innertext; $dateText = substr($dateText, strpos($dateText,',')+1); $dateText = str_replace($find, $replace, strtolower($dateText)); - $date = strtotime($dateText); + $date = strtotime($dateText); $item->timestamp = $date; $item->author = "Maïa Mazaurette"; @@ -50,9 +50,9 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', $item->content = $elementText->innertext; $this->items[] = $item; } - + } - + } } } @@ -64,7 +64,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', public function getCacheDuration(){ return 7200; // 2h hours } - + private function correctCase($str) { $sentences=explode('.', mb_strtolower($str, "UTF-8")); $str=""; @@ -73,7 +73,7 @@ $replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', { //upper case first char $sentence=ucfirst(trim($sentence)); - + //append sentence to output $str=$str.$sep.$sentence; $sep=". 
"; diff --git a/bridges/SiliconBridge.php b/bridges/SiliconBridge.php index df582bb5..e1bf84ae 100644 --- a/bridges/SiliconBridge.php +++ b/bridges/SiliconBridge.php @@ -20,7 +20,7 @@ class SiliconBridge extends BridgeAbstract { } $feedUrl = 'http://www.silicon.fr/feed'; - $html = $this->file_get_html($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); + $html = $this->getSimpleHTMLDOM($feedUrl) or $this->returnServerError('Could not request Silicon: '.$feedUrl); $limit = 0; foreach($html->find('item') as $element) { @@ -30,7 +30,7 @@ class SiliconBridge extends BridgeAbstract { $article_uri = $element->innertext; $article_uri = substr($article_uri, strpos($article_uri, '') + 6); $article_uri = substr($article_uri, 0, strpos($article_uri, '')); - $article_html = $this->file_get_html($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); + $article_html = $this->getSimpleHTMLDOM($article_uri) or $this->returnServerError('Could not request Silicon: '.$article_uri); //Build article contents from corresponding elements $thumbnailUri = $element->find('enclosure', 0)->url; diff --git a/bridges/StripeAPIChangeLogBridge.php b/bridges/StripeAPIChangeLogBridge.php index 0fb1d85c..334955f1 100644 --- a/bridges/StripeAPIChangeLogBridge.php +++ b/bridges/StripeAPIChangeLogBridge.php @@ -16,7 +16,7 @@ class StripeAPIChangeLogBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('https://stripe.com/docs/upgrades') + $html = $this->getSimpleHTMLDOM('https://stripe.com/docs/upgrades') or $this->returnServerError('No results for Stripe API Changelog'); diff --git a/bridges/SuperbWallpapersBridge.php b/bridges/SuperbWallpapersBridge.php index 36928be4..eb54dea5 100644 --- a/bridges/SuperbWallpapersBridge.php +++ b/bridges/SuperbWallpapersBridge.php @@ -47,12 +47,12 @@ class SuperbWallpapersBridge extends BridgeAbstract { // Get last page number $link = 
$baseUri.'/'.$this->category.'/9999.html'; - $html = $this->file_get_html($link); + $html = $this->getSimpleHTMLDOM($link); $lastpage = min($html->find('.paging .cpage', 0)->innertext(), ceil($max/36)); for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'/'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); foreach($html->find('.wpl .i a') as $element) { $thumbnail = $element->find('img', 0); diff --git a/bridges/T411Bridge.php b/bridges/T411Bridge.php index 22f02c86..83f7a309 100644 --- a/bridges/T411Bridge.php +++ b/bridges/T411Bridge.php @@ -36,7 +36,7 @@ class T411Bridge extends BridgeAbstract { //Retrieve torrent listing from search results, which does not contain torrent description $url = $this->uri.'torrents/search/?'.$param['search'].'&order=added&type=desc'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request t411: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request t411: '.$url); $results = $html->find('table.results', 0); if (is_null($results)) $this->returnServerError('No results from t411: '.$url); @@ -57,7 +57,7 @@ class T411Bridge extends BridgeAbstract { $item_date = strtotime($element->find('dd', 0)->plaintext); //Retrieve full description from torrent page - if ($item_html = $this->file_get_html($item_uri)) { + if ($item_html = $this->getSimpleHTMLDOM($item_uri)) { //Retrieve data from page contents $item_desc = $item_html->find('div.description', 0); diff --git a/bridges/TagBoardBridge.php b/bridges/TagBoardBridge.php index 7382a526..f3bd4c99 100644 --- a/bridges/TagBoardBridge.php +++ b/bridges/TagBoardBridge.php @@ -23,8 +23,8 @@ class TagBoardBridge extends BridgeAbstract{ $html = ''; $this->request = $param['u']; $link = 'https://post-cache.tagboard.com/search/' 
.$this->request; - - $html = $this->file_get_html($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); + + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request TagBoard for : ' . $link); $parsed_json = json_decode($html); foreach($parsed_json->{'posts'} as $element) { @@ -49,4 +49,4 @@ class TagBoardBridge extends BridgeAbstract{ return 21600; // 6 hours } } - + diff --git a/bridges/TbibBridge.php b/bridges/TbibBridge.php index aeacdd66..59fa3e3e 100644 --- a/bridges/TbibBridge.php +++ b/bridges/TbibBridge.php @@ -25,27 +25,27 @@ class TbibBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); + $html = $this->getSimpleHTMLDOM("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Tbib.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://tbib.org/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Tbib | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index b8ca7c5a..f5d4b7fa 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -12,17 +12,17 @@ class TheCodingLoveBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = $this->file_get_html('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); - + $html = $this->getSimpleHTMLDOM('http://thecodinglove.com/') or $this->returnServerError('Could not request The Coding Love.'); + foreach($html->find('div.post') as $element) { $item = new Item(); $temp = $element->find('h3 a', 0); - + $titre = $temp->innertext; $url = $temp->href; - + $temp = $element->find('div.bodytype', 0); - + // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); foreach($images as $image){ @@ -33,18 +33,18 @@ class TheCodingLoveBridge extends BridgeAbstract{ $auteur = $temp->find('i', 0); $pos = strpos($auteur->innertext, "by"); - + if($pos > 0) { $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); $item->author = $auteur; } - - + + $item->content .= trim($content); $item->uri = $url; $item->title = trim($titre); - + $this->items[] = $item; } } diff --git a/bridges/ThePirateBayBridge.php b/bridges/ThePirateBayBridge.php index 35b1391c..4f5650d0 100644 --- a/bridges/ThePirateBayBridge.php +++ b/bridges/ThePirateBayBridge.php @@ -58,9 +58,9 @@ class ThePirateBayBridge extends BridgeAbstract{ if (!isset($param['q'])) $this->returnClientError('You must specify keywords (?q=...)'); - $keywordsList = explode(";",$param['q']); + $keywordsList = explode(";",$param['q']); foreach($keywordsList as $keywords){ - $html = $this->file_get_html('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); + $html = 
$this->getSimpleHTMLDOM('https://thepiratebay.org/search/'.rawurlencode($keywords).'/0/3/0') or $this->returnServerError('Could not request TPB.'); if ($html->find('table#searchResult', 0) == FALSE) $this->returnServerError('No result for query '.$keywords); diff --git a/bridges/TuxboardBridge.php b/bridges/TuxboardBridge.php new file mode 100644 index 00000000..301e9b63 --- /dev/null +++ b/bridges/TuxboardBridge.php @@ -0,0 +1,64 @@ +maintainer = "superbaillot.net"; + $this->name = "Tuxboard"; + $this->uri = "http://www.tuxboard.com/"; + $this->description = "Tuxboard"; + $this->update = "2014-07-08"; + + } + + public function collectData(array $param){ + + function StripCDATA($string) { + $string = str_replace('', '', $string); + return $string; + } + + function ExtractContent($url) { + $html2 = $this->getSimpleHTMLDOM($url); + $text = $html2->find('article#page', 0)->innertext; + $text = preg_replace('@]*?>.*?@si', '', $text); + return $text; + } + + $html = $this->getSimpleHTMLDOM('http://www.tuxboard.com/feed/atom/') or $this->returnError('Could not request Tuxboard.', 404); + $limit = 0; + + foreach($html->find('entry') as $element) { + if($limit < 10) { + $item = new \Item(); + $item->title = StripCDATA($element->find('title', 0)->innertext); + $item->uri = $element->find('link', 0)->href; + $item->timestamp = strtotime($element->find('published', 0)->plaintext); + $item->content = ExtractContent($item->uri); + $this->items[] = $item; + $limit++; + } + } + + + + } + + public function getName(){ + return 'Tuxboard'; + } + + public function getURI(){ + return 'http://www.tuxboard.com'; + } + + public function getDescription(){ + return 'Tuxboard via rss-bridge'; + } + + public function getCacheDuration(){ + return 3600; // 1 hour + } +} +?> diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 24970ca9..85108068 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -9,7 +9,7 @@ class TwitterBridge extends BridgeAbstract{ 
$this->description = "Returns tweets by keyword/hashtag or user name"; $this->update = '2016-08-17'; - $this->parameters["global"] = + $this->parameters["global"] = '[ { "name" : "Hide profile pictures", @@ -48,12 +48,12 @@ class TwitterBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = ''; + $html = ''; if (isset($param['q'])) { /* keyword search mode */ - $html = $this->file_get_html('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/search?q='.urlencode($param['q']).'&f=tweets') or $this->returnServerError('No results for this query.'); } elseif (isset($param['u'])) { /* user timeline mode */ - $html = $this->file_get_html('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); + $html = $this->getSimpleHTMLDOM('https://twitter.com/'.urlencode($param['u']).'/with_replies') or $this->returnServerError('Requested username can\'t be found.'); } else { $this->returnClientError('You must specify a keyword (?q=...) or a Twitter username (?u=...).'); @@ -68,20 +68,20 @@ class TwitterBridge extends BridgeAbstract{ // extract username and sanitize $item->username = $tweet->getAttribute('data-screen-name'); // extract fullname (pseudonym) - $item->fullname = $tweet->getAttribute('data-name'); + $item->fullname = $tweet->getAttribute('data-name'); // get author $item->author = $item->fullname . ' (@' . $item->username . 
')'; // get avatar link - $item->avatar = $tweet->find('img', 0)->src; + $item->avatar = $tweet->find('img', 0)->src; // get TweetID $item->id = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); + // get tweet link + $item->uri = 'https://twitter.com'.$tweet->find('a.js-permalink', 0)->getAttribute('href'); // extract tweet timestamp $item->timestamp = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); // generate the title - $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); - + $item->title = strip_tags($tweet->find('p.js-tweet-text', 0)->innertext); + // processing content links foreach($tweet->find('a') as $link) { if($link->hasAttribute('data-expanded-url') ) { diff --git a/bridges/UnsplashBridge.php b/bridges/UnsplashBridge.php index adf75440..492de7bd 100644 --- a/bridges/UnsplashBridge.php +++ b/bridges/UnsplashBridge.php @@ -44,7 +44,7 @@ class UnsplashBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/grid?page='.$page; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/=(\d+)$/', $html->find('.pagination > a[!class]', -1)->href, $matches); diff --git a/bridges/ViadeoCompany.php b/bridges/ViadeoCompany.php index fb4badbb..2eef3b6e 100644 --- a/bridges/ViadeoCompany.php +++ b/bridges/ViadeoCompany.php @@ -22,7 +22,7 @@ class ViadeoCompany extends BridgeAbstract{ $html = ''; $link = 'http://www.viadeo.com/fr/company/'.$param[c]; - $html = $this->file_get_html($link) or $this->returnServerError('Could not request Viadeo.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('Could not request Viadeo.'); foreach($html->find('//*[@id="company-newsfeed"]/ul/li') as $element) { 
$title = $element->find('p', 0)->innertext; diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php index b8714441..8cfb5527 100644 --- a/bridges/VineBridge.php +++ b/bridges/VineBridge.php @@ -24,7 +24,7 @@ class VineBridge extends BridgeAbstract { $html = ''; $uri = 'http://vine.co/u/'.$param['u'].'?mode=list'; - $html = $this->file_get_html($uri) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($uri) or $this->returnServerError('No results for this query.'); foreach($html->find('.post') as $element) { $a = $element->find('a', 0); diff --git a/bridges/WallpaperStopBridge.php b/bridges/WallpaperStopBridge.php index 1888333f..36ba6745 100644 --- a/bridges/WallpaperStopBridge.php +++ b/bridges/WallpaperStopBridge.php @@ -56,7 +56,7 @@ class WallpaperStopBridge extends BridgeAbstract { for ($page = 1; $page <= $lastpage; $page++) { $link = $baseUri.'/'.$this->category.'-wallpaper/'.(!empty($this->subcategory)?$this->subcategory.'-wallpaper/':'').'desktop-wallpaper-'.$page.'.html'; - $html = $this->file_get_html($link) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM($link) or $this->returnServerError('No results for this query.'); if ($page === 1) { preg_match('/-(\d+)\.html$/', $html->find('.pagination > .last', 0)->href, $matches); diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php index b7739e48..c32ca8f6 100644 --- a/bridges/WhydBridge.php +++ b/bridges/WhydBridge.php @@ -28,18 +28,18 @@ class WhydBridge extends BridgeAbstract{ { $this->request = $param['u']; if (strlen(preg_replace("/[^0-9a-f]/",'', $this->request)) == 24) { // is input the userid ? 
- $html = $this->file_get_html('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/u/'.preg_replace("/[^0-9a-f]/",'', $this->request)) or $this->returnServerError('No results for this query.'); } else { // input may be the username - $html = $this->file_get_html('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com/search?q='.urlencode($this->request)) or $this->returnServerError('No results for this query.'); for ($j = 0; $j < 5; $j++) { if (strtolower($html->find('div.user', $j)->find('a',0)->plaintext) == strtolower($this->request)) { - $html = $this->file_get_html('http://www.whyd.com' . $html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); + $html = $this->getSimpleHTMLDOM('http://www.whyd.com' . 
$html->find('div.user', $j)->find('a', 0)->getAttribute('href')) or $this->returnServerError('No results for this query'); break; } } } $this->name = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; - } + } else { $this->returnClientError('You must specify username'); diff --git a/bridges/WikipediaDEBridge.php b/bridges/WikipediaDEBridge.php new file mode 100644 index 00000000..9ccd0ca8 --- /dev/null +++ b/bridges/WikipediaDEBridge.php @@ -0,0 +1,48 @@ +maintainer = "cnlpete"; + $this->name = "Wikipedia DE Today's Featured Article..."; + $this->uri = "https://de.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2015-11-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://de.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://de.wikipedia.org'; + $link = '/wiki/Wikipedia:Hauptseite'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia DE.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + $element->find('div', -1)->outertext = ''; + + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + + $html2 = $this->getSimpleHTMLDOM($item->uri) or $this->returnError('Could not request Wikipedia DE '.$item->title.'.', 404); + $element2 = $html2->find('div[id=mw-content-text]', 0); + $item->content = str_replace('href="/', 'href="'.$host.'/', $element2->innertext); + + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia DE "Today\'s Featured Article"'; + } + + public function getURI(){ + return 'https://de.wikipedia.org/wiki/Wikipedia:Hauptseite'; + } + + public function getCacheDuration(){ + return 3600*8; // 8 hours + } +} diff --git a/bridges/WikipediaENBridge.php b/bridges/WikipediaENBridge.php new file mode 100644 index 00000000..ac8ab296 
--- /dev/null +++ b/bridges/WikipediaENBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EN 'Today's Featured Article...'"; + $this->uri = "https://en.wikipedia.org/"; + $this->description = "Returns the highlighted en.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://en.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://en.wikipedia.org'; + $link = '/wiki/Main_Page'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EN.', 404); + + $element = $html->find('div[id=mp-tfa]', 0); + // Clean the bottom of the featured article + $element->find('div', -1)->outertext = ''; + $item = new \Item(); + $item->uri = $host.$element->find('p', 0)->find('a', 0)->href; + $item->title = $element->find('p',0)->find('a',0)->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EN "Today\'s Featued Article"'; + } + + public function getURI(){ + return 'https://en.wikipedia.org/wiki/Main_Page'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WikipediaEOBridge.php b/bridges/WikipediaEOBridge.php new file mode 100644 index 00000000..a90c5032 --- /dev/null +++ b/bridges/WikipediaEOBridge.php @@ -0,0 +1,44 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia EO 'Artikolo de la semajno'"; + $this->uri = "https://eo.wikipedia.org/"; + $this->description = "Returns the highlighted eo.wikipedia.org article."; + $this->update = "2014-05-25"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://eo.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://eo.wikipedia.org'; + $link = '/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + + 
$html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia EO.', 404); + + $element = $html->find('div[id=mf-tfa]', 0); + // Link to article + $link = $element->find('p', -2)->find('a', 0); + $item = new \Item(); + $item->uri = $host.$link->href; + $item->title = $link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + $this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia EO "Artikolo de la semajno"'; + } + + public function getURI(){ + return 'https://eo.wikipedia.org/wiki/Vikipedio:%C4%88efpa%C4%9Do'; + } + + public function getCacheDuration(){ + return 3600*12; // 12 hours + } +} diff --git a/bridges/WikipediaFRBridge.php b/bridges/WikipediaFRBridge.php new file mode 100644 index 00000000..8ee65cda --- /dev/null +++ b/bridges/WikipediaFRBridge.php @@ -0,0 +1,46 @@ +maintainer = "gsurrel"; + $this->name = "Wikipedia FR 'Lumière sur...'"; + $this->uri = "https://fr.wikipedia.org/"; + $this->description = "Returns the highlighted fr.wikipedia.org article."; + $this->update = "2016-06-04"; + + } + + public function collectData(array $param){ + $html = ''; + $host = 'http://fr.wikipedia.org'; + // If you want HTTPS access instead, uncomment the following line: + //$host = 'https://fr.wikipedia.org'; + $link = '/wiki/Wikip%C3%A9dia:Accueil_principal'; + + $html = $this->getSimpleHTMLDOM($host.$link) or $this->returnError('Could not request Wikipedia FR.', 404); + + $element = $html->find('div[id=mf-lumieresur]', 0); + # Use the "Lire la suite" link to dependably get the title of the article + # usually it's a child of a li.BA element (Bon article) + # occasionally it's a li.AdQ (Article de qualité) + $lirelasuite_link = $element->find('.BA > i > a, .AdQ > i > a', 0); + $item = new \Item(); + $item->uri = $host.$lirelasuite_link->href; + $item->title = $lirelasuite_link->title; + $item->content = str_replace('href="/', 'href="'.$host.'/', $element->innertext); + 
$this->items[] = $item; + } + + public function getName(){ + return 'Wikipedia FR "Lumière sur..."'; + } + + public function getURI(){ + return 'https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Accueil_principal'; + } + + public function getCacheDuration(){ + return 3600*4; // 4 hours + } +} diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index f8481015..ef728e5b 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -33,7 +33,7 @@ class WordPressBridge extends BridgeAbstract { return WORDPRESS_TYPE_ATOM; // Make ATOM default } - // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') + // Replaces all 'link' tags with 'url' for simplehtmldom to actually find 'links' ('url') private function ReplaceLinkTagsWithUrlTags($element){ // We need to fix the 'link' tag as simplehtmldom cannot parse it (just rename it and load back as dom) $element_text = $element->outertext; @@ -64,7 +64,7 @@ class WordPressBridge extends BridgeAbstract { } $this->url = $this->url.'/feed/atom'; - $html = $this->file_get_html($this->url) or $this->returnServerError("Could not request {$this->url}."); + $html = $this->getSimpleHTMLDOM($this->url) or $this->returnServerError("Could not request {$this->url}."); // Notice: We requested an ATOM feed, however some sites return RSS feeds instead! 
$type = $this->DetectContentType($html); @@ -97,7 +97,7 @@ class WordPressBridge extends BridgeAbstract { $item->timestamp = strtotime($article->find('updated', 0)->innertext); } - $article_html = $this->file_get_html($item->uri); + $article_html = $this->getSimpleHTMLDOM($item->uri); // Attempt to find most common content div if(empty($item->content)){ @@ -126,7 +126,7 @@ class WordPressBridge extends BridgeAbstract { $this->items[] = $item; $i++; } - } + } } else { $this->returnServerError("Sorry, {$this->url} doesn't seem to be a Wordpress blog."); } diff --git a/bridges/WorldOfTanks.php b/bridges/WorldOfTanks.php index 77fa7b10..fcfc57b4 100644 --- a/bridges/WorldOfTanks.php +++ b/bridges/WorldOfTanks.php @@ -70,15 +70,15 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{ } else { $this->uri = WORLD_OF_TANKS.$this->lang.NEWS.'pc-browser/'.$param['category']."/"; } - $html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); + $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); $this->message("loaded HTML from ".$this->getURI()); - // customize name + // customize name $this->name = $html->find('title', 0)->innertext; foreach($html->find('.b-imgblock_ico') as $infoLink) { $this->parseLine($infoLink); } } - + private function parseLine($infoLink) { $item = new Item(); $item->uri = WORLD_OF_TANKS.$infoLink->href; diff --git a/bridges/XbooruBridge.php b/bridges/XbooruBridge.php index 691b46ce..4becacc1 100644 --- a/bridges/XbooruBridge.php +++ b/bridges/XbooruBridge.php @@ -25,27 +25,27 @@ class XbooruBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 0;$tags=''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); $page = $page - 1; $page = $page * 50; } - if (isset($param['t'])) { - $tags = 
urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); + $html = $this->getSimpleHTMLDOM("http://xbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnServerError('Could not request Xbooru.'); foreach($html->find('div[class=content] span') as $element) { $item = new \Item(); $item->uri = 'http://xbooru.com/'.$element->find('a', 0)->href; - $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); + $item->postid = (int)preg_replace("/[^0-9]/",'', $element->getAttribute('id')); $item->timestamp = time(); $thumbnailUri = $element->find('img', 0)->src; $item->tags = $element->find('img', 0)->getAttribute('alt'); $item->title = 'Xbooru | '.$item->postid; $item->content = '
Tags: '.$item->tags; - $this->items[] = $item; + $this->items[] = $item; } } diff --git a/bridges/YandereBridge.php b/bridges/YandereBridge.php index 223c9417..39afb25f 100644 --- a/bridges/YandereBridge.php +++ b/bridges/YandereBridge.php @@ -25,18 +25,18 @@ class YandereBridge extends BridgeAbstract{ public function collectData(array $param){ $page = 1; $tags = ''; - if (isset($param['p'])) { - $page = (int)preg_replace("/[^0-9]/",'', $param['p']); + if (isset($param['p'])) { + $page = (int)preg_replace("/[^0-9]/",'', $param['p']); } - if (isset($param['t'])) { - $tags = urlencode($param['t']); + if (isset($param['t'])) { + $tags = urlencode($param['t']); } - $html = $this->file_get_html("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); + $html = $this->getSimpleHTMLDOM("https://yande.re/post?page=$page&tags=$tags") or $this->returnServerError('Could not request Yandere.'); $input_json = explode('Post.register(', $html); foreach($input_json as $element) $data[] = preg_replace('/}\)(.*)/', '}', $element); unset($data[0]); - + foreach($data as $datai) { $json = json_decode($datai, TRUE); $item = new \Item(); @@ -45,7 +45,7 @@ class YandereBridge extends BridgeAbstract{ $item->timestamp = $json['created_at']; $item->imageUri = $json['file_url']; $item->title = 'Yandere | '.$json['id']; - $item->content = '
Tags: '.$json['tags']; + $item->content = '
Tags: '.$json['tags']; $this->items[] = $item; } } diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 02ded71f..52be476a 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -1,6 +1,6 @@ file_get_html($this->uri."watch?v=$vid"); + $html = $this->getSimpleHTMLDOM($this->uri."watch?v=$vid"); $author = $html->innertext; $author = substr($author, strpos($author, '"author=') + 8); $author = substr($author, 0, strpos($author, '\u0026')); @@ -138,9 +138,9 @@ class YoutubeBridge extends BridgeAbstract { $url_listing = $this->uri.'channel/'.urlencode($this->request).'/videos'; } if (!empty($url_feed) && !empty($url_listing)) { - if ($xml = $this->file_get_html($url_feed)) { + if ($xml = $this->getSimpleHTMLDOM($url_feed)) { $this->ytBridgeParseXmlFeed($xml); - } else if ($html = $this->file_get_html($url_listing)) { + } else if ($html = $this->getSimpleHTMLDOM($url_listing)) { $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); } else $this->returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); } @@ -148,15 +148,15 @@ class YoutubeBridge extends BridgeAbstract { else if (isset($param['p'])) { /* playlist mode */ $this->request = $param['p']; $url_listing = $this->uri.'playlist?list='.urlencode($this->request); - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); $this->request = 'Playlist: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } else if (isset($param['s'])) { /* search mode */ - $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); + $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); $url_listing = $this->uri.'results?search_query='.urlencode($this->request).'&page='.$page.'&filters=video&search_sort=video_date_uploaded'; - $html = $this->file_get_html($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $html = $this->getSimpleHTMLDOM($url_listing) or $this->returnServerError("Could not request YouTube. Tried:\n - $url_listing"); $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3'); $this->request = 'Search: '.str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); } diff --git a/bridges/ZatazBridge.php b/bridges/ZatazBridge.php index 0c381dc7..d0a53b21 100644 --- a/bridges/ZatazBridge.php +++ b/bridges/ZatazBridge.php @@ -12,7 +12,7 @@ class ZatazBridge extends BridgeAbstract { } public function collectData(array $param) { - $html = $this->file_get_html($this->uri) or $this->returnServerError('Could not request ' . $this->uri); + $html = $this->getSimpleHTMLDOM($this->uri) or $this->returnServerError('Could not request ' . 
$this->uri); $recent_posts = $html->find('#recent-posts-3', 0)->find('ul', 0)->find('li'); foreach ($recent_posts as $article) { @@ -24,7 +24,7 @@ class ZatazBridge extends BridgeAbstract { } private function getDetails($uri) { - $html = $this->file_get_html($uri) or exit; + $html = $this->getSimpleHTMLDOM($uri) or exit; $item = new \Item(); diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index d6e6714a..7d4b0be8 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -31,7 +31,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { $category = '/'.$param['category'].'/'; $url = $this->getURI().$category.'rss.xml'; - $html = $this->file_get_html($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); + $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Zone Telechargement: '.$url); foreach($html->find('item') as $element) { $item = new \Item(); From d74beb6c6a46f00190778be6ebd6f2a522444135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:07:30 +0200 Subject: [PATCH 06/14] core: move message() method to parent class BridgeAbstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 2daca2af..c5a8ae05 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -105,6 +105,18 @@ abstract class BridgeAbstract implements BridgeInterface{ return $this; } + public function message($text) { + if(!file_exists('DEBUG')){ + return; + } + $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); + $calling = $backtrace[2]; + $message = $calling["file"].":".$calling["line"] + ." class ".get_class($this)."->".$calling["function"] + ." 
- ".$text; + error_log($message); + } + protected function getContents($url,$use_include_path=false,$context=null,$offset=0,$maxlen=null){ $contextOptions = array( 'http' => array( @@ -233,15 +245,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // filename is NO GOOD // unlink($filename); } - - public function message($text) { - $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); - $calling = $backtrace[2]; - $message = $calling["file"].":".$calling["line"] - ." class ".get_class($this)."->".$calling["function"] - ." - ".$text; - error_log($message); - } + } class Bridge{ From fe58d23c17f3cfdb7d0e2f5ee4baa7c7bf4936cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:08:48 +0200 Subject: [PATCH 07/14] core: use proxy when defined in HttpCachingBridgeAbstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index c5a8ae05..f8592f3b 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -185,6 +185,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // $this->message("loading cached file from ".$filename." for page at url ".$url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); + $content=file_get_contents($filename); } else { // $this->message("we have no local copy of ".$url." 
Downloading to ".$filename); $dir = substr($filename, 0, strrpos($filename, '/')); @@ -192,11 +193,14 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // $this->message("creating directories for ".$dir); mkdir($dir, 0777, true); } - $this->download_remote($url, $filename); + $content=$this->getContents($url); + if($content!==false){ + file_put_contents($filename,$content); + } } - return file_get_contents($filename); + return $content; } - + public function get_cached_time($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); // TODO build this from the variable given to Cache From 226484ba221a368ec033fecce074a8da4ce48ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:09:37 +0200 Subject: [PATCH 08/14] core: remove useless HttpCachingBridgeAbstract::download_remote() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index f8592f3b..b2010ec0 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -223,23 +223,6 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } } - public function download_remote($url , $save_path) { - $f = fopen( $save_path , 'w+'); - if($f) { - $handle = fopen($url , "rb"); - if($handle) { - while (!feof($handle)) { - $contents = fread($handle, 8192); - if($contents) { - fwrite($f , $contents); - } - } - fclose($handle); - } - fclose($f); - } - } - public function remove_from_cache($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); // TODO build this from the variable given to Cache From 86515a1560c77def9404eb484e088b19fca4da64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Fri, 8 Jul 2016 19:26:07 +0200 Subject: [PATCH 09/14] core: use proxy when 
defined in RssExpander MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index b2010ec0..cca4b5a8 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -77,7 +77,7 @@ abstract class BridgeAbstract implements BridgeInterface{ /** * Define default bridge name - */ + */ public function getName(){ return $this->name; } @@ -166,7 +166,7 @@ abstract class BridgeAbstract implements BridgeInterface{ * After all, rss-bridge is not respaw, isn't it ? */ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { - + /** * Maintain locally cached versions of pages to download to avoid multiple doiwnloads. * A file name is generated by replacing all "/" by "_", and the file is saved below this bridge cache @@ -213,7 +213,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { $this->get_cached($url); } return filectime($filename); - } + } private function refresh_in_cache($pageCacheDir, $filename) { $currentPath = $filename; @@ -267,7 +267,7 @@ class Bridge{ } $pathBridge = self::getDir() . $nameBridge . '.php'; - + if( !file_exists($pathBridge) ){ throw new \Exception('The bridge you looking for does not exist. 
It should be at path '.$pathBridge); } @@ -349,8 +349,11 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ } // $this->message("Loading from ".$param['url']); // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time - $rssContent = simplexml_load_file($name) or $this->returnServerError('Could not request '.$name); -// $this->message("loaded RSS from ".$param['url']); + $content=$this->getContents($name) or + $this->returnServerError('Could not request '.$name); + + $rssContent = simplexml_load_string($content); + // $this->message("loaded RSS from ".$param['url']); // TODO insert RSS format detection // we suppose for now, we have some RSS 2.0 $this->collect_RSS_2_0_data($rssContent); From 6e2c7ceaf43c9e3f504fc2778c6eef23d9d1e25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Tue, 9 Aug 2016 14:57:42 +0200 Subject: [PATCH 10/14] bridges: rename file_get_html to getSimpleHTMLDOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- bridges/ArstechnicaBridge.php | 4 ++-- bridges/BastaBridge.php | 2 +- bridges/CastorusBridge.php | 22 +++++++++++----------- bridges/EstCeQuonMetEnProdBridge.php | 4 ++-- bridges/NextgovBridge.php | 6 +++--- bridges/NumeramaBridge.php | 4 ++-- bridges/SensCritiqueBridge.php | 12 ++++++------ bridges/ShanaprojectBridge.php | 4 ++-- bridges/TheHackerNewsBridge.php | 6 +++--- bridges/WeLiveSecurityBridge.php | 4 ++-- bridges/WikipediaBridge.php | 4 ++-- bridges/ZDNetBridge.php | 4 ++-- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/bridges/ArstechnicaBridge.php b/bridges/ArstechnicaBridge.php index 59f0fabe..a0fcb54c 100644 --- a/bridges/ArstechnicaBridge.php +++ b/bridges/ArstechnicaBridge.php @@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract { function ExtractContent($url) { #echo $url; - $html2 = file_get_html($url); + $html2 = getSimpleHTMLDOM($url); $text = 
$html2->find("section[id='article-guts']", 0); /*foreach ($text->find('