From 36d39d3f599ef2221455b7aab3d40a39280e8ac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sat, 25 Jun 2016 12:12:54 +0200 Subject: [PATCH 1/2] implement proxy feature without modifying simple_html_dom code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pierre Mazière --- lib/Bridge.php | 40 +++++++++++++++--------- vendor/simplehtmldom/simple_html_dom.php | 4 +-- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 3a104a1b..8cc0bee9 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -90,6 +90,31 @@ abstract class BridgeAbstract implements BridgeInterface{ return $this; } + protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ + $contextOptions = array( + 'http' => array( + 'user_agent'=>ini_get('user_agent') + ), + ); + + if(defined('PROXY_URL')) { + $contextOptions['http']['proxy'] = PROXY_URL; + $contextOptions['http']['request_fulluri'] = true; + + if(is_null($context)){ + $context = stream_context_create($contextOptions); + } else { + $prevContext=$context; + if(!stream_context_set_option($context,$contextOptions)){ + $context=$prevContext; + }; + } + } + return file_get_html($url,$use_include_path,$context,$offset,$maxLen, + $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText, + $defaultSpanText); + } + } /** @@ -354,19 +379,4 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ } } -function advanced_file_get_contents($url) { - if(defined('PROXY_URL')) { - $context = array( - 'http' => array( - 'proxy' => PROXY_URL, - 'request_fulluri' => true, - ), - ); - $context = stream_context_create($context); - return file_get_contents($url, false, $context); - } else { - return 
file_get_contents($url); - } - -} diff --git a/vendor/simplehtmldom/simple_html_dom.php b/vendor/simplehtmldom/simple_html_dom.php index 22aaa340..b5d30898 100644 --- a/vendor/simplehtmldom/simple_html_dom.php +++ b/vendor/simplehtmldom/simple_html_dom.php @@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset = // We DO force the tags to be terminated. $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done. - $contents = advanced_file_get_contents($url, $use_include_path, $context, $offset); + $contents = file_get_contents($url, $use_include_path, $context, $offset); // Paperg - use our own mechanism for getting the contents as we want to control the timeout. //$contents = retrieve_url_contents($url); if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) @@ -1094,7 +1094,7 @@ class simple_html_dom function load_file() { $args = func_get_args(); - $this->load(call_user_func_array('advanced_file_get_contents', $args), true); + $this->load(call_user_func_array('file_get_contents', $args), true); // Throw an error if we can't properly load the dom. 
if (($error=error_get_last())!==null) { $this->clear(); From 955eecc299e67a1062a9e75149b6c098fb9d2cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= Date: Sat, 25 Jun 2016 23:17:42 +0200 Subject: [PATCH 2/2] use BridgeAbstract::file_get_html in all bridges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit instead of simple_html_dom function file_get_html Signed-off-by: Pierre Mazière --- bridges/ABCTabsBridge.php | 2 +- bridges/AcrimedBridge.php | 2 +- bridges/AllocineFRBridge.php | 2 +- bridges/AllocineT5Bridge.php | 2 +- bridges/AllocineTueursEnSerieBridge.php | 2 +- bridges/AnimeUltimeBridge.php | 2 +- bridges/BandcampBridge.php | 2 +- bridges/BastaBridge.php | 4 ++-- bridges/BlaguesDeMerdeBridge.php | 2 +- bridges/BooruprojectBridge.php | 2 +- bridges/CADBridge.php | 4 ++-- bridges/CNETBridge.php | 4 ++-- bridges/CoinDeskBridge.php | 4 ++-- bridges/CollegeDeFranceBridge.php | 2 +- bridges/CommonDreamsBridge.php | 4 ++-- bridges/CopieDoubleBridge.php | 2 +- bridges/CourrierInternationalBridge.php | 4 ++-- bridges/CpasbienBridge.php | 2 +- bridges/CryptomeBridge.php | 2 +- bridges/DailymotionBridge.php | 8 ++++---- bridges/DanbooruBridge.php | 2 +- bridges/DansTonChatBridge.php | 2 +- bridges/DauphineLibereBridge.php | 6 +++--- bridges/DeveloppezDotComBridge.php | 4 ++-- bridges/DilbertBridge.php | 2 +- bridges/DollbooruBridge.php | 2 +- bridges/DuckDuckGoBridge.php | 2 +- bridges/EZTVBridge.php | 2 +- bridges/EliteDangerousGalnetBridge.php | 2 +- bridges/FSBridge.php | 4 ++-- bridges/FacebookBridge.php | 4 ++-- bridges/FierPandaBridge.php | 2 +- bridges/FlickrExploreBridge.php | 2 +- bridges/FlickrTagBridge.php | 6 +++--- bridges/FootitoBridge.php | 2 +- bridges/FourchanBridge.php | 2 +- bridges/FrandroidBridge.php | 4 ++-- bridges/FuturaSciencesBridge.php | 4 ++-- bridges/GBAtempBridge.php | 6 +++--- bridges/GelbooruBridge.php | 2 +- bridges/GiphyBridge.php | 4 ++-- bridges/GizmodoFRBridge.php | 4 ++-- 
bridges/GooglePlusPostBridge.php | 4 ++-- bridges/GoogleSearchBridge.php | 2 +- bridges/GuruMedBridge.php | 4 ++-- bridges/HDWallpapersBridge.php | 2 +- bridges/HentaiHavenBridge.php | 2 +- bridges/IdenticaBridge.php | 2 +- bridges/InstagramBridge.php | 2 +- bridges/JapanExpoBridge.php | 4 ++-- bridges/KonachanBridge.php | 2 +- bridges/KoreusBridge.php | 4 ++-- bridges/LeBonCoinBridge.php | 2 +- bridges/LeJournalDuGeekBridge.php | 4 ++-- bridges/LeMondeInformatiqueBridge.php | 4 ++-- bridges/LeMotDuJourBridge.php | 4 ++-- bridges/LesJoiesDuCodeBridge.php | 2 +- bridges/LichessBridge.php | 4 ++-- bridges/LinkedInCompany.php | 2 +- bridges/LolibooruBridge.php | 2 +- bridges/MalikiBridge.php | 4 ++-- bridges/MemoLinuxBridge.php | 4 ++-- bridges/MilbooruBridge.php | 2 +- bridges/MondeDiploBridge.php | 2 +- bridges/MsnMondeBridge.php | 4 ++-- bridges/MspabooruBridge.php | 2 +- bridges/NakedSecurityBridge.php | 4 ++-- bridges/NasaApodBridge.php | 4 ++-- bridges/NeuviemeArtBridge.php | 4 ++-- bridges/NextInpactBridge.php | 4 ++-- bridges/NiceMatinBridge.php | 4 ++-- bridges/NovelUpdatesBridge.php | 2 +- bridges/NumeramaBridge.php | 4 ++-- bridges/OpenClassroomsBridge.php | 2 +- bridges/OpenTheoryBridge.php | 4 ++-- bridges/ParuVenduImmoBridge.php | 2 +- bridges/PickyWallpapersBridge.php | 2 +- bridges/PinterestBridge.php | 4 ++-- bridges/PlanetLibreBridge.php | 4 ++-- bridges/ProjectMGameBridge.php | 2 +- bridges/RTBFBridge.php | 2 +- bridges/RaymondBridge.php | 4 ++-- bridges/Releases3DSBridge.php | 2 +- bridges/ReporterreBridge.php | 4 ++-- bridges/Rue89Bridge.php | 2 +- bridges/Rule34Bridge.php | 2 +- bridges/Rule34pahealBridge.php | 2 +- bridges/SafebooruBridge.php | 2 +- bridges/SakugabooruBridge.php | 2 +- bridges/ScilogsBridge.php | 4 ++-- bridges/ScmbBridge.php | 2 +- bridges/ScoopItBridge.php | 2 +- bridges/SegfaultMintBridge.php | 4 ++-- bridges/Sexactu.php | 2 +- bridges/SiliconBridge.php | 4 ++-- bridges/SuperbWallpapersBridge.php | 4 ++-- 
bridges/T411Bridge.php | 4 ++-- bridges/TagBoardBridge.php | 2 +- bridges/TbibBridge.php | 2 +- bridges/TheCodingLoveBridge.php | 2 +- bridges/ThePirateBayBridge.php | 2 +- bridges/TuxboardBridge.php | 4 ++-- bridges/TwitterBridge.php | 4 ++-- bridges/TwitterBridgeClean.php | 4 ++-- bridges/TwitterBridgeCleanExtended.php | 4 ++-- bridges/TwitterBridgeExtended.php | 4 ++-- bridges/TwitterBridgeTweaked.php | 4 ++-- bridges/UnsplashBridge.php | 2 +- bridges/ViadeoCompany.php | 2 +- bridges/VineBridge.php | 2 +- bridges/WallpaperStopBridge.php | 2 +- bridges/WhydBridge.php | 6 +++--- bridges/WikipediaDEBridge.php | 4 ++-- bridges/WikipediaENBridge.php | 2 +- bridges/WikipediaEOBridge.php | 2 +- bridges/WikipediaFRBridge.php | 2 +- bridges/WordPressBridge.php | 4 ++-- bridges/WorldOfTanks.php | 2 +- bridges/XbooruBridge.php | 2 +- bridges/YandereBridge.php | 2 +- bridges/YoutubeBridge.php | 10 +++++----- bridges/ZatazBridge.php | 4 ++-- bridges/ZoneTelechargementBridge.php | 2 +- 123 files changed, 187 insertions(+), 187 deletions(-) diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php index 7481f3a1..591cca37 100644 --- a/bridges/ABCTabsBridge.php +++ b/bridges/ABCTabsBridge.php @@ -15,7 +15,7 @@ class ABCTabsBridge extends BridgeAbstract{ public function collectData(array $param){ $html = ''; - $html = file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404); + $html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404); $table = $html->find('table#myTable', 0)->children(1); foreach ($table->find('tr') as $tab) diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index b432c72a..4ef08bd6 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -29,7 +29,7 @@ class AcrimedBridge extends RssExpander{ $item->title = trim($newsItem->title); $item->timestamp = strtotime($dc->date); - 
$articlePage = file_get_html($newsItem->link); + $articlePage = $this->file_get_html($newsItem->link); $article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext); $article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/"); diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index c95fcf71..7270b97d 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -15,7 +15,7 @@ class AllocineFRBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); + $html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); foreach($html->find('figure.media-meta-fig') as $element) { diff --git a/bridges/AllocineT5Bridge.php b/bridges/AllocineT5Bridge.php index b52df289..4c13cdf4 100644 --- a/bridges/AllocineT5Bridge.php +++ b/bridges/AllocineT5Bridge.php @@ -15,7 +15,7 @@ class AllocineT5Bridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); + $html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); foreach($html->find('figure.media-meta-fig') as $element) { diff --git a/bridges/AllocineTueursEnSerieBridge.php b/bridges/AllocineTueursEnSerieBridge.php index 4320c01b..6a27e618 100644 --- a/bridges/AllocineTueursEnSerieBridge.php +++ b/bridges/AllocineTueursEnSerieBridge.php @@ -15,7 +15,7 @@ class AllocineTueursEnSerieBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); + $html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404); foreach($html->find('figure.media-meta-fig') as $element) { diff --git 
a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index 2f832d70..20f0bfd1 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -67,7 +67,7 @@ class AnimeUltimeBridge extends BridgeAbstract { //Retrive page contents $website = 'http://www.anime-ultime.net/'; $url = $website.'history-0-1/'.$requestFilter; - $html = file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500); + $html = $this->file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500); //Relases are sorted by day : process each day individually foreach ($html->find('div.history', 0)->find('h3') as $daySection) { diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php index ce002e5c..ce0c65cb 100644 --- a/bridges/BandcampBridge.php +++ b/bridges/BandcampBridge.php @@ -26,7 +26,7 @@ class BandcampBridge extends BridgeAbstract{ $html = ''; if (isset($param['tag'])) { $this->request = $param['tag']; - $html = file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404); + $html = $this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404); } else { $this->returnError('You must specify tag (/tag/...)', 400); diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php index 0b84ff39..01f7611b 100644 --- a/bridges/BastaBridge.php +++ b/bridges/BastaBridge.php @@ -16,11 +16,11 @@ class BastaBridge extends BridgeAbstract{ function BastaExtractContent($url) { - $html2 = file_get_html($url); + $html2 = $this->file_get_html($url); $text = $html2->find('div.texte', 0)->innertext; return $text; } - $html = file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnError('Could not request Bastamag.', 404); + $html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or 
$this->returnError('Could not request Bastamag.', 404); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/BlaguesDeMerdeBridge.php b/bridges/BlaguesDeMerdeBridge.php index 2ba230f9..ca872420 100644 --- a/bridges/BlaguesDeMerdeBridge.php +++ b/bridges/BlaguesDeMerdeBridge.php @@ -12,7 +12,7 @@ class BlaguesDeMerdeBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404); + $html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404); foreach($html->find('article.joke_contener') as $element) { $item = new Item(); diff --git a/bridges/BooruprojectBridge.php b/bridges/BooruprojectBridge.php index 7440cc2c..9f9546a3 100644 --- a/bridges/BooruprojectBridge.php +++ b/bridges/BooruprojectBridge.php @@ -43,7 +43,7 @@ class BooruprojectBridge extends BridgeAbstract{ if (empty($param['i'])) { $this->returnError('Please enter a ***.booru.org instance.', 404); } - $html = file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404); + $html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404); foreach($html->find('div[class=content] span') as $element) { diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php index 28ef697e..7857282a 100644 --- a/bridges/CADBridge.php +++ b/bridges/CADBridge.php @@ -22,7 +22,7 @@ class CADBridge extends BridgeAbstract{ } function CADExtractContent($url) { - $html3 = file_get_html($url); + $html3 = $this->file_get_html($url); $htmlpart = explode("/", $url); if ($htmlpart[3] == 'cad') preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/cad-\S*png/", $html3, $url2); @@ -36,7 +36,7 @@ class CADBridge extends BridgeAbstract{ return ''; 
} - $html = file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404); + $html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404); $limit = 0; foreach($html->find('item') as $element) { if($limit < 5) { diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 42c78a94..ea0898a2 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -51,7 +51,7 @@ class CNETBridge extends BridgeAbstract { $this->topicName = $param['topic']; $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/'); - $html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500); + $html = $this->file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500); $limit = 0; foreach($html->find('div.assetBody') as $element) { @@ -65,7 +65,7 @@ class CNETBridge extends BridgeAbstract { if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { - $article_html = file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500); + $article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500); if (is_null($article_thumbnail)) $article_thumbnail = $article_html->find('div.originalImage', 0); diff --git a/bridges/CoinDeskBridge.php b/bridges/CoinDeskBridge.php index 79c30ebd..2ca88d1a 100644 --- a/bridges/CoinDeskBridge.php +++ b/bridges/CoinDeskBridge.php @@ -19,12 +19,12 @@ class CoinDeskBridge extends BridgeAbstract{ return $string; } function CoinDeskExtractContent($url) { - $html2 = file_get_html($url); + $html2 = $this->file_get_html($url); $text = $html2->find('div.single-content', 0)->innertext; $text = strip_tags($text, '

'); return $text; } - $html = file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); + $html = $this->file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404); $limit = 0; foreach($html->find('entry') as $element) { diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php index b83f828c..585cebaf 100644 --- a/bridges/CollegeDeFranceBridge.php +++ b/bridges/CollegeDeFranceBridge.php @@ -33,7 +33,7 @@ class CollegeDeFranceBridge extends BridgeAbstract{ * * */ - $html = file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404); + $html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404); foreach($html->find('a[data-target]') as $element) { $item = new \Item(); $item->title = $element->find('.title', 0)->plaintext; diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index 2a30f37e..6f55b63c 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -20,14 +20,14 @@ class CommonDreamsBridge extends BridgeAbstract{ } function CommonDreamsExtractContent($url) { - $html3 = file_get_html($url); + $html3 = $this->file_get_html($url); $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; $html3->clear(); unset ($html3); return $text; } - $html = file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404); + $html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404); $limit = 0; foreach($html->find('item') as $element) { if($limit < 4) { diff --git 
a/bridges/CopieDoubleBridge.php b/bridges/CopieDoubleBridge.php index 12b3a8a9..2030979e 100644 --- a/bridges/CopieDoubleBridge.php +++ b/bridges/CopieDoubleBridge.php @@ -13,7 +13,7 @@ class CopieDoubleBridge extends BridgeAbstract{ public function collectData(array $param){ - $html = file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404); + $html = $this->file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404); $table = $html->find('table table', 2); foreach($table->find('tr') as $element) diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index cf7126c9..e35eb470 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -15,7 +15,7 @@ class CourrierInternationalBridge extends BridgeAbstract{ $html = ''; - $html = file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500); + $html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500); @@ -33,7 +33,7 @@ class CourrierInternationalBridge extends BridgeAbstract{ $item->uri = "http://courrierinternational.fr/".$item->uri; } - $page = file_get_html($item->uri); + $page = $this->file_get_html($item->uri); $cleaner = new HTMLSanitizer(); diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index 091921bc..1f67cf3f 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -44,7 +44,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ $html = ''; if (isset($param['q'])) { /* keyword search mode */ $this->request = str_replace(" ","-",trim($param['q'])); - $html = file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results for this query.', 404); + $html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results 
for this query.', 404); } else { $this->returnError('You must specify a keyword (?q=...).', 400); diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php index c76e0aac..9f80671b 100644 --- a/bridges/CryptomeBridge.php +++ b/bridges/CryptomeBridge.php @@ -28,7 +28,7 @@ class CryptomeBridge extends BridgeAbstract{ // If you want HTTPS access instead, uncomment the following line: //$link = 'https://secure.netsolhost.com/cryptome.org/'; - $html = file_get_html($link) or $this->returnError('Could not request Cryptome.', 404); + $html = $this->file_get_html($link) or $this->returnError('Could not request Cryptome.', 404); if (!empty($param['n'])) { /* number of documents */ $num = min(max(1, $param['n']+0), $num); } diff --git a/bridges/DailymotionBridge.php b/bridges/DailymotionBridge.php index 357f2eaa..c8bc429a 100644 --- a/bridges/DailymotionBridge.php +++ b/bridges/DailymotionBridge.php @@ -47,7 +47,7 @@ class DailymotionBridge extends BridgeAbstract{ function getMetadata($id) { $metadata=array(); - $html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404); + $html2 = $this->file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404); $metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content'); $metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') ); $metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content'); @@ -63,15 +63,15 @@ class DailymotionBridge extends BridgeAbstract{ if (isset($param['u'])) { // user timeline mode $this->request = $param['u']; - $html = file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnError('Could not request Dailymotion.', 404); + $html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or 
$this->returnError('Could not request Dailymotion.', 404); } else if (isset($param['p'])) { // playlist mode $this->request = strtok($param['p'], '_'); - $html = file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404); + $html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404); } else if (isset($param['s'])) { // search mode $this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']); - $html = file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404); + $html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404); } else { $this->returnError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) 
or search (?s=...)', 400); diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php index 790b1983..ac4bef6a 100644 --- a/bridges/DanbooruBridge.php +++ b/bridges/DanbooruBridge.php @@ -31,7 +31,7 @@ class DanbooruBridge extends BridgeAbstract{ if (isset($param['t'])) { $tags = urlencode($param['t']); } - $html = file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404); + $html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404); foreach($html->find('div[id=posts] article') as $element) { $item = new \Item(); $item->uri = 'http://donmai.us'.$element->find('a', 0)->href; diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php index 95a1cf5a..947ed61d 100644 --- a/bridges/DansTonChatBridge.php +++ b/bridges/DansTonChatBridge.php @@ -15,7 +15,7 @@ class DansTonChatBridge extends BridgeAbstract{ $html = ''; $link = 'http://danstonchat.com/latest.html'; - $html = file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404); + $html = $this->file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404); foreach($html->find('div.item') as $element) { $item = new \Item(); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index 3721876e..f33c21c7 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -87,17 +87,17 @@ class DauphineLibereBridge extends BridgeAbstract{ function ExtractContent($url) { - $html2 = file_get_html($url); + $html2 = $this->file_get_html($url); $text = $html2->find('div.column', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; } if (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404); + 
$html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404); } else { - $html = file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404); + $html = $this->file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404); } $limit = 0; diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index 7b236edf..9c147a8a 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -37,13 +37,13 @@ class DeveloppezDotComBridge extends BridgeAbstract{ } function DeveloppezDotComExtractContent($url) { - $articleHTMLContent = file_get_html($url); + $articleHTMLContent = $this->file_get_html($url); $text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext); $text = utf8_encode($text); return trim($text); } - $rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); + $rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404); $limit = 0; foreach($rssFeed->find('item') as $element) { diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php index e6d34244..38260288 100644 --- a/bridges/DilbertBridge.php +++ b/bridges/DilbertBridge.php @@ -13,7 +13,7 @@ class DilbertBridge extends BridgeAbstract { public function collectData(array $param) { - $html = file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500); + $html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500); foreach ($html->find('section.comic-item') as $element) { diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php index 
637fa488..958fce5f 100644 --- a/bridges/DollbooruBridge.php +++ b/bridges/DollbooruBridge.php @@ -32,7 +32,7 @@ class DollbooruBridge extends BridgeAbstract{ if (isset($param['t'])) { $tags = urlencode($param['t']); } - $html = file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404); + $html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404); foreach($html->find('div[class=shm-image-list] a') as $element) { diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php index f0e5b9ae..eca63275 100644 --- a/bridges/DuckDuckGoBridge.php +++ b/bridges/DuckDuckGoBridge.php @@ -22,7 +22,7 @@ class DuckDuckGoBridge extends BridgeAbstract{ $html = ''; $link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date'; - $html = file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404); + $html = $this->file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404); foreach($html->find('div.results_links') as $element) { $item = new \Item(); diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index c58348a0..62669772 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -44,7 +44,7 @@ class EZTVBridge extends BridgeAbstract{ foreach($showList as $showID){ // Get show page - $html = file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404); + $html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404); // Loop on each element that look like an episode entry... 
foreach($html->find('.forum_header_border') as $element) { diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php index 72748b9c..f9ff9816 100644 --- a/bridges/EliteDangerousGalnetBridge.php +++ b/bridges/EliteDangerousGalnetBridge.php @@ -13,7 +13,7 @@ class EliteDangerousGalnetBridge extends BridgeAbstract public function collectData(array $param) { - $html = file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404); + $html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404); foreach($html->find('div.article') as $element) { $item = new Item(); diff --git a/bridges/FSBridge.php b/bridges/FSBridge.php index 11334d52..0071a74c 100644 --- a/bridges/FSBridge.php +++ b/bridges/FSBridge.php @@ -19,12 +19,12 @@ class FSBridge extends BridgeAbstract{ return $string; } function FS_ExtractContent($url) { - $html2 = file_get_html($url); + $html2 = $this->file_get_html($url); $text = $html2->find('div.fiche-actualite', 0)->innertext; $text = preg_replace('@]*?>.*?@si', '', $text); return $text; } - $html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); + $html = $this->file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404); $limit = 0; foreach($html->find('item') as $element) { diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 6483c111..103494fb 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -109,9 +109,9 @@ class FacebookBridge extends BridgeAbstract{ if (is_null($html)) { if (isset($param['u'])) { if (!strpos($param['u'], "/")) { - $html = file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results 
for this query.', 404); + $html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); } else { - $html = file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); + $html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404); } } else { $this->returnError('You must specify a Facebook username.', 400); diff --git a/bridges/FierPandaBridge.php b/bridges/FierPandaBridge.php index 68c27b8b..cba96d32 100644 --- a/bridges/FierPandaBridge.php +++ b/bridges/FierPandaBridge.php @@ -14,7 +14,7 @@ Class FierPandaBridge extends BridgeAbstract{ public function collectData(array $param){ $link = 'http://www.fier-panda.fr/'; - $html = file_get_html($link) or $this->returnError('Could not request Fier Panda.', 404); + $html = $this->file_get_html($link) or $this->returnError('Could not request Fier Panda.', 404); foreach($html->find('div.container-content article') as $element) { $item = new \Item(); diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php index c39c4899..9d83f27f 100644 --- a/bridges/FlickrExploreBridge.php +++ b/bridges/FlickrExploreBridge.php @@ -12,7 +12,7 @@ class FlickrExploreBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html('http://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404); + $html = $this->file_get_html('http://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404); foreach($html->find('span.photo_container') as $element) { $item = new \Item(); diff --git a/bridges/FlickrTagBridge.php b/bridges/FlickrTagBridge.php index 19977530..918c7655 100644 --- a/bridges/FlickrTagBridge.php +++ b/bridges/FlickrTagBridge.php @@ -27,14 +27,14 @@ class FlickrTagBridge 
extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnError('Could not request Flickr.', 404); + $html = $this->file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnError('Could not request Flickr.', 404); if (isset($param['q'])) { /* keyword search mode */ $this->request = $param['q']; - $html = file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404); + $html = $this->file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404); } elseif (isset($param['u'])) { /* user timeline mode */ $this->request = $param['u']; - $html = file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404); + $html = $this->file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404); } else { diff --git a/bridges/FootitoBridge.php b/bridges/FootitoBridge.php index 467f0429..bc16d43f 100644 --- a/bridges/FootitoBridge.php +++ b/bridges/FootitoBridge.php @@ -12,7 +12,7 @@ class FootitoBridge extends BridgeAbstract{ } public function collectData(array $param){ - $html = file_get_html('http://www.footito.fr/') or $this->returnError('Could not request Footito.', 404); + $html = $this->file_get_html('http://www.footito.fr/') or $this->returnError('Could not request Footito.', 404); foreach($html->find('div.post') as $element) { $item = new Item(); diff --git a/bridges/FourchanBridge.php b/bridges/FourchanBridge.php index 70f0477d..2cb8b00d 100644 --- a/bridges/FourchanBridge.php +++ b/bridges/FourchanBridge.php @@ -32,7 +32,7 @@ class FourchanBridge extends BridgeAbstract{ $this->returnError('You must specify the thread URL.', 400); $url = 
'https://boards.4chan.org'.$thread['path'].''; - $html = file_get_html($url) or $this->returnError("Could not request 4chan, thread not found", 404); + $html = $this->file_get_html($url) or $this->returnError("Could not request 4chan, thread not found", 404); foreach($html->find('div.postContainer') as $element) { $item = new \Item(); diff --git a/bridges/FrandroidBridge.php b/bridges/FrandroidBridge.php index 80aac5db..8b1625c9 100644 --- a/bridges/FrandroidBridge.php +++ b/bridges/FrandroidBridge.php @@ -22,7 +22,7 @@ class FrandroidBridge extends BridgeAbstract } function FrandroidExtractContent($url) { - $html2 = file_get_html($url); + $html2 = $this->file_get_html($url); $html3 = $html2->find('div.post-content', 0); $html3->find('div.no-sidebar-ad-top', 0)->outertext = ''; $ret = $html3->find('div.shortcode-container'); @@ -35,7 +35,7 @@ class FrandroidBridge extends BridgeAbstract $text = strip_tags($text, '