From 36d39d3f599ef2221455b7aab3d40a39280e8ac4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?=
Date: Sat, 25 Jun 2016 12:12:54 +0200
Subject: [PATCH] implement proxy feature without modifying simple_html_dom code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pierre Mazière
---
Notes (this section is ignored by git-am):

BridgeAbstract::file_get_html() wraps the global simple_html_dom
file_get_html(). When PROXY_URL is defined it adds 'proxy' and
'request_fulluri' (together with the ini 'user_agent') to the HTTP stream
context: a new context is created when the caller passed none, otherwise
the options are set on the context that was supplied. Every argument is
then forwarded to the global function. When PROXY_URL is not defined the
caller's $context is forwarded untouched.

Fixes folded into this revision:
  * the forwarded argument was spelled $defaultBRtext, which does not match
    the $defaultBRText parameter (PHP variable names are case-sensitive),
    so an undefined variable was passed instead of the caller's value;
  * a stray ';' after an if-block was dropped.

Indentation and blank context lines in this copy were reconstructed; if
they do not match the tree, apply with `git am --ignore-whitespace`.

 lib/Bridge.php                           | 40 +++++++++++++++---------
 vendor/simplehtmldom/simple_html_dom.php |  4 +--
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/lib/Bridge.php b/lib/Bridge.php
index 3a104a1b..8cc0bee9 100644
--- a/lib/Bridge.php
+++ b/lib/Bridge.php
@@ -90,6 +90,31 @@ abstract class BridgeAbstract implements BridgeInterface{
         return $this;
     }
 
+    protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){
+        $contextOptions = array(
+            'http' => array(
+                'user_agent'=>ini_get('user_agent')
+            ),
+        );
+
+        if(defined('PROXY_URL')) {
+            $contextOptions['http']['proxy'] = PROXY_URL;
+            $contextOptions['http']['request_fulluri'] = true;
+
+            if(is_null($context)){
+                $context = stream_context_create($contextOptions);
+            } else {
+                $prevContext=$context;
+                if(!stream_context_set_option($context,$contextOptions)){
+                    $context=$prevContext;
+                }
+            }
+        }
+        return file_get_html($url,$use_include_path,$context,$offset,$maxLen,
+            $lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText,
+            $defaultSpanText);
+    }
+
 }
 
 /**
@@ -354,19 +379,4 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
     }
 }
 
-function advanced_file_get_contents($url) {
-    if(defined('PROXY_URL')) {
-        $context = array(
-            'http' => array(
-                'proxy' => PROXY_URL,
-                'request_fulluri' => true,
-            ),
-        );
-        $context = stream_context_create($context);
-        return file_get_contents($url, false, $context);
-    } else {
-        return file_get_contents($url);
-    }
-
-}
 
diff --git a/vendor/simplehtmldom/simple_html_dom.php b/vendor/simplehtmldom/simple_html_dom.php
index 22aaa340..b5d30898 100644
--- a/vendor/simplehtmldom/simple_html_dom.php
+++ b/vendor/simplehtmldom/simple_html_dom.php
@@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
     // We DO force the tags to be terminated.
     $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
     // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
-    $contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
+    $contents = file_get_contents($url, $use_include_path, $context, $offset);
     // Paperg - use our own mechanism for getting the contents as we want to control the timeout.
     //$contents = retrieve_url_contents($url);
     if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
@@ -1094,7 +1094,7 @@ class simple_html_dom
     function load_file()
     {
         $args = func_get_args();
-        $this->load(call_user_func_array('advanced_file_get_contents', $args), true);
+        $this->load(call_user_func_array('file_get_contents', $args), true);
         // Throw an error if we can't properly load the dom.
         if (($error=error_get_last())!==null) {
             $this->clear();