implement proxy feature without modifying simple_html_dom code

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-06-25 12:12:54 +02:00
parent 040f4da73d
commit 36d39d3f59
2 changed files with 27 additions and 17 deletions

View file

@ -90,6 +90,31 @@ abstract class BridgeAbstract implements BridgeInterface{
return $this; return $this;
} }
/**
 * Proxy-aware wrapper around the global file_get_html() function.
 *
 * When the PROXY_URL constant is defined, the HTTP stream context used for
 * the request is configured to go through that proxy. Otherwise the
 * arguments are handed to the global file_get_html() untouched.
 *
 * @param string        $url     Location to fetch; forwarded unchanged.
 * @param resource|null $context Stream context. With PROXY_URL defined and
 *                               no context given, a new one is created; if a
 *                               context is given, the proxy options are set
 *                               on it in place (this also overwrites its
 *                               http 'user_agent' option with the ini value).
 *
 * Every other parameter is forwarded unchanged, in the same order, to the
 * global file_get_html().
 *
 * @return mixed Whatever the global file_get_html() returns.
 */
protected function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){
    if(defined('PROXY_URL')) {
        $contextOptions = array(
            'http' => array(
                'user_agent' => ini_get('user_agent'),
                'proxy' => PROXY_URL,
                // Send the absolute URI in the request line, as HTTP proxies expect.
                'request_fulluri' => true,
            ),
        );

        if(is_null($context)){
            $context = stream_context_create($contextOptions);
        } else {
            // The context is a resource handle, so the options are applied in
            // place; there is no separate copy to fall back to on failure.
            stream_context_set_option($context, $contextOptions);
        }
    }

    // PHP variable names are case-sensitive: forward $defaultBRText exactly
    // as declared in the signature, otherwise an undefined variable (null)
    // would be passed on instead of the caller's value.
    return file_get_html($url, $use_include_path, $context, $offset, $maxLen,
        $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText,
        $defaultSpanText);
}
} }
/** /**
@ -354,19 +379,4 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
} }
} }
/**
 * Read the contents of $url, going through the proxy when PROXY_URL is defined.
 *
 * Without PROXY_URL this is a plain file_get_contents($url) call. With it,
 * the read uses an HTTP stream context carrying the proxy address and the
 * request_fulluri flag.
 *
 * @param string $url Location to read.
 * @return string|false Result of file_get_contents().
 */
function advanced_file_get_contents($url) {
    // No proxy configured: nothing to set up.
    if (!defined('PROXY_URL')) {
        return file_get_contents($url);
    }

    $proxyContext = stream_context_create(array(
        'http' => array(
            'proxy' => PROXY_URL,
            'request_fulluri' => true,
        ),
    ));

    return file_get_contents($url, false, $proxyContext);
}

View file

@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
// We DO force the tags to be terminated. // We DO force the tags to be terminated.
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done. // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
$contents = advanced_file_get_contents($url, $use_include_path, $context, $offset); $contents = file_get_contents($url, $use_include_path, $context, $offset);
// Paperg - use our own mechanism for getting the contents as we want to control the timeout. // Paperg - use our own mechanism for getting the contents as we want to control the timeout.
//$contents = retrieve_url_contents($url); //$contents = retrieve_url_contents($url);
if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
@ -1094,7 +1094,7 @@ class simple_html_dom
function load_file() function load_file()
{ {
$args = func_get_args(); $args = func_get_args();
$this->load(call_user_func_array('advanced_file_get_contents', $args), true); $this->load(call_user_func_array('file_get_contents', $args), true);
// Throw an error if we can't properly load the dom. // Throw an error if we can't properly load the dom.
if (($error=error_get_last())!==null) { if (($error=error_get_last())!==null) {
$this->clear(); $this->clear();