From ee78e7613fa6ceeb7cfebf91a2b2fe0a202ad68b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 25 Mar 2018 13:12:13 +0200 Subject: [PATCH] [contents] Replace file_get_contents by cURL cURL is a powerful library specifically designed to connect to many different types of servers with different types of protocols. For more detailed information, refer to the PHP cURL manual: - http://php.net/manual/en/book.curl.php Due to this change, some changes to the parameters of the getContents function were necessary (this also applies to getSimpleHTMLDOM and getSimpleHTMLDOMCached): > $use_include_path removed This parameter has never been used and doesn't even make sense in this context; if set to true, file_get_contents would also search for files in the include_path (specified in php.ini). > $context replaced by $header and $opts The $context parameter allowed for customization of the request in order to change how file_get_contents would acquire the data (e.g. using POST instead of GET, sending custom headers, etc.). cURL also provides facilities to specify custom headers and change how it communicates with servers. cURL, however, is much more advanced. - $header is an optional parameter (empty by default). It receives an array of strings to send in the HTTP request header. See 'CURLOPT_HTTPHEADER': "An array of HTTP header fields to set, in the format array('Content-type: text/plain', 'Content-length: 100')" - php.net/manual/en/function.curl-setopt.php - $opts is an optional parameter (empty by default). It receives an array of options, where each option is a key-value pair of a cURL option (CURLOPT_*) and its associated parameter. This parameter accepts any of the CURLOPT_* settings. 
Example (sending POST instead of GET): $opts = array( CURLOPT_POST => 1, CURLOPT_POSTFIELDS => '&action=none' ); $html = getContents($url, array(), $opts); Refer to the cURL setopt manual for more information: - php.net/manual/en/function.curl-setopt.php > $offset and $maxlen removed These options were supported by file_get_contents, but there doesn't seem to be an equivalent in cURL. Since no caller uses them, they are safe to remove. Compressed data / Encoding By using cURL instead of file_get_contents, RSS-Bridge no longer has to handle compressed data manually. See 'CURLOPT_ENCODING': "[...] Supported encodings are "identity", "deflate", and "gzip". If an empty string, "", is set, a header containing all supported encoding types is sent." - http://php.net/manual/en/function.curl-setopt.php Note: By default, all encoding types are accepted (""). This can be changed by setting a custom option via $opts. Example: $opts = array(CURLOPT_ENCODING => 'gzip'); $html = getContents($url, array(), $opts); Proxy The proxy implementation should still work, but there doesn't seem to be an equivalent for 'request_fulluri = true'. To my understanding, this isn't an issue because cURL handles proxy communication by itself. 
--- lib/contents.php | 80 +++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 59 deletions(-) diff --git a/lib/contents.php b/lib/contents.php index ec62c8db..f9b68d29 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -1,77 +1,41 @@ array( - 'user_agent' => ini_get('user_agent'), - 'accept_encoding' => 'gzip' - ) - ); +function getContents($url, $header = array(), $opts = array()){ + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_HTTPHEADER, $header); + curl_setopt($ch, CURLOPT_USERAGENT, ini_get('user_agent')); + curl_setopt($ch, CURLOPT_ENCODING, ''); - if(defined('PROXY_URL') && !defined('NOPROXY')) { - $contextOptions['http']['proxy'] = PROXY_URL; - $contextOptions['http']['request_fulluri'] = true; - - if(is_null($context)) { - $context = stream_context_create($contextOptions); - } else { - $prevContext = $context; - if(!stream_context_set_option($context, $contextOptions)) { - $context = $prevContext; - } + if(is_array($opts)) { + foreach($opts as $key => $value) { + curl_setopt($ch, $key, $value); } } - if(is_null($maxlen)) { - $content = file_get_contents($url, $use_include_path, $context, $offset); - } else { - $content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen); + if(defined('PROXY_URL') && !defined('NOPROXY')) { + curl_setopt($ch, CURLOPT_PROXY, PROXY_URL); } + $content = curl_exec($ch); + curl_close($ch); + if($content === false) debugMessage('Cant\'t download ' . $url); - // handle compressed data - foreach($http_response_header as $header) { - if(stristr($header, 'content-encoding')) { - switch(true) { - case stristr($header, 'gzip'): - $content = gzinflate(substr($content, 10, -8)); - break; - case stristr($header, 'compress'): - //TODO - case stristr($header, 'deflate'): - //TODO - case stristr($header, 'brotli'): - //TODO - returnServerError($header . 
'=> Not implemented yet'); - break; - case stristr($header, 'identity'): - break; - default: - returnServerError($header . '=> Unknown compression'); - } - } - } - return $content; } function getSimpleHTMLDOM($url, -$use_include_path = false, -$context = null, -$offset = 0, -$maxLen = null, +$header = array(), +$opts = array(), $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT){ - $content = getContents($url, $use_include_path, $context, $offset, $maxLen); + $content = getContents($url, $header, $opts); return str_get_html($content, $lowercase, $forceTagsClosed, @@ -89,10 +53,8 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ */ function getSimpleHTMLDOMCached($url, $duration = 86400, -$use_include_path = false, -$context = null, -$offset = 0, -$maxLen = null, +$header = array(), +$opts = array(), $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, @@ -116,7 +78,7 @@ $defaultSpanText = DEFAULT_SPAN_TEXT){ && (!defined('DEBUG') || DEBUG !== true)) { // Contents within duration $content = $cache->loadData(); } else { // Content not within duration - $content = getContents($url, $use_include_path, $context, $offset, $maxLen); + $content = getContents($url, $header, $opts); if($content !== false) { $cache->saveData($content); }