[contents] Add server side caching for all requests (If-Modified-Since) (#889)

This commit adds a cache for 'getContents' to '/cache/server'. All
contents are cached by default (even in debug mode). If debug mode
is enabled, the cached data is overwritten on each request.

In normal mode RSS-Bridge adds the 'If-Modified-Since' header with
the timestamp from the previously cached data (if available) to the
request.

Find more information on 'If-Modified-Since' here:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since

If the server responds with "304 Not Modified", the cached data is
returned.

If the server responds with "200 OK", the received data is written
to the cache (creates a new cache file if it doesn't exist yet).

No changes were made for all other response codes.

Servers that don't support the 'If-Modified-Since' header will
respond with "200 OK".

For servers that respond with "304 Not Modified", the required band-
width will decrease and RSS-Bridge will respond faster.

Files in the cache are forcefully removed after 24 hours.

Notice: Only a few servers actually support 'If-Modified-Since'.
Thus, most bridges won't be affected by this change.
This commit is contained in:
LogMANOriginal 2018-11-19 17:53:08 +01:00 committed by GitHub
parent 96a518c9e7
commit 7b261d1cc2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -14,6 +14,21 @@
/** /**
* Gets contents from the Internet. * Gets contents from the Internet.
* *
* **Content caching** (in debug mode `If-Modified-Since` is not sent, so cached data is refreshed rather than reused)
*
* A copy of the received content is stored in a local cache folder `server/` at
* {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if
* the provided URL has been cached before.
*
* When the server responds with `304 Not Modified`, the cached data is returned.
* This will improve response times and reduce bandwidth for servers that support
* the `If-Modified-Since` header.
*
* Cached files are forcefully removed after 24 hours.
*
* @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
* If-Modified-Since
*
* @param string $url The URL. * @param string $url The URL.
* @param array $header (optional) A list of cURL header. * @param array $header (optional) A list of cURL header.
* For more information follow the links below. * For more information follow the links below.
@ -29,6 +44,14 @@
function getContents($url, $header = array(), $opts = array()){ function getContents($url, $header = array(), $opts = array()){
Debug::log('Reading contents from "' . $url . '"'); Debug::log('Reading contents from "' . $url . '"');
// Initialize cache
$cache = Cache::create('FileCache');
$cache->setPath(PATH_CACHE . 'server/');
$cache->purgeCache(86400); // 24 hours (forced)
$params = [$url];
$cache->setParameters($params);
$ch = curl_init($url); $ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
@ -64,9 +87,22 @@ function getContents($url, $header = array(), $opts = array()){
// We always want the response header as part of the data! // We always want the response header as part of the data!
curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
// Enables logging for the outgoing header
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
$data = curl_exec($ch); $data = curl_exec($ch);
$curlError = curl_error($ch); $curlError = curl_error($ch);
$curlErrno = curl_errno($ch); $curlErrno = curl_errno($ch);
$curlInfo = curl_getinfo($ch);
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false) if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')'); Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
@ -80,8 +116,18 @@ function getContents($url, $header = array(), $opts = array()){
$headers = parseResponseHeader($header); $headers = parseResponseHeader($header);
$finalHeader = end($headers); $finalHeader = end($headers);
if($errorCode !== 200) { curl_close($ch);
switch($errorCode) {
case 200: // Contents received
Debug::log('New contents received');
$data = substr($data, $headerSize);
$cache->saveData($data);
return $data;
case 304: // Not modified, use cached data
Debug::log('Contents not modified on host, returning cached data');
return $cache->loadData();
default:
if(array_key_exists('Server', $finalHeader) && strpos($finalHeader['Server'], 'cloudflare') !== false) { if(array_key_exists('Server', $finalHeader) && strpos($finalHeader['Server'], 'cloudflare') !== false) {
returnServerError(<<< EOD returnServerError(<<< EOD
The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge! The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!
@ -96,9 +142,6 @@ Please make sure your input parameters are correct!
EOD EOD
, $errorCode); , $errorCode);
} }
curl_close($ch);
return substr($data, $headerSize);
} }
/** /**