[contents] Add server side caching for all requests (If-Modified-Since) (#889)

This commit adds a cache for 'getContents' to '/cache/server'. All
contents are cached by default (even in debug mode). If debug mode
is enabled, the cached data is overwritten on each request.

In normal mode RSS-Bridge adds the 'If-Modified-Since' header with
the timestamp from the previously cached data (if available) to the
request.

Find more information on 'If-Modified-Since' here:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since

If the server responds with "304 Not Modified", the cached data is
returned.

If the server responds with "200 OK", the received data is written
to the cache (creates a new cache file if it doesn't exist yet).

The handling of all other response codes is unchanged.

Servers that don't support the 'If-Modified-Since' header will
respond with "200 OK".

For servers that respond with "304 Not Modified", the required
bandwidth will decrease and RSS-Bridge will respond faster.

Files in the cache are forcefully removed after 24 hours.

Notice: Only a few servers actually support 'If-Modified-Since'.
Thus, most bridges won't be affected by this change.
This commit is contained in:
LogMANOriginal 2018-11-19 17:53:08 +01:00 committed by GitHub
parent 96a518c9e7
commit 7b261d1cc2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -14,6 +14,21 @@
/** /**
* Gets contents from the Internet. * Gets contents from the Internet.
* *
* **Content caching** (the `If-Modified-Since` header is not sent in debug mode)
*
* A copy of the received content is stored in a local cache folder `server/` at
* {@see PATH_CACHE}. The `If-Modified-Since` header is added to the request, if
* the provided URL has been cached before.
*
* When the server responds with `304 Not Modified`, the cached data is returned.
* This will improve response times and reduce bandwidth for servers that support
* the `If-Modified-Since` header.
*
* Cached files are forcefully removed after 24 hours.
*
* @link https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
* If-Modified-Since
*
* @param string $url The URL. * @param string $url The URL.
* @param array $header (optional) A list of cURL header. * @param array $header (optional) A list of cURL header.
* For more information follow the links below. * For more information follow the links below.
@ -29,6 +44,14 @@
function getContents($url, $header = array(), $opts = array()){ function getContents($url, $header = array(), $opts = array()){
Debug::log('Reading contents from "' . $url . '"'); Debug::log('Reading contents from "' . $url . '"');
// Initialize cache
$cache = Cache::create('FileCache');
$cache->setPath(PATH_CACHE . 'server/');
$cache->purgeCache(86400); // 24 hours (forced)
$params = [$url];
$cache->setParameters($params);
$ch = curl_init($url); $ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
@ -64,9 +87,22 @@ function getContents($url, $header = array(), $opts = array()){
// We always want the response header as part of the data! // We always want the response header as part of the data!
curl_setopt($ch, CURLOPT_HEADER, true); curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
// Enables logging for the outgoing header
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
$data = curl_exec($ch); $data = curl_exec($ch);
$curlError = curl_error($ch); $curlError = curl_error($ch);
$curlErrno = curl_errno($ch); $curlErrno = curl_errno($ch);
$curlInfo = curl_getinfo($ch);
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false) if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')'); Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
@ -80,25 +116,32 @@ function getContents($url, $header = array(), $opts = array()){
$headers = parseResponseHeader($header); $headers = parseResponseHeader($header);
$finalHeader = end($headers); $finalHeader = end($headers);
if($errorCode !== 200) { curl_close($ch);
if(array_key_exists('Server', $finalHeader) && strpos($finalHeader['Server'], 'cloudflare') !== false) { switch($errorCode) {
case 200: // Contents received
Debug::log('New contents received');
$data = substr($data, $headerSize);
$cache->saveData($data);
return $data;
case 304: // Not modified, use cached data
Debug::log('Contents not modified on host, returning cached data');
return $cache->loadData();
default:
if(array_key_exists('Server', $finalHeader) && strpos($finalHeader['Server'], 'cloudflare') !== false) {
returnServerError(<<< EOD returnServerError(<<< EOD
The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge! The server responded with a Cloudflare challenge, which is not supported by RSS-Bridge!
If this error persists longer than a week, please consider opening an issue on GitHub! If this error persists longer than a week, please consider opening an issue on GitHub!
EOD EOD
); );
} }
returnError(<<<EOD returnError(<<<EOD
The requested resource cannot be found! The requested resource cannot be found!
Please make sure your input parameters are correct! Please make sure your input parameters are correct!
EOD EOD
, $errorCode); , $errorCode);
} }
curl_close($ch);
return substr($data, $headerSize);
} }
/** /**