Merge branch 'live'
This commit is contained in:
commit
0f6fdb0c5b
8 changed files with 359 additions and 85 deletions
lib
|
@ -71,6 +71,102 @@ abstract class BridgeAbstract implements BridgeInterface{
|
|||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Point root-relative image sources at the given server.
 *
 * Every element returned by $content->find('img') whose src starts with a single '/'
 * (that is to say, a src which names no server of its own) gets $server prepended.
 * Sources which do not start with '/', and protocol-relative sources starting with '//'
 * (which already name a host), are left untouched.
 *
 * @param object $content node exposing find('img'); presumably a simple_html_dom node - confirm against callers
 * @param string $server  scheme and host to prepend, without trailing slash (e.g. "http://example.com")
 */
public function defaultImageSrcTo($content, $server) {
    foreach($content->find('img') as $image) {
        $src = $image->src;
        // strpos() returns false when there is no '/' at all, and false == 0 holds under
        // loose comparison, so the position has to be compared strictly.
        $isRootRelative = strpos($src, '/') === 0 && strpos($src, '//') !== 0;
        if($isRootRelative) {
            $image->src = $server.$src;
        }
    }
}
|
||||
}
|
||||
|
||||
/**
 * Extension of BridgeAbstract allowing caching of files downloaded over HTTP.
 * This is especially useful for sites from the Gawker or Liberation networks, which allow page excerpts to be
 * viewed together on the index, while full pages have to be downloaded separately.
 * This class mainly provides a get_cached method, which downloads the file from its remote location when no
 * local copy exists and reads the local copy otherwise.
 * TODO allow file cache invalidation: cached files are touched on access, but files/directories which have not
 * been touched since ... a long time are not removed yet.
 * After all, rss-bridge is not respaw, is it?
 */
abstract class HttpCachingBridgeAbstract extends BridgeAbstract {

    /**
     * Maintain locally cached versions of pages to avoid multiple downloads.
     * The local file name is derived from the url (see cache_filename) and the file is saved below the
     * page cache directory.
     *
     * @param string $url url to cache
     * @return string|false content of the file as a string, or false when the page could neither be
     *                      downloaded nor read from the cache
     */
    public function get_cached($url) {
        $pageCacheDir = $this->page_cache_dir();
        $filename = $this->cache_filename($url);

        if(file_exists($filename)) {
            // Mark the entry (and its parent directories) as recently used
            $this->refresh_in_cache($pageCacheDir, $filename);
        } else {
            $dir = dirname($filename);
            // The second is_dir() covers a concurrent request creating the directory first
            if(!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
                return false;
            }
            // An overriding download_remote() may return nothing, so only an explicit false
            // or a missing file counts as a failure.
            if($this->download_remote($url, $filename) === false || !file_exists($filename)) {
                return false;
            }
        }

        return file_get_contents($filename);
    }

    /**
     * Directory below which all cached pages are stored (with trailing slash).
     *
     * @return string
     */
    private function page_cache_dir() {
        // TODO build this from the variable given to Cache
        return __DIR__ . '/../cache/pages/';
    }

    /**
     * Translate a url into the path of its cache file.
     * The scheme is dropped and "?", "&" and "=" become directory separators; a path ending
     * with "/" is completed with "index.html".
     *
     * @param string $url remote url
     * @return string path of the cache file (which may not exist yet)
     */
    private function cache_filename($url) {
        $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);

        // Urls come from remote content: neutralise ".." segments so that a crafted url
        // cannot address a file outside of the cache directory.
        $segments = explode('/', $simplified_url);
        foreach($segments as $index => $segment) {
            if($segment === '..') {
                $segments[$index] = '__';
            }
        }

        $filename = $this->page_cache_dir().implode('/', $segments);
        if(substr($filename, -1) == '/') {
            $filename = $filename."index.html";
        }
        return $filename;
    }

    /**
     * Touch a cached file and each of its parent directories located below the cache directory.
     *
     * @param string $pageCacheDir root of the page cache; it is not touched itself
     * @param string $filename     cached file, expected to be located below $pageCacheDir
     */
    private function refresh_in_cache($pageCacheDir, $filename) {
        // dirname() never yields a trailing slash, so paths are compared against a normalised
        // "root/" prefix; the walk stops as soon as the current path is no longer below it.
        $prefix = rtrim($pageCacheDir, '/').'/';
        $currentPath = $filename;
        while(strpos($currentPath, $prefix) === 0) {
            touch($currentPath);
            $parent = dirname($currentPath);
            if($parent === $currentPath) {
                // Filesystem root reached: dirname() makes no more progress
                break;
            }
            $currentPath = $parent;
        }
    }

    /**
     * Download a remote url into a local file.
     * When the remote url cannot be opened, the local file is neither created nor truncated, so a failed
     * request never leaves an empty file behind.
     *
     * @param string $url       remote url to read
     * @param string $save_path local file to write
     * @return bool true when the content has been written, false otherwise
     */
    public function download_remote($url , $save_path) {
        $handle = fopen($url, "rb");
        if(!$handle) {
            return false;
        }

        $f = fopen($save_path, 'w+');
        if(!$f) {
            fclose($handle);
            return false;
        }

        $copied = stream_copy_to_stream($handle, $f);
        fclose($handle);
        fclose($f);

        if($copied === false) {
            // Do not keep a partial file around: it would be served as a valid cache entry
            unlink($save_path);
            return false;
        }
        return true;
    }

    /**
     * Log the cache file which would be removed for the given url.
     * Nothing is deleted: the unlink() call was left disabled by the original author, who did not trust the
     * computed file name.
     *
     * @param string $url url whose cached copy should be dropped
     */
    public function remove_from_cache($url) {
        // realpath() returns false (logged as an empty name) when the file does not exist
        $filename = realpath($this->cache_filename($url));
        $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY");
    }

    /**
     * Write a message to the PHP error log, prefixed with call site information.
     * The prefix is built from the third backtrace frame when there is one, and from the deepest
     * available frame otherwise.
     *
     * @param string $text message to log
     */
    public function message($text) {
        $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3);
        // The stack may be shallower than three frames
        $calling = isset($backtrace[2]) ? $backtrace[2] : end($backtrace);
        // Frames of internal calls carry no file/line information
        $file = isset($calling["file"]) ? $calling["file"] : "unknown";
        $line = isset($calling["line"]) ? $calling["line"] : 0;
        $message = $file.":".$line
            ." class ".get_class($this)."->".$calling["function"]
            ." - ".$text;
        error_log($message);
    }
}
|
||||
|
||||
class Bridge{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue