From d80efed1f43ccd810c90ea6654cc25e3de01b95d Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 18:09:34 +0200 Subject: [PATCH 01/15] [Bridge] Use space indentation everywhere --- lib/Bridge.php | 82 +++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 05fe77e3..9d195b1a 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -17,12 +17,12 @@ abstract class BridgeAbstract implements BridgeInterface{ protected $cache; protected $items = array(); - public $name = "Unnamed bridge"; - public $uri = ""; - public $description = 'No description provided'; - public $maintainer = 'No maintainer'; - public $useProxy = true; - public $parameters = array(); + public $name = "Unnamed bridge"; + public $uri = ""; + public $description = 'No description provided'; + public $maintainer = 'No maintainer'; + public $useProxy = true; + public $parameters = array(); /** * Launch probative exception @@ -175,22 +175,22 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { */ public function get_cached($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); - // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'."pages/"; + // TODO build this from the variable given to Cache + $pageCacheDir = __DIR__ . '/../cache/'."pages/"; $filename = $pageCacheDir.$simplified_url; if (substr($filename, -1) == '/') { $filename = $filename."index.html"; } if(file_exists($filename)) { // $this->message("loading cached file from ".$filename." for page at url ".$url); - // TODO touch file and its parent, and try to do neighbour deletion + // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); $content=file_get_contents($filename); - } else { + } else { // $this->message("we have no local copy of ".$url." Downloading to ".$filename); $dir = substr($filename, 0, strrpos($filename, '/')); if(!is_dir($dir)) { -// $this->message("creating directories for ".$dir); +// $this->message("creating directories for ".$dir); mkdir($dir, 0777, true); } $content=$this->getContents($url); @@ -216,17 +216,17 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } private function refresh_in_cache($pageCacheDir, $filename) { - $currentPath = $filename; - while(!$pageCacheDir==$currentPath) { - touch($currentPath); - $currentPath = dirname($currentPath); - } + $currentPath = $filename; + while(!$pageCacheDir==$currentPath) { + touch($currentPath); + $currentPath = dirname($currentPath); + } } public function remove_from_cache($url) { $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); - // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'."pages/"; + // TODO build this from the variable given to Cache + $pageCacheDir = __DIR__ . '/../cache/'."pages/"; $filename = realpath($pageCacheDir.$simplified_url); $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY"); // filename is NO GOOD @@ -243,17 +243,17 @@ class Bridge{ throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.'); } - /** - * Checks if a bridge is an instantiable bridge. - * @param string $nameBridge name of the bridge that you want to use - * @return true if it is an instantiable bridge, false otherwise. - */ - static public function isInstantiable($nameBridge) { + /** + * Checks if a bridge is an instantiable bridge. + * @param string $nameBridge name of the bridge that you want to use + * @return true if it is an instantiable bridge, false otherwise. + */ + static public function isInstantiable($nameBridge) { - $re = new ReflectionClass($nameBridge); - return $re->IsInstantiable(); + $re = new ReflectionClass($nameBridge); + return $re->IsInstantiable(); - } + } /** @@ -275,10 +275,10 @@ class Bridge{ require_once $pathBridge; - if(Bridge::isInstantiable($nameBridge)) { - return new $nameBridge(); + if(Bridge::isInstantiable($nameBridge)) { + return new $nameBridge(); } else { - return FALSE; + return FALSE; } } @@ -308,11 +308,11 @@ class Bridge{ * Lists the available bridges. * @return array List of the bridges */ - static public function listBridges() { + static public function listBridges() { - $pathDirBridge = self::getDir(); - $listBridge = array(); - $dirFiles = scandir($pathDirBridge); + $pathDirBridge = self::getDir(); + $listBridge = array(); + $dirFiles = scandir($pathDirBridge); if( $dirFiles !== false ){ foreach( $dirFiles as $fileName ) { @@ -322,17 +322,17 @@ class Bridge{ } } - return $listBridge; - } - static function isWhitelisted( $whitelist, $name ) { + return $listBridge; + } + static function isWhitelisted( $whitelist, $name ) { if(in_array($name, $whitelist) or in_array($name.'.php', $whitelist) or // DEPRECATED: the nameBridge notation will be removed in future releases in_array($name.'Bridge', $whitelist) or in_array($name.'Bridge.php', $whitelist) or count($whitelist) === 1 and trim($whitelist[0]) === '*') - return TRUE; - else - return FALSE; - } + return TRUE; + else + return FALSE; + } } From c0c4759cdea9b131cf25ed716840c54062332e11 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 18:19:42 +0200 Subject: [PATCH 02/15] [Bridge] Enable all calls to message($) The 'message' function will only execute in debug mode, so no harm done if left active. --- lib/Bridge.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 9d195b1a..3ac5cd25 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -182,15 +182,15 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { $filename = $filename."index.html"; } if(file_exists($filename)) { -// $this->message("loading cached file from ".$filename." for page at url ".$url); + $this->message("loading cached file from ".$filename." for page at url ".$url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); $content=file_get_contents($filename); } else { -// $this->message("we have no local copy of ".$url." Downloading to ".$filename); + $this->message("we have no local copy of ".$url." Downloading to ".$filename); $dir = substr($filename, 0, strrpos($filename, '/')); if(!is_dir($dir)) { -// $this->message("creating directories for ".$dir); + $this->message("creating directories for ".$dir); mkdir($dir, 0777, true); } $content=$this->getContents($url); @@ -346,13 +346,13 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ if (empty($name)) { $this->returnServerError('There is no $name for this RSS expander'); } -// $this->message("Loading from ".$param['url']); + $this->message("Loading from ".$param['url']); // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time $content=$this->getContents($name) or $this->returnServerError('Could not request '.$name); $rssContent = simplexml_load_string($content); - // $this->message("loaded RSS from ".$param['url']); + $this->message("loaded RSS from ".$param['url']); // TODO insert RSS format detection // we suppose for now, we have some RSS 2.0 $this->collect_RSS_2_0_data($rssContent); @@ -360,10 +360,10 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ protected function collect_RSS_2_0_data($rssContent) { $rssContent = $rssContent->channel[0]; -// $this->message("RSS content is ===========\n".var_export($rssContent, true)."==========="); + $this->message("RSS content is ===========\n".var_export($rssContent, true)."==========="); $this->load_RSS_2_0_feed_data($rssContent); foreach($rssContent->item as $item) { -// $this->message("parsing item ".var_export($item, true)); + $this->message("parsing item ".var_export($item, true)); $this->items[] = $this->parseRSSItem($item); } } From 1e9edf49dec6aff6bd1289be48a91393de4f6048 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 18:20:49 +0200 Subject: [PATCH 03/15] [bridges] Activate all calls to the 'message' function --- bridges/Freenews.php | 4 ++-- bridges/Gawker.php | 8 ++++---- bridges/Les400Culs.php | 4 ++-- bridges/TheOatMealBridge.php | 2 +- bridges/WorldOfTanks.php | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bridges/Freenews.php b/bridges/Freenews.php index 04cbdaf5..649089b1 100644 --- a/bridges/Freenews.php +++ b/bridges/Freenews.php @@ -21,14 +21,14 @@ class Freenews extends RssExpander { protected function parseRSSItem($newsItem) { $item = array(); $item['title'] = trim($newsItem->title); -// $this->message("item has for title \"".$item['title']."\""); + $this->message("item has for title \"".$item['title']."\""); if(empty($newsItem->guid)) { $item['uri'] = (string) $newsItem->link; } else { $item['uri'] = (string) $newsItem->guid; } // now load that uri from cache -// $this->message("now loading page ".$item['uri']); + $this->message("now loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); $content = $articlePage->find('.post-container', 0); diff --git a/bridges/Gawker.php b/bridges/Gawker.php index e8c75819..c219b272 100644 --- a/bridges/Gawker.php +++ b/bridges/Gawker.php @@ -28,7 +28,7 @@ class Gawker extends RssExpander{ $this->name = $param['site']; $url = $this->toURI(strtolower($param['site'])); } -// $this->message("loading feed from ".$this->getURI()); + $this->message("loading feed from ".$this->getURI()); parent::collectExpandableDatas($param, $url); } @@ -37,10 +37,10 @@ class Gawker extends RssExpander{ $item['uri'] = trim($newsItem->link); $item['title'] = trim($newsItem->title); $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem); -// $this->message("///////////////////////////////////////////////////////////////////////////////////////\nprocessing item ".var_export($item, true)."\n\n\nbuilt from\n\n\n".var_export($newsItem, true)); + $this->message("///////////////////////////////////////////////////////////////////////////////////////\nprocessing item ".var_export($item, true)."\n\n\nbuilt from\n\n\n".var_export($newsItem, true)); try { // now load that uri from cache -// $this->message("loading page ".$item['uri']); + $this->message("loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); if(is_object($articlePage)) { $content = $articlePage->find('.post-content', 0); @@ -51,7 +51,7 @@ class Gawker extends RssExpander{ $item['author'] = $authorLink->innertext; // TODO use author link href to fill the feed info } -// $this->message("item quite loaded : ".var_export($item, true)); + $this->message("item quite loaded : ".var_export($item, true)); // I set item content as last element, for easier var_export reading $item['content'] = $content->innertext; } else { diff --git a/bridges/Les400Culs.php b/bridges/Les400Culs.php index 49b2c1e1..f2d54a44 100644 --- a/bridges/Les400Culs.php +++ b/bridges/Les400Culs.php @@ -21,14 +21,14 @@ class Les400Culs extends RssExpander{ protected function parseRSSItem($newsItem) { $item = array(); $item['title'] = trim((string) $newsItem->title); -// $this->message("browsing item ".var_export($newsItem, true)); + $this->message("browsing item ".var_export($newsItem, true)); if(empty($newsItem->guid)) { $item['uri'] = (string) $newsItem->link; } else { $item['uri'] = (string) $newsItem->guid; } // now load that uri from cache -// $this->message("now loading page ".$item['uri']); + $this->message("now loading page ".$item['uri']); // $articlePage = str_get_html($this->get_cached($item['uri'])); // $content = $articlePage->find('.post-container', 0); diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index 4dfe8baf..5a580beb 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -24,7 +24,7 @@ class TheOatmealBridge extends RssExpander{ protected function collect_RSS_2_0_data($rssContent) { $rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $rssHeaderContent = $rssContent->channel[0]; -// $this->message("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); + $this->message("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); $this->load_RSS_2_0_feed_data($rssHeaderContent); foreach($rssContent->item as $item) { $this->message("parsing item ".var_export($item, true)); diff --git a/bridges/WorldOfTanks.php b/bridges/WorldOfTanks.php index 32e73dfc..d093394f 100644 --- a/bridges/WorldOfTanks.php +++ b/bridges/WorldOfTanks.php @@ -57,7 +57,7 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{ $item = array(); $item['uri'] = WORLD_OF_TANKS.$infoLink->href; // now load that uri from cache -// $this->message("loading page ".$item['uri']); + $this->message("loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); $content = $articlePage->find('.l-content', 0); HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS); From 07f664b2fa80425c7bed691e2dd2dd1ee39eb972 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 18:32:31 +0200 Subject: [PATCH 04/15] [Bridge] Remove duplicate definition of member variables --- lib/Bridge.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 3ac5cd25..dd8fa22e 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -338,10 +338,6 @@ class Bridge{ abstract class RssExpander extends HttpCachingBridgeAbstract{ - public $name; - public $uri; - public $description; - public function collectExpandableDatas(array $param, $name){ if (empty($name)) { $this->returnServerError('There is no $name for this RSS expander'); From 237a26e4fff4968c826297073fb8f696fa9930ec Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 19:06:07 +0200 Subject: [PATCH 05/15] [bridges] Rename bridges that are missing 'Bridges' The word 'Bridge' is a requirement for the filename and class name. Also documented here (now): https://github.com/rss-bridge/rss-bridge/wiki/how-to-create-a-new-bridge --- bridges/{Freenews.php => FreenewsBridge.php} | 2 +- bridges/{Gawker.php => GawkerBridge.php} | 2 +- bridges/{Les400Culs.php => Les400CulsBridge.php} | 2 +- bridges/{WorldOfTanks.php => WorldOfTanksBridge.php} | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename bridges/{Freenews.php => FreenewsBridge.php} (97%) rename bridges/{Gawker.php => GawkerBridge.php} (98%) rename bridges/{Les400Culs.php => Les400CulsBridge.php} (96%) rename bridges/{WorldOfTanks.php => WorldOfTanksBridge.php} (97%) diff --git a/bridges/Freenews.php b/bridges/FreenewsBridge.php similarity index 97% rename from bridges/Freenews.php rename to bridges/FreenewsBridge.php index 649089b1..faf7330e 100644 --- a/bridges/Freenews.php +++ b/bridges/FreenewsBridge.php @@ -1,6 +1,6 @@ Date: Wed, 24 Aug 2016 19:33:38 +0200 Subject: [PATCH 06/15] [Bridge] Replace double quotes with single quotes This harmonizes the usage throughout the file. --- lib/Bridge.php | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index dd8fa22e..56633402 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -17,8 +17,8 @@ abstract class BridgeAbstract implements BridgeInterface{ protected $cache; protected $items = array(); - public $name = "Unnamed bridge"; - public $uri = ""; + public $name = 'Unnamed bridge'; + public $uri = ''; public $description = 'No description provided'; public $maintainer = 'No maintainer'; public $useProxy = true; @@ -111,9 +111,9 @@ abstract class BridgeAbstract implements BridgeInterface{ } $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); $calling = $backtrace[2]; - $message = $calling["file"].":".$calling["line"] - ." class ".get_class($this)."->".$calling["function"] - ." - ".$text; + $message = $calling['file'].':'.$calling['line'] + .' class '.get_class($this).'->'.$calling['function'] + .' - '.$text; error_log($message); } @@ -174,23 +174,23 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { * @return content of file as string */ public function get_cached($url) { - $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); + $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'."pages/"; + $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; $filename = $pageCacheDir.$simplified_url; if (substr($filename, -1) == '/') { - $filename = $filename."index.html"; + $filename = $filename.'index.html'; } if(file_exists($filename)) { - $this->message("loading cached file from ".$filename." for page at url ".$url); + $this->message('loading cached file from '.$filename.' for page at url '.$url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); $content=file_get_contents($filename); } else { - $this->message("we have no local copy of ".$url." Downloading to ".$filename); + $this->message('we have no local copy of '.$url.' Downloading to '.$filename); $dir = substr($filename, 0, strrpos($filename, '/')); if(!is_dir($dir)) { - $this->message("creating directories for ".$dir); + $this->message('creating directories for '.$dir); mkdir($dir, 0777, true); } $content=$this->getContents($url); @@ -202,12 +202,12 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } public function get_cached_time($url) { - $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); + $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'."pages/"; + $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; $filename = $pageCacheDir.$simplified_url; if (substr($filename, -1) == '/') { - $filename = $filename."index.html"; + $filename = $filename.'index.html'; } if(!file_exists($filename)) { $this->get_cached($url); @@ -224,11 +224,11 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } public function remove_from_cache($url) { - $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); + $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'."pages/"; + $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; $filename = realpath($pageCacheDir.$simplified_url); - $this->message("removing from cache \"".$filename."\" WELL, NOT REALLY"); + $this->message('removing from cache \''.$filename.'\' WELL, NOT REALLY'); // filename is NO GOOD // unlink($filename); } @@ -342,13 +342,13 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ if (empty($name)) { $this->returnServerError('There is no $name for this RSS expander'); } - $this->message("Loading from ".$param['url']); + $this->message('Loading from '.$param['url']); // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time $content=$this->getContents($name) or $this->returnServerError('Could not request '.$name); $rssContent = simplexml_load_string($content); - $this->message("loaded RSS from ".$param['url']); + $this->message('loaded RSS from '.$param['url']); // TODO insert RSS format detection // we suppose for now, we have some RSS 2.0 $this->collect_RSS_2_0_data($rssContent); @@ -356,10 +356,10 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ protected function collect_RSS_2_0_data($rssContent) { $rssContent = $rssContent->channel[0]; - $this->message("RSS content is ===========\n".var_export($rssContent, true)."==========="); + $this->message('RSS content is ===========\n'.var_export($rssContent, true).'==========='); $this->load_RSS_2_0_feed_data($rssContent); foreach($rssContent->item as $item) { - $this->message("parsing item ".var_export($item, true)); + $this->message('parsing item '.var_export($item, true)); $this->items[] = $this->parseRSSItem($item); } } From 429126e18a2fe4be89db058e76f05c44067e0876 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:14:23 +0200 Subject: [PATCH 07/15] [Bridge] Cleanup file - Remove unnecessary documentation - Update/Clarify documentation where necessary - Remove empty lines - Put 'else' between closing and opening curly braces - Make sure curly braces start right after closing brace on functions '(){...' - Start lines with '.' and use proper indentation when using multi-line string combinations - Add spaces for function definitions/calls and assignments - Add space before opening curly brace after class definition 'class xyz {' --- lib/Bridge.php | 307 +++++++++++++++++++++++-------------------------- 1 file changed, 145 insertions(+), 162 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 56633402..7f4f1bdc 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -1,10 +1,5 @@ items; } - - /** * Defined datas with parameters depending choose bridge - * Note : you can define a cache before with "setCache" + * Note : you can define a cache with "setCache" * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters */ public function setDatas(array $param){ - if( !is_null($this->cache) ){ + if(!is_null($this->cache)){ $this->cache->prepare($param); $time = $this->cache->getTime(); - } - else{ - $time = false; // No cache ? No time ! + } else { + $time = false; } - if( $time !== false && ( time() - $this->getCacheDuration() < $time ) ){ // Cache file has not expired. Serve it. + if($time !== false && (time() - $this->getCacheDuration() < $time)){ $this->items = $this->cache->loadData(); - } - else{ + } else { $this->collectData($param); - if( !is_null($this->cache) ){ // Cache defined ? We go to refresh is memory :D + if(!is_null($this->cache)){ $this->cache->saveData($this->getDatas()); } } } - /** - * Define default bridge name - */ public function getName(){ return $this->name; } - /** - * Define default bridge URI - */ public function getURI(){ return $this->uri; } - /** - * Define default duraction for cache - */ public function getCacheDuration(){ return 3600; } - /** - * Defined cache object to use - */ public function setCache(\CacheAbstract $cache){ $this->cache = $cache; return $this; } - public function message($text) { - if(!file_exists('DEBUG')){ - return; - } - $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); - $calling = $backtrace[2]; - $message = $calling['file'].':'.$calling['line'] - .' class '.get_class($this).'->'.$calling['function'] - .' - '.$text; - error_log($message); - } - - protected function getContents($url,$use_include_path=false,$context=null,$offset=0,$maxlen=null){ - $contextOptions = array( - 'http' => array( - 'user_agent'=>ini_get('user_agent') - ), - ); - - if(defined('PROXY_URL') && $this->useProxy) { - $contextOptions['http']['proxy'] = PROXY_URL; - $contextOptions['http']['request_fulluri'] = true; - - if(is_null($context)){ - $context = stream_context_create($contextOptions); - } else { - $prevContext=$context; - if(!stream_context_set_option($context,$contextOptions)){ - $context=$prevContext; - }; + public function message($text){ + if(!file_exists('DEBUG')) { + return; } - } - if(is_null($maxlen)){ - $content=@file_get_contents($url, $use_include_path, $context, $offset); - }else{ - $content=@file_get_contents($url, $use_include_path, $context, $offset,$maxlen); - } + $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); + $calling = $backtrace[2]; + $message = $calling['file'] . ':' + . $calling['line'] . ' class ' + . get_class($this) . '->' + . $calling['function'] . ' - ' + . $text; - if($content===false){ - $this->message('Cant\'t download '.$url ); - } - return $content; + error_log($message); } - protected function getSimpleHTMLDOM($url, $use_include_path = false, $context=null, $offset = 0, $maxLen=null, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT){ - $content=$this->getContents($url,$use_include_path,$context,$offset,$maxLen); - return str_get_html($content,$lowercase,$forceTagsClosed,$target_charset,$stripRN,$defaultBRText,$defaultSpanText); + protected function getContents($url, $use_include_path = false, $context = null, $offset = 0, $maxlen = null){ + $contextOptions = array( + 'http' => array( + 'user_agent' => ini_get('user_agent') + ), + ); + + if(defined('PROXY_URL') && $this->useProxy){ + $contextOptions['http']['proxy'] = PROXY_URL; + $contextOptions['http']['request_fulluri'] = true; + + if(is_null($context)){ + $context = stream_context_create($contextOptions); + } else { + $prevContext=$context; + if(!stream_context_set_option($context, $contextOptions)){ + $context = $prevContext; + } + } + } + + if(is_null($maxlen)){ + $content = @file_get_contents($url, $use_include_path, $context, $offset); + } else { + $content = @file_get_contents($url, $use_include_path, $context, $offset, $maxlen); + } + + if($content === false) + $this->message('Cant\'t download ' . $url); + + return $content; } + protected function getSimpleHTMLDOM($url, $use_include_path = false, $context = null, $offset = 0, $maxLen = null, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT){ + $content = $this->getContents($url, $use_include_path, $context, $offset, $maxLen); + return str_get_html($content, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); + } } /** - * Extension of BridgeAbstract allowing caching of files downloaded over http files. - * This is specially useful for sites from Gawker or Liberation networks, which allow pages excerpts top be viewed together on index, while full pages have to be downloaded - * separately. - * This class mainly provides a get_cached method which will will download the file from its remote location. - * TODO allow file cache invalidation by touching files on access, and removing files/directories which have not been touched since ... a long time - * After all, rss-bridge is not respaw, isn't it ? + * Extension of BridgeAbstract allowing caching of files downloaded over http. + * TODO allow file cache invalidation by touching files on access, and removing + * files/directories which have not been touched since ... a long time */ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { /** - * Maintain locally cached versions of pages to download to avoid multiple doiwnloads. - * A file name is generated by replacing all "/" by "_", and the file is saved below this bridge cache + * Maintain locally cached versions of pages to download, to avoid multiple downloads. * @param url url to cache - * @return content of file as string + * @return content of the file as string */ - public function get_cached($url) { + public function get_cached($url){ $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); + // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; - $filename = $pageCacheDir.$simplified_url; - if (substr($filename, -1) == '/') { - $filename = $filename.'index.html'; + $pageCacheDir = __DIR__ . '/../cache/pages/'; + $filename = $pageCacheDir . $simplified_url; + + if(substr($filename, -1) == '/'){ + $filename = $filename . 'index.html'; } - if(file_exists($filename)) { - $this->message('loading cached file from '.$filename.' for page at url '.$url); + + if(file_exists($filename)){ + $this->message('loading cached file from ' . $filename . ' for page at url ' . $url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); - $content=file_get_contents($filename); + $content = file_get_contents($filename); } else { - $this->message('we have no local copy of '.$url.' Downloading to '.$filename); + $this->message('we have no local copy of ' . $url . ' Downloading to ' . $filename); $dir = substr($filename, 0, strrpos($filename, '/')); - if(!is_dir($dir)) { - $this->message('creating directories for '.$dir); + + if(!is_dir($dir)){ + $this->message('creating directories for ' . $dir); mkdir($dir, 0777, true); } - $content=$this->getContents($url); + + $content = $this->getContents($url); if($content!==false){ - file_put_contents($filename,$content); + file_put_contents($filename,$content); } } + return $content; } - public function get_cached_time($url) { + public function get_cached_time($url){ $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); + // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; - $filename = $pageCacheDir.$simplified_url; - if (substr($filename, -1) == '/') { - $filename = $filename.'index.html'; + $pageCacheDir = __DIR__ . '/../cache/pages/'; + $filename = $pageCacheDir . $simplified_url; + + if(substr($filename, -1) == '/'){ + $filename = $filename . 'index.html'; } - if(!file_exists($filename)) { + + if(!file_exists($filename)){ $this->get_cached($url); } + return filectime($filename); } - private function refresh_in_cache($pageCacheDir, $filename) { + private function refresh_in_cache($pageCacheDir, $filename){ $currentPath = $filename; - while(!$pageCacheDir==$currentPath) { + while(!$pageCacheDir == $currentPath){ touch($currentPath); $currentPath = dirname($currentPath); } } - public function remove_from_cache($url) { + public function remove_from_cache($url){ $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); - // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/'.'pages/'; - $filename = realpath($pageCacheDir.$simplified_url); - $this->message('removing from cache \''.$filename.'\' WELL, NOT REALLY'); - // filename is NO GOOD -// unlink($filename); - } + // TODO build this from the variable given to Cache + $pageCacheDir = __DIR__ . '/../cache/pages/'; + $filename = realpath($pageCacheDir . $simplified_url); + $this->message('removing from cache \'' . $filename . '\' WELL, NOT REALLY'); + // unlink($filename); + } } -class Bridge{ +class Bridge { static protected $dirBridge; @@ -248,46 +232,43 @@ class Bridge{ * @param string $nameBridge name of the bridge that you want to use * @return true if it is an instantiable bridge, false otherwise. */ - static public function isInstantiable($nameBridge) { - + static public function isInstantiable($nameBridge){ $re = new ReflectionClass($nameBridge); return $re->IsInstantiable(); - } - /** * Create a new bridge object * @param string $nameBridge Defined bridge name you want use * @return Bridge object dedicated */ static public function create($nameBridge){ - if( !preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge)){ + if(!preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge)){ throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.'); } - $nameBridge=$nameBridge.'Bridge'; + $nameBridge = $nameBridge . 'Bridge'; $pathBridge = self::getDir() . $nameBridge . '.php'; - if( !file_exists($pathBridge) ){ - throw new \Exception('The bridge you looking for does not exist. It should be at path '.$pathBridge); + if(!file_exists($pathBridge)){ + throw new \Exception('The bridge you looking for does not exist. It should be at path ' . $pathBridge); } require_once $pathBridge; - if(Bridge::isInstantiable($nameBridge)) { + if(Bridge::isInstantiable($nameBridge)){ return new $nameBridge(); } else { - return FALSE; + return false; } } static public function setDir($dirBridge){ - if( !is_string($dirBridge) ){ + if(!is_string($dirBridge)){ throw new \InvalidArgumentException('Dir bridge must be a string.'); } - if( !file_exists($dirBridge) ){ + if(!file_exists($dirBridge)){ throw new \Exception('Dir bridge does not exist.'); } @@ -297,7 +278,7 @@ class Bridge{ static public function getDir(){ $dirBridge = self::$dirBridge; - if( is_null($dirBridge) ){ + if(is_null($dirBridge)){ throw new \LogicException(__CLASS__ . ' class need to know bridge path !'); } @@ -308,68 +289,72 @@ class Bridge{ * Lists the available bridges. * @return array List of the bridges */ - static public function listBridges() { - + static public function listBridges(){ $pathDirBridge = self::getDir(); $listBridge = array(); $dirFiles = scandir($pathDirBridge); - if( $dirFiles !== false ){ - foreach( $dirFiles as $fileName ) { - if( preg_match('@^([^.]+)Bridge\.php$@U', $fileName, $out) ){ - $listBridge[] = $out[1]; + if($dirFiles !== false){ + foreach($dirFiles as $fileName){ + if(preg_match('@^([^.]+)Bridge\.php$@U', $fileName, $out)){ + $listBridge[] = $out[1]; + } } - } } return $listBridge; } - static function isWhitelisted( $whitelist, $name ) { - if(in_array($name, $whitelist) or in_array($name.'.php', $whitelist) or - // DEPRECATED: the nameBridge notation will be removed in future releases - in_array($name.'Bridge', $whitelist) or in_array($name.'Bridge.php', $whitelist) or - count($whitelist) === 1 and trim($whitelist[0]) === '*') - return TRUE; - else - return FALSE; - } + static function isWhitelisted($whitelist, $name){ + if(in_array($name, $whitelist) + or in_array($name . '.php', $whitelist) + or in_array($name . 'Bridge', $whitelist) // DEPRECATED + or in_array($name . 'Bridge.php', $whitelist) // DEPRECATED + or count($whitelist) === 1 and trim($whitelist[0]) === '*'){ + return true; + } else { + return false; + } + } } -abstract class RssExpander extends HttpCachingBridgeAbstract{ +abstract class RssExpander extends HttpCachingBridgeAbstract { public function collectExpandableDatas(array $param, $name){ - if (empty($name)) { + if(empty($name)){ $this->returnServerError('There is no $name for this RSS expander'); } - $this->message('Loading from '.$param['url']); - // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time - $content=$this->getContents($name) or - $this->returnServerError('Could not request '.$name); + + $this->message('Loading from ' . $param['url']); + + /* Notice we do not use cache here on purpose: + * we want a fresh view of the RSS stream each time + */ + $content = $this->getContents($name) or $this->returnServerError('Could not request ' . $name); $rssContent = simplexml_load_string($content); - $this->message('loaded RSS from '.$param['url']); + $this->message('loaded RSS from ' . $param['url']); // TODO insert RSS format detection - // we suppose for now, we have some RSS 2.0 + // For now we always assume RSS 2.0 $this->collect_RSS_2_0_data($rssContent); } - protected function collect_RSS_2_0_data($rssContent) { + protected function collect_RSS_2_0_data($rssContent){ $rssContent = $rssContent->channel[0]; - $this->message('RSS content is ===========\n'.var_export($rssContent, true).'==========='); + $this->message('RSS content is ===========\n' . var_export($rssContent, true) . '==========='); $this->load_RSS_2_0_feed_data($rssContent); - foreach($rssContent->item as $item) { - $this->message('parsing item '.var_export($item, true)); + foreach($rssContent->item as $item){ + $this->message('parsing item ' . var_export($item, true)); $this->items[] = $this->parseRSSItem($item); } } - protected function RSS_2_0_time_to_timestamp($item) { + protected function RSS_2_0_time_to_timestamp($item){ return DateTime::createFromFormat('D, d M Y H:i:s e', $item->pubDate)->getTimestamp(); } // TODO set title, link, description, language, and so on - protected function load_RSS_2_0_feed_data($rssContent) { + protected function load_RSS_2_0_feed_data($rssContent){ $this->name = trim($rssContent->title); $this->uri = trim($rssContent->link); $this->description = trim($rssContent->description); @@ -382,9 +367,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{ */ abstract protected function parseRSSItem($item); - public function getDescription() { + public function getDescription(){ return $this->description; } } - - From 9021a4b7cc910f94ca2939300b6b3ff233bbe03d Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:17:26 +0200 Subject: [PATCH 08/15] [Bridge] Don't return object instance with 'setCache' --- lib/Bridge.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 7f4f1bdc..0779aa58 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -77,8 +77,6 @@ abstract class BridgeAbstract implements BridgeInterface { public function setCache(\CacheAbstract $cache){ $this->cache = $cache; - - return $this; } public function message($text){ From 8252387386ed6c4b822fb9e6c29418ec67e54f89 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:18:59 +0200 Subject: [PATCH 09/15] [Bridge] Rename 'message' to 'debugMessage' --- lib/Bridge.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 0779aa58..e117d6a4 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -79,7 +79,7 @@ abstract class BridgeAbstract implements BridgeInterface { $this->cache = $cache; } - public function message($text){ + public function debugMessage($text){ if(!file_exists('DEBUG')) { return; } @@ -123,7 +123,7 @@ abstract class BridgeAbstract implements BridgeInterface { } if($content === false) - $this->message('Cant\'t download ' . $url); + $this->debugMessage('Cant\'t download ' . $url); return $content; } @@ -158,16 +158,16 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } if(file_exists($filename)){ - $this->message('loading cached file from ' . $filename . ' for page at url ' . $url); + $this->debugMessage('loading cached file from ' . $filename . ' for page at url ' . $url); // TODO touch file and its parent, and try to do neighbour deletion $this->refresh_in_cache($pageCacheDir, $filename); $content = file_get_contents($filename); } else { - $this->message('we have no local copy of ' . $url . ' Downloading to ' . $filename); + $this->debugMessage('we have no local copy of ' . $url . ' Downloading to ' . $filename); $dir = substr($filename, 0, strrpos($filename, '/')); if(!is_dir($dir)){ - $this->message('creating directories for ' . $dir); + $this->debugMessage('creating directories for ' . $dir); mkdir($dir, 0777, true); } @@ -212,7 +212,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // TODO build this from the variable given to Cache $pageCacheDir = __DIR__ . '/../cache/pages/'; $filename = realpath($pageCacheDir . $simplified_url); - $this->message('removing from cache \'' . $filename . '\' WELL, NOT REALLY'); + $this->debugMessage('removing from cache \'' . $filename . '\' WELL, NOT REALLY'); // unlink($filename); } } @@ -323,7 +323,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $this->returnServerError('There is no $name for this RSS expander'); } - $this->message('Loading from ' . $param['url']); + $this->debugMessage('Loading from ' . $param['url']); /* Notice we do not use cache here on purpose: * we want a fresh view of the RSS stream each time @@ -331,7 +331,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { $content = $this->getContents($name) or $this->returnServerError('Could not request ' . $name); $rssContent = simplexml_load_string($content); - $this->message('loaded RSS from ' . $param['url']); + $this->debugMessage('loaded RSS from ' . $param['url']); // TODO insert RSS format detection // For now we always assume RSS 2.0 $this->collect_RSS_2_0_data($rssContent); @@ -339,10 +339,10 @@ abstract class RssExpander extends HttpCachingBridgeAbstract { protected function collect_RSS_2_0_data($rssContent){ $rssContent = $rssContent->channel[0]; - $this->message('RSS content is ===========\n' . var_export($rssContent, true) . '==========='); + $this->debugMessage('RSS content is ===========\n' . var_export($rssContent, true) . '==========='); $this->load_RSS_2_0_feed_data($rssContent); foreach($rssContent->item as $item){ - $this->message('parsing item ' . var_export($item, true)); + $this->debugMessage('parsing item ' . var_export($item, true)); $this->items[] = $this->parseRSSItem($item); } } From 5a7bc9b0c303744306020bb02870de4e3a101679 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:19:30 +0200 Subject: [PATCH 10/15] [bridges] Fix all calls from 'message' to 'debugMessage' --- bridges/FreenewsBridge.php | 4 ++-- bridges/GawkerBridge.php | 10 +++++----- bridges/Les400CulsBridge.php | 4 ++-- bridges/TheOatMealBridge.php | 12 ++++++------ bridges/WorldOfTanksBridge.php | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index faf7330e..871d10bd 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -21,14 +21,14 @@ class FreenewsBridge extends RssExpander { protected function parseRSSItem($newsItem) { $item = array(); $item['title'] = trim($newsItem->title); - $this->message("item has for title \"".$item['title']."\""); + $this->debugMessage("item has for title \"".$item['title']."\""); if(empty($newsItem->guid)) { $item['uri'] = (string) $newsItem->link; } else { $item['uri'] = (string) $newsItem->guid; } // now load that uri from cache - $this->message("now loading page ".$item['uri']); + $this->debugMessage("now loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); $content = $articlePage->find('.post-container', 0); diff --git a/bridges/GawkerBridge.php b/bridges/GawkerBridge.php index 66de62db..ea89af32 100644 --- a/bridges/GawkerBridge.php +++ b/bridges/GawkerBridge.php @@ -28,7 +28,7 @@ class GawkerBridge extends RssExpander{ $this->name = $param['site']; $url = $this->toURI(strtolower($param['site'])); } - $this->message("loading feed from ".$this->getURI()); + $this->debugMessage("loading feed from ".$this->getURI()); parent::collectExpandableDatas($param, $url); } @@ -37,10 +37,10 @@ class GawkerBridge extends RssExpander{ $item['uri'] = trim($newsItem->link); $item['title'] = trim($newsItem->title); $item['timestamp'] = $this->RSS_2_0_time_to_timestamp($newsItem); - $this->message("///////////////////////////////////////////////////////////////////////////////////////\nprocessing item ".var_export($item, true)."\n\n\nbuilt from\n\n\n".var_export($newsItem, true)); + $this->debugMessage("///////////////////////////////////////////////////////////////////////////////////////\nprocessing item ".var_export($item, true)."\n\n\nbuilt from\n\n\n".var_export($newsItem, true)); try { // now load that uri from cache - $this->message("loading page ".$item['uri']); + $this->debugMessage("loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); if(is_object($articlePage)) { $content = $articlePage->find('.post-content', 0); @@ -51,14 +51,14 @@ class GawkerBridge extends RssExpander{ $item['author'] = $authorLink->innertext; // TODO use author link href to fill the feed info } - $this->message("item quite loaded : ".var_export($item, true)); + $this->debugMessage("item quite loaded : ".var_export($item, true)); // I set item content as last element, for easier var_export reading $item['content'] = $content->innertext; } else { throw new Exception("cache content for ".$item['uri']." is NOT a Simple DOM parser object !"); } } catch(Exception $e) { - $this->message("obtaining ".$item['uri']." resulted in exception ".$e->getMessage().". Deleting cached page ..."); + $this->debugMessage("obtaining ".$item['uri']." resulted in exception ".$e->getMessage().". Deleting cached page ..."); // maybe file is incorrect. it should be discarded from cache $this->remove_from_cache($item['url']); $item['content'] = $e->getMessage(); diff --git a/bridges/Les400CulsBridge.php b/bridges/Les400CulsBridge.php index 101b4032..695f6e90 100644 --- a/bridges/Les400CulsBridge.php +++ b/bridges/Les400CulsBridge.php @@ -21,14 +21,14 @@ class Les400CulsBridge extends RssExpander{ protected function parseRSSItem($newsItem) { $item = array(); $item['title'] = trim((string) $newsItem->title); - $this->message("browsing item ".var_export($newsItem, true)); + $this->debugMessage("browsing item ".var_export($newsItem, true)); if(empty($newsItem->guid)) { $item['uri'] = (string) $newsItem->link; } else { $item['uri'] = (string) $newsItem->guid; } // now load that uri from cache - $this->message("now loading page ".$item['uri']); + $this->debugMessage("now loading page ".$item['uri']); // $articlePage = str_get_html($this->get_cached($item['uri'])); // $content = $articlePage->find('.post-container', 0); diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index 5a580beb..a7bf308a 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -24,10 +24,10 @@ class TheOatmealBridge extends RssExpander{ protected function collect_RSS_2_0_data($rssContent) { $rssContent->registerXPathNamespace("dc", "http://purl.org/dc/elements/1.1/"); $rssHeaderContent = $rssContent->channel[0]; - $this->message("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); + $this->debugMessage("RSS content is ===========\n".var_export($rssHeaderContent, true)."==========="); $this->load_RSS_2_0_feed_data($rssHeaderContent); foreach($rssContent->item as $item) { - $this->message("parsing item ".var_export($item, true)); + $this->debugMessage("parsing item ".var_export($item, true)); $this->items[] = $this->parseRSSItem($item); } } @@ -39,10 +39,10 @@ class TheOatmealBridge extends RssExpander{ $rdf = $newsItem->children($namespaces['rdf']); $item = array(); $item['title'] = trim($newsItem->title); - $this->message("browsing Oatmeal item ".var_export($newsItem, true)); + $this->debugMessage("browsing Oatmeal item ".var_export($newsItem, true)); $item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about; // now load that uri from cache - $this->message("now loading page ".$item['uri']); + $this->debugMessage("now loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); $content = $articlePage->find('#comic', 0); @@ -51,10 +51,10 @@ class TheOatmealBridge extends RssExpander{ } $item['content'] = $content->innertext; - $this->message("dc content is ".var_export($dc, true)); + $this->debugMessage("dc content is ".var_export($dc, true)); $item['author'] = (string) $dc->creator; $item['timestamp'] = DateTime::createFromFormat(DateTime::ISO8601, $dc->date)->getTimestamp(); - $this->message("writtem by ".$item['author']." on ".$item['timestamp']); + $this->debugMessage("writtem by ".$item['author']." on ".$item['timestamp']); return $item; } diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index f6de74ad..b76c4834 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -45,7 +45,7 @@ class WorldOfTanksBridge extends HttpCachingBridgeAbstract{ $this->uri = WORLD_OF_TANKS.$this->lang.NEWS.'pc-browser/'.$param['category']."/"; } $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request '.$this->getURI()); - $this->message("loaded HTML from ".$this->getURI()); + $this->debugMessage("loaded HTML from ".$this->getURI()); // customize name $this->name = $html->find('title', 0)->innertext; foreach($html->find('.b-imgblock_ico') as $infoLink) { @@ -57,7 +57,7 @@ class WorldOfTanksBridge extends HttpCachingBridgeAbstract{ $item = array(); $item['uri'] = WORLD_OF_TANKS.$infoLink->href; // now load that uri from cache - $this->message("loading page ".$item['uri']); + $this->debugMessage("loading page ".$item['uri']); $articlePage = str_get_html($this->get_cached($item['uri'])); $content = $articlePage->find('.l-content', 0); HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS); From 56cb116ce6cc5dd027a247fa4f2ba1bb54b4e0f3 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:31:02 +0200 Subject: [PATCH 11/15] [Bridge] Format unreadable lines longer than 80 chars --- lib/Bridge.php | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index e117d6a4..0dfe8f91 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -42,7 +42,8 @@ abstract class BridgeAbstract implements BridgeInterface { /** * Defined datas with parameters depending choose bridge * Note : you can define a cache with "setCache" - * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters + * @param array $param $_REQUEST, $_GET, $_POST, or array with expected + * bridge paramters */ public function setDatas(array $param){ if(!is_null($this->cache)){ @@ -87,15 +88,19 @@ abstract class BridgeAbstract implements BridgeInterface { $backtrace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 3); $calling = $backtrace[2]; $message = $calling['file'] . ':' - . $calling['line'] . ' class ' - . get_class($this) . '->' - . $calling['function'] . ' - ' - . $text; + . $calling['line'] . ' class ' + . get_class($this) . '->' + . $calling['function'] . ' - ' + . $text; error_log($message); } - protected function getContents($url, $use_include_path = false, $context = null, $offset = 0, $maxlen = null){ + protected function getContents($url + , $use_include_path = false + , $context = null + , $offset = 0 + , $maxlen = null){ $contextOptions = array( 'http' => array( 'user_agent' => ini_get('user_agent') @@ -128,9 +133,25 @@ abstract class BridgeAbstract implements BridgeInterface { return $content; } - protected function getSimpleHTMLDOM($url, $use_include_path = false, $context = null, $offset = 0, $maxLen = null, $lowercase = true, $forceTagsClosed = true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT){ + protected function getSimpleHTMLDOM($url + , $use_include_path = false + , $context = null + , $offset = 0 + , $maxLen = null + , $lowercase = true + , $forceTagsClosed = true + , $target_charset = DEFAULT_TARGET_CHARSET + , $stripRN = true + , $defaultBRText = DEFAULT_BR_TEXT + , $defaultSpanText = DEFAULT_SPAN_TEXT){ $content = $this->getContents($url, $use_include_path, $context, $offset, $maxLen); - return str_get_html($content, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); + return str_get_html($content + , $lowercase + , $forceTagsClosed + , $target_charset + , $stripRN + , $defaultBRText + , $defaultSpanText); } } @@ -242,7 +263,11 @@ class Bridge { */ static public function create($nameBridge){ if(!preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge)){ - throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.'); + $message = << Date: Wed, 24 Aug 2016 20:35:19 +0200 Subject: [PATCH 12/15] [Bridge] Rename to --- lib/Bridge.php | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index 0dfe8f91..bd23006e 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -172,20 +172,20 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // TODO build this from the variable given to Cache $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filename = $pageCacheDir . $simplified_url; + $filepath = $pageCacheDir . $simplified_url; - if(substr($filename, -1) == '/'){ - $filename = $filename . 'index.html'; + if(substr($filepath, -1) == '/'){ + $filepath .= 'index.html'; } - if(file_exists($filename)){ - $this->debugMessage('loading cached file from ' . $filename . ' for page at url ' . $url); + if(file_exists($filepath)){ + $this->debugMessage('loading cached file from ' . $filepath . ' for page at url ' . $url); // TODO touch file and its parent, and try to do neighbour deletion - $this->refresh_in_cache($pageCacheDir, $filename); - $content = file_get_contents($filename); + $this->refresh_in_cache($pageCacheDir, $filepath); + $content = file_get_contents($filepath); } else { - $this->debugMessage('we have no local copy of ' . $url . ' Downloading to ' . $filename); - $dir = substr($filename, 0, strrpos($filename, '/')); + $this->debugMessage('we have no local copy of ' . $url . ' Downloading to ' . $filepath); + $dir = substr($filepath, 0, strrpos($filepath, '/')); if(!is_dir($dir)){ $this->debugMessage('creating directories for ' . $dir); @@ -193,8 +193,8 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } $content = $this->getContents($url); - if($content!==false){ - file_put_contents($filename,$content); + if($content !== false){ + file_put_contents($filepath, $content); } } @@ -206,21 +206,21 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // TODO build this from the variable given to Cache $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filename = $pageCacheDir . $simplified_url; + $filepath = $pageCacheDir . $simplified_url; - if(substr($filename, -1) == '/'){ - $filename = $filename . 'index.html'; + if(substr($filepath, -1) == '/'){ + $filepath .= 'index.html'; } - if(!file_exists($filename)){ + if(!file_exists($filepath)){ $this->get_cached($url); } - return filectime($filename); + return filectime($filepath); } - private function refresh_in_cache($pageCacheDir, $filename){ - $currentPath = $filename; + private function refresh_in_cache($pageCacheDir, $filepath){ + $currentPath = $filepath; while(!$pageCacheDir == $currentPath){ touch($currentPath); $currentPath = dirname($currentPath); @@ -232,9 +232,9 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // TODO build this from the variable given to Cache $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filename = realpath($pageCacheDir . $simplified_url); - $this->debugMessage('removing from cache \'' . $filename . '\' WELL, NOT REALLY'); - // unlink($filename); + $filepath = realpath($pageCacheDir . $simplified_url); + $this->debugMessage('removing from cache \'' . $filepath . '\' WELL, NOT REALLY'); + // unlink($filepath); } } From b9b2428f63a2b38ba2677343466f487095329601 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:44:21 +0200 Subject: [PATCH 13/15] [Bridge] Add function to build chache file path --- lib/Bridge.php | 51 +++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index bd23006e..b80dc61d 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -168,20 +168,14 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { * @return content of the file as string */ public function get_cached($url){ - $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); - // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filepath = $pageCacheDir . $simplified_url; - - if(substr($filepath, -1) == '/'){ - $filepath .= 'index.html'; - } + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); if(file_exists($filepath)){ $this->debugMessage('loading cached file from ' . $filepath . ' for page at url ' . $url); // TODO touch file and its parent, and try to do neighbour deletion - $this->refresh_in_cache($pageCacheDir, $filepath); + $this->refresh_in_cache($cacheDir, $filepath); $content = file_get_contents($filepath); } else { $this->debugMessage('we have no local copy of ' . $url . ' Downloading to ' . $filepath); @@ -202,15 +196,9 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } public function get_cached_time($url){ - $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); - // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filepath = $pageCacheDir . $simplified_url; - - if(substr($filepath, -1) == '/'){ - $filepath .= 'index.html'; - } + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); if(!file_exists($filepath)){ $this->get_cached($url); @@ -219,20 +207,37 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { return filectime($filepath); } - private function refresh_in_cache($pageCacheDir, $filepath){ + private function refresh_in_cache($cacheDir, $filepath){ $currentPath = $filepath; - while(!$pageCacheDir == $currentPath){ + while(!$cacheDir == $currentPath){ touch($currentPath); $currentPath = dirname($currentPath); } } - public function remove_from_cache($url){ - $simplified_url = str_replace(['http://', 'https://', '?', '&', '='], ['', '', '/', '/', '/'], $url); + private function buildCacheFilePath($url, $cacheDir){ + $simplified_url = str_replace( + ['http://', 'https://', '?', '&', '='], + ['', '', '/', '/', '/'], + $url); + if(substr($cacheDir, -1) !== '/'){ + $cacheDir .= '/'; + } + + $filepath = $cacheDir . $simplified_url; + + if(substr($filepath, -1) === '/'){ + $filepath .= 'index.html'; + } + + return $filepath; + } + + public function remove_from_cache($url){ // TODO build this from the variable given to Cache - $pageCacheDir = __DIR__ . '/../cache/pages/'; - $filepath = realpath($pageCacheDir . $simplified_url); + $cacheDir = __DIR__ . '/../cache/pages/'; + $filepath = $this->buildCacheFilePath($url, $cacheDir); $this->debugMessage('removing from cache \'' . $filepath . '\' WELL, NOT REALLY'); // unlink($filepath); } From 95404b8fc452f7bd44ba3b976144a4edebadf015 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 24 Aug 2016 20:48:12 +0200 Subject: [PATCH 14/15] [Bridge] Move 'Bridge' class at top of the file --- lib/Bridge.php | 206 ++++++++++++++++++++++++------------------------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index b80dc61d..c7bd693c 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -1,4 +1,107 @@ IsInstantiable(); + } + + /** + * Create a new bridge object + * @param string $nameBridge Defined bridge name you want use + * @return Bridge object dedicated + */ + static public function create($nameBridge){ + if(!preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge)){ + $message = <<IsInstantiable(); - } - - /** - * Create a new bridge object - * @param string $nameBridge Defined bridge name you want use - * @return Bridge object dedicated - */ - static public function create($nameBridge){ - if(!preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge)){ - $message = << Date: Wed, 24 Aug 2016 20:50:32 +0200 Subject: [PATCH 15/15] [Bridge] Fix missing function scope --- lib/Bridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Bridge.php b/lib/Bridge.php index c7bd693c..41c71a9b 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -89,7 +89,7 @@ EOD; return $listBridge; } - static function isWhitelisted($whitelist, $name){ + static public function isWhitelisted($whitelist, $name){ if(in_array($name, $whitelist) or in_array($name . '.php', $whitelist) or in_array($name . 'Bridge', $whitelist) // DEPRECATED