diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index d41d4423..862e58bb 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -33,7 +33,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{ if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - $htmlepisode=str_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href'))); + $htmlepisode=$this->get_cached($episode->find('a', 0)->getAttribute('href')); $item = array(); $item['author'] = $episode->find('a', 0)->text(); diff --git a/bridges/FreenewsBridge.php b/bridges/FreenewsBridge.php index 727f9f5f..0df3c6ca 100644 --- a/bridges/FreenewsBridge.php +++ b/bridges/FreenewsBridge.php @@ -25,7 +25,7 @@ class FreenewsBridge extends RssExpander { } // now load that uri from cache $this->debugMessage("now loading page ".$item['uri']); - $articlePage = str_get_html($this->get_cached($item['uri'])); + $articlePage = $this->get_cached($item['uri']); $content = $articlePage->find('.post-container', 0); $item['content'] = $content->innertext; diff --git a/bridges/GawkerBridge.php b/bridges/GawkerBridge.php index 60122208..ac52ae0c 100644 --- a/bridges/GawkerBridge.php +++ b/bridges/GawkerBridge.php @@ -45,7 +45,7 @@ class GawkerBridge extends RssExpander{ try { // now load that uri from cache $this->debugMessage("loading page ".$item['uri']); - $articlePage = str_get_html($this->get_cached($item['uri'])); + $articlePage = $this->get_cached($item['uri']); if(is_object($articlePage)) { $content = $articlePage->find('.post-content', 0); HTMLSanitizer::defaultImageSrcTo($content, $this->getURI()); diff --git a/bridges/JapanExpoBridge.php b/bridges/JapanExpoBridge.php index 7ac1f8a7..ea20592f 100644 --- a/bridges/JapanExpoBridge.php +++ b/bridges/JapanExpoBridge.php @@ -1,5 +1,5 @@ maintainer = 'Ginko'; @@ -64,7 +64,10 @@ class JapanExpoBridge extends BridgeAbstract{ if ($fullcontent) { if ($count < 5) { - $article_html = 
$this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request JapanExpo: '.$url); + if($this->get_cached_time($url) <= strtotime('-24 hours')) + $this->remove_from_cache($url); + + $article_html = $this->get_cached($url) or $this->returnServerError('Could not request JapanExpo: '.$url); $header = $article_html->find('header.pageHeadBox', 0); $timestamp = strtotime($header->find('time', 0)->datetime); $title_html = $header->find('div.section', 0)->next_sibling(); diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php index e81917d8..c4c2fa06 100644 --- a/bridges/KununuBridge.php +++ b/bridges/KununuBridge.php @@ -1,5 +1,5 @@ maintainer = "logmanoriginal"; $this->name = "Kununu Bridge"; /* This will be replaced later! */ @@ -248,7 +248,10 @@ class KununuBridge extends BridgeAbstract{ */ private function extract_full_description($uri){ // Load full article - $html = $this->getSimpleHTMLDOM($uri); + if($this->get_cached_time($uri) <= strtotime('-24 hours')) + $this->remove_from_cache($uri); + + $html = $this->get_cached($uri); if($html === false) $this->returnServerError('Could not load full description!'); diff --git a/bridges/Les400CulsBridge.php b/bridges/Les400CulsBridge.php index 2dd9883f..2925a290 100644 --- a/bridges/Les400CulsBridge.php +++ b/bridges/Les400CulsBridge.php @@ -29,7 +29,7 @@ class Les400CulsBridge extends RssExpander{ } // now load that uri from cache $this->debugMessage("now loading page ".$item['uri']); -// $articlePage = str_get_html($this->get_cached($item['uri'])); +// $articlePage = $this->get_cached($item['uri']); // $content = $articlePage->find('.post-container', 0); $item['content'] = (string) $newsItem->description; diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php index 839b183a..1a340053 100644 --- a/bridges/LichessBridge.php +++ b/bridges/LichessBridge.php @@ -1,6 +1,6 @@ getSimpleHTMLDOM($blog_post_uri); + if($this->get_cached_time($blog_post_uri) <= strtotime('-24 hours')) + 
$this->remove_from_cache($blog_post_uri); + + $blog_post_html = $this->get_cached($blog_post_uri); $blog_post_div = $blog_post_html->find('#lichess_blog', 0); $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; diff --git a/bridges/NumeramaBridge.php b/bridges/NumeramaBridge.php index d9ae0835..132c1e6a 100644 --- a/bridges/NumeramaBridge.php +++ b/bridges/NumeramaBridge.php @@ -1,5 +1,5 @@ find('pubDate', 0)->plaintext); $article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext); - $article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url); + if($this->get_cached_time($article_url) <= strtotime('-24 hours')) + $this->remove_from_cache($article_url); + + $article_html = $this->get_cached($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url); $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block $contents = ''; // add post picture $contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post diff --git a/bridges/TheOatMealBridge.php b/bridges/TheOatMealBridge.php index c6ab0cd0..a152157b 100644 --- a/bridges/TheOatMealBridge.php +++ b/bridges/TheOatMealBridge.php @@ -43,7 +43,7 @@ class TheOatmealBridge extends RssExpander{ $item['uri']=(string) $newsItem->attributes($namespaces['rdf'])->about; // now load that uri from cache $this->debugMessage("now loading page ".$item['uri']); - $articlePage = str_get_html($this->get_cached($item['uri'])); + $articlePage = $this->get_cached($item['uri']); $content = $articlePage->find('#comic', 0); if($content==null) { diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index 7a284037..ea75441b 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -3,7 +3,7 @@ define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article
define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know... -class WikipediaBridge extends BridgeAbstract{ +class WikipediaBridge extends HttpCachingBridgeAbstract { public function loadMetadatas(){ $this->maintainer = 'logmanoriginal'; $this->name = 'Wikipedia bridge for many languages'; @@ -188,7 +188,10 @@ class WikipediaBridge extends BridgeAbstract{ * Loads the full article from a given URI */ private function LoadFullArticle($uri){ - $content_html = $this->getSimpleHTMLDOM($uri); + if($this->get_cached_time($uri) <= strtotime('-24 hours')) + $this->remove_from_cache($uri); + + $content_html = $this->get_cached($uri); if(!$content_html) $this->returnServerError('Could not load site: ' . $uri . '!'); diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 370b772c..271d2cba 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -1,7 +1,7 @@ find('updated', 0)->innertext); } - $article_html = $this->getSimpleHTMLDOM($item['uri']); + if($this->get_cached_time($item['uri']) <= strtotime('-24 hours')) + $this->remove_from_cache($item['uri']); + + $article_html = $this->get_cached($item['uri']); // Attempt to find most common content div if(!isset($item['content'])){ diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index 98f27bf0..e327685b 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -58,7 +58,7 @@ class WorldOfTanksBridge extends HttpCachingBridgeAbstract{ $item['uri'] = $this->uri.$infoLink->href; // now load that uri from cache $this->debugMessage("loading page ".$item['uri']); - $articlePage = str_get_html($this->get_cached($item['uri'])); + $articlePage = $this->get_cached($item['uri']); $content = $articlePage->find('.l-content', 0); HTMLSanitizer::defaultImageSrcTo($content, $this->uri); $item['title'] = $content->find('h1', 0)->innertext; diff --git a/lib/Bridge.php b/lib/Bridge.php index 55c85c06..7f4adee0 100644 --- a/lib/Bridge.php +++ 
b/lib/Bridge.php @@ -419,7 +419,7 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } } - return $content; + return str_get_html($content); } public function get_cached_time($url){ @@ -465,8 +465,8 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { // TODO build this from the variable given to Cache $cacheDir = __DIR__ . '/../cache/pages/'; $filepath = $this->buildCacheFilePath($url, $cacheDir); - $this->debugMessage('removing from cache \'' . $filepath . '\' WELL, NOT REALLY'); - // unlink($filepath); + $this->debugMessage('removing from cache \'' . $filepath . '\''); + if(file_exists($filepath)) unlink($filepath); } }