From de16ba234e576b08bfedfd9828df5857ad814711 Mon Sep 17 00:00:00 2001 From: Kevin Lagaisse Date: Wed, 20 May 2015 21:47:58 +0200 Subject: [PATCH] =?UTF-8?q?Ajout=20de=20la=20date=20de=20publication=20en?= =?UTF-8?q?=20utilisant=20la=20date=20de=20derni=C3=A8re=20modification=20?= =?UTF-8?q?du=20cache?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bridges/CpasbienBridge.php | 29 ++++++++++++++++++++++++----- lib/Bridge.php | 16 +++++++++++++++- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php index d2d1980b..bd78cbd7 100644 --- a/bridges/CpasbienBridge.php +++ b/bridges/CpasbienBridge.php @@ -10,7 +10,23 @@ * @maintainer lagaisse * @use1(q="keywords like this") */ -class CpasbienBridge extends BridgeAbstract{ + +// simple_html_dom function to get the dom from contents instead of from a file +function content_get_html($contents, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +{ + // We DO force the tags to be terminated. + $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); + + if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) + { + return false; + } + // The second parameter can force the selectors to all be lowercase. 
+ $dom->load($contents, $lowercase, $stripRN); + return $dom; +} + +class CpasbienBridge extends HttpCachingBridgeAbstract{ private $request; @@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{ foreach ($html->find('#gauche',0)->find('div') as $episode) { if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') { - $htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href')); + + $htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href'))); $item = new \Item(); $item->name = $episode->find('a', 0)->text(); $item->title = $episode->find('a', 0)->text(); - $element=$htmlepisode->find('#textefiche', 0)->find('p',1); - if (isset($element)) { - $item->content = $element->text(); + $item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href')); + $textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1); + if (isset($textefiche)) { + $item->content = $textefiche->text(); } else { $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); @@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{ $item->id = $episode->find('a', 0)->getAttribute('href'); $item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href'); + $item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src'); $this->items[] = $item; } } diff --git a/lib/Bridge.php b/lib/Bridge.php index 41b5e498..fcb5fbae 100644 --- a/lib/Bridge.php +++ b/lib/Bridge.php @@ -123,7 +123,21 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract { } return file_get_contents($filename); } - + + public function get_cached_time($url) { + $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url); + // TODO build this from the variable given to Cache + $pageCacheDir = __DIR__ . 
'/../cache/'."pages/"; + $filename = $pageCacheDir.$simplified_url; + if (substr($filename, -1) == '/') { + $filename = $filename."index.html"; + } + if(!file_exists($filename)) { + $this->get_cached($url); + } + return filectime($filename); + } + private function refresh_in_cache($pageCacheDir, $filename) { $currentPath = $filename; while(!$pageCacheDir==$currentPath) {