Merge pull request #130 from lagaisse/CpasbienBridge

Cpasbien : Ajout de la date de publication en utilisant la date du cache
This commit is contained in:
Mitsu 2015-05-21 18:38:57 +02:00
commit 75585e7b52
2 changed files with 39 additions and 6 deletions

View file

@ -10,7 +10,23 @@
* @maintainer lagaisse * @maintainer lagaisse
* @use1(q="keywords like this") * @use1(q="keywords like this")
*/ */
class CpasbienBridge extends BridgeAbstract{
/**
 * Build a simple_html_dom object from HTML that is already held in memory,
 * instead of reading it from a file or URL as file_get_html() does.
 *
 * @param string $contents        HTML markup to parse
 * @param int    $maxLen          never read by this function; kept in the signature
 * @param bool   $lowercase       forwarded to the simple_html_dom constructor and to load()
 * @param bool   $forceTagsClosed forwarded to the simple_html_dom constructor
 * @param string $target_charset  forwarded to the simple_html_dom constructor
 * @param bool   $stripRN         forwarded to the simple_html_dom constructor and to load()
 * @param string $defaultBRText   forwarded to the simple_html_dom constructor
 * @param string $defaultSpanText forwarded to the simple_html_dom constructor
 * @return simple_html_dom|false  the loaded DOM, or false when $contents is
 *                                empty() or longer than MAX_FILE_SIZE bytes
 */
function content_get_html($contents, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
    // Validate before allocating anything: rejected input should not pay for
    // a parser instance that is thrown away immediately.
    // Note that empty() also rejects the literal string "0".
    if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
    {
        return false;
    }
    // Start from an empty DOM (null source); the markup is fed in by load() below.
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);
    return $dom;
}
class CpasbienBridge extends HttpCachingBridgeAbstract{
private $request; private $request;
@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{
foreach ($html->find('#gauche',0)->find('div') as $episode) { foreach ($html->find('#gauche',0)->find('div') as $episode) {
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1') if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
{ {
$htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href'));
$htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
$item = new \Item(); $item = new \Item();
$item->name = $episode->find('a', 0)->text(); $item->name = $episode->find('a', 0)->text();
$item->title = $episode->find('a', 0)->text(); $item->title = $episode->find('a', 0)->text();
$element=$htmlepisode->find('#textefiche', 0)->find('p',1); $item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href'));
if (isset($element)) { $textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1);
$item->content = $element->text(); if (isset($textefiche)) {
$item->content = $textefiche->text();
} }
else { else {
$item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text(); $item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();
@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{
$item->id = $episode->find('a', 0)->getAttribute('href'); $item->id = $episode->find('a', 0)->getAttribute('href');
$item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href'); $item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
$item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src');
$this->items[] = $item; $this->items[] = $item;
} }
} }

View file

@ -123,7 +123,21 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
} }
return file_get_contents($filename); return file_get_contents($filename);
} }
/**
 * Return the filectime() of the cache file that corresponds to a URL.
 *
 * The cache file path is derived from the URL; when that file does not
 * exist, get_cached($url) is called before the time is read.
 * NOTE(review): get_cached() is presumably what creates the file - its body
 * is not visible here, confirm.
 *
 * @param string $url URL of the page whose cache file is inspected
 * @return int|false  result of filectime() on the cache file
 */
public function get_cached_time($url) {
    // Turn the URL into a relative path: drop the scheme and map the query
    // separators "?", "&" and "=" to "/".
    $needles = ["http://", "https://", "?", "&", "="];
    $substitutes = ["", "", "/", "/", "/"];
    // TODO build this from the variable given to Cache
    $cacheRoot = __DIR__ . '/../cache/'."pages/";
    $cacheFile = $cacheRoot . str_replace($needles, $substitutes, $url);
    // A path ending in a slash names a directory; use its index.html.
    if (substr($cacheFile, -1) === '/') {
        $cacheFile .= "index.html";
    }
    if (file_exists($cacheFile) === false) {
        $this->get_cached($url);
    }
    return filectime($cacheFile);
}
private function refresh_in_cache($pageCacheDir, $filename) { private function refresh_in_cache($pageCacheDir, $filename) {
$currentPath = $filename; $currentPath = $filename;
while(!$pageCacheDir==$currentPath) { while(!$pageCacheDir==$currentPath) {