Merge pull request #130 from lagaisse/CpasbienBridge

Cpasbien : Ajout de la date de publication en utilisant la date du cache
This commit is contained in:
Mitsu 2015-05-21 18:38:57 +02:00
commit 75585e7b52
2 changed files with 39 additions and 6 deletions

View file

@ -10,7 +10,23 @@
* @maintainer lagaisse
* @use1(q="keywords like this")
*/
class CpasbienBridge extends BridgeAbstract{
/**
 * Build a simple_html_dom tree from an HTML string already held in memory,
 * instead of having the library read it from a file or URL.
 *
 * NOTE(review): the parameter list appears to follow simple_html_dom's own
 * string loader; $maxLen is accepted but never used in this body.
 *
 * @param string $contents         Raw HTML markup to parse.
 * @param int    $maxLen           Unused here.
 * @param bool   $lowercase        Forwarded to the parser constructor and to load().
 * @param bool   $forceTagsClosed  Forwarded to the parser constructor.
 * @param string $target_charset   Forwarded to the parser constructor.
 * @param bool   $stripRN          Forwarded to the parser constructor and to load().
 * @param string $defaultBRText    Forwarded to the parser constructor.
 * @param string $defaultSpanText  Forwarded to the parser constructor.
 *
 * @return simple_html_dom|false The loaded DOM, or false when $contents is
 *                               empty or longer than MAX_FILE_SIZE bytes.
 */
function content_get_html($contents, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
{
    // The parser is created with $forceTagsClosed so unterminated tags get closed.
    $parser = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    // Only non-empty markup within the size limit is parsed.
    $isParsable = !empty($contents) && strlen($contents) <= MAX_FILE_SIZE;
    if (!$isParsable) {
        return false;
    }

    // $lowercase is passed again here: it can force all selectors to lowercase.
    $parser->load($contents, $lowercase, $stripRN);

    return $parser;
}
class CpasbienBridge extends HttpCachingBridgeAbstract{
private $request;
@ -27,14 +43,16 @@ class CpasbienBridge extends BridgeAbstract{
foreach ($html->find('#gauche',0)->find('div') as $episode) {
if ($episode->getAttribute('class')=='ligne0' || $episode->getAttribute('class')=='ligne1')
{
$htmlepisode=file_get_html($episode->find('a', 0)->getAttribute('href'));
$htmlepisode=content_get_html($this->get_cached($episode->find('a', 0)->getAttribute('href')));
$item = new \Item();
$item->name = $episode->find('a', 0)->text();
$item->title = $episode->find('a', 0)->text();
$element=$htmlepisode->find('#textefiche', 0)->find('p',1);
if (isset($element)) {
$item->content = $element->text();
$item->timestamp = $this->get_cached_time($episode->find('a', 0)->getAttribute('href'));
$textefiche=$htmlepisode->find('#textefiche', 0)->find('p',1);
if (isset($textefiche)) {
$item->content = $textefiche->text();
}
else {
$item->content = $htmlepisode->find('#textefiche', 0)->find('p',0)->text();
@ -42,6 +60,7 @@ class CpasbienBridge extends BridgeAbstract{
$item->id = $episode->find('a', 0)->getAttribute('href');
$item->uri = $this->getURI() . $htmlepisode->find('#telecharger',0)->getAttribute('href');
$item->thumbnailUri = $htmlepisode->find('#bigcover', 0)->find('img',0)->getAttribute('src');
$this->items[] = $item;
}
}

View file

@ -123,7 +123,21 @@ abstract class HttpCachingBridgeAbstract extends BridgeAbstract {
}
return file_get_contents($filename);
}
/**
 * Return the timestamp of the cached copy of a page, fetching the page
 * first when no cached copy exists yet.
 *
 * The cache file path is derived from the URL: the scheme is dropped and
 * the characters "?", "&" and "=" are turned into directory separators.
 * A path ending in "/" is stored as "index.html" inside that directory.
 *
 * The value comes from filectime(), i.e. the inode change time of the
 * cache file, not its modification time.
 *
 * @param string $url URL of the page whose cache time is wanted.
 *
 * @return int|false Unix timestamp of the cache file, or false when the
 *                   file still does not exist after trying to fetch it.
 */
public function get_cached_time($url) {
    $simplified_url = str_replace(["http://", "https://", "?", "&", "="], ["", "", "/", "/", "/"], $url);
    // TODO build this from the variable given to Cache
    $pageCacheDir = __DIR__ . '/../cache/'."pages/";
    $filename = $pageCacheDir.$simplified_url;
    if (substr($filename, -1) == '/') {
        $filename = $filename."index.html";
    }
    if(!file_exists($filename)) {
        // Not cached yet: ask get_cached() to fetch the page.
        $this->get_cached($url);
        if(!file_exists($filename)) {
            // The fetch did not produce the file; report failure explicitly
            // rather than letting filectime() warn about a missing path.
            return false;
        }
    }
    return filectime($filename);
}
private function refresh_in_cache($pageCacheDir, $filename) {
$currentPath = $filename;
while(!$pageCacheDir==$currentPath) {