[CourrierInternationalBridge] fix content parsing
Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
ad534444fa
commit
3dcdaa1595
1 changed file with 10 additions and 4 deletions
|
@@ -25,14 +25,20 @@ class CourrierInternationalBridge extends BridgeAbstract{
|
||||||
$item['uri'] = self::URI.$item['uri'];
|
$item['uri'] = self::URI.$item['uri'];
|
||||||
}
|
}
|
||||||
|
|
||||||
$page = $this->getSimpleHTMLDOM($item['uri']);
|
|
||||||
|
$page = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
|
|
||||||
$cleaner = new HTMLSanitizer();
|
$cleaner = new HTMLSanitizer();
|
||||||
|
|
||||||
$item['content'] = $cleaner->sanitize($page->find("div.article-text")[0]);
|
$content = $page->find('.article-text',0);
|
||||||
$item['title'] = strip_tags($article->find(".title")[0]);
|
if(!$content){
|
||||||
|
$content = $page->find('.depeche-text',0);
|
||||||
|
}
|
||||||
|
|
||||||
$dateTime = date_parse($page->find("time")[0]);
|
$item['content'] = $cleaner->sanitize($content);
|
||||||
|
$item['title'] = strip_tags($article->find(".title",0));
|
||||||
|
|
||||||
|
$dateTime = date_parse($page->find("time",0));
|
||||||
|
|
||||||
$item['timestamp'] = mktime(
|
$item['timestamp'] = mktime(
|
||||||
$dateTime['hour'],
|
$dateTime['hour'],
|
||||||
|
|
Loading…
Reference in a new issue