[LeMondeInformatique] Handle special articles (#1039)

Fix content extraction for special articles that compile previous articles
This commit is contained in:
ORelio 2019-03-02 19:03:29 +01:00 committed by LogMANOriginal
parent b96c25a3af
commit d37f0c14a0

View file

@@ -20,12 +20,13 @@ class LeMondeInformatiqueBridge extends FeedExpander {
str_replace(
'/grande/',
'/petite/',
-$article_html->find('.article-image', 0)->find('img', 0)->src
+$article_html->find('.article-image > img, figure > img', 0)->src
)
);
//No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail
-$item['content'] = utf8_encode($this->cleanArticle($article_html->find('div.col-primary', 0)->innertext));
+$content_node = $article_html->find('div.col-primary, div.col-sm-9', 0);
+$item['content'] = utf8_encode($this->cleanArticle($content_node->innertext));
$item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext);
return $item;