[LeMondeInformatique] Handle special articles (#1039)

Fix content extraction for special articles that compile previous articles
This commit is contained in:
ORelio 2019-03-02 19:03:29 +01:00 committed by LogMANOriginal
parent b96c25a3af
commit d37f0c14a0

View file

@@ -20,12 +20,13 @@ class LeMondeInformatiqueBridge extends FeedExpander {
str_replace( str_replace(
'/grande/', '/grande/',
'/petite/', '/petite/',
$article_html->find('.article-image', 0)->find('img', 0)->src $article_html->find('.article-image > img, figure > img', 0)->src
) )
); );
//No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail //No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail
$item['content'] = utf8_encode($this->cleanArticle($article_html->find('div.col-primary', 0)->innertext)); $content_node = $article_html->find('div.col-primary, div.col-sm-9', 0);
$item['content'] = utf8_encode($this->cleanArticle($content_node->innertext));
$item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext); $item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext);
return $item; return $item;