[LeMondeInformatique] Handle special articles (#1039)
Fix content extraction for special articles that compile previous articles.
This commit is contained in:
parent
b96c25a3af
commit
d37f0c14a0
1 changed file with 3 additions and 2 deletions
|
@@ -20,12 +20,13 @@ class LeMondeInformatiqueBridge extends FeedExpander {
|
||||||
str_replace(
|
str_replace(
|
||||||
'/grande/',
|
'/grande/',
|
||||||
'/petite/',
|
'/petite/',
|
||||||
$article_html->find('.article-image', 0)->find('img', 0)->src
|
$article_html->find('.article-image > img, figure > img', 0)->src
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
//No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail
|
//No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail
|
||||||
$item['content'] = utf8_encode($this->cleanArticle($article_html->find('div.col-primary', 0)->innertext));
|
$content_node = $article_html->find('div.col-primary, div.col-sm-9', 0);
|
||||||
|
$item['content'] = utf8_encode($this->cleanArticle($content_node->innertext));
|
||||||
$item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext);
|
$item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext);
|
||||||
|
|
||||||
return $item;
|
return $item;
|
||||||
|
|
Loading…
Reference in a new issue