From 3a6ccc4c296038c51370382de9899653764f3942 Mon Sep 17 00:00:00 2001 From: ORelio Date: Mon, 19 Dec 2016 20:04:32 +0100 Subject: [PATCH] Update Futura-Sciences field retrieval See #433 --- bridges/FuturaSciencesBridge.php | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 5f86497b..0b810668 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -89,7 +89,8 @@ class FuturaSciencesBridge extends FeedExpander { $article = getSimpleHTMLDOMCached($item['uri']) or returnServerError('Could not request Futura-Sciences: ' . $item['uri']); $item['content'] = $this->ExtractArticleContent($article); - $item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article); + $author = $this->ExtractAuthor($article); + $item['author'] = empty($author) ? $item['author'] : $author; return $item; } @@ -126,12 +127,16 @@ class FuturaSciencesBridge extends FeedExpander { } private function ExtractArticleContent($article){ - $contents = $article->find('section[class=module article-text article-text-classic bg-white]', 0)->innertext; + $contents = $article->find('section.article-text-classic', 0)->innertext; + $headline = trim($article->find('p.description', 0)->plaintext); + if (!empty($headline)) + $headline = '

'.$headline.'

'; foreach (array( '
StripRecursiveHTMLSection($contents , 'div', $div_start); } @@ -151,13 +157,14 @@ class FuturaSciencesBridge extends FeedExpander { $contents = $this->StripWithDelimiters($contents, 'fs:definition="', '"'); $contents = $this->StripWithDelimiters($contents, 'fs:xt:clicktype="', '"'); $contents = $this->StripWithDelimiters($contents, 'fs:xt:clickname="', '"'); + $contents = $this->StripWithDelimiters($contents, '