[EconomistBridge] Fixes for fetching new page structure (#1836)

This commit is contained in:
Tobias Alexander Franke 2020-11-29 10:31:20 +00:00 committed by GitHub
parent 7705d097e3
commit 56eb829a66
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -14,17 +14,28 @@ class EconomistBridge extends BridgeAbstract {
$html = getSimpleHTMLDOM(self::URI . '/latest/') $html = getSimpleHTMLDOM(self::URI . '/latest/')
or returnServerError('Could not fetch latest updates form The Economist.'); or returnServerError('Could not fetch latest updates form The Economist.');
foreach($html->find('article') as $element) { foreach($html->find('div.teaser') as $element) {
$a = $element->find('a', 0); $a = $element->find('a.headline-link', 0);
$href = $a->href;
if (substr($href, 0, 4) != 'http')
$href = self::URI . $a->href; $href = self::URI . $a->href;
$full = getSimpleHTMLDOMCached($href); $full = getSimpleHTMLDOMCached($href);
$article = $full->find('article', 0); $article = $full->find('article', 0);
$header = $article->find('span[itemprop="headline"]', 0);
$headerimg = $article->find('div[itemprop="image"]', 0)->find('img', 0);
$author = $article->find('p[itemprop="byline"]', 0);
$time = $article->find('time', 0);
$content = $article->find('div[itemprop="text"]', 0);
$section = array( $article->find('strong[itemprop="articleSection"]', 0)->plaintext );
$header = $article->find('h1', 0); // Author
$author = $article->find('span[itemprop="author"]', 0); if ($author)
$time = $article->find('time[itemprop="dateCreated"]', 0); $author = substr($author->innertext, 3, strlen($author));
$content = $article->find('div[itemprop="description"]', 0); else
$author = 'The Economist';
// Remove newsletter subscription box // Remove newsletter subscription box
$newsletter = $content->find('div[class="newsletter-form__message"]', 0); $newsletter = $content->find('div[class="newsletter-form__message"]', 0);
@ -40,19 +51,15 @@ class EconomistBridge extends BridgeAbstract {
if ($nextprev) if ($nextprev)
$nextprev->outertext = ''; $nextprev->outertext = '';
$section = array( $article->find('h3[itemprop="articleSection"]', 0)->plaintext );
$item = array(); $item = array();
$item['title'] = $header->find('span', 0)->innertext . ': ' $item['title'] = $header->innertext;
. $header->find('span', 1)->innertext;
$item['uri'] = $href; $item['uri'] = $href;
$item['timestamp'] = strtotime($time->datetime); $item['timestamp'] = strtotime($time->datetime);
$item['author'] = $author->innertext; $item['author'] = $author;
$item['categories'] = $section; $item['categories'] = $section;
$item['content'] = '<img style="max-width: 100%" src="' $item['content'] = '<img style="max-width: 100%" src="'
. $a->find('img', 0)->src . '">' . $content->innertext; . $headerimg->src . '">' . $content->innertext;
$this->items[] = $item; $this->items[] = $item;