[PcGamerBridge] - Add all articles, full content and images (#1420)

This commit is contained in:
Anchit Bajaj 2020-02-27 02:40:09 +05:30 committed by GitHub
parent 90147fc45c
commit f0363ba03b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,22 +2,43 @@
class PcGamerBridge extends BridgeAbstract class PcGamerBridge extends BridgeAbstract
{ {
const NAME = 'PC Gamer'; const NAME = 'PC Gamer';
const URI = 'https://www.pcgamer.com/'; const URI = 'https://www.pcgamer.com/archive/';
const DESCRIPTION = 'PC Gamer Most Read Stories'; const DESCRIPTION = 'PC Gamer Most Read Stories';
const MAINTAINER = 'mdemoss'; const CACHE_TIMEOUT = 3600;
const MAINTAINER = 'IceWreck, mdemoss';
public function collectData() public function collectData()
{ {
$html = getSimpleHTMLDOMCached($this->getURI(), 300); $html = getSimpleHTMLDOMCached($this->getURI(), 300);
$stories = $html->find('div#popularcontent li.most-popular-item'); $stories = $html->find('ul.basic-list li.day-article');
$i = 0;
// Find individual stories in the archive page
foreach ($stories as $element) { foreach ($stories as $element) {
if($i == 15) break;
$item['uri'] = $element->find('a', 0)->href; $item['uri'] = $element->find('a', 0)->href;
// error_log(print_r($item['uri'], TRUE));
$articleHtml = getSimpleHTMLDOMCached($item['uri']); $articleHtml = getSimpleHTMLDOMCached($item['uri']);
$item['title'] = $element->find('h4 a', 0)->plaintext; $item['title'] = $element->find('a', 0)->plaintext;
$item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content); $item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content);
$item['content'] = $articleHtml->find('meta[name=description]', 0)->content; $item['author'] = $articleHtml->find('span.by-author a', 0)->plaintext;
$item['author'] = $articleHtml->find('a[itemprop=author]', 0)->plaintext;
// Get the article content
$articleContents = $articleHtml->find('#article-body', 0);
/*
By default the img src has a link to an error image and then the actual image
is added in by JS. So we replace the error image with the actual full size image
whose link is in one of the attributes of the img tag
*/
foreach($articleContents->find('img') as $img) {
$imgsrc = $img->getAttribute('data-original-mos');
// error_log($imgsrc);
$img->src = $imgsrc;
}
$item['content'] = $articleContents;
$this->items[] = $item; $this->items[] = $item;
$i++;
} }
} }
} }