[GuardianBridge] - New bridge for the Guardian (#1249)

* [GuardianBridge] - New bridge for the Guardian
2019-08-28 19:57:45 +05:30 · 2019-08-28 19:57:45 +05:30 · f27b267614
commit f27b267614
parent 8bff63d9c6
1 changed files with 96 additions and 0 deletions
--- a/bridges/TheGuardianBridge.php
+++ b/bridges/TheGuardianBridge.php
@ -0,0 +1,96 @@
 <?php
 /**
  * Bridge for The Guardian's topic RSS feeds.
  *
  * Expands the feed selected through the 'feed' parameter and replaces each
  * item's content with markup extracted from the linked article page.
  */
 class TheGuardianBridge extends FeedExpander {
 	const MAINTAINER = 'IceWreck';
 	const NAME = 'The Guardian Bridge';
 	const URI = 'https://www.theguardian.com/';
 	const CACHE_TIMEOUT = 600; // This is a news site, so don't cache for more than 10 mins
 	const DESCRIPTION = 'RSS feed for The Guardian';
 	const PARAMETERS = array( array(
 		'feed' => array(
 			'name' => 'Feed',
 			'type' => 'list',
 			// Values are paths relative to the feed base URL used in
 			// collectData(). They are kept exactly as first published (with
 			// or without a leading slash) so existing bridge URLs keep
 			// working; collectData() normalizes the slash.
 			'values' => array(
 				'World News' => 'world/rss',
 				'US News' => '/us-news/rss',
 				'UK News' => '/uk-news/rss',
 				'Europe News' => '/world/europe-news/rss',
 				'Asia News' => '/world/asia/rss',
 				'Tech' => '/uk/technology/rss',
 				'Business News' => '/uk/business/rss',
 				'Opinion' => '/uk/commentisfree/rss',
 				'Lifestyle' => '/uk/lifeandstyle/rss',
 				'Culture' => '/uk/culture/rss',
 				'Sports' => '/uk/sport/rss'
 			)
 		)
 		/*
 		Topic-wise links
 		The base feed for any topic can be found by appending /rss to its URL.
 		Examples:
 		https://feeds.theguardian.com/theguardian/uk-news/rss
 		https://feeds.theguardian.com/theguardian/us-news/rss
 		Or simply:
 		https://www.theguardian.com/world/rss
 		To support another topic, add it as a value in the list above.
 		*/
 	));
 	/**
 	 * Builds the URL of the selected feed and expands at most 10 of its items.
 	 */
 	public function collectData(){
 		// The list values are inconsistent about a leading slash. Strip it so
 		// the joined URL never contains '//' right after the base path.
 		$feed = ltrim((string)$this->getInput('feed'), '/');
 		$feedURL = 'https://feeds.theguardian.com/theguardian/' . $feed;
 		$this->collectExpandableDatas($feedURL, 10);
 	}
 	/**
 	 * Parses one feed entry and swaps its content for the article markup.
 	 *
 	 * The article is assembled from the first <figure> of the page (the main
 	 * article image) followed by every '.content__article-body' element, then
 	 * cleaned of image-viewer wrappers and non-article elements.
 	 *
 	 * @param object $newsItem Feed entry handed over by the feed expander;
 	 *                         its 'link' property is used as the article URL.
 	 * @return array Item returned by parent::parseItem(), with 'content'
 	 *               replaced when article markup could be extracted.
 	 */
 	protected function parseItem($newsItem){
 		$item = parent::parseItem($newsItem);
 		// --- Recovering the article ---
 		$articlePage = getSimpleHTMLDOM($newsItem->link);
 		if(!$articlePage) {
 			// Page could not be loaded: keep the item as parsed from the feed
 			// instead of failing on a non-object below.
 			return $item;
 		}
 		// The first figure contains the main article image. Casting keeps
 		// $article a plain string even when no figure exists (null => '').
 		$article = (string)$articlePage->find('figure', 0);
 		// content__article-body holds the actual article text
 		foreach($articlePage->find('.content__article-body') as $bodyElement) {
 			$article .= $bodyElement;
 		}
 		if($article === '') {
 			// Nothing was extracted; do not overwrite the item with an
 			// empty body.
 			return $item;
 		}
 		// --- Cleaning up the markup ---
 		// Replace each image-viewer link with the image it wraps. A wrapper
 		// without an image is removed altogether (null casts to '').
 		foreach($articlePage->find('a.article__img-container') as $imageContainer) {
 			$mainImage = $imageContainer->find('img', 0);
 			$article = str_replace((string)$imageContainer, (string)$mainImage, $article);
 		}
 		// Selectors of elements that do not belong in the feed content
 		$uselessElements = array(
 			'#show-caption',
 			'.element-atom',
 			'.submeta',
 			'youtube-media-atom',
 			'svg'
 		);
 		// Strip every match of the selectors above from the article markup
 		foreach($uselessElements as $selector) {
 			foreach($articlePage->find($selector) as $uselessElement) {
 				$article = str_replace((string)$uselessElement, '', $article);
 			}
 		}
 		$item['content'] = $article;
 		return $item;
 	}
 }