From 43b7621f45c501626fe06dd996609630465430e1 Mon Sep 17 00:00:00 2001
From: hollowleviathan <hollowleviathan@gmail.com>
Date: Fri, 29 Jan 2021 17:57:40 +0000
Subject: [PATCH] [ReutersBridge] Add bridge (#1653)

---
 bridges/ReutersBridge.php | 246 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)
 create mode 100644 bridges/ReutersBridge.php

diff --git a/bridges/ReutersBridge.php b/bridges/ReutersBridge.php
new file mode 100644
index 00000000..cb6b4e38
--- /dev/null
+++ b/bridges/ReutersBridge.php
@@ -0,0 +1,246 @@
+<?php
+class ReutersBridge extends BridgeAbstract
+{
+	const MAINTAINER = 'hollowleviathan, spraynard, csisoap';
+	const NAME = 'Reuters Bridge';
+	const URI = 'https://reuters.com/';
+	const CACHE_TIMEOUT = 1800; // 30min
+	const DESCRIPTION = 'Returns news from Reuters';
+
+	private $feedName = self::NAME;
+
+	/**
+	 * Wireitem types allowed in the final story output
+	 */
+	const ALLOWED_WIREITEM_TYPES = array(
+		'story',
+		'headlines'
+	);
+
+	/**
+	 * Wireitem template types allowed in the final story output
+	 */
+	const ALLOWED_TEMPLATE_TYPES = array(
+		'story'
+	);
+
+	const PARAMETERS = array(
+		array(
+			'feed' => array(
+				'name' => 'News Feed',
+				'type' => 'list',
+				'title' => 'Feeds from Reuters U.S/International edition',
+				'values' => array(
+					'Aerospace and Defense' => 'aerospace',
+					'Business' => 'business',
+					'China' => 'china',
+					'Energy' => 'energy',
+					'Entertainment' => 'chan:8ym8q8dl',
+					'Environment' => 'chan:6u4f0jgs',
+					'Health' => 'chan:8hw7807a',
+					'Lifestyle' => 'life',
+					'Markets' => 'markets',
+					'Politics' => 'politics',
+					'Science' => 'science',
+					'Special Reports' => 'special-reports',
+					'Sports' => 'sports',
+					'Tech' => 'tech',
+					'Top News' => 'home/topnews',
+					'UK' => 'chan:61leiu7j',
+					'USA News' => 'us',
+					'Wire' => 'wire',
+					'World' => 'world',
+				)
+			)
+		)
+	);
+
+	/**
+	 * Performs an HTTP request to the Reuters API and returns decoded JSON
+	 * in the form of an associative array
+	 * @param string $feed_uri Parameter string to the Reuters API
+	 * @return array
+	 */
+	private function getJson($feed_uri)
+	{
+		$uri = "https://wireapi.reuters.com/v8$feed_uri";
+		$returned_data = getContents($uri);
+		return json_decode($returned_data, true);
+	}
+
+	/**
+	 * Takes in data from Reuters Wire API and
+	 * creates structured data in the form of a list
+	 * of story information.
+	 * @param array $data JSON collected from the Reuters Wire API
+	 */
+	private function processData($data)
+	{
+		/**
+		 * Gets a list of wire items which are groups of templates
+		 */
+		$reuters_allowed_wireitems = array_filter(
+			$data, function ($wireitem) {
+				return in_array(
+					$wireitem['wireitem_type'],
+					self::ALLOWED_WIREITEM_TYPES
+				);
+			}
+		);
+
+		/*
+		* Gets a list of "Templates", which is data containing a story
+		*/
+		$reuters_wireitem_templates = array_reduce(
+			$reuters_allowed_wireitems,
+			function (array $carry, array $wireitem) {
+				$wireitem_templates = $wireitem['templates'];
+				return array_merge(
+					$carry,
+					array_filter(
+						$wireitem_templates, function (
+							array $template_data
+						) {
+							return in_array(
+								$template_data['type'],
+								self::ALLOWED_TEMPLATE_TYPES
+							);
+						}
+					)
+				);
+			},
+			array()
+		);
+
+		return $reuters_wireitem_templates;
+	}
+
+	private function getArticle($feed_uri)
+	{
+		// This will make another request to API to get full detail of article and author's name.
+		$rawData = $this->getJson($feed_uri);
+		$reuters_wireitems = $rawData['wireitems'];
+		$processedData = $this->processData($reuters_wireitems);
+
+		$first = reset($processedData);
+		$article_content = $first['story']['body_items'];
+		$authorlist = $first['story']['authors'];
+		$category = $first['story']['channel']['name'];
+		$image_list = $first['story']['images'];
+		$img_placeholder = '';
+
+		foreach($image_list as $image) { // Add more image to article.
+			$image_url = $image['url'];
+			$image_caption = $image['caption'];
+			$img = "<img src=\"$image_url\">";
+			$img_caption = "<figcaption style=\"text-align: center;\"><i>$image_caption</i></figcaption>";
+			$figure = "<figure>$img \t $img_caption</figure>";
+			$img_placeholder = $img_placeholder . $figure;
+		}
+
+		$author = '';
+		$counter = 0;
+		foreach ($authorlist as $data) {
+			//Formatting author's name.
+			$counter++;
+			$name = $data['name'];
+			if ($counter == count($authorlist)) {
+				$author = $author . $name;
+			} else {
+				$author = $author . "$name, ";
+			}
+		}
+
+		$description = '';
+		foreach ($article_content as $content) {
+			$data;
+			if(isset($content['content'])) {
+				$data = $content['content'];
+			}
+			switch($content['type']) {
+				case 'paragraph':
+					$description = $description . "<p>$data</p>";
+					break;
+				case 'heading':
+					$description = $description . "<h3>$data</h3>";
+					break;
+				case 'infographics':
+					$description = $description . "<img src=\"$data\">";
+					break;
+				case 'inline_items':
+					$item_list = $content['items'];
+					$description = $description . '<p>';
+					foreach ($item_list as $item) {
+						if($item['type'] == 'text') {
+							$description = $description . $item['content'];
+						} else {
+							$description = $description . $item['symbol'];
+						}
+					}
+					$description = $description . '</p>';
+					break;
+				case 'p_table':
+					$description = $description . $content['content'];
+					break;
+			}
+		}
+
+		$content_detail = array(
+			'content' => $description,
+			'author' => $author,
+			'category' => $category,
+			'images' => $img_placeholder,
+		);
+		return $content_detail;
+	}
+
+	public function getName() {
+		return $this->feedName;
+	}
+
+	public function collectData()
+	{
+		$reuters_feed_name = $this->getInput('feed');
+
+		if(strpos($reuters_feed_name, 'chan:') !== false) {
+			// Now checking whether that feed has unique ID or not.
+			$feed_uri = "/feed/rapp/us/wirefeed/$reuters_feed_name";
+		} else {
+			$feed_uri = "/feed/rapp/us/tabbar/feeds/$reuters_feed_name";
+		}
+
+		$data = $this->getJson($feed_uri);
+
+		$reuters_wireitems = $data['wireitems'];
+		$this->feedName = $data['wire_name'] . ' | Reuters';
+		$processedData = $this->processData($reuters_wireitems);
+
+		// Merge all articles from Editor's Highlight section into existing array of templates.
+		$top_section = reset($reuters_wireitems);
+		if ($top_section['wireitem_type'] == 'headlines') {
+			$top_articles = $top_section['templates'][1]['headlines'];
+			$processedData = array_merge($top_articles, $processedData);
+		}
+
+		foreach ($processedData as $story) {
+			$item['uid'] = $story['story']['usn'];
+			$article_uri = $story['template_action']['api_path'];
+			$content_detail = $this->getArticle($article_uri);
+			$description = $content_detail['content'];
+			$author = $content_detail['author'];
+			$images = $content_detail['images'];
+			$item['categories'] = array($content_detail['category']);
+			$item['author'] = $author;
+			if (!(bool) $description) {
+				$description = $story['story']['lede']; // Just in case the content doesn't have anything.
+			} else {
+				$item['content'] = "$description  $images";
+			}
+
+			$item['title'] = $story['story']['hed'];
+			$item['timestamp'] = $story['story']['updated_at'];
+			$item['uri'] = $story['template_action']['url'];
+			$this->items[] = $item;
+		}
+	}
+}