[MrssFormat] Rework to make it valid RSS 2.0 + Media RSS (#996)

This commit is contained in:
fulmeek 2019-02-06 17:18:33 +01:00 committed by LogMANOriginal
parent 32d4da8b76
commit 80f6a8b3d4
6 changed files with 300 additions and 45 deletions

View File

@ -1,18 +1,45 @@
<?php
/**
* Mrss
* Documentation Source http://www.rssboard.org/media-rss
*/
* MrssFormat - RSS 2.0 + Media RSS
* http://www.rssboard.org/rss-specification
* http://www.rssboard.org/media-rss
*
* Validators:
* https://validator.w3.org/feed/
* http://www.rssboard.org/rss-validator/
*
* Notes about the implementation:
*
* - The item author is not supported as it needs to be an e-mail address to be
* valid.
* - The RSS specification does not explicitly allow more than one enclosure
* per item, as every item is meant to provide one "story"; having multiple
* enclosures per item may therefore lead to unexpected behavior.
* On top of that, an enclosure requires a length to be specified, which
* RSS-Bridge can't provide.
* - The Media RSS extension comes in handy, since it allows multiple
* enclosures per item, even though having only one is recommended for the
* same one-story-per-item reason. Only the URL is required; everything else
* is optional.
* - Since the Media RSS extension has its own namespace, the output is a valid
* RSS 2.0 feed that works with feed readers that don't support the extension.
*/
class MrssFormat extends FormatAbstract {
public function stringify(){
$https = isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on' ? 's' : '';
$httpHost = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
$httpInfo = isset($_SERVER['PATH_INFO']) ? $_SERVER['PATH_INFO'] : '';
const ALLOWED_IMAGE_EXT = array(
'.gif', '.jpg', '.png'
);
$serverRequestUri = isset($_SERVER['REQUEST_URI']) ? $this->xml_encode($_SERVER['REQUEST_URI']) : '';
public function stringify(){
$urlPrefix = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') ? 'https://' : 'http://';
$urlHost = (isset($_SERVER['HTTP_HOST'])) ? $_SERVER['HTTP_HOST'] : '';
$urlPath = (isset($_SERVER['PATH_INFO'])) ? $_SERVER['PATH_INFO'] : '';
$urlRequest = (isset($_SERVER['REQUEST_URI'])) ? $_SERVER['REQUEST_URI'] : '';
$feedUrl = $this->xml_encode($urlPrefix . $urlHost . $urlRequest);
$extraInfos = $this->getExtraInfos();
$title = $this->xml_encode($extraInfos['name']);
$icon = $extraInfos['icon'];
if(!empty($extraInfos['uri'])) {
$uri = $this->xml_encode($extraInfos['uri']);
@ -20,34 +47,48 @@ class MrssFormat extends FormatAbstract {
$uri = REPOSITORY;
}
$uriparts = parse_url($uri);
$icon = $this->xml_encode($uriparts['scheme'] . '://' . $uriparts['host'] . '/favicon.ico');
$items = '';
foreach($this->getItems() as $item) {
$itemAuthor = $this->xml_encode($item->getAuthor());
$itemTimestamp = $item->getTimestamp();
$itemTitle = $this->xml_encode($item->getTitle());
$itemUri = $this->xml_encode($item->getURI());
$itemTimestamp = $this->xml_encode(date(DATE_RFC2822, $item->getTimestamp()));
$itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$entryID = $item->getUid();
$isPermaLink = 'false';
if (empty($entryID) && !empty($itemUri)) { // Fallback to provided URI
$entryID = $itemUri;
$isPermaLink = 'true';
}
if (empty($entryID)) // Fallback to title and content
$entryID = hash('sha1', $itemTitle . $itemContent);
$entryTitle = '';
if (!empty($itemTitle))
$entryTitle = '<title>' . $itemTitle . '</title>';
$entryLink = '';
if (!empty($itemUri))
$entryLink = '<link>' . $itemUri . '</link>';
$entryPublished = '';
if (!empty($itemTimestamp)) {
$entryPublished = '<pubDate>'
. $this->xml_encode(gmdate(DATE_RFC2822, $itemTimestamp))
. '</pubDate>';
}
$entryDescription = '';
if (!empty($itemContent))
$entryDescription = '<description>' . $itemContent . '</description>';
$entryEnclosuresWarning = '';
$entryEnclosures = '';
if(!empty($item->getEnclosures())) {
$entryEnclosures .= '<enclosure url="'
. $this->xml_encode($item->getEnclosures()[0])
. '" type="' . getMimeType($item->getEnclosures()[0]) . '" />';
if(count($item->getEnclosures()) > 1) {
$entryEnclosures .= PHP_EOL;
$entryEnclosuresWarning = '&lt;br&gt;Warning:
Some media files might not be shown to you. Consider using the ATOM format instead!';
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<atom:link rel="enclosure" href="'
. $enclosure . '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
}
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<media:content url="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '"/>'
. PHP_EOL;
}
$entryCategories = '';
@ -60,12 +101,11 @@ Some media files might not be shown to you. Consider using the ATOM format inste
$items .= <<<EOD
<item>
<title>{$itemTitle}</title>
<link>{$itemUri}</link>
<guid isPermaLink="true">{$itemUri}</guid>
<pubDate>{$itemTimestamp}</pubDate>
<description>{$itemContent}{$entryEnclosuresWarning}</description>
<author>{$itemAuthor}</author>
{$entryTitle}
{$entryLink}
<guid isPermaLink="{$isPermaLink}">{$entryID}</guid>
{$entryPublished}
{$entryDescription}
{$entryEnclosures}
{$entryCategories}
</item>
@ -75,22 +115,28 @@ EOD;
$charset = $this->getCharset();
/* xml attributes need to have certain characters escaped to be w3c compliant */
$imageTitle = htmlspecialchars($title, ENT_COMPAT);
$feedImage = '';
if (!empty($icon) && in_array(substr($icon, -4), self::ALLOWED_IMAGE_EXT)) {
$feedImage .= <<<EOD
<image>
<url>{$icon}</url>
<title>{$title}</title>
<link>{$uri}</link>
</image>
EOD;
}
/* Data are prepared, now let's begin the "MAGIE !!!" */
$toReturn = <<<EOD
<?xml version="1.0" encoding="{$charset}"?>
<rss version="2.0"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:media="http://search.yahoo.com/mrss/"
xmlns:atom="http://www.w3.org/2005/Atom">
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>{$title}</title>
<link>http{$https}://{$httpHost}{$httpInfo}/</link>
<link>{$uri}</link>
<description>{$title}</description>
<image url="{$icon}" title="{$imageTitle}" link="{$uri}"/>
<atom:link rel="alternate" type="text/html" href="{$uri}" />
<atom:link rel="self" href="http{$https}://{$httpHost}{$serverRequestUri}" />
{$feedImage}
<atom:link rel="alternate" type="text/html" href="{$uri}"/>
<atom:link rel="self" href="{$feedUrl}" type="application/atom+xml"/>
{$items}
</channel>
</rss>

90
tests/MrssFormatTest.php Normal file
View File

@ -0,0 +1,90 @@
<?php
/**
* MrssFormat - RSS 2.0 + Media RSS
* http://www.rssboard.org/rss-specification
* http://www.rssboard.org/media-rss
*/
require_once __DIR__ . '/../lib/rssbridge.php';
use PHPUnit\Framework\TestCase;
/**
 * Sample-driven tests for the Mrss output format.
 *
 * Every `*.json` file found in PATH_SAMPLES is one test case. The feed
 * rendered from it is compared against the XML file with the same base name
 * in PATH_EXPECTED.
 */
class MrssFormatTest extends TestCase {

    const PATH_SAMPLES = __DIR__ . '/samples/';
    const PATH_EXPECTED = __DIR__ . '/samples/expectedMrssFormat/';

    /** @var object Current sample with the properties `meta`, `items` and `expected` (path to the expected XML) */
    private $sample;

    /** @var object Format instance returned by \Format::create('Mrss') */
    private $format;

    /** @var string Output captured while the format was displayed */
    private $data;

    /**
     * The format must announce itself as RSS using its own charset.
     *
     * Skipped unless xdebug is available, since the sent headers are read
     * through xdebug_get_headers().
     *
     * @dataProvider sampleProvider
     * @runInSeparateProcess
     * @requires function xdebug_get_headers
     */
    public function testHeaders($path) {
        $this->setSample($path);
        $this->initFormat();

        $this->assertContains(
            'Content-Type: application/rss+xml; charset=' . $this->format->getCharset(),
            xdebug_get_headers()
        );
    }

    /**
     * The rendered feed must be XML-equal to the expected file of the sample.
     *
     * @dataProvider sampleProvider
     * @runInSeparateProcess
     */
    public function testOutput($path) {
        $this->setSample($path);
        $this->initFormat();

        $this->assertXmlStringEqualsXmlFile($this->sample->expected, $this->data);
    }

    ////////////////////////////////////////////////////////////////////////////

    /**
     * Data provider: one data set per JSON sample file, keyed by the file's
     * base name (without the `.json` extension).
     *
     * @return array Map of sample name => array(path to the sample file)
     */
    public function sampleProvider() {
        $paths = glob(self::PATH_SAMPLES . '*.json');

        // glob() returns false on error (and, on some systems, when nothing
        // matches); iterating over false would raise a warning in the provider.
        if ($paths === false) {
            $paths = array();
        }

        $samples = array();
        foreach ($paths as $path) {
            $samples[basename($path, '.json')] = array($path);
        }
        return $samples;
    }

    /**
     * Loads a JSON sample and stores it in $this->sample.
     *
     * The sample must provide the keys `meta` and `items`; otherwise the test
     * fails. A non-empty `server` key is merged into $_SERVER.
     *
     * @param string $path Path to the JSON sample file
     */
    private function setSample($path) {
        $data = json_decode(file_get_contents($path), true);

        // An unreadable or malformed file decodes to null and ends up here too
        if (!isset($data['meta']) || !isset($data['items'])) {
            $this->fail('invalid test sample: ' . basename($path, '.json'));
        }

        if (!empty($data['server'])) {
            $this->setServerVars($data['server']);
        }

        $items = array();
        foreach ($data['items'] as $item) {
            $items[] = new \FeedItem($item);
        }

        $this->sample = (object)array(
            'meta' => $data['meta'],
            'items' => $items,
            'expected' => self::PATH_EXPECTED . basename($path, '.json') . '.xml'
        );
    }

    /**
     * Merges the given entries into $_SERVER; entries in $list win over
     * existing ones with the same key.
     *
     * @param array $list Server variables to set
     */
    private function setServerVars($list) {
        $_SERVER = array_merge($_SERVER, $list);
    }

    /**
     * Creates the Mrss format, feeds it the current sample, renders it and
     * stores the captured output in $this->data.
     *
     * Also asserts that the captured output can be parsed as XML.
     */
    private function initFormat() {
        $this->format = \Format::create('Mrss');
        $this->format->setItems($this->sample->items);
        $this->format->setExtraInfos($this->sample->meta);
        $this->format->setLastModified(strtotime('2000-01-01 12:00:00 UTC'));

        // Render first, then read what PHPUnit captured from the output
        // buffer. NOTE(review): relies on display() printing the feed - confirm.
        $this->format->display();
        $this->data = $this->getActualOutput();

        $this->assertNotFalse(simplexml_load_string($this->data));

        // Discard the buffered feed now that it has been captured
        ob_clean();
    }
}

View File

@ -0,0 +1,64 @@
<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>Sample feed with common data</title>
<link>https://example.com/blog/</link>
<description>Sample feed with common data</description>
<image>
<url>https://example.com/logo.png</url>
<title>Sample feed with common data</title>
<link>https://example.com/blog/</link>
</image>
<atom:link href="https://example.com/blog/" rel="alternate" type="text/html"/>
<atom:link href="https://example.com/feed?type=common&amp;items=4" rel="self" type="application/atom+xml"/>
<item>
<title>Test Entry</title>
<link>http://example.com/blog/test-entry</link>
<guid isPermaLink="true">http://example.com/blog/test-entry</guid>
<pubDate>Sat, 01 Dec 2018 12:00:00 +0000</pubDate>
<description>Hello world, this is a test entry.</description>
<category>test</category>
<category>Hello World</category>
<category>example</category>
</item>
<item>
<title>Announcing JSON Feed</title>
<link>https://jsonfeed.org/2017/05/17/announcing_json_feed</link>
<guid isPermaLink="true">https://jsonfeed.org/2017/05/17/announcing_json_feed</guid>
<pubDate>Wed, 17 May 2017 13:02:12 +0000</pubDate>
<description>&lt;p&gt;We — Manton Reece and Brent Simmons — have noticed that JSON has become the developers choice for APIs, and that developers will often go out of their way to avoid XML. JSON is simpler to read and write, and its less prone to bugs.&lt;/p&gt;
&lt;p&gt;So we developed JSON Feed, a format similar to &lt;a href="http://cyber.harvard.edu/rss/rss.html"&gt;RSS&lt;/a&gt; and &lt;a href="https://tools.ietf.org/html/rfc4287"&gt;Atom&lt;/a&gt; but in JSON. It reflects the lessons learned from our years of work reading and publishing feeds.&lt;/p&gt;
&lt;p&gt;&lt;a href="https://jsonfeed.org/version/1"&gt;See the spec&lt;/a&gt;. Its at version 1, which may be the only version ever needed. If future versions are needed, version 1 feeds will still be valid feeds.&lt;/p&gt;
&lt;h4&gt;Notes&lt;/h4&gt;
&lt;p&gt;We have a &lt;a href="https://github.com/manton/jsonfeed-wp"&gt;WordPress plugin&lt;/a&gt; and, coming soon, a JSON Feed Parser for Swift. As more code is written, by us and others, well update the &lt;a href="https://jsonfeed.org/code"&gt;code&lt;/a&gt; page.&lt;/p&gt;
&lt;p&gt;See &lt;a href="https://jsonfeed.org/mappingrssandatom"&gt;Mapping RSS and Atom to JSON Feed&lt;/a&gt; for more on the similarities between the formats.&lt;/p&gt;
&lt;p&gt;This website — the Markdown files and supporting resources — &lt;a href="https://github.com/brentsimmons/JSONFeed"&gt;is up on GitHub&lt;/a&gt;, and youre welcome to comment there.&lt;/p&gt;
&lt;p&gt;This website is also a blog, and you can subscribe to the &lt;a href="https://jsonfeed.org/xml/rss.xml"&gt;RSS feed&lt;/a&gt; or the &lt;a href="https://jsonfeed.org/feed.json"&gt;JSON feed&lt;/a&gt; (if your reader supports it).&lt;/p&gt;
&lt;p&gt;We worked with a number of people on this over the course of several months. We list them, and thank them, at the bottom of the &lt;a href="https://jsonfeed.org/version/1"&gt;spec&lt;/a&gt;. But — most importantly — &lt;a href="http://furbo.org/"&gt;Craig Hockenberry&lt;/a&gt; spent a little time making it look pretty. :)&lt;/p&gt;</description>
</item>
<item>
<title>Atom draft-07 snapshot</title>
<link>http://example.org/2005/04/02/atom</link>
<guid isPermaLink="false">dd6b6c920d3b340ab9e07faf6682f2a7c4f70134</guid>
<pubDate>Sun, 31 Jul 2005 12:29:29 +0000</pubDate>
<description>&lt;p&gt;&lt;i&gt;[Update: The Atom draft is finished.]&lt;/i&gt;&lt;/p&gt;</description>
<media:content url="http://example.org/audio/ph34r_my_podcast.mp3" type="audio/mpeg"/>
</item>
<item>
<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<guid isPermaLink="true">http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</guid>
<pubDate>Tue, 03 Jun 2003 09:39:21 +0000</pubDate>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
</item>
</channel>
</rss>

View File

@ -0,0 +1,10 @@
<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>Sample feed with minimum data</title>
<link>https://github.com/RSS-Bridge/rss-bridge/</link>
<description>Sample feed with minimum data</description>
<atom:link href="https://github.com/RSS-Bridge/rss-bridge/" rel="alternate" type="text/html"/>
<atom:link href="https://example.com/feed" rel="self" type="application/atom+xml"/>
</channel>
</rss>

View File

@ -0,0 +1,19 @@
<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>Sample feed with minimum data</title>
<link>https://github.com/RSS-Bridge/rss-bridge/</link>
<description>Sample feed with minimum data</description>
<atom:link href="https://github.com/RSS-Bridge/rss-bridge/" rel="alternate" type="text/html"/>
<atom:link href="https://example.com/feed" rel="self" type="application/atom+xml"/>
<item>
<title>Sample Item #1</title>
<guid isPermaLink="false">29f59918d266c56a935da13e4122b524298e5a39</guid>
</item>
<item>
<title>Sample Item #2</title>
<guid isPermaLink="false">edf358cad1a7ae255d6bc97640dd9d27738f1b7b</guid>
</item>
</channel>
</rss>

View File

@ -0,0 +1,26 @@
<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>Sample microblog feed</title>
<link>https://example.com/blog/</link>
<description>Sample microblog feed</description>
<image>
<url>https://example.com/logo.png</url>
<title>Sample microblog feed</title>
<link>https://example.com/blog/</link>
</image>
<atom:link href="https://example.com/blog/" rel="alternate" type="text/html"/>
<atom:link href="https://example.com/feed" rel="self" type="application/atom+xml"/>
<item>
<guid isPermaLink="false">1918f084648b82057c1dd3faa3d091da82a6fac2</guid>
<pubDate>Sun, 07 Oct 2018 16:53:03 +0000</pubDate>
<description>Oh 😲 I found three monkeys 🙈🙉🙊</description>
</item>
<item>
<guid isPermaLink="false">e62189168a06dfa74f61c621c79c33c4c8517e1f</guid>
<pubDate>Sun, 07 Oct 2018 16:38:17 +0000</pubDate>
<description>Something happened</description>
</item>
</channel>
</rss>