core: Add FeedItem class (#940)

Add transformation from legacy items to FeedItems, before transforming
items to the desired format. This allows using legacy bridges alongside
bridges that return FeedItems.

As discussed in #940, add messages to the debug log instead of throwing
exceptions on invalid parameters.

Add support for strings to setTimestamp(). If the provided timestamp
is a string, automatically try to parse it using strtotime().

This allows bridges to simply use `$item['timestamp'] = $timestamp;`
instead of `$item['timestamp'] = strtotime($timestamp);`

Support simple_html_dom_node as input parameter for setURI

Support simple_html_dom_node as input parameter for setContent
This commit is contained in:
LogMANOriginal 2018-12-26 22:41:32 +01:00 committed by GitHub
parent 4095cad9b4
commit 988635dcf3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 583 additions and 66 deletions

View file

@ -27,30 +27,26 @@ class AtomFormat extends FormatAbstract{
$entries = '';
foreach($this->getItems() as $item) {
$entryAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : '';
$entryTitle = isset($item['title']) ? $this->xml_encode($item['title']) : '';
$entryUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : '';
$entryTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_ATOM, $item['timestamp'])) : '';
$entryContent = isset($item['content']) ? $this->xml_encode($this->sanitizeHtml($item['content'])) : '';
$entryAuthor = $this->xml_encode($item->getAuthor());
$entryTitle = $this->xml_encode($item->getTitle());
$entryUri = $this->xml_encode($item->getURI());
$entryTimestamp = $this->xml_encode(date(DATE_ATOM, $item->getTimestamp()));
$entryContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$entryEnclosures = '';
if(isset($item['enclosures'])) {
foreach($item['enclosures'] as $enclosure) {
$entryEnclosures .= '<link rel="enclosure" href="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<link rel="enclosure" href="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
$entryCategories = '';
if(isset($item['categories'])) {
foreach($item['categories'] as $category) {
$entryCategories .= '<category term="'
. $this->xml_encode($category)
. '"/>'
. PHP_EOL;
}
foreach($item->getCategories() as $category) {
$entryCategories .= '<category term="'
. $this->xml_encode($category)
. '"/>'
. PHP_EOL;
}
$entries .= <<<EOD

View file

@ -9,31 +9,31 @@ class HtmlFormat extends FormatAbstract {
$entries = '';
foreach($this->getItems() as $item) {
$entryAuthor = isset($item['author']) ? '<br /><p class="author">by: ' . $item['author'] . '</p>' : '';
$entryTitle = isset($item['title']) ? $this->sanitizeHtml(strip_tags($item['title'])) : '';
$entryUri = isset($item['uri']) ? $item['uri'] : $uri;
$entryAuthor = $item->getAuthor() ? '<br /><p class="author">by: ' . $item->getAuthor() . '</p>' : '';
$entryTitle = $this->sanitizeHtml(strip_tags($item->getTitle()));
$entryUri = $item->getURI() ?: $uri;
$entryTimestamp = '';
if(isset($item['timestamp'])) {
if($item->getTimestamp()) {
$entryTimestamp = '<time datetime="'
. date(DATE_ATOM, $item['timestamp'])
. date(DATE_ATOM, $item->getTimestamp())
. '">'
. date(DATE_ATOM, $item['timestamp'])
. date(DATE_ATOM, $item->getTimestamp())
. '</time>';
}
$entryContent = '';
if(isset($item['content'])) {
if($item->getContent()) {
$entryContent = '<div class="content">'
. $this->sanitizeHtml($item['content'])
. $this->sanitizeHtml($item->getContent())
. '</div>';
}
$entryEnclosures = '';
if(isset($item['enclosures'])) {
if(!empty($item->getEnclosures())) {
$entryEnclosures = '<div class="attachments"><p>Attachments:</p>';
foreach($item['enclosures'] as $enclosure) {
foreach($item->getEnclosures() as $enclosure) {
$url = $this->sanitizeHtml($enclosure);
$entryEnclosures .= '<li class="enclosure"><a href="'
@ -47,10 +47,10 @@ class HtmlFormat extends FormatAbstract {
}
$entryCategories = '';
if(isset($item['categories']) && count($item['categories']) > 0) {
if(!empty($item->getCategories())) {
$entryCategories = '<div class="categories"><p>Categories:</p>';
foreach($item['categories'] as $category) {
foreach($item->getCategories() as $category) {
$entryCategories .= '<li class="category">'
. $this->sanitizeHtml($category)

View file

@ -6,7 +6,13 @@
class JsonFormat extends FormatAbstract {
public function stringify(){
$items = $this->getItems();
$toReturn = json_encode($items, JSON_PRETTY_PRINT);
$data = array();
foreach($items as $item) {
$data[] = $item->toArray();
}
$toReturn = json_encode($data, JSON_PRETTY_PRINT);
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');

View file

@ -25,24 +25,24 @@ class MrssFormat extends FormatAbstract {
$items = '';
foreach($this->getItems() as $item) {
$itemAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : '';
$itemTitle = strip_tags(isset($item['title']) ? $this->xml_encode($item['title']) : '');
$itemUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : '';
$itemTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_RFC2822, $item['timestamp'])) : '';
$itemContent = isset($item['content']) ? $this->xml_encode($this->sanitizeHtml($item['content'])) : '';
$itemAuthor = $this->xml_encode($item->getAuthor());
$itemTitle = $this->xml_encode($item->getTitle());
$itemUri = $this->xml_encode($item->getURI());
$itemTimestamp = $this->xml_encode(date(DATE_RFC2822, $item->getTimestamp()));
$itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$entryEnclosuresWarning = '';
$entryEnclosures = '';
if(isset($item['enclosures'])) {
if(!empty($item->getEnclosures())) {
$entryEnclosures .= '<enclosure url="'
. $this->xml_encode($item['enclosures'][0])
. '" type="' . getMimeType($item['enclosures'][0]) . '" />';
. $this->xml_encode($item->getEnclosures()[0])
. '" type="' . getMimeType($item->getEnclosures()[0]) . '" />';
if(count($item['enclosures']) > 1) {
if(count($item->getEnclosures()) > 1) {
$entryEnclosures .= PHP_EOL;
$entryEnclosuresWarning = '&lt;br&gt;Warning:
Some media files might not be shown to you. Consider using the ATOM format instead!';
foreach($item['enclosures'] as $enclosure) {
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<atom:link rel="enclosure" href="'
. $enclosure . '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
@ -51,13 +51,10 @@ Some media files might not be shown to you. Consider using the ATOM format inste
}
$entryCategories = '';
if(isset($item['categories'])) {
foreach($item['categories'] as $category) {
$entryCategories .= '<category>'
. $category . '</category>'
. PHP_EOL;
}
foreach($item->getCategories() as $category) {
$entryCategories .= '<category>'
. $category . '</category>'
. PHP_EOL;
}
$items .= <<<EOD

View file

@ -6,7 +6,13 @@
class PlaintextFormat extends FormatAbstract {
public function stringify(){
$items = $this->getItems();
$toReturn = print_r($items, true);
$data = array();
foreach($items as $item) {
$data[] = $item->toArray();
}
$toReturn = print_r($data, true);
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');

View file

@ -229,7 +229,10 @@ try {
$cached = $cache->loadData();
if(isset($cached['items']) && isset($cached['extraInfos'])) {
$items = $cached['items'];
foreach($cached['items'] as $item) {
$items[] = new \FeedItem($item);
}
$infos = $cached['extraInfos'];
}
@ -240,6 +243,19 @@ try {
$bridge->collectData();
$items = $bridge->getItems();
// Transform "legacy" items to FeedItems if necessary.
// Remove this code when support for "legacy" items ends!
if(is_array($items[0])) {
$feedItems = array();
foreach($items as $item) {
$feedItems[] = new \FeedItem($item);
}
$items = $feedItems;
}
$infos = array(
'name' => $bridge->getName(),
'uri' => $bridge->getURI(),
@ -248,44 +264,52 @@ try {
} catch(Error $e) {
error_log($e);
$item = array();
$item = new \FeedItem();
// Create "new" error message every 24 hours
$params['_error_time'] = urlencode((int)(time() / 86400));
// Error 0 is a special case (i.e. "trying to get property of non-object")
if($e->getCode() === 0) {
$item['title'] = 'Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')';
$item->setTitle('Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')');
} else {
$item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')';
$item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')');
}
$item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?' . http_build_query($params);
$item['timestamp'] = time();
$item['content'] = buildBridgeException($e, $bridge);
$item->setURI(
(isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?'
. http_build_query($params)
);
$item->setTimestamp(time());
$item->setContent(buildBridgeException($e, $bridge));
$items[] = $item;
} catch(Exception $e) {
error_log($e);
$item = array();
$item = new \FeedItem();
// Create "new" error message every 24 hours
$params['_error_time'] = urlencode((int)(time() / 86400));
$item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?' . http_build_query($params);
$item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')';
$item['timestamp'] = time();
$item['content'] = buildBridgeException($e, $bridge);
$item->setURI(
(isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?'
. http_build_query($params)
);
$item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')');
$item->setTimestamp(time());
$item->setContent(buildBridgeException($e, $bridge));
$items[] = $item;
}
// Store data in cache
$cache->saveData(array(
'items' => $items,
'items' => array_map(function($i){ return $i->toArray(); }, $items),
'extraInfos' => $infos
));

487
lib/FeedItem.php Normal file
View file

@ -0,0 +1,487 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
 * Represents a simple feed item for transformation into various feed formats.
 *
 * This class represents a feed item. A feed item is an entity that can be
 * transformed into various feed formats. It holds a set of pre-defined
 * properties:
 *
 * - **URI**: URI to the full article (i.e. "https://...")
 * - **Title**: The title
 * - **Timestamp**: A timestamp of when the item was first released
 * - **Author**: Name of the author
 * - **Content**: Body of the feed, as text or HTML
 * - **Enclosures**: A list of links to media objects (images, videos, etc...)
 * - **Categories**: A list of category names or tags to categorize the item
 *
 * _Note_: A feed item can have any number of additional parameters, all of which
 * may or may not be transformed to the selected output format.
 *
 * _Note_: Setters never throw on invalid input. They clear the property, write
 * a message to the debug log and return the item unchanged otherwise.
 *
 * _Remarks_: This class supports legacy items via {@see FeedItem::__construct()}
 * (i.e. `$feedItem = new \FeedItem($item);`). Support for legacy items may be
 * removed in future versions of RSS-Bridge.
 */
class FeedItem {
    /** @var string|null URI to the full article */
    protected $uri = null;

    /** @var string|null Title of the item */
    protected $title = null;

    /** @var int|null Timestamp of when the item was first released */
    protected $timestamp = null;

    /** @var string|null Name of the author */
    protected $author = null;

    /** @var string|null Body of the feed */
    protected $content = null;

    /** @var array List of links to media objects */
    protected $enclosures = array();

    /** @var array List of category names or tags */
    protected $categories = array();

    /** @var array Associative list of additional (custom) parameters */
    protected $misc = array();

    /**
     * Create object from legacy item.
     *
     * The provided array must be an associative array of key-value-pairs, where
     * keys may correspond to any of the properties of this class. Keys that do
     * not correspond to a property are stored as miscellaneous elements.
     *
     * Example use:
     *
     * ```PHP
     * <?php
     * $item = array();
     *
     * $item['uri'] = 'https://www.github.com/rss-bridge/rss-bridge/';
     * $item['title'] = 'Title';
     * $item['timestamp'] = strtotime('now');
     * $item['author'] = 'Unknown author';
     * $item['content'] = 'Hello World!';
     * $item['enclosures'] = array('https://github.com/favicon.ico');
     * $item['categories'] = array('php', 'rss-bridge', 'awesome');
     *
     * $feedItem = new \FeedItem($item);
     *
     * ```
     *
     * The result of the code above is the same as the code below:
     *
     * ```PHP
     * <?php
     * $feedItem = new \FeedItem();
     *
     * $feedItem->uri = 'https://www.github.com/rss-bridge/rss-bridge/';
     * $feedItem->title = 'Title';
     * $feedItem->timestamp = strtotime('now');
     * $feedItem->author = 'Unknown author';
     * $feedItem->content = 'Hello World!';
     * $feedItem->enclosures = array('https://github.com/favicon.ico');
     * $feedItem->categories = array('php', 'rss-bridge', 'awesome');
     * ```
     *
     * @param array $item (optional) A legacy item (empty: no legacy support).
     */
    public function __construct($item = array()) {
        if(!is_array($item)) {
            // Nothing to iterate over; leave the item empty instead of
            // running foreach on a non-array value.
            Debug::log('Item must be an array!');
            return;
        }

        foreach($item as $key => $value) {
            $this->__set($key, $value);
        }
    }

    /**
     * Get current URI.
     *
     * Use {@see FeedItem::setURI()} to set the URI.
     *
     * @return string|null The URI or null if it hasn't been set.
     */
    public function getURI() {
        return $this->uri;
    }

    /**
     * Set URI to the full article.
     *
     * Use {@see FeedItem::getURI()} to get the URI.
     *
     * _Note_: Removes whitespace from the beginning and end of the URI before
     * it is validated.
     *
     * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an
     * object of simple_html_dom_node.
     *
     * @param object|string $uri URI to the full article.
     * @return self
     */
    public function setURI($uri) {
        $this->uri = null; // Clear previous data

        if($uri instanceof simple_html_dom_node) {
            if($uri->hasAttribute('href')) { // Anchor
                $uri = $uri->href;
            } elseif($uri->hasAttribute('src')) { // Image
                $uri = $uri->src;
            } else {
                Debug::log('The item provided as URI is unknown!');
                return $this;
            }
        }

        if(!is_string($uri)) {
            Debug::log('URI must be a string!');
            return $this;
        }

        // Trim first: FILTER_VALIDATE_URL rejects any surrounding whitespace.
        $uri = trim($uri);

        // Scheme and host are always required by FILTER_VALIDATE_URL; the
        // dedicated flags for them no longer exist in PHP 8.
        if(!filter_var($uri, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
            Debug::log('URI must include a scheme, host and path!');
            return $this;
        }

        $scheme = parse_url($uri, PHP_URL_SCHEME);

        if($scheme !== 'http' && $scheme !== 'https') {
            Debug::log('URI scheme must be "http" or "https"!');
            return $this;
        }

        $this->uri = $uri;

        return $this;
    }

    /**
     * Get current title.
     *
     * Use {@see FeedItem::setTitle()} to set the title.
     *
     * @return string|null The current title or null if it hasn't been set.
     */
    public function getTitle() {
        return $this->title;
    }

    /**
     * Set title.
     *
     * Use {@see FeedItem::getTitle()} to get the title.
     *
     * _Note_: Removes whitespace from beginning and end of the title.
     *
     * @param string $title The title
     * @return self
     */
    public function setTitle($title) {
        $this->title = null; // Clear previous data

        if(!is_string($title)) {
            Debug::log('Title must be a string!');
            return $this;
        }

        $this->title = trim($title);

        return $this;
    }

    /**
     * Get current timestamp.
     *
     * Use {@see FeedItem::setTimestamp()} to set the timestamp.
     *
     * @return int|null The current timestamp or null if it hasn't been set.
     */
    public function getTimestamp() {
        return $this->timestamp;
    }

    /**
     * Set timestamp of first release.
     *
     * _Note_: The timestamp should represent the number of seconds since
     * January 1 1970 00:00:00 GMT (Unix time). Numeric values (including
     * numeric strings) are stored as integers.
     *
     * _Remarks_: If the provided timestamp is a string (not numeric), this
     * function automatically attempts to parse the string using
     * [strtotime](http://php.net/manual/en/function.strtotime.php)
     *
     * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP)
     * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia)
     *
     * @param string|int $timestamp A timestamp of when the item was first released
     * @return self
     */
    public function setTimestamp($timestamp) {
        $this->timestamp = null; // Clear previous data

        if(is_numeric($timestamp)) {
            $timestamp = (int)$timestamp;
        } elseif(is_string($timestamp)) {
            $timestamp = strtotime($timestamp);

            if($timestamp === false) {
                Debug::log('Unable to parse timestamp!');
                return $this;
            }
        } else {
            // Neither a number nor a string: strtotime() cannot handle it.
            Debug::log('Unable to parse timestamp!');
            return $this;
        }

        if($timestamp <= 0) {
            Debug::log('Timestamp must be greater than zero!');
            return $this;
        }

        $this->timestamp = $timestamp;

        return $this;
    }

    /**
     * Get the current author name.
     *
     * Use {@see FeedItem::setAuthor()} to set the author.
     *
     * @return string|null The author or null if it hasn't been set.
     */
    public function getAuthor() {
        return $this->author;
    }

    /**
     * Set the author name.
     *
     * Use {@see FeedItem::getAuthor()} to get the author.
     *
     * @param string $author The author name.
     * @return self
     */
    public function setAuthor($author) {
        $this->author = null; // Clear previous data

        if(!is_string($author)) {
            Debug::log('Author must be a string!');
            return $this;
        }

        $this->author = $author;

        return $this;
    }

    /**
     * Get item content.
     *
     * Use {@see FeedItem::setContent()} to set the item content.
     *
     * @return string|null The item content or null if it hasn't been set.
     */
    public function getContent() {
        return $this->content;
    }

    /**
     * Set item content.
     *
     * Note: This function casts objects of type simple_html_dom and
     * simple_html_dom_node to string.
     *
     * Use {@see FeedItem::getContent()} to get the current item content.
     *
     * @param string|object $content The item content as text or simple_html_dom
     * object.
     * @return self
     */
    public function setContent($content) {
        $this->content = null; // Clear previous data

        if($content instanceof simple_html_dom
        || $content instanceof simple_html_dom_node) {
            $content = (string)$content;
        }

        if(!is_string($content)) {
            Debug::log('Content must be a string!');
            return $this;
        }

        $this->content = $content;

        return $this;
    }

    /**
     * Get item enclosures.
     *
     * Use {@see FeedItem::setEnclosures()} to set feed enclosures.
     *
     * @return array Enclosures as array of enclosure URIs.
     */
    public function getEnclosures() {
        return $this->enclosures;
    }

    /**
     * Set item enclosures.
     *
     * Use {@see FeedItem::getEnclosures()} to get the current item enclosures.
     *
     * _Note_: Elements that are not valid URLs are skipped (with a message in
     * the debug log); the remaining elements are still stored.
     *
     * @param array $enclosures Array of enclosures, where each element links to
     * one enclosure.
     * @return self
     */
    public function setEnclosures($enclosures) {
        $this->enclosures = array(); // Clear previous data

        if(!is_array($enclosures)) {
            Debug::log('Enclosures must be an array!');
            return $this;
        }

        foreach($enclosures as $enclosure) {
            // Scheme and host are always required by FILTER_VALIDATE_URL; the
            // dedicated flags for them no longer exist in PHP 8.
            if(!filter_var($enclosure, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
                Debug::log('Each enclosure must contain a scheme, host and path!');
                continue;
            }

            $this->enclosures[] = $enclosure;
        }

        return $this;
    }

    /**
     * Get item categories.
     *
     * Use {@see FeedItem::setCategories()} to set item categories.
     *
     * @return array The item categories.
     */
    public function getCategories() {
        return $this->categories;
    }

    /**
     * Set item categories.
     *
     * Use {@see FeedItem::getCategories()} to get the current item categories.
     *
     * _Note_: Elements that are not strings are skipped (with a message in the
     * debug log); the remaining elements are still stored.
     *
     * @param array $categories Array of categories, where each element defines
     * a single category name.
     * @return self
     */
    public function setCategories($categories) {
        $this->categories = array(); // Clear previous data

        if(!is_array($categories)) {
            Debug::log('Categories must be an array!');
            return $this;
        }

        foreach($categories as $category) {
            if(!is_string($category)) {
                Debug::log('Category must be a string!');
                continue;
            }

            $this->categories[] = $category;
        }

        return $this;
    }

    /**
     * Add miscellaneous elements to the item.
     *
     * _Note_: Names of the properties declared by this class (`uri`, `title`,
     * `timestamp`, `author`, `content`, `enclosures`, `categories`, `misc`) are
     * reserved and cannot be used as key. Otherwise the element would override
     * the property of the same name in {@see FeedItem::toArray()}.
     *
     * @param string $key Name of the element.
     * @param mixed $value Value of the element.
     * @return self
     */
    public function addMisc($key, $value) {
        if(!is_string($key)) {
            Debug::log('Key must be a string!');
        } elseif(array_key_exists($key, get_object_vars($this))) {
            // Compare against property names, not property values.
            Debug::log('Key must be unique!');
        } else {
            $this->misc[$key] = $value;
        }

        return $this;
    }

    /**
     * Transform current object to array
     *
     * The array contains all pre-defined properties, followed by the
     * miscellaneous elements added via {@see FeedItem::addMisc()}.
     *
     * @return array
     */
    public function toArray() {
        return array_merge(
            array(
                'uri' => $this->uri,
                'title' => $this->title,
                'timestamp' => $this->timestamp,
                'author' => $this->author,
                'content' => $this->content,
                'enclosures' => $this->enclosures,
                'categories' => $this->categories,
            ), $this->misc
        );
    }

    /**
     * Set item property
     *
     * Allows simple assignment to parameters. This method is slower, but easier
     * to implement in some cases:
     *
     * ```PHP
     * $item = new \FeedItem();
     * $item->content = 'Hello World!';
     * $item->my_id = 42;
     * ```
     *
     * Names of pre-defined properties are forwarded to the matching setter,
     * everything else is forwarded to {@see FeedItem::addMisc()}.
     *
     * @param string $name Property name
     * @param mixed $value Property value
     */
    public function __set($name, $value) {
        $setters = array(
            'uri' => 'setURI',
            'title' => 'setTitle',
            'timestamp' => 'setTimestamp',
            'author' => 'setAuthor',
            'content' => 'setContent',
            'enclosures' => 'setEnclosures',
            'categories' => 'setCategories',
        );

        // Strict lookup: a loosely compared integer key (e.g. 0 from a legacy
        // list-style array) must not be mistaken for a property name.
        if(is_string($name) && isset($setters[$name])) {
            $this->{$setters[$name]}($value);
        } else {
            $this->addMisc($name, $value);
        }
    }

    /**
     * Get item property
     *
     * Allows simple read access to parameters. This method is slower, but
     * easier to implement in some cases:
     *
     * ```PHP
     * $content = $item->content;
     * $my_id = $item->my_id;
     * ```
     *
     * @param string $name Property name
     * @return mixed Property value, or null if no property or miscellaneous
     * element of that name exists.
     */
    public function __get($name) {
        $getters = array(
            'uri' => 'getURI',
            'title' => 'getTitle',
            'timestamp' => 'getTimestamp',
            'author' => 'getAuthor',
            'content' => 'getContent',
            'enclosures' => 'getEnclosures',
            'categories' => 'getCategories',
        );

        if(!is_string($name)) {
            // Property names are strings; only a direct call can get here.
            return null;
        }

        if(isset($getters[$name])) {
            return $this->{$getters[$name]}();
        }

        if(array_key_exists($name, $this->misc)) {
            return $this->misc[$name];
        }

        return null;
    }
}

View file

@ -113,7 +113,7 @@ abstract class FormatAbstract implements FormatInterface {
* @param array $items {@inheritdoc}
*/
public function setItems(array $items){
$this->items = array_map(array($this, 'array_trim'), $items);
$this->items = $items;
return $this;
}

View file

@ -44,6 +44,7 @@ require_once PATH_LIB . 'CacheInterface.php';
require_once PATH_LIB . 'FormatInterface.php';
// Classes
require_once PATH_LIB . 'FeedItem.php';
require_once PATH_LIB . 'Debug.php';
require_once PATH_LIB . 'Exceptions.php';
require_once PATH_LIB . 'Format.php';