core: Add FeedItem class ()

Add transformation from legacy items to FeedItems, before transforming
items to the desired format. This allows using legacy bridges alongside
bridges that return FeedItems.

As discussed in the related issue, invalid parameters no longer throw
exceptions; a message is added to the debug log instead.

Add support for strings to setTimestamp(). If the provided timestamp
is a string, automatically try to parse it using strtotime().

This allows bridges to simply use `$item['timestamp'] = $timestamp;`
instead of `$item['timestamp'] = strtotime($timestamp);`

Support simple_html_dom_node as input parameter for setURI

Support simple_html_dom_node as input parameter for setContent
This commit is contained in:
LogMANOriginal 2018-12-26 22:41:32 +01:00 committed by GitHub
parent 4095cad9b4
commit 988635dcf3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 583 additions and 66 deletions

View file

@ -27,30 +27,26 @@ class AtomFormat extends FormatAbstract{
$entries = '';
foreach($this->getItems() as $item) {
$entryAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : '';
$entryTitle = isset($item['title']) ? $this->xml_encode($item['title']) : '';
$entryUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : '';
$entryTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_ATOM, $item['timestamp'])) : '';
$entryContent = isset($item['content']) ? $this->xml_encode($this->sanitizeHtml($item['content'])) : '';
$entryAuthor = $this->xml_encode($item->getAuthor());
$entryTitle = $this->xml_encode($item->getTitle());
$entryUri = $this->xml_encode($item->getURI());
$entryTimestamp = $this->xml_encode(date(DATE_ATOM, $item->getTimestamp()));
$entryContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$entryEnclosures = '';
if(isset($item['enclosures'])) {
foreach($item['enclosures'] as $enclosure) {
$entryEnclosures .= '<link rel="enclosure" href="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<link rel="enclosure" href="'
. $this->xml_encode($enclosure)
. '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
}
$entryCategories = '';
if(isset($item['categories'])) {
foreach($item['categories'] as $category) {
$entryCategories .= '<category term="'
. $this->xml_encode($category)
. '"/>'
. PHP_EOL;
}
foreach($item->getCategories() as $category) {
$entryCategories .= '<category term="'
. $this->xml_encode($category)
. '"/>'
. PHP_EOL;
}
$entries .= <<<EOD

View file

@ -9,31 +9,31 @@ class HtmlFormat extends FormatAbstract {
$entries = '';
foreach($this->getItems() as $item) {
$entryAuthor = isset($item['author']) ? '<br /><p class="author">by: ' . $item['author'] . '</p>' : '';
$entryTitle = isset($item['title']) ? $this->sanitizeHtml(strip_tags($item['title'])) : '';
$entryUri = isset($item['uri']) ? $item['uri'] : $uri;
$entryAuthor = $item->getAuthor() ? '<br /><p class="author">by: ' . $item->getAuthor() . '</p>' : '';
$entryTitle = $this->sanitizeHtml(strip_tags($item->getTitle()));
$entryUri = $item->getURI() ?: $uri;
$entryTimestamp = '';
if(isset($item['timestamp'])) {
if($item->getTimestamp()) {
$entryTimestamp = '<time datetime="'
. date(DATE_ATOM, $item['timestamp'])
. date(DATE_ATOM, $item->getTimestamp())
. '">'
. date(DATE_ATOM, $item['timestamp'])
. date(DATE_ATOM, $item->getTimestamp())
. '</time>';
}
$entryContent = '';
if(isset($item['content'])) {
if($item->getContent()) {
$entryContent = '<div class="content">'
. $this->sanitizeHtml($item['content'])
. $this->sanitizeHtml($item->getContent())
. '</div>';
}
$entryEnclosures = '';
if(isset($item['enclosures'])) {
if(!empty($item->getEnclosures())) {
$entryEnclosures = '<div class="attachments"><p>Attachments:</p>';
foreach($item['enclosures'] as $enclosure) {
foreach($item->getEnclosures() as $enclosure) {
$url = $this->sanitizeHtml($enclosure);
$entryEnclosures .= '<li class="enclosure"><a href="'
@ -47,10 +47,10 @@ class HtmlFormat extends FormatAbstract {
}
$entryCategories = '';
if(isset($item['categories']) && count($item['categories']) > 0) {
if(!empty($item->getCategories())) {
$entryCategories = '<div class="categories"><p>Categories:</p>';
foreach($item['categories'] as $category) {
foreach($item->getCategories() as $category) {
$entryCategories .= '<li class="category">'
. $this->sanitizeHtml($category)

View file

@ -6,7 +6,13 @@
class JsonFormat extends FormatAbstract {
public function stringify(){
$items = $this->getItems();
$toReturn = json_encode($items, JSON_PRETTY_PRINT);
$data = array();
foreach($items as $item) {
$data[] = $item->toArray();
}
$toReturn = json_encode($data, JSON_PRETTY_PRINT);
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');

View file

@ -25,24 +25,24 @@ class MrssFormat extends FormatAbstract {
$items = '';
foreach($this->getItems() as $item) {
$itemAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : '';
$itemTitle = strip_tags(isset($item['title']) ? $this->xml_encode($item['title']) : '');
$itemUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : '';
$itemTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_RFC2822, $item['timestamp'])) : '';
$itemContent = isset($item['content']) ? $this->xml_encode($this->sanitizeHtml($item['content'])) : '';
$itemAuthor = $this->xml_encode($item->getAuthor());
$itemTitle = $this->xml_encode($item->getTitle());
$itemUri = $this->xml_encode($item->getURI());
$itemTimestamp = $this->xml_encode(date(DATE_RFC2822, $item->getTimestamp()));
$itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent()));
$entryEnclosuresWarning = '';
$entryEnclosures = '';
if(isset($item['enclosures'])) {
if(!empty($item->getEnclosures())) {
$entryEnclosures .= '<enclosure url="'
. $this->xml_encode($item['enclosures'][0])
. '" type="' . getMimeType($item['enclosures'][0]) . '" />';
. $this->xml_encode($item->getEnclosures()[0])
. '" type="' . getMimeType($item->getEnclosures()[0]) . '" />';
if(count($item['enclosures']) > 1) {
if(count($item->getEnclosures()) > 1) {
$entryEnclosures .= PHP_EOL;
$entryEnclosuresWarning = '&lt;br&gt;Warning:
Some media files might not be shown to you. Consider using the ATOM format instead!';
foreach($item['enclosures'] as $enclosure) {
foreach($item->getEnclosures() as $enclosure) {
$entryEnclosures .= '<atom:link rel="enclosure" href="'
. $enclosure . '" type="' . getMimeType($enclosure) . '" />'
. PHP_EOL;
@ -51,13 +51,10 @@ Some media files might not be shown to you. Consider using the ATOM format inste
}
$entryCategories = '';
if(isset($item['categories'])) {
foreach($item['categories'] as $category) {
$entryCategories .= '<category>'
. $category . '</category>'
. PHP_EOL;
}
foreach($item->getCategories() as $category) {
$entryCategories .= '<category>'
. $category . '</category>'
. PHP_EOL;
}
$items .= <<<EOD

View file

@ -6,7 +6,13 @@
class PlaintextFormat extends FormatAbstract {
public function stringify(){
$items = $this->getItems();
$toReturn = print_r($items, true);
$data = array();
foreach($items as $item) {
$data[] = $item->toArray();
}
$toReturn = print_r($data, true);
// Remove invalid non-UTF8 characters
ini_set('mbstring.substitute_character', 'none');

View file

@ -229,7 +229,10 @@ try {
$cached = $cache->loadData();
if(isset($cached['items']) && isset($cached['extraInfos'])) {
$items = $cached['items'];
foreach($cached['items'] as $item) {
$items[] = new \FeedItem($item);
}
$infos = $cached['extraInfos'];
}
@ -240,6 +243,19 @@ try {
$bridge->collectData();
$items = $bridge->getItems();
// Transform "legacy" items to FeedItems if necessary.
// Remove this code when support for "legacy" items ends!
if(is_array($items[0])) {
$feedItems = array();
foreach($items as $item) {
$feedItems[] = new \FeedItem($item);
}
$items = $feedItems;
}
$infos = array(
'name' => $bridge->getName(),
'uri' => $bridge->getURI(),
@ -248,44 +264,52 @@ try {
} catch(Error $e) {
error_log($e);
$item = array();
$item = new \FeedItem();
// Create "new" error message every 24 hours
$params['_error_time'] = urlencode((int)(time() / 86400));
// Error 0 is a special case (i.e. "trying to get property of non-object")
if($e->getCode() === 0) {
$item['title'] = 'Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')';
$item->setTitle('Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')');
} else {
$item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')';
$item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')');
}
$item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?' . http_build_query($params);
$item['timestamp'] = time();
$item['content'] = buildBridgeException($e, $bridge);
$item->setURI(
(isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?'
. http_build_query($params)
);
$item->setTimestamp(time());
$item->setContent(buildBridgeException($e, $bridge));
$items[] = $item;
} catch(Exception $e) {
error_log($e);
$item = array();
$item = new \FeedItem();
// Create "new" error message every 24 hours
$params['_error_time'] = urlencode((int)(time() / 86400));
$item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?' . http_build_query($params);
$item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')';
$item['timestamp'] = time();
$item['content'] = buildBridgeException($e, $bridge);
$item->setURI(
(isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '')
. '?'
. http_build_query($params)
);
$item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')');
$item->setTimestamp(time());
$item->setContent(buildBridgeException($e, $bridge));
$items[] = $item;
}
// Store data in cache
$cache->saveData(array(
'items' => $items,
'items' => array_map(function($i){ return $i->toArray(); }, $items),
'extraInfos' => $infos
));

487
lib/FeedItem.php Normal file
View file

@ -0,0 +1,487 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
 * Represents a simple feed item for transformation into various feed formats.
 *
 * This class represents a feed item. A feed item is an entity that can be
 * transformed into various feed formats. It holds a set of pre-defined
 * properties:
 *
 * - **URI**: URI to the full article (i.e. "https://...")
 * - **Title**: The title
 * - **Timestamp**: A timestamp of when the item was first released
 * - **Author**: Name of the author
 * - **Content**: Body of the feed, as text or HTML
 * - **Enclosures**: A list of links to media objects (images, videos, etc...)
 * - **Categories**: A list of category names or tags to categorize the item
 *
 * _Note_: A feed item can have any number of additional parameters, all of which
 * may or may not be transformed to the selected output format.
 *
 * _Remarks_: This class supports legacy items via {@see FeedItem::__construct()}
 * (i.e. `$feedItem = new \FeedItem($item);`). Support for legacy items may be
 * removed in future versions of RSS-Bridge.
 *
 * Invalid values never raise exceptions: every setter clears the previous
 * value, writes a message via `Debug::log()` and leaves the property unset.
 */
class FeedItem {
	/** @var string|null URI to the full article */
	protected $uri = null;

	/** @var string|null Title of the item */
	protected $title = null;

	/** @var int|null Timestamp of when the item was first released */
	protected $timestamp = null;

	/** @var string|null Name of the author */
	protected $author = null;

	/** @var string|null Body of the feed */
	protected $content = null;

	/** @var array List of links to media objects */
	protected $enclosures = array();

	/** @var array List of category names or tags */
	protected $categories = array();

	/** @var array Associative list of additional (custom) parameters */
	protected $misc = array();

	/**
	 * Create object from legacy item.
	 *
	 * The provided array must be an associative array of key-value-pairs, where
	 * keys may correspond to any of the properties of this class. Keys that do
	 * not match a pre-defined property are stored as miscellaneous elements
	 * (see {@see FeedItem::addMisc()}).
	 *
	 * Example use:
	 *
	 * ```PHP
	 * <?php
	 * $item = array();
	 *
	 * $item['uri'] = 'https://www.github.com/rss-bridge/rss-bridge/';
	 * $item['title'] = 'Title';
	 * $item['timestamp'] = strtotime('now');
	 * $item['author'] = 'Unknown author';
	 * $item['content'] = 'Hello World!';
	 * $item['enclosures'] = array('https://github.com/favicon.ico');
	 * $item['categories'] = array('php', 'rss-bridge', 'awesome');
	 *
	 * $feedItem = new \FeedItem($item);
	 *
	 * ```
	 *
	 * The result of the code above is the same as the code below:
	 *
	 * ```PHP
	 * <?php
	 * $feedItem = new \FeedItem();
	 *
	 * $feedItem->uri = 'https://www.github.com/rss-bridge/rss-bridge/';
	 * $feedItem->title = 'Title';
	 * $feedItem->timestamp = strtotime('now');
	 * $feedItem->author = 'Unknown author';
	 * $feedItem->content = 'Hello World!';
	 * $feedItem->enclosures = array('https://github.com/favicon.ico');
	 * $feedItem->categories = array('php', 'rss-bridge', 'awesome');
	 * ```
	 *
	 * @param array $item (optional) A legacy item (empty: no legacy support).
	 */
	public function __construct($item = array()) {
		if(!is_array($item)) {
			Debug::log('Item must be an array!');
			return; // Nothing to iterate over; leave the item empty
		}

		foreach($item as $key => $value) {
			$this->__set($key, $value);
		}
	}

	/**
	 * Get current URI.
	 *
	 * Use {@see FeedItem::setURI()} to set the URI.
	 *
	 * @return string|null The URI or null if it hasn't been set.
	 */
	public function getURI() {
		return $this->uri;
	}

	/**
	 * Set URI to the full article.
	 *
	 * Use {@see FeedItem::getURI()} to get the URI.
	 *
	 * _Note_: Removes whitespace from the beginning and end of the URI before
	 * it is validated.
	 *
	 * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an
	 * object of simple_html_dom_node.
	 *
	 * @param object|string $uri URI to the full article.
	 * @return self
	 */
	public function setURI($uri) {
		$this->uri = null; // Clear previous data

		if($uri instanceof simple_html_dom_node) {
			if($uri->hasAttribute('href')) { // Anchor
				$uri = $uri->href;
			} elseif($uri->hasAttribute('src')) { // Image
				$uri = $uri->src;
			} else {
				Debug::log('The item provided as URI is unknown!');
				return $this;
			}
		}

		if(!is_string($uri)) {
			Debug::log('URI must be a string!');
			return $this;
		}

		// Trim first: surrounding whitespace would otherwise fail validation
		$uri = trim($uri);

		// FILTER_VALIDATE_URL always requires a scheme and a host. The explicit
		// FILTER_FLAG_SCHEME_REQUIRED / FILTER_FLAG_HOST_REQUIRED flags are
		// deprecated as of PHP 7.3 and removed in PHP 8.0, so only the path
		// requirement is requested here.
		if(!filter_var($uri, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
			Debug::log('URI must include a scheme, host and path!');
			return $this;
		}

		$scheme = parse_url($uri, PHP_URL_SCHEME);

		if($scheme !== 'http' && $scheme !== 'https') {
			Debug::log('URI scheme must be "http" or "https"!');
			return $this;
		}

		$this->uri = $uri;

		return $this;
	}

	/**
	 * Get current title.
	 *
	 * Use {@see FeedItem::setTitle()} to set the title.
	 *
	 * @return string|null The current title or null if it hasn't been set.
	 */
	public function getTitle() {
		return $this->title;
	}

	/**
	 * Set title.
	 *
	 * Use {@see FeedItem::getTitle()} to get the title.
	 *
	 * _Note_: Removes whitespace from beginning and end of the title.
	 *
	 * @param string $title The title
	 * @return self
	 */
	public function setTitle($title) {
		$this->title = null; // Clear previous data

		if(!is_string($title)) {
			Debug::log('Title must be a string!');
		} else {
			$this->title = trim($title);
		}

		return $this;
	}

	/**
	 * Get current timestamp.
	 *
	 * Use {@see FeedItem::setTimestamp()} to set the timestamp.
	 *
	 * @return int|null The current timestamp or null if it hasn't been set.
	 */
	public function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * Set timestamp of first release.
	 *
	 * _Note_: The timestamp should represent the number of seconds since
	 * January 1 1970 00:00:00 GMT (Unix time).
	 *
	 * _Remarks_: If the provided timestamp is a string (not numeric), this
	 * function automatically attempts to parse the string using
	 * [strtotime](http://php.net/manual/en/function.strtotime.php). Numeric
	 * values (including numeric strings) are stored as integers.
	 *
	 * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP)
	 * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia)
	 *
	 * @param string|int $timestamp A timestamp of when the item was first released
	 * @return self
	 */
	public function setTimestamp($timestamp) {
		$this->timestamp = null; // Clear previous data

		if(is_numeric($timestamp)) {
			// Normalize numeric strings and floats to the documented int type
			$timestamp = (int)$timestamp;
		} elseif(is_string($timestamp)) {
			$timestamp = strtotime($timestamp);

			if($timestamp === false) {
				Debug::log('Unable to parse timestamp!');
				return $this;
			}
		} else {
			// Arrays, objects, null, ... cannot be passed to strtotime()
			Debug::log('Unable to parse timestamp!');
			return $this;
		}

		if($timestamp <= 0) {
			Debug::log('Timestamp must be greater than zero!');
		} else {
			$this->timestamp = $timestamp;
		}

		return $this;
	}

	/**
	 * Get the current author name.
	 *
	 * Use {@see FeedItem::setAuthor()} to set the author.
	 *
	 * @return string|null The author or null if it hasn't been set.
	 */
	public function getAuthor() {
		return $this->author;
	}

	/**
	 * Set the author name.
	 *
	 * Use {@see FeedItem::getAuthor()} to get the author.
	 *
	 * @param string $author The author name.
	 * @return self
	 */
	public function setAuthor($author) {
		$this->author = null; // Clear previous data

		if(!is_string($author)) {
			Debug::log('Author must be a string!');
		} else {
			$this->author = $author;
		}

		return $this;
	}

	/**
	 * Get item content.
	 *
	 * Use {@see FeedItem::setContent()} to set the item content.
	 *
	 * @return string|null The item content or null if it hasn't been set.
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set item content.
	 *
	 * Note: This function casts objects of type simple_html_dom and
	 * simple_html_dom_node to string.
	 *
	 * Use {@see FeedItem::getContent()} to get the current item content.
	 *
	 * @param string|object $content The item content as text or simple_html_dom
	 * object.
	 * @return self
	 */
	public function setContent($content) {
		$this->content = null; // Clear previous data

		if($content instanceof simple_html_dom
		|| $content instanceof simple_html_dom_node) {
			$content = (string)$content;
		}

		if(!is_string($content)) {
			Debug::log('Content must be a string!');
		} else {
			$this->content = $content;
		}

		return $this;
	}

	/**
	 * Get item enclosures.
	 *
	 * Use {@see FeedItem::setEnclosures()} to set feed enclosures.
	 *
	 * @return array Enclosures as array of enclosure URIs.
	 */
	public function getEnclosures() {
		return $this->enclosures;
	}

	/**
	 * Set item enclosures.
	 *
	 * Use {@see FeedItem::getEnclosures()} to get the current item enclosures.
	 *
	 * Elements that are not valid URLs (scheme, host and path) are skipped
	 * with a debug message; valid elements are kept in their original order.
	 *
	 * @param array $enclosures Array of enclosures, where each element links to
	 * one enclosure.
	 * @return self
	 */
	public function setEnclosures($enclosures) {
		$this->enclosures = array(); // Clear previous data

		if(!is_array($enclosures)) {
			Debug::log('Enclosures must be an array!');
			return $this;
		}

		foreach($enclosures as $enclosure) {
			// Scheme and host are implied by FILTER_VALIDATE_URL; the explicit
			// flags for them no longer exist in PHP 8.0.
			if(!filter_var($enclosure, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED)) {
				Debug::log('Each enclosure must contain a scheme, host and path!');
			} else {
				$this->enclosures[] = $enclosure;
			}
		}

		return $this;
	}

	/**
	 * Get item categories.
	 *
	 * Use {@see FeedItem::setCategories()} to set item categories.
	 *
	 * @return array The item categories.
	 */
	public function getCategories() {
		return $this->categories;
	}

	/**
	 * Set item categories.
	 *
	 * Use {@see FeedItem::getCategories()} to get the current item categories.
	 *
	 * Elements that are not strings are skipped with a debug message.
	 *
	 * @param array $categories Array of categories, where each element defines
	 * a single category name.
	 * @return self
	 */
	public function setCategories($categories) {
		$this->categories = array(); // Clear previous data

		if(!is_array($categories)) {
			Debug::log('Categories must be an array!');
			return $this;
		}

		foreach($categories as $category) {
			if(!is_string($category)) {
				Debug::log('Category must be a string!');
			} else {
				$this->categories[] = $category;
			}
		}

		return $this;
	}

	/**
	 * Add miscellaneous elements to the item.
	 *
	 * The key must not collide with the name of one of the properties of this
	 * class (i.e. "uri", "title", ...); such keys are rejected with a debug
	 * message. Adding the same custom key twice overwrites the previous value.
	 *
	 * @param string $key Name of the element.
	 * @param mixed $value Value of the element.
	 * @return self
	 */
	public function addMisc($key, $value) {
		if(!is_string($key)) {
			Debug::log('Key must be a string!');
		} elseif(array_key_exists($key, get_object_vars($this))) {
			// Compare against property *names*; get_object_vars() maps
			// name => value, so in_array() would compare against the values.
			Debug::log('Key must be unique!');
		} else {
			$this->misc[$key] = $value;
		}

		return $this;
	}

	/**
	 * Transform current object to array
	 *
	 * The result contains all pre-defined properties followed by the
	 * miscellaneous elements added via {@see FeedItem::addMisc()}.
	 *
	 * @return array
	 */
	public function toArray() {
		return array_merge(
			array(
				'uri' => $this->uri,
				'title' => $this->title,
				'timestamp' => $this->timestamp,
				'author' => $this->author,
				'content' => $this->content,
				'enclosures' => $this->enclosures,
				'categories' => $this->categories,
			), $this->misc
		);
	}

	/**
	 * Set item property
	 *
	 * Allows simple assignment to parameters. This method is slower, but easier
	 * to implement in some cases:
	 *
	 * ```PHP
	 * $item = new \FeedItem();
	 * $item->content = 'Hello World!';
	 * $item->my_id = 42;
	 * ```
	 *
	 * Names of pre-defined properties are routed to their setter; every other
	 * name is stored via {@see FeedItem::addMisc()}.
	 *
	 * @param string $name Property name
	 * @param mixed $value Property value
	 */
	public function __set($name, $value) {
		switch($name) {
			case 'uri': $this->setURI($value); break;
			case 'title': $this->setTitle($value); break;
			case 'timestamp': $this->setTimestamp($value); break;
			case 'author': $this->setAuthor($value); break;
			case 'content': $this->setContent($value); break;
			case 'enclosures': $this->setEnclosures($value); break;
			case 'categories': $this->setCategories($value); break;
			default: $this->addMisc($name, $value);
		}
	}

	/**
	 * Get item property
	 *
	 * Allows simple access to parameters. This method is slower, but easier
	 * to implement in some cases.
	 *
	 * @param string $name Property name
	 * @return mixed Property value, or null if neither a pre-defined property
	 * nor a miscellaneous element with that name exists.
	 */
	public function __get($name) {
		switch($name) {
			case 'uri': return $this->getURI();
			case 'title': return $this->getTitle();
			case 'timestamp': return $this->getTimestamp();
			case 'author': return $this->getAuthor();
			case 'content': return $this->getContent();
			case 'enclosures': return $this->getEnclosures();
			case 'categories': return $this->getCategories();
			default:
				if(array_key_exists($name, $this->misc))
					return $this->misc[$name];
				return null;
		}
	}
}

View file

@ -113,7 +113,7 @@ abstract class FormatAbstract implements FormatInterface {
* @param array $items {@inheritdoc}
*/
public function setItems(array $items){
$this->items = array_map(array($this, 'array_trim'), $items);
$this->items = $items;
return $this;
}

View file

@ -44,6 +44,7 @@ require_once PATH_LIB . 'CacheInterface.php';
require_once PATH_LIB . 'FormatInterface.php';
// Classes
require_once PATH_LIB . 'FeedItem.php';
require_once PATH_LIB . 'Debug.php';
require_once PATH_LIB . 'Exceptions.php';
require_once PATH_LIB . 'Format.php';