From 988635dcf3fa2a35eabf063a167c987cc8482172 Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Wed, 26 Dec 2018 22:41:32 +0100 Subject: [PATCH] core: Add FeedItem class (#940) Add transformation from legacy items to FeedItems, before transforming items to the desired format. This allows using legacy bridges alongside bridges that return FeedItems. As discussed in #940, instead of throwing exceptions on invalid parameters, add messages to the debug log instead Add support for strings to setTimestamp(). If the provided timestamp is a string, automatically try to parse it using strtotime(). This allows bridges to simply use `$item['timestamp'] = $timestamp;` instead of `$item['timestamp'] = strtotime($timestamp);` Support simple_html_dom_node as input parameter for setURI Support simple_html_dom_node as input parameter for setContent --- formats/AtomFormat.php | 34 ++- formats/HtmlFormat.php | 24 +- formats/JsonFormat.php | 8 +- formats/MrssFormat.php | 31 ++- formats/PlaintextFormat.php | 8 +- index.php | 54 ++-- lib/FeedItem.php | 487 ++++++++++++++++++++++++++++++++++++ lib/FormatAbstract.php | 2 +- lib/rssbridge.php | 1 + 9 files changed, 583 insertions(+), 66 deletions(-) create mode 100644 lib/FeedItem.php diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 693b6ad0..bb5e30eb 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -27,30 +27,26 @@ class AtomFormat extends FormatAbstract{ $entries = ''; foreach($this->getItems() as $item) { - $entryAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : ''; - $entryTitle = isset($item['title']) ? $this->xml_encode($item['title']) : ''; - $entryUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : ''; - $entryTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_ATOM, $item['timestamp'])) : ''; - $entryContent = isset($item['content']) ? 
$this->xml_encode($this->sanitizeHtml($item['content'])) : ''; + $entryAuthor = $this->xml_encode($item->getAuthor()); + $entryTitle = $this->xml_encode($item->getTitle()); + $entryUri = $this->xml_encode($item->getURI()); + $entryTimestamp = $this->xml_encode(date(DATE_ATOM, $item->getTimestamp())); + $entryContent = $this->xml_encode($this->sanitizeHtml($item->getContent())); $entryEnclosures = ''; - if(isset($item['enclosures'])) { - foreach($item['enclosures'] as $enclosure) { - $entryEnclosures .= '' - . PHP_EOL; - } + foreach($item->getEnclosures() as $enclosure) { + $entryEnclosures .= '' + . PHP_EOL; } $entryCategories = ''; - if(isset($item['categories'])) { - foreach($item['categories'] as $category) { - $entryCategories .= '' - . PHP_EOL; - } + foreach($item->getCategories() as $category) { + $entryCategories .= '' + . PHP_EOL; } $entries .= <<getItems() as $item) { - $entryAuthor = isset($item['author']) ? '

by: ' . $item['author'] . '

' : ''; - $entryTitle = isset($item['title']) ? $this->sanitizeHtml(strip_tags($item['title'])) : ''; - $entryUri = isset($item['uri']) ? $item['uri'] : $uri; + $entryAuthor = $item->getAuthor() ? '

by: ' . $item->getAuthor() . '

' : ''; + $entryTitle = $this->sanitizeHtml(strip_tags($item->getTitle())); + $entryUri = $item->getURI() ?: $uri; $entryTimestamp = ''; - if(isset($item['timestamp'])) { + if($item->getTimestamp()) { $entryTimestamp = ''; } $entryContent = ''; - if(isset($item['content'])) { + if($item->getContent()) { $entryContent = '
' - . $this->sanitizeHtml($item['content']) + . $this->sanitizeHtml($item->getContent()) . '
'; } $entryEnclosures = ''; - if(isset($item['enclosures'])) { + if(!empty($item->getEnclosures())) { $entryEnclosures = '

Attachments:

'; - foreach($item['enclosures'] as $enclosure) { + foreach($item->getEnclosures() as $enclosure) { $url = $this->sanitizeHtml($enclosure); $entryEnclosures .= '
  • Categories:

    '; - foreach($item['categories'] as $category) { + foreach($item->getCategories() as $category) { $entryCategories .= '
  • ' . $this->sanitizeHtml($category) diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php index ef8c9f70..c3f85f94 100644 --- a/formats/JsonFormat.php +++ b/formats/JsonFormat.php @@ -6,7 +6,13 @@ class JsonFormat extends FormatAbstract { public function stringify(){ $items = $this->getItems(); - $toReturn = json_encode($items, JSON_PRETTY_PRINT); + $data = array(); + + foreach($items as $item) { + $data[] = $item->toArray(); + } + + $toReturn = json_encode($data, JSON_PRETTY_PRINT); // Remove invalid non-UTF8 characters ini_set('mbstring.substitute_character', 'none'); diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index 023750eb..34b9a92a 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -25,24 +25,24 @@ class MrssFormat extends FormatAbstract { $items = ''; foreach($this->getItems() as $item) { - $itemAuthor = isset($item['author']) ? $this->xml_encode($item['author']) : ''; - $itemTitle = strip_tags(isset($item['title']) ? $this->xml_encode($item['title']) : ''); - $itemUri = isset($item['uri']) ? $this->xml_encode($item['uri']) : ''; - $itemTimestamp = isset($item['timestamp']) ? $this->xml_encode(date(DATE_RFC2822, $item['timestamp'])) : ''; - $itemContent = isset($item['content']) ? $this->xml_encode($this->sanitizeHtml($item['content'])) : ''; + $itemAuthor = $this->xml_encode($item->getAuthor()); + $itemTitle = $this->xml_encode($item->getTitle()); + $itemUri = $this->xml_encode($item->getURI()); + $itemTimestamp = $this->xml_encode(date(DATE_RFC2822, $item->getTimestamp())); + $itemContent = $this->xml_encode($this->sanitizeHtml($item->getContent())); $entryEnclosuresWarning = ''; $entryEnclosures = ''; - if(isset($item['enclosures'])) { + if(!empty($item->getEnclosures())) { $entryEnclosures .= ''; + . $this->xml_encode($item->getEnclosures()[0]) + . '" type="' . getMimeType($item->getEnclosures()[0]) . 
'" />'; - if(count($item['enclosures']) > 1) { + if(count($item->getEnclosures()) > 1) { $entryEnclosures .= PHP_EOL; $entryEnclosuresWarning = '<br>Warning: Some media files might not be shown to you. Consider using the ATOM format instead!'; - foreach($item['enclosures'] as $enclosure) { + foreach($item->getEnclosures() as $enclosure) { $entryEnclosures .= '' . PHP_EOL; @@ -51,13 +51,10 @@ Some media files might not be shown to you. Consider using the ATOM format inste } $entryCategories = ''; - if(isset($item['categories'])) { - - foreach($item['categories'] as $category) { - $entryCategories .= '' - . $category . '' - . PHP_EOL; - } + foreach($item->getCategories() as $category) { + $entryCategories .= '' + . $category . '' + . PHP_EOL; } $items .= <<getItems(); - $toReturn = print_r($items, true); + $data = array(); + + foreach($items as $item) { + $data[] = $item->toArray(); + } + + $toReturn = print_r($data, true); // Remove invalid non-UTF8 characters ini_set('mbstring.substitute_character', 'none'); diff --git a/index.php b/index.php index 0998c829..bf5c32db 100644 --- a/index.php +++ b/index.php @@ -229,7 +229,10 @@ try { $cached = $cache->loadData(); if(isset($cached['items']) && isset($cached['extraInfos'])) { - $items = $cached['items']; + foreach($cached['items'] as $item) { + $items[] = new \FeedItem($item); + } + $infos = $cached['extraInfos']; } @@ -240,6 +243,19 @@ try { $bridge->collectData(); $items = $bridge->getItems(); + + // Transform "legacy" items to FeedItems if necessary. + // Remove this code when support for "legacy" items ends! 
+ if(is_array($items[0])) { + $feedItems = array(); + + foreach($items as $item) { + $feedItems[] = new \FeedItem($item); + } + + $items = $feedItems; + } + $infos = array( 'name' => $bridge->getName(), 'uri' => $bridge->getURI(), @@ -248,44 +264,52 @@ try { } catch(Error $e) { error_log($e); - $item = array(); + $item = new \FeedItem(); // Create "new" error message every 24 hours $params['_error_time'] = urlencode((int)(time() / 86400)); // Error 0 is a special case (i.e. "trying to get property of non-object") if($e->getCode() === 0) { - $item['title'] = 'Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')'; + $item->setTitle('Bridge encountered an unexpected situation! (' . $params['_error_time'] . ')'); } else { - $item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')'; + $item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')'); } - $item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') - . '?' . http_build_query($params); - $item['timestamp'] = time(); - $item['content'] = buildBridgeException($e, $bridge); + $item->setURI( + (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') + . '?' + . http_build_query($params) + ); + + $item->setTimestamp(time()); + $item->setContent(buildBridgeException($e, $bridge)); $items[] = $item; } catch(Exception $e) { error_log($e); - $item = array(); + $item = new \FeedItem(); // Create "new" error message every 24 hours $params['_error_time'] = urlencode((int)(time() / 86400)); - $item['uri'] = (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') - . '?' . http_build_query($params); - $item['title'] = 'Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . 
')'; - $item['timestamp'] = time(); - $item['content'] = buildBridgeException($e, $bridge); + $item->setURI( + (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') + . '?' + . http_build_query($params) + ); + + $item->setTitle('Bridge returned error ' . $e->getCode() . '! (' . $params['_error_time'] . ')'); + $item->setTimestamp(time()); + $item->setContent(buildBridgeException($e, $bridge)); $items[] = $item; } // Store data in cache $cache->saveData(array( - 'items' => $items, + 'items' => array_map(function($i){ return $i->toArray(); }, $items), 'extraInfos' => $infos )); diff --git a/lib/FeedItem.php b/lib/FeedItem.php new file mode 100644 index 00000000..81492ad4 --- /dev/null +++ b/lib/FeedItem.php @@ -0,0 +1,487 @@ +uri = 'https://www.github.com/rss-bridge/rss-bridge/'; + * $feedItem->title = 'Title'; + * $feedItem->timestamp = strtotime('now'); + * $feedItem->author = 'Unknown author'; + * $feedItem->content = 'Hello World!'; + * $feedItem->enclosures = array('https://github.com/favicon.ico'); + * $feedItem->categories = array('php', 'rss-bridge', 'awesome'); + * ``` + * + * @param array $item (optional) A legacy item (empty: no legacy support). + * @return object A new object of this class + */ + public function __construct($item = array()) { + if(!is_array($item)) + Debug::log('Item must be an array!'); + + foreach($item as $key => $value) { + $this->__set($key, $value); + } + } + + /** + * Get current URI. + * + * Use {@see FeedItem::setURI()} to set the URI. + * + * @return string|null The URI or null if it hasn't been set. + */ + public function getURI() { + return $this->uri; + } + + /** + * Set URI to the full article. + * + * Use {@see FeedItem::getURI()} to get the URI. + * + * _Note_: Removes whitespace from the beginning and end of the URI. + * + * _Remarks_: Uses the attribute "href" or "src" if the provided URI is an + * object of simple_html_dom_node. + * + * @param object|string $uri URI to the full article. 
+ * @return self + */ + public function setURI($uri) { + $this->uri = null; // Clear previous data + + if($uri instanceof simple_html_dom_node) { + if($uri->hasAttribute('href')) { // Anchor + $uri = $uri->href; + } elseif($uri->hasAttribute('src')) { // Image + $uri = $uri->src; + } else { + Debug::log('The item provided as URI is unknown!'); + } + } + + if(!is_string($uri)) { + Debug::log('URI must be a string!'); + } elseif(!filter_var( + $uri, + FILTER_VALIDATE_URL, + FILTER_FLAG_SCHEME_REQUIRED | FILTER_FLAG_HOST_REQUIRED | FILTER_FLAG_PATH_REQUIRED)) { + Debug::log('URI must include a scheme, host and path!'); + } else { + $scheme = parse_url($uri, PHP_URL_SCHEME); + + if($scheme !== 'http' && $scheme !== 'https') { + Debug::log('URI scheme must be "http" or "https"!'); + } else { + $this->uri = trim($uri); + } + } + + return $this; + } + + /** + * Get current title. + * + * Use {@see FeedItem::setTitle()} to set the title. + * + * @return string|null The current title or null if it hasn't been set. + */ + public function getTitle() { + return $this->title; + } + + /** + * Set title. + * + * Use {@see FeedItem::getTitle()} to get the title. + * + * _Note_: Removes whitespace from beginning and end of the title. + * + * @param string $title The title + * @return self + */ + public function setTitle($title) { + $this->title = null; // Clear previous data + + if(!is_string($title)) { + Debug::log('Title must be a string!'); + } else { + $this->title = trim($title); + } + + return $this; + } + + /** + * Get current timestamp. + * + * Use {@see FeedItem::setTimestamp()} to set the timestamp. + * + * @return int|null The current timestamp or null if it hasn't been set. + */ + public function getTimestamp() { + return $this->timestamp; + } + + /** + * Set timestamp of first release. + * + * _Note_: The timestamp should represent the number of seconds since + * January 1 1970 00:00:00 GMT (Unix time). 
+ * + * _Remarks_: If the provided timestamp is a string (not numeric), this + * function automatically attempts to parse the string using + * [strtotime](http://php.net/manual/en/function.strtotime.php) + * + * @link http://php.net/manual/en/function.strtotime.php strtotime (PHP) + * @link https://en.wikipedia.org/wiki/Unix_time Unix time (Wikipedia) + * + * @param string|int $timestamp A timestamp of when the item was first released + * @return self + */ + public function setTimestamp($timestamp) { + $this->timestamp = null; // Clear previous data + + if(!is_numeric($timestamp) + && !$timestamp = strtotime($timestamp)) { + Debug::log('Unable to parse timestamp!'); + } + + if($timestamp <= 0) { + Debug::log('Timestamp must be greater than zero!'); + } else { + $this->timestamp = $timestamp; + } + + return $this; + } + + /** + * Get the current author name. + * + * Use {@see FeedItem::setAuthor()} to set the author. + * + * @return string|null The author or null if it hasn't been set. + */ + public function getAuthor() { + return $this->author; + } + + /** + * Set the author name. + * + * Use {@see FeedItem::getAuthor()} to get the author. + * + * @param string $author The author name. + * @return self + */ + public function setAuthor($author) { + $this->author = null; // Clear previous data + + if(!is_string($author)) { + Debug::log('Author must be a string!'); + } else { + $this->author = $author; + } + + return $this; + } + + /** + * Get item content. + * + * Use {@see FeedItem::setContent()} to set the item content. + * + * @return string|null The item content or null if it hasn't been set. + */ + public function getContent() { + return $this->content; + } + + /** + * Set item content. + * + * Note: This function casts objects of type simple_html_dom and + * simple_html_dom_node to string. + * + * Use {@see FeedItem::getContent()} to get the current item content. + * + * @param string|object $content The item content as text or simple_html_dom + * object. 
+ * @return self + */ + public function setContent($content) { + $this->content = null; // Clear previous data + + if($content instanceof simple_html_dom + || $content instanceof simple_html_dom_node) { + $content = (string)$content; + } + + if(!is_string($content)) { + Debug::log('Content must be a string!'); + } else { + $this->content = $content; + } + + return $this; + } + + /** + * Get item enclosures. + * + * Use {@see FeedItem::setEnclosures()} to set feed enclosures. + * + * @return array Enclosures as array of enclosure URIs. + */ + public function getEnclosures() { + return $this->enclosures; + } + + /** + * Set item enclosures. + * + * Use {@see FeedItem::getEnclosures()} to get the current item enclosures. + * + * @param array $enclosures Array of enclosures, where each element links to + * one enclosure. + * @return self + */ + public function setEnclosures($enclosures) { + $this->enclosures = array(); // Clear previous data + + if(!is_array($enclosures)) { + Debug::log('Enclosures must be an array!'); + } else { + foreach($enclosures as $enclosure) { + if(!filter_var( + $enclosure, + FILTER_VALIDATE_URL, + FILTER_FLAG_SCHEME_REQUIRED | FILTER_FLAG_HOST_REQUIRED | FILTER_FLAG_PATH_REQUIRED)) { + Debug::log('Each enclosure must contain a scheme, host and path!'); + } else { + $this->enclosures[] = $enclosure; + } + } + } + + return $this; + } + + /** + * Get item categories. + * + * Use {@see FeedItem::setCategories()} to set item categories. + * + * @return array The item categories. + */ + public function getCategories() { + return $this->categories; + } + + /** + * Set item categories. + * + * Use {@see FeedItem::getCategories()} to get the current item categories. + * + * @param array $categories Array of categories, where each element defines + * a single category name. 
+ * @return self + */ + public function setCategories($categories) { + $this->categories = array(); // Clear previous data + + if(!is_array($categories)) { + Debug::log('Categories must be an array!'); + } else { + foreach($categories as $category) { + if(!is_string($category)) { + Debug::log('Category must be a string!'); + } else { + $this->categories[] = $category; + } + } + } + + return $this; + } + + /** + * Add miscellaneous elements to the item. + * + * @param string $key Name of the element. + * @param mixed $value Value of the element. + * @return self + */ + public function addMisc($key, $value) { + + if(!is_string($key)) { + Debug::log('Key must be a string!'); + } elseif(in_array($key, get_object_vars($this))) { + Debug::log('Key must be unique!'); + } else { + $this->misc[$key] = $value; + } + + return $this; + } + + /** + * Transform current object to array + * + * @return array + */ + public function toArray() { + return array_merge( + array( + 'uri' => $this->uri, + 'title' => $this->title, + 'timestamp' => $this->timestamp, + 'author' => $this->author, + 'content' => $this->content, + 'enclosures' => $this->enclosures, + 'categories' => $this->categories, + ), $this->misc + ); + } + + /** + * Set item property + * + * Allows simple assignment to parameters. 
This method is slower, but easier + * to implement in some cases: + * + * ```PHP + * $item = new \FeedItem(); + * $item->content = 'Hello World!'; + * $item->my_id = 42; + * ``` + * + * @param string $name Property name + * @param mixed $value Property value + */ + function __set($name, $value) { + switch($name) { + case 'uri': $this->setURI($value); break; + case 'title': $this->setTitle($value); break; + case 'timestamp': $this->setTimestamp($value); break; + case 'author': $this->setAuthor($value); break; + case 'content': $this->setContent($value); break; + case 'enclosures': $this->setEnclosures($value); break; + case 'categories': $this->setCategories($value); break; + default: $this->addMisc($name, $value); + } + } + + /** + * Get item property + * + * Allows simple read access to properties. This method is slower, but easier + * to implement in some cases. + * + * @param string $name Property name + * @return mixed Property value + */ + function __get($name) { + switch($name) { + case 'uri': return $this->getURI(); + case 'title': return $this->getTitle(); + case 'timestamp': return $this->getTimestamp(); + case 'author': return $this->getAuthor(); + case 'content': return $this->getContent(); + case 'enclosures': return $this->getEnclosures(); + case 'categories': return $this->getCategories(); + default: + if(array_key_exists($name, $this->misc)) + return $this->misc[$name]; + return null; + } + } +} diff --git a/lib/FormatAbstract.php b/lib/FormatAbstract.php index 5bbd3807..5395d562 100644 --- a/lib/FormatAbstract.php +++ b/lib/FormatAbstract.php @@ -113,7 +113,7 @@ abstract class FormatAbstract implements FormatInterface { * @param array $items {@inheritdoc} */ public function setItems(array $items){ - $this->items = array_map(array($this, 'array_trim'), $items); + $this->items = $items; return $this; } diff --git a/lib/rssbridge.php b/lib/rssbridge.php index 01d11d73..dbeab26f 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -44,6 +44,7 @@ 
require_once PATH_LIB . 'CacheInterface.php'; require_once PATH_LIB . 'FormatInterface.php'; // Classes +require_once PATH_LIB . 'FeedItem.php'; require_once PATH_LIB . 'Debug.php'; require_once PATH_LIB . 'Exceptions.php'; require_once PATH_LIB . 'Format.php';