[BridgeXPathAbstract + BlizzardNewsBridge + XPathBridge] Add new abstract class + two example implementations (#1671)

2020-11-08 08:22:41 +01:00 · 2020-11-08 08:22:41 +01:00 · 3ad138026d
commit 3ad138026d
parent d05a8b79fe
5 changed files with 897 additions and 1 deletions
--- a/bridges/BlizzardNewsBridge.php
+++ b/bridges/BlizzardNewsBridge.php
@ -0,0 +1,60 @@
 <?php
 /**
  * Bridge producing a feed from the Blizzard news site.
  *
  * All extraction rules are declared as XPath expressions in the class
  * constants below; only the source URL is computed at runtime because it
  * depends on the language selected by the user.
  */
 class BlizzardNewsBridge extends XPathAbstract {
 	const NAME = 'Blizzard News';
 	const URI = 'https://news.blizzard.com';
 	const DESCRIPTION = 'Blizzard (game company) newsfeed';
 	const MAINTAINER = 'Niehztog';
 	// Single (unnamed) context offering a language selection list.
 	const PARAMETERS = array(
 		'' => array(
 			'locale' => array(
 				'name' => 'Language',
 				'type' => 'list',
 				'values' => array(
 					'Deutsch' => 'de-de',
 					'English (EU)' => 'en-gb',
 					'English (US)' => 'en-us',
 					'Español (EU)' => 'es-es',
 					'Español (AL)' => 'es-mx',
 					'Français' => 'fr-fr',
 					'Italiano' => 'it-it',
 					'日本語' => 'ja-jp',
 					'한국어' => 'ko-kr',
 					'Polski' => 'pl-pl',
 					'Português (AL)' => 'pt-br',
 					'Русский' => 'ru-ru',
 					'ภาษาไทย' => 'th-th',
 					'简体中文' => 'zh-cn',
 					'繁體中文' => 'zh-tw'
 				),
 				'defaultValue' => 'en-us',
 				'title' => 'Select your language'
 			)
 		)
 	);
 	const CACHE_TIMEOUT = 3600;
 	// Item expressions are evaluated relative to each node matched by XPATH_EXPRESSION_ITEM.
 	const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article';
 	const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2';
 	const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]';
 	const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href';
 	// Left blank: no author is extracted.
 	const XPATH_EXPRESSION_ITEM_AUTHOR = '';
 	const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp';
 	const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/div[@class="ArticleListItem-image"]/@style';
 	const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="ArticleListItem-label"]';
 	const SETTING_FIX_ENCODING = true;
 	/**
 	 * Builds the URL of the news page for the selected language.
 	 *
 	 * The 'zh-cn' locale is served from a dedicated host; every other locale
 	 * is appended as a path segment to the main news host.
 	 *
 	 * @return string
 	 */
 	protected function getSourceUrl(){
 		$selectedLocale = $this->getInput('locale');
 		$usesDedicatedHost = ('zh-cn' === $selectedLocale);
 		return $usesDedicatedHost
 			? 'https://cn.news.blizzard.com'
 			: 'https://news.blizzard.com/' . $selectedLocale;
 	}
 }
--- a/bridges/XPathBridge.php
+++ b/bridges/XPathBridge.php
@ -0,0 +1,251 @@
 <?php
 /**
  * Generic bridge which builds a feed from any web page, using XPath
  * expressions entered by the user for every feed item field.
  *
  * Each getter below overrides the constant based default of
  * {@see XPathAbstract} with the corresponding user input.
  */
 class XPathBridge extends XPathAbstract {
 	const NAME = 'XPathBridge';
 	const URI = 'https://github.com/rss-bridge/rss-bridge';
 	const DESCRIPTION
 		= 'Parse any webpage using <a href="https://devhints.io/xpath" target="_blank">XPath expressions</a>';
 	const MAINTAINER = 'Niehztog';
 	const PARAMETERS = array(
 		'' => array(
 			'url' => array(
 				'name' => 'Enter web page URL',
 				'title' => <<<"EOL"
 You can specify any website URL which serves data suited for display in RSS feeds
 (for example a news blog).
 EOL
 				, 'type' => 'text',
 				'exampleValue' => 'https://news.blizzard.com/en-en',
 				'defaultValue' => 'https://news.blizzard.com/en-en',
 				'required' => true
 			),
 			'item' => array(
 				'name' => 'Item selector',
 				'title' => <<<"EOL"
 Enter an XPath expression matching a list of dom nodes, each node containing one
 feed article item in total (usually a surrounding &lt;div&gt; or &lt;span&gt; tag). This will
 be the context nodes for all of the following expressions. This expression usually
 starts with a single forward slash.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
 				'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
 				'required' => true
 			),
 			'title' => array(
 				'name' => 'Item title selector',
 				'title' => <<<"EOL"
 This expression should match a node contained within each article item node
 containing the article headline. It should start with a dot followed by two
 forward slashes, referring to any descendant nodes of the article item node.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/div/div[2]/h2',
 				'defaultValue' => './/div/div[2]/h2',
 				'required' => true
 			),
 			'content' => array(
 				'name' => 'Item description selector',
 				'title' => <<<"EOL"
 This expression should match a node contained within each article item node
 containing the article content or description. It should start with a dot
 followed by two forward slashes, referring to any descendant nodes of the
 article item node.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
 				'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
 				'required' => false
 			),
 			'uri' => array(
 				'name' => 'Item URL selector',
 				'title' => <<<"EOL"
 This expression should match a node's attribute containing the article URL
 (usually the href attribute of an &lt;a&gt; tag). It should start with a dot
 followed by two forward slashes, referring to any descendant nodes of
 the article item node. Attributes can be selected by prepending an @ char
 before the attributes name.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
 				'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
 				'required' => false
 			),
 			'author' => array(
 				'name' => 'Item author selector',
 				'title' => <<<"EOL"
 This expression should match a node contained within each article item
 node containing the article author's name. It should start with a dot
 followed by two forward slashes, referring to any descendant nodes of
 the article item node.
 EOL
 				, 'type' => 'text',
 				'required' => false
 			),
 			'timestamp' => array(
 				'name' => 'Item date selector',
 				'title' => <<<"EOL"
 This expression should match a node or node's attribute containing the
 article timestamp or date (parsable by PHP's strtotime function). It
 should start with a dot followed by two forward slashes, referring to
 any descendant nodes of the article item node. Attributes can be
 selected by prepending an @ char before the attributes name.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
 				'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
 				'required' => false
 			),
 			'enclosures' => array(
 				'name' => 'Item image selector',
 				'title' => <<<"EOL"
 This expression should match a node's attribute containing an article
 image URL (usually the src attribute of an &lt;img&gt; tag or a style
 attribute). It should start with a dot followed by two forward slashes,
 referring to any descendant nodes of the article item node. Attributes
 can be selected by prepending an @ char before the attributes name.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/div[@class="ArticleListItem-image"]/@style',
 				'defaultValue' => './/div[@class="ArticleListItem-image"]/@style',
 				'required' => false
 			),
 			'categories' => array(
 				'name' => 'Item category selector',
 				'title' => <<<"EOL"
 This expression should match a node or node's attribute contained
 within each article item node containing the article category. This
 could be inside &lt;div&gt; or &lt;span&gt; tags or sometimes be hidden
 in a data attribute. It should start with a dot followed by two
 forward slashes, referring to any descendant nodes of the article
 item node. Attributes can be selected by prepending an @ char
 before the attributes name.
 EOL
 				, 'type' => 'text',
 				'exampleValue' => './/div[@class="ArticleListItem-label"]',
 				'defaultValue' => './/div[@class="ArticleListItem-label"]',
 				'required' => false
 			),
 			'fix_encoding' => array(
 				'name' => 'Fix encoding',
 				'title' => <<<"EOL"
 Check this to fix feed encoding by invoking PHP's utf8_decode
 function on all extracted texts. Try this in case you see "broken" or
 "weird" characters in your feed where you'd normally expect umlauts
 or any other non-ascii characters.
 EOL
 				, 'type' => 'checkbox',
 				'required' => false
 			),
 		)
 	);
 	/**
 	 * Source Web page URL (should provide either HTML or XML content)
 	 * @return string
 	 */
 	protected function getSourceUrl(){
 		return $this->encodeUri($this->getInput('url'));
 	}
 	/**
 	 * XPath expression for extracting the feed items from the source page
 	 * @return string
 	 */
 	protected function getExpressionItem(){
 		return $this->getExpressionInput('item');
 	}
 	/**
 	 * XPath expression for extracting an item title from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemTitle(){
 		return $this->getExpressionInput('title');
 	}
 	/**
 	 * XPath expression for extracting an item's content from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemContent(){
 		return $this->getExpressionInput('content');
 	}
 	/**
 	 * XPath expression for extracting an item link from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemUri(){
 		return $this->getExpressionInput('uri');
 	}
 	/**
 	 * XPath expression for extracting an item author from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemAuthor(){
 		return $this->getExpressionInput('author');
 	}
 	/**
 	 * XPath expression for extracting an item timestamp from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemTimestamp(){
 		return $this->getExpressionInput('timestamp');
 	}
 	/**
 	 * XPath expression for extracting item enclosures (media content like
 	 * images or movies) from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemEnclosures(){
 		return $this->getExpressionInput('enclosures');
 	}
 	/**
 	 * XPath expression for extracting an item category from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemCategories(){
 		return $this->getExpressionInput('categories');
 	}
 	/**
 	 * Fix encoding
 	 *
 	 * Returns the raw value of the 'fix_encoding' checkbox input; it is only
 	 * used in a boolean context by the base class.
 	 *
 	 * @return mixed
 	 */
 	protected function getSettingFixEncoding(){
 		return $this->getInput('fix_encoding');
 	}
 	/**
 	 * Reads one of the XPath expression inputs and URL-decodes it.
 	 *
 	 * The value is cast to string before decoding so that an input which was
 	 * left blank yields an empty string without handing null to urldecode()
 	 * (NOTE(review): assumes getInput() returns null for unset inputs - confirm
 	 * against BridgeAbstract).
 	 *
 	 * @param string $name Name of the input parameter
 	 * @return string Decoded expression, '' when no value was supplied
 	 */
 	private function getExpressionInput($name){
 		return urldecode((string)$this->getInput($name));
 	}
 	/**
 	 * Fixes URL encoding issues in input URL's
 	 *
 	 * A URL whose scheme separator arrives percent-encoded ("http%3A%2F%2F" or
 	 * "https%3A%2F%2F") is decoded once; afterwards every pipe character is
 	 * replaced by its percent-encoded form.
 	 *
 	 * @param string $uri
 	 * @return string
 	 */
 	private function encodeUri($uri)
 	{
 		// Normalise to string so the string functions below never receive null
 		$uri = (string)$uri;
 		if (strpos($uri, 'https%3A%2F%2F') === 0
 			|| strpos($uri, 'http%3A%2F%2F') === 0) {
 			$uri = urldecode($uri);
 		}
 		return str_replace('|', '%7C', $uri);
 	}
 }
--- a/composer.json
+++ b/composer.json
@ -34,6 +34,7 @@
    },
    "suggest": {
        "ext-memcached": "Allows to use memcached as cache type",
-        "ext-sqlite3": "Allows to use an SQLite database for caching"
+        "ext-sqlite3": "Allows to use an SQLite database for caching",
        "ext-dom": "Allows to use some bridges based on XPath expressions"
    }
 }
--- a/lib/XPathAbstract.php
+++ b/lib/XPathAbstract.php
@ -0,0 +1,583 @@
 <?php
 /**
 * An alternative abstract class for bridges utilizing XPath expressions
 *
 * This class is meant as an alternative base class for bridge implementations.
 * It offers preliminary functionality for generating feeds based on XPath
 * expressions.
 * As a minimum, extending classes should define XPath expressions pointing
 * to the feed items contents in the class constants below. In case there is
 * more manual fine tuning required, it offers a bunch of methods which can
 * be overridden, for example in order to specify formatting of field values
 * or more flexible definition of dynamic XPath expressions.
 *
 * This class extends {@see BridgeAbstract}, which means it incorporates and
 * extends all of its functionality.
 **/
 abstract class XPathAbstract extends BridgeAbstract {
 	/**
 	 * Source Web page URL (should provide either HTML or XML content)
 	 * You can specify any website URL which serves data suited for display in RSS feeds
 	 * (for example a news blog).
 	 *
 	 * Use {@see XPathAbstract::getSourceUrl()} to read this parameter
 	 */
 	const FEED_SOURCE_URL = '';
 	/**
 	 * XPath expression for extracting the feed title from the source page.
 	 * If this is left blank or does not provide any data {@see BridgeAbstract::getName()}
 	 * is used instead as the feed's title.
 	 *
 	 * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_FEED_TITLE = './/title';
 	/**
 	 * XPath expression for extracting the feed favicon URL from the source page.
 	 * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()}
 	 * is used instead as the feed's favicon URL.
 	 *
 	 * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
 	/**
 	 * XPath expression for extracting the feed items from the source page
 	 * Enter an XPath expression matching a list of dom nodes, each node containing one
 	 * feed article item in total (usually a surrounding <div> or <span> tag). This will
 	 * be the context nodes for all of the following expressions. This expression usually
 	 * starts with a single forward slash.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItem()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM = '';
 	/**
 	 * XPath expression for extracting an item title from the item context
 	 * This expression should match a node contained within each article item node
 	 * containing the article headline. It should start with a dot followed by two
 	 * forward slashes, referring to any descendant nodes of the article item node.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_TITLE = '';
 	/**
 	 * XPath expression for extracting an item's content from the item context
 	 * This expression should match a node contained within each article item node
 	 * containing the article content or description. It should start with a dot
 	 * followed by two forward slashes, referring to any descendant nodes of the
 	 * article item node.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_CONTENT = '';
 	/**
 	 * XPath expression for extracting an item link from the item context
 	 * This expression should match a node's attribute containing the article URL
 	 * (usually the href attribute of an <a> tag). It should start with a dot
 	 * followed by two forward slashes, referring to any descendant nodes of
 	 * the article item node. Attributes can be selected by prepending an @ char
 	 * before the attributes name.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_URI = '';
 	/**
 	 * XPath expression for extracting an item author from the item context
 	 * This expression should match a node contained within each article item
 	 * node containing the article author's name. It should start with a dot
 	 * followed by two forward slashes, referring to any descendant nodes of
 	 * the article item node.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_AUTHOR = '';
 	/**
 	 * XPath expression for extracting an item timestamp from the item context
 	 * This expression should match a node or node's attribute containing the
 	 * article timestamp or date (parsable by PHP's strtotime function). It
 	 * should start with a dot followed by two forward slashes, referring to
 	 * any descendant nodes of the article item node. Attributes can be
 	 * selected by prepending an @ char before the attributes name.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_TIMESTAMP = '';
 	/**
 	 * XPath expression for extracting item enclosures (media content like
 	 * images or movies) from the item context
 	 * This expression should match a node's attribute containing an article
 	 * image URL (usually the src attribute of an <img> tag or a style
 	 * attribute). It should start with a dot followed by two forward slashes,
 	 * referring to any descendant nodes of the article item node. Attributes
 	 * can be selected by prepending an @ char before the attributes name.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_ENCLOSURES = '';
 	/**
 	 * XPath expression for extracting an item category from the item context
 	 * This expression should match a node or node's attribute contained
 	 * within each article item node containing the article category. This
 	 * could be inside <div> or <span> tags or sometimes be hidden
 	 * in a data attribute. It should start with a dot followed by two
 	 * forward slashes, referring to any descendant nodes of the article
 	 * item node. Attributes can be selected by prepending an @ char
 	 * before the attributes name.
 	 *
 	 * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter
 	 */
 	const XPATH_EXPRESSION_ITEM_CATEGORIES = '';
 	/**
 	 * Fix encoding
 	 * Set this to true for fixing feed encoding by invoking PHP's utf8_decode
 	 * function on all extracted texts. Try this in case you see "broken" or
 	 * "weird" characters in your feed where you'd normally expect umlauts
 	 * or any other non-ascii characters.
 	 *
 	 * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter
 	 */
 	const SETTING_FIX_ENCODING = false;
 	/**
 	 * Internal storage for resulting feed name, automatically detected
 	 * @var string
 	 */
 	private $feedName;
 	/**
 	 * Internal storage for the source page URL the feed is built from,
 	 * assigned at the start of {@see XPathAbstract::collectData()}
 	 * @var string
 	 */
 	private $feedUri;
 	/**
 	 * Internal storage for resulting feed favicon, automatically detected
 	 * @var string
 	 */
 	private $feedIcon;
 	/**
 	 * Feed name detected on the source page; falls back to the parent
 	 * implementation while nothing has been detected.
 	 * @return string
 	 */
 	public function getName(){
 		return $this->feedName ?: parent::getName();
 	}
 	/**
 	 * URL of the source page once data collection has started; falls back
 	 * to the parent implementation before that.
 	 * @return string
 	 */
 	public function getURI() {
 		return $this->feedUri ?: parent::getURI();
 	}
 	/**
 	 * Favicon URL detected on the source page; falls back to the parent
 	 * implementation while nothing has been detected.
 	 * @return string
 	 */
 	public function getIcon() {
 		return $this->feedIcon ?: parent::getIcon();
 	}
 	/**
 	 * Source Web page URL (should provide either HTML or XML content)
 	 * @return string
 	 */
 	protected function getSourceUrl(){
 		return static::FEED_SOURCE_URL;
 	}
 	/**
 	 * XPath expression for extracting the feed title from the source page
 	 * @return string
 	 */
 	protected function getExpressionTitle(){
 		return static::XPATH_EXPRESSION_FEED_TITLE;
 	}
 	/**
 	 * XPath expression for extracting the feed favicon from the source page
 	 * @return string
 	 */
 	protected function getExpressionIcon(){
 		return static::XPATH_EXPRESSION_FEED_ICON;
 	}
 	/**
 	 * XPath expression for extracting the feed items from the source page
 	 * @return string
 	 */
 	protected function getExpressionItem(){
 		return static::XPATH_EXPRESSION_ITEM;
 	}
 	/**
 	 * XPath expression for extracting an item title from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemTitle(){
 		return static::XPATH_EXPRESSION_ITEM_TITLE;
 	}
 	/**
 	 * XPath expression for extracting an item's content from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemContent(){
 		return static::XPATH_EXPRESSION_ITEM_CONTENT;
 	}
 	/**
 	 * XPath expression for extracting an item link from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemUri(){
 		return static::XPATH_EXPRESSION_ITEM_URI;
 	}
 	/**
 	 * XPath expression for extracting an item author from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemAuthor(){
 		return static::XPATH_EXPRESSION_ITEM_AUTHOR;
 	}
 	/**
 	 * XPath expression for extracting an item timestamp from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemTimestamp(){
 		return static::XPATH_EXPRESSION_ITEM_TIMESTAMP;
 	}
 	/**
 	 * XPath expression for extracting item enclosures (media content like
 	 * images or movies) from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemEnclosures(){
 		return static::XPATH_EXPRESSION_ITEM_ENCLOSURES;
 	}
 	/**
 	 * XPath expression for extracting an item category from the item context
 	 * @return string
 	 */
 	protected function getExpressionItemCategories(){
 		return static::XPATH_EXPRESSION_ITEM_CATEGORIES;
 	}
 	/**
 	 * Fix encoding
 	 * @return bool
 	 */
 	protected function getSettingFixEncoding(){
 		return static::SETTING_FIX_ENCODING;
 	}
 	/**
 	 * Internal helper method for quickly accessing all the user defined constants
 	 * in derived classes
 	 *
 	 * Every parameter is resolved through its overridable getter, so derived
 	 * classes can supply values dynamically instead of via constants.
 	 *
 	 * @param string $name
 	 * @return bool|string|null null when the parameter name is unknown
 	 */
 	private function getParam($name){
 		switch($name) {
 			case 'url':
 				return $this->getSourceUrl();
 			case 'feed_title':
 				return $this->getExpressionTitle();
 			case 'feed_icon':
 				return $this->getExpressionIcon();
 			case 'item':
 				return $this->getExpressionItem();
 			case 'title':
 				return $this->getExpressionItemTitle();
 			case 'content':
 				return $this->getExpressionItemContent();
 			case 'uri':
 				return $this->getExpressionItemUri();
 			case 'author':
 				return $this->getExpressionItemAuthor();
 			case 'timestamp':
 				return $this->getExpressionItemTimestamp();
 			case 'enclosures':
 				return $this->getExpressionItemEnclosures();
 			case 'categories':
 				return $this->getExpressionItemCategories();
 			case 'fix_encoding':
 				return $this->getSettingFixEncoding();
 			default:
 				return null;
 		}
 	}
 	/**
 	 * Should provide the source website HTML content
 	 * can be easily overwritten for example if special headers or auth infos are required
 	 * @return string
 	 */
 	protected function provideWebsiteContent() {
 		return getContents($this->feedUri);
 	}
 	/**
 	 * Should provide the feeds title
 	 *
 	 * @param DOMXPath $xpath
 	 * @return string|null null when the expression is blank or does not
 	 * match exactly one node
 	 */
 	protected function provideFeedTitle(DOMXPath $xpath) {
 		return $this->querySingleValue($xpath, $this->getParam('feed_title'));
 	}
 	/**
 	 * Should provide the URL of the feed's favicon
 	 *
 	 * @param DOMXPath $xpath
 	 * @return string|null null when the expression is blank, does not match
 	 * exactly one node or the matched value contains no image URL
 	 */
 	protected function provideFeedIcon(DOMXPath $xpath) {
 		$rawIcon = $this->querySingleValue($xpath, $this->getParam('feed_icon'));
 		if(null === $rawIcon) {
 			return null;
 		}
 		return $this->cleanImageUrl($rawIcon);
 	}
 	/**
 	 * Runs a document level XPath query and returns the value of its result
 	 * when exactly one node matches.
 	 *
 	 * A blank expression is skipped up front: DOMXPath::query() returns false
 	 * for an invalid expression, which must not be treated as a match.
 	 *
 	 * @param DOMXPath $xpath
 	 * @param string $expression
 	 * @return string|null
 	 */
 	private function querySingleValue(DOMXPath $xpath, $expression) {
 		if(!is_string($expression) || '' === trim($expression)) {
 			return null;
 		}
 		$nodes = $xpath->query($expression);
 		// "length" is used instead of count(): DOMNodeList is not Countable before PHP 7.2
 		if(!($nodes instanceof DOMNodeList) || 1 !== $nodes->length) {
 			return null;
 		}
 		return $this->getItemValueOrNodeValue($nodes);
 	}
 	/**
 	 * Should provide the feed's items.
 	 *
 	 * @param DOMXPath $xpath
 	 * @return DOMNodeList|false false when the item expression could not be evaluated
 	 */
 	protected function provideFeedItems(DOMXPath $xpath) {
 		return @$xpath->query($this->getParam('item'));
 	}
 	/**
 	 * Loads the source page, detects feed title and icon and creates one feed
 	 * item per node matched by the item expression. Item fields whose
 	 * expression is blank or yields no result are left unset.
 	 */
 	public function collectData() {
 		$this->feedUri = $this->getParam('url');
 		$webPageHtml = new DOMDocument();
 		// Buffer libxml parse errors while loading and restore the caller's setting afterwards
 		$previousErrorSetting = libxml_use_internal_errors(true);
 		$webPageHtml->loadHTML($this->provideWebsiteContent());
 		libxml_clear_errors();
 		libxml_use_internal_errors($previousErrorSetting);
 		$xpath = new DOMXPath($webPageHtml);
 		$this->feedName = $this->provideFeedTitle($xpath);
 		$this->feedIcon = $this->provideFeedIcon($xpath);
 		$entries = $this->provideFeedItems($xpath);
 		if($entries === false) {
 			return;
 		}
 		foreach ($entries as $entry) {
 			$item = new \FeedItem();
 			foreach(array('title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories') as $param) {
 				$expression = $this->getParam($param);
 				if(null === $expression || '' === $expression) {
 					continue;
 				}
 				//can be a string or DOMNodeList, depending on the expression result
 				$typedResult = @$xpath->evaluate($expression, $entry);
 				if ($this->isEmptyResult($typedResult)) {
 					continue;
 				}
 				$item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult)));
 			}
 			$itemId = $this->generateItemId($item);
 			if(null !== $itemId) {
 				$item->setUid($itemId);
 			}
 			$this->items[] = $item;
 		}
 	}
 	/**
 	 * Tells whether an XPath evaluation produced nothing usable: a failed
 	 * evaluation, an empty node list or a blank string.
 	 *
 	 * @param mixed $typedResult Result of DOMXPath::evaluate()
 	 * @return bool
 	 */
 	private function isEmptyResult($typedResult) {
 		if(false === $typedResult) {
 			return true;
 		}
 		if($typedResult instanceof DOMNodeList) {
 			// "length" is used instead of count(): DOMNodeList is not Countable before PHP 7.2
 			return 0 === $typedResult->length;
 		}
 		return is_string($typedResult) && '' === trim($typedResult);
 	}
 	/**
 	 * Applies the encoding fix and then the field specific formatter to an
 	 * extracted raw value.
 	 *
 	 * @param string $param Name of the item field
 	 * @param string $value Extracted raw value
 	 * @return string|array|int|false
 	 */
 	protected function formatParamValue($param, $value)
 	{
 		// The encoding fix is applied exactly once, here, for every field
 		$value = $this->fixEncoding($value);
 		switch ($param) {
 			case 'title':
 				return $this->formatItemTitle($value);
 			case 'content':
 				return $this->formatItemContent($value);
 			case 'uri':
 				return $this->formatItemUri($value);
 			case 'author':
 				return $this->formatItemAuthor($value);
 			case 'timestamp':
 				return $this->formatItemTimestamp($value);
 			case 'enclosures':
 				return $this->formatItemEnclosures($value);
 			case 'categories':
 				return $this->formatItemCategories($value);
 		}
 		return $value;
 	}
 	/**
 	 * Formats the title of a feed item. Takes extracted raw title and returns it formatted
 	 * as string.
 	 * Can be easily overwritten for in case the value needs to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return string
 	 */
 	protected function formatItemTitle($value) {
 		return $value;
 	}
 	/**
 	 * Formats the content of a feed item. Takes extracted raw content and returns it
 	 * formatted as string.
 	 * Can be easily overwritten for in case the value needs to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return string
 	 */
 	protected function formatItemContent($value) {
 		return $value;
 	}
 	/**
 	 * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted
 	 * as string. Values not starting with http:// or https:// are resolved against
 	 * the source page URL.
 	 * Can be easily overwritten for in case the value needs to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return string
 	 */
 	protected function formatItemUri($value) {
 		if(strlen($value) === 0) {
 			return '';
 		}
 		if(strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
 			return $value;
 		}
 		return urljoin($this->feedUri, $value);
 	}
 	/**
 	 * Formats the author of a feed item. Takes extracted raw author and returns it formatted
 	 * as string.
 	 * Can be easily overwritten for in case the value needs to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return string
 	 */
 	protected function formatItemAuthor($value) {
 		return $value;
 	}
 	/**
 	 * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix
 	 * timestamp as integer.
 	 * Can be easily overwritten for example if a special format has to be expected on the
 	 * source website.
 	 * @param string $value
 	 * @return false|int false when strtotime() cannot parse the value
 	 */
 	protected function formatItemTimestamp($value) {
 		return strtotime($value);
 	}
 	/**
 	 * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them
 	 * formatted as array.
 	 * Can be easily overwritten for in case the values need to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return array
 	 */
 	protected function formatItemEnclosures($value) {
 		return array($this->cleanImageUrl($value));
 	}
 	/**
 	 * Formats the categories of a feed item. Takes extracted raw categories and returns them
 	 * formatted as array.
 	 * Can be easily overwritten for in case the values need to be transformed into something
 	 * else.
 	 * @param string $value
 	 * @return array
 	 */
 	protected function formatItemCategories($value) {
 		return array($value);
 	}
 	/**
 	 * Extracts the first image URL (jpg, gif, png, jpeg or ico) found in the
 	 * given text and resolves it against the source page URL.
 	 *
 	 * @param string $imageUrl Raw text, for example a src or style attribute value
 	 * @return string|null null when the text contains no image URL
 	 */
 	protected function cleanImageUrl($imageUrl)
 	{
 		$result = preg_match('~(?:http(?:s)?:)?[\/a-zA-Z0-9\-_\.]+\.(?:jpg|gif|png|jpeg|ico){1}~', $imageUrl, $matches);
 		if(1 !== $result) {
 			return null;
 		}
 		return urljoin($this->feedUri, $matches[0]);
 	}
 	/**
 	 * Reduces an XPath result to a trimmed string: the value of the first
 	 * node of a node list (element, attribute or text node) or the string
 	 * result itself. Any other result is reported through returnServerError().
 	 *
 	 * @param DOMNodeList|string $typedResult
 	 * @return string
 	 */
 	protected function getItemValueOrNodeValue($typedResult)
 	{
 		if($typedResult instanceof DOMNodeList) {
 			$item = $typedResult->item(0);
 			if ($item instanceof DOMElement) {
 				return trim($item->nodeValue);
 			} elseif ($item instanceof DOMAttr) {
 				return trim($item->value);
 			} elseif ($item instanceof DOMText) {
 				// Produced by expressions selecting text nodes, e.g. ending in /text()
 				return trim($item->wholeText);
 			}
 		} elseif(is_string($typedResult) && strlen($typedResult) > 0) {
 			return trim($typedResult);
 		}
 		returnServerError('Unknown type of XPath expression result.');
 	}
 	/**
 	 * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts.
 	 * Useful in case of "broken" or "weird" characters in the feed where you'd normally
 	 * expect umlauts.
 	 *
 	 * @param string $input
 	 * @return string The input unchanged unless the fix encoding setting is enabled
 	 */
 	protected function fixEncoding($input)
 	{
 		return $this->getParam('fix_encoding') ? utf8_decode($input) : $input;
 	}
 	/**
 	 * Allows overriding default mechanism determining items Uid's
 	 *
 	 * @param FeedItem $item
 	 * @return string|null null leaves the item's Uid untouched
 	 */
 	protected function generateItemId(\FeedItem $item) {
 		return null; //auto generation
 	}
 }
--- a/lib/rssbridge.php
+++ b/lib/rssbridge.php
@ -74,6 +74,7 @@ require_once PATH_LIB . 'BridgeList.php';
 require_once PATH_LIB . 'ParameterValidator.php';
 require_once PATH_LIB . 'ActionFactory.php';
 require_once PATH_LIB . 'ActionAbstract.php';
 require_once PATH_LIB . 'XPathAbstract.php';
 // Functions
 require_once PATH_LIB . 'html.php';