XPath expressions'; const MAINTAINER = 'Niehztog'; const PARAMETERS = array( '' => array( 'url' => array( 'name' => 'Enter web page URL', 'title' => <<<"EOL" You can specify any website URL which serves data suited for display in RSS feeds (for example a news blog). EOL , 'type' => 'text', 'exampleValue' => 'https://news.blizzard.com/en-en', 'defaultValue' => 'https://news.blizzard.com/en-en', 'required' => true ), 'item' => array( 'name' => 'Item selector', 'title' => <<<"EOL" Enter an XPath expression matching a list of dom nodes, each node containing one feed article item in total (usually a surrounding <div> or <span> tag). This will be the context nodes for all of the following expressions. This expression usually starts with a single forward slash. EOL , 'type' => 'text', 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', 'required' => true ), 'title' => array( 'name' => 'Item title selector', 'title' => <<<"EOL" This expression should match a node contained within each article item node containing the article headline. It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. EOL , 'type' => 'text', 'exampleValue' => './/div/div[2]/h2', 'defaultValue' => './/div/div[2]/h2', 'required' => true ), 'content' => array( 'name' => 'Item description selector', 'title' => <<<"EOL" This expression should match a node contained within each article item node containing the article content or description. It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. 
EOL , 'type' => 'text', 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', 'required' => false ), 'uri' => array( 'name' => 'Item URL selector', 'title' => <<<"EOL" This expression should match a node's attribute containing the article URL (usually the href attribute of an <a> tag). It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. Attributes can be selected by prepending an @ char before the attributes name. EOL , 'type' => 'text', 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href', 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href', 'required' => false ), 'author' => array( 'name' => 'Item author selector', 'title' => <<<"EOL" This expression should match a node contained within each article item node containing the article author's name. It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. EOL , 'type' => 'text', 'required' => false ), 'timestamp' => array( 'name' => 'Item date selector', 'title' => <<<"EOL" This expression should match a node or node's attribute containing the article timestamp or date (parsable by PHP's strtotime function). It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. Attributes can be selected by prepending an @ char before the attributes name. EOL , 'type' => 'text', 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', 'required' => false ), 'enclosures' => array( 'name' => 'Item image selector', 'title' => <<<"EOL" This expression should match a node's attribute containing an article image URL (usually the src attribute of an <img> tag or a style attribute). 
It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. Attributes can be selected by prepending an @ char before the attributes name. EOL , 'type' => 'text', 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style', 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style', 'required' => false ), 'categories' => array( 'name' => 'Item category selector', 'title' => <<<"EOL" This expression should match a node or node's attribute contained within each article item node containing the article category. This could be inside <div> or <span> tags or sometimes be hidden in a data attribute. It should start with a dot followed by two forward slashes, referring to any descendant nodes of the article item node. Attributes can be selected by prepending an @ char before the attributes name. EOL , 'type' => 'text', 'exampleValue' => './/div[@class="ArticleListItem-label"]', 'defaultValue' => './/div[@class="ArticleListItem-label"]', 'required' => false ), 'fix_encoding' => array( 'name' => 'Fix encoding', 'title' => <<<"EOL" Check this to fix feed encoding by invoking PHP's utf8_decode function on all extracted texts. Try this in case you see "broken" or "weird" characters in your feed where you'd normally expect umlauts or any other non-ascii characters. 
EOL , 'type' => 'checkbox', 'required' => false ), ) );

    /**
     * Source web page URL (should provide either HTML or XML content).
     *
     * The 'url' input is passed through encodeUri() before it is returned.
     *
     * @return string
     */
    protected function getSourceUrl() {
        return $this->encodeUri($this->getInput('url'));
    }

    /**
     * XPath expression for extracting the feed items from the source page.
     *
     * @return string URL-decoded value of the 'item' input
     */
    protected function getExpressionItem() {
        // NOTE(review): getInput() is defined outside this section; presumably
        // it yields null for an input that was not supplied. The (string) cast
        // keeps urldecode() from receiving null (deprecated since PHP 8.1)
        // while still producing the same empty string.
        return urldecode((string) $this->getInput('item'));
    }

    /**
     * XPath expression for extracting an item title from the item context.
     *
     * @return string URL-decoded value of the 'title' input
     */
    protected function getExpressionItemTitle() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('title'));
    }

    /**
     * XPath expression for extracting an item's content from the item context.
     *
     * @return string URL-decoded value of the 'content' input
     */
    protected function getExpressionItemContent() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('content'));
    }

    /**
     * XPath expression for extracting an item link from the item context.
     *
     * @return string URL-decoded value of the 'uri' input
     */
    protected function getExpressionItemUri() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('uri'));
    }

    /**
     * XPath expression for extracting an item author from the item context.
     *
     * @return string URL-decoded value of the 'author' input
     */
    protected function getExpressionItemAuthor() {
        // 'author' is optional and declares no default value, so this is the
        // accessor most likely to see a missing input; hence the cast.
        return urldecode((string) $this->getInput('author'));
    }

    /**
     * XPath expression for extracting an item timestamp from the item context.
     *
     * @return string URL-decoded value of the 'timestamp' input
     */
    protected function getExpressionItemTimestamp() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('timestamp'));
    }

    /**
     * XPath expression for extracting item enclosures (media content like
     * images or movies) from the item context.
     *
     * @return string URL-decoded value of the 'enclosures' input
     */
    protected function getExpressionItemEnclosures() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('enclosures'));
    }

    /**
     * XPath expression for extracting an item category from the item context.
     *
     * @return string URL-decoded value of the 'categories' input
     */
    protected function getExpressionItemCategories() {
        // (string) cast: same null guard as in getExpressionItem().
        return urldecode((string) $this->getInput('categories'));
    }

    /**
     * Fix encoding setting.
     *
     * Returns the 'fix_encoding' input unchanged. That input is declared as a
     * checkbox, so the value is a flag rather than text.
     *
     * @return mixed Raw checkbox value as delivered by getInput(); presumably
     *               truthy when the box is checked - confirm against getInput()
     */
    protected function getSettingFixEncoding() {
        return $this->getInput('fix_encoding');
    }

    /**
     * Fixes URL encoding issues in input URLs.
     *
     * When the value starts with a percent-encoded scheme separator
     * ("https%3A%2F%2F" or "http%3A%2F%2F"), the whole value is URL-decoded.
     * Afterwards every pipe character is replaced by "%7C".
     *
     * @param string|null $uri URL taken from the 'url' input
     * @return string
     */
    private function encodeUri($uri) {
        // Normalise to a string first: strpos() and str_replace() must not be
        // handed null (deprecated since PHP 8.1), and a string subject makes
        // str_replace() return a string, never an array.
        $uri = (string) $uri;

        $schemeIsEncoded = strpos($uri, 'https%3A%2F%2F') === 0
            || strpos($uri, 'http%3A%2F%2F') === 0;

        if ($schemeIsEncoded) {
            $uri = urldecode($uri);
        }

        return str_replace('|', '%7C', $uri);
    }
}