diff --git a/README.md b/README.md index 202f3680..0b90c76d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![RSS-Bridge](static/logo_600px.png) === -[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-blue.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) +[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-blue.svg)](https://www.gnu.org/software/guix/packages/R/) [![Actions Status](https://img.shields.io/github/workflow/status/RSS-Bridge/rss-bridge/Tests/master?label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) [![Docker Build Status](https://img.shields.io/docker/cloud/build/rssbridge/rss-bridge?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one. It can be used on webservers or as a stand-alone application in CLI mode. diff --git a/bridges/BukowskisBridge.php b/bridges/BukowskisBridge.php new file mode 100755 index 00000000..8889f3c0 --- /dev/null +++ b/bridges/BukowskisBridge.php @@ -0,0 +1,219 @@ + array( + 'name' => 'Category', + 'type' => 'list', + 'values' => array( + 'All categories' => '', + 'Art' => array( + 'All' => 'art', + 'Classic Art' => 'art.classic-art', + 'Classic Finnish Art' => 'art.classic-finnish-art', + 'Classic Swedish Art' => 'art.classic-swedish-art', + 'Contemporary' => 'art.contemporary', + 'Modern Finnish Art' => 'art.modern-finnish-art', + 'Modern International Art' => 'art.modern-international-art', + 'Modern Swedish Art' => 'art.modern-swedish-art', + 'Old Masters' => 'art.old-masters', + 'Other' => 'art.other', + 'Photographs' => 'art.photographs', + 'Prints' => 'art.prints', + 'Sculpture' => 'art.sculpture', + 'Swedish Old Masters' => 'art.swedish-old-masters', + ), + 'Asian Ceramics & Works of Art' => array( + 'All' => 'asian-ceramics-works-of-art', + 'Other' => 'asian-ceramics-works-of-art.other', + 'Porcelain' => 'asian-ceramics-works-of-art.porcelain', + ), + 'Books & Manuscripts' => array( + 'All' => 'books-manuscripts', + 'Books' => 'books-manuscripts.books', + ), + 'Carpets, rugs & textiles' => array( + 'All' => 'carpets-rugs-textiles', + 'European' => 'carpets-rugs-textiles.european', + 'Oriental' => 'carpets-rugs-textiles.oriental', + 'Rest of the world' => 'carpets-rugs-textiles.rest-of-the-world', + 'Scandinavian' => 'carpets-rugs-textiles.scandinavian', + ), + 'Ceramics & porcelain' => array( + 'All' => 'ceramics-porcelain', + 'Ceramic ware' => 'ceramics-porcelain.ceramic-ware', + 'European' => 'ceramics-porcelain.european', + 'Rest of the world' => 'ceramics-porcelain.rest-of-the-world', + 'Scandinavian' => 'ceramics-porcelain.scandinavian', + ), + 'Collectibles' => array( + 'All' => 'collectibles', + 'Advertising & Retail' => 'collectibles.advertising-retail', + 'Memorabilia' => 'collectibles.memorabilia', + 'Movies & music' => 'collectibles.movies-music', + 'Other' => 'collectibles.other', + 'Retro & Popular Culture' => 'collectibles.retro-popular-culture', + 'Technica & Nautica' => 'collectibles.technica-nautica', + 'Toys' => 'collectibles.toys', + ), + 'Design' => array( + 'All' => 'design', + 'Art glass' => 'design.art-glass', + 'Furniture' => 'design.furniture', + 'Other' => 'design.other', + ), + 'Folk art' => array( + 'All' => 'folk-art', + 'All categories' => 'lots', + ), + 'Furniture' => array( + 'All' => 'furniture', + 'Armchairs & Sofas' => 'furniture.armchairs-sofas', + 'Cabinets & Bureaus' => 'furniture.cabinets-bureaus', + 'Chairs' => 'furniture.chairs', + 'Garden furniture' => 'furniture.garden-furniture', + 'Mirrors' => 'furniture.mirrors', + 'Other' => 'furniture.other', + 'Shelves & Book cases' => 'furniture.shelves-book-cases', + 'Tables' => 'furniture.tables', + ), + 'Glassware' => array( + 'All' => 'glassware', + 'Glassware' => 'glassware.glassware', + 'Other' => 'glassware.other', + ), + 'Jewellery' => array( + 'All' => 'jewellery', + 'Bracelets' => 'jewellery.bracelets', + 'Brooches' => 'jewellery.brooches', + 'Earrings' => 'jewellery.earrings', + 'Necklaces & Pendants' => 'jewellery.necklaces-pendants', + 'Other' => 'jewellery.other', + 'Rings' => 'jewellery.rings', + ), + 'Lighting' => array( + 'All' => 'lighting', + 'Candle sticks & Candelabras' => 'lighting.candle-sticks-candelabras', + 'Ceiling lights' => 'lighting.ceiling-lights', + 'Chandeliers' => 'lighting.chandeliers', + 'Floor lights' => 'lighting.floor-lights', + 'Other' => 'lighting.other', + 'Table lights' => 'lighting.table-lights', + 'Wall lights' => 'lighting.wall-lights', + ), + 'Militaria' => array( + 'All' => 'militaria', + 'Honors & Medals' => 'militaria.honors-medals', + 'Other militaria' => 'militaria.other-militaria', + 'Weaponry' => 'militaria.weaponry', + ), + 'Miscellaneous' => array( + 'All' => 'miscellaneous', + 'Brass, Copper & Pewter' => 'miscellaneous.brass-copper-pewter', + 'Nickel silver' => 'miscellaneous.nickel-silver', + 'Oriental' => 'miscellaneous.oriental', + 'Other' => 'miscellaneous.other', + ), + 'Silver' => array( + 'All' => 'silver', + 'Candle sticks' => 'silver.candle-sticks', + 'Cups & Bowls' => 'silver.cups-bowls', + 'Cutlery' => 'silver.cutlery', + 'Other' => 'silver.other', + ), + 'Timepieces' => array( + 'All' => 'timepieces', + 'Other' => 'timepieces.other', + 'Pocket watches' => 'timepieces.pocket-watches', + 'Table clocks' => 'timepieces.table-clocks', + 'Wrist watches' => 'timepieces.wrist-watches', + ), + 'Vintage & Fashion' => array( + 'All' => 'vintage-fashion', + 'Accessories' => 'vintage-fashion.accessories', + 'Bags & Trunks' => 'vintage-fashion.bags-trunks', + 'Clothes' => 'vintage-fashion.clothes', + ), + ) + ), + 'sort_order' => array( + 'name' => 'Sort order', + 'type' => 'list', + 'values' => array( + 'Ending soon' => 'ending', + 'Most recent' => 'recent', + 'Most bids' => 'most', + 'Fewest bids' => 'fewest', + 'Lowest price' => 'lowest', + 'Highest price' => 'highest', + 'Lowest estimate' => 'low', + 'Highest estimate' => 'high', + 'Alphabetical' => 'alphabetical', + ), + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'Swedish' => 'sv', + 'Finnish' => 'fi' + ), + ), + )); + + const CACHE_TIMEOUT = 3600; // 1 hour + + private $title; + + public function collectData() + { + $baseUrl = 'https://www.bukowskis.com'; + $category = $this->getInput('category'); + $language = $this->getInput('language'); + $sort_order = $this->getInput('sort_order'); + + $url = $baseUrl . '/' . $language . '/lots'; + + if ($category) + $url = $url . '/category/' . $category; + + if ($sort_order) + $url = $url . '/sort/' . $sort_order; + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request: ' . $url); + + $this->title = htmlspecialchars_decode($html->find('title', 0)->innertext); + + foreach ($html->find('div.c-lot-index-lot') as $lot) { + $title = $lot->find('a.c-lot-index-lot__title', 0)->plaintext; + $relative_url = $lot->find('a.c-lot-index-lot__link', 0)->href; + $images = json_decode( + htmlspecialchars_decode( + $lot + ->find('img.o-aspect-ratio__image', 0) + ->getAttribute('data-thumbnails') + ) + ); + + $this->items[] = array( + 'title' => $title, + 'uri' => $baseUrl . $relative_url, + 'uid' => $lot->getAttribute('data-lot-id'), + 'content' => count($images) > 0 ? "
$title" : $title, + 'enclosures' => array_slice($images, 1), + ); + } + } + + public function getName() + { + return $this->title ?: parent::getName(); + } +} diff --git a/bridges/DockerHubBridge.php b/bridges/DockerHubBridge.php new file mode 100644 index 00000000..a349a5f5 --- /dev/null +++ b/bridges/DockerHubBridge.php @@ -0,0 +1,143 @@ + array( + 'user' => array( + 'name' => 'User', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'rssbridge', + ), + 'repo' => array( + 'name' => 'Repository', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'rss-bridge', + ) + ), + 'Official Image' => array( + 'repo' => array( + 'name' => 'Repository', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'postgres', + ) + ), + ); + + const CACHE_TIMEOUT = 3600; // 1 hour + + private $apiURL = 'https://hub.docker.com/v2/repositories/'; + + public function collectData() { + $json = getContents($this->getApiUrl()) + or returnServerError('Could not request: ' . $this->getURI()); + + $data = json_decode($json, false); + + foreach ($data->results as $result) { + $item = array(); + + $lastPushed = date('Y-m-d H:i:s', strtotime($result->tag_last_pushed)); + + $item['title'] = $result->name; + $item['uid'] = $result->id; + $item['uri'] = $this->getTagUrl($result->name); + $item['author'] = $result->last_updater_username; + $item['timestamp'] = $result->tag_last_pushed; + $item['content'] = <<Tag
+

{$result->name}

+Last pushed
+

{$lastPushed}

+Images
+{$this->getImages($result)} +EOD; + + $this->items[] = $item; + } + + } + + public function getURI() { + if ($this->queriedContext === 'Official Image') { + return self::URI . '/_/' . $this->getRepo(); + } + + if ($this->getInput('repo')) { + return self::URI . '/r/' . $this->getRepo(); + } + + return parent::getURI(); + } + + public function getName() { + if ($this->getInput('repo')) { + return $this->getRepo() . ' - Docker Hub'; + } + + return parent::getName(); + } + + private function getRepo() { + if ($this->queriedContext === 'Official Image') { + return $this->getInput('repo'); + } + + return $this->getInput('user') . '/' . $this->getInput('repo'); + } + + private function getApiUrl() { + if ($this->queriedContext === 'Official Image') { + return $this->apiURL . 'library/' . $this->getRepo() . '/tags/?page_size=25&page=1'; + } + + return $this->apiURL . $this->getRepo() . '/tags/?page_size=25&page=1'; + } + + private function getLayerUrl($name, $digest) { + if ($this->queriedContext === 'Official Image') { + return self::URI . '/layers/' . $this->getRepo() . '/library/' . + $this->getRepo() . '/' . $name . '/images/' . $digest; + } + + return self::URI . '/layers/' . $this->getRepo() . '/' . $name . '/images/' . $digest; + } + + private function getTagUrl($name) { + if ($this->queriedContext === 'Official Image') { + return self::URI . '/_/' . $this->getRepo() . '?tab=tags&name=' . $name; + } + + return self::URI . '/r/' . $this->getRepo() . '/tags?name=' . $name; + } + + private function getImages($result) { + $html = <<DigestOS/architecture +EOD; + + foreach ($result->images as $image) { + $layersUrl = $this->getLayerUrl($result->name, $image->digest); + $id = $this->getShortDigestId($image->digest); + + $html .= << + {$id} + {$image->os}/{$image->architecture} + +EOD; + } + + return $html . ''; + } + + private function getShortDigestId($digest) { + $parts = explode(':', $digest); + return substr($parts[1], 0, 12); + } +} diff --git a/bridges/FSecureBlogBridge.php b/bridges/FSecureBlogBridge.php new file mode 100644 index 00000000..47910960 --- /dev/null +++ b/bridges/FSecureBlogBridge.php @@ -0,0 +1,115 @@ + array( + 'categories' => array( + 'name' => 'Blog categories', + 'exampleValue' => 'home-security', + ), + 'language' => array( + 'name' => 'Language', + 'defaultValue' => 'en', + ), + 'oldest_date' => array( + 'name' => 'Oldest article date', + 'exampleValue' => '-2 months', + ), + ) + ); + + public function getURI() { + $lang = $this->getInput('language') or 'en'; + if ($lang === 'en') { + return self::URI; + } + return self::URI . "/$lang"; + } + + public function collectData() { + $this->items = array(); + $this->seen = array(); + + $this->oldest = strtotime($this->getInput('oldest_date')) ?: 0; + + $categories = $this->getInput('categories'); + if (!empty($categories)) { + foreach (explode(',', $categories) as $cat) { + if (!empty($cat)) { + $this->collectCategory($cat); + } + } + return; + } + + $html = getSimpleHTMLDOMCached($this->getURI() . '/'); + + foreach ($html->find('ul.c-header-menu-desktop__list li a') as $link) { + $url = parse_url($link->href); + if (($pos = strpos($url['path'], '/category/')) !== false) { + $cat = substr($url['path'], $pos + strlen('/category/'), -1); + $this->collectCategory($cat); + } + } + } + + private function collectCategory($category) { + $url = $this->getURI() . "/category/$category/"; + while ($url) { + $url = $this->collectListing($url); + } + } + + // n.b. this relies on articles to be ordered by date so the cutoff works + private function collectListing($url) { + $html = getSimpleHTMLDOMCached($url, 60 * 60); + $items = $html->find('section.b-blog .l-blog__content__listing div.c-listing-item'); + + $catName = trim($html->find('section.b-blog .c-blog-header__title', 0)->plaintext); + + foreach ($items as $item) { + $url = $item->getAttribute('data-url'); + if (!$this->collectArticle($url)) { + return null; // Too old, stop collecting + } + } + + // Point's to 404 for non-english blog + // $next = $html->find('link[rel=next]', 0); + $next = $html->find('ul.page-numbers a.next', 0); + return $next ? $next->href : null; + } + + // Returns a boolean whether to continue collecting articles + // i.e. date is after oldest cutoff + private function collectArticle($url) { + if (array_key_exists($url, $this->seen)) { + return true; + } + $html = getSimpleHTMLDOMCached($url); + + $rssItem = array( 'uri' => $url, 'uid' => $url ); + $rssItem['title'] = $html->find('meta[property=og:title]', 0)->content; + $dt = $html->find('meta[property=article:published_time]', 0)->content; + // Exit if too old + if (strtotime($dt) < $this->oldest) { + return false; + } + $rssItem['timestamp'] = $dt; + $img = $html->find('meta[property=og:image]', 0); + $rssItem['enclosures'] = $img ? array($img->content) : array(); + $rssItem['author'] = trim($html->find('.c-blog-author__text a', 0)->plaintext); + $rssItem['categories'] = array_map(function ($link) { + return trim($link->plaintext); + }, $html->find('.b-single-header__categories .c-category-list a')); + $rssItem['content'] = trim($html->find('article', 0)->innertext); + + $this->items[] = $rssItem; + $this->seen[$url] = 1; + return true; + } +} diff --git a/bridges/FirefoxAddonsBridge.php b/bridges/FirefoxAddonsBridge.php new file mode 100644 index 00000000..d9803b74 --- /dev/null +++ b/bridges/FirefoxAddonsBridge.php @@ -0,0 +1,90 @@ + array( + 'name' => 'Add-on ID', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'save-to-the-wayback-machine', + ) + ) + ); + + const CACHE_TIMEOUT = 3600; + + private $feedName = ''; + private $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/'; + private $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/'; + private $outgoingRegex = '/https:\/\/outgoing.prod.mozaws.net\/v1\/(?:[A-z0-9]+)\//'; + + public function collectData() { + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $this->feedName = $html->find('h1[class="AddonTitle"] > a', 0)->innertext; + $author = $html->find('span.AddonTitle-author > a', 0)->plaintext; + + foreach ($html->find('div.AddonVersionCard-content') as $div) { + $item = array(); + + $item['title'] = $div->find('h2.AddonVersionCard-version', 0)->plaintext; + $item['uri'] = $this->getURI(); + $item['author'] = $author; + + if (preg_match($this->releaseDateRegex, $div->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) { + $item['timestamp'] = $match[1]; + $size = $match[2]; + } + + $compatibility = $div->find('div.AddonVersionCard-compatibility', 0)->plaintext; + $license = $div->find('p.AddonVersionCard-license', 0)->innertext; + $downloadlink = $div->find('a.InstallButtonWrapper-download-link', 0)->href; + $releaseNotes = $this->removeOutgoinglink($div->find('div.AddonVersionCard-releaseNotes', 0)); + + if (preg_match($this->xpiFileRegex, $downloadlink, $match)) { + $xpiFilename = $match[0]; + } + + $item['content'] = <<Release Notes +

{$releaseNotes}

+Compatibility +

{$compatibility}

+License +

{$license}

+Download +

{$xpiFilename} ($size)

+EOD; + + $this->items[] = $item; + } + } + + public function getURI() { + if (!is_null($this->getInput('id'))) { + return self::URI . 'en-US/firefox/addon/' . $this->getInput('id') . '/versions/'; + } + + return parent::getURI(); + } + + public function getName() { + if (!empty($this->feedName)) { + return $this->feedName . ' - Firefox Add-on'; + } + + return parent::getName(); + } + + private function removeOutgoinglink($html) { + foreach ($html->find('a') as $a) { + $a->href = urldecode(preg_replace($this->outgoingRegex, '', $a->href)); + } + + return $html->innertext; + } +} diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php index fc1432e3..372ff25c 100644 --- a/bridges/LeBonCoinBridge.php +++ b/bridges/LeBonCoinBridge.php @@ -352,12 +352,14 @@ class LeBonCoinBridge extends BridgeAbstract { public function collectData(){ - $url = 'https://api.leboncoin.fr/finder/search/'; + $url = 'https://api.leboncoin.fr/api/adfinder/v1/search'; $data = $this->buildRequestJson(); $header = array( - 'User-Agent: LBC;Android;Null;Null;Null;Null;Null;Null;Null;Null', + 'User-Agent: LBC;Android;10;SAMSUNG;phone;0aaaaaaaaaaaaaaa;wifi;8.24.3.8;152437;0', 'Content-Type: application/json', + 'X-LBC-CC: 7', + 'Accept: application/json,application/hal+json', 'Content-Length: ' . strlen($data), 'api_key: ' . self::$LBC_API_KEY ); diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php index 37fa3d5e..08d7482a 100644 --- a/bridges/NordbayernBridge.php +++ b/bridges/NordbayernBridge.php @@ -26,7 +26,7 @@ class NordbayernBridge extends BridgeAbstract { 'Gunzenhausen' => 'gunzenhausen', 'Hersbruck' => 'hersbruck', 'Herzogenaurach' => 'herzogenaurach', - 'Hilpolstein' => 'holpolstein', + 'Hilpoltstein' => 'hilpoltstein', 'Höchstadt' => 'hoechstadt', 'Lauf' => 'lauf', 'Neumarkt' => 'neumarkt', diff --git a/bridges/RadioMelodieBridge.php b/bridges/RadioMelodieBridge.php index fb5aca6e..3df0d044 100644 --- a/bridges/RadioMelodieBridge.php +++ b/bridges/RadioMelodieBridge.php @@ -25,7 +25,7 @@ class RadioMelodieBridge extends BridgeAbstract { $picture = array(); // Get the Main picture URL - $picture[] = $this->rewriteImage($article->find('div[id=pictureTitleSupport]', 0)->find('img', 0)->src); + $picture[] = self::URI . $article->find('div[id=pictureTitleSupport]', 0)->find('img', 0)->src; $audioHTML = $article->find('audio'); // Add the audio element to the enclosure diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 130dc662..1dbd8d91 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -23,6 +23,19 @@ class RedditBridge extends BridgeAbstract { 'exampleValue' => 'selfhosted, php', 'title' => 'SubReddit names, separated by commas' ) + ), + 'user' => array( + 'u' => array( + 'name' => 'User', + 'required' => true, + 'title' => 'User name' + ), + 'comments' => array( + 'type' => 'checkbox', + 'name' => 'Comments', + 'title' => 'Whether to return comments', + 'defaultValue' => false + ) ) ); @@ -33,12 +46,18 @@ class RedditBridge extends BridgeAbstract { public function getName() { if ($this->queriedContext == 'single') { return 'Reddit r/' . $this->getInput('r'); + } elseif ($this->queriedContext == 'user') { + return 'Reddit u/' . $this->getInput('u'); } else { return self::NAME; } } public function collectData() { + + $user = false; + $comments = false; + switch ($this->queriedContext) { case 'single': $subreddits[] = $this->getInput('r'); @@ -46,33 +65,55 @@ class RedditBridge extends BridgeAbstract { case 'multi': $subreddits = explode(',', $this->getInput('rs')); break; + case 'user': + $subreddits[] = $this->getInput('u'); + $user = true; + $comments = $this->getInput('comments'); + break; } foreach ($subreddits as $subreddit) { $name = trim($subreddit); - $values = getContents(self::URI . '/r/' . $name . '.json') + $values = getContents(self::URI . ($user ? '/user/' : '/r/') . $name . '.json') or returnServerError('Unable to fetch posts!'); $decodedValues = json_decode($values); foreach ($decodedValues->data->children as $post) { + if ($post->kind == 't1' && !$comments) { + continue; + } + $data = $post->data; $item = array(); $item['author'] = $data->author; - $item['title'] = $data->title; $item['uid'] = $data->id; $item['timestamp'] = $data->created_utc; $item['uri'] = $this->encodePermalink($data->permalink); $item['categories'] = array(); - $item['categories'][] = $data->link_flair_text; - $item['categories'][] = $data->pinned ? 'Pinned' : null; + + if ($post->kind == 't1') { + $item['title'] = 'Comment: ' . $data->link_title; + } else { + $item['title'] = $data->title; + + $item['categories'][] = $data->link_flair_text; + $item['categories'][] = $data->pinned ? 'Pinned' : null; + $item['categories'][] = $data->spoiler ? 'Spoiler' : null; + } + $item['categories'][] = $data->over_18 ? 'NSFW' : null; - $item['categories'][] = $data->spoiler ? 'Spoiler' : null; $item['categories'] = array_filter($item['categories']); - if ($data->is_self) { + if ($post->kind == 't1') { + // Comment + + $item['content'] + = htmlspecialchars_decode($data->body_html); + + } elseif ($data->is_self) { // Text post $item['content'] @@ -112,7 +153,7 @@ class RedditBridge extends BridgeAbstract { $id = $media->media_id; $type = $data->media_metadata->$id->m == 'image/gif' ? 'gif' : 'u'; $src = $data->media_metadata->$id->s->$type; - $images[] = ''; + $images[] = '
'; } $item['content'] = implode('', $images); diff --git a/bridges/ReutersBridge.php b/bridges/ReutersBridge.php new file mode 100644 index 00000000..cb6b4e38 --- /dev/null +++ b/bridges/ReutersBridge.php @@ -0,0 +1,246 @@ + array( + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from Reuters U.S/International edition', + 'values' => array( + 'Aerospace and Defense' => 'aerospace', + 'Business' => 'business', + 'China' => 'china', + 'Energy' => 'energy', + 'Entertainment' => 'chan:8ym8q8dl', + 'Environment' => 'chan:6u4f0jgs', + 'Health' => 'chan:8hw7807a', + 'Lifestyle' => 'life', + 'Markets' => 'markets', + 'Politics' => 'politics', + 'Science' => 'science', + 'Special Reports' => 'special-reports', + 'Sports' => 'sports', + 'Tech' => 'tech', + 'Top News' => 'home/topnews', + 'UK' => 'chan:61leiu7j', + 'USA News' => 'us', + 'Wire' => 'wire', + 'World' => 'world', + ) + ) + ) + ); + + /** + * Performs an HTTP request to the Reuters API and returns decoded JSON + * in the form of an associative array + * @param string $feed_uri Parameter string to the Reuters API + * @return array + */ + private function getJson($feed_uri) + { + $uri = "https://wireapi.reuters.com/v8$feed_uri"; + $returned_data = getContents($uri); + return json_decode($returned_data, true); + } + + /** + * Takes in data from Reuters Wire API and + * creates structured data in the form of a list + * of story information. + * @param array $data JSON collected from the Reuters Wire API + */ + private function processData($data) + { + /** + * Gets a list of wire items which are groups of templates + */ + $reuters_allowed_wireitems = array_filter( + $data, function ($wireitem) { + return in_array( + $wireitem['wireitem_type'], + self::ALLOWED_WIREITEM_TYPES + ); + } + ); + + /* + * Gets a list of "Templates", which is data containing a story + */ + $reuters_wireitem_templates = array_reduce( + $reuters_allowed_wireitems, + function (array $carry, array $wireitem) { + $wireitem_templates = $wireitem['templates']; + return array_merge( + $carry, + array_filter( + $wireitem_templates, function ( + array $template_data + ) { + return in_array( + $template_data['type'], + self::ALLOWED_TEMPLATE_TYPES + ); + } + ) + ); + }, + array() + ); + + return $reuters_wireitem_templates; + } + + private function getArticle($feed_uri) + { + // This will make another request to API to get full detail of article and author's name. + $rawData = $this->getJson($feed_uri); + $reuters_wireitems = $rawData['wireitems']; + $processedData = $this->processData($reuters_wireitems); + + $first = reset($processedData); + $article_content = $first['story']['body_items']; + $authorlist = $first['story']['authors']; + $category = $first['story']['channel']['name']; + $image_list = $first['story']['images']; + $img_placeholder = ''; + + foreach($image_list as $image) { // Add more image to article. + $image_url = $image['url']; + $image_caption = $image['caption']; + $img = ""; + $img_caption = "
$image_caption
"; + $figure = "
$img \t $img_caption
"; + $img_placeholder = $img_placeholder . $figure; + } + + $author = ''; + $counter = 0; + foreach ($authorlist as $data) { + //Formatting author's name. + $counter++; + $name = $data['name']; + if ($counter == count($authorlist)) { + $author = $author . $name; + } else { + $author = $author . "$name, "; + } + } + + $description = ''; + foreach ($article_content as $content) { + $data; + if(isset($content['content'])) { + $data = $content['content']; + } + switch($content['type']) { + case 'paragraph': + $description = $description . "

$data

"; + break; + case 'heading': + $description = $description . "

$data

"; + break; + case 'infographics': + $description = $description . ""; + break; + case 'inline_items': + $item_list = $content['items']; + $description = $description . '

'; + foreach ($item_list as $item) { + if($item['type'] == 'text') { + $description = $description . $item['content']; + } else { + $description = $description . $item['symbol']; + } + } + $description = $description . '

'; + break; + case 'p_table': + $description = $description . $content['content']; + break; + } + } + + $content_detail = array( + 'content' => $description, + 'author' => $author, + 'category' => $category, + 'images' => $img_placeholder, + ); + return $content_detail; + } + + public function getName() { + return $this->feedName; + } + + public function collectData() + { + $reuters_feed_name = $this->getInput('feed'); + + if(strpos($reuters_feed_name, 'chan:') !== false) { + // Now checking whether that feed has unique ID or not. + $feed_uri = "/feed/rapp/us/wirefeed/$reuters_feed_name"; + } else { + $feed_uri = "/feed/rapp/us/tabbar/feeds/$reuters_feed_name"; + } + + $data = $this->getJson($feed_uri); + + $reuters_wireitems = $data['wireitems']; + $this->feedName = $data['wire_name'] . ' | Reuters'; + $processedData = $this->processData($reuters_wireitems); + + // Merge all articles from Editor's Highlight section into existing array of templates. + $top_section = reset($reuters_wireitems); + if ($top_section['wireitem_type'] == 'headlines') { + $top_articles = $top_section['templates'][1]['headlines']; + $processedData = array_merge($top_articles, $processedData); + } + + foreach ($processedData as $story) { + $item['uid'] = $story['story']['usn']; + $article_uri = $story['template_action']['api_path']; + $content_detail = $this->getArticle($article_uri); + $description = $content_detail['content']; + $author = $content_detail['author']; + $images = $content_detail['images']; + $item['categories'] = array($content_detail['category']); + $item['author'] = $author; + if (!(bool) $description) { + $description = $story['story']['lede']; // Just in case the content doesn't have anything. + } else { + $item['content'] = "$description $images"; + } + + $item['title'] = $story['story']['hed']; + $item['timestamp'] = $story['story']['updated_at']; + $item['uri'] = $story['template_action']['url']; + $this->items[] = $item; + } + } +} diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 45e6fed1..fe1e9414 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -27,6 +27,9 @@ class SoundCloudBridge extends BridgeAbstract { private $feedIcon = null; private $clientIDCache = null; + private $clientIdRegex = '/client_id.*?"(.+?)"/'; + private $widgetRegex = '/widget-.+?\.js/'; + public function collectData(){ $res = $this->apiGet('resolve', array( 'url' => 'https://soundcloud.com/' . $this->getInput('u') @@ -112,21 +115,32 @@ class SoundCloudBridge extends BridgeAbstract { // Without url=http, this returns a 404 $playerHTML = getContents('https://w.soundcloud.com/player/?url=http') - or returnServerError('Unable to get player page.'); - $regex = '/widget-.+?\.js/'; - if(preg_match($regex, $playerHTML, $matches) == false) + or returnServerError('Unable to get player page.'); + + // Extract widget JS filenames from player page + if(preg_match_all($this->widgetRegex, $playerHTML, $matches) == false) returnServerError('Unable to find widget JS URL.'); - $widgetURL = 'https://widget.sndcdn.com/' . $matches[0]; - $widgetJS = getContents($widgetURL) - or returnServerError('Unable to get widget JS page.'); - $regex = '/client_id.*?"(.+?)"/'; - if(preg_match($regex, $widgetJS, $matches) == false) + $clientID = ''; + + // Loop widget js files and extract client ID + foreach ($matches[0] as $widgetFile) { + $widgetURL = 'https://widget.sndcdn.com/' . $widgetFile; + + $widgetJS = getContents($widgetURL) + or returnServerError('Unable to get widget JS page.'); + + if(preg_match($this->clientIdRegex, $widgetJS, $matches)) { + $clientID = $matches[1]; + $this->clientIDCache->saveData($clientID); + + return $clientID; + } + } + + if (empty($clientID)) { returnServerError('Unable to find client ID.'); - $clientID = $matches[1]; - - $this->clientIDCache->saveData($clientID); - return $clientID; + } } private function buildAPIURL($endpoint, $parameters){ diff --git a/bridges/SymfonyCastsBridge.php b/bridges/SymfonyCastsBridge.php new file mode 100644 index 00000000..acad9041 --- /dev/null +++ b/bridges/SymfonyCastsBridge.php @@ -0,0 +1,34 @@ +find('div'); + + /* @var simple_html_dom $div */ + foreach ($dives as $div) { + $id = $div->getAttribute('data-mark-update-id-value'); + $type = $div->find('h5', 0); + $title = $div->find('span', 0); + $dateString = $div->find('h5.font-gray', 0); + $href = $div->find('a', 0); + $url = 'https://symfonycasts.com' . $href->getAttribute('href'); + + $item = array(); // Create an empty item + $item['uid'] = $id; + $item['title'] = $title->innertext; + $item['timestamp'] = $dateString->innertext; + $item['content'] = $type->plaintext . '' . $title . ''; + $item['uri'] = $url; + $this->items[] = $item; // Add item to the list + } + + } +} diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index a7296b8a..152e2da0 100644 --- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -21,6 +21,18 @@ class TelegramBridge extends BridgeAbstract { private $itemTitle = ''; private $backgroundImageRegex = "/background-image:url\('(.*)'\)/"; + private $detectParamsRegex = '/^https?:\/\/t.me\/(?:s\/)?([\w]+)$/'; + + public function detectParameters($url) { + $params = array(); + + if(preg_match($this->detectParamsRegex, $url, $matches) > 0) { + $params['username'] = $matches[1]; + return $params; + } + + return null; + } public function collectData() { diff --git a/bridges/TheYeteeBridge.php b/bridges/TheYeteeBridge.php index fa5a6455..fb3c969e 100644 --- a/bridges/TheYeteeBridge.php +++ b/bridges/TheYeteeBridge.php @@ -12,7 +12,7 @@ class TheYeteeBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request The Yetee.'); - $div = $html->find('.hero-col'); + $div = $html->find('.module_timed-item.is--full'); foreach($div as $element) { $item = array(); @@ -21,16 +21,15 @@ class TheYeteeBridge extends BridgeAbstract { $title = $element->find('h2', 0)->plaintext; $item['title'] = $title; - $author = trim($element->find('div[class=credit]', 0)->plaintext); + $author = trim($element->find('.module_timed-item--artist a', 0)->plaintext); $item['author'] = $author; - $uri = $element->find('div[class=controls] a', 0)->href; - $item['uri'] = static::URI . $uri; + $item['uri'] = static::URI; - $content = '

' . $element->find('section[class=product-listing-info] p', -1)->plaintext . '

'; - $photos = $element->find('a[class=js-modaal-gallery] img'); + $content = '

' . $title . ' by ' . $author . '

'; + $photos = $element->find('a.img'); foreach($photos as $photo) { - $content = $content . "
"; + $content = $content . "
"; $item['enclosures'][] = $photo->src; } $item['content'] = $content; diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 89c11278..a09d1a20 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -379,6 +379,7 @@ class VkBridge extends BridgeAbstract return $time; } else { $strdate = $post->find('span.rel_date', 0)->plaintext; + $strdate = preg_replace('/[\x00-\x1F\x7F-\xFF]/', ' ', $strdate); $date = date_parse($strdate); if (!$date['year']) { diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index 3eb62b8c..56c6764a 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -34,7 +34,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { ); // This is an URL that is not protected by robot protection for Direct Download - const UNPROTECTED_URI = 'https://www.zone-annuaire.com/'; + const UNPROTECTED_URI = 'https://www.zone-telechargement.net/'; // This is an URL that is not protected by robot protection for Streaming Links const UNPROTECTED_URI_STREAMING = 'https://zone-telechargement.stream/'; diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 0ed605bf..69c67bc4 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -347,11 +347,13 @@ This bridge is not fetching its content through a secure connection'; CARD; // If we don't have any parameter for the bridge, we print a generic form to load it. - if(count($parameters) === 0 - || count($parameters) === 1 && array_key_exists('global', $parameters)) { - + if (count($parameters) === 0) { $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps); + // Display form with cache timeout and/or noproxy options (if enabled) when bridge has no parameters + } else if (count($parameters) === 1 && array_key_exists('global', $parameters)) { + $card .= self::getForm($bridgeName, $formats, $isActive, $isHttps, '', $parameters['global']); + } else { foreach($parameters as $parameterName => $parameter) { diff --git a/lib/FormatFactory.php b/lib/FormatFactory.php index 28db7596..e2bba2fc 100644 --- a/lib/FormatFactory.php +++ b/lib/FormatFactory.php @@ -46,7 +46,13 @@ class FormatFactory extends FactoryAbstract { throw new \InvalidArgumentException('Format name invalid!'); } - $name = $this->sanitizeFormatName($name) . 'Format'; + $name = $this->sanitizeFormatName($name); + + if (is_null($name)) { + throw new \InvalidArgumentException('Unknown format given!'); + } + + $name .= 'Format'; $pathFormat = $this->getWorkingDir() . $name . '.php'; if(!file_exists($pathFormat)) { @@ -72,7 +78,7 @@ class FormatFactory extends FactoryAbstract { * @return bool true if the name is a valid format name, false otherwise. */ public function isFormatName($name){ - return is_string($name) && preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name) === 1; + return is_string($name) && preg_match('/^[a-zA-Z0-9-]*$/', $name) === 1; } /** @@ -108,8 +114,6 @@ class FormatFactory extends FactoryAbstract { * * The PHP file name without file extension (i.e. `AtomFormat`) * * The format name (i.e. `Atom`) * - * Casing is ignored (i.e. `ATOM` and `atom` are the same). - * * A format file matching the given format name must exist in the working * directory! * @@ -118,6 +122,7 @@ class FormatFactory extends FactoryAbstract { * valid, null otherwise. */ protected function sanitizeFormatName($name) { + $name = ucfirst(strtolower($name)); if(is_string($name)) { @@ -131,18 +136,12 @@ class FormatFactory extends FactoryAbstract { $name = $matches[1]; } - // Improve performance for correctly written format names + // The name is valid if a corresponding format file is found on disk if(in_array($name, $this->getFormatNames())) { $index = array_search($name, $this->getFormatNames()); return $this->getFormatNames()[$index]; } - // The name is valid if a corresponding format file is found on disk - if(in_array(strtolower($name), array_map('strtolower', $this->getFormatNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', $this->getFormatNames())); - return $this->getFormatNames()[$index]; - } - Debug::log('Invalid format name: "' . $name . '"!'); } diff --git a/lib/contents.php b/lib/contents.php index 21d81e60..797c6125 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -311,7 +311,7 @@ function getSimpleHTMLDOMCached($url, $time = $cache->getTime(); if($time !== false && (time() - $duration < $time) - && Debug::isEnabled()) { // Contents within duration + && !Debug::isEnabled()) { // Contents within duration $content = $cache->loadData(); } else { // Content not within duration $content = getContents($url, $header, $opts);