From 84ba0c4a9e95cb93e17a37d53751ccc031e475c8 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sat, 8 Jun 2019 20:12:04 +0200 Subject: [PATCH 001/190] [Configuration] Bump version to dev.2019-06-08 --- lib/Configuration.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Configuration.php b/lib/Configuration.php index d6a31dfc..0dfd7ea9 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -28,7 +28,7 @@ final class Configuration { * * @todo Replace this property by a constant. */ - public static $VERSION = '2019-06-08'; + public static $VERSION = 'dev.2019-06-08'; /** * Holds the configuration data. From a725fdd3151f56caa7741a5f3c7ad17ed2a9474e Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Sat, 8 Jun 2019 20:27:41 +0200 Subject: [PATCH 002/190] [README] Add logos to badges where applicable --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 95086c03..39259eaa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ rss-bridge === -[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-light--gray.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg)](https://hub.docker.com/r/rssbridge/rss-bridge/) +[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub 
release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-light--gray.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites which don't have one. It can be used on webservers or as stand alone application in CLI mode. From edf10be93a82cc6d60ecbccd2104108f43b53239 Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Sat, 8 Jun 2019 20:36:59 +0200 Subject: [PATCH 003/190] [README] Change color for Guix release to blue This prevents confusion with the build status for Travis-CI and Docker --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39259eaa..3673457c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ rss-bridge === -[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix 
Release](https://img.shields.io/badge/guix%20release-unknown-light--gray.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) +[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-blue.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites which don't have one. It can be used on webservers or as stand alone application in CLI mode. From 70542686bb61795584775f4f5b974edea386b9b5 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 17:07:21 +0200 Subject: [PATCH 004/190] [contents] Fix parsing of incomplete headers Response headers may contain fields with no values. Example: "Referrer-Policy: " In this case the current implementation of explode() results in an error because there is no content after ": ". Changing the delimiter to ":" and trimming the value manually fixes that issue. 
--- lib/contents.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/contents.php b/lib/contents.php index c65d6dfb..958feb1b 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -322,8 +322,8 @@ function parseResponseHeader($header) { $header['http_code'] = $line; } else { - list ($key, $value) = explode(': ', $line); - $header[$key] = $value; + list ($key, $value) = explode(':', $line); + $header[$key] = trim($value); } From d34411137f40f5b1c60626fafc7fe217c39d5fa0 Mon Sep 17 00:00:00 2001 From: Joseph Date: Sun, 9 Jun 2019 15:24:40 +0000 Subject: [PATCH 005/190] [TwitterBridge] Display all images from a tweet (#1160) --- bridges/TwitterBridge.php | 67 +++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index b3b7bed4..d36581ef 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -245,22 +245,26 @@ EOD; // Add embeded image to content $image_html = ''; - $image = $this->getImageURI($tweet); - if(!$this->getInput('noimg') && !is_null($image)) { - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; + $images = $this->getImageURI($tweet); + if(!$this->getInput('noimg') && !is_null($images)) { - // add enclosures - $item['enclosures'] = array($image_orig); + foreach ($images as $image) { - $image_html = <<getInput('noimgscaling') ? $image : $image . ':orig'; + $image_thumb = $this->getInput('noimgscaling') ? $image : $image . 
':thumb'; + + // add enclosures + $item['enclosures'][] = $image_orig; + + $image_html .= << EOD; + } } // add content @@ -291,22 +295,27 @@ EOD; // Add embeded image to content $quotedImage_html = ''; - $quotedImage = $this->getQuotedImageURI($tweet); - if(!$this->getInput('noimg') && !is_null($quotedImage)) { - // Set image scaling - $quotedImage_orig = $this->getInput('noimgscaling') ? $quotedImage : $quotedImage . ':orig'; - $quotedImage_thumb = $this->getInput('noimgscaling') ? $quotedImage : $quotedImage . ':thumb'; + $quotedImages = $this->getQuotedImageURI($tweet); - // add enclosures - $item['enclosures'] = array($quotedImage_orig); + if(!$this->getInput('noimg') && !is_null($quotedImages)) { - $quotedImage_html = << + foreach ($quotedImages as $image) { + + // Set image scaling + $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; + $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; + + // add enclosures + $item['enclosures'][] = $image_orig; + + $quotedImage_html .= << + src="{$image_thumb}" /> EOD; + } } $item['content'] = <<find('div.AdaptiveMedia-container', 0); + if($container && $container->find('img', 0)) { - return $container->find('img', 0)->src; + foreach ($container->find('img') as $img) { + $images[] = $img->src; + } + } + + if (!empty($images)) { + return $images; } return null; @@ -370,9 +388,18 @@ EOD; private function getQuotedImageURI($tweet){ // Find media in tweet + $images = array(); + $container = $tweet->find('div.QuoteMedia-container', 0); + if($container && $container->find('img', 0)) { - return $container->find('img', 0)->src; + foreach ($container->find('img') as $img) { + $images[] = $img->src; + } + } + + if (!empty($images)) { + return $images; } return null; From 1efb7c7bcea6c6fa87afd5470cdb611a87b8c32c Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 19:01:49 +0200 Subject: [PATCH 006/190] [DesoutterBridge] Fix bridge after DOM changes --- 
bridges/DesoutterBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/DesoutterBridge.php b/bridges/DesoutterBridge.php index 4a7b0a94..0aae41ad 100644 --- a/bridges/DesoutterBridge.php +++ b/bridges/DesoutterBridge.php @@ -159,13 +159,13 @@ class DesoutterBridge extends BridgeAbstract { foreach($html->find('article') as $article) { $item = array(); - $item['uri'] = $article->find('[itemprop="name"]', 0)->href; - $item['title'] = $article->find('[itemprop="name"]', 0)->title; + $item['uri'] = $article->find('a', 0)->href; + $item['title'] = $article->find('a[title]', 0)->title; if($this->getInput('full')) { $item['content'] = $this->getFullNewsArticle($item['uri']); } else { - $item['content'] = $article->find('[itemprop="description"]', 0)->plaintext; + $item['content'] = $article->find('div.tile-body p', 0)->plaintext; } $this->items[] = $item; From 28d46b672112fc78964ae1230f39be23947ab132 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 19:04:56 +0200 Subject: [PATCH 007/190] [ShanaprojectBridge] Fix broken bridge --- bridges/ShanaprojectBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/ShanaprojectBridge.php b/bridges/ShanaprojectBridge.php index 6eadcb12..7084bfb7 100644 --- a/bridges/ShanaprojectBridge.php +++ b/bridges/ShanaprojectBridge.php @@ -13,7 +13,7 @@ class ShanaprojectBridge extends BridgeAbstract { if(!$html) returnServerError('Could not load \'seasons\' page!'); - $season = $html->find('div.follows_menu/a', 1); + $season = $html->find('div.follows_menu a', 1); if(!$season) returnServerError('Could not find \'Season Anime List\'!'); From ade09b2aad02312dd800bff275f80266f6e39a0f Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 19:21:27 +0200 Subject: [PATCH 008/190] [XenForoBridge] Fix broken bridge --- bridges/XenForoBridge.php | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git 
a/bridges/XenForoBridge.php b/bridges/XenForoBridge.php index 7bf1f15d..ac9cdd5b 100644 --- a/bridges/XenForoBridge.php +++ b/bridges/XenForoBridge.php @@ -118,7 +118,7 @@ class XenForoBridge extends BridgeAbstract { // Notice: The DOM structure changes depending on the XenForo version used if($mainContent = $html->find('div.mainContent', 0)) { $this->version = self::XENFORO_VERSION_1; - } elseif ($mainContent = $html->find('div[class="p-body"]', 0)) { + } elseif ($mainContent = $html->find('div[class~="p-body"]', 0)) { $this->version = self::XENFORO_VERSION_2; } else { returnServerError('This forum is currently not supported!'); @@ -140,7 +140,7 @@ class XenForoBridge extends BridgeAbstract { case self::XENFORO_VERSION_2: - $titleBar = $mainContent->find('div[class="p-title"] h1', 0) + $titleBar = $mainContent->find('div[class~="p-title"] h1', 0) or returnServerError('Error finding title bar!'); $this->title = $titleBar->plaintext; @@ -255,7 +255,7 @@ class XenForoBridge extends BridgeAbstract { $lang = $html->find('html', 0)->lang; - $messageList = $html->find('div[class="block-body"] article') + $messageList = $html->find('div[class~="block-body"] article') or returnServerError('Error finding message list!'); foreach($messageList as $post) { @@ -268,13 +268,17 @@ class XenForoBridge extends BridgeAbstract { $item['uri'] = $url . '#' . 
$post->getAttribute('id'); - $title = $post->find('div[class="message-content"] article', 0)->plaintext; + $title = $post->find('div[class~="message-content"] article', 0)->plaintext; $end = strpos($title, ' ', 70); $item['title'] = substr($title, 0, $end); - $item['timestamp'] = $this->fixDate($post->find('time', 0)->title, $lang); + if ($post->find('time[datetime]', 0)) { + $item['timestamp'] = $post->find('time[datetime]', 0)->datetime; + } else { + $item['timestamp'] = $this->fixDate($post->find('time', 0)->title, $lang); + } $item['author'] = $post->getAttribute('data-author'); - $item['content'] = $post->find('div[class="message-content"] article', 0); + $item['content'] = $post->find('div[class~="message-content"] article', 0); // Bridge specific properties $item['id'] = $post->getAttribute('id'); @@ -305,7 +309,7 @@ class XenForoBridge extends BridgeAbstract { // Load at least the last page do { - $pageurl = $hosturl . str_replace($sentinel, $lastpage, $baseurl); + $pageurl = str_replace($sentinel, $lastpage, $baseurl); // We can optimize performance by caching all but the last page if($page != $lastpage) { @@ -353,7 +357,7 @@ class XenForoBridge extends BridgeAbstract { // Load at least the last page do { - $pageurl = $hosturl . str_replace($sentinel, $lastpage, $baseurl); + $pageurl = str_replace($sentinel, $lastpage, $baseurl); // We can optimize performance by caching all but the last page if($page != $lastpage) { @@ -364,9 +368,9 @@ class XenForoBridge extends BridgeAbstract { or returnServerError('Error loading contents from ' . $pageurl . 
'!'); } - $html = defaultLinkTo($html, $this->hosturl); + $html = defaultLinkTo($html, $hosturl); - $this->extractThreadPostsV2($html, $this->pageurl); + $this->extractThreadPostsV2($html, $pageurl); $page--; From ea08445946d2fed1fa6bde89a570643bfcdf8442 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 19:32:22 +0200 Subject: [PATCH 009/190] [GlassdoorBridge] Fix broken bridge --- bridges/GlassdoorBridge.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/bridges/GlassdoorBridge.php b/bridges/GlassdoorBridge.php index 68b6a137..308859d7 100644 --- a/bridges/GlassdoorBridge.php +++ b/bridges/GlassdoorBridge.php @@ -141,7 +141,7 @@ class GlassdoorBridge extends BridgeAbstract { } private function collectReviewData($html, $limit) { - $reviews = $html->find('#EmployerReviews li[id^="empReview]') + $reviews = $html->find('#ReviewsFeed li[id^="empReview]') or returnServerError('Unable to find reviews!'); foreach($reviews as $review) { @@ -153,7 +153,19 @@ class GlassdoorBridge extends BridgeAbstract { $item['timestamp'] = strtotime($review->find('time', 0)->datetime); $mainText = $review->find('p.mainText', 0)->plaintext; - $description = $review->find('div.prosConsAdvice', 0)->innertext; + + $description = ''; + foreach($review->find('div.description p') as $p) { + + if ($p->hasClass('strong')) { + $p->tag = 'strong'; + $p->removeClass('strong'); + } + + $description .= $p; + + } + $item['content'] = "

{$mainText}

{$description}

"; $this->items[] = $item; From ba116d9ab603e8fc369fe4c47f5cc0ebb52ac44f Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 19:57:48 +0200 Subject: [PATCH 010/190] [GithubIssueBridge] Fix bridge after DOM changes --- bridges/GithubIssueBridge.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 91dd45ec..9889083f 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -192,8 +192,13 @@ class GithubIssueBridge extends BridgeAbstract { ENT_QUOTES, 'UTF-8' ); - $comments = trim($issue->find('.col-5', 0)->plaintext); - $item['content'] .= "\n" . 'Comments: ' . ($comments ? $comments : '0'); + + $comment_count = 0; + if($span = $issue->find('a[aria-label*="comment"] span', 0)) { + $comment_count = $span->plaintext; + } + + $item['content'] .= "\n" . 'Comments: ' . $comment_count; $item['uri'] = self::URI . $issue->find('.js-navigation-open', 0)->getAttribute('href'); $this->items[] = $item; From da339fd5cc5c74ad76e16c94f01752aa66862e0c Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 9 Jun 2019 20:39:45 +0200 Subject: [PATCH 011/190] [GithubIssueBridge] Include issue author comment in the feed - Add function to build an URL to the GitHub issue comment - Change scope of internal functions from protected to private - Use IDs instead of classes as comment selectors, to include the issue author in the output feed. 
References #1100 --- bridges/GithubIssueBridge.php | 52 +++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 9889083f..20da69d1 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -66,10 +66,21 @@ class GithubIssueBridge extends BridgeAbstract { return parent::getURI(); } - protected function extractIssueEvent($issueNbr, $title, $comment){ - $comment = $comment->firstChild(); - $uri = static::URI . $this->getInput('u') . '/' . $this->getInput('p') - . '/issues/' . $issueNbr . '#' . $comment->getAttribute('id'); + private function buildGitHubIssueCommentUri($issue_number, $comment_id) { + // https://github.com///issues/# + return static::URI + . $this->getInput('u') + . '/' + . $this->getInput('p') + . '/issues/' + . $issue_number + . '#' + . $comment_id; + } + + private function extractIssueEvent($issueNbr, $title, $comment){ + + $uri = buildGitHubIssueCommentUri($issueNbr, $comment->getAttribute('id')); $author = $comment->find('.author', 0)->plaintext; @@ -94,22 +105,21 @@ class GithubIssueBridge extends BridgeAbstract { return $item; } - protected function extractIssueComment($issueNbr, $title, $comment){ - $uri = static::URI . $this->getInput('u') . '/' - . $this->getInput('p') . '/issues/' . $issueNbr; + private function extractIssueComment($issueNbr, $title, $comment){ + + $uri = buildGitHubIssueCommentUri($issueNbr, $comment->id); $author = $comment->find('.author', 0)->plaintext; $title .= ' / ' . trim( - $comment->find('.comment .timeline-comment-header-text', 0)->plaintext + $comment->find('.timeline-comment-header-text', 0)->plaintext ); $content = $comment->find('.comment-body', 0)->innertext; $item = array(); $item['author'] = $author; - $item['uri'] = $uri - . '#' . 
$comment->firstChild()->nextSibling()->getAttribute('id'); + $item['uri'] = $uri; $item['title'] = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); $item['timestamp'] = strtotime( $comment->find('relative-time', 0)->getAttribute('datetime') @@ -118,25 +128,32 @@ class GithubIssueBridge extends BridgeAbstract { return $item; } - protected function extractIssueComments($issue){ + private function extractIssueComments($issue){ $items = array(); $title = $issue->find('.gh-header-title', 0)->plaintext; $issueNbr = trim( substr($issue->find('.gh-header-number', 0)->plaintext, 1) ); - $comments = $issue->find('.js-discussion', 0); - foreach($comments->children() as $comment) { + + $comments = $issue->find(' + [id^="issue-"] > .comment, + [id^="issuecomment-"] > .comment, + [id^="event-"], + [id^="ref-"] + '); + foreach($comments as $comment) { + if (!$comment->hasChildNodes()) { continue; } - $comment = $comment->firstChild(); - $classes = explode(' ', $comment->getAttribute('class')); - if (in_array('timeline-comment-wrapper', $classes)) { + + if (!$comment->hasClass('discussion-item-header')) { $item = $this->extractIssueComment($issueNbr, $title, $comment); $items[] = $item; continue; } - while (in_array('discussion-item', $classes)) { + + while ($comment->hasClass('discussion-item-header')) { $item = $this->extractIssueEvent($issueNbr, $title, $comment); $items[] = $item; $comment = $comment->nextSibling(); @@ -145,6 +162,7 @@ class GithubIssueBridge extends BridgeAbstract { } $classes = explode(' ', $comment->getAttribute('class')); } + } return $items; } From e4e04a78652f19c52237c3b423d0872da3c7c163 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Mon, 10 Jun 2019 00:02:13 +0200 Subject: [PATCH 012/190] [GithubIssueBridge] Fix broken feed item URLs References #1100 --- bridges/GithubIssueBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 20da69d1..bbb599bd 100644 --- 
a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -80,7 +80,7 @@ class GithubIssueBridge extends BridgeAbstract { private function extractIssueEvent($issueNbr, $title, $comment){ - $uri = buildGitHubIssueCommentUri($issueNbr, $comment->getAttribute('id')); + $uri = $this->buildGitHubIssueCommentUri($issueNbr, $comment->id); $author = $comment->find('.author', 0)->plaintext; @@ -107,7 +107,7 @@ class GithubIssueBridge extends BridgeAbstract { private function extractIssueComment($issueNbr, $title, $comment){ - $uri = buildGitHubIssueCommentUri($issueNbr, $comment->id); + $uri = $this->buildGitHubIssueCommentUri($issueNbr, $comment->parent->id); $author = $comment->find('.author', 0)->plaintext; From f76a23f0a5d10093b46dfa1545dd91eaee3f2a8d Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 10 Jun 2019 18:31:35 +0500 Subject: [PATCH 013/190] [YoutubeBridge] Add playlist caching (#1162) --- bridges/YoutubeBridge.php | 109 ++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 46 deletions(-) diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index d2a45128..90ee0499 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -65,7 +65,7 @@ class YoutubeBridge extends BridgeAbstract { private $feedName = ''; private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ - $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid"); + $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid", true); // Skip unavailable videos if(!strpos($html->innertext, 'IS_UNAVAILABLE_PAGE')) { @@ -127,7 +127,6 @@ class YoutubeBridge extends BridgeAbstract { } private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector, $add_parsed_items = true) { - $limit = $add_parsed_items ? 
10 : INF; $count = 0; $duration_min = $this->getInput('duration_min') ?: -1; @@ -141,40 +140,38 @@ class YoutubeBridge extends BridgeAbstract { } foreach($html->find($element_selector) as $element) { - if($count < $limit) { - $author = ''; - $desc = ''; - $time = 0; - $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); - $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); - $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); + $author = ''; + $desc = ''; + $time = 0; + $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); + $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid)); + $title = trim($this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext)); - if (strpos($vid, 'googleads') !== false - || $title == '[Private video]' - || $title == '[Deleted video]' - ) { - continue; - } - - // The duration comes in one of the formats: - // hh:mm:ss / mm:ss / m:ss - // 01:03:30 / 15:06 / 1:24 - $durationText = trim($element->find('div.timestamp span', 0)->plaintext); - $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); - - sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); - $duration = $hours * 3600 + $minutes * 60 + $seconds; - - if($duration < $duration_min || $duration > $duration_max) { - continue; - } - - if ($add_parsed_items) { - $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); - $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); - } - $count++; + if (strpos($vid, 'googleads') !== false + || $title == '[Private video]' + || $title == '[Deleted video]' + ) { + continue; } + + // The duration comes in one of the formats: + // hh:mm:ss / mm:ss / m:ss + // 01:03:30 / 15:06 / 1:24 + $durationText = trim($element->find('div.timestamp span', 0)->plaintext); + $durationText = preg_replace('/([\d]{1,2})\:([\d]{2})/', '00:$1:$2', $durationText); + + sscanf($durationText, '%d:%d:%d', $hours, $minutes, $seconds); + $duration 
= $hours * 3600 + $minutes * 60 + $seconds; + + if($duration < $duration_min || $duration > $duration_max) { + continue; + } + + if ($add_parsed_items) { + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + $count++; } return $count; } @@ -184,18 +181,38 @@ class YoutubeBridge extends BridgeAbstract { return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); } - private function ytGetSimpleHTMLDOM($url){ + private function ytGetSimpleHTMLDOM($url, $cached = false){ + $header = array( + 'Accept-Language: en-US' + ); + $opts = array(); + $lowercase = true; + $forceTagsClosed = true; + $target_charset = DEFAULT_TARGET_CHARSET; + $stripRN = false; + $defaultBRText = DEFAULT_BR_TEXT; + $defaultSpanText = DEFAULT_SPAN_TEXT; + if ($cached) { + return getSimpleHTMLDOMCached($url, + 86400, + $header, + $opts, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText); + } return getSimpleHTMLDOM($url, - $header = array( - 'Accept-Language: en-US' - ), - $opts = array(), - $lowercase = true, - $forceTagsClosed = true, - $target_charset = DEFAULT_TARGET_CHARSET, - $stripRN = false, - $defaultBRText = DEFAULT_BR_TEXT, - $defaultSpanText = DEFAULT_SPAN_TEXT); + $header, + $opts, + $lowercase, + $forceTagsClosed, + $target_charset, + $stripRN, + $defaultBRText, + $defaultSpanText); } public function collectData(){ @@ -229,7 +246,7 @@ class YoutubeBridge extends BridgeAbstract { $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); $html = $this->ytGetSimpleHTMLDOM($url_listing) or returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); - $item_count = $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a', true); + $item_count = $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a', false); if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) { $this->ytBridgeParseXmlFeed($xml); } else { From 75b021368423fef339ae58cf05f12dc89581ed88 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Mon, 10 Jun 2019 15:32:57 +0200 Subject: [PATCH 014/190] [GithubIssueBridge] Add support for detect action References #1100 --- bridges/GithubIssueBridge.php | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index bbb599bd..9a2b7ec1 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -239,4 +239,63 @@ class GithubIssueBridge extends BridgeAbstract { $item['title'] = preg_replace('/\s+/', ' ', $item['title']); }); } + + public function detectParameters($url) { + + $help = <</ +For issue comments the URL must include ///issues/ + +Examples: +- https://github.com/rss-bridge/rss-bridge +- https://github.com/rss-bridge/rss-bridge/issues/1 + +Issue comments for project issues are enabled if the URL points to issues +https://github.com/rss-bridge/rss-bridge/issues +EOD; + + if(filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED) === false + || strpos($url, self::URI) !== 0) { + returnClientError('Invalid URL' . $help); + return null; + } + + $url_components = parse_url($url); + $path_segments = array_values(array_filter(explode('/', $url_components['path']))); + + switch(count($path_segments)) { + case 2: { // Project issues + list($user, $project) = $path_segments; + $show_comments = 'off'; + } break; + case 3: { // Project issues with issue comments + if($path_segments[2] !== 'issues') { + returnClientError('Invalid path. Expected "/issues/", found "/' + . $path_segments[2] + . 
'/"' + . $help + ); + } + list($user, $project) = $path_segments; + $show_comments = 'on'; + } break; + case 4: { // Issue comments + list($user, $project, /* issues */, $issue) = $path_segments; + } break; + default: { + returnClientError('Invalid path.' . $help); + } + } + + return array( + 'u' => $user, + 'p' => $project, + 'c' => isset($show_comments) ? $show_comments : null, + 'i' => isset($issue) ? $issue : null, + ); + + } } From 93de8c239bd612e54a4b882640022b4d8291463c Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Mon, 10 Jun 2019 15:40:57 +0200 Subject: [PATCH 015/190] [README] Remove GooglePlus from supported sites --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3673457c..865840a0 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,6 @@ Supported sites/pages (examples) * `DuckDuckGo`: Most recent results from [DuckDuckGo.com](https://duckduckgo.com/) * `Facebook` : Returns the latest posts on a page or profile on [Facebook](https://facebook.com/) * `FlickrExplore` : [Latest interesting images](http://www.flickr.com/explore) from Flickr -* `GooglePlus` : Most recent posts of user timeline * `GoogleSearch` : Most recent results from Google Search * `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances) * `Instagram`: Most recent photos from an Instagram user From d005acca83b5113ad4fd12c7d1ee39377c72daf8 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 11 Jun 2019 21:51:10 +0200 Subject: [PATCH 016/190] [TwitterBridge] Add extensive description to keyword search query References #1163 --- bridges/TwitterBridge.php | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index d36581ef..91facce6 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -28,7 +28,31 @@ class TwitterBridge extends BridgeAbstract { 'name' => 'Keyword or #hashtag', 'required' => true, 
'exampleValue' => 'rss-bridge, #rss-bridge', - 'title' => 'Insert a keyword or hashtag' + 'title' => << array( From b27487ace064b6578c0ac4789d3b1ea051666250 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 12 Jun 2019 18:25:48 +0200 Subject: [PATCH 017/190] [TwitterBridge] Fix detection of retweets on lists References #1161 --- bridges/TwitterBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 91facce6..76ca5305 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -189,7 +189,7 @@ EOD // Skip retweets? if($this->getInput('noretweet') - && strcasecmp($tweet->getAttribute('data-screen-name'), $this->getInput('u'))) { + && $tweet->find('div.context span.js-retweet-text a', 0)) { continue; } @@ -213,8 +213,8 @@ EOD $item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES); // get author $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; - if(strcasecmp($tweet->getAttribute('data-screen-name'), $this->getInput('u'))) { - $item['author'] .= ' RT: @' . $this->getInput('u'); + if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) { + $item['author'] .= ' RT: @' . $rt->plaintext; } // get avatar link $item['avatar'] = $tweet->find('img', 0)->src; From b1b7e4edce5a29e2587be4cccf27ebb56e31bf5f Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 12 Jun 2019 20:11:30 +0200 Subject: [PATCH 018/190] [DollbooruBridge] Remove bridge The target site for this bridge has been down for at least a year now and there doesn't seem to be any attempt to get it back up. Their twitter account is also silent since 2012, so no harm removing this bridge. 
https://twitter.com/dollbooru?lang=en --- bridges/DollbooruBridge.php | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 bridges/DollbooruBridge.php diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php deleted file mode 100644 index 5ed4119f..00000000 --- a/bridges/DollbooruBridge.php +++ /dev/null @@ -1,9 +0,0 @@ - Date: Wed, 12 Jun 2019 20:21:00 +0200 Subject: [PATCH 019/190] [SakugabooruBridge] Remove bridge The target server for this bridge is no longer reachable and there doesn't seem to be any attempt to get it back online. --- bridges/SakugabooruBridge.php | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 bridges/SakugabooruBridge.php diff --git a/bridges/SakugabooruBridge.php b/bridges/SakugabooruBridge.php deleted file mode 100644 index 1d6cee0a..00000000 --- a/bridges/SakugabooruBridge.php +++ /dev/null @@ -1,11 +0,0 @@ - Date: Thu, 13 Jun 2019 17:13:02 +0000 Subject: [PATCH 020/190] [BrutBridge] Create custom feed name for each category and edition (#1164) --- bridges/BrutBridge.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bridges/BrutBridge.php b/bridges/BrutBridge.php index 432cb502..32265b69 100644 --- a/bridges/BrutBridge.php +++ b/bridges/BrutBridge.php @@ -92,6 +92,21 @@ class BrutBridge extends BridgeAbstract { return parent::getURI(); } + public function getName() { + + if (!is_null($this->getInput('edition')) && !is_null($this->getInput('category'))) { + $parameters = $this->getParameters(); + + $editionValues = array_flip($parameters[0]['edition']['values']); + $categoryValues = array_flip($parameters[0]['category']['values']); + + return $categoryValues[$this->getInput('category')] . ' - ' . + $editionValues[$this->getInput('edition')] . 
' - Brut.'; + } + + return parent::getName(); + } + private function processDate($description) { if ($this->getInput('edition') === 'uk') { From 6d6d6037a3b8065a37f4f7bf3ec92300c1ec72ea Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Thu, 13 Jun 2019 19:49:48 +0200 Subject: [PATCH 021/190] [GithubIssueBridge] Don't return error messages in detectParameters() detectParameters() is called in a loop for all bridges on a URL, thus if a bridge returns an error message, the output messages get mixed up and all detect operations fail. This seems to be a limitation of the detect function for now. --- bridges/GithubIssueBridge.php | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 9a2b7ec1..2eddeb2e 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -242,24 +242,8 @@ class GithubIssueBridge extends BridgeAbstract { public function detectParameters($url) { - $help = <</ -For issue comments the URL must include ///issues/ - -Examples: -- https://github.com/rss-bridge/rss-bridge -- https://github.com/rss-bridge/rss-bridge/issues/1 - -Issue comments for project issues are enabled if the URL points to issues -https://github.com/rss-bridge/rss-bridge/issues -EOD; - if(filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED) === false || strpos($url, self::URI) !== 0) { - returnClientError('Invalid URL' . $help); return null; } @@ -273,11 +257,7 @@ EOD; } break; case 3: { // Project issues with issue comments if($path_segments[2] !== 'issues') { - returnClientError('Invalid path. Expected "/issues/", found "/' - . $path_segments[2] - . '/"' - . $help - ); + return null; } list($user, $project) = $path_segments; $show_comments = 'on'; @@ -286,7 +266,7 @@ EOD; list($user, $project, /* issues */, $issue) = $path_segments; } break; default: { - returnClientError('Invalid path.' . 
$help); + return null; } } From dfa9c651cd8c6c1c177c3e0046a497098ba7ba90 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Thu, 13 Jun 2019 19:55:10 +0200 Subject: [PATCH 022/190] [BridgeList] Change placeholder message in the search bar The search bar should indicate that searching by URL is supported. References #1099 --- lib/BridgeList.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/BridgeList.php b/lib/BridgeList.php index d79d72f3..8a334a3f 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -130,7 +130,7 @@ EOD; EOD; From 8dd8be969446285f737d6c819f43b76568795674 Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Sun, 16 Jun 2019 19:15:28 +0200 Subject: [PATCH 023/190] [.gitattributes] Keep files in export for Heroku Heroku requires the file `app.json` as well as the composer files `composer.json` and `composer.lock` to deploy a service. Deploy doesn't work if these files are ignored during export (because of the way this service deploys projects). This commit adds comments to .gitattributes to prevent this issue from re-appearing in the future. All affected lines are commented out. Also added some spacing for better readability. References #1165 --- .gitattributes | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 13ebe2ca..36544021 100644 --- a/.gitattributes +++ b/.gitattributes @@ -22,18 +22,24 @@ *.RTF diff=astextplain # Ignore files in git archive (i.e. 
GitHub release builds) + ## Docker Dockerfile export-ignore .dockerignore export-ignore + ## Travis .travis.yml export-ignore + ## GitHub .github/ export-ignore + ## Git .gitattributes export-ignore .gitignore export-ignore + ## Scalingo scalingo.json export-ignore + ## RSS-Bridge phpunit.xml export-ignore phpcs.xml export-ignore @@ -42,8 +48,22 @@ tests/ export-ignore cache/.gitkeep export-ignore bridges/DemoBridge.php export-ignore bridges/FeedExpanderExampleBridge.php export-ignore + ## Composer -composer.json export-ignore -composer.lock export-ignore +# +# Keep the following lines commented out. Heroku does +# not function if the composer files are ignored during +# export. For more information see +# https://github.com/rss-bridge/rss-bridge/issues/1165 +# +# composer.json export-ignore +# composer.lock export-ignore + ## Heroku -app.json export-ignore +# +# Keep the following line commented out. Heroku does +# not function if app.json is ignored during export. +# For more information see +# https://github.com/rss-bridge/rss-bridge/issues/1165 +# +# app.json export-ignore From 686f21bc5017517ad5c0efc500490811388812ce Mon Sep 17 00:00:00 2001 From: Thibault Couraud <1036233+couraudt@users.noreply.github.com> Date: Sun, 16 Jun 2019 17:35:43 +0000 Subject: [PATCH 024/190] [FindACrew] Improve bridge results (#1120) --- bridges/FindACrewBridge.php | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bridges/FindACrewBridge.php b/bridges/FindACrewBridge.php index 1dac775a..abab6e13 100644 --- a/bridges/FindACrewBridge.php +++ b/bridges/FindACrewBridge.php @@ -62,11 +62,16 @@ class FindACrewBridge extends BridgeAbstract { foreach ($annonces as $annonce) { $item = array(); - $img = parent::getURI() . $annonce->find('.lst-pic img', 0)->getAttribute('src'); + $link = parent::getURI() . $annonce->find('.lst-ctrls a', 0)->href; + $htmlDetail = getSimpleHTMLDOMCached($link . 
'?mdl=2'); // add ?mdl=2 for xhr content not full html page + + $img = parent::getURI() . $htmlDetail->find('img.img-responsive', 0)->getAttribute('src'); $item['title'] = $annonce->find('.lst-tags span', 0)->plaintext; - $item['uri'] = parent::getURI() . $annonce->find('.lst-ctrls a', 0)->href; - $content = $annonce->find('.lst-dtl', 0)->innertext; - $item['content'] = "
$content"; + $item['uri'] = $link; + $content = $htmlDetail->find('.panel-body div.clearfix.row > div', 1)->innertext; + $content .= $htmlDetail->find('.panel-body > div', 1)->innertext; + $content = defaultLinkTo($content, parent::getURI()); + $item['content'] = $content; $item['enclosures'] = array($img); $item['categories'] = array($annonce->find('.css_AccLocCur', 0)->plaintext); $this->items[] = $item; From aeca4cfd60c53f1429d318c8f78066cd65dbe9e5 Mon Sep 17 00:00:00 2001 From: Thibault Couraud <1036233+couraudt@users.noreply.github.com> Date: Sun, 16 Jun 2019 17:40:21 +0000 Subject: [PATCH 025/190] [BAEBridge] Use defaultLinkTo rather than str_replace (#1168) --- bridges/BAEBridge.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bridges/BAEBridge.php b/bridges/BAEBridge.php index caa2cf72..6c5d8ba2 100644 --- a/bridges/BAEBridge.php +++ b/bridges/BAEBridge.php @@ -55,9 +55,7 @@ class BAEBridge extends BridgeAbstract { $content .= '
'; $content .= $htmlDetail->find('section', 0)->innertext; - $content = str_replace('src="/', 'src="' . parent::getURI() . '/', $content); - $content = str_replace('href="/', 'href="' . parent::getURI() . '/', $content); - $item['content'] = $content; + $item['content'] = defaultLinkTo($content, parent::getURI()); $image = $htmlDetail->find('#zoom', 0); if ($image) { $item['enclosures'] = array(parent::getURI() . $image->getAttribute('src')); From 849eaeb50edd2fd06866578d4f93abb5193ffd01 Mon Sep 17 00:00:00 2001 From: Tobias Alexander Franke Date: Sun, 16 Jun 2019 20:21:48 +0200 Subject: [PATCH 026/190] [SteamCommunityBridge] Add Workshop category (#1172) --- bridges/SteamCommunityBridge.php | 72 ++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 4 deletions(-) diff --git a/bridges/SteamCommunityBridge.php b/bridges/SteamCommunityBridge.php index 56ea257c..9919a4b5 100644 --- a/bridges/SteamCommunityBridge.php +++ b/bridges/SteamCommunityBridge.php @@ -20,7 +20,8 @@ class SteamCommunityBridge extends BridgeAbstract { 'values' => array( 'Artwork' => 'images', 'Screenshots' => 'screenshots', - 'Videos' => 'videos' + 'Videos' => 'videos', + 'Workshop' => 'workshop' ) ) ) @@ -32,7 +33,7 @@ class SteamCommunityBridge extends BridgeAbstract { protected function getMainPage() { $category = $this->getInput('category'); - $html = getSimpleHTMLDOM($this->getURI() . '/?p=1&browsefilter=mostrecent') + $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not fetch Steam data.'); return $html; @@ -56,12 +57,17 @@ class SteamCommunityBridge extends BridgeAbstract { } public function getURI() { + if ($this->getInput('category') === 'workshop') + return self::URI . '/workshop/browse/?appid=' + . $this->getInput('i') . '&browsesort=mostrecent'; + return self::URI . '/app/' . $this->getInput('i') . '/' - . $this->getInput('category'); + . $this->getInput('category') + . 
'/?p=1&browsefilter=mostrecent'; } - public function collectData() { + private function collectMedia() { $category = $this->getInput('category'); $html = $this->getMainPage(); $cards = $html->find('div.apphub_Card'); @@ -124,4 +130,62 @@ class SteamCommunityBridge extends BridgeAbstract { break; } } + + private function collectWorkshop() { + $category = $this->getInput('category'); + $html = $this->getMainPage(); + $workShopItems = $html->find('div.workshopItem'); + + foreach($workShopItems as $workShopItem) { + $author = $workShopItem->find('div.workshopItemAuthorName', 0)->find('a', 0); + $author = $author->innertext; + + $fileRating = $workShopItem->find('img.fileRating', 0); + + $uri = $workShopItem->find('a.ugc', 0)->getAttribute('href'); + + $htmlItem = getSimpleHTMLDOMCached($uri); + + $title = $htmlItem->find('div.workshopItemTitle', 0)->innertext; + $date = $htmlItem->find('div.detailsStatRight', 0)->innertext; + $description = $htmlItem->find('div.workshopItemDescription', 0)->innertext; + + $previewImage = $htmlItem->find('#previewImage', 0); + + $htmlTags = $htmlItem->find('div.workshopTags'); + + $tags = ''; + + foreach($htmlTags as $htmlTag) { + if ($tags !== '') + $tags .= ','; + + $tags .= $htmlTag->find('a', 0)->innertext; + } + + // create item + $item = array(); + $item['title'] = $title; + $item['uri'] = $uri; + $item['timestamp'] = strtotime($date); + $item['author'] = $author; + $item['categories'] = $category; + + $item['content'] = '

' + . $previewImage . '

' . $fileRating + . '

' . $description . '

'; + + $this->items[] = $item; + + if (count($this->items) >= 10) + break; + } + } + + public function collectData() { + if ($this->getInput('category') === 'workshop') + $this->collectWorkshop(); + else + $this->collectMedia(); + } } From 55e1703741d069ee1c8d6748d964d9de84d8c003 Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Sun, 16 Jun 2019 20:35:24 +0200 Subject: [PATCH 027/190] [EliteDangerousGalnetBridge] Remove duplicate items (#1167) --- bridges/EliteDangerousGalnetBridge.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php index dc6077b3..1afa0423 100644 --- a/bridges/EliteDangerousGalnetBridge.php +++ b/bridges/EliteDangerousGalnetBridge.php @@ -47,5 +47,8 @@ class EliteDangerousGalnetBridge extends BridgeAbstract { $this->items[] = $item; } + + //Remove duplicates that sometimes show up on the website + $this->items = array_unique($this->items, SORT_REGULAR); } } From 1ada9c26f86282ba9a527709dd5a326f22874c05 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 18 Jun 2019 18:32:56 +0200 Subject: [PATCH 028/190] format: Sanitize format name in the format factory RSS-Bridge currently sanitizes the format name only for the display action, which can cause problems if other actions depend on formats as well. It is therefore better to do sanitization in the factory class for formats. Additionally, formats should not require a perfect match, so 'Atom' and 'aToM' make no difference. This will also allow users to define formats in their own style (i.e. only lowercase via CLI). 
References #1001 --- actions/DisplayAction.php | 6 ----- lib/Format.php | 53 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index a1b106f5..7a59ad7f 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -18,12 +18,6 @@ class DisplayAction extends ActionAbstract { $format = $this->userData['format'] or returnClientError('You must specify a format!'); - // DEPRECATED: 'nameFormat' scheme is replaced by 'name' in format parameter values - // this is to keep compatibility until futher complete removal - if(($pos = strpos($format, 'Format')) === (strlen($format) - strlen('Format'))) { - $format = substr($format, 0, $pos); - } - // whitelist control if(!Bridge::isWhitelisted($bridge)) { throw new \Exception('This bridge is not whitelisted', 401); diff --git a/lib/Format.php b/lib/Format.php index 061b1f21..edbe5f57 100644 --- a/lib/Format.php +++ b/lib/Format.php @@ -68,7 +68,7 @@ class Format { throw new \InvalidArgumentException('Format name invalid!'); } - $name = $name . 'Format'; + $name = self::sanitizeFormatName($name) . 'Format'; $pathFormat = self::getWorkingDir() . $name . '.php'; if(!file_exists($pathFormat)) { @@ -163,4 +163,55 @@ class Format { return $formatNames; } + + /** + * Returns the sanitized format name. + * + * The format name can be specified in various ways: + * * The PHP file name (i.e. `AtomFormat.php`) + * * The PHP file name without file extension (i.e. `AtomFormat`) + * * The format name (i.e. `Atom`) + * + * Casing is ignored (i.e. `ATOM` and `atom` are the same). + * + * A format file matching the given format name must exist in the working + * directory! + * + * @param string $name The format name + * @return string|null The sanitized format name if the provided name is + * valid, null otherwise. 
+ */ + protected static function sanitizeFormatName($name) { + + if(is_string($name)) { + + // Trim trailing '.php' if exists + if(preg_match('/(.+)(?:\.php)/', $name, $matches)) { + $name = $matches[1]; + } + + // Trim trailing 'Format' if exists + if(preg_match('/(.+)(?:Format)/i', $name, $matches)) { + $name = $matches[1]; + } + + // Improve performance for correctly written format names + if(in_array($name, self::getFormatNames())) { + $index = array_search($name, self::getFormatNames()); + return self::getFormatNames()[$index]; + } + + // The name is valid if a corresponding format file is found on disk + if(in_array(strtolower($name), array_map('strtolower', self::getFormatNames()))) { + $index = array_search(strtolower($name), array_map('strtolower', self::getFormatNames())); + return self::getFormatNames()[$index]; + } + + Debug::log('Invalid format name: "' . $name . '"!'); + + } + + return null; // Bad parameter + + } } From 705b9daa0bcfab2e6c342547eb7bf6324bd6c10f Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 18 Jun 2019 18:55:29 +0200 Subject: [PATCH 029/190] bridge: Refactor bridge factory to non-static class The bridge factory can be based on the abstract factory class if it wasn't static. This allows for higher abstraction and makes future extensions possible. Also, not all parts of RSS-Bridge need to work on the same instance of the bridge factory. 
References #1001 --- actions/DetectAction.php | 9 +- actions/DisplayAction.php | 7 +- actions/ListAction.php | 9 +- lib/BridgeCard.php | 5 +- lib/{Bridge.php => BridgeFactory.php} | 113 ++++++-------------------- lib/BridgeList.php | 7 +- lib/rssbridge.php | 3 +- 7 files changed, 52 insertions(+), 101 deletions(-) rename lib/{Bridge.php => BridgeFactory.php} (66%) diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 2ad79a27..86605de4 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -19,13 +19,16 @@ class DetectAction extends ActionAbstract { $format = $this->userData['format'] or returnClientError('You must specify a format!'); - foreach(Bridge::getBridgeNames() as $bridgeName) { + $bridgeFac = new \BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - if(!Bridge::isWhitelisted($bridgeName)) { + foreach($bridgeFac->getBridgeNames() as $bridgeName) { + + if(!$bridgeFac->isWhitelisted($bridgeName)) { continue; } - $bridge = Bridge::create($bridgeName); + $bridge = $bridgeFac->create($bridgeName); if($bridge === false) { continue; diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 7a59ad7f..1dec5cbf 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -18,14 +18,17 @@ class DisplayAction extends ActionAbstract { $format = $this->userData['format'] or returnClientError('You must specify a format!'); + $bridgeFac = new \BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); + // whitelist control - if(!Bridge::isWhitelisted($bridge)) { + if(!$bridgeFac->isWhitelisted($bridge)) { throw new \Exception('This bridge is not whitelisted', 401); die; } // Data retrieval - $bridge = Bridge::create($bridge); + $bridge = $bridgeFac->create($bridge); $noproxy = array_key_exists('_noproxy', $this->userData) && filter_var($this->userData['_noproxy'], FILTER_VALIDATE_BOOLEAN); diff --git a/actions/ListAction.php b/actions/ListAction.php index 03e06119..92aef0e0 100644 --- 
a/actions/ListAction.php +++ b/actions/ListAction.php @@ -17,9 +17,12 @@ class ListAction extends ActionAbstract { $list->bridges = array(); $list->total = 0; - foreach(Bridge::getBridgeNames() as $bridgeName) { + $bridgeFac = new \BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - $bridge = Bridge::create($bridgeName); + foreach($bridgeFac->getBridgeNames() as $bridgeName) { + + $bridge = $bridgeFac->create($bridgeName); if($bridge === false) { // Broken bridge, show as inactive @@ -31,7 +34,7 @@ class ListAction extends ActionAbstract { } - $status = Bridge::isWhitelisted($bridgeName) ? 'active' : 'inactive'; + $status = $bridgeFac->isWhitelisted($bridgeName) ? 'active' : 'inactive'; $list->bridges[$bridgeName] = array( 'status' => $status, diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 697433ff..8c36919c 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -299,7 +299,10 @@ This bridge is not fetching its content through a secure connection'; */ static function displayBridgeCard($bridgeName, $formats, $isActive = true){ - $bridge = Bridge::create($bridgeName); + $bridgeFac = new \BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); + + $bridge = $bridgeFac->create($bridgeName); if($bridge == false) return ''; diff --git a/lib/Bridge.php b/lib/BridgeFactory.php similarity index 66% rename from lib/Bridge.php rename to lib/BridgeFactory.php index 31607922..fea254f1 100644 --- a/lib/Bridge.php +++ b/lib/BridgeFactory.php @@ -35,17 +35,7 @@ * $bridge = Bridge::create('GitHubIssue'); * ``` */ -class Bridge { - - /** - * Holds a path to the working directory. - * - * Do not access this property directly! - * Use {@see Bridge::setWorkingDir()} and {@see Bridge::getWorkingDir()} instead. - * - * @var string|null - */ - protected static $workingDir = null; +class BridgeFactory extends FactoryAbstract { /** * Holds a list of whitelisted bridges. 
@@ -55,18 +45,7 @@ class Bridge { * * @var array */ - protected static $whitelist = array(); - - /** - * Throws an exception when trying to create a new instance of this class. - * Use {@see Bridge::create()} to instanciate a new bridge from the working - * directory. - * - * @throws \LogicException if called. - */ - public function __construct(){ - throw new \LogicException('Use ' . __CLASS__ . '::create($name) to create bridge objects!'); - } + protected $whitelist = array(); /** * Creates a new bridge object from the working directory. @@ -77,13 +56,13 @@ class Bridge { * @param string $name Name of the bridge object. * @return object|bool The bridge object or false if the class is not instantiable. */ - public static function create($name){ - if(!self::isBridgeName($name)) { + public function create($name){ + if(!$this->isBridgeName($name)) { throw new \InvalidArgumentException('Bridge name invalid!'); } - $name = self::sanitizeBridgeName($name) . 'Bridge'; - $filePath = self::getWorkingDir() . $name . '.php'; + $name = $this->sanitizeBridgeName($name) . 'Bridge'; + $filePath = $this->getWorkingDir() . $name . '.php'; if(!file_exists($filePath)) { throw new \Exception('Bridge file ' . $filePath . ' does not exist!'); @@ -98,48 +77,6 @@ class Bridge { return false; } - /** - * Sets the working directory. - * - * @param string $dir Path to the directory containing bridges. - * @throws \LogicException if the provided path is not a valid string. - * @throws \Exception if the provided path does not exist. - * @throws \InvalidArgumentException if $dir is not a directory. 
- * @return void - */ - public static function setWorkingDir($dir){ - self::$workingDir = null; - - if(!is_string($dir)) { - throw new \InvalidArgumentException('Working directory is not a valid string!'); - } - - if(!file_exists($dir)) { - throw new \Exception('Working directory does not exist!'); - } - - if(!is_dir($dir)) { - throw new \InvalidArgumentException('Working directory is not a directory!'); - } - - self::$workingDir = realpath($dir) . '/'; - } - - /** - * Returns the working directory. - * The working directory must be specified with {@see Bridge::setWorkingDir()}! - * - * @throws \LogicException if the working directory is not set. - * @return string The current working directory. - */ - public static function getWorkingDir(){ - if(is_null(self::$workingDir)) { - throw new \LogicException('Working directory is not set!'); - } - - return self::$workingDir; - } - /** * Returns true if the provided name is a valid bridge name. * @@ -149,7 +86,7 @@ class Bridge { * @param string $name The bridge name. * @return bool true if the name is a valid bridge name, false otherwise. */ - public static function isBridgeName($name){ + public function isBridgeName($name){ return is_string($name) && preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name) === 1; } @@ -160,12 +97,12 @@ class Bridge { * * @return array List of bridge names */ - public static function getBridgeNames(){ + public function getBridgeNames(){ static $bridgeNames = array(); // Initialized on first call if(empty($bridgeNames)) { - $files = scandir(self::getWorkingDir()); + $files = scandir($this->getWorkingDir()); if($files !== false) { foreach($files as $file) { @@ -185,8 +122,8 @@ class Bridge { * @param string $name Name of the bridge. * @return bool True if the bridge is whitelisted. 
*/ - public static function isWhitelisted($name){ - return in_array(self::sanitizeBridgeName($name), self::getWhitelist()); + public function isWhitelisted($name){ + return in_array($this->sanitizeBridgeName($name), $this->getWhitelist()); } /** @@ -205,7 +142,7 @@ class Bridge { * * @return array Array of whitelisted bridges */ - public static function getWhitelist() { + public function getWhitelist() { static $firstCall = true; // Initialized on first call @@ -220,17 +157,17 @@ class Bridge { } if($contents === '*') { // Whitelist all bridges - self::$whitelist = self::getBridgeNames(); + $this->whitelist = $this->getBridgeNames(); } else { - //self::$whitelist = array_map('self::sanitizeBridgeName', explode("\n", $contents)); + //$this->$whitelist = array_map('$this->sanitizeBridgeName', explode("\n", $contents)); foreach(explode("\n", $contents) as $bridgeName) { - self::$whitelist[] = self::sanitizeBridgeName($bridgeName); + $this->whitelist[] = $this->sanitizeBridgeName($bridgeName); } } } - return self::$whitelist; + return $this->whitelist; } @@ -248,8 +185,8 @@ class Bridge { * @param array $default The whitelist as array of bridge names. * @return void */ - public static function setWhitelist($default = array()) { - self::$whitelist = array_map('self::sanitizeBridgeName', $default); + public function setWhitelist($default = array()) { + $this->whitelist = array_map(array($this, 'sanitizeBridgeName'), $default); } /** @@ -269,7 +206,7 @@ class Bridge { * @return string|null The sanitized bridge name if the provided name is * valid, null otherwise. 
*/ - protected static function sanitizeBridgeName($name) { + protected function sanitizeBridgeName($name) { if(is_string($name)) { @@ -284,15 +221,15 @@ class Bridge { } // Improve performance for correctly written bridge names - if(in_array($name, self::getBridgeNames())) { - $index = array_search($name, self::getBridgeNames()); - return self::getBridgeNames()[$index]; + if(in_array($name, $this->getBridgeNames())) { + $index = array_search($name, $this->getBridgeNames()); + return $this->getBridgeNames()[$index]; } // The name is valid if a corresponding bridge file is found on disk - if(in_array(strtolower($name), array_map('strtolower', self::getBridgeNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', self::getBridgeNames())); - return self::getBridgeNames()[$index]; + if(in_array(strtolower($name), array_map('strtolower', $this->getBridgeNames()))) { + $index = array_search(strtolower($name), array_map('strtolower', $this->getBridgeNames())); + return $this->getBridgeNames()[$index]; } Debug::log('Invalid bridge name specified: "' . $name . '"!'); diff --git a/lib/BridgeList.php b/lib/BridgeList.php index 8a334a3f..0c3c4ffa 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -61,14 +61,17 @@ EOD; $totalActiveBridges = 0; $inactiveBridges = ''; - $bridgeList = Bridge::getBridgeNames(); + $bridgeFac = new \BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); + + $bridgeList = $bridgeFac->getBridgeNames(); $formats = Format::getFormatNames(); $totalBridges = count($bridgeList); foreach($bridgeList as $bridgeName) { - if(Bridge::isWhitelisted($bridgeName)) { + if($bridgeFac->isWhitelisted($bridgeName)) { $body .= BridgeCard::displayBridgeCard($bridgeName, $formats); $totalActiveBridges++; diff --git a/lib/rssbridge.php b/lib/rssbridge.php index 3b0e65d4..a10c117d 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -63,7 +63,7 @@ require_once PATH_LIB . 'Debug.php'; require_once PATH_LIB . 
'Exceptions.php'; require_once PATH_LIB . 'Format.php'; require_once PATH_LIB . 'FormatAbstract.php'; -require_once PATH_LIB . 'Bridge.php'; +require_once PATH_LIB . 'BridgeFactory.php'; require_once PATH_LIB . 'BridgeAbstract.php'; require_once PATH_LIB . 'FeedExpander.php'; require_once PATH_LIB . 'Cache.php'; @@ -87,7 +87,6 @@ require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; // Initialize static members try { - Bridge::setWorkingDir(PATH_LIB_BRIDGES); Format::setWorkingDir(PATH_LIB_FORMATS); Cache::setWorkingDir(PATH_LIB_CACHES); } catch(Exception $e) { From 2460b678869415fff08238afdd61e05eba501dc5 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 18 Jun 2019 19:04:19 +0200 Subject: [PATCH 030/190] cache: Refactor cache factory to non-static class The cache factory can be based on the abstract factory class if it wasn't static. This allows for higher abstraction and makes future extensions possible. Also, not all parts of RSS-Bridge need to work on the same instance of the factory. 
References #1001 --- actions/DisplayAction.php | 4 +- bridges/ElloBridge.php | 4 +- lib/{Cache.php => CacheFactory.php} | 88 ++++------------------------- lib/contents.php | 8 ++- lib/rssbridge.php | 3 +- 5 files changed, 25 insertions(+), 82 deletions(-) rename lib/{Cache.php => CacheFactory.php} (61%) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 1dec5cbf..17bc28b7 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -82,7 +82,9 @@ class DisplayAction extends ActionAbstract { ); // Initialize cache - $cache = Cache::create(Configuration::getConfig('cache', 'type')); + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); $cache->setScope(''); $cache->purgeCache(86400); // 24 hours $cache->setKey($cache_params); diff --git a/bridges/ElloBridge.php b/bridges/ElloBridge.php index 1f66edc3..3de167ef 100644 --- a/bridges/ElloBridge.php +++ b/bridges/ElloBridge.php @@ -120,7 +120,9 @@ class ElloBridge extends BridgeAbstract { } private function getAPIKey() { - $cache = Cache::create(Configuration::getConfig('cache', 'type')); + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); $cache->setScope(get_called_class()); $cache->setKey(['key']); $key = $cache->loadData(); diff --git a/lib/Cache.php b/lib/CacheFactory.php similarity index 61% rename from lib/Cache.php rename to lib/CacheFactory.php index 6c2943ad..9ce5c19b 100644 --- a/lib/Cache.php +++ b/lib/CacheFactory.php @@ -31,29 +31,7 @@ * $cache = Cache::create('FileCache'); * ``` */ -class Cache { - - /** - * Holds a path to the working directory. - * - * Do not access this property directly! - * Use {@see Cache::setWorkingDir()} and {@see Cache::getWorkingDir()} instead. 
- * - * @var string|null - */ - protected static $workingDir = null; - - /** - * Throws an exception when trying to create a new instance of this class. - * Use {@see Cache::create()} to create a new cache object from the working - * directory. - * - * @throws \LogicException if called. - */ - public function __construct(){ - throw new \LogicException('Use ' . __CLASS__ . '::create($name) to create cache objects!'); - } - +class CacheFactory extends FactoryAbstract { /** * Creates a new cache object from the working directory. * @@ -63,14 +41,14 @@ class Cache { * @param string $name Name of the cache object. * @return object|bool The cache object or false if the class is not instantiable. */ - public static function create($name){ - $name = self::sanitizeCacheName($name) . 'Cache'; + public function create($name){ + $name = $this->sanitizeCacheName($name) . 'Cache'; - if(!self::isCacheName($name)) { + if(!$this->isCacheName($name)) { throw new \InvalidArgumentException('Cache name invalid!'); } - $filePath = self::getWorkingDir() . $name . '.php'; + $filePath = $this->getWorkingDir() . $name . '.php'; if(!file_exists($filePath)) { throw new \Exception('Cache file ' . $filePath . ' does not exist!'); @@ -85,48 +63,6 @@ class Cache { return false; } - /** - * Sets the working directory. - * - * @param string $dir Path to a directory containing cache classes - * @throws \InvalidArgumentException if $dir is not a string. - * @throws \Exception if the working directory doesn't exist. - * @throws \InvalidArgumentException if $dir is not a directory. 
- * @return void - */ - public static function setWorkingDir($dir){ - self::$workingDir = null; - - if(!is_string($dir)) { - throw new \InvalidArgumentException('Working directory is not a valid string!'); - } - - if(!file_exists($dir)) { - throw new \Exception('Working directory does not exist!'); - } - - if(!is_dir($dir)) { - throw new \InvalidArgumentException('Working directory is not a directory!'); - } - - self::$workingDir = realpath($dir) . '/'; - } - - /** - * Returns the working directory. - * The working directory must be set with {@see Cache::setWorkingDir()}! - * - * @throws \LogicException if the working directory is not set. - * @return string The current working directory. - */ - public static function getWorkingDir(){ - if(is_null(self::$workingDir)) { - throw new \LogicException('Working directory is not set!'); - } - - return self::$workingDir; - } - /** * Returns true if the provided name is a valid cache name. * @@ -136,7 +72,7 @@ class Cache { * @param string $name The cache name. * @return bool true if the name is a valid cache name, false otherwise. */ - public static function isCacheName($name){ + public function isCacheName($name){ return is_string($name) && preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name) === 1; } @@ -147,12 +83,12 @@ class Cache { * * @return array List of cache names */ - public static function getCacheNames(){ + public function getCacheNames(){ static $cacheNames = array(); // Initialized on first call if(empty($cacheNames)) { - $files = scandir(self::getWorkingDir()); + $files = scandir($this->getWorkingDir()); if($files !== false) { foreach($files as $file) { @@ -183,7 +119,7 @@ class Cache { * @return string|null The sanitized cache name if the provided name is * valid, null otherwise. 
*/ - protected static function sanitizeCacheName($name) { + protected function sanitizeCacheName($name) { if(is_string($name)) { @@ -198,9 +134,9 @@ class Cache { } // The name is valid if a corresponding file is found on disk - if(in_array(strtolower($name), array_map('strtolower', self::getCacheNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', self::getCacheNames())); - return self::getCacheNames()[$index]; + if(in_array(strtolower($name), array_map('strtolower', $this->getCacheNames()))) { + $index = array_search(strtolower($name), array_map('strtolower', $this->getCacheNames())); + return $this->getCacheNames()[$index]; } Debug::log('Invalid cache name specified: "' . $name . '"!'); diff --git a/lib/contents.php b/lib/contents.php index 958feb1b..d59b0d01 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -45,7 +45,9 @@ function getContents($url, $header = array(), $opts = array()){ Debug::log('Reading contents from "' . $url . '"'); // Initialize cache - $cache = Cache::create(Configuration::getConfig('cache', 'type')); + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); $cache->setScope('server'); $cache->purgeCache(86400); // 24 hours (forced) @@ -270,7 +272,9 @@ function getSimpleHTMLDOMCached($url, Debug::log('Caching url ' . $url . ', duration ' . $duration); // Initialize cache - $cache = Cache::create(Configuration::getConfig('cache', 'type')); + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); $cache->setScope('pages'); $cache->purgeCache(86400); // 24 hours (forced) diff --git a/lib/rssbridge.php b/lib/rssbridge.php index a10c117d..eda4e583 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -66,7 +66,7 @@ require_once PATH_LIB . 'FormatAbstract.php'; require_once PATH_LIB . 
'BridgeFactory.php'; require_once PATH_LIB . 'BridgeAbstract.php'; require_once PATH_LIB . 'FeedExpander.php'; -require_once PATH_LIB . 'Cache.php'; +require_once PATH_LIB . 'CacheFactory.php'; require_once PATH_LIB . 'Authentication.php'; require_once PATH_LIB . 'Configuration.php'; require_once PATH_LIB . 'BridgeCard.php'; @@ -88,7 +88,6 @@ require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; // Initialize static members try { Format::setWorkingDir(PATH_LIB_FORMATS); - Cache::setWorkingDir(PATH_LIB_CACHES); } catch(Exception $e) { error_log($e); header('Content-type: text/plain', true, 500); From fc8421ed504d613cfcb6f042665c0439ff1fcf35 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 18 Jun 2019 19:15:20 +0200 Subject: [PATCH 031/190] format: Refactor format factory to non-static class The format factory can be based on the abstract factory class if it wasn't static. This allows for higher abstraction and makes future extensions possible. Also, not all parts of RSS-Bridge need to work on the same instance of the factory. 
References #1001 --- actions/DisplayAction.php | 4 +- lib/BridgeList.php | 6 +- lib/{Format.php => FormatFactory.php} | 94 +++++---------------------- lib/rssbridge.php | 11 +--- tests/AtomFormatTest.php | 4 +- tests/JsonFormatTest.php | 4 +- tests/MrssFormatTest.php | 4 +- 7 files changed, 32 insertions(+), 95 deletions(-) rename lib/{Format.php => FormatFactory.php} (60%) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 17bc28b7..9b4d363f 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -215,7 +215,9 @@ class DisplayAction extends ActionAbstract { // Data transformation try { - $format = Format::create($format); + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + $format = $formatFac->create($format); $format->setItems($items); $format->setExtraInfos($infos); $format->setLastModified($cache->getTime()); diff --git a/lib/BridgeList.php b/lib/BridgeList.php index 0c3c4ffa..9f77f7af 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -63,9 +63,11 @@ EOD; $bridgeFac = new \BridgeFactory(); $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - $bridgeList = $bridgeFac->getBridgeNames(); - $formats = Format::getFormatNames(); + + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + $formats = $formatFac->getFormatNames(); $totalBridges = count($bridgeList); diff --git a/lib/Format.php b/lib/FormatFactory.php similarity index 60% rename from lib/Format.php rename to lib/FormatFactory.php index edbe5f57..28db7596 100644 --- a/lib/Format.php +++ b/lib/FormatFactory.php @@ -31,29 +31,7 @@ * $format = Format::create('Atom'); * ``` */ -class Format { - - /** - * Holds a path to the working directory. - * - * Do not access this property directly! - * Use {@see Format::setWorkingDir()} and {@see Format::getWorkingDir()} instead. 
- * - * @var string|null - */ - protected static $workingDir = null; - - /** - * Throws an exception when trying to create a new instance of this class. - * Use {@see Format::create()} to create a new format object from the working - * directory. - * - * @throws \LogicException if called. - */ - public function __construct(){ - throw new \LogicException('Use ' . __CLASS__ . '::create($name) to create cache objects!'); - } - +class FormatFactory extends FactoryAbstract { /** * Creates a new format object from the working directory. * @@ -63,13 +41,13 @@ class Format { * @param string $name Name of the format object. * @return object|bool The format object or false if the class is not instantiable. */ - public static function create($name){ - if(!self::isFormatName($name)) { + public function create($name){ + if(!$this->isFormatName($name)) { throw new \InvalidArgumentException('Format name invalid!'); } - $name = self::sanitizeFormatName($name) . 'Format'; - $pathFormat = self::getWorkingDir() . $name . '.php'; + $name = $this->sanitizeFormatName($name) . 'Format'; + $pathFormat = $this->getWorkingDir() . $name . '.php'; if(!file_exists($pathFormat)) { throw new \Exception('Format file ' . $filePath . ' does not exist!'); @@ -84,48 +62,6 @@ class Format { return false; } - /** - * Sets the working directory. - * - * @param string $dir Path to a directory containing cache classes - * @throws \InvalidArgumentException if $dir is not a string. - * @throws \Exception if the working directory doesn't exist. - * @throws \InvalidArgumentException if $dir is not a directory. 
- * @return void - */ - public static function setWorkingDir($dir){ - self::$workingDir = null; - - if(!is_string($dir)) { - throw new \InvalidArgumentException('Dir format must be a string.'); - } - - if(!file_exists($dir)) { - throw new \Exception('Working directory does not exist!'); - } - - if(!is_dir($dir)) { - throw new \InvalidArgumentException('Working directory is not a directory!'); - } - - self::$workingDir = realpath($dir) . '/'; - } - - /** - * Returns the working directory. - * The working directory must be set with {@see Format::setWorkingDir()}! - * - * @throws \LogicException if the working directory is not set. - * @return string The current working directory. - */ - public static function getWorkingDir(){ - if(is_null(self::$workingDir)) { - throw new \LogicException('Working directory is not set!'); - } - - return self::$workingDir; - } - /** * Returns true if the provided name is a valid format name. * @@ -135,7 +71,7 @@ class Format { * @param string $name The format name. * @return bool true if the name is a valid format name, false otherwise. */ - public static function isFormatName($name){ + public function isFormatName($name){ return is_string($name) && preg_match('/^[A-Z][a-zA-Z0-9-]*$/', $name) === 1; } @@ -146,11 +82,11 @@ class Format { * * @return array List of format names */ - public static function getFormatNames(){ + public function getFormatNames(){ static $formatNames = array(); // Initialized on first call if(empty($formatNames)) { - $files = scandir(self::getWorkingDir()); + $files = scandir($this->getWorkingDir()); if($files !== false) { foreach($files as $file) { @@ -181,7 +117,7 @@ class Format { * @return string|null The sanitized format name if the provided name is * valid, null otherwise. 
*/ - protected static function sanitizeFormatName($name) { + protected function sanitizeFormatName($name) { if(is_string($name)) { @@ -196,15 +132,15 @@ class Format { } // Improve performance for correctly written format names - if(in_array($name, self::getFormatNames())) { - $index = array_search($name, self::getFormatNames()); - return self::getFormatNames()[$index]; + if(in_array($name, $this->getFormatNames())) { + $index = array_search($name, $this->getFormatNames()); + return $this->getFormatNames()[$index]; } // The name is valid if a corresponding format file is found on disk - if(in_array(strtolower($name), array_map('strtolower', self::getFormatNames()))) { - $index = array_search(strtolower($name), array_map('strtolower', self::getFormatNames())); - return self::getFormatNames()[$index]; + if(in_array(strtolower($name), array_map('strtolower', $this->getFormatNames()))) { + $index = array_search(strtolower($name), array_map('strtolower', $this->getFormatNames())); + return $this->getFormatNames()[$index]; } Debug::log('Invalid format name: "' . $name . '"!'); diff --git a/lib/rssbridge.php b/lib/rssbridge.php index eda4e583..a025f229 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -61,7 +61,7 @@ require_once PATH_LIB . 'FactoryAbstract.php'; require_once PATH_LIB . 'FeedItem.php'; require_once PATH_LIB . 'Debug.php'; require_once PATH_LIB . 'Exceptions.php'; -require_once PATH_LIB . 'Format.php'; +require_once PATH_LIB . 'FormatFactory.php'; require_once PATH_LIB . 'FormatAbstract.php'; require_once PATH_LIB . 'BridgeFactory.php'; require_once PATH_LIB . 'BridgeAbstract.php'; @@ -84,12 +84,3 @@ require_once PATH_LIB . 'contents.php'; define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */ require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; require_once PATH_LIB_VENDOR . 
'php-urljoin/src/urljoin.php'; - -// Initialize static members -try { - Format::setWorkingDir(PATH_LIB_FORMATS); -} catch(Exception $e) { - error_log($e); - header('Content-type: text/plain', true, 500); - die($e->getMessage()); -} diff --git a/tests/AtomFormatTest.php b/tests/AtomFormatTest.php index 1a8905fb..818b82eb 100644 --- a/tests/AtomFormatTest.php +++ b/tests/AtomFormatTest.php @@ -77,7 +77,9 @@ class AtomFormatTest extends TestCase { } private function initFormat() { - $this->format = \Format::create('Atom'); + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + $this->format = $formatFac->create('Atom'); $this->format->setItems($this->sample->items); $this->format->setExtraInfos($this->sample->meta); $this->format->setLastModified(strtotime('2000-01-01 12:00:00 UTC')); diff --git a/tests/JsonFormatTest.php b/tests/JsonFormatTest.php index 24d09067..a9417e25 100644 --- a/tests/JsonFormatTest.php +++ b/tests/JsonFormatTest.php @@ -77,7 +77,9 @@ class JsonFormatTest extends TestCase { } private function initFormat() { - $this->format = \Format::create('Json'); + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + $this->format = $formatFac->create('Json'); $this->format->setItems($this->sample->items); $this->format->setExtraInfos($this->sample->meta); $this->format->setLastModified(strtotime('2000-01-01 12:00:00 UTC')); diff --git a/tests/MrssFormatTest.php b/tests/MrssFormatTest.php index b4dd32a9..0ddc33c4 100644 --- a/tests/MrssFormatTest.php +++ b/tests/MrssFormatTest.php @@ -78,7 +78,9 @@ class MrssFormatTest extends TestCase { } private function initFormat() { - $this->format = \Format::create('Mrss'); + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + $this->format = $formatFac->create('Mrss'); $this->format->setItems($this->sample->items); $this->format->setExtraInfos($this->sample->meta); $this->format->setLastModified(strtotime('2000-01-01 12:00:00 
UTC')); From 14e6dbb6457c5d34d5b02c95b77744f91048d070 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 18 Jun 2019 19:21:28 +0200 Subject: [PATCH 032/190] [ListActionTest] Fix broken test --- tests/ListActionTest.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ListActionTest.php b/tests/ListActionTest.php index 7f625882..1b1e1172 100644 --- a/tests/ListActionTest.php +++ b/tests/ListActionTest.php @@ -42,8 +42,11 @@ class ListActionTest extends TestCase { 'Item count doesn\'t match' ); + $bridgeFac = new BridgeFactory(); + $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); + $this->assertEquals( - count(Bridge::getBridgeNames()), + count($bridgeFac->getBridgeNames()), count($items['bridges']), 'Number of bridges doesn\'t match' ); From 99d1343045ba9e01252f6b3bc8338c944c458080 Mon Sep 17 00:00:00 2001 From: Joseph Date: Tue, 18 Jun 2019 20:18:52 +0000 Subject: [PATCH 033/190] [SplCenterBridge] Add new bridge (#1177) --- bridges/SplCenterBridge.php | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 bridges/SplCenterBridge.php diff --git a/bridges/SplCenterBridge.php b/bridges/SplCenterBridge.php new file mode 100644 index 00000000..7a690908 --- /dev/null +++ b/bridges/SplCenterBridge.php @@ -0,0 +1,64 @@ + array( + 'name' => 'Content', + 'type' => 'list', + 'values' => array( + 'News' => 'news', + 'Hatewatch' => 'hatewatch', + ), + 'defaultValue' => 'news', + ) + ) + ); + + const CACHE_TIMEOUT = 3600; // 1 hour + + protected function parseItem($item) { + $item = parent::parseItem($item); + + $articleHtml = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request: ' . $item['uri']); + + foreach ($articleHtml->find('.file') as $index => $media) { + $articleHtml->find('div.file', $index)->outertext = '' . $media->outertext . 
''; + } + + $item['content'] = $articleHtml->find('div#group-content-container', 0)->innertext; + $item['enclosures'][] = $articleHtml->find('meta[name="twitter:image"]', 0)->content; + + return $item; + } + + public function collectData() { + $this->collectExpandableDatas($this->getURI() . '/rss.xml'); + } + + public function getURI() { + + if (!is_null($this->getInput('content'))) { + return self::URI . '/' . $this->getInput('content'); + } + + return parent::getURI(); + } + + public function getName() { + + if (!is_null($this->getInput('content'))) { + $parameters = $this->getParameters(); + + $contentValues = array_flip($parameters[0]['content']['values']); + + return $contentValues[$this->getInput('content')] . ' - Southern Poverty Law Center'; + } + + return parent::getName(); + } +} From 5c6c79baf467cc6d0e01be66d7abd2e45b3995bb Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Tue, 18 Jun 2019 22:50:31 +0200 Subject: [PATCH 034/190] [VimeoBridge] Add new bridge (#933) Closes #932 --- bridges/VimeoBridge.php | 175 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 bridges/VimeoBridge.php diff --git a/bridges/VimeoBridge.php b/bridges/VimeoBridge.php new file mode 100644 index 00000000..d318e30e --- /dev/null +++ b/bridges/VimeoBridge.php @@ -0,0 +1,175 @@ + array( + 'name' => 'Search Query', + 'type' => 'text', + 'required' => true + ), + 'type' => array( + 'name' => 'Show results for', + 'type' => 'list', + 'defaultValue' => 'Videos', + 'values' => array( + 'Videos' => 'search', + 'On Demand' => 'search/ondemand', + 'People' => 'search/people', + 'Channels' => 'search/channels', + 'Groups' => 'search/groups' + ) + ) + ) + ); + + public function getURI() { + if(($query = $this->getInput('q')) + && ($type = $this->getInput('type'))) { + return self::URI . $type . '/sort:latest?q=' . 
$query; + } + + return parent::getURI(); + } + + public function collectData() { + + $html = getSimpleHTMLDOM($this->getURI(), + $header = array(), + $opts = array(), + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = false, // We want to keep newline characters + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT) + or returnServerError('Could not request ' . $this->getURI()); + + $json = null; // Holds the JSON data + + /** + * Search results are included as JSON formatted string inside a script + * tag that has the variable 'vimeo.config'. The data is condensed into + * a single line of code, so we can just search for the newline. + * + * Everything after "vimeo.config = _extend((vimeo.config || {}), " is + * the JSON formatted string. + */ + foreach($html->find('script') as $script) { + foreach(explode("\n", $script) as $line) { + $line = trim($line); + + if(strpos($line, 'vimeo.config') !== 0) + continue; + + // 45 = strlen("vimeo.config = _extend((vimeo.config || {}), "); + // 47 = 45 + 2, because we don't want the final ");" + $json = json_decode(substr($line, 45, strlen($line) - 47)); + } + } + + if(is_null($json)) { + returnClientError('No results for this query!'); + } + + foreach($json->api->initial_json->data as $element) { + switch($element->type) { + case 'clip': $this->addClip($element); break; + case 'ondemand': $this->addOnDemand($element); break; + case 'people': $this->addPeople($element); break; + case 'channel': $this->addChannel($element); break; + case 'group': $this->addGroup($element); break; + + default: returnServerError('Unknown type: ' . 
$element->type); + } + } + + } + + private function addClip($element) { + $item = array(); + + $item['uri'] = $element->clip->link; + $item['title'] = $element->clip->name; + $item['author'] = $element->clip->user->name; + $item['timestamp'] = strtotime($element->clip->created_time); + + $item['enclosures'] = array( + end($element->clip->pictures->sizes)->link + ); + + $item['content'] = ""; + + $this->items[] = $item; + } + + private function addOnDemand($element) { + $item = array(); + + $item['uri'] = $element->ondemand->link; + $item['title'] = $element->ondemand->name; + + // Only for films + if(isset($element->ondemand->film)) + $item['timestamp'] = strtotime($element->ondemand->film->release_time); + + $item['enclosures'] = array( + end($element->ondemand->pictures->sizes)->link + ); + + $item['content'] = ""; + + $this->items[] = $item; + } + + private function addPeople($element) { + $item = array(); + + $item['uri'] = $element->people->link; + $item['title'] = $element->people->name; + + $item['enclosures'] = array( + end($element->people->pictures->sizes)->link + ); + + $item['content'] = ""; + + $this->items[] = $item; + } + + private function addChannel($element) { + $item = array(); + + $item['uri'] = $element->channel->link; + $item['title'] = $element->channel->name; + + $item['enclosures'] = array( + end($element->channel->pictures->sizes)->link + ); + + $item['content'] = ""; + + $this->items[] = $item; + } + + private function addGroup($element) { + $item = array(); + + $item['uri'] = $element->group->link; + $item['title'] = $element->group->name; + + $item['enclosures'] = array( + end($element->group->pictures->sizes)->link + ); + + $item['content'] = ""; + + $this->items[] = $item; + } +} From 91e73b00b5051b213074ed852f6f93f564db6efe Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Tue, 18 Jun 2019 22:57:42 +0200 Subject: [PATCH 035/190] [NationalGeographicBridge] Add new bridge (#1065) Closes #1029 --- bridges/NationalGeographicBridge.php 
| 194 +++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100644 bridges/NationalGeographicBridge.php diff --git a/bridges/NationalGeographicBridge.php b/bridges/NationalGeographicBridge.php new file mode 100644 index 00000000..dfccd25c --- /dev/null +++ b/bridges/NationalGeographicBridge.php @@ -0,0 +1,194 @@ + array( + self::PARAMETER_TOPIC => array( + 'name' => 'Topic', + 'type' => 'list', + 'values' => array( + self::TOPIC_MAGAZINE => 'magazine', + self::TOPIC_LATEST_STORIES => 'latest-stories' + ), + 'title' => 'Select your topic', + 'defaultValue' => 'Magazine' + ) + ), + 'global' => array( + self::PARAMETER_FULL_ARTICLE => array( + 'name' => 'Full Article', + 'type' => 'checkbox', + 'title' => 'Enable to load full articles (takes longer)' + ) + ) + ); + + private $topicName = ''; + + public function getURI() { + switch ($this->queriedContext) { + case self::CONTEXT_BY_TOPIC: { + return self::URI . $this->getInput(self::PARAMETER_TOPIC); + } break; + default: { + return parent::getURI(); + } + } + } + + public function collectData() { + $this->topicName = $this->getTopicName($this->getInput(self::PARAMETER_TOPIC)); + + switch($this->topicName) { + case self::TOPIC_MAGAZINE: { + return $this->collectMagazine(); + } break; + case self::TOPIC_LATEST_STORIES: { + return $this->collectLatestStories(); + } break; + default: { + returnServerError('Unknown topic: "' . $this->topicName . '"'); + } + } + } + + public function getName() { + switch ($this->queriedContext) { + case self::CONTEXT_BY_TOPIC: { + return static::NAME . ': ' . $this->topicName; + } break; + default: { + return parent::getName(); + } + } + } + + private function getTopicName($topic) { + return array_search($topic, static::PARAMETERS[self::CONTEXT_BY_TOPIC][self::PARAMETER_TOPIC]['values']); + } + + private function collectMagazine() { + $uri = $this->getURI(); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request ' . 
$uri); + + $script = $html->find('#lead-component script')[0]; + + $json = json_decode($script->innertext, true); + + // This is probably going to break in the future, fix it then :) + foreach($json['body']['0']['multilayout_promo_beta']['stories'] as $story) { + $this->addStory($story); + } + } + + private function collectLatestStories() { + $uri = self::URI . 'latest-stories/_jcr_content/content/hubfeed.promo-hub-feed-all-stories.json'; + + $json_raw = getContents($uri) + or returnServerError('Could not request ' . $uri); + + foreach(json_decode($json_raw, true) as $story) { + $this->addStory($story); + } + } + + private function addStory($story) { + $title = 'Unknown title'; + $content = ''; + + foreach($story['components'] as $component) { + switch($component['content_type']) { + case 'title': { + $title = $component['title']['text']; + } break; + case 'dek': { + $content = $component['dek']['text']; + } break; + } + } + + $item = array(); + + $item['uri'] = $story['uri']; + $item['title'] = $title; + + // if full article is requested! + if ($this->getInput(self::PARAMETER_FULL_ARTICLE)) + $item['content'] = $this->getFullArticle($item['uri']); + else + $item['content'] = $content; + + if (isset($story['promo_image'])) { + switch($story['promo_image']['content_type']) { + case 'image': { + $item['enclosures'][] = $story['promo_image']['image']['uri']; + } break; + } + } + + if (isset($story['lead_media'])) { + $media = $story['lead_media']; + switch($media['content_type']) { + case 'image': { + // Don't add if promo_image was added + if (empty($item['enclosures'])) + $item['enclosures'][] = $media['image']['uri']; + } break; + case 'image_gallery': { + foreach($media['image_gallery']['images'] as $image) { + $item['enclosures'][] = $image['uri']; + } + } break; + } + } + + $this->items[] = $item; + } + + private function getFullArticle($uri) { + $html = getSimpleHTMLDOMCached($uri) + or returnServerError('Could not load ' . 
$uri); + + $html = defaultLinkTo($html, $uri); + + $content = ''; + + foreach($html->find(' + .content > .smartbody.text, + .content > .section.image script[type="text/json"], + .content > .section.image span[itemprop="caption"], + .content > .section.inline script[type="text/json"] + ') as $element) { + if ($element->tag === 'script') { + $json = json_decode($element->innertext, true); + if (isset($json['src'])) { + $content .= '' . $json['alt'] . ''; + } elseif (isset($json['galleryType']) && isset($json['endpoint'])) { + $doc = getContents($json['endpoint']) + or returnServerError('Could not load ' . $json['endpoint']); + $json = json_decode($doc, true); + foreach($json['items'] as $item) { + $content .= '

' . $item['caption'] . '

'; + $content .= '' . $item['caption'] . ''; + } + } + } else { + $content .= $element->outertext; + } + } + + return $content; + } +} From 1989252608c4363d3429ebdb23c0eec3fad1a6ca Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 19 Jun 2019 20:40:56 +0000 Subject: [PATCH 036/190] [TelegramBridge] Add new bridge (#1175) --- bridges/TelegramBridge.php | 265 +++++++++++++++++++++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 bridges/TelegramBridge.php diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php new file mode 100644 index 00000000..5fec905e --- /dev/null +++ b/bridges/TelegramBridge.php @@ -0,0 +1,265 @@ + array( + 'name' => 'Username', + 'type' => 'text', + 'exampleValue' => '@telegram', + ) + ) + ); + + const CACHE_TIMEOUT = 900; // 15 mins + + private $feedName = ''; + private $enclosures = array(); + private $itemTitle = ''; + + private $backgroundImageRegex = "/background-image:url\('(.*)'\)/"; + + public function collectData() { + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $channelTitle = htmlspecialchars_decode( + $html->find('div.tgme_channel_info_header_title span', 0)->plaintext, + ENT_QUOTES + ); + $this->feedName = $channelTitle . ' (@' . $this->processUsername() . ')'; + + foreach($html->find('div.tgme_widget_message_wrap.js-widget_message_wrap') as $index => $messageDiv) { + $this->itemTitle = ''; + $this->enclosures = array(); + $item = array(); + + $item['uri'] = $this->processUri($messageDiv); + $item['content'] = $this->processContent($messageDiv); + $item['title'] = $this->itemTitle; + $item['timestamp'] = $this->processDate($messageDiv); + $item['enclosures'] = $this->enclosures; + + $this->items[] = $item; + } + $this->items = array_reverse($this->items); + } + + public function getURI() { + + if (!is_null($this->getInput('username'))) { + return self::URI . '/s/' . 
$this->processUsername(); + } + + return parent::getURI(); + } + + public function getName() { + + if (!empty($this->feedName)) { + return $this->feedName . ' - Telegram'; + } + + return parent::getName(); + } + + private function processUsername() { + + if (substr($this->getInput('username'), 0, 1) === '@') { + return substr($this->getInput('username'), 1); + } + + return $this->getInput('username'); + } + + private function processUri($messageDiv) { + return $messageDiv->find('a.tgme_widget_message_date', 0)->href; + } + + private function processContent($messageDiv) { + $message = ''; + + if ($messageDiv->find('div.tgme_widget_message_forwarded_from', 0)) { + $message = $messageDiv->find('div.tgme_widget_message_forwarded_from', 0)->innertext . '

'; + } + + if ($messageDiv->find('a.tgme_widget_message_reply', 0)) { + $message = $this->processReply($messageDiv); + } + + if ($messageDiv->find('div.tgme_widget_message_sticker_wrap', 0)) { + $message .= $this->processSticker($messageDiv); + } + + if ($messageDiv->find('div.tgme_widget_message_poll', 0)) { + $message .= $this->processPoll($messageDiv); + } + + if ($messageDiv->find('video', 0)) { + $message .= $this->processVideo($messageDiv); + } + + if ($messageDiv->find('a.tgme_widget_message_photo_wrap', 0)) { + $message .= $this->processPhoto($messageDiv); + } + + if ($messageDiv->find('div.tgme_widget_message_text.js-message_text', 0)) { + $message .= $messageDiv->find('div.tgme_widget_message_text.js-message_text', 0); + + $this->itemTitle = $this->ellipsisTitle( + $messageDiv->find('div.tgme_widget_message_text.js-message_text', 0)->plaintext + ); + } + + if ($messageDiv->find('a.tgme_widget_message_link_preview', 0)) { + $message .= $this->processLinkPreview($messageDiv); + } + + return $message; + } + + private function processReply($messageDiv) { + + $reply = $messageDiv->find('a.tgme_widget_message_reply', 0); + + return <<{$reply->find('span.tgme_widget_message_author_name', 0)->plaintext}
+{$reply->find('div.tgme_widget_message_text', 0)->innertext} +{$reply->href}
+EOD; + } + + private function processSticker($messageDiv) { + + if (empty($this->itemTitle)) { + $this->itemTitle = '@' . $this->processUsername() . ' posted a sticker'; + } + + $stickerDiv = $messageDiv->find('div.tgme_widget_message_sticker_wrap', 0); + + preg_match($this->backgroundImageRegex, $stickerDiv->find('i', 0)->style, $sticker); + + $this->enclosures[] = $sticker[1]; + + return << +EOD; + } + + private function processPoll($messageDiv) { + + $poll = $messageDiv->find('div.tgme_widget_message_poll', 0); + + $title = $poll->find('div.tgme_widget_message_poll_question', 0)->plaintext; + $type = $poll->find('div.tgme_widget_message_poll_type', 0)->plaintext; + + if (empty($this->itemTitle)) { + $this->itemTitle = $title; + } + + $pollOptions = '
    '; + + foreach ($poll->find('div.tgme_widget_message_poll_option') as $option) { + $pollOptions .= '
  • ' . $option->children(0)->plaintext . ' - ' . + $option->find('div.tgme_widget_message_poll_option_text', 0)->plaintext . '
  • '; + } + $pollOptions .= '
'; + + return <<$type
{$pollOptions} +EOD; + } + + private function processLinkPreview($messageDiv) { + + $image = ''; + $title = ''; + $site = ''; + $description = ''; + + $preview = $messageDiv->find('a.tgme_widget_message_link_preview', 0); + + if (trim($preview->innertext) === '') { + return ''; + } + + if($preview->find('i', 0) && + preg_match($this->backgroundImageRegex, $preview->find('i', 0)->style, $photo)) { + + $image = ''; + $this->enclosures[] = $photo[1]; + } + + if ($preview->find('div.link_preview_title', 0)) { + $title = $preview->find('div.link_preview_title', 0)->plaintext; + } + + if ($preview->find('div.link_preview_site_name', 0)) { + $site = $preview->find('div.link_preview_site_name', 0)->plaintext; + } + + if ($preview->find('div.link_preview_description', 0)) { + $description = $preview->find('div.link_preview_description', 0)->plaintext; + } + + return <<$image
+{$title} - {$site}
{$description} +EOD; + } + + private function processVideo($messageDiv) { + + if (empty($this->itemTitle)) { + $this->itemTitle = '@' . $this->processUsername() . ' posted a video'; + } + + preg_match($this->backgroundImageRegex, $messageDiv->find('i.tgme_widget_message_video_thumb', 0)->style, $photo); + + $this->enclosures[] = $photo[1]; + + return << + + +EOD; + } + + private function processPhoto($messageDiv) { + + if (empty($this->itemTitle)) { + $this->itemTitle = '@' . $this->processUsername() . ' posted a photo'; + } + + $photos = ''; + + foreach ($messageDiv->find('a.tgme_widget_message_photo_wrap') as $photoWrap) { + preg_match($this->backgroundImageRegex, $photoWrap->style, $photo); + + $this->enclosures[] = $photo[1]; + + $photos .= <<
+EOD; + } + return $photos; + } + + private function processDate($messageDiv) { + return $messageDiv->find('time', 0)->datetime; + } + + private function ellipsisTitle($text) { + + $length = 100; + + if (strlen($text) > $length) { + $text = explode('
', wordwrap($text, $length, '
')); + return $text[0] . '...'; + } + return $text; + } +} From 7ff97c0c7b60d8273f241c851decd2dd1e8e860b Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Wed, 19 Jun 2019 23:09:08 +0200 Subject: [PATCH 037/190] [HtmlFormat] Dynamically build buttons for other feed formats Adding or removing feed formats from the "formats/" directory currently has no effect on the buttons shown in the HTML format. This can cause errors if users press one of the buttons for a format that is no longer available on the server. This commit changes the behavior to dynamically add buttons based on the available formats. Syndication feeds, however, are no longer supported as they require knowledge about the content type, which is not known without further changes to the formats API (may be added later if there is a demand). Closes #942 --- formats/HtmlFormat.php | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 052bedc5..fefabf03 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -4,8 +4,21 @@ class HtmlFormat extends FormatAbstract { $extraInfos = $this->getExtraInfos(); $title = htmlspecialchars($extraInfos['name']); $uri = htmlspecialchars($extraInfos['uri']); - $atomquery = str_replace('format=Html', 'format=Atom', htmlentities($_SERVER['QUERY_STRING'])); - $mrssquery = str_replace('format=Html', 'format=Mrss', htmlentities($_SERVER['QUERY_STRING'])); + + // Dynamically build buttons for all formats (except HTML) + $formatFac = new FormatFactory(); + $formatFac->setWorkingDir(PATH_LIB_FORMATS); + + $buttons = ''; + + foreach($formatFac->getFormatNames() as $format) { + if(strcasecmp($format, 'HTML') === 0) { + continue; + } + + $query = str_replace('format=Html', 'format=' . $format, htmlentities($_SERVER['QUERY_STRING'])); + $buttons .= $this->buildButton($format, $query) . 
PHP_EOL; + } $entries = ''; foreach($this->getItems() as $item) { @@ -84,16 +97,13 @@ EOD; {$title} - -

{$title}

{$entries} @@ -113,4 +123,10 @@ EOD; return parent::display(); } + + private function buildButton($format, $query) { + return << +EOD; + } } From 7926ffad7396fb029fe3c4c6d95f01e97921ab42 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Fri, 21 Jun 2019 00:00:44 +0200 Subject: [PATCH 038/190] [KununuBridge] Improve feed contents - Add support for ratings - Add support for benefits - Fix broken timestamp --- bridges/KununuBridge.php | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php index cb13ca35..7cc4af60 100644 --- a/bridges/KununuBridge.php +++ b/bridges/KununuBridge.php @@ -24,6 +24,16 @@ class KununuBridge extends BridgeAbstract { 'type' => 'checkbox', 'exampleValue' => 'checked', 'title' => 'Activate to load full article' + ), + 'include_ratings' => array( + 'name' => 'Include ratings', + 'type' => 'checkbox', + 'title' => 'Activate to include ratings in the feed' + ), + 'include_benefits' => array( + 'name' => 'Include benefits', + 'type' => 'checkbox', + 'title' => 'Activate to include benefits in the feed' ) ), array( @@ -116,7 +126,7 @@ class KununuBridge extends BridgeAbstract { $item = array(); $item['author'] = $this->extractArticleAuthorPosition($article); - $item['timestamp'] = strtotime($date); + $item['timestamp'] = strtotime($date->content); $item['title'] = $rating->getAttribute('aria-label') . ' : ' . strip_tags($summary->innertext); @@ -175,7 +185,32 @@ class KununuBridge extends BridgeAbstract { $description = $article->find('[itemprop=reviewBody]', 0) or returnServerError('Cannot find article description!'); - return $description->innertext; + $retVal = $description->innertext; + + if($this->getInput('include_ratings') + && ($ratings = $article->find('.review-ratings .rating-group'))) { + $retVal .= (empty($retVal) ? '' : '
') . ''; + foreach($ratings as $rating) { + $retVal .= << + +EOD; + } + $retVal .= '
{$rating->find('.rating-title', 0)->plaintext} + {$rating->find('.rating-badge', 0)->plaintext} +
'; + } + + if($this->getInput('include_benefits') + && ($benefits = $article->find('benefit'))) { + $retVal .= (empty($retVal) ? '' : '
') . '
    '; + foreach($benefits as $benefit) { + $retVal .= "
  • {$benefit->plaintext}
  • "; + } + $retVal .= '
'; + } + + return $retVal; } /** From e2bca5bb052420c78db01120037b06206b01412d Mon Sep 17 00:00:00 2001 From: husimo <50374438+husim0@users.noreply.github.com> Date: Fri, 21 Jun 2019 17:30:34 +0200 Subject: [PATCH 039/190] [MastodonBridge] Add new bridge (#1178) --- bridges/MastodonBridge.php | 89 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 bridges/MastodonBridge.php diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php new file mode 100644 index 00000000..9e131b7d --- /dev/null +++ b/bridges/MastodonBridge.php @@ -0,0 +1,89 @@ + array( + 'name' => 'Canonical username (ex : @sebsauvage@framapiaf.org)', + 'required' => true, + ), + 'norep' => array( + 'name' => 'Without replies', + 'type' => 'checkbox', + 'title' => 'Only return initial toots' + ), + 'noboost' => array( + 'name' => 'Without boosts', + 'required' => false, + 'type' => 'checkbox', + 'title' => 'Hide boosts' + ) + )); + + public function getName(){ + switch($this->queriedContext) { + case 'By username': + return $this->getInput('canusername'); + default: return parent::getName(); + } + } + + protected function parseItem($newItem){ + $item = parent::parseItem($newItem); + + $content = str_get_html($item['content']); + $title = str_get_html($item['title']); + + $item['title'] = $content->plaintext; + + if(strlen($item['title']) > 75) { + $item['title'] = substr($item['title'], 0, strpos(wordwrap($item['title'], 75), "\n")) . '...'; + } + + if(strpos($title, 'shared a status by') !== false) { + if($this->getInput('noboost')) { + return null; + } + + preg_match('/shared a status by (\S{0,})/', $title, $matches); + $item['title'] = 'Boost ' . $matches[1] . ' ' . 
$item['title']; + $item['author'] = $matches[1]; + } else { + $item['author'] = $this->getInput('canusername'); + } + + // Check if it's a initial toot or a response + if($this->getInput('norep') && preg_match('/^@.+/', trim($content->plaintext))) { + return null; + } + + return $item; + } + + private function getInstance(){ + preg_match('/^@[a-zA-Z0-9_]+@(.+)/', $this->getInput('canusername'), $matches); + return $matches[1]; + } + + private function getUsername(){ + preg_match('/^@([a-zA-Z_0-9_]+)@.+/', $this->getInput('canusername'), $matches); + return $matches[1]; + } + + public function getURI(){ + if($this->getInput('canusername')) + return 'https://' . $this->getInstance() . '/users/' . $this->getUsername() . '.atom'; + + return parent::getURI(); + } + + public function collectData(){ + return $this->collectExpandableDatas($this->getURI()); + } +} From 1591e18027064d76a78fae27352b0cf9a1a50629 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Fri, 21 Jun 2019 19:08:59 +0200 Subject: [PATCH 040/190] core: Add context hinting for new feeds RSS-Bridge currently has to guess the queried context from the data provided by the user. This, however, can cause issues for bridges that have multiple contexts with conflicting parameters (i.e. none). This commit adds context hinting to queries via '&context=' which can be omitted in which case the context is determined as before. 
--- lib/BridgeAbstract.php | 12 ++++++++++-- lib/BridgeCard.php | 10 ++++++++-- lib/ParameterValidator.php | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index 13215a46..b4eb9ff5 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -194,6 +194,11 @@ abstract class BridgeAbstract implements BridgeInterface { */ public function setDatas(array $inputs){ + if(isset($inputs['context'])) { // Context hinting (optional) + $this->queriedContext = $inputs['context']; + unset($inputs['context']); + } + if(empty(static::PARAMETERS)) { if(!empty($inputs)) { @@ -218,8 +223,11 @@ abstract class BridgeAbstract implements BridgeInterface { ); } - // Guess the paramter context from input data - $this->queriedContext = $validator->getQueriedContext($inputs, static::PARAMETERS); + // Guess the context from input data + if(empty($this->queriedContext)) { + $this->queriedContext = $validator->getQueriedContext($inputs, static::PARAMETERS); + } + if(is_null($this->queriedContext)) { returnClientError('Required parameter(s) missing'); } elseif($this->queriedContext === false) { diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 8c36919c..c6f38221 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -48,13 +48,19 @@ final class BridgeCard { * @param bool $isHttps If disabled, adds a warning to the form * @return string The form header */ - private static function getFormHeader($bridgeName, $isHttps = false) { + private static function getFormHeader($bridgeName, $isHttps = false, $parameterName = '') { $form = << EOD; + if(!empty($parameterName)) { + $form .= << +EOD; + } + if(!$isHttps) { $form .= '
Warning : This bridge is not fetching its content through a secure connection
'; @@ -80,7 +86,7 @@ This bridge is not fetching its content through a secure connection'; $isHttps = false, $parameterName = '', $parameters = array()) { - $form = self::getFormHeader($bridgeName, $isHttps); + $form = self::getFormHeader($bridgeName, $isHttps, $parameterName); if(count($parameters) > 0) { diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 55e6fe4b..f740888a 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -214,6 +214,7 @@ class ParameterValidator { switch(array_sum($queriedContexts)) { case 0: // Found no match, is there a context without parameters? + if(isset($data['context'])) return $data['context']; foreach($queriedContexts as $context => $queried) { if(is_null($queried)) { return $context; From 372461b1a3ff21275b33ffb940679a21a4449251 Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Sat, 22 Jun 2019 17:34:02 +0100 Subject: [PATCH 041/190] [TelegramBridge] Fix timestamp for videos (#1181) --- bridges/TelegramBridge.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index 5fec905e..8471cc39 100644 --- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -249,7 +249,10 @@ EOD; } private function processDate($messageDiv) { - return $messageDiv->find('time', 0)->datetime; + + $messageMeta = $messageDiv->find('span.tgme_widget_message_meta', 0); + return $messageMeta->find('time', 0)->datetime; + } private function ellipsisTitle($text) { From 5e2f0fb626d4d3474943307fe25eb61b329ae498 Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Sat, 22 Jun 2019 17:44:25 +0100 Subject: [PATCH 042/190] [TelegramBridge] Prevent double encoding entities (#1182) --- bridges/TelegramBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index 8471cc39..4fea650c 100644 
--- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -38,8 +38,8 @@ class TelegramBridge extends BridgeAbstract { $item = array(); $item['uri'] = $this->processUri($messageDiv); - $item['content'] = $this->processContent($messageDiv); - $item['title'] = $this->itemTitle; + $item['content'] = html_entity_decode($this->processContent($messageDiv), ENT_QUOTES); + $item['title'] = html_entity_decode($this->itemTitle, ENT_QUOTES); $item['timestamp'] = $this->processDate($messageDiv); $item['enclosures'] = $this->enclosures; From 69acc6228a36d2bc0aaa87c02843844ca28f9fba Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Sat, 22 Jun 2019 17:45:15 +0100 Subject: [PATCH 043/190] [TelegramBridge] Populate author (#1183) --- bridges/TelegramBridge.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index 4fea650c..5a9f128d 100644 --- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -42,6 +42,8 @@ class TelegramBridge extends BridgeAbstract { $item['title'] = html_entity_decode($this->itemTitle, ENT_QUOTES); $item['timestamp'] = $this->processDate($messageDiv); $item['enclosures'] = $this->enclosures; + $author = trim($messageDiv->find('a.tgme_widget_message_owner_name', 0)->plaintext); + $item['author'] = html_entity_decode($author, ENT_QUOTES); $this->items[] = $item; } From 99d4571c6b2c809f483d025ebab21b8b5f7cb276 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Fri, 21 Jun 2019 21:22:21 +0200 Subject: [PATCH 044/190] core: Make RSS-Bridge more usable via mobile devices Adds styles for display sizes smaller than 768px where elements are currently hardly usable. Note that RSS-Bridge is not designed for mobile use, but some users may want to try things on their mobile phone before using it in real life applications. 
Resolves #796 --- formats/HtmlFormat.php | 1 + static/HtmlFormat.css | 17 ++++++++++++++ static/style.css | 52 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index fefabf03..687c1f4f 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -95,6 +95,7 @@ EOD; + {$title} diff --git a/static/HtmlFormat.css b/static/HtmlFormat.css index 5b9fd2e1..4ebc79e9 100644 --- a/static/HtmlFormat.css +++ b/static/HtmlFormat.css @@ -96,3 +96,20 @@ html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockq button:hover { background: #49afff; } + +@media screen and (max-width: 767px) { + + section { + width: 100%; + padding: 0; + + } + + button { + display: inline-block; + width: 40%; + padding: 5px auto; + margin: 3px auto 0; + } + +} diff --git a/static/style.css b/static/style.css index 4abf9f96..7fd1ee40 100644 --- a/static/style.css +++ b/static/style.css @@ -199,6 +199,7 @@ form { .parameters label { text-align: right; + line-height: 1.5em; } .parameters label::before { @@ -303,3 +304,54 @@ h5 { .advice > li { text-align: left; } + +@media screen and (max-width: 767px) { + body { + font-size: 75%; + } + + header > section.warning { + width: 90%; + } + + header > section.critical-warning { + width: 90%; + } + + .searchbar { + width: 90%; + margin: 0 auto; + } + + section { + width: 90%; + margin: 10px auto; + overflow: hidden; + } + + button { + display: inline-block; + width: 40%; + padding: 5px auto; + margin: 3px auto 0; + } + + @supports (display: grid) { + + .parameters { + grid-template-columns: auto auto; + grid-column-gap: 5px; + } + + .parameters label { + line-height: 2em; + word-break: break-word; + } + + } /* @supports (display: grid) */ + + .secure-warning { + width: 100%; + } + +} \ No newline at end of file From 89e3da0b6f64bb72929f0107e896432f4f83d4dc Mon Sep 17 00:00:00 2001 From: LogMANOriginal Date: Sat, 22 Jun 2019 18:50:06 +0200 
Subject: [PATCH 045/190] [IndeedBridge] Add new bridge (#1166) Implements a bridge for https://www.indeed.com/ (or any of the local variants) Features: - Takes a company name and returns a list of reviews and comments - Limit the maximum number of items to return (default: 20) - No upper limit on the number of items to return - Search by language code (45 options) - Supports detectParameters for any supported URL --- bridges/IndeedBridge.php | 245 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 bridges/IndeedBridge.php diff --git a/bridges/IndeedBridge.php b/bridges/IndeedBridge.php new file mode 100644 index 00000000..c1d0cfd3 --- /dev/null +++ b/bridges/IndeedBridge.php @@ -0,0 +1,245 @@ + array( + 'name' => 'Company', + 'type' => 'text', + 'required' => true, + 'title' => 'Company name', + 'exampleValue' => 'GitHub', + ) + ), + 'global' => array( + 'language' => array( + 'name' => 'Language Code', + 'type' => 'list', + 'title' => 'Choose your language code', + 'defaultValue' => 'en-US', + 'values' => array( + 'es-AR' => 'es-AR', + 'de-AT' => 'de-AT', + 'en-AU' => 'en-AU', + 'nl-BE' => 'nl-BE', + 'fr-BE' => 'fr-BE', + 'pt-BR' => 'pt-BR', + 'en-CA' => 'en-CA', + 'fr-CA' => 'fr-CA', + 'de-CH' => 'de-CH', + 'fr-CH' => 'fr-CH', + 'es-CL' => 'es-CL', + 'zh-CN' => 'zh-CN', + 'es-CO' => 'es-CO', + 'de-DE' => 'de-DE', + 'es-ES' => 'es-ES', + 'fr-FR' => 'fr-FR', + 'en-GB' => 'en-GB', + 'en-HK' => 'en-HK', + 'en-IE' => 'en-IE', + 'en-IN' => 'en-IN', + 'it-IT' => 'it-IT', + 'ja-JP' => 'ja-JP', + 'ko-KR' => 'ko-KR', + 'es-MX' => 'es-MX', + 'nl-NL' => 'nl-NL', + 'pl-PL' => 'pl-PL', + 'en-SG' => 'en-SG', + 'en-US' => 'en-US', + 'en-ZA' => 'en-ZA', + 'en-AE' => 'en-AE', + 'da-DK' => 'da-DK', + 'in-ID' => 'in-ID', + 'en-MY' => 'en-MY', + 'es-PE' => 'es-PE', + 'en-PH' => 'en-PH', + 'en-PK' => 'en-PK', + 'ro-RO' => 'ro-RO', + 'ru-RU' => 'ru-RU', + 'tr-TR' => 'tr-TR', + 'zh-TW' => 'zh-TW', + 'vi-VN' => 'vi-VN', + 'en-VN' => 'en-VN', + 
'ar-EG' => 'ar-EG', + 'fr-MA' => 'fr-MA', + 'en-NG' => 'en-NG', + ) + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + 'exampleValue' => 20, + ) + ) + ); + + const SITES = array( + 'es-AR' => 'https://ar.indeed.com/', + 'de-AT' => 'https://at.indeed.com/', + 'en-AU' => 'https://au.indeed.com/', + 'nl-BE' => 'https://be.indeed.com/', + 'fr-BE' => 'https://emplois.be.indeed.com/', + 'pt-BR' => 'https://www.indeed.com.br/', + 'en-CA' => 'https://ca.indeed.com/', + 'fr-CA' => 'https://emplois.ca.indeed.com/', + 'de-CH' => 'https://www.indeed.ch/', + 'fr-CH' => 'https://emplois.indeed.ch/', + 'es-CL' => 'https://www.indeed.cl/', + 'zh-CN' => 'https://cn.indeed.com/', + 'es-CO' => 'https://co.indeed.com/', + 'de-DE' => 'https://de.indeed.com/', + 'es-ES' => 'https://www.indeed.es/', + 'fr-FR' => 'https://www.indeed.fr/', + 'en-GB' => 'https://www.indeed.co.uk/', + 'en-HK' => 'https://www.indeed.hk/', + 'en-IE' => 'https://ie.indeed.com/', + 'en-IN' => 'https://www.indeed.co.in/', + 'it-IT' => 'https://it.indeed.com/', + 'ja-JP' => 'https://jp.indeed.com/', + 'ko-KR' => 'https://kr.indeed.com/', + 'es-MX' => 'https://www.indeed.com.mx/', + 'nl-NL' => 'https://www.indeed.nl/', + 'pl-PL' => 'https://pl.indeed.com/', + 'en-SG' => 'https://www.indeed.com.sg/', + 'en-US' => 'https://www.indeed.com/', + 'en-ZA' => 'https://www.indeed.co.za/', + 'en-AE' => 'https://www.indeed.ae/', + 'da-DK' => 'https://dk.indeed.com/', + 'in-ID' => 'https://id.indeed.com/', + 'en-MY' => 'https://www.indeed.com.my/', + 'es-PE' => 'https://www.indeed.com.pe/', + 'en-PH' => 'https://www.indeed.com.ph/', + 'en-PK' => 'https://www.indeed.com.pk/', + 'ro-RO' => 'https://ro.indeed.com/', + 'ru-RU' => 'https://ru.indeed.com/', + 'tr-TR' => 'https://tr.indeed.com/', + 'zh-TW' => 'https://tw.indeed.com/', + 'vi-VN' => 'https://vn.indeed.com/', + 'en-VN' => 'https://jobs.vn.indeed.com/', + 'ar-EG' => 'https://eg.indeed.com/', + 'fr-MA' 
=> 'https://ma.indeed.com/', + 'en-NG' => 'https://ng.indeed.com/', + ); + + private $title; + + public function collectData() { + + $url = $this->getURI(); + $limit = $this->getInput('limit') ?: 20; + + do { + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request ' . $url); + + $html = defaultLinkTo($html, $url); + + $this->title = $html->find('h1', 0)->innertext; + + // Use local translation of the word "Rating" + $rating_local = $html->find('a[data-id="rating_desc"]', 0)->plaintext; + + foreach($html->find('#cmp-content [id^="cmp-review-"]') as $review) { + $item = array(); + + $rating = $review->find('.cmp-ratingNumber', 0)->plaintext; + $title = $review->find('.cmp-review-title > span', 0)->plaintext; + $comment = $this->beautifyComment($review->find('.cmp-review-content-container', 0)); + + $item['uri'] = $review->find('.cmp-review-share-popup-item-link--copylink', 0)->href; + $item['title'] = "{$rating_local} {$rating} / {$title}"; + $item['timestamp'] = $review->find('.cmp-review-date-created', 0)->plaintext; + $item['author'] = $review->find('.cmp-reviewer', 0)->plaintext; + $item['content'] = $comment; + //$item['enclosures'] + $item['categories'][] = $review->find('.cmp-reviewer-job-location', 0)->plaintext; + //$item['uid'] + + $this->items[] = $item; + + if(count($this->items) >= $limit) { + break; + } + } + + // Break if no more pages available. + if($next = $html->find('a[data-tn-element="next-page"]', 0)) { + $url = $next->href; + } else { + break; + } + + } while(count($this->items) < $limit); + + } + + public function getURI() { + if($this->getInput('language') + && $this->getInput('c')) { + return self::SITES[$this->getInput('language')] + . 'cmp/' + . urlencode($this->getInput('c')) + . '/reviews'; + } + + return parent::getURI(); + } + + public function getName() { + return $this->title ?: parent::getName(); + } + + public function detectParameters($url) { + /** + * Expected: https://<...>.indeed.<...>/cmp/[/reviews][/...] 
+ * + * Note that most users will be redirected to their localized version + * of the page, which adds the language code to the host. For example, + * "en.indeed.com" or "www.indeed.fr" (see link[rel="alternate"]). At + * least each of the sites have ".indeed." in the name. + */ + + if(filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_PATH_REQUIRED) === false + || stristr($url, '.indeed.') === false) { + return null; + } + + $url_components = parse_url($url); + $path_segments = array_values(array_filter(explode('/', $url_components['path']))); + + if(count($path_segments) < 2 || $path_segments[0] !== 'cmp') { + return null; + } + + $language = array_search('https://' . $url_components['host'] . '/', self::SITES); + if($language === false) { + return null; + } + + $limit = self::PARAMETERS['global']['limit']['defaultValue'] ?: 20; + $company = $path_segments[1]; + + return array( + 'c' => $company, + 'language' => $language, + 'limit' => $limit, + ); + } + + private function beautifyComment($comment) { + foreach($comment->find('.cmp-bold') as $bold) { + $bold->tag = 'strong'; + $bold->removeClass('cmp-bold'); + } + + return $comment; + } +} From 3769850ba33c8707488800c01a0ec31e76fcb774 Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Sun, 23 Jun 2019 07:54:52 +0100 Subject: [PATCH 046/190] [TelegramBridge] Fix entries for "media too big" (#1184) When a large video is posted, "Media is too big" appears in web preview. This adds code to detect this and offer a link. 
--- bridges/TelegramBridge.php | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index 5a9f128d..e9f3bb08 100644 --- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -108,6 +108,10 @@ class TelegramBridge extends BridgeAbstract { $message .= $this->processPhoto($messageDiv); } + if ($messageDiv->find('a.not_supported', 0)) { + $message .= $this->processNotSupported($messageDiv); + } + if ($messageDiv->find('div.tgme_widget_message_text.js-message_text', 0)) { $message .= $messageDiv->find('div.tgme_widget_message_text.js-message_text', 0); @@ -250,6 +254,24 @@ EOD; return $photos; } + private function processNotSupported($messageDiv) { + + if (empty($this->itemTitle)) { + $this->itemTitle = '@' . $this->processUsername() . ' posted a video'; + } + + preg_match($this->backgroundImageRegex, $messageDiv->find('i.tgme_widget_message_video_thumb', 0)->style, $photo); + + $this->enclosures[] = $photo[1]; + + return << +{$messageDiv->find('div.message_media_not_supported_label', 0)->innertext}

+{$messageDiv->find('span.message_media_view_in_telegram', 0)->innertext}

+ +EOD; + } + private function processDate($messageDiv) { $messageMeta = $messageDiv->find('span.tgme_widget_message_meta', 0); From e4444e6432b5222ee222648d42cc59c93e05f62d Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Fri, 21 Jun 2019 19:52:51 +0200 Subject: [PATCH 047/190] [GogsBridge] Add new bridge --- bridges/GogsBridge.php | 206 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 bridges/GogsBridge.php diff --git a/bridges/GogsBridge.php b/bridges/GogsBridge.php new file mode 100644 index 00000000..a08bcc0e --- /dev/null +++ b/bridges/GogsBridge.php @@ -0,0 +1,206 @@ + array( + 'host' => array( + 'name' => 'Host', + 'exampleValue' => 'https://gogs.io', + 'required' => true, + 'title' => 'Host name without trailing slash', + ), + 'user' => array( + 'name' => 'Username', + 'exampleValue' => 'gogs', + 'required' => true, + 'title' => 'User name as it appears in the URL', + ), + 'project' => array( + 'name' => 'Project name', + 'exampleValue' => 'gogs', + 'required' => true, + 'title' => 'Project name as it appears in the URL', + ), + ), + 'Commits' => array( + 'branch' => array( + 'name' => 'Branch name', + 'defaultValue' => 'master', + 'required' => true, + 'title' => 'Branch name as it appears in the URL', + ), + ), + 'Issues' => array( + 'include_description' => array( + 'name' => 'Include issue description', + 'type' => 'checkbox', + 'title' => 'Activate to include the issue description', + ), + ), + 'Single issue' => array( + 'issue' => array( + 'name' => 'Issue number', + 'type' => 'number', + 'exampleValue' => 102, + 'required' => true, + 'title' => 'Issue number from the issues list', + ), + ), + 'Releases' => array(), + ); + + private $title = ''; + + /** + * Note: detectParamters doesn't make sense for this bridge because there is + * no "single" host for this service. Anyone can host it. 
+ */ + + public function getURI() { + switch($this->queriedContext) { + case 'Commits': { + return $this->getInput('host') + . '/' . $this->getInput('user') + . '/' . $this->getInput('project') + . '/commits/' . $this->getInput('branch'); + } break; + case 'Issues': { + return $this->getInput('host') + . '/' . $this->getInput('user') + . '/' . $this->getInput('project') + . '/issues/'; + } break; + case 'Single issue': { + return $this->getInput('host') + . '/' . $this->getInput('user') + . '/' . $this->getInput('project') + . '/issues/' . $this->getInput('issue'); + } break; + case 'Releases': { + return $this->getInput('host') + . '/' . $this->getInput('user') + . '/' . $this->getInput('project') + . '/releases/'; + } break; + default: return parent::getURI(); + } + } + + public function getName() { + switch($this->queriedContext) { + case 'Commits': + case 'Issues': + case 'Releases': return $this->title . ' ' . $this->queriedContext; + case 'Single issue': return $this->title . ' Issue ' . $this->getInput('issue'); + default: return parent::getName(); + } + } + + public function getIcon() { + return 'https://gogs.io/img/favicon.ico'; + } + + public function collectData() { + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request ' . 
$this->getURI()); + + $html = defaultLinkTo($html, $this->getURI()); + + $this->title = $html->find('[property="og:title"]', 0)->content; + + switch($this->queriedContext) { + case 'Commits': { + $this->collectCommitsData($html); + } break; + case 'Issues': { + $this->collectIssuesData($html); + } break; + case 'Single issue': { + $this->collectSingleIssueData($html); + } break; + case 'Releases': { + $this->collectReleasesData($html); + } break; + } + + } + + protected function collectCommitsData($html) { + $commits = $html->find('#commits-table tbody tr') + or returnServerError('Unable to find commits'); + + foreach($commits as $commit) { + $this->items[] = array( + 'uri' => $commit->find('a.sha', 0)->href, + 'title' => $commit->find('.message span', 0)->plaintext, + 'author' => $commit->find('.author', 0)->plaintext, + 'timestamp' => $commit->find('.time-since', 0)->title, + 'uid' => $commit->find('.sha', 0)->plaintext, + ); + } + } + + protected function collectIssuesData($html) { + $issues = $html->find('.issue.list li') + or returnServerError('Unable to find issues'); + + foreach($issues as $issue) { + $uri = $issue->find('a', 0)->href; + + $item = array( + 'uri' => $uri, + 'title' => $issue->find('.label', 0)->plaintext . ' | ' . 
$issue->find('a.title', 0)->plaintext, + 'author' => $issue->find('.desc a', 0)->plaintext, + 'timestamp' => $issue->find('.time-since', 0)->title, + 'uid' => $issue->find('.label', 0)->plaintext, + ); + + if($this->getInput('include_description')) { + $issue_html = getSimpleHTMLDOMCached($uri, 3600) + or returnServerError('Unable to load issue description'); + + $issue_html = defaultLinkTo($issue_html, $uri); + + $item['content'] = $issue_html->find('.comment .markdown', 0); + } + + $this->items[] = $item; + } + } + + protected function collectSingleIssueData($html) { + $comments = $html->find('.comments .comment') + or returnServerError('Unable to find comments'); + + foreach($comments as $comment) { + $this->items[] = array( + 'uri' => $comment->find('a[href*="#issue"]', 0)->href, + 'title' => $comment->find('span', 0)->plaintext, + 'author' => $comment->find('.content a', 0)->plaintext, + 'timestamp' => $comment->find('.time-since', 0)->title, + 'content' => $comment->find('.markdown', 0), + ); + } + + $this->items = array_reverse($this->items); + } + + protected function collectReleasesData($html) { + $releases = $html->find('#release-list li') + or returnServerError('Unable to find releases'); + + foreach($releases as $release) { + $this->items[] = array( + 'uri' => $release->find('a', 0)->href, + 'title' => 'Release ' . $release->find('h4', 0)->plaintext, + ); + } + } +} From fa8253c8bfee581c866673c3eb9ffc3ad95807f1 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 23 Jun 2019 09:14:16 +0200 Subject: [PATCH 048/190] [GiteaBridge] Add new bridge Gitea is a fork of Gogs and therefore shares most of its features except for releases. 
--- bridges/GiteaBridge.php | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 bridges/GiteaBridge.php diff --git a/bridges/GiteaBridge.php b/bridges/GiteaBridge.php new file mode 100644 index 00000000..33247873 --- /dev/null +++ b/bridges/GiteaBridge.php @@ -0,0 +1,27 @@ +find('#release-list > li') + or returnServerError('Unable to find releases'); + + foreach($releases as $release) { + $this->items[] = array( + 'uri' => $release->find('a', 0)->href, + 'title' => 'Release ' . $release->find('h3', 0)->plaintext, + ); + } + } +} From 987f42d6d44cc18e5c33441e742d15ffff6feb72 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Tue, 25 Jun 2019 18:39:28 +0200 Subject: [PATCH 049/190] logo: Add logo to the project References #1087 --- README.md | 2 +- formats/HtmlFormat.php | 1 + lib/BridgeList.php | 4 +- static/favicon.png | Bin 0 -> 3007 bytes static/favicon.svg | 122 +++++++++++++++++++++++++++++++ static/logo.svg | 162 +++++++++++++++++++++++++++++++++++++++++ static/logo_300px.png | Bin 0 -> 11546 bytes static/logo_600px.png | Bin 0 -> 24072 bytes static/style.css | 19 +++-- 9 files changed, 299 insertions(+), 11 deletions(-) create mode 100644 static/favicon.png create mode 100644 static/favicon.svg create mode 100644 static/logo.svg create mode 100644 static/logo_300px.png create mode 100644 static/logo_600px.png diff --git a/README.md b/README.md index 865840a0..8ee20c44 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -rss-bridge +![RSS-Bridge](static/logo_600px.png) === [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian 
Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-blue.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php index 687c1f4f..ebb6b78e 100644 --- a/formats/HtmlFormat.php +++ b/formats/HtmlFormat.php @@ -98,6 +98,7 @@ EOD; {$title} + diff --git a/lib/BridgeList.php b/lib/BridgeList.php index 9f77f7af..dc545de9 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -33,6 +33,7 @@ final class BridgeList { RSS-Bridge +