From 2175a4d08bf4d0d493d733adc7cd71d589e5918d Mon Sep 17 00:00:00 2001 From: Nono Date: Fri, 10 Jan 2020 14:22:58 +0100 Subject: [PATCH 001/192] [MozillaSecurityBridge] source has been modified (#1394) adjustement following source change --- bridges/MozillaSecurityBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/MozillaSecurityBridge.php b/bridges/MozillaSecurityBridge.php index 52672f56..1e7dc316 100644 --- a/bridges/MozillaSecurityBridge.php +++ b/bridges/MozillaSecurityBridge.php @@ -15,7 +15,7 @@ class MozillaSecurityBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::WEBROOT); $item = array(); - $articles = $html->find('div[itemprop="articleBody"] h2'); + $articles = $html->find('div[id="main-content"] h2'); foreach ($articles as $element) { $item['title'] = $element->innertext; From 1343dbe97ab3e6b5193f8628da3434ddf4bf5146 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Wed, 15 Jan 2020 21:36:12 +0100 Subject: [PATCH 002/192] [index] Bump spoofed user-agent version --- index.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.php b/index.php index 666b9e45..ee37b223 100644 --- a/index.php +++ b/index.php @@ -18,7 +18,7 @@ if (isset($argv)) { } define('USER_AGENT', - 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0(rss-bridge/' + 'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0(rss-bridge/' . Configuration::$VERSION . ';+' . REPOSITORY From 46b9879c084ac0defff4308c40abf7661c3f94b0 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Thu, 16 Jan 2020 11:00:10 +0000 Subject: [PATCH 003/192] [VkBridge] Correct post date calculating (#1417) * [VkBridge] Correct post date calculating Before this commit, post dates from december past year were calculated as december current year. --- bridges/VkBridge.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 713b86f3..ea81a2b2 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -374,6 +374,8 @@ class VkBridge extends BridgeAbstract } elseif (strstr($strdate, 'yesterday ') !== false) { $time = time() - 60 * 60 * 24; $strdate = date('d-m-Y', $time) . ' ' . $strdate; + } elseif ($date['month'] && intval(date('m')) < $date['month']) { + $strdate = $strdate . ' ' . (date('Y') - 1); } else { $strdate = $strdate . ' ' . date('Y'); } From 6a90a9d33fa9868cd12db60b1370d22b38061aef Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Fri, 31 Jan 2020 18:30:31 +0500 Subject: [PATCH 004/192] phpcs: fix new sudden violations (#1443) --- bridges/FacebookBridge.php | 1 + bridges/JustETFBridge.php | 1 + 2 files changed, 2 insertions(+) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 5ce67f94..13ccb27a 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -729,6 +729,7 @@ EOD; } } } + #endregion (User) } diff --git a/bridges/JustETFBridge.php b/bridges/JustETFBridge.php index 8d5b3d5a..746f1c97 100644 --- a/bridges/JustETFBridge.php +++ b/bridges/JustETFBridge.php @@ -347,5 +347,6 @@ class JustETFBridge extends BridgeAbstract { return $element->plaintext; } + #endregion } From 830f57f6074e351af36b10a79be95871d575097e Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Fri, 31 Jan 2020 18:36:25 +0500 Subject: [PATCH 005/192] [TwitterBridge] Use IE's user-agent (#1442) Twitter will return pages with legacy design and frontend code, which bridge can deal with --- bridges/TwitterBridge.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 2f5565b1..0d8b0243 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -172,11 +172,15 @@ EOD $html = ''; $page = $this->getURI(); + $header = array( + 'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko' + ); + if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) { $cookies = $this->getCookies($page); - $html = getSimpleHTMLDOM($page, array("Cookie: $cookies")); + $html = getSimpleHTMLDOM($page, array_merge($header, array("Cookie: $cookies"))); } else { - $html = getSimpleHTMLDOM($page, array(), array(CURLOPT_COOKIEFILE => '')); + $html = getSimpleHTMLDOM($page, $header, array(CURLOPT_COOKIEFILE => '')); } if(!$html) { From 45287e6853926aa49714ca7f2127efe4d8af8cd1 Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Fri, 31 Jan 2020 14:51:59 +0100 Subject: [PATCH 006/192] [RainbowSixSiegeBridge] Fix bridge (#1433) --- bridges/RainbowSixSiegeBridge.php | 59 +++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php index 724edc8d..62ea482b 100644 --- a/bridges/RainbowSixSiegeBridge.php +++ b/bridges/RainbowSixSiegeBridge.php @@ -2,19 +2,18 @@ class RainbowSixSiegeBridge extends BridgeAbstract { const MAINTAINER = 'corenting'; - const NAME = 'Rainbow Six Siege Blog'; - const URI = 'https://rainbow6.ubisoft.com/siege/en-us/news/'; + const NAME = 'Rainbow Six Siege News'; + const URI = 'https://www.ubisoft.com/en-us/game/rainbow-six/siege/news-updates'; const CACHE_TIMEOUT = 7200; // 2h - const DESCRIPTION = 'Latest articles from the Rainbow Six Siege blog'; + const DESCRIPTION = 'Latest news about Rainbow Six Siege'; public function getIcon() { - return 'https://ubistatic19-a.akamaihd.net/resource/en-us/game/rainbow6/siege-v3/r6s-favicon_316592.ico'; + return 'https://static-dm.akamaized.net/siege/prod/favicon-144x144.png'; } public function collectData(){ - $dlUrl = 'https://prod-tridionservice.ubisoft.com/live/v1/News/Latest?templateId=tcm%3A152-7677'; - $dlUrl .= '8-32&pageIndex=0&pageSize=10&language=en-US&detailPageId=tcm%3A150-194572-64'; - $dlUrl .= '&keywordList=233416%2C316144%2C233418%2C233417&siteId=undefined&useSeoFriendlyUrl=true'; + $dlUrl = 'https://www.ubisoft.com/api/updates/items?categoriesFilter=all'; + $dlUrl = $dlUrl . '&limit=6&mediaFilter=all&skip=0&startIndex=undefined&locale=en-us'; $jsonString = getContents($dlUrl) or returnServerError('Error while downloading the website content'); $json = json_decode($jsonString, true); @@ -22,17 +21,47 @@ class RainbowSixSiegeBridge extends BridgeAbstract { // Start at index 2 to remove highlighted articles for($i = 0; $i < count($json); $i++) { - $jsonItem = $json[$i]['Content']; - $article = str_get_html($jsonItem); + $jsonItem = $json[$i]; + + $uri = 'https://www.ubisoft.com/en-us/game/rainbow-six/siege'; + $uri = $uri . $jsonItem['button']['buttonUrl']; + + $thumbnail = 'Thumbnail'; + $content = $thumbnail . '
' . $jsonItem['content']; + + // Markdown parsing from https://gist.github.com/jbroadway/2836900 + + // Line breaks + $content = preg_replace("/\r\n|\r|\n/", '
', $content); + + // Links + $regex = '/\[([^\[]+)\]\(([^\)]+)\)/'; + $replacement = '\1'; + $content = preg_replace($regex, $replacement, $content); + + // Bold text + $regex = '/(\*\*|__)(.*?)\1/'; + $replacement = '\2'; + $content = preg_replace($regex, $replacement, $content); + + // Lists + $regex = '/\n\s*[\*|\-](.*)/'; + $content = preg_replace_callback($regex, function($regs) { + $item = $regs[1]; + return sprintf ('
  • %s
', trim ($item)); + }, $content); + + // Italic text + $regex = '/(\*\*|\*)(.*?)\1/'; + $replacement = '\2'; + $content = preg_replace($regex, $replacement, $content); $item = array(); - - $uri = $article->find('h3 a', 0)->href; - $uri = 'https://rainbow6.ubisoft.com' . $uri; $item['uri'] = $uri; - $item['title'] = $article->find('h3', 0)->plaintext; - $item['content'] = $article->find('img', 0)->outertext . '
' . $article->find('strong', 0)->plaintext; - $item['timestamp'] = strtotime($article->find('p.news_date', 0)->plaintext); + $item['id'] = $jsonItem['id']; + $item['title'] = $jsonItem['title']; + $item['content'] = $content; + $item['timestamp'] = strtotime($jsonItem['date']); $this->items[] = $item; } From 2450f8082370c65519a05d82b41dca96bcac2aff Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Fri, 31 Jan 2020 15:00:17 +0100 Subject: [PATCH 007/192] [ExtremeDownloadBridge] Update URL (#1429) Website URL has changed again ! --- bridges/ExtremeDownloadBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/ExtremeDownloadBridge.php b/bridges/ExtremeDownloadBridge.php index 1b4aa9a9..bca3997a 100644 --- a/bridges/ExtremeDownloadBridge.php +++ b/bridges/ExtremeDownloadBridge.php @@ -1,7 +1,7 @@ Date: Fri, 31 Jan 2020 15:01:46 +0100 Subject: [PATCH 008/192] Update GithubSearchBridge.php (#1431) Fixes #1430 --- bridges/GithubSearchBridge.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bridges/GithubSearchBridge.php b/bridges/GithubSearchBridge.php index fd90934c..9c1face4 100644 --- a/bridges/GithubSearchBridge.php +++ b/bridges/GithubSearchBridge.php @@ -27,16 +27,16 @@ class GithubSearchBridge extends BridgeAbstract { foreach($html->find('li.repo-list-item') as $element) { $item = array(); - $uri = $element->find('h3 a', 0)->href; + $uri = $element->find('.f4 a', 0)->href; $uri = substr(self::URI, 0, -1) . $uri; $item['uri'] = $uri; - $title = $element->find('h3', 0)->plaintext; + $title = $element->find('.f4', 0)->plaintext; $item['title'] = $title; // Description - if (count($element->find('p.d-inline-block')) != 0) { - $content = $element->find('p.d-inline-block', 0)->innertext; + if (count($element->find('p.mb-1')) != 0) { + $content = $element->find('p.mb-1', 0)->innertext; } else{ $content = 'No description'; } From 275662b8d455399e33b651d9d77115ab8f0edf55 Mon Sep 17 00:00:00 2001 From: somini Date: Tue, 4 Feb 2020 16:19:39 +0000 Subject: [PATCH 009/192] [FolhaDeSaoPaulo]: Add new Bridge (#1426) * [FolhaDeSaoPaulo]: Add new Bridge --- bridges/FolhaDeSaoPauloBridge.php | 51 +++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bridges/FolhaDeSaoPauloBridge.php diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php new file mode 100644 index 00000000..acd8d259 --- /dev/null +++ b/bridges/FolhaDeSaoPauloBridge.php @@ -0,0 +1,51 @@ + array( + 'name' => 'Feed sub-URL', + 'type' => 'text', + 'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)', + 'exampleValue' => 'emcimadahora/rss091.xml', + ) + ) + ); + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); + if($articleHTMLContent) { + foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) { + $toRemove->innertext = ''; + } + $item_content = $articleHTMLContent->find('div.c-news__body', 0); + if ($item_content) { + $text = $item_content->innertext; + $text = strip_tags($text, '

'); + $item['content'] = $text; + } + } else { + Debug::log('???: ' . $item['uri']); + } + + return $item; + } + + public function collectData(){ + $feed_input = $this->getInput('feed'); + if (substr($feed_input, 0, strlen(self::URI)) === self::URI) { + Debug::log('Input:: ' . $feed_input); + $feed_url = $feed_input; + } else { + /* TODO: prepend `/` if missing */ + $feed_url = self::URI . '/' . $this->getInput('feed'); + } + Debug::log('URL: ' . $feed_url); + $this->collectExpandableDatas($feed_url); + } +} From 182e9e7b419f89b5bd2c4a8d9629aa737563287c Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Tue, 4 Feb 2020 17:21:02 +0100 Subject: [PATCH 010/192] [ZoneTelechargement] Update URL (#1425) Website changed again his URL --- bridges/ZoneTelechargementBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index ab7b947f..79723fcb 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -8,7 +8,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { */ const NAME = 'Zone Telechargement'; - const URI = 'https://www.zone-telechargement.net/'; + const URI = 'https://www.zone-annuaire.com/'; const DESCRIPTION = 'Suivi de série sur Zone Telechargement'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = array( @@ -17,7 +17,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://wwv.zone-telechargement.net/', + 'title' => 'URL d\'une série sans le https://www.zone-annuaire.com/', 'exampleValue' => 'telecharger-series/31079-halt-and-catch-fire-saison-4-french-hd720p.html' ) ) From f040e4dc9c100b3f4b9598b0a6e89c111fe38460 Mon Sep 17 00:00:00 2001 From: floviolleau Date: Tue, 4 Feb 2020 17:22:42 +0100 Subject: [PATCH 011/192] [AtmoNouvelleAquitaine] Change description (#1423) * [AtmoNouvelleAquitaine] Change description --- bridges/AtmoNouvelleAquitaineBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/AtmoNouvelleAquitaineBridge.php b/bridges/AtmoNouvelleAquitaineBridge.php index d395fa78..7766bd9d 100644 --- a/bridges/AtmoNouvelleAquitaineBridge.php +++ b/bridges/AtmoNouvelleAquitaineBridge.php @@ -2,8 +2,8 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract { const NAME = 'Atmo Nouvelle Aquitaine'; - const URI = 'https://www.atmo-nouvelleaquitaine.org/monair/commune/'; - const DESCRIPTION = 'Fetches the latest air polution of Bordeaux from Atmo Nouvelle Aquitaine'; + const URI = 'https://www.atmo-nouvelleaquitaine.org'; + const DESCRIPTION = 'Fetches the latest air polution of cities in Nouvelle Aquitaine from Atmo'; const MAINTAINER = 'floviolleau'; const PARAMETERS = array(array( 'cities' => array( @@ -27,7 +27,7 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract { } public function collectData() { - $uri = self::URI . $this->getInput('cities'); + $uri = self::URI . '/monair/commune/' . $this->getInput('cities'); $html = getSimpleHTMLDOM($uri) or returnServerError('Could not request ' . $uri); From a00e75b71c671ecc8740527e8aa5f12cc677bdac Mon Sep 17 00:00:00 2001 From: floviolleau Date: Tue, 4 Feb 2020 17:24:42 +0100 Subject: [PATCH 012/192] [AtmoOccitanieBridge] Add new bridge for air quality in cities in Occitanie (#1422) * Add new bridge for Air Quality in cities supported by Atmo Occitanie --- bridges/AtmoOccitanieBridge.php | 58 +++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 bridges/AtmoOccitanieBridge.php diff --git a/bridges/AtmoOccitanieBridge.php b/bridges/AtmoOccitanieBridge.php new file mode 100644 index 00000000..24f63832 --- /dev/null +++ b/bridges/AtmoOccitanieBridge.php @@ -0,0 +1,58 @@ + array( + 'name' => 'Ville', + 'required' => true + ) + )); + const CACHE_TIMEOUT = 7200; + + public function collectData() { + $uri = self::URI . $this->getInput('city'); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request ' . $uri); + + $generalMessage = $html->find('.landing-ville .city-banner .iqa-avertissement', 0)->innertext; + $recommendationsDom = $html->find('.landing-ville .recommandations', 0); + $recommendationsItemDom = $recommendationsDom->find('.recommandation-item .label'); + + $recommendationsMessage = ''; + + $i = 0; + $len = count($recommendationsItemDom); + foreach ($recommendationsItemDom as $key => $value) { + if ($i == 0) { + $recommendationsMessage .= trim($value->innertext) . '.'; + } else { + $recommendationsMessage .= ' ' . trim($value->innertext) . '.'; + } + $i++; + } + + $lastRecommendationsDom = $recommendationsDom->find('.col-md-6', -1); + $informationHeaderMessage = $lastRecommendationsDom->find('.heading', 0)->innertext; + $indice = $lastRecommendationsDom->find('.current-indice .indice div', 0)->innertext; + $informationDescriptionMessage = $lastRecommendationsDom->find('.current-indice .description p', 0)->innertext; + + $message = "$generalMessage L'indice est de $indice/10. $informationDescriptionMessage. $recommendationsMessage"; + $city = $this->getInput('city'); + + $item['uri'] = $uri; + $today = date('d/m/Y'); + $item['title'] = "Bulletin de l'air du $today pour la ville : $city."; + //$item['title'] .= ' Retrouvez plus d\'informations en allant sur atmo-occitanie.org #QualiteAir. ' . $message; + $item['title'] .= ' #QualiteAir. ' . $message; + $item['author'] = 'floviolleau'; + $item['content'] = $message; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } +} From 00dbde2c2449fcd9017333174d62cbc5d33a8fca Mon Sep 17 00:00:00 2001 From: Anchit Bajaj Date: Tue, 4 Feb 2020 21:55:56 +0530 Subject: [PATCH 013/192] [IGNBridge] Removed Ugly Nonworking Widgets (#1413) --- bridges/IGNBridge.php | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/bridges/IGNBridge.php b/bridges/IGNBridge.php index 6a254b37..ef5088f2 100644 --- a/bridges/IGNBridge.php +++ b/bridges/IGNBridge.php @@ -19,6 +19,27 @@ class IGNBridge extends FeedExpander { // $articlePage gets the entire page's contents $articlePage = getSimpleHTMLDOM($newsItem->link); + // List of BS elements + $uselessElements = array( + '.wiki-page-tools', + '.feedback-container', + '.paging-container', + '.dropdown-wrapper', + '.mw-editsection', + '.jsx-4115608983', + '.jsx-4213937408', + '.commerce-container', + '.widget-container', + '.newsletter-signup-button' + ); + + // Remove useless elements + foreach($uselessElements as $uslElement) { + foreach($articlePage->find($uslElement) as $jsWidget) { + $jsWidget->remove(); + } + } + /* * NOTE: Though articles and wiki/howtos have seperate styles of pages, there is no mechanism * for handling them seperately as it just ignores the DOM querys which it does not find. @@ -33,19 +54,8 @@ class IGNBridge extends FeedExpander { } // For Wikis and HowTos - $uselessWikiElements = array( - '.wiki-page-tools', - '.feedback-container', - '.paging-container' - ); foreach($articlePage->find('.wiki-page') as $wikiContents) { - $copy = clone $wikiContents; - // Remove useless elements present in IGN wiki/howtos - foreach($uselessWikiElements as $uslElement) { - $toRemove = $wikiContents->find($uslElement, 0); - $copy = str_replace($toRemove, '', $copy); - } - $article = $article . $copy; + $article = $article . $wikiContents; } // Add content to feed From 5bd07723ad75cba0437a9dbaa4cf429cd93e8bff Mon Sep 17 00:00:00 2001 From: Joseph Date: Tue, 4 Feb 2020 16:26:34 +0000 Subject: [PATCH 014/192] [ScribdBridge] Add bridge (#1391) --- bridges/ScribdBridge.php | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 bridges/ScribdBridge.php diff --git a/bridges/ScribdBridge.php b/bridges/ScribdBridge.php new file mode 100644 index 00000000..3cb4199a --- /dev/null +++ b/bridges/ScribdBridge.php @@ -0,0 +1,83 @@ + array( + 'name' => 'Profile URL', + 'type' => 'text', + 'required' => true, + 'title' => 'Profile URL. Example: https://www.scribd.com/user/489040929/number10leaks-com', + 'exampleValue' => 'https://www.scribd.com/user/' + ), + )); + + const CACHE_TIMEOUT = 3600; + + private $profileUrlRegex = '/scribd\.com\/(user\/[0-9]+\/[\w-]+)\/?/'; + private $feedName = ''; + + public function collectData() { + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $header = $html->find('div.header', 0); + $this->feedName = $header->find('a', 0)->plaintext; + + foreach($html->find('div.content ul li') as $index => $li) { + $item = array(); + + $item['title'] = $li->find('div.under_title', 0)->plaintext; + $item['uri'] = $li->find('a', 0)->href; + $item['author'] = $li->find('span.uploader', 0)->plaintext; + //$item['timestamp'] = + $item['uid'] = $li->find('a', 0)->href; + + $pageHtml = getSimpleHTMLDOMCached($item['uri'], 3600) + or returnServerError('Could not request: ' . $item['uri']); + + $image = $pageHtml->find('meta[property="og:image"]', 0)->content; + $description = $pageHtml->find('meta[property="og:description"]', 0)->content; + + foreach ($pageHtml->find('ul.interest_pills li') as $pills) { + $item['categories'][] = $pills->plaintext; + } + + $item['content'] = <<{$description}

+EOD; + + $item['enclosures'][] = $image; + + $this->items[] = $item; + + if (count($this->items) >= 15) { + break; + } + } + } + + public function getName() { + + if ($this->feedName) { + return $this->feedName . ' - Scribd'; + } + + return parent::getName(); + } + + public function getURI() { + + if (!is_null($this->getInput('profile'))) { + preg_match($this->profileUrlRegex, $this->getInput('profile'), $user) + or returnServerError('Could not extract user ID and name from given profile URL.'); + + return self::URI . '/' . $user[1] . '/uploads'; + } + + return parent::getURI(); + } +} From e5303efba301d26bfec9647488291e8ed60ff4b7 Mon Sep 17 00:00:00 2001 From: 86423355844265459587182778 <47747986+86423355844265459587182778@users.noreply.github.com> Date: Fri, 7 Feb 2020 15:16:55 +0000 Subject: [PATCH 015/192] [SoundcloudBridge] Fix returned URL and title (#1449) --- bridges/SoundcloudBridge.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 9607d33d..99a21174 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -59,9 +59,13 @@ class SoundCloudBridge extends BridgeAbstract { return parent::getIcon(); } + public function getURI(){ + return 'https://soundcloud.com/' . $this->getInput('u'); + } + public function getName(){ if(!is_null($this->getInput('u'))) { - return self::NAME . ' - ' . $this->getInput('u'); + return $this->getInput('u') . ' - ' . self::NAME; } return parent::getName(); From 1ab7e493a87dc94d07ac5b5861b85cde319d8fd2 Mon Sep 17 00:00:00 2001 From: Binnette Date: Mon, 10 Feb 2020 16:56:40 +0100 Subject: [PATCH 016/192] [DonnonsBridge] Add a new bridge (#1441) --- bridges/DonnonsBridge.php | 123 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 bridges/DonnonsBridge.php diff --git a/bridges/DonnonsBridge.php b/bridges/DonnonsBridge.php new file mode 100644 index 00000000..77413499 --- /dev/null +++ b/bridges/DonnonsBridge.php @@ -0,0 +1,123 @@ + array( + 'name' => 'Url de recherche', + 'required' => true, + 'exampleValue' => '/Sport/Ile-de-France', + 'pattern' => '\/.*', + 'title' => 'Faites une recherche sur le site. Puis copiez ici la fin de l’url. Doit commencer par /', + ), + 'p' => array( + 'name' => 'Nombre de pages à scanner', + 'type' => 'number', + 'defaultValue' => 5, + 'title' => 'Indique le nombre de pages de donnons.org qui seront scannées' + ) + ) + ); + + public function collectData() { + $pages = $this->getInput('p'); + + for($i = 1; $i <= $pages; $i++) { + $this->collectDataByPage($i); + } + } + + private function collectDataByPage($page) { + $uri = $this->getPageURI($page); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('No results for this query.'); + + $searchDiv = $html->find('div[id=search]', 0); + + if(!is_null($searchDiv)) { + $elements = $searchDiv->find('a.lst-annonce'); + foreach($elements as $element) { + $item = array(); + + // Lien vers le don + $item['uri'] = self::URI . $element->href; + // Id de l'objet + $item['uid'] = $element->getAttribute('data-id'); + + // Grab info from json + $jsonString = $element->find('script', 0)->innertext; + $json = json_decode($jsonString, true); + + $name = $json['name']; + $category = $json['category']; + $date = $json['availabilityStarts']; + $description = $json['description']; + $city = $json['availableAtOrFrom']['address']['addressLocality']; + $region = $json['availableAtOrFrom']['address']['addressRegion']; + + // Grab info from HTML + $imageSrc = $element->find('img.ima-center', 0)->getAttribute('data-src'); + $image = self::URI . $imageSrc; + $author = $element->find('div.avatar-holder', 0)->plaintext; + + $content = ' + +
+

' . $name . '

+

' . $description . '

+

Lieu : ' . $city . ' - ' . $region . '

+

Par : ' . $author . '

+

Date : ' . $date . '

+
+ '; + + // Titre du don + $item['title'] = '[' . $category . '] ' . $name; + $item['timestamp'] = $date; + $item['author'] = $author; + $item['content'] = $content; + $item['enclosures'] = array($image); + + $this->items[] = $item; + } + } + } + + private function getPageURI($page) { + $uri = $this->getURI(); + $haveQueryParams = strpos($uri, '?') !== false; + + if($haveQueryParams) { + return $uri . '&page=' . $page; + } else { + return $uri . '?page=' . $page; + } + } + + public function getURI() { + if(!is_null($this->getInput('q'))) { + return self::URI . $this->getInput('q'); + } + + return parent::getURI(); + } + + public function getName() { + if(!is_null($this->getInput('q'))) { + return 'Donnons.org - ' . $this->getInput('q'); + } + + return parent::getName(); + } +} From 8697e1e1a26c5b8c1dca284bd3b46b517063a9f2 Mon Sep 17 00:00:00 2001 From: Tyler Kenney Date: Mon, 10 Feb 2020 10:57:08 -0500 Subject: [PATCH 017/192] [RoosterTeethBridge] Add a new bridge (#1450) * Added RoosterTeethBridge --- bridges/RoosterTeethBridge.php | 107 +++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 bridges/RoosterTeethBridge.php diff --git a/bridges/RoosterTeethBridge.php b/bridges/RoosterTeethBridge.php new file mode 100644 index 00000000..496c7de8 --- /dev/null +++ b/bridges/RoosterTeethBridge.php @@ -0,0 +1,107 @@ + array( + 'channel' => array( + 'type' => 'list', + 'name' => 'Channel', + 'title' => 'Select a channel to filter by', + 'values' => array( + 'All channels' => 'all', + 'Achievement Hunter' => 'achievement-hunter', + 'Cow Chop' => 'cow-chop', + 'Death Battle' => 'death-battle', + 'Funhaus' => 'funhaus', + 'Inside Gaming' => 'inside-gaming', + 'JT Music' => 'jt-music', + 'Kinda Funny' => 'kinda-funny', + 'Rooster Teeth' => 'rooster-teeth', + 'Sugar Pine 7' => 'sugar-pine-7' + ) + ), + 'sort' => array( + 'type' => 'list', + 'name' => 'Sort', + 'title' => 'Select a sort order', + 'values' => array( + 'Newest -> Oldest' => 'desc', + 'Oldest -> Newest' => 'asc' + ), + 'defaultValue' => 'desc' + ), + 'first' => array( + 'type' => 'list', + 'name' => 'RoosterTeeth First', + 'title' => 'Select whether to include "First" videos before they are public', + 'values' => array( + 'True' => true, + 'False' => false + ) + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Maximum number of items to return', + 'defaultValue' => 10 + ) + ) + ); + + public function collectData() { + if ($this->getInput('channel') !== 'all') { + $uri = self::API + . 'api/v1/episodes?per_page=' + . $this->getInput('limit') + . '&channel_id=' + . $this->getInput('channel') + . '&order=' . $this->getInput('sort') + . '&page=1'; + + $htmlJSON = getSimpleHTMLDOM($uri) + or returnServerError('Could not contact Rooster Teeth: ' . $uri); + } else { + $uri = self::API + . '/api/v1/episodes?per_page=' + . $this->getInput('limit') + . '&filter=all&order=' + . $this->getInput('sort') + . '&page=1'; + + $htmlJSON = getSimpleHTMLDOM($uri) + or returnServerError('Could not contact Rooster Teeth: ' . $uri); + } + + $htmlArray = json_decode($htmlJSON, true); + + foreach($htmlArray['data'] as $key => $value) { + $item = array(); + + if (!$this->getInput('first') && $value['attributes']['is_sponsors_only']) { + continue; + } + + $publicDate = date_create($value['attributes']['member_golive_at']); + $dateDiff = date_diff($publicDate, date_create(), false); + + if (!$this->getInput('first') && $dateDiff->invert == 1) { + continue; + } + + $item['uri'] = self::URI . $value['canonical_links']['self']; + $item['title'] = $value['attributes']['title']; + $item['timestamp'] = $value['attributes']['member_golive_at']; + $item['author'] = $value['attributes']['show_title']; + + $this->items[] = $item; + } + } +} From 480694e819e8a14178da00c355b5fbff75879879 Mon Sep 17 00:00:00 2001 From: Mitsukarenai Date: Sat, 15 Feb 2020 00:03:29 +0100 Subject: [PATCH 018/192] [PornhubBridge] Add bridge --- bridges/PornhubBridge.php | 99 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 bridges/PornhubBridge.php diff --git a/bridges/PornhubBridge.php b/bridges/PornhubBridge.php new file mode 100644 index 00000000..c8e93084 --- /dev/null +++ b/bridges/PornhubBridge.php @@ -0,0 +1,99 @@ + array( + 'name' => 'User name', + 'required' => true, + ), + 'type' => array( + 'name' => 'User type', + 'type' => 'list', + 'values' => array( + 'user' => 'users', + 'model' => 'model', + 'pornstar' => 'pornstar', + ), + 'defaultValue' => 'users', + ), + 'sort' => array( + 'name' => 'Sort by', + 'type' => 'list', + 'values' => array( + 'Most recent' => '', + 'Most views' => '?o=mv', + 'Top rated' => '?o=tr', + 'Longest' => '?o=lg', + ), + 'defaultValue' => '', + ), + 'show_images' => array( + 'name' => 'Show thumbnails', + 'type' => 'checkbox', + ), + )); + + public function getName(){ + if(!is_null($this->getInput('type')) && !is_null($this->getInput('q'))) { + return 'PornHub ' . $this->getInput('type') . ':' . $this->getInput('q'); + } + + return parent::getName(); + } + + public function collectData() { + + $uri = 'https://www.pornhub.com/' . $this->getInput('type') . '/'; + switch($this->getInput('type')) { // select proper permalink format per user type... + case 'model': + $uri .= urlencode($this->getInput('q')) . '/videos' . $this->getInput('sort'); break; + case 'users': + $uri .= urlencode($this->getInput('q')) . '/videos/public' . $this->getInput('sort'); break; + case 'pornstar': + $uri .= urlencode($this->getInput('q')) . '/videos/upload' . $this->getInput('sort'); break; + } + + $show_images = $this->getInput('show_images'); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request PornHub.'); + + foreach($html->find('div.videoUList ul.videos li.videoblock') as $element) { + + $item = array(); + + $item['author'] = $this->getInput('q'); + + // Title + $title = $element->find('a', 0)->getAttribute('title'); + if (is_null($title)) { + continue; + } + $item['title'] = $title; + + // Url + $url = $element->find('a', 0)->href; + $item['uri'] = 'https://www.pornhub.com' . $url; + + // Content + $image = $element->find('img', 0)->getAttribute('data-src'); + if($show_images == TRUE) { + $item['content'] = '
'; + } + + // date hack, guess upload YYYYMMDD from thumbnail URL (format: https://ci.phncdn.com/videos/201907/25/--- ) + $uploaded = explode('/', $image); + $uploaded = strtotime($uploaded[4].$uploaded[5]); + $item['timestamp'] = $uploaded; + + $this->items[] = $item; + } + } +} From fe83d763a37afdb55a1bc64b937b558b44ccd916 Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 26 Feb 2020 20:34:46 +0000 Subject: [PATCH 019/192] [PornhubBridge] Fix travis issues (#1471) * [PornhubBridge] Fix travis issues --- bridges/PornhubBridge.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bridges/PornhubBridge.php b/bridges/PornhubBridge.php index c8e93084..b8da99a5 100644 --- a/bridges/PornhubBridge.php +++ b/bridges/PornhubBridge.php @@ -27,12 +27,12 @@ class PornhubBridge extends BridgeAbstract { 'name' => 'Sort by', 'type' => 'list', 'values' => array( - 'Most recent' => '', + 'Most recent' => '?', 'Most views' => '?o=mv', 'Top rated' => '?o=tr', 'Longest' => '?o=lg', ), - 'defaultValue' => '', + 'defaultValue' => '?', ), 'show_images' => array( 'name' => 'Show thumbnails', @@ -52,7 +52,7 @@ class PornhubBridge extends BridgeAbstract { $uri = 'https://www.pornhub.com/' . $this->getInput('type') . '/'; switch($this->getInput('type')) { // select proper permalink format per user type... - case 'model': + case 'model': $uri .= urlencode($this->getInput('q')) . '/videos' . $this->getInput('sort'); break; case 'users': $uri .= urlencode($this->getInput('q')) . '/videos/public' . $this->getInput('sort'); break; @@ -84,13 +84,13 @@ class PornhubBridge extends BridgeAbstract { // Content $image = $element->find('img', 0)->getAttribute('data-src'); - if($show_images == TRUE) { - $item['content'] = ''; + if($show_images === true) { + $item['content'] = ''; } // date hack, guess upload YYYYMMDD from thumbnail URL (format: https://ci.phncdn.com/videos/201907/25/--- ) $uploaded = explode('/', $image); - $uploaded = strtotime($uploaded[4].$uploaded[5]); + $uploaded = strtotime($uploaded[4] . $uploaded[5]); $item['timestamp'] = $uploaded; $this->items[] = $item; From 1584636e5b85a969996f3414429891724dd5e112 Mon Sep 17 00:00:00 2001 From: somini Date: Wed, 26 Feb 2020 20:50:25 +0000 Subject: [PATCH 020/192] TinyLetter: New Bridge (#1469) * TinyLetter: New Bridge --- bridges/TinyLetterBridge.php | 54 ++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 bridges/TinyLetterBridge.php diff --git a/bridges/TinyLetterBridge.php b/bridges/TinyLetterBridge.php new file mode 100644 index 00000000..e9860b54 --- /dev/null +++ b/bridges/TinyLetterBridge.php @@ -0,0 +1,54 @@ + array( + 'name' => 'User Name', + 'exampleValue' => 'forwards', + ) + ) + ); + + public function getName() { + $username = $this->getInput('username'); + if (!is_null($username)) { + return static::NAME . ' | ' . $username; + } + + return parent::getName(); + } + + public function getURI() { + $username = $this->getInput('username'); + if (!is_null($username)) { + return static::URI . urlencode($username); + } + + return parent::getURI(); + } + + public function collectData() { + $archives = self::getURI() . '/archive'; + $html = getSimpleHTMLDOMCached($archives) + or returnServerError('Could not load content'); + + foreach($html->find('.message-list li') as $element) { + $item = array(); + + $snippet = $element->find('p.message-snippet', 0); + $link = $element->find('.message-link', 0); + + $item['title'] = $link->plaintext; + $item['content'] = $snippet->innertext; + $item['uri'] = $link->href; + $item['timestamp'] = strtotime($element->find('.message-date', 0)->plaintext); + + $this->items[] = $item; + } + + } +} From a54eb88ee1fe164a5ab19dae52cad20e1aa2d2db Mon Sep 17 00:00:00 2001 From: Joseph Date: Wed, 26 Feb 2020 20:56:03 +0000 Subject: [PATCH 021/192] [DevToBridge] Fix bridge & add getName() (#1470) --- bridges/DevToBridge.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bridges/DevToBridge.php b/bridges/DevToBridge.php index 868ac976..c298d462 100644 --- a/bridges/DevToBridge.php +++ b/bridges/DevToBridge.php @@ -51,15 +51,10 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' $html = defaultLinkTo($html, static::URI); - $articles = $html->find('div[class="single-article"]') + $articles = $html->find('div.single-article') or returnServerError('Could not find articles!'); foreach($articles as $article) { - - if($article->find('[class*="cta"]', 0)) { // Skip ads - continue; - } - $item = array(); $item['uri'] = $article->find('a[id*=article-link]', 0)->href; @@ -92,6 +87,14 @@ EOD; } + public function getName() { + if (!is_null($this->getInput('tag'))) { + return ucfirst($this->getInput('tag')) . ' - dev.to'; + } + + return parent::getName(); + } + private function getFullArticle($url) { $html = getSimpleHTMLDOMCached($url) or returnServerError('Unable to load article from "' . $url . '"!'); From e102353ab8c5b1c725b68dbf1854762d508ccc94 Mon Sep 17 00:00:00 2001 From: "St. John Johnson" Date: Wed, 26 Feb 2020 12:56:52 -0800 Subject: [PATCH 022/192] [GoComics] Update to new website structure (#1464) GoComics.com has updated their website. The image location is now a data attribute in a div. --- bridges/GoComicsBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GoComicsBridge.php b/bridges/GoComicsBridge.php index 3223d19a..7512d84a 100644 --- a/bridges/GoComicsBridge.php +++ b/bridges/GoComicsBridge.php @@ -28,7 +28,7 @@ class GoComicsBridge extends BridgeAbstract { $page = getSimpleHTMLDOM($link) or returnServerError('Could not request GoComics: ' . $link); - $imagelink = $page->find('.img-fluid', 1)->src; + $imagelink = $page->find('.comic.container', 0)->getAttribute('data-image'); $date = explode('/', $link); $item['id'] = $imagelink; From a3b4bd2d086997630d0af6b415e091c1ed5f9ac3 Mon Sep 17 00:00:00 2001 From: John Corser Date: Wed, 26 Feb 2020 16:05:55 -0500 Subject: [PATCH 023/192] [DaveRamseyBlogBridge] Add new bridge (#1459) --- bridges/DaveRamseyBlogBridge.php | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 bridges/DaveRamseyBlogBridge.php diff --git a/bridges/DaveRamseyBlogBridge.php b/bridges/DaveRamseyBlogBridge.php new file mode 100644 index 00000000..34c90446 --- /dev/null +++ b/bridges/DaveRamseyBlogBridge.php @@ -0,0 +1,24 @@ +find('.Post') as $element) { + $this->items[] = array( + 'uri' => 'https://www.daveramsey.com' . $element->find('header > a', 0)->href, + 'title' => $element->find('header > h2 > a', 0)->plaintext, + 'tags' => $element->find('.Post-topic', 0)->plaintext, + 'content' => $element->find('.Post-body', 0)->plaintext, + ); + } + } +} From 90147fc45c6256519ac53a3c6e15ae3a29276aeb Mon Sep 17 00:00:00 2001 From: somini Date: Wed, 26 Feb 2020 21:08:14 +0000 Subject: [PATCH 024/192] [FirstLookMediaTech]: New Bridge (#1438) --- bridges/FirstLookMediaTechBridge.php | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 bridges/FirstLookMediaTechBridge.php diff --git a/bridges/FirstLookMediaTechBridge.php b/bridges/FirstLookMediaTechBridge.php new file mode 100644 index 00000000..114bf62d --- /dev/null +++ b/bridges/FirstLookMediaTechBridge.php @@ -0,0 +1,50 @@ + array( + 'type' => 'checkbox', + 'name' => 'Include Projects?', + ) + ) + ); + + public function collectData() { + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not load content'); + + if ($this->getInput('projects')) { + $top_projects = $html->find('.PromoList-ul', 0); + foreach($top_projects->find('li.PromoList-item') as $element) { + $item = array(); + + $item_uri = $element->find('a', 0); + $item['uri'] = $item_uri->href; + $item['title'] = strip_tags($item_uri->innertext); + $item['content'] = $element->find('div > div', 0); + + $this->items[] = $item; + } + } + + $top_articles = $html->find('.PromoList-ul', 1); + foreach($top_articles->find('li.PromoList-item') as $element) { + $item = array(); + + $item_left = $element->find('div > div', 0); + $item_date = $element->find('.PromoList-date', 0); + $item['timestamp'] = strtotime($item_date->innertext); + $item_date->outertext = ''; /* Remove */ + $item['author'] = $item_left->innertext; + $item_uri = $element->find('a', 0); + $item['uri'] = self::URI . $item_uri->href; + $item['title'] = strip_tags($item_uri); + + $this->items[] = $item; + } + } +} From f0363ba03b92cd949760f01492b140f89de12ca7 Mon Sep 17 00:00:00 2001 From: Anchit Bajaj Date: Thu, 27 Feb 2020 02:40:09 +0530 Subject: [PATCH 025/192] [PcGamerBridge] - Add all articles, full content and images (#1420) --- bridges/PcGamerBridge.php | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/bridges/PcGamerBridge.php b/bridges/PcGamerBridge.php index e0e55ce4..c4bcccf4 100644 --- a/bridges/PcGamerBridge.php +++ b/bridges/PcGamerBridge.php @@ -2,22 +2,43 @@ class PcGamerBridge extends BridgeAbstract { const NAME = 'PC Gamer'; - const URI = 'https://www.pcgamer.com/'; + const URI = 'https://www.pcgamer.com/archive/'; const DESCRIPTION = 'PC Gamer Most Read Stories'; - const MAINTAINER = 'mdemoss'; + const CACHE_TIMEOUT = 3600; + const MAINTAINER = 'IceWreck, mdemoss'; public function collectData() { $html = getSimpleHTMLDOMCached($this->getURI(), 300); - $stories = $html->find('div#popularcontent li.most-popular-item'); + $stories = $html->find('ul.basic-list li.day-article'); + $i = 0; + // Find induvidual stories in the archive page foreach ($stories as $element) { + if($i == 15) break; $item['uri'] = $element->find('a', 0)->href; + // error_log(print_r($item['uri'], TRUE)); $articleHtml = getSimpleHTMLDOMCached($item['uri']); - $item['title'] = $element->find('h4 a', 0)->plaintext; + $item['title'] = $element->find('a', 0)->plaintext; $item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content); - $item['content'] = $articleHtml->find('meta[name=description]', 0)->content; - $item['author'] = $articleHtml->find('a[itemprop=author]', 0)->plaintext; + $item['author'] = $articleHtml->find('span.by-author a', 0)->plaintext; + + // Get the article content + $articleContents = $articleHtml->find('#article-body', 0); + + /* + By default the img src has a link to an error image and then the actual image + is added in by JS. So we replace the error image with the actual full size image + whoose link is in one of the attributes of the img tag + */ + foreach($articleContents->find('img') as $img) { + $imgsrc = $img->getAttribute('data-original-mos'); + // error_log($imgsrc); + $img->src = $imgsrc; + } + + $item['content'] = $articleContents; $this->items[] = $item; + $i++; } } } From 96e58d4c947d6e5b6dfef200f542327f1e948462 Mon Sep 17 00:00:00 2001 From: Anchit Bajaj Date: Thu, 27 Feb 2020 02:40:54 +0530 Subject: [PATCH 026/192] Add bridge for Phoronix (#1412) --- bridges/PhoronixBridge.php | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 bridges/PhoronixBridge.php diff --git a/bridges/PhoronixBridge.php b/bridges/PhoronixBridge.php new file mode 100644 index 00000000..c5ded27b --- /dev/null +++ b/bridges/PhoronixBridge.php @@ -0,0 +1,22 @@ +collectExpandableDatas('https://www.phoronix.com/rss.php', 15); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + // $articlePage gets the entire page's contents + $articlePage = getSimpleHTMLDOM($newsItem->link); + $article = $articlePage->find('.content', 0); + $item['content'] = $article; + return $item; + } +} From 6b6974d115bbd7dff7911f9af4e8a291a4cbc86e Mon Sep 17 00:00:00 2001 From: Julien Desgats Date: Wed, 26 Feb 2020 21:11:54 +0000 Subject: [PATCH 027/192] [NewOnNetflix] Add new bridge (#1408) --- bridges/NewOnNetflixBridge.php | 59 ++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 bridges/NewOnNetflixBridge.php diff --git a/bridges/NewOnNetflixBridge.php b/bridges/NewOnNetflixBridge.php new file mode 100644 index 00000000..bb35e71d --- /dev/null +++ b/bridges/NewOnNetflixBridge.php @@ -0,0 +1,59 @@ + array( + 'name' => 'Country', + 'type' => 'list', + 'values' => array( + 'Australia/New Zealand' => 'anz', + 'Canada' => 'can', + 'United Kingdom' => 'uk', + 'United States' => 'usa', + ), + 'defaultValue' => 'uk', + ) + )); + const CACHE_TIMEOUT = 3600 * 24; + + public function collectData() { + $baseURI = 'https://' . $this->getInput('country') . '.newonnetflix.info'; + $html = getSimpleHTMLDOMCached($baseURI . '/lastchance', self::CACHE_TIMEOUT) + or returnServerError('Could not request NewOnNetflix (U FAILED LOL).'); + + foreach($html->find('article.oldpost') as $element) { + $title = $element->find('a.infopop[title]', 0); + $img = $element->find('img[lazy_src]', 0); + $date = $element->find('span[title]', 0); + + // format sholud be 'dd/mm/yy - dd/mm/yy' + // (the added date might be "unknown") + $fromTo = array(); + if (preg_match('/^\s*(.*?)\s*-\s*(.*?)\s*$/', $date->title, $fromTo)) { + $from = $fromTo[1]; + $to = $fromTo[2]; + } else { + $from = 'unknown'; + $to = 'unknown'; + } + $summary = << +
{$title->title}
+
Added on:$from
+
Removed on:$to
+EOD; + + $item = array(); + $item['uri'] = $baseURI . $title->href; + $item['title'] = $to . ' - ' . $title->plaintext; + $item['content'] = $summary; + // some movies are added and removed multiple times + $item['uid'] = $title->href . '-' . $to; + $this->items[] = $item; + } + } +} From c9e5f6c9dd6d10ad40d2f5e2074587d89744f4b1 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Wed, 26 Feb 2020 22:12:25 +0100 Subject: [PATCH 028/192] [AllocineFRBridge] Update Show List and parsing (#1407) * [AllocineFRBridge] Update Show List and parsing --- bridges/AllocineFRBridge.php | 84 +++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 31 deletions(-) diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index 17da9031..40ef9a98 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -8,14 +8,25 @@ class AllocineFRBridge extends BridgeAbstract { const DESCRIPTION = 'Bridge for allocine.fr'; const PARAMETERS = array( array( 'category' => array( - 'name' => 'category', + 'name' => 'Emission', 'type' => 'list', - 'exampleValue' => 'Faux Raccord', - 'title' => 'Select your category', + 'title' => 'Sélectionner l\'emission', 'values' => array( 'Faux Raccord' => 'faux-raccord', - 'Top 5' => 'top-5', - 'Tueurs en Séries' => 'tueurs-en-serie' + 'Fanzone' => 'fanzone', + 'Game In Ciné' => 'game-in-cine', + 'Pour la faire courte' => 'pour-la-faire-courte', + 'Home Cinéma' => 'home-cinema', + 'PILS - Par Ici Les Sorties' => 'pils-par-ici-les-sorties', + 'AlloCiné : l\'émission, sur LeStream' => 'allocine-lemission-sur-lestream', + 'Give Me Five' => 'give-me-five', + 'Aviez-vous remarqué ?' => 'aviez-vous-remarque', + 'Et paf, il est mort' => 'et-paf-il-est-mort', + 'The Big Fan Theory' => 'the-big-fan-theory', + 'Clichés' => 'cliches', + 'Complètement...' => 'completement', + '#Fun Facts' => 'fun-facts', + 'Origin Story' => 'origin-story', ) ) )); @@ -23,19 +34,30 @@ class AllocineFRBridge extends BridgeAbstract { public function getURI(){ if(!is_null($this->getInput('category'))) { - switch($this->getInput('category')) { - case 'faux-raccord': - $uri = static::URI . 'video/programme-12284/saison-32180/'; - break; - case 'top-5': - $uri = static::URI . 'video/programme-12299/saison-29561/'; - break; - case 'tueurs-en-serie': - $uri = static::URI . 'video/programme-12286/saison-22938/'; - break; - } + $categories = array( + 'faux-raccord' => 'video/programme-12284/saison-37054/', + 'fanzone' => 'video/programme-12298/saison-37059/', + 'game-in-cine' => 'video/programme-12288/saison-22971/', + 'pour-la-faire-courte' => 'video/programme-20960/saison-29678/', + 'home-cinema' => 'video/programme-12287/saison-34703/', + 'pils-par-ici-les-sorties' => 'video/programme-25789/saison-37253/', + 'allocine-lemission-sur-lestream' => 'video/programme-25123/saison-36067/', + 'give-me-five' => 'video/programme-21919/saison-34518/', + 'aviez-vous-remarque' => 'video/programme-19518/saison-37084/', + 'et-paf-il-est-mort' => 'video/programme-25113/saison-36657/', + 'the-big-fan-theory' => 'video/programme-20403/saison-37419/', + 'cliches' => 'video/programme-24834/saison-35591/', + 'completement' => 'video/programme-23859/saison-34102/', + 'fun-facts' => 'video/programme-23040/saison-32686/', + 'origin-story' => 'video/programme-25667/saison-37041/' + ); - return $uri; + $category = $this->getInput('category'); + if(array_key_exists($category, $categories)) { + return static::URI . $categories[$category]; + } else { + returnClientError('Emission inconnue'); + } } return parent::getURI(); @@ -63,23 +85,23 @@ class AllocineFRBridge extends BridgeAbstract { self::PARAMETERS[$this->queriedContext]['category']['values'] ); - foreach($html->find('.media-meta-list figure.media-meta-fig') as $element) { + foreach($html->find('div[class=col-left]', 0)->find('div[class*=video-card]') as $element) { $item = array(); - $title = $element->find('div.titlebar h3.title a', 0); - $content = trim($element->innertext); - $figCaption = strpos($content, $category); + $title = $element->find('a[class*=meta-title-link]', 0); + $content = trim($element->outertext); - if($figCaption !== false) { - $content = str_replace('src="/', 'src="' . static::URI, $content); - $content = str_replace('href="/', 'href="' . static::URI, $content); - $content = str_replace('src=\'/', 'src=\'' . static::URI, $content); - $content = str_replace('href=\'/', 'href=\'' . static::URI, $content); - $item['content'] = $content; - $item['title'] = trim($title->innertext); - $item['uri'] = static::URI . $title->href; - $this->items[] = $item; - } + // Replace image 'src' with the one in 'data-src' + $content = preg_replace('@src="data:image/gif;base64,[A-Za-z0-9+\/]*"@', '', $content); + $content = preg_replace('@data-src=@', 'src=', $content); + + // Remove date in the content to prevent content update while the video is getting older + $content = preg_replace('@
.*[^<]*[^<]*
@', '', $content); + + $item['content'] = $content; + $item['title'] = trim($title->innertext); + $item['uri'] = static::URI . substr($title->href, 1); + $this->items[] = $item; } } } From 3179c1e8843d8585d4ab676e07460b26c84cbc3d Mon Sep 17 00:00:00 2001 From: Lorenzo Stanco Date: Wed, 26 Feb 2020 22:13:40 +0100 Subject: [PATCH 029/192] [InstagramBridge] Fixed item thumb on video entries (#1387) --- bridges/InstagramBridge.php | 23 ++++++++++++++--------- formats/AtomFormat.php | 5 +++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 679c4c0e..0a6dbaad 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -123,6 +123,12 @@ class InstagramBridge extends BridgeAbstract { $item['title'] = substr($item['title'], 0, $titleLinePos) . '...'; } + if($directLink) { + $mediaURI = $media->display_url; + } else { + $mediaURI = self::URI . 'p/' . $media->shortcode . '/media?size=l'; + } + switch($media->__typename) { case 'GraphSidecar': $data = $this->getInstagramSidecarData($item['uri'], $item['title']); @@ -130,24 +136,20 @@ class InstagramBridge extends BridgeAbstract { $item['enclosures'] = $data[1]; break; case 'GraphImage': - if($directLink) { - $mediaURI = $media->display_url; - } else { - $mediaURI = self::URI . 'p/' . $media->shortcode . '/media?size=l'; - } $item['content'] = ''; $item['content'] .= '' . $item['title'] . ''; $item['content'] .= '

' . nl2br(htmlentities($textContent)); $item['enclosures'] = array($mediaURI); break; case 'GraphVideo': - $data = $this->getInstagramVideoData($item['uri']); + $data = $this->getInstagramVideoData($item['uri'], $mediaURI); $item['content'] = $data[0]; if($directLink) { $item['enclosures'] = $data[1]; } else { - $item['enclosures'] = array(self::URI . 'p/' . $media->shortcode . '/media?size=l'); + $item['enclosures'] = array($mediaURI); } + $item['thumbnail'] = $mediaURI; break; default: break; } @@ -185,11 +187,14 @@ class InstagramBridge extends BridgeAbstract { } // returns Video post's contents and enclosures - protected function getInstagramVideoData($uri) { + protected function getInstagramVideoData($uri, $mediaURI) { $mediaInfo = $this->getSinglePostData($uri); $textContent = $this->getTextContent($mediaInfo); - $content = '
'; + $content = '
'; $content .= '
' . nl2br(htmlentities($textContent)); return array($content, array($mediaInfo->video_url)); diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index a1ecfcf4..c1bde25f 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -89,6 +89,10 @@ class AtomFormat extends FormatAbstract{ . PHP_EOL; } + $entryThumbnail = $item->thumbnail; + if (!empty($entryThumbnail)) + $entryThumbnail = ''; + $entryLinkAlternate = ''; if (!empty($entryUri)) { $entryLinkAlternate = '{$entryContent} {$entryEnclosures} {$entryCategories} + {$entryThumbnail} EOD; From d39741c29677e50acf618db1f43562cd8863f939 Mon Sep 17 00:00:00 2001 From: somini Date: Wed, 26 Feb 2020 21:15:50 +0000 Subject: [PATCH 030/192] [GithubIssueBridgeIssue] Fix bridge (#1453) * fix bridge according to website evolution --- bridges/GithubIssueBridge.php | 51 +++++++++++++++-------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php index 2eddeb2e..29a336bd 100644 --- a/bridges/GithubIssueBridge.php +++ b/bridges/GithubIssueBridge.php @@ -82,18 +82,21 @@ class GithubIssueBridge extends BridgeAbstract { $uri = $this->buildGitHubIssueCommentUri($issueNbr, $comment->id); - $author = $comment->find('.author', 0)->plaintext; - - $title .= ' / ' . trim($comment->plaintext); - - $content = $title; - if (null !== $comment->nextSibling()) { - $content = $comment->nextSibling()->innertext; - if ($comment->nextSibling()->nodeName() === 'span') { - $content = $comment->nextSibling()->nextSibling()->innertext; - } + $author = $comment->find('.author', 0); + if ($author) { + $author = $author->plaintext; + } else { + $author = ''; } + $title .= ' / ' + . trim(str_replace( + array('octicon','-'), array(''), + $comment->find('.octicon', 0)->getAttribute('class') + )); + + $content = $comment->plaintext; + $item = array(); $item['author'] = $author; $item['uri'] = $uri; @@ -135,32 +138,20 @@ class GithubIssueBridge extends BridgeAbstract { substr($issue->find('.gh-header-number', 0)->plaintext, 1) ); - $comments = $issue->find(' - [id^="issue-"] > .comment, - [id^="issuecomment-"] > .comment, - [id^="event-"], - [id^="ref-"] - '); + $comments = $issue->find( + '.comment, .TimelineItem-badge' + ); + foreach($comments as $comment) { - - if (!$comment->hasChildNodes()) { - continue; - } - - if (!$comment->hasClass('discussion-item-header')) { + if ($comment->hasClass('comment')) { + $comment = $comment->parent; $item = $this->extractIssueComment($issueNbr, $title, $comment); $items[] = $item; continue; - } - - while ($comment->hasClass('discussion-item-header')) { + } else { + $comment = $comment->parent; $item = $this->extractIssueEvent($issueNbr, $title, $comment); $items[] = $item; - $comment = $comment->nextSibling(); - if (null == $comment) { - break; - } - $classes = explode(' ', $comment->getAttribute('class')); } } From a981450ae0bd0ec1700e322dc8e166298e10eb36 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Thu, 27 Feb 2020 02:16:46 +0500 Subject: [PATCH 031/192] [Dockerfile] Build memcached extension (#1415) --- Dockerfile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fa9979d6..02704da5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,11 +3,20 @@ FROM php:7-apache ENV APACHE_DOCUMENT_ROOT=/app RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini" \ - && apt-get --yes update && apt-get --yes install libxml2-dev \ + && apt-get --yes update && apt-get --yes install libxml2-dev zlib1g-dev libmemcached-dev \ && docker-php-ext-install -j$(nproc) simplexml \ && sed -ri -e 's!/var/www/html!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/sites-available/*.conf \ && sed -ri -e 's!/var/www/!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/apache2.conf /etc/apache2/conf-available/*.conf \ && sed -ri -e 's/(MinProtocol\s*=\s*)TLSv1\.2/\1None/' /etc/ssl/openssl.cnf \ && sed -ri -e 's/(CipherString\s*=\s*DEFAULT)@SECLEVEL=2/\1/' /etc/ssl/openssl.cnf +RUN curl https://codeload.github.com/php-memcached-dev/php-memcached/tar.gz/v3.1.5 --output /tmp/php-memcached.tar.gz \ + && mkdir -p /usr/src/php/ext \ + && tar xzvf /tmp/php-memcached.tar.gz -C /usr/src/php/ext \ + && mv /usr/src/php/ext/php-memcached-3.1.5 /usr/src/php/ext/memcached \ + && cd /usr/src/php/ext/memcached \ + && docker-php-ext-configure /usr/src/php/ext/memcached --disable-memcached-sasl \ + && docker-php-ext-install /usr/src/php/ext/memcached \ + && rm -rf /usr/src/php/ext/memcached + COPY --chown=www-data:www-data ./ /app/ \ No newline at end of file From 84616f53bfe9e6af7d8cb3cd59c3c82d7c178240 Mon Sep 17 00:00:00 2001 From: Lyra Date: Wed, 26 Feb 2020 22:23:30 +0100 Subject: [PATCH 032/192] Update contributors --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a9db8eaf..2239826d 100644 --- a/README.md +++ b/README.md @@ -109,8 +109,8 @@ We are RSS-Bridge community, a group of developers continuing the project initia Use this script to generate the list automatically (using the GitHub API): https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 --> - * [16mhz](https://github.com/16mhz) +* [86423355844265459587182778](https://github.com/86423355844265459587182778) * [adamchainz](https://github.com/adamchainz) * [Ahiles3005](https://github.com/Ahiles3005) * [Albirew](https://github.com/Albirew) @@ -126,6 +126,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [azdkj532](https://github.com/azdkj532) * [b1nj](https://github.com/b1nj) * [benasse](https://github.com/benasse) +* [Binnette](https://github.com/Binnette) * [captn3m0](https://github.com/captn3m0) * [chemel](https://github.com/chemel) * [ckiw](https://github.com/ckiw) @@ -161,10 +162,12 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [IceWreck](https://github.com/IceWreck) * [j0k3r](https://github.com/j0k3r) * [JackNUMBER](https://github.com/JackNUMBER) +* [jdesgats](https://github.com/jdesgats) * [jdigilio](https://github.com/jdigilio) * [JeremyRand](https://github.com/JeremyRand) * [Jocker666z](https://github.com/Jocker666z) * [johnnygroovy](https://github.com/johnnygroovy) +* [johnpc](https://github.com/johnpc) * [killruana](https://github.com/killruana) * [klimplant](https://github.com/klimplant) * [kranack](https://github.com/kranack) @@ -187,7 +190,6 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [metaMMA](https://github.com/metaMMA) * [mitsukarenai](https://github.com/mitsukarenai) * [MonsieurPoutounours](https://github.com/MonsieurPoutounours) -* [mr-flibble](https://github.com/mr-flibble) * [mro](https://github.com/mro) * [mxmehl](https://github.com/mxmehl) * [nel50n](https://github.com/nel50n) @@ -221,6 +223,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [sysadminstory](https://github.com/sysadminstory) * [tameroski](https://github.com/tameroski) * [teromene](https://github.com/teromene) +* [tgkenney](https://github.com/tgkenney) * [thefranke](https://github.com/thefranke) * [ThePadawan](https://github.com/ThePadawan) * [TheRadialActive](https://github.com/TheRadialActive) @@ -232,7 +235,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [xurxof](https://github.com/xurxof) * [yardenac](https://github.com/yardenac) * [ZeNairolf](https://github.com/ZeNairolf) - + Licenses === From 0705a2e7bb391d2eac707bb1d195e51f5911b334 Mon Sep 17 00:00:00 2001 From: Lyra Date: Wed, 26 Feb 2020 22:24:20 +0100 Subject: [PATCH 033/192] Bump version to dev.2020-02-26 --- lib/Configuration.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Configuration.php b/lib/Configuration.php index fa32be1f..76a34aff 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -28,7 +28,7 @@ final class Configuration { * * @todo Replace this property by a constant. */ - public static $VERSION = 'dev.2019-12-01'; + public static $VERSION = 'dev.2020-02-26'; /** * Holds the configuration data. From 7b63da522fa728811349081a0921f84bb1fa6f19 Mon Sep 17 00:00:00 2001 From: Lyra Date: Wed, 26 Feb 2020 22:35:44 +0100 Subject: [PATCH 034/192] [InstagramBridge] Use lowercase comparison when looking up user pk --- bridges/InstagramBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 0a6dbaad..58238e9d 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -65,7 +65,7 @@ class InstagramBridge extends BridgeAbstract { $data = getContents(self::URI . 'web/search/topsearch/?query=' . $username); foreach(json_decode($data)->users as $user) { - if($user->user->username === $username) { + if(strtolower($user->user->username) === strtolower($username)) { $key = $user->user->pk; } } From 366d2d66b3fa126cfad7f2ac104e722d5f69d9ed Mon Sep 17 00:00:00 2001 From: John Corser Date: Wed, 26 Feb 2020 17:32:57 -0500 Subject: [PATCH 035/192] [RobinhoodSnacks] Add bridge for Robinhood Snacks (#1460) --- bridges/RobinhoodSnacksBridge.php | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 bridges/RobinhoodSnacksBridge.php diff --git a/bridges/RobinhoodSnacksBridge.php b/bridges/RobinhoodSnacksBridge.php new file mode 100644 index 00000000..e123146c --- /dev/null +++ b/bridges/RobinhoodSnacksBridge.php @@ -0,0 +1,27 @@ +find('#root > div > div > div > div > div > a') as $element) { + if ($element->href === 'https://snacks.robinhood.com/newsletters/page/2/') { + continue; + } + + $this->items[] = array( + 'uri' => $element->href, + 'title' => $element->find('div > div', 3)->plaintext, + 'content' => $element->find('div > div', 4)->plaintext, + ); + } + } +} From c2559ff71fb3ffb81810152d46071129a2e8a4d9 Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Mon, 16 Mar 2020 19:25:28 +0300 Subject: [PATCH 036/192] Add sdfsf --- bridges/GithubTrendingBridge.php | 66 ++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 bridges/GithubTrendingBridge.php diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php new file mode 100644 index 00000000..f788887d --- /dev/null +++ b/bridges/GithubTrendingBridge.php @@ -0,0 +1,66 @@ + array( + 'language' => array( + 'name' => 'Programming language', + 'required' => true + ) + ), + 'global' => array( + 'date_range' => array( + 'name' => 'Date range', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Today' => 'today', + 'Weekly' => 'weekly', + 'Monthly' => 'monthly', + ), + 'defaultValue' => 'today' + ) + ) + + ); + + public function collectData(){ + $params = array('since' => urlencode($this->getInput('date_range'))); + $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Error while downloading the website content'); + + foreach($html->find('.Box-row') as $element) { + $item = array(); + + // URI + $item['uri'] = substr(self::URI, 0, -1) . $element->find('h1 a', 0)->href; + + // Title + $item['title'] = str_replace(' ', '', trim(strip_tags($element->find('h1 a', 0)->plaintext))); + + // Description + $item['description'] = trim(strip_tags($element->find('p.text-gray', 0)->innertext)); + + // Time + $item['timestamp'] = time(); + + // TODO: Proxy? + $this->items[] = $item; + } + } + + public function getName(){ + if(!is_null($this->getInput('language'))) { + return self::NAME . ' - ' . $this->getInput('language'); + } + + return parent::getName(); + } +} From 7c71377af08db38695deb7f89d19f8731324fc5f Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Fri, 20 Mar 2020 16:34:42 +0300 Subject: [PATCH 037/192] Add additional languages + fix issues --- bridges/GithubTrendingBridge.php | 582 ++++++++++++++++++++++++++++++- 1 file changed, 574 insertions(+), 8 deletions(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index f788887d..96a04aa9 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -4,15 +4,582 @@ class GithubTrendingBridge extends BridgeAbstract { const MAINTAINER = 'liamka'; const NAME = 'Github Trending'; const URI = 'https://github.com/trending'; + const URI_ITEM = 'https://github.com'; const CACHE_TIMEOUT = 43200; // 12hr const DESCRIPTION = 'See what the GitHub community is most excited repos.'; const PARAMETERS = array( - 'Language' => array( + 'By language' => array( 'language' => array( - 'name' => 'Programming language', - 'required' => true + 'name' => 'Select language', + 'type' => 'list', + 'values' => array( + 'All languages' => '', + 'C++' => 'c++', + 'HTML' => 'html', + 'Java' => 'java', + 'JavaScript' => 'javascript', + 'PHP' => 'php', + 'Python' => 'python', + 'Ruby' => 'ruby', + 'Unknown languages' => 'unknown languages', + '1C Enterprise' => '1c enterprise', + '4D' => '4d', + 'ABAP' => 'abap', + 'ABNF' => 'abnf', + 'ActionScript' => 'actionscript', + 'Ada' => 'ada', + 'Adobe Font Metrics' => 'adobe font metrics', + 'Agda' => 'agda', + 'AGS Script' => 'ags script', + 'Alloy' => 'alloy', + 'Alpine Abuild' => 'alpine abuild', + 'Altium Designer' => 'altium designer', + 'AMPL' => 'ampl', + 'AngelScript' => 'angelscript', + 'Ant Build System' => 'ant build system', + 'ANTLR' => 'antlr', + 'ApacheConf' => 'apacheconf', + 'Apex' => 'apex', + 'API Blueprint' => 'api blueprint', + 'APL' => 'apl', + 'Apollo Guidance Computer' => 'apollo guidance computer', + 'AppleScript' => 'applescript', + 'Arc' => 'arc', + 'AsciiDoc' => 'asciidoc', + 'ASN.1' => 'asn.1', + 'ASP' => 'asp', + 'AspectJ' => 'aspectj', + 'Assembly' => 'assembly', + 'Asymptote' => 'asymptote', + 'ATS' => 'ats', + 'Augeas' => 'augeas', + 'AutoHotkey' => 'autohotkey', + 'AutoIt' => 'autoit', + 'Awk' => 'awk', + 'Ballerina' => 'ballerina', + 'Batchfile' => 'batchfile', + 'Befunge' => 'befunge', + 'BibTeX' => 'bibtex', + 'Bison' => 'bison', + 'BitBake' => 'bitbake', + 'Blade' => 'blade', + 'BlitzBasic' => 'blitzbasic', + 'BlitzMax' => 'blitzmax', + 'Bluespec' => 'bluespec', + 'Boo' => 'boo', + 'Brainfuck' => 'brainfuck', + 'Brightscript' => 'brightscript', + 'Zeek' => 'zeek', + 'C' => 'c', + 'C#' => 'c#', + 'C++' => 'c++', + 'C-ObjDump' => 'c-objdump', + 'C2hs Haskell' => 'c2hs haskell', + 'Cabal Config' => 'cabal config', + 'CartoCSS' => 'cartocss', + 'Ceylon' => 'ceylon', + 'Chapel' => 'chapel', + 'Charity' => 'charity', + 'ChucK' => 'chuck', + 'Cirru' => 'cirru', + 'Clarion' => 'clarion', + 'Clean' => 'clean', + 'Click' => 'click', + 'CLIPS' => 'clips', + 'Clojure' => 'clojure', + 'Closure Templates' => 'closure templates', + 'Cloud Firestore Security Rules' => 'cloud firestore security rules', + 'CMake' => 'cmake', + 'COBOL' => 'cobol', + 'CodeQL' => 'codeql', + 'CoffeeScript' => 'coffeescript', + 'ColdFusion' => 'coldfusion', + 'ColdFusion CFC' => 'coldfusion cfc', + 'COLLADA' => 'collada', + 'Common Lisp' => 'common lisp', + 'Common Workflow Language' => 'common workflow language', + 'Component Pascal' => 'component pascal', + 'CoNLL-U' => 'conll-u', + 'Cool' => 'cool', + 'Coq' => 'coq', + 'Cpp-ObjDump' => 'cpp-objdump', + 'Creole' => 'creole', + 'Crystal' => 'crystal', + 'CSON' => 'cson', + 'Csound' => 'csound', + 'Csound Document' => 'csound document', + 'Csound Score' => 'csound score', + 'CSS' => 'css', + 'CSV' => 'csv', + 'Cuda' => 'cuda', + 'cURL Config' => 'curl config', + 'CWeb' => 'cweb', + 'Cycript' => 'cycript', + 'Cython' => 'cython', + 'D' => 'd', + 'D-ObjDump' => 'd-objdump', + 'Darcs Patch' => 'darcs patch', + 'Dart' => 'dart', + 'DataWeave' => 'dataweave', + 'desktop' => 'desktop', + 'Dhall' => 'dhall', + 'Diff' => 'diff', + 'DIGITAL Command Language' => 'digital command language', + 'dircolors' => 'dircolors', + 'DirectX 3D File' => 'directx 3d file', + 'DM' => 'dm', + 'DNS Zone' => 'dns zone', + 'Dockerfile' => 'dockerfile', + 'Dogescript' => 'dogescript', + 'DTrace' => 'dtrace', + 'Dylan' => 'dylan', + 'E' => 'e', + 'Eagle' => 'eagle', + 'Easybuild' => 'easybuild', + 'EBNF' => 'ebnf', + 'eC' => 'ec', + 'Ecere Projects' => 'ecere projects', + 'ECL' => 'ecl', + 'ECLiPSe' => 'eclipse', + 'EditorConfig' => 'editorconfig', + 'Edje Data Collection' => 'edje data collection', + 'edn' => 'edn', + 'Eiffel' => 'eiffel', + 'EJS' => 'ejs', + 'Elixir' => 'elixir', + 'Elm' => 'elm', + 'Emacs Lisp' => 'emacs lisp', + 'EmberScript' => 'emberscript', + 'EML' => 'eml', + 'EQ' => 'eq', + 'Erlang' => 'erlang', + 'F#' => 'f#', + 'F*' => 'f*', + 'Factor' => 'factor', + 'Fancy' => 'fancy', + 'Fantom' => 'fantom', + 'Faust' => 'faust', + 'FIGlet Font' => 'figlet font', + 'Filebench WML' => 'filebench wml', + 'Filterscript' => 'filterscript', + 'fish' => 'fish', + 'FLUX' => 'flux', + 'Formatted' => 'formatted', + 'Forth' => 'forth', + 'Fortran' => 'fortran', + 'FreeMarker' => 'freemarker', + 'Frege' => 'frege', + 'G-code' => 'g-code', + 'Game Maker Language' => 'game maker language', + 'GAML' => 'gaml', + 'GAMS' => 'gams', + 'GAP' => 'gap', + 'GCC Machine Description' => 'gcc machine description', + 'GDB' => 'gdb', + 'GDScript' => 'gdscript', + 'Genie' => 'genie', + 'Genshi' => 'genshi', + 'Gentoo Ebuild' => 'gentoo ebuild', + 'Gentoo Eclass' => 'gentoo eclass', + 'Gerber Image' => 'gerber image', + 'Gettext Catalog' => 'gettext catalog', + 'Gherkin' => 'gherkin', + 'Git Attributes' => 'git attributes', + 'Git Config' => 'git config', + 'GLSL' => 'glsl', + 'Glyph' => 'glyph', + 'Glyph Bitmap Distribution Format' => 'glyph bitmap distribution format', + 'GN' => 'gn', + 'Gnuplot' => 'gnuplot', + 'Go' => 'go', + 'Golo' => 'golo', + 'Gosu' => 'gosu', + 'Grace' => 'grace', + 'Gradle' => 'gradle', + 'Grammatical Framework' => 'grammatical framework', + 'Graph Modeling Language' => 'graph modeling language', + 'GraphQL' => 'graphql', + 'Graphviz (DOT)' => 'graphviz (dot)', + 'Groovy' => 'groovy', + 'Groovy Server Pages' => 'groovy server pages', + 'Hack' => 'hack', + 'Haml' => 'haml', + 'Handlebars' => 'handlebars', + 'HAProxy' => 'haproxy', + 'Harbour' => 'harbour', + 'Haskell' => 'haskell', + 'Haxe' => 'haxe', + 'HCL' => 'hcl', + 'HiveQL' => 'hiveql', + 'HLSL' => 'hlsl', + 'HolyC' => 'holyc', + 'HTML' => 'html', + 'HTML+Django' => 'html+django', + 'HTML+ECR' => 'html+ecr', + 'HTML+EEX' => 'html+eex', + 'HTML+ERB' => 'html+erb', + 'HTML+PHP' => 'html+php', + 'HTML+Razor' => 'html+razor', + 'HTTP' => 'http', + 'HXML' => 'hxml', + 'Hy' => 'hy', + 'HyPhy' => 'hyphy', + 'IDL' => 'idl', + 'Idris' => 'idris', + 'Ignore List' => 'ignore list', + 'IGOR Pro' => 'igor pro', + 'Inform 7' => 'inform 7', + 'INI' => 'ini', + 'Inno Setup' => 'inno setup', + 'Io' => 'io', + 'Ioke' => 'ioke', + 'IRC log' => 'irc log', + 'Isabelle' => 'isabelle', + 'Isabelle ROOT' => 'isabelle root', + 'J' => 'j', + 'Jasmin' => 'jasmin', + 'Java' => 'java', + 'Java Properties' => 'java properties', + 'Java Server Pages' => 'java server pages', + 'JavaScript' => 'javascript', + 'JavaScript+ERB' => 'javascript+erb', + 'JFlex' => 'jflex', + 'Jison' => 'jison', + 'Jison Lex' => 'jison lex', + 'Jolie' => 'jolie', + 'JSON' => 'json', + 'JSON with Comments' => 'json with comments', + 'JSON5' => 'json5', + 'JSONiq' => 'jsoniq', + 'JSONLD' => 'jsonld', + 'Jsonnet' => 'jsonnet', + 'JSX' => 'jsx', + 'Julia' => 'julia', + 'Jupyter Notebook' => 'jupyter notebook', + 'KiCad Layout' => 'kicad layout', + 'KiCad Legacy Layout' => 'kicad legacy layout', + 'KiCad Schematic' => 'kicad schematic', + 'Kit' => 'kit', + 'Kotlin' => 'kotlin', + 'KRL' => 'krl', + 'LabVIEW' => 'labview', + 'Lasso' => 'lasso', + 'Latte' => 'latte', + 'Lean' => 'lean', + 'Less' => 'less', + 'Lex' => 'lex', + 'LFE' => 'lfe', + 'LilyPond' => 'lilypond', + 'Limbo' => 'limbo', + 'Linker Script' => 'linker script', + 'Linux Kernel Module' => 'linux kernel module', + 'Liquid' => 'liquid', + 'Literate Agda' => 'literate agda', + 'Literate CoffeeScript' => 'literate coffeescript', + 'Literate Haskell' => 'literate haskell', + 'LiveScript' => 'livescript', + 'LLVM' => 'llvm', + 'Logos' => 'logos', + 'Logtalk' => 'logtalk', + 'LOLCODE' => 'lolcode', + 'LookML' => 'lookml', + 'LoomScript' => 'loomscript', + 'LSL' => 'lsl', + 'LTspice Symbol' => 'ltspice symbol', + 'Lua' => 'lua', + 'M' => 'm', + 'M4' => 'm4', + 'M4Sugar' => 'm4sugar', + 'Makefile' => 'makefile', + 'Mako' => 'mako', + 'Markdown' => 'markdown', + 'Marko' => 'marko', + 'Mask' => 'mask', + 'Mathematica' => 'mathematica', + 'MATLAB' => 'matlab', + 'Maven POM' => 'maven pom', + 'Max' => 'max', + 'MAXScript' => 'maxscript', + 'mcfunction' => 'mcfunction', + 'MediaWiki' => 'mediawiki', + 'Mercury' => 'mercury', + 'Meson' => 'meson', + 'Metal' => 'metal', + 'Microsoft Developer Studio Project' => 'microsoft developer studio project', + 'MiniD' => 'minid', + 'Mirah' => 'mirah', + 'mIRC Script' => 'mirc script', + 'MLIR' => 'mlir', + 'Modelica' => 'modelica', + 'Modula-2' => 'modula-2', + 'Modula-3' => 'modula-3', + 'Module Management System' => 'module management system', + 'Monkey' => 'monkey', + 'Moocode' => 'moocode', + 'MoonScript' => 'moonscript', + 'Motorola 68K Assembly' => 'motorola 68k assembly', + 'MQL4' => 'mql4', + 'MQL5' => 'mql5', + 'MTML' => 'mtml', + 'MUF' => 'muf', + 'mupad' => 'mupad', + 'Muse' => 'muse', + 'Myghty' => 'myghty', + 'nanorc' => 'nanorc', + 'NASL' => 'nasl', + 'NCL' => 'ncl', + 'Nearley' => 'nearley', + 'Nemerle' => 'nemerle', + 'nesC' => 'nesc', + 'NetLinx' => 'netlinx', + 'NetLinx+ERB' => 'netlinx+erb', + 'NetLogo' => 'netlogo', + 'NewLisp' => 'newlisp', + 'Nextflow' => 'nextflow', + 'Nginx' => 'nginx', + 'Nim' => 'nim', + 'Ninja' => 'ninja', + 'Nit' => 'nit', + 'Nix' => 'nix', + 'NL' => 'nl', + 'NPM Config' => 'npm config', + 'NSIS' => 'nsis', + 'Nu' => 'nu', + 'NumPy' => 'numpy', + 'ObjDump' => 'objdump', + 'Object Data Instance Notation' => 'object data instance notation', + 'Objective-C' => 'objective-c', + 'Objective-C++' => 'objective-c++', + 'Objective-J' => 'objective-j', + 'ObjectScript' => 'objectscript', + 'OCaml' => 'ocaml', + 'Odin' => 'odin', + 'Omgrofl' => 'omgrofl', + 'ooc' => 'ooc', + 'Opa' => 'opa', + 'Opal' => 'opal', + 'Open Policy Agent' => 'open policy agent', + 'OpenCL' => 'opencl', + 'OpenEdge ABL' => 'openedge abl', + 'OpenQASM' => 'openqasm', + 'OpenRC runscript' => 'openrc runscript', + 'OpenSCAD' => 'openscad', + 'OpenStep Property List' => 'openstep property list', + 'OpenType Feature File' => 'opentype feature file', + 'Org' => 'org', + 'Ox' => 'ox', + 'Oxygene' => 'oxygene', + 'Oz' => 'oz', + 'P4' => 'p4', + 'Pan' => 'pan', + 'Papyrus' => 'papyrus', + 'Parrot' => 'parrot', + 'Parrot Assembly' => 'parrot assembly', + 'Parrot Internal Representation' => 'parrot internal representation', + 'Pascal' => 'pascal', + 'Pawn' => 'pawn', + 'Pep8' => 'pep8', + 'Perl' => 'perl', + 'PHP' => 'php', + 'Pic' => 'pic', + 'Pickle' => 'pickle', + 'PicoLisp' => 'picolisp', + 'PigLatin' => 'piglatin', + 'Pike' => 'pike', + 'PLpgSQL' => 'plpgsql', + 'PLSQL' => 'plsql', + 'Pod' => 'pod', + 'Pod 6' => 'pod 6', + 'PogoScript' => 'pogoscript', + 'Pony' => 'pony', + 'PostCSS' => 'postcss', + 'PostScript' => 'postscript', + 'POV-Ray SDL' => 'pov-ray sdl', + 'PowerBuilder' => 'powerbuilder', + 'PowerShell' => 'powershell', + 'Prisma' => 'prisma', + 'Processing' => 'processing', + 'Proguard' => 'proguard', + 'Prolog' => 'prolog', + 'Propeller Spin' => 'propeller spin', + 'Protocol Buffer' => 'protocol buffer', + 'Public Key' => 'public key', + 'Pug' => 'pug', + 'Puppet' => 'puppet', + 'Pure Data' => 'pure data', + 'PureBasic' => 'purebasic', + 'PureScript' => 'purescript', + 'Python' => 'python', + 'Python console' => 'python console', + 'Python traceback' => 'python traceback', + 'q' => 'q', + 'QMake' => 'qmake', + 'QML' => 'qml', + 'Quake' => 'quake', + 'R' => 'r', + 'Racket' => 'racket', + 'Ragel' => 'ragel', + 'Raku' => 'raku', + 'RAML' => 'raml', + 'Rascal' => 'rascal', + 'Raw token data' => 'raw token data', + 'RDoc' => 'rdoc', + 'Readline Config' => 'readline config', + 'REALbasic' => 'realbasic', + 'Reason' => 'reason', + 'Rebol' => 'rebol', + 'Red' => 'red', + 'Redcode' => 'redcode', + 'Regular Expression' => 'regular expression', + // 'Ren'Py' => 'ren'py', + 'RenderScript' => 'renderscript', + 'reStructuredText' => 'restructuredtext', + 'REXX' => 'rexx', + 'RHTML' => 'rhtml', + 'Rich Text Format' => 'rich text format', + 'Ring' => 'ring', + 'Riot' => 'riot', + 'RMarkdown' => 'rmarkdown', + 'RobotFramework' => 'robotframework', + 'Roff' => 'roff', + 'Roff Manpage' => 'roff manpage', + 'Rouge' => 'rouge', + 'RPC' => 'rpc', + 'RPM Spec' => 'rpm spec', + 'Ruby' => 'ruby', + 'RUNOFF' => 'runoff', + 'Rust' => 'rust', + 'Sage' => 'sage', + 'SaltStack' => 'saltstack', + 'SAS' => 'sas', + 'Sass' => 'sass', + 'Scala' => 'scala', + 'Scaml' => 'scaml', + 'Scheme' => 'scheme', + 'Scilab' => 'scilab', + 'SCSS' => 'scss', + 'sed' => 'sed', + 'Self' => 'self', + 'ShaderLab' => 'shaderlab', + 'Shell' => 'shell', + 'ShellSession' => 'shellsession', + 'Shen' => 'shen', + 'Slash' => 'slash', + 'Slice' => 'slice', + 'Slim' => 'slim', + 'Smali' => 'smali', + 'Smalltalk' => 'smalltalk', + 'Smarty' => 'smarty', + 'SmPL' => 'smpl', + 'SMT' => 'smt', + 'Solidity' => 'solidity', + 'SourcePawn' => 'sourcepawn', + 'SPARQL' => 'sparql', + 'Spline Font Database' => 'spline font database', + 'SQF' => 'sqf', + 'SQL' => 'sql', + 'SQLPL' => 'sqlpl', + 'Squirrel' => 'squirrel', + 'SRecode Template' => 'srecode template', + 'SSH Config' => 'ssh config', + 'Stan' => 'stan', + 'Standard ML' => 'standard ml', + 'Starlark' => 'starlark', + 'Stata' => 'stata', + 'STON' => 'ston', + 'Stylus' => 'stylus', + 'SubRip Text' => 'subrip text', + 'SugarSS' => 'sugarss', + 'SuperCollider' => 'supercollider', + 'Svelte' => 'svelte', + 'SVG' => 'svg', + 'Swift' => 'swift', + 'SWIG' => 'swig', + 'SystemVerilog' => 'systemverilog', + 'Tcl' => 'tcl', + 'Tcsh' => 'tcsh', + 'Tea' => 'tea', + 'Terra' => 'terra', + 'TeX' => 'tex', + 'Texinfo' => 'texinfo', + 'Text' => 'text', + 'Textile' => 'textile', + 'Thrift' => 'thrift', + 'TI Program' => 'ti program', + 'TLA' => 'tla', + 'TOML' => 'toml', + 'TSQL' => 'tsql', + 'TSX' => 'tsx', + 'Turing' => 'turing', + 'Turtle' => 'turtle', + 'Twig' => 'twig', + 'TXL' => 'txl', + 'Type Language' => 'type language', + 'TypeScript' => 'typescript', + 'Unified Parallel C' => 'unified parallel c', + 'Unity3D Asset' => 'unity3d asset', + 'Unix Assembly' => 'unix assembly', + 'Uno' => 'uno', + 'UnrealScript' => 'unrealscript', + 'UrWeb' => 'urweb', + 'V' => 'v', + 'Vala' => 'vala', + 'VBA' => 'vba', + 'VBScript' => 'vbscript', + 'VCL' => 'vcl', + 'Verilog' => 'verilog', + 'VHDL' => 'vhdl', + 'Vim script' => 'vim script', + 'Vim Snippet' => 'vim snippet', + 'Visual Basic .NET' => 'visual basic .net', + 'Visual Basic .NET' => 'visual basic .net', + 'Volt' => 'volt', + 'Vue' => 'vue', + 'Wavefront Material' => 'wavefront material', + 'Wavefront Object' => 'wavefront object', + 'wdl' => 'wdl', + 'Web Ontology Language' => 'web ontology language', + 'WebAssembly' => 'webassembly', + 'WebIDL' => 'webidl', + 'WebVTT' => 'webvtt', + 'Wget Config' => 'wget config', + 'Windows Registry Entries' => 'windows registry entries', + 'wisp' => 'wisp', + 'Wollok' => 'wollok', + 'World of Warcraft Addon Data' => 'world of warcraft addon data', + 'X BitMap' => 'x bitmap', + 'X Font Directory Index' => 'x font directory index', + 'X PixMap' => 'x pixmap', + 'X10' => 'x10', + 'xBase' => 'xbase', + 'XC' => 'xc', + 'XCompose' => 'xcompose', + 'XML' => 'xml', + 'XML Property List' => 'xml property list', + 'Xojo' => 'xojo', + 'XPages' => 'xpages', + 'XProc' => 'xproc', + 'XQuery' => 'xquery', + 'XS' => 'xs', + 'XSLT' => 'xslt', + 'Xtend' => 'xtend', + 'Yacc' => 'yacc', + 'YAML' => 'yaml', + 'YANG' => 'yang', + 'YARA' => 'yara', + 'YASnippet' => 'yasnippet', + 'ZAP' => 'zap', + 'Zeek' => 'zeek', + 'ZenScript' => 'zenscript', + 'Zephir' => 'zephir', + 'Zig' => 'zig', + 'ZIL' => 'zil', + 'Zimpl' => 'zimpl', + ), + 'defaultValue' => 'All languages' ) ), + 'global' => array( 'date_range' => array( 'name' => 'Date range', @@ -29,24 +596,23 @@ class GithubTrendingBridge extends BridgeAbstract { ); + public function collectData(){ $params = array('since' => urlencode($this->getInput('date_range'))); $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); - $html = getSimpleHTMLDOM($url) - or returnServerError('Error while downloading the website content'); - + $this->items = []; foreach($html->find('.Box-row') as $element) { $item = array(); // URI - $item['uri'] = substr(self::URI, 0, -1) . $element->find('h1 a', 0)->href; + $item['uri'] = self::URI_ITEM . $element->find('h1 a', 0)->href; // Title $item['title'] = str_replace(' ', '', trim(strip_tags($element->find('h1 a', 0)->plaintext))); // Description - $item['description'] = trim(strip_tags($element->find('p.text-gray', 0)->innertext)); + $item['content'] = trim(strip_tags($element->find('p.text-gray', 0)->innertext)); // Time $item['timestamp'] = time(); From 7b73f3217f1239a1f5784d261e13a9c49e3db73b Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Sat, 21 Mar 2020 05:01:45 +0300 Subject: [PATCH 038/192] Fix page request --- bridges/GithubTrendingBridge.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 96a04aa9..534caa10 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -601,6 +601,9 @@ class GithubTrendingBridge extends BridgeAbstract { $params = array('since' => urlencode($this->getInput('date_range'))); $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); + $html = getSimpleHTMLDOM($url) + or returnServerError('Error while downloading the website content'); + $this->items = []; foreach($html->find('.Box-row') as $element) { $item = array(); From ec7ef8f5026681ca0624992aad2fa4d6715ca003 Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Sat, 21 Mar 2020 05:07:38 +0300 Subject: [PATCH 039/192] Update GithubTrendingBridge.php --- bridges/GithubTrendingBridge.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 534caa10..24295754 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -596,7 +596,6 @@ class GithubTrendingBridge extends BridgeAbstract { ); - public function collectData(){ $params = array('since' => urlencode($this->getInput('date_range'))); $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); @@ -604,7 +603,7 @@ class GithubTrendingBridge extends BridgeAbstract { $html = getSimpleHTMLDOM($url) or returnServerError('Error while downloading the website content'); - $this->items = []; + $this->items = array(); foreach($html->find('.Box-row') as $element) { $item = array(); From 3ec32bb6c2bb2e41c76d9d755794dde0ec9e4075 Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Sun, 22 Mar 2020 21:43:37 +0300 Subject: [PATCH 040/192] Fix title if language not set --- bridges/GithubTrendingBridge.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 24295754..0854946a 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -625,7 +625,9 @@ class GithubTrendingBridge extends BridgeAbstract { } public function getName(){ - if(!is_null($this->getInput('language'))) { + if($this->getInput('language') == '') { + return self::NAME . ' - all'; + } elseif (!is_null($this->getInput('language'))) { return self::NAME . ' - ' . $this->getInput('language'); } From 0fd702103051d4e9a2fd1c800eac0bfc7a068f6d Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Mon, 23 Mar 2020 10:35:02 +0300 Subject: [PATCH 041/192] Change cache time to 24hr (daily update time) --- bridges/GithubTrendingBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 0854946a..85c683d2 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -5,7 +5,7 @@ class GithubTrendingBridge extends BridgeAbstract { const NAME = 'Github Trending'; const URI = 'https://github.com/trending'; const URI_ITEM = 'https://github.com'; - const CACHE_TIMEOUT = 43200; // 12hr + const CACHE_TIMEOUT = 86400; // 24hr (minimum since daily) const DESCRIPTION = 'See what the GitHub community is most excited repos.'; const PARAMETERS = array( 'By language' => array( From f2de5aecc729e70f8d72a49f4ca0c5da033cfff7 Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Tue, 24 Mar 2020 19:07:23 +0300 Subject: [PATCH 042/192] Change feed title --- bridges/GithubTrendingBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 85c683d2..7290c855 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -5,7 +5,7 @@ class GithubTrendingBridge extends BridgeAbstract { const NAME = 'Github Trending'; const URI = 'https://github.com/trending'; const URI_ITEM = 'https://github.com'; - const CACHE_TIMEOUT = 86400; // 24hr (minimum since daily) + const CACHE_TIMEOUT = 43200; // 12hr const DESCRIPTION = 'See what the GitHub community is most excited repos.'; const PARAMETERS = array( 'By language' => array( @@ -626,9 +626,9 @@ class GithubTrendingBridge extends BridgeAbstract { public function getName(){ if($this->getInput('language') == '') { - return self::NAME . ' - all'; + return self::NAME . ': All'; } elseif (!is_null($this->getInput('language'))) { - return self::NAME . ' - ' . $this->getInput('language'); + return self::NAME . ': ' . ucfirst($this->getInput('language')); } return parent::getName(); From e0ac9972eeb8ba5886b2f9656bcbc6639a427b41 Mon Sep 17 00:00:00 2001 From: somini Date: Wed, 25 Mar 2020 19:02:09 +0000 Subject: [PATCH 043/192] [MediapartBlogsBridge]: New Bridge Fix #1468 --- bridges/MediapartBlogsBridge.php | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 bridges/MediapartBlogsBridge.php diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php new file mode 100644 index 00000000..be46b0e5 --- /dev/null +++ b/bridges/MediapartBlogsBridge.php @@ -0,0 +1,49 @@ + array( + 'name' => 'Blog Slug', + 'type' => 'text', + 'title' => 'Blog user name', + 'exampleValue' => 'jean-vincot', + ) + ) + ); + + public function getIcon() { + return 'https://static.mediapart.fr/favicon/favicon-club.ico?v=2'; + } + + public function collectData() { + $html = getSimpleHTMLDOM(self::BASE_URI . '/' . $this->getInput('slug') . '/blog') + or returnServerError('Could not load content'); + + foreach($html->find('ul.post-list li') as $element) { + $item = array(); + + $item_title = $element->find('h3.title a', 0); + $item_divs = $element->find('div'); + + $item['title'] = $item_title->innertext; + $item['uri'] = self::BASE_URI . trim($item_title->href); + $item['author'] = $element->find('.author .subscriber', 0)->innertext; + $item['content'] = $item_divs[count($item_divs) - 2] . $item_divs[count($item_divs) - 1]; + $item['timestamp'] = strtotime($element->find('.author time', 0)->datetime); + + $this->items[] = $item; + } + } + + public function getName() { + if ($this->getInput('slug')) { + return self::NAME . ' | ' . $this->getInput('slug'); + } + return parent::getName(); + } + +} From 00dd81a8aadad1ec94e955255abc2dab151222da Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 25 Mar 2020 20:40:17 +0100 Subject: [PATCH 044/192] [DarkReading] Hide dummy articles --- bridges/DarkReadingBridge.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bridges/DarkReadingBridge.php b/bridges/DarkReadingBridge.php index 3baaad75..6ab83e9c 100644 --- a/bridges/DarkReadingBridge.php +++ b/bridges/DarkReadingBridge.php @@ -53,6 +53,8 @@ class DarkReadingBridge extends FeedExpander { protected function parseItem($newsItem){ $item = parent::parseItem($newsItem); + if (empty($item['content'])) + return null; //ignore dummy articles $article = getSimpleHTMLDOMCached($item['uri']) or returnServerError('Could not request Dark Reading: ' . $item['uri']); $item['content'] = $this->extractArticleContent($article); From fbfc82b0b7118a9c986da34a4a048b19b4344335 Mon Sep 17 00:00:00 2001 From: Kirill Kotikov Date: Thu, 26 Mar 2020 21:37:19 +0300 Subject: [PATCH 045/192] Revert feed title --- bridges/GithubTrendingBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php index 7290c855..0b4a9078 100644 --- a/bridges/GithubTrendingBridge.php +++ b/bridges/GithubTrendingBridge.php @@ -626,9 +626,9 @@ class GithubTrendingBridge extends BridgeAbstract { public function getName(){ if($this->getInput('language') == '') { - return self::NAME . ': All'; + return self::NAME . ': all'; } elseif (!is_null($this->getInput('language'))) { - return self::NAME . ': ' . ucfirst($this->getInput('language')); + return self::NAME . ': ' . $this->getInput('language'); } return parent::getName(); From 066e42e99acb475158c148a91c3a2e2ad4fe49e4 Mon Sep 17 00:00:00 2001 From: somini Date: Tue, 31 Mar 2020 02:32:15 +0100 Subject: [PATCH 046/192] [FolhaDeSaoPauloBridge]: Improve HTML --- bridges/FolhaDeSaoPauloBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php index acd8d259..e4fc0e44 100644 --- a/bridges/FolhaDeSaoPauloBridge.php +++ b/bridges/FolhaDeSaoPauloBridge.php @@ -26,7 +26,7 @@ class FolhaDeSaoPauloBridge extends FeedExpander { $item_content = $articleHTMLContent->find('div.c-news__body', 0); if ($item_content) { $text = $item_content->innertext; - $text = strip_tags($text, '

'); + $text = strip_tags($text, '

'); $item['content'] = $text; } } else { From 223337d62d76946789b48f8c6424dd59e1f0b9f2 Mon Sep 17 00:00:00 2001 From: somini Date: Tue, 31 Mar 2020 02:34:38 +0100 Subject: [PATCH 047/192] [FolhaDeSaoPauloBridge]: Improve URL Remove the redirection. --- bridges/FolhaDeSaoPauloBridge.php | 1 + 1 file changed, 1 insertion(+) diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php index e4fc0e44..9a9717cb 100644 --- a/bridges/FolhaDeSaoPauloBridge.php +++ b/bridges/FolhaDeSaoPauloBridge.php @@ -28,6 +28,7 @@ class FolhaDeSaoPauloBridge extends FeedExpander { $text = $item_content->innertext; $text = strip_tags($text, '

'); $item['content'] = $text; + $item['uri'] = explode('*', $item['uri'])[1]; } } else { Debug::log('???: ' . $item['uri']); From cccd390b0f4e6ab4639d5ff9f5aa21c2f5c7c14b Mon Sep 17 00:00:00 2001 From: Dreckiger-Dan Date: Tue, 31 Mar 2020 23:47:57 +0200 Subject: [PATCH 048/192] [HeiseBridge] add TechStage support --- bridges/HeiseBridge.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 1d9d8025..0401a48b 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -62,6 +62,9 @@ class HeiseBridge extends FeedExpander { $content = $article->find('div[class*="article-content"]', 0); + if ($content == null) + $content = $article->find('#article_content', 0); + foreach($content->find('p, h3, ul, table, pre, img') as $element) { $item['content'] .= $element; } From 604d527ac7e4db216028e89a37acf182e58fa410 Mon Sep 17 00:00:00 2001 From: Christian Schabesberger Date: Tue, 31 Mar 2020 21:14:16 +0200 Subject: [PATCH 049/192] add nordbayern bridge fix intending --- bridges/NordbayernBridge.php | 127 +++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 bridges/NordbayernBridge.php diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php new file mode 100644 index 00000000..a7ae72e3 --- /dev/null +++ b/bridges/NordbayernBridge.php @@ -0,0 +1,127 @@ + array( + 'name' => 'region', + 'type' => 'list', + 'exampleValue' => 'Nürnberg', + 'title' => 'Select a region', + 'values' => array( + 'Nürnberg' => 'nuernberg', + 'Fürth' => 'fuerth', + 'Altdorf' => 'altdorf', + 'Ansbach' => 'ansbach', + 'Bad Windsheim' => 'bad-windsheim', + 'Bamberg' => 'bamberg', + 'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen', + 'Feucht' => 'feucht', + 'Forchheim' => 'forchheim', + 'Gunzenhausen' => 'gunzenhausen', + 'Hersbruck' => 'hersbruck', + 'Herzogenaurach' => 'herzogenaurach', + 'Hilpolstein' => 'holpolstein', + 'Höchstadt' => 'hoechstadt', + 'Lauf' => 'lauf', + 'Neumarkt' => 'neumarkt', + 'Neustadt/Aisch' => 'neustadt-aisch', + 'Pegnitz' => 'pegnitz', + 'Roth' => 'roth', + 'Rothenburg o.d.T.' => 'rothenburg-o-d-t', + 'Schwabach' => 'schwabach', + 'Treuchtlingen' => 'treuchtlingen', + 'Weißenburg' => 'weissenburg' + ) + ), + 'policeReports' => array( + 'name' => 'Police Reports', + 'type' => 'checkbox', + 'exampleValue' => 'checked', + 'title' => 'Read Police Reports', + ) + )); + + private function getImageUrlFromScript($script) { + preg_match("#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9_]*\\.jpg)#", $script->innertext, $matches, PREG_OFFSET_CAPTURE); + if(isset($matches[1][0])) { + return stripcslashes($matches[1][0]) . '?w=800'; + } else { + return null; + } + } + + private function handleArticle($link) { + $item = array(); + $article = getSimpleHTMLDOM($link); + $content = $article->find('div[class*=article-content]', 0); + $item['uri'] = $link; + $item['title'] = $article->find('h1', 0)->innertext; + $item['content'] = ''; + + //first get image from block/modul + $figure = $article->find('figure[class*=panorama]', 0); + if($figure !== null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 0)); + if($imgUrl === null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 1)); + } + $item['content'] .= ''; + } + + // get regular paragraphs + foreach($content->children() as $child) { + if($child->tag === 'p') { + $item['content'] .= $child; + } + } + + //get image divs + foreach($content->find('div[class*=article-slideshow]') as $slides) { + foreach($slides->children() as $child) { + switch($child->tag) { + case 'p': + $item['content'] .= $child; + break; + case 'h5': + $item['content'] .= '
' . $child->plaintext . '
'; + break; + case 'a': + $url = self::getImageUrlFromScript($child->find('script', 0)); + $item['content'] .= ''; + break; + } + } + } + $this->items[] = $item; + $article->clear(); + } + + private function handleNewsblock($listSite, $readPoliceReports) { + $newsBlocks = $listSite->find('section[class*=newsblock]'); + $policeBlock = $newsBlocks[0]; + $regionalNewsBlock = $newsBlocks[1]; + if($readPoliceReports === true) { + foreach($policeBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + } + foreach($regionalNewsBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + } + + public function collectData() { + $item = array(); + $region = $this->getInput('region'); + $listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region); + + self::handleNewsblock($listSite, $this->getInput('policeReports')); + } +} From 119f4bdec58d2fe2099fe87505e1fdca6ce129de Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Sun, 10 May 2020 17:35:21 +0500 Subject: [PATCH 050/192] [MediapartBlogsBridge] Lint --- bridges/MediapartBlogsBridge.php | 1 - 1 file changed, 1 deletion(-) diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php index be46b0e5..40ae1f90 100644 --- a/bridges/MediapartBlogsBridge.php +++ b/bridges/MediapartBlogsBridge.php @@ -45,5 +45,4 @@ class MediapartBlogsBridge extends BridgeAbstract { } return parent::getName(); } - } From ca88096f1fdfce91724e8dd6b936c53d64277ca5 Mon Sep 17 00:00:00 2001 From: Paroleen <48787191+Paroleen@users.noreply.github.com> Date: Sun, 17 May 2020 19:58:19 +0200 Subject: [PATCH 051/192] [AwwwardsBridge] New bridge (#1524) [AwwwardsBridge] New bridge (#1524) --- bridges/AwwwardsBridge.php | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 bridges/AwwwardsBridge.php diff --git a/bridges/AwwwardsBridge.php b/bridges/AwwwardsBridge.php new file mode 100644 index 00000000..c1d1d320 --- /dev/null +++ b/bridges/AwwwardsBridge.php @@ -0,0 +1,55 @@ +find('li[data-model]') as $site) { + $decode = html_entity_decode($site->attr['data-model'], + ENT_QUOTES, 'utf-8'); + $decode = json_decode($decode, true); + $this->sites[] = $decode; + } + } + + public function collectData() { + $this->fetchSites(); + + Debug::log('Building RSS feed'); + foreach($this->sites as $site) { + $item = array(); + $item['title'] = $site['title']; + $item['timestamp'] = $site['createdAt']; + $item['categories'] = $site['tags']; + + $item['content'] = ''; + $item['uri'] = self::SITEURI . $site['slug']; + + $this->items[] = $item; + + if(count($this->items) >= 10) + break; + } + } +} From f48909b84ed1210d8f30c5b8ca0927327700c1b8 Mon Sep 17 00:00:00 2001 From: Joseph Date: Sun, 17 May 2020 18:00:52 +0000 Subject: [PATCH 052/192] [ASRockNewsBridge] Add Bridge (#1526) * [ASRockNewsBridge] Add Bridge --- bridges/ASRockNewsBridge.php | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 bridges/ASRockNewsBridge.php diff --git a/bridges/ASRockNewsBridge.php b/bridges/ASRockNewsBridge.php new file mode 100644 index 00000000..1f3f4dda --- /dev/null +++ b/bridges/ASRockNewsBridge.php @@ -0,0 +1,57 @@ +find('div.inner > a') as $index => $a) { + $item = array(); + + $articlePath = $a->href; + + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT) + or returnServerError('Could not request: ' . $articlePath); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.Contents', 0); + + $item['uri'] = $articlePath; + $item['title'] = $contents->find('h5', 0)->innertext; + + $contents->find('h5', 0)->outertext = ''; + + $item['content'] = $contents->innertext; + $item['timestamp'] = $this->extractDate($a->plaintext); + $item['enclosures'][] = $a->find('img', 0)->src; + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + private function extractDate($text) { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + return $matches[1]; + } + + return ''; + } +} From 63a4db7e86501b4af6fa64a732101709ee5738e2 Mon Sep 17 00:00:00 2001 From: Joseph Date: Sun, 17 May 2020 18:04:37 +0000 Subject: [PATCH 053/192] [DownDetectorBridge] Fix bridge (#1528) --- bridges/DownDetectorBridge.php | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/bridges/DownDetectorBridge.php b/bridges/DownDetectorBridge.php index 4aef3728..bfbce699 100644 --- a/bridges/DownDetectorBridge.php +++ b/bridges/DownDetectorBridge.php @@ -6125,9 +6125,16 @@ class DownDetectorBridge extends BridgeAbstract { $table = $html->find('table.table-striped', 0); $maxCount = 10; - foreach ($table->find('tr') as $downEvent) { - $downLink = $downEvent->find('td', 1)->find('a', 1); - $item = $this->collectArticleData($downLink->getAttribute('href')); + foreach ($table->find('tr') as $event) { + $td = $event->find('td', 0); + + if (is_null($td)) { + continue; + } + + $link = $event->find('td', 0)->find('a', 0); + + $item = $this->collectArticleData($link->getAttribute('href')); $this->items[] = $item; if($maxCount == 0) break; $maxCount -= 1; From f4affe18337372973255e3f04c30be607683404c Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Sun, 17 May 2020 20:05:04 +0200 Subject: [PATCH 054/192] [AuoJMBridge] Follow Website change (#1527) * [AuoJMBridge] Follow Website change --- bridges/AutoJMBridge.php | 139 ++++++++++++++------------------------- 1 file changed, 49 insertions(+), 90 deletions(-) diff --git a/bridges/AutoJMBridge.php b/bridges/AutoJMBridge.php index 25fb2cb8..b9825ca4 100644 --- a/bridges/AutoJMBridge.php +++ b/bridges/AutoJMBridge.php @@ -77,110 +77,69 @@ class AutoJMBridge extends BridgeAbstract { $model_url = self::URI . $this->getInput('url'); - // Get the session cookies and the form token - $this->getInitialParameters($model_url); + // Build the GET data + $get_data = 'form[energy]=' . $this->getInput('energy') . + '&form[transmission]=' . $this->getInput('transmission') . + '&form[priceMin]=' . $this->getInput('priceMin') . + '&form[priceMin]=' . $this->getInput('priceMin'); - // Build the form - $post_data = array( - 'form[energy]' => $this->getInput('energy'), - 'form[transmission]' => $this->getInput('transmission'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[_token]' => $this->token - ); - - // Set the Form request content type + // Set the header 'X-Requested-With' like the website does it $header = array( - 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8', - ); - - // Set the curl options (POST query and content, and session cookies - $curl_opts = array( - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => http_build_query($post_data), - CURLOPT_COOKIE => $this->cookies + 'X-Requested-With: XMLHttpRequest' ); // Get the JSON content of the form - $json = getContents($model_url, $header, $curl_opts) + $json = getContents($model_url . '?' . $get_data, $header) or returnServerError('Could not request AutoJM.'); // Extract the HTML content from the JSON result $data = json_decode($json); - $html = str_get_html($data->content); + $html = str_get_html($data->results); - // Go through every finisha of the model - $list = $html->find('h3'); - foreach ($list as $finish) { - $finish_name = $finish->plaintext; - $motorizations = $finish->next_sibling()->find('li'); - foreach ($motorizations as $element) { - $image = $element->find('div[class=block-product-image]', 0)->{'data-ga-banner'}; - $serie = $element->find('span[class=model]', 0)->plaintext; - $url = self::URI . substr($element->find('a', 0)->href, 1); - if ($element->find('span[class*=block-product-nbModel]', 0) != null) { - $availability = 'En Stock'; - } else { - $availability = 'Sur commande'; - } - $discount_html = $element->find('span[class*=tag--promo]', 0); - if ($discount_html != null) { - $discount = $discount_html->plaintext; - } else { - $discount = 'inconnue'; - } - $price = $element->find('span[class=price red h1]', 0)->plaintext; - $item = array(); - $item['title'] = $finish_name . ' ' . $serie; - $item['content'] = '

' - . $finish_name . ' ' . $serie . '

'; - $item['content'] .= '
  • Disponibilité : ' . $availability . '
  • '; - $item['content'] .= '
  • Série : ' . $serie . '
  • '; - $item['content'] .= '
  • Remise : ' . $discount . '
  • '; - $item['content'] .= '
  • Prix : ' . $price . '
'; + // Go through every car of the model + $list = $html->find('div[class=car-card]'); + foreach ($list as $car) { - // Add a fictionnal anchor to the RSS element URL, based on the item content ; - // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items - $item['uri'] = $url . '#' . md5($item['content']); - - $this->items[] = $item; + // Get the Finish name if this car is the first of a new finish + $prev_tag = $car->prev_sibling(); + if($prev_tag->tag == 'div' && $prev_tag->class == 'results-title') { + $finish_name = $prev_tag->plaintext; } - } - } - /** - * Gets the session cookie and the form token - * - * @param string $pageURL The URL from which to get the values - */ - private function getInitialParameters($pageURL) { - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $pageURL); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - $data = curl_exec($ch); - - // Separate the response header and the content - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $headerSize); - $content = substr($data, $headerSize); - curl_close($ch); - - // Extract the cookies from the headers - $cookies = ''; - $http_response_header = explode("\r\n", $header); - foreach ($http_response_header as $hdr) { - if (strpos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . $cLine; + // Get the info about the car offer + $image = $car->find('div[class=car-card__visual]', 0)->find('img', 0)->src; + $serie = $car->find('div[class=car-card__title]', 0)->plaintext; + $url = $car->find('a', 0)->href; + // Check if the car model is in stock or available only on order + if($car->find('span[class*=tag--dispo]', 0) != null) { + $availability = 'En Stock'; + } else { + $availability = 'Sur commande'; } - } - $this->cookies = trim(substr($cookies, 1)); + $discount_html = $car->find('span[class=promo]', 0); + // Check if there is any discount dsiplayed + if ($discount_html != null) { + $discount = $discount_html->plaintext; + } else { + $discount = 'inconnue'; + } + $price = $car->find('span[class=price]', 0)->plaintext; - // Get the token from the content - $html = str_get_html($content); - $token = $html->find('input[type=hidden][id=form__token]', 0); - $this->token = $token->value; + // Construct the new item + $item = array(); + $item['title'] = $finish_name . ' ' . $serie; + $item['content'] = '

' + . $finish_name . ' ' . $serie . '

'; + $item['content'] .= '
  • Disponibilité : ' . $availability . '
  • '; + $item['content'] .= '
  • Série : ' . $serie . '
  • '; + $item['content'] .= '
  • Remise : ' . $discount . '
  • '; + $item['content'] .= '
  • Prix : ' . $price . '
'; + + // Add a fictionnal anchor to the RSS element URL, based on the item content ; + // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items + $item['uri'] = $url . '#' . md5($item['content']); + + $this->items[] = $item; + } } } From 868d3f600d85e0c3bcc96e63d208751564793b35 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Sun, 17 May 2020 23:21:37 +0500 Subject: [PATCH 055/192] [VkBridge] Fix one letter bug on titles (#1555) --- bridges/VkBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index ea81a2b2..2e03790b 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -355,7 +355,7 @@ class VkBridge extends BridgeAbstract private function getTitle($content) { - preg_match('/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); + preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); if (count($result) == 0) return 'untitled'; return $result[0]; } From 36fc4822dd3824518911b7e7e6c0d7c6c849ca03 Mon Sep 17 00:00:00 2001 From: Paroleen <48787191+Paroleen@users.noreply.github.com> Date: Sun, 17 May 2020 20:22:04 +0200 Subject: [PATCH 056/192] [UnraidCommunityApplicationsBridge] Add new bridge (#1534) * [UnraidCommunityApplicationsBridge] Add new bridge --- bridges/UnraidCommunityApplicationsBridge.php | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 bridges/UnraidCommunityApplicationsBridge.php diff --git a/bridges/UnraidCommunityApplicationsBridge.php b/bridges/UnraidCommunityApplicationsBridge.php new file mode 100644 index 00000000..1ab06e3c --- /dev/null +++ b/bridges/UnraidCommunityApplicationsBridge.php @@ -0,0 +1,71 @@ +apps = getContents(self::APPSURI) + or returnServerError('Could not fetch JSON for apps.'); + $this->apps = json_decode($this->apps, true)['applist']; + } + + private function sortApps() { + Debug::log('Sorting applications/plugins'); + usort($this->apps, function($app1, $app2) { + return $app1['FirstSeen'] < $app2['FirstSeen'] ? 1 : -1; + }); + } + + public function collectData() { + $this->fetchApps(); + $this->sortApps(); + + Debug::log('Building RSS feed'); + foreach($this->apps as $app) { + if(!array_key_exists('Language', $app)) { + $item = array(); + $item['title'] = $app['Name']; + $item['timestamp'] = $app['FirstSeen']; + $item['author'] = explode('\'', $app['Repo'])[0]; + $item['categories'] = explode(' ', $app['Category']); + $item['content'] = ''; + + if(array_key_exists('Icon', $app)) + $item['content'] .= ''; + + if(array_key_exists('Overview', $app)) + $item['content'] .= '

' + . $app['Overview'] + . '

'; + + if(array_key_exists('Project', $app)) + $item['uri'] = $app['Project']; + + if(array_key_exists('Registry', $app)) + $item['content'] .= '
Docker Hub'; + + if(array_key_exists('Support', $app)) + $item['content'] .= '
Support'; + + $this->items[] = $item; + + if(count($this->items) >= 15) + break; + } + } + } +} From 71745116e1a92dbe383a2938fae307608b125621 Mon Sep 17 00:00:00 2001 From: Joseph Date: Sun, 17 May 2020 18:33:01 +0000 Subject: [PATCH 057/192] [MozillaBugTrackerBridge] Fix bridge (#1550) * [MozillaBugTrackerBridge] Fix bridge --- bridges/MozillaBugTrackerBridge.php | 59 +++++++++++++---------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/bridges/MozillaBugTrackerBridge.php b/bridges/MozillaBugTrackerBridge.php index 356bedcf..439e148d 100644 --- a/bridges/MozillaBugTrackerBridge.php +++ b/bridges/MozillaBugTrackerBridge.php @@ -61,43 +61,44 @@ class MozillaBugTrackerBridge extends BridgeAbstract { if($html === false) returnServerError('Failed to load page!'); + // Fix relative URLs + defaultLinkTo($html, self::URI); + // Store header information into private members - $this->bugid = $html->find('#bugzilla-body', 0)->find('a', 0)->innertext; - $this->bugdesc = $html->find('table.bugfields', 0)->find('tr', 0)->find('td', 0)->innertext; + $this->bugid = $html->find('#field-value-bug_id', 0)->plaintext; + $this->bugdesc = $html->find('h1#field-value-short_desc', 0)->plaintext; // Get and limit comments - $comments = $html->find('.bz_comment_table div.bz_comment'); + $comments = $html->find('div.change-set'); if($limit > 0 && count($comments) > $limit) { $comments = array_slice($comments, count($comments) - $limit, $limit); } - // Order comments - switch($sorting) { - case 'lf': $comments = array_reverse($comments, true); - case 'of': - default: // Nothing to do, keep original order + if ($sorting === 'lf') { + $comments = array_reverse($comments, true); } foreach($comments as $comment) { $comment = $this->inlineStyles($comment); $item = array(); - $item['uri'] = $this->getURI() . '#' . $comment->id; - $item['author'] = $comment->find('span.bz_comment_user', 0)->innertext; - $item['title'] = $comment->find('span.bz_comment_number', 0)->find('a', 0)->innertext; - $item['timestamp'] = strtotime($comment->find('span.bz_comment_time', 0)->innertext); - $item['content'] = $comment->find('pre.bz_comment_text', 0)->innertext; + $item['uri'] = $comment->find('h3.change-name', 0)->find('a', 0)->href; + $item['author'] = $comment->find('td.change-author', 0)->plaintext; + $item['title'] = $comment->find('h3.change-name', 0)->plaintext; + $item['timestamp'] = strtotime($comment->find('span.rel-time', 0)->title); + $item['content'] = ''; - // Fix line breaks (they use LF) - $item['content'] = str_replace("\n", '
', $item['content']); + if ($comment->find('.comment-text', 0)) { + $item['content'] = $comment->find('.comment-text', 0)->outertext; + } - // Fix relative URIs - $item['content'] = $this->replaceRelativeURI($item['content']); + if ($comment->find('div.activity', 0)) { + $item['content'] .= $comment->find('div.activity', 0)->innertext; + } $this->items[] = $item; } - } public function getURI(){ @@ -114,9 +115,8 @@ class MozillaBugTrackerBridge extends BridgeAbstract { public function getName(){ switch($this->queriedContext) { case 'Bug comments': - return 'Bug ' - . $this->bugid - . ' tracker for ' + return $this->bugid + . ' - ' . $this->bugdesc . ' - ' . parent::getName(); @@ -125,17 +125,6 @@ class MozillaBugTrackerBridge extends BridgeAbstract { } } - /** - * Replaces all relative URIs with absolute ones - * - * @param string $content The source string - * @return string Returns the source string with all relative URIs replaced - * by absolute ones. - */ - private function replaceRelativeURI($content){ - return preg_replace('/href="(?!http)/', 'href="' . self::URI . '/', $content); - } - /** * Adds styles as attributes to tags with known classes * @@ -144,10 +133,14 @@ class MozillaBugTrackerBridge extends BridgeAbstract { * attributes. */ private function inlineStyles($html){ - foreach($html->find('.bz_obsolete') as $element) { + foreach($html->find('.bz_closed') as $element) { $element->style = 'text-decoration:line-through;'; } + foreach($html->find('pre') as $element) { + $element->style = 'white-space: pre-wrap;'; + } + return $html; } } From 8233497611075216b12d0dd4ef4ed1a1d254f69e Mon Sep 17 00:00:00 2001 From: Fanch Date: Sun, 17 May 2020 20:33:14 +0200 Subject: [PATCH 058/192] [AirBreizhBridge] Add new bridge (#1544) * [AirBreizhBridge] Add new bridge --- bridges/AirBreizhBridge.php | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 bridges/AirBreizhBridge.php diff --git a/bridges/AirBreizhBridge.php b/bridges/AirBreizhBridge.php new file mode 100644 index 00000000..2d852da5 --- /dev/null +++ b/bridges/AirBreizhBridge.php @@ -0,0 +1,54 @@ + array( + 'theme' => array( + 'name' => 'Thematique', + 'type' => 'list', + 'values' => array( + 'Tout' => '', + 'Rapport d\'activite' => 'rapport-dactivite', + 'Etude' => 'etudes', + 'Information' => 'information', + 'Autres documents' => 'autres-documents', + 'Plan Régional de Surveillance de la qualité de l’air' => 'prsqa', + 'Transport' => 'transport' + ) + ) + ) + ); + + public function getIcon() { + return 'https://www.airbreizh.asso.fr/voy_content/uploads/2017/11/favicon.png'; + } + + public function collectData(){ + $html = ''; + $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme')) + or returnClientError('No results for this query.'); + + foreach ($html->find('article') as $article) { + $item = array(); + // Title + $item['title'] = $article->find('h2', 0)->plaintext; + // Author + $item['author'] = 'Air Breizh'; + // Image + $imagelink = $article->find('.card__image', 0)->find('img', 0)->getAttribute('src'); + // Content preview + $item['content'] = ' +
' + . $article->find('.card__text', 0)->plaintext; + // URL + $item['uri'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + // ID + $item['id'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + $this->items[] = $item; + } + } +} From fa74d3728be0592fd4b549294b1df89e1328b416 Mon Sep 17 00:00:00 2001 From: Joseph Date: Sun, 17 May 2020 18:35:34 +0000 Subject: [PATCH 059/192] [GizmodoBridge] Fix bridge (#1538) * [GizmodoBridge] Update bridge --- bridges/GizmodoBridge.php | 84 +++++++++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php index 35f162b8..4b924a22 100644 --- a/bridges/GizmodoBridge.php +++ b/bridges/GizmodoBridge.php @@ -3,34 +3,78 @@ class GizmodoBridge extends FeedExpander { const MAINTAINER = 'polopollo'; const NAME = 'Gizmodo'; - const URI = 'http://gizmodo.com/'; + const URI = 'https://gizmodo.com'; const CACHE_TIMEOUT = 1800; // 30min - const DESCRIPTION = 'Returns the newest posts from Gizmodo (full text).'; + const DESCRIPTION = 'Returns the newest posts from Gizmodo.'; - protected function parseItem($item){ + protected function parseItem($item) { $item = parent::parseItem($item); - $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); - if(!$articleHTMLContent) { - $text = 'Could not load ' . $item['uri']; - } else { - $text = $articleHTMLContent->find('div.entry-content', 0)->innertext; - foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { - $text .= '

link to a iframe (could be a video): ' - . $element->src - . '


'; - } + $html = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request: ' . $item['uri']); - $text = strip_tags($text, '

'); - } + $html = defaultLinkTo($html, $this->getURI()); + $this->stripTags($html); + $this->handleFigureTags($html); + $this->handleIframeTags($html); + + // Get header image + $image = $html->find('meta[property="og:image"]', 0)->content; + + $item['content'] = $html->find('div.js_post-content', 0)->innertext; + + // Get categories + $categories = explode(',', $html->find('meta[name="keywords"]', 0)->content); + $item['categories'] = array_map('trim', $categories); + + $item['enclosures'][] = $html->find('meta[property="og:image"]', 0)->content; - $item['content'] = $text; return $item; } - public function collectData(){ - $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full'); + public function collectData() { + $this->collectExpandableDatas(self::URI . '/rss', 20); + } + + private function stripTags($html) { + foreach ($html->find('aside') as $aside) { + $aside->outertext = ''; + } + + foreach ($html->find('div.ad-unit') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('script') as $script) { + $script->outertext = ''; + } + } + + private function handleFigureTags($html) { + foreach ($html->find('figure') as $index => $figure) { + + if (isset($figure->attr['data-id'])) { + $id = $figure->attr['data-id']; + $format = $figure->attr['data-format']; + + } else { + $img = $figure->find('img', 0); + $id = $img->attr['data-chomp-id']; + $format = $img->attr['data-format']; + $figure->find('div.img-permalink-sub-wrapper', 0)->style = ''; + } + + $imageUrl = 'https://i.kinja-img.com/gawker-media/image/upload/' . $id . '.' . $format; + + $figure->find('span', 0)->outertext = << +EOD; + } + } + + private function handleIframeTags($html) { + foreach($html->find('iframe') as $iframe) { + $iframe->src = urljoin($this->getURI(), $iframe->src); + } } } From 8047041963cfaa10c574deabf4df3feb8e7e84af Mon Sep 17 00:00:00 2001 From: Sandro Date: Tue, 19 May 2020 10:00:12 +0200 Subject: [PATCH 060/192] [Core] Include Media RSS namespace for Atom feeds Include Media RSS namespace for Atom feeds Fix #1511 Fix #1499 --- formats/AtomFormat.php | 2 +- tests/samples/expectedAtomFormat/feed.common.xml | 2 +- tests/samples/expectedAtomFormat/feed.empty.xml | 2 +- tests/samples/expectedAtomFormat/feed.emptyItems.xml | 2 +- tests/samples/expectedAtomFormat/feed.microblog.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index c1bde25f..80a388df 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -130,7 +130,7 @@ EOD; /* Data are prepared, now let's begin the "MAGIE !!!" */ $toReturn = << - + {$title} {$feedUrl} diff --git a/tests/samples/expectedAtomFormat/feed.common.xml b/tests/samples/expectedAtomFormat/feed.common.xml index 80cb0df4..aa6d0687 100644 --- a/tests/samples/expectedAtomFormat/feed.common.xml +++ b/tests/samples/expectedAtomFormat/feed.common.xml @@ -1,5 +1,5 @@ - + Sample feed with common data https://example.com/feed?type=common&items=4 diff --git a/tests/samples/expectedAtomFormat/feed.empty.xml b/tests/samples/expectedAtomFormat/feed.empty.xml index 371135b6..fc04304d 100644 --- a/tests/samples/expectedAtomFormat/feed.empty.xml +++ b/tests/samples/expectedAtomFormat/feed.empty.xml @@ -1,5 +1,5 @@ - + Sample feed with minimum data https://example.com/feed diff --git a/tests/samples/expectedAtomFormat/feed.emptyItems.xml b/tests/samples/expectedAtomFormat/feed.emptyItems.xml index 462a4e5c..989893fa 100644 --- a/tests/samples/expectedAtomFormat/feed.emptyItems.xml +++ b/tests/samples/expectedAtomFormat/feed.emptyItems.xml @@ -1,5 +1,5 @@ - + Sample feed with minimum data https://example.com/feed diff --git a/tests/samples/expectedAtomFormat/feed.microblog.xml b/tests/samples/expectedAtomFormat/feed.microblog.xml index a6264aee..32bc0273 100644 --- a/tests/samples/expectedAtomFormat/feed.microblog.xml +++ b/tests/samples/expectedAtomFormat/feed.microblog.xml @@ -1,5 +1,5 @@ - + Sample microblog feed https://example.com/feed From 9a66227a796bfe9dfe0e213a9378c5a7af47b284 Mon Sep 17 00:00:00 2001 From: Damien Calesse <2787828+kranack@users.noreply.github.com> Date: Wed, 20 May 2020 21:52:37 +0200 Subject: [PATCH 061/192] [SensCritique] Fix search display (#1567) - Remove movies search. It appears the website changed their movies displays and data cannot be easily extracted for now. - Fix some errors on items without proper description and/or original title. --- bridges/SensCritiqueBridge.php | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 7ac35f2c..9126c316 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -3,15 +3,11 @@ class SensCritiqueBridge extends BridgeAbstract { const MAINTAINER = 'kranack'; const NAME = 'Sens Critique'; - const URI = 'http://www.senscritique.com/'; + const URI = 'https://www.senscritique.com/'; const CACHE_TIMEOUT = 21600; // 6h const DESCRIPTION = 'Sens Critique news'; const PARAMETERS = array( array( - 'm' => array( - 'name' => 'Movies', - 'type' => 'checkbox' - ), 's' => array( 'name' => 'Series', 'type' => 'checkbox' @@ -40,8 +36,6 @@ class SensCritiqueBridge extends BridgeAbstract { if($this->getInput($category)) { $uri = self::URI; switch($category) { - case 'm': $uri .= 'films/cette-semaine'; - break; case 's': $uri .= 'series/actualite'; break; case 'g': $uri .= 'jeuxvideo/actualite'; @@ -77,20 +71,25 @@ class SensCritiqueBridge extends BridgeAbstract { . ' ' . $movie->find('.elco-date', 0)->plaintext; - $item['content'] = '' - . $movie->find('.elco-original-title', 0)->plaintext - . '

' - . $movie->find('.elco-baseline', 0)->plaintext + $item['content'] = ''; + $originalTitle = $movie->find('.elco-original-title', 0); + $description = $movie->find('.elco-description', 0); + + if ($originalTitle) { + $item['content'] = '' . $originalTitle->plaintext . '

'; + } + + $item['content'] .= $movie->find('.elco-baseline', 0)->plaintext . '
' . $movie->find('.elco-baseline', 1)->plaintext . '

' - . $movie->find('.elco-description', 0)->plaintext + . ($description ? $description->plaintext : '') . '

' . trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) . ' / 10'; - $item['id'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; - $item['uri'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); + $item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); $this->items[] = $item; } } From 25f0d3b877deff3243860d10ca0c1cf46102bd9d Mon Sep 17 00:00:00 2001 From: floviolleau Date: Wed, 27 May 2020 23:04:03 +0200 Subject: [PATCH 062/192] [TheCodingLoveBridge] Fix not loading items (#1577) --- bridges/TheCodingLoveBridge.php | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index 8060c947..54fd0d27 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -11,14 +11,14 @@ class TheCodingLoveBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request The Coding Love.'); - foreach($html->find('div.post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); - $temp = $element->find('h3 a', 0); + $temp = $element->find('h1 a', 0); - $titre = $temp->innertext; + $title = $temp->innertext; $url = $temp->href; - $temp = $element->find('div.bodytype', 0); + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); @@ -28,17 +28,13 @@ class TheCodingLoveBridge extends BridgeAbstract { } $content = $temp->innertext; - $auteur = $temp->find('i', 0); - $pos = strpos($auteur->innertext, 'by'); - - if($pos > 0) { - $auteur = trim(str_replace('*/', '', substr($auteur->innertext, ($pos + 2)))); - $item['author'] = $auteur; - } + $temp = $element->find('div.post-meta-info', 0); + $author = $temp->find('span', 0); + $item['author'] = $author->innertext; $item['content'] .= trim($content); $item['uri'] = $url; - $item['title'] = trim($titre); + $item['title'] = trim($title); $this->items[] = $item; } From a1dd98ff82b4ecf5fd32975a58a06a219ad6611b Mon Sep 17 00:00:00 2001 From: floviolleau Date: Wed, 27 May 2020 23:04:43 +0200 Subject: [PATCH 063/192] [LesJoiesDuCodeBridge] Fix items not loading --- bridges/LesJoiesDuCodeBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index 0957d921..c79b1114 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -11,7 +11,7 @@ class LesJoiesDuCodeBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request LesJoiesDuCode.'); - foreach($html->find('div.blog-post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); From c4422bdbb553bc3ff89a7fdfa3ceabe7cd5a49c6 Mon Sep 17 00:00:00 2001 From: Michael Bemmerl Date: Wed, 27 May 2020 21:08:06 +0000 Subject: [PATCH 064/192] [Core] Fix notice of undefined offset when in detached HEAD state. (#1569) --- lib/Configuration.php | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/Configuration.php b/lib/Configuration.php index 76a34aff..d46a6091 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -244,9 +244,13 @@ final class Configuration { if(@is_readable($headFile)) { $revisionHashFile = '.git/' . substr(file_get_contents($headFile), 5, -1); - $branchName = explode('/', $revisionHashFile)[3]; - if(file_exists($revisionHashFile)) { - return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); + $parts = explode('/', $revisionHashFile); + + if(isset($parts[3])) { + $branchName = $parts[3]; + if(file_exists($revisionHashFile)) { + return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); + } } } From 06891ae35f0947d8f2a8daa0094b25e2db862226 Mon Sep 17 00:00:00 2001 From: Lyra Date: Fri, 5 Jun 2020 10:17:53 +0200 Subject: [PATCH 065/192] [TwitterBridge] Fix the bridge using a brand new API --- bridges/TwitterBridge.php | 327 ++++++++++++++------------------------ cache/pages/.gitkeep | 0 cache/server/.gitkeep | 0 3 files changed, 121 insertions(+), 206 deletions(-) delete mode 100644 cache/pages/.gitkeep delete mode 100644 cache/server/.gitkeep diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 0d8b0243..6372def6 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -2,6 +2,7 @@ class TwitterBridge extends BridgeAbstract { const NAME = 'Twitter Bridge'; const URI = 'https://twitter.com/'; + const API_URI = 'https://api.twitter.com'; const CACHE_TIMEOUT = 300; // 5min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'pmaziere'; @@ -168,6 +169,27 @@ EOD } } + public function getApiURI() { + switch($this->queriedContext) { + case 'By keyword or hashtag': + return self::API_URI + . '/2/search/adaptive.json?q=' + . urlencode($this->getInput('q')) + . '&tweet_mode=extended'; + case 'By username': + return self::API_URI + . '/2/timeline/profile/' + . $this->getRestId($this->getInput('u')) + . '.json?tweet_mode=extended'; + case 'By list': + return self::API_URI + . '/2/timeline/list.json?list_id=' + . $this->getListId($this->getInput('user'), $this->getInput('list')) + . '&tweet_mode=extended'; + default: returnServerError('Invalid query context !'); + } + } + public function collectData(){ $html = ''; $page = $this->getURI(); @@ -176,14 +198,9 @@ EOD 'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko' ); - if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) { - $cookies = $this->getCookies($page); - $html = getSimpleHTMLDOM($page, array_merge($header, array("Cookie: $cookies"))); - } else { - $html = getSimpleHTMLDOM($page, $header, array(CURLOPT_COOKIEFILE => '')); - } + $data = json_decode($this->getApiContents($this->getApiURI())); - if(!$html) { + if(!$data) { switch($this->queriedContext) { case 'By keyword or hashtag': returnServerError('No results for this query.'); @@ -196,75 +213,33 @@ EOD $hidePictures = $this->getInput('nopic'); - foreach($html->find('div.js-stream-tweet') as $tweet) { + foreach($data->globalObjects->tweets as $tweet) { // Skip retweets? if($this->getInput('noretweet') - && $tweet->find('div.context span.js-retweet-text a', 0)) { - continue; - } - - // remove 'invisible' content - foreach($tweet->find('.invisible') as $invisible) { - $invisible->outertext = ''; - } - - // Skip protmoted tweets - $heading = $tweet->previousSibling(); - if(!is_null($heading) && - $heading->getAttribute('class') === 'promoted-tweet-heading' - ) { + && isset($tweet->retweeted_status_id_str)) { continue; } $item = array(); // extract username and sanitize - $item['username'] = htmlspecialchars_decode($tweet->getAttribute('data-screen-name'), ENT_QUOTES); - // extract fullname (pseudonym) - $item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES); - // get author + $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects); + + $item['username'] = $user_info->name; + $item['fullname'] = $user_info->screen_name; $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; - if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) { - $item['author'] .= ' RT: @' . $rt->plaintext; - } - // get avatar link - $item['avatar'] = $tweet->find('img', 0)->src; - // get TweetID - $item['id'] = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item['uri'] = self::URI . substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1); + $item['avatar'] = $user_info->profile_image_url_https; + + $item['id'] = $tweet->id_str; + $item['uri'] = self::URI . $tweet->user_id_str . '/status/' . $item['id']; // extract tweet timestamp - $item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); + $item['timestamp'] = $tweet->created_at; + // generate the title - $item['title'] = strip_tags($this->fixAnchorSpacing(htmlspecialchars_decode( - $tweet->find('p.js-tweet-text', 0), ENT_QUOTES), '
')); + $item['title'] = $tweet->full_text; + $cleanedTweet = $tweet->full_text; - switch($this->queriedContext) { - case 'By list': - // Check if filter applies to list (using raw content) - if($this->getInput('filter')) { - if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) { - continue 2; // switch + for-loop! - } - } - break; - default: - } - - $this->processContentLinks($tweet); - $this->processEmojis($tweet); - - // get tweet text - $cleanedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $tweet->find('p.js-tweet-text', 0)->innertext - ); - - // fix anchors missing spaces in-between - $cleanedTweet = $this->fixAnchorSpacing($cleanedTweet); - - // Add picture to content + // Add avatar $picture_html = ''; if(!$hidePictures) { $picture_html = <<getImageURI($tweet); - if(!$this->getInput('noimg') && !is_null($images)) { - - foreach ($images as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; - + if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) { + foreach($tweet->extended_entities->media as $media) { + $image = $media->media_url_https; + $display_image = $media->display_url; // add enclosures - $item['enclosures'][] = $image_orig; + $item['enclosures'][] = $image; $image_html .= << + + src="{$display_image}" /> EOD; } } - // add content + switch($this->queriedContext) { + case 'By list': + // Check if filter applies to list (using raw content) + if($this->getInput('filter')) { + if(stripos($cleanedTweet, $this->getInput('filter')) === false) { + continue 2; // switch + for-loop! + } + } + break; + default: + } + $item['content'] = << {$picture_html} @@ -315,151 +296,85 @@ EOD; EOD; - // add quoted tweet - $quotedTweet = $tweet->find('div.QuoteTweet', 0); - if($quotedTweet) { - // get tweet text - $cleanedQuotedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $quotedTweet->find('div.tweet-text', 0)->innertext - ); - - $this->processContentLinks($quotedTweet); - $this->processEmojis($quotedTweet); - - // Add embeded image to content - $quotedImage_html = ''; - $quotedImages = $this->getQuotedImageURI($tweet); - - if(!$this->getInput('noimg') && !is_null($quotedImages)) { - - foreach ($quotedImages as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; - - // add enclosures - $item['enclosures'][] = $image_orig; - - $quotedImage_html .= << - - -EOD; - } - } - - $item['content'] = << -
-
{$cleanedQuotedTweet}
-
-
-
{$quotedImage_html}
-
-EOD; - } $item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES); // put out $this->items[] = $item; } + + usort($this->items, array('TwitterBridge', 'compareTweetId')); } - private function processEmojis($tweet){ - // process emojis (reduce size) - foreach($tweet->find('img.Emoji') as $img) { - $img->style .= ' height: 1em;'; + private static function compareTweetId($tweet1, $tweet2) { + return (intval($tweet1['id']) < intval($tweet2['id']) ? 1 : -1); + } + + //The aim of this function is to get an API key and a guest token + //This function takes 2 requests, and therefore is cached + private function getApiKey() { + + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope(get_called_class()); + $cache->setKey(array('api_key')); + $data = $cache->loadData(); + + if($data === null) { + $twitterPage = getContents('https://twitter.com'); + $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m'; + preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); + $jsLink = $jsMainMatches[0][0]; + $guestTokenRegex = '/gt=([0-9]*)/m'; + preg_match_all($guestTokenRegex, $twitterPage, $guestTokenMatches, PREG_SET_ORDER, 0); + $guestToken = $guestTokenMatches[0][1]; + + $jsContent = getContents($jsLink); + $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; + preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0); + $apiKey = $apiKeyMatches[0][0]; + $cache->saveData(array($apiKey, $guestToken)); + return array($apiKey, $guestToken); } + + return $data; + } - private function processContentLinks($tweet){ - // processing content links - foreach($tweet->find('a') as $link) { - if($link->hasAttribute('data-expanded-url')) { - $link->href = $link->getAttribute('data-expanded-url'); - } - $link->removeAttribute('data-expanded-url'); - $link->removeAttribute('data-query-source'); - $link->removeAttribute('rel'); - $link->removeAttribute('class'); - $link->removeAttribute('target'); - $link->removeAttribute('title'); - } + private function getApiContents($uri) { + $apiKeys = $this->getApiKey(); + $headers = array('authorization: Bearer ' . $apiKeys[0], + 'x-guest-token: ' . $apiKeys[1], + ); + return getContents($uri, $headers); } - private function fixAnchorSpacing($content){ - // fix anchors missing spaces in-between - return str_replace( - 'find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; + private function getUserInformation($userId, $apiData) { + foreach($apiData->users as $user) { + if($user->id_str == $userId) { + return $user; } } - - if (!empty($images)) { - return $images; - } - - return null; } - private function getQuotedImageURI($tweet){ - // Find media in tweet - $images = array(); - - $container = $tweet->find('div.QuoteMedia-container', 0); - - if($container && $container->find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; - } - } - - if (!empty($images)) { - return $images; - } - - return null; - } - - private function getCookies($pageURL){ - - $ctx = stream_context_create(array( - 'http' => array( - 'follow_location' => false - ) - ) - ); - $a = file_get_contents($pageURL, 0, $ctx); - - //First request to get the cookie - $cookies = ''; - foreach($http_response_header as $hdr) { - if(stripos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . $cLine; - } - } - - return substr($cookies, 2); - } } diff --git a/cache/pages/.gitkeep b/cache/pages/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/cache/server/.gitkeep b/cache/server/.gitkeep deleted file mode 100644 index e69de29b..00000000 From 124631df736eda882c2cebcfab1381122fb8828c Mon Sep 17 00:00:00 2001 From: Lyra Date: Mon, 8 Jun 2020 11:18:24 +0200 Subject: [PATCH 066/192] [TwitterBridge] Fix caching policy, usernames as well as images --- bridges/TwitterBridge.php | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 6372def6..6ba1626a 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -3,6 +3,7 @@ class TwitterBridge extends BridgeAbstract { const NAME = 'Twitter Bridge'; const URI = 'https://twitter.com/'; const API_URI = 'https://api.twitter.com'; + const GUEST_TOKEN_USES = 100; const CACHE_TIMEOUT = 300; // 5min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'pmaziere'; @@ -169,7 +170,7 @@ EOD } } - public function getApiURI() { + private function getApiURI() { switch($this->queriedContext) { case 'By keyword or hashtag': return self::API_URI @@ -225,8 +226,8 @@ EOD // extract username and sanitize $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects); - $item['username'] = $user_info->name; - $item['fullname'] = $user_info->screen_name; + $item['username'] = $user_info->screen_name; + $item['fullname'] = $user_info->name; $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; $item['avatar'] = $user_info->profile_image_url_https; @@ -258,7 +259,7 @@ EOD; if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) { foreach($tweet->extended_entities->media as $media) { $image = $media->media_url_https; - $display_image = $media->display_url; + $display_image = $media->media_url; // add enclosures $item['enclosures'][] = $image; @@ -266,6 +267,7 @@ EOD; EOD; @@ -320,25 +322,53 @@ EOD; $cache->setKey(array('api_key')); $data = $cache->loadData(); - if($data === null) { + $apiKey = null; + if($data === null || !is_array($data) || count($data) != 1) { $twitterPage = getContents('https://twitter.com'); + $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m'; preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); $jsLink = $jsMainMatches[0][0]; - $guestTokenRegex = '/gt=([0-9]*)/m'; - preg_match_all($guestTokenRegex, $twitterPage, $guestTokenMatches, PREG_SET_ORDER, 0); - $guestToken = $guestTokenMatches[0][1]; $jsContent = getContents($jsLink); $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0); $apiKey = $apiKeyMatches[0][0]; - $cache->saveData(array($apiKey, $guestToken)); - return array($apiKey, $guestToken); + $cache->saveData($apiKey); + } else { + $apiKey = $data; } - return $data; + $cacheFac2 = new CacheFactory(); + $cacheFac2->setWorkingDir(PATH_LIB_CACHES); + $gt_cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $gt_cache->setScope(get_called_class()); + $gt_cache->setKey(array('guest_token')); + $guestTokenUses = $gt_cache->loadData(); + $guestToken = null; + if($guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2 || $guestTokenUses[0] <= 0) { + $guestToken = $this->getGuestToken(); + $gt_cache->saveData(array(self::GUEST_TOKEN_USES, $guestToken)); + } else { + $guestTokenUses[0] -= 1; + $gt_cache->saveData($guestTokenUses); + $guestToken = $guestTokenUses[1]; + } + + return array($apiKey, $guestToken); + + } + + // Get a guest token. This is different to an API key, + // and it seems to change more regularly than the API key. + private function getGuestToken() { + $pageContent = getContents('https://twitter.com'); + + $guestTokenRegex = '/gt=([0-9]*)/m'; + preg_match_all($guestTokenRegex, $pageContent, $guestTokenMatches, PREG_SET_ORDER, 0); + $guestToken = $guestTokenMatches[0][1]; + return $guestToken; } private function getApiContents($uri) { @@ -376,5 +406,4 @@ EOD; } } } - } From e4c4ae82452814237a65939827e3b94792fb6af3 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 8 Jun 2020 14:27:19 +0500 Subject: [PATCH 067/192] [MemcachedCache] loadData now returns null instead of false (#1592) FileCache and SQLiteCache returns null on cache miss. This is important if using strict comparing (for example when using "===") --- caches/MemcachedCache.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index f69f10b0..b431279a 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -40,7 +40,7 @@ class MemcachedCache implements CacheInterface { if ($this->data) return $this->data; $result = $this->conn->get($this->getCacheKey()); if ($result === false) { - return false; + return null; } $this->time = $result['time']; From 98ff5a095cc6cb60d43843e916cedc93dc3a74bf Mon Sep 17 00:00:00 2001 From: Park0 Date: Tue, 9 Jun 2020 20:21:34 +0200 Subject: [PATCH 068/192] [Marktplaats] New Bridge (#1575) --- bridges/MarktplaatsBridge.php | 112 ++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 bridges/MarktplaatsBridge.php diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php new file mode 100644 index 00000000..ca88af2b --- /dev/null +++ b/bridges/MarktplaatsBridge.php @@ -0,0 +1,112 @@ + array( + 'q' => array( + 'name' => 'query', + 'type' => 'text', + 'required' => true, + 'title' => 'The search string for marktplaats', + ), + 'z' => array( + 'name' => 'zipcode', + 'type' => 'text', + 'required' => false, + 'title' => 'Zip code for location limited searches', + ), + 'd' => array( + 'name' => 'distance', + 'type' => 'number', + 'required' => false, + 'title' => 'The distance in meters from the zipcode', + ), + 'f' => array( + 'name' => 'priceFrom', + 'type' => 'number', + 'required' => false, + 'title' => 'The minimal price in cents', + ), + 't' => array( + 'name' => 'priceTo', + 'type' => 'number', + 'required' => false, + 'title' => 'The maximal price in cents', + ), + 's' => array( + 'name' => 'showGlobal', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include result with negative distance', + ), + 'i' => array( + 'name' => 'includeImage', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include the image at the end of the content', + ), + 'r' => array( + 'name' => 'includeRaw', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include the raw data behind the content', + ) + ) + ); + const CACHE_TIMEOUT = 900; + + public function collectData() { + $query = ''; + $excludeGlobal = false; + if(!is_null($this->getInput('z')) && !is_null($this->getInput('d'))) { + $query = '&postcode=' . $this->getInput('z') . '&distanceMeters=' . $this->getInput('d'); + } + if(!is_null($this->getInput('f'))) { + $query .= '&PriceCentsFrom=' . $this->getInput('f'); + } + if(!is_null($this->getInput('t'))) { + $query .= '&PriceCentsTo=' . $this->getInput('t'); + } + if(!is_null($this->getInput('s'))) { + if(!$this->getInput('s')) { + $excludeGlobal = true; + } + } + $url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query; + $jsonString = getSimpleHTMLDOM($url, 900) or returnServerError('No contents received!'); + $jsonObj = json_decode($jsonString); + foreach($jsonObj->listings as $listing) { + if(!$excludeGlobal || $listing->location->distanceMeters >= 0) { + $item = array(); + $item['uri'] = 'https://marktplaats.nl' . $listing->vipUrl; + $item['title'] = $listing->title; + $item['timestamp'] = $listing->date; + $item['author'] = $listing->sellerInformation->sellerName; + $item['content'] = $listing->description; + $item['enclosures'] = $listing->imageUrls; + $item['categories'] = $listing->verticals; + $item['uid'] = $listing->itemId; + if(!is_null($this->getInput('i')) && !empty($listing->imageUrls)) { + if($this->getInput('i')) { + if(is_array($listing->imageUrls)) { + foreach($listing->imageUrls as $imgurl) { + $item['content'] .= "
\n"; + } + } else { + $item['content'] .= "
\n"; + } + } + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "
\n
\n
\n" . json_encode($listing); + } + } + $this->items[] = $item; + } + } + } +} From 22a01f10937f13ebfd710e3509ccb34c20698abc Mon Sep 17 00:00:00 2001 From: somini Date: Wed, 10 Jun 2020 21:39:36 +0100 Subject: [PATCH 069/192] [Twitter] Fix Twitter bridge images and add other media types (#1595) * Keep old URI structure Use the username, not the user ID. * Fix Twitter bridge images Credit to @kinoushe See https://github.com/RSS-Bridge/rss-bridge/issues/1562#issuecomment-639393175 * Include Videos and "Animated GIF" as twit enclosures Credit to @kinoushe for digging into the API docs. https://github.com/RSS-Bridge/rss-bridge/issues/1562#issuecomment-640320688 * Calculate the highest bitrate video Include that on the enclosure. * Appease linter * Appease linter, again * Remove surrounding link from videos Add it on a smaller link besides it. See https://github.com/RSS-Bridge/rss-bridge/pull/1595#issuecomment-640989208 * Include video poster on the enclosures. --- bridges/TwitterBridge.php | 51 +++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 6ba1626a..210f5d01 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -232,7 +232,7 @@ EOD $item['avatar'] = $user_info->profile_image_url_https; $item['id'] = $tweet->id_str; - $item['uri'] = self::URI . $tweet->user_id_str . '/status/' . $item['id']; + $item['uri'] = self::URI . $item['username'] . '/status/' . $item['id']; // extract tweet timestamp $item['timestamp'] = $tweet->created_at; @@ -255,15 +255,17 @@ EOD; } // Get images - $image_html = ''; + $media_html = ''; if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) { foreach($tweet->extended_entities->media as $media) { - $image = $media->media_url_https; - $display_image = $media->media_url; - // add enclosures - $item['enclosures'][] = $image; + switch($media->type) { + case 'photo': + $image = $media->media_url_https . '?name=orig'; + $display_image = $media->media_url_https; + // add enclosures + $item['enclosures'][] = $image; - $image_html .= << EOD; + break; + case 'video': + case 'animated_gif': + if(isset($media->video_info)) { + $link = $media->expanded_url; + $poster = $media->media_url_https; + $video = null; + $maxBitrate = -1; + foreach($media->video_info->variants as $variant) { + $bitRate = isset($variant->bitrate) ? $variant->bitrate : -100; + if ($bitRate > $maxBitrate) { + $maxBitrate = $bitRate; + $video = $variant->url; + } + } + if(!is_null($video)) { + // add enclosures + $item['enclosures'][] = $video; + $item['enclosures'][] = $poster; + + $media_html .= <<Video +
find('h4 a', 0)->plaintext, 2)[1]); - $item['author'] = explode('・', $article->find('h4 a', 0)->plaintext, 2)[0]; + $item['timestamp'] = $article->find('time', 0)->datetime; + $item['author'] = $article->find('a.crayons-story__secondary.fw-medium', 0)->plaintext; // Profile image $item['enclosures'] = array($article->find('img', 0)->src); @@ -70,7 +68,6 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' if($this->getInput('full')) { $fullArticle = $this->getFullArticle($item['uri']); $item['content'] = <<

{$fullArticle}

EOD; } else { @@ -80,11 +77,13 @@ EOD; EOD; } - $item['categories'] = array_map(function($e){ return $e->plaintext; }, $article->find('div.tags span.tag')); + // categories + foreach ($article->find('a.crayons-tag') as $tag) { + $item['categories'][] = str_replace('#', '', $tag->plaintext); + } $this->items[] = $item; } - } public function getName() { @@ -101,6 +100,10 @@ EOD; $html = defaultLinkTo($html, static::URI); + if ($html->find('div.crayons-article__cover', 0)) { + return $html->find('div.crayons-article__cover', 0) . $html->find('[id="article-body"]', 0); + } + return $html->find('[id="article-body"]', 0); } } From 01cc32a0cc92d8b76ac2d181f974d1f8e1f95a1d Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 25 Mar 2020 23:28:42 +0100 Subject: [PATCH 089/192] [Markdown] Fix images with empty replacement text --- lib/html.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/html.php b/lib/html.php index 13db97a4..c56140bf 100644 --- a/lib/html.php +++ b/lib/html.php @@ -207,7 +207,7 @@ function markdownToHtml($string) { //For more details about how these regex work: // https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702 - // Images: https://regex101.com/r/JW9Evr/1 + // Images: https://regex101.com/r/JW9Evr/2 // Links: https://regex101.com/r/eRGVe7/1 // Bold: https://regex101.com/r/2p40Y0/1 // Italic: https://regex101.com/r/xJkET9/1 @@ -215,7 +215,7 @@ function markdownToHtml($string) { // Plain URL: https://regex101.com/r/2JHYwb/1 // Site name: https://regex101.com/r/qIuKYE/1 - $string = preg_replace('/\!\[([^\]]+)\]\(([^\) ]+)(?: [^\)]+)?\)/', '$1', $string); + $string = preg_replace('/\!\[([^\]]*)\]\(([^\) ]+)(?: [^\)]+)?\)/', '$1', $string); $string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '$1', $string); $string = preg_replace('/\*\*(.*)\*\*/U', '$1', $string); $string = preg_replace('/\*(.*)\*/U', '$1', $string); From 90e9c9962a3d9cf7c98173b1fd4e1e48adf9bca2 Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 25 Mar 2020 23:57:22 +0100 Subject: [PATCH 090/192] [TheHackerNews] Fix Author name cleanup --- bridges/TheHackerNewsBridge.php | 1 + 1 file changed, 1 insertion(+) diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php index 687b620c..1e710b31 100644 --- a/bridges/TheHackerNewsBridge.php +++ b/bridges/TheHackerNewsBridge.php @@ -17,6 +17,7 @@ class TheHackerNewsBridge extends BridgeAbstract { $article_url = $element->find('a.story-link', 0)->href; $article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext); + $article_author = str_replace('', '', $article_author); $article_title = $element->find('h2.home-title', 0)->plaintext; //Date without time From 8b173b88740924d20850ad3f39c71379fb2512f2 Mon Sep 17 00:00:00 2001 From: ORelio Date: Thu, 26 Mar 2020 23:05:19 +0100 Subject: [PATCH 091/192] [LeMondeInformatique] Remove encoding conversion Was previously needed due to actual encoding on the page being inconsistent with encoding specified in tag --- bridges/LeMondeInformatiqueBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 45aa6075..b85a9631 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -26,8 +26,8 @@ class LeMondeInformatiqueBridge extends FeedExpander { //No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail $content_node = $article_html->find('div.col-primary, div.col-sm-9', 0); - $item['content'] = utf8_encode($this->cleanArticle($content_node->innertext)); - $item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext); + $item['content'] = $this->cleanArticle($content_node->innertext); + $item['author'] = $article_html->find('div.author-infos', 0)->find('b', 0)->plaintext; return $item; } From efd1abfab193e314306a19d2495632b2a26bf1f5 Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 15 May 2020 14:05:13 +0200 Subject: [PATCH 092/192] [AnimeUltime] Remove encoding conversion Was previously needed due to encoding on the page being incorrect --- bridges/AnimeUltimeBridge.php | 1 - 1 file changed, 1 deletion(-) diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index bc1dd7bc..c83d6ddb 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -102,7 +102,6 @@ class AnimeUltimeBridge extends BridgeAbstract { $item_description = defaultLinkTo($item_description, self::URI); $item_description = str_replace("\r", '', $item_description); $item_description = str_replace("\n", '', $item_description); - $item_description = utf8_encode($item_description); //Build and add final item $item = array(); From 66a009b8fb634bade38319376d056ca5cba1c800 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 23 May 2020 19:20:39 +0200 Subject: [PATCH 093/192] [FuturaSciences] Fix content extraction --- bridges/FuturaSciencesBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 5859bc41..59b4734e 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -92,11 +92,12 @@ class FuturaSciencesBridge extends FeedExpander { $author = $this->extractAuthor($article); if (!empty($author)) $item['author'] = $author; + unset($article); return $item; } private function extractArticleContent($article){ - $contents = $article->find('section.article-text-classic', 0)->innertext; + $contents = $article->find('section.article-text', 1)->innertext; $headline = trim($article->find('p.description', 0)->plaintext); if(!empty($headline)) $headline = '

' . $headline . '

'; From 45e247b9d092f910a0b9ed03d8cbad1a4ed19554 Mon Sep 17 00:00:00 2001 From: ORelio Date: Sat, 23 May 2020 19:21:48 +0200 Subject: [PATCH 094/192] [FuturaSciences] Fix unneeded unset() --- bridges/FuturaSciencesBridge.php | 1 - 1 file changed, 1 deletion(-) diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 59b4734e..79c05880 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -92,7 +92,6 @@ class FuturaSciencesBridge extends FeedExpander { $author = $this->extractAuthor($article); if (!empty($author)) $item['author'] = $author; - unset($article); return $item; } From f0e6298cab91e7c669aaa242681adb697783c49c Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 7 Aug 2020 15:09:21 +0200 Subject: [PATCH 095/192] [GBAtemp] Fix tutorial mode URL extraction --- bridges/GBAtempBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 48a7f851..b1a86ad9 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -113,7 +113,7 @@ class GBAtempBridge extends BridgeAbstract { break; case 'T': foreach($html->find('li.portal-tutorial') as $tutorialItem) { - $url = self::URI . $tutorialItem->find('a', 0)->href; + $url = self::URI . $tutorialItem->find('a', 1)->href; $title = $tutorialItem->find('a', 0)->plaintext; $time = $this->findItemDate($tutorialItem); $author = $tutorialItem->find('a.username', 0)->plaintext; From c642652fea6fd7ea27eb264c88aeef3d5d65ef0e Mon Sep 17 00:00:00 2001 From: ORelio Date: Fri, 7 Aug 2020 15:19:14 +0200 Subject: [PATCH 096/192] [GBAtemp] Fix tutorial mode Title extraction --- bridges/GBAtempBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index b1a86ad9..e0841950 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -114,7 +114,7 @@ class GBAtempBridge extends BridgeAbstract { case 'T': foreach($html->find('li.portal-tutorial') as $tutorialItem) { $url = self::URI . $tutorialItem->find('a', 1)->href; - $title = $tutorialItem->find('a', 0)->plaintext; + $title = $tutorialItem->find('a', 1)->plaintext; $time = $this->findItemDate($tutorialItem); $author = $tutorialItem->find('a.username', 0)->plaintext; $content = $this->fetchPostContent($url, self::URI); From ef54a78430d56b5b6f3f091080b22d2b6722afd7 Mon Sep 17 00:00:00 2001 From: jannyba <56595831+jannyba@users.noreply.github.com> Date: Sun, 16 Aug 2020 08:23:48 +0200 Subject: [PATCH 097/192] [InstagramBridge] Fix "Skip reviews" checkbox description (#1702) --- bridges/FacebookBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 13ccb27a..cb5e30f8 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -30,7 +30,7 @@ class FacebookBridge extends BridgeAbstract { 'type' => 'checkbox', 'required' => false, 'defaultValue' => false, - 'title' => 'Feed includes reviews when checked' + 'title' => 'Feed includes reviews when unchecked' ) ), 'Group' => array( From 0b1e592a5edde86b20c240953cff07f1c91c8927 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Wed, 19 Aug 2020 14:35:19 +0200 Subject: [PATCH 098/192] [ZoneTelechargement] Update URL (#1710) The bridge now shows links to the new URL. It keeps the old one internally to bypass the Robot protection on the new URL. --- bridges/ZoneTelechargementBridge.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index 79723fcb..f11f3b7d 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -8,7 +8,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { */ const NAME = 'Zone Telechargement'; - const URI = 'https://www.zone-annuaire.com/'; + const URI = 'https://www.zt-za.com/'; const DESCRIPTION = 'Suivi de série sur Zone Telechargement'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = array( @@ -17,18 +17,21 @@ class ZoneTelechargementBridge extends BridgeAbstract { 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://www.zone-annuaire.com/', + 'title' => 'URL d\'une série sans le https://www.zt-za.com/', 'exampleValue' => 'telecharger-series/31079-halt-and-catch-fire-saison-4-french-hd720p.html' ) ) ); + // This is an URL that is not protected by robot protection + const UNPROTECED_URI = 'https://www.zone-annuaire.com/'; + public function getIcon() { return self::URI . '/templates/Default/images/favicon.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')) + $html = getSimpleHTMLDOM(self::UNPROTECED_URI . $this->getInput('url')) or returnServerError('Could not request Zone Telechargement.'); // Get the TV show title From 73287f536be5fd9d7609f55b326edc71f6c671e7 Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Thu, 20 Aug 2020 06:00:27 +0100 Subject: [PATCH 099/192] [TwitterBridge] Add retweeter to retweeted tweets (#1679) --- bridges/TwitterBridge.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 1946acad..c946bfd8 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -225,6 +225,9 @@ EOD $item['username'] = $user_info->screen_name; $item['fullname'] = $user_info->name; $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; + if (null !== $this->getInput('u') && $item['username'] != $this->getInput('u')) { + $item['author'] .= ' RT: @' . $this->getInput('u'); + } $item['avatar'] = $user_info->profile_image_url_https; $item['id'] = $tweet->id_str; From 859053ef7aa4878bb1e7486acb5d9229f7feba59 Mon Sep 17 00:00:00 2001 From: Alexander Date: Thu, 20 Aug 2020 08:36:11 +0300 Subject: [PATCH 100/192] [EpicgamesBridge] New bridge (#1709) --- bridges/EpicgamesBridge.php | 80 +++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 bridges/EpicgamesBridge.php diff --git a/bridges/EpicgamesBridge.php b/bridges/EpicgamesBridge.php new file mode 100644 index 00000000..2264e70b --- /dev/null +++ b/bridges/EpicgamesBridge.php @@ -0,0 +1,80 @@ + array( + 'name' => 'Limit', + 'type' => 'list', + 'values' => array( + '5' => 5, + '10' => 10, + '15' => 15, + '20' => 20, + '25' => 25, + ), + 'title' => 'Maximum number of items to return', + 'defaultValue' => 10, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'العربية' => 'ar', + 'Deutsch' => 'de', + 'Español (Spain)' => 'es-ES', + 'Español (LA)' => 'es-MX', + 'Français' => 'fr', + 'Italiano' => 'it', + '日本語' => 'ja', + '한국어' => 'ko', + 'Polski' => 'pl', + 'Português (Brasil)' => 'pt-BR', + 'Русский' => 'ru', + 'ไทย' => 'th', + 'Türkçe' => 'tr', + '简体中文' => 'zh-CN', + '繁體中文' => 'zh-Hant', + ), + 'title' => 'Language of blog posts', + 'defaultValue' => 'en', + ), + )); + + public function collectData() { + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog?limit=25 + $api = 'https://store-content.ak.epicgames.com/api/'; + $url = $api . $this->getInput('language') . '/content/blog?limit=' . $this->getInput('postcount'); + + $data = getContents($url) + or returnServerError('Unable to get the news pages from epicgames.com!'); + $decodedData = json_decode($data); + + foreach($decodedData as $key => $value) { + $item = array(); + $item['uri'] = self::URI . $value->url; + $item['title'] = $value->title; + $item['timestamp'] = $value->date; + $item['author'] = 'Epic Games Store'; + if(!empty($value->author)) { + $item['author'] = $value->author; + } + if(!empty($value->content)) { + $item['content'] = defaultLinkTo($value->content, self::URI); + } + if(!empty($value->image)) { + $item['enclosures'][] = $value->image; + } + $item['uid'] = $value->_id; + $item['id'] = $value->_id; + + $this->items[] = $item; + } + } +} From 07c71b3b36242e2913eadf1da191edd59edd6048 Mon Sep 17 00:00:00 2001 From: ORelio Date: Thu, 20 Aug 2020 14:49:26 +0200 Subject: [PATCH 101/192] [NextINpact] Upgrade for NextINpact v7 (#1708) --- bridges/NextInpactBridge.php | 127 +++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 27 deletions(-) diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index c6bf2f53..879141cc 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -1,9 +1,10 @@ 'Feed', 'type' => 'list', 'values' => array( - 'Tous nos articles' => 'news', - 'Nos contenus en accès libre' => 'acces-libre', - 'Blog' => 'blog', - 'Bons plans' => 'bonsplans' + 'Nos actualités' => array( + 'Toutes nos publications' => 'news', + 'Toutes nos publications sauf #LeBrief' => 'nobrief', + 'Toutes nos publications sauf INpact Hardware' => 'noih', + 'Seulement les publications INpact Hardware' => 'hardware:news', + 'Seulement les publications Next INpact' => 'nobrief-noih', + 'Seulement les publications #LeBrief' => 'lebrief', + ), + 'Flux spécifiques' => array( + 'Le blog' => 'blog', + 'Les bons plans' => 'bonsplans', + 'Publications INpact Hardware en accès libre' => 'hardware:acces-libre', + 'Publications Next INpact en accès libre' => 'acces-libre', + ), + 'Flux thématiques' => array( + 'Tech' => 'category:1', + 'Logiciel' => 'category:2', + 'Internet' => 'category:3', + 'Mobilité' => 'category:4', + 'Droit' => 'category:5', + 'Économie' => 'category:6', + 'Culture numérique' => 'category:7', + 'Next INpact' => 'category:8', + ) ) ), 'filter_premium' => array( @@ -39,9 +60,27 @@ class NextInpactBridge extends FeedExpander { public function collectData(){ $feed = $this->getInput('feed'); - if (empty($feed)) + $base_uri = self::URI; + $args = ''; + + if (empty($feed)) { + // Default to All articles $feed = 'news'; - $this->collectExpandableDatas(self::URI . 'rss/' . $feed . '.xml'); + } + + if (strpos($feed, 'hardware:') === 0) { + // Feed hosted on Hardware domain + $base_uri = self::URI_HARDWARE; + $feed = str_replace('hardware:', '', $feed); + } + + if (strpos($feed, 'category:') === 0) { + // Feed with specific category parameter + $args = '?CategoryIds=' . str_replace('category:', '', $feed); + $feed = 'params'; + } + + $this->collectExpandableDatas($base_uri . 'rss/' . $feed . '.xml' . $args); } protected function parseItem($newsItem){ @@ -57,9 +96,10 @@ class NextInpactBridge extends FeedExpander { if (!is_object($html)) return 'Failed to request NextInpact: ' . $url; + // Filter premium and brief articles? foreach(array( - 'filter_premium' => 'h2.title_reserve_article', - 'filter_brief' => 'div.brief-inner-content' + 'filter_premium' => 'p.red-msg', + 'filter_brief' => 'div.brief-container' ) as $param_name => $selector) { $param_val = intval($this->getInput($param_name)); if ($param_val != 0) { @@ -71,38 +111,71 @@ class NextInpactBridge extends FeedExpander { } } - if (is_object($html->find('div[itemprop=articleBody], div.brief-inner-content', 0))) { + $article_content = $html->find('div.article-content', 0); + if (!is_object($article_content)) { + $article_content = $html->find('div.content', 0); + } + if (is_object($article_content)) { - $subtitle = trim($html->find('span.sub_title, div.brief-head', 0)); - if(is_object($subtitle) && $subtitle->plaintext !== $item['title']) { - $subtitle = '

' . $subtitle->plaintext . '

'; + // Subtitle + $subtitle = $html->find('small.subtitle', 0); + if(!is_object($subtitle)) { + $subtitle = $html->find('small', 0); + } + if(!is_object($subtitle)) { + $content_wrapper = $html->find('div.content-wrapper', 0); + if (is_object($content_wrapper)) { + $subtitle = $content_wrapper->find('h2.title', 0); + } + } + if(is_object($subtitle) && (!isset($item['title']) || $subtitle->plaintext != $item['title'])) { + $subtitle = '

' . trim($subtitle->plaintext) . '

'; } else { $subtitle = ''; } - $postimg = $html->find( - 'div.container_main_image_article, div.image-brief-container, div.image-brief-side-container', 0 - ); + // Image + $postimg = $html->find('div.article-image, div.image-container', 0); if(is_object($postimg)) { - $postimg = '

-

'; + $postimg = $postimg->find('img', 0); + if (!empty($postimg->src)) { + $postimg = $postimg->src; + } else { + $postimg = $postimg->srcset; //"url 355w, url 1003w, url 748w" + $postimg = explode(', ', $postimg); //split by ', ' to get each url separately + $postimg = end($postimg); //Get last item: "url 748w" which is of largest size + $postimg = explode(' ', $postimg); //split by ' ' to separate url from res + $postimg = array_reverse($postimg); //reverse array content to have url last + $postimg = end($postimg); //Get last item of array: "url" + } + $postimg = '

-

'; } else { $postimg = ''; } + // Paywall + $paywall = $html->find('div.paywall-restriction', 0); + if (is_object($paywall) && is_object($paywall->find('p.red-msg', 0))) { + $paywall = '

' . $paywall->find('span.head-mention', 0)->innertext . '

'; + } else { + $paywall = ''; + } + + // Content + $article_content = $article_content->outertext; + $article_content = str_replace('>Signaler une erreur', '>', $article_content); + + // Result $text = $subtitle . $postimg - . $html->find('div[itemprop=articleBody], div.brief-inner-content', 0)->outertext; + . $article_content + . $paywall; } else { - $text = $item['content'] - . '

Failed retrieve full article content

'; - } - - $premium_article = $html->find('h2.title_reserve_article', 0); - if (is_object($premium_article)) { - $text .= '

' . $premium_article->innertext . '

'; + $text = '

Failed to retrieve full article content

'; + if (isset($item['content'])) { + $text = $item['content'] . $text; + } } return $text; From 25cff9c07b747fdd0ca58ebd9935fc3e95202e04 Mon Sep 17 00:00:00 2001 From: ronansalmon Date: Fri, 21 Aug 2020 14:55:11 +0200 Subject: [PATCH 102/192] [TwitterBridge] Convert plain text URLs into HTML hyperlinks (#1627) --- bridges/TwitterBridge.php | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index c946bfd8..7b7bbe86 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -235,9 +235,37 @@ EOD // extract tweet timestamp $item['timestamp'] = $tweet->created_at; + // Convert plain text URLs into HTML hyperlinks + $cleanedTweet = $tweet->full_text; + $foundUrls = false; + + if (isset($tweet->entities->media)) { + foreach($tweet->entities->media as $media) { + $cleanedTweet = str_replace($media->url, + '' . $media->display_url . '', + $cleanedTweet); + $foundUrls = true; + } + } + if (isset($tweet->entities->urls)) { + foreach($tweet->entities->urls as $url) { + $cleanedTweet = str_replace($url->url, + '' . $url->display_url . '', + $cleanedTweet); + $foundUrls = true; + } + } + if ($foundUrls === false) { + // fallback to regex'es + $reg_ex = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/'; + if(preg_match($reg_ex, $tweet->full_text, $url)) { + $cleanedTweet = preg_replace($reg_ex, + "{$url[0]} ", + $cleanedTweet); + } + } // generate the title - $item['title'] = $tweet->full_text; - $cleanedTweet = $tweet->full_text; + $item['title'] = strip_tags($cleanedTweet); // Add avatar $picture_html = ''; From 94576c30530e0df950cf9eab8e281d55f5a4b418 Mon Sep 17 00:00:00 2001 From: ggiessen Date: Mon, 24 Aug 2020 07:30:59 +0200 Subject: [PATCH 103/192] [MarktplaatsBridge] 'https:' added to img src url (#1713) --- bridges/MarktplaatsBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php index ca88af2b..af6f28f5 100644 --- a/bridges/MarktplaatsBridge.php +++ b/bridges/MarktplaatsBridge.php @@ -93,10 +93,10 @@ class MarktplaatsBridge extends BridgeAbstract { if($this->getInput('i')) { if(is_array($listing->imageUrls)) { foreach($listing->imageUrls as $imgurl) { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } else { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } } From 3b36c413e59ed89445a636d3a4f9a206fab4807e Mon Sep 17 00:00:00 2001 From: Simon Alberny Date: Thu, 27 Aug 2020 07:28:59 +0200 Subject: [PATCH 104/192] [MondeDiplo] Switched to HTTPS + Title and content updated (#1714) --- bridges/MondeDiploBridge.php | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index 85f771e3..c5ed6169 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -3,22 +3,26 @@ class MondeDiploBridge extends BridgeAbstract { const MAINTAINER = 'Pitchoule'; const NAME = 'Monde Diplomatique'; - const URI = 'http://www.monde-diplomatique.fr/'; + const URI = 'https://www.monde-diplomatique.fr'; const CACHE_TIMEOUT = 21600; //6h const DESCRIPTION = 'Returns most recent results from MondeDiplo.'; + private function cleanText($text) { + return trim(str_replace([' ', ' '], ' ', $text)); + } + public function collectData(){ $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request MondeDiplo. for : ' . self::URI); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); + $title = $element->find('h3', 0)->plaintext; + $datesAuteurs = $element->find('div.dates_auteurs', 0)->plaintext; $item = array(); $item['uri'] = self::URI . $element->href; - $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('div.dates_auteurs', 0)->plaintext - . '
' - . strstr($element->find('div', 0)->plaintext, $element->find('div.dates_auteurs', 0)->plaintext, true); + $item['title'] = $this->cleanText($title) . ' - ' . $this->cleanText($datesAuteurs); + $item['content'] = $this->cleanText(str_replace([$title, $datesAuteurs], '', $element->plaintext)); $this->items[] = $item; } From c21a805cb4d0dc84fffa5cd5a4eab6af110a0107 Mon Sep 17 00:00:00 2001 From: somini Date: Thu, 27 Aug 2020 06:38:51 +0100 Subject: [PATCH 105/192] [DiarioDeNoticiasBridge]: New Bridge (#1717) --- bridges/DiarioDeNoticiasBridge.php | 83 ++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 bridges/DiarioDeNoticiasBridge.php diff --git a/bridges/DiarioDeNoticiasBridge.php b/bridges/DiarioDeNoticiasBridge.php new file mode 100644 index 00000000..0aaac6f4 --- /dev/null +++ b/bridges/DiarioDeNoticiasBridge.php @@ -0,0 +1,83 @@ + array( + 'n' => array( + 'name' => 'Tag Name', + 'exampleValue' => 'rogerio-casanova', + ) + ) + ); + + const MONPT = array( + 'jan', + 'fev', + 'mar', + 'abr', + 'mai', + 'jun', + 'jul', + 'ago', + 'set', + 'out', + 'nov', + 'dez', + ); + + public function getIcon() { + return 'https://static.globalnoticias.pt/dn/common/images/favicons/favicon-128.png'; + } + + public function getName() { + switch($this->queriedContext) { + case 'Tag': + $name = self::NAME . ' | Tag | ' . $this->getInput('n'); + break; + default: + $name = self::NAME; + } + return $name; + } + public function getURI() { + switch($this->queriedContext) { + case 'Tag': + $url = self::URI . '/tag/' . $this->getInput('n') . '.html'; + break; + default: + $url = self::URI; + } + return $url; + } + + public function collectData() { + $archives = self::getURI(); + $html = getSimpleHTMLDOMCached($archives) + or returnServerError('Could not load content'); + + foreach($html->find('article') as $element) { + $item = array(); + + $title = $element->find('.t-am-title', 0); + $link = $element->find('a.t-am-text', 0); + + $item['title'] = $title->plaintext; + $item['uri'] = self::URI . $link->href; + + $snippet = $element->find('.t-am-lead', 0); + if ($snippet) { + $item['content'] = $snippet->plaintext; + } + preg_match('|edicao-do-dia\\/(?P\d\d)-(?P\w\w\w)-(?P\d\d\d\d)|', $link->href, $d); + if ($d) { + $item['timestamp'] = sprintf('%s-%s-%s', $d['year'], array_search($d['monpt'], self::MONPT) + 1, $d['day']); + } + + $this->items[] = $item; + } + + } +} From e00bbe353ff1dd5fc1c6c75ba2bdae78f3d1578a Mon Sep 17 00:00:00 2001 From: ORelio Date: Mon, 31 Aug 2020 19:02:25 +0200 Subject: [PATCH 106/192] * [ReleasesSwitch] Switch scene releases (#1694) Separate bridge from Releases3DS that just has a different URL. Inherits from Releases3DS so both bridges need to be present. * [Releases3DS] Fix PHP notices related to IGN --- bridges/Releases3DSBridge.php | 41 ++++++++++++++++++++------------ bridges/ReleasesSwitchBridge.php | 17 +++++++++++++ 2 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 bridges/ReleasesSwitchBridge.php diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index fe2df8ec..686e7c5d 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -5,13 +5,16 @@ class Releases3DSBridge extends BridgeAbstract { const NAME = '3DS Scene Releases'; const URI = 'http://www.3dsdb.com/'; const CACHE_TIMEOUT = 10800; // 3h - const DESCRIPTION = 'Returns the newest scene releases.'; + const DESCRIPTION = 'Returns the newest scene releases for Nintendo 3DS.'; public function collectData(){ + $this->collectDataUrl(self::URI . 'xml.php'); + } + + protected function collectDataUrl($dataUrl){ - $dataUrl = self::URI . 'xml.php'; $xml = getContents($dataUrl) - or returnServerError('Could not request 3dsdb: ' . $dataUrl); + or returnServerError('Could not request URL: ' . $dataUrl); $limit = 0; foreach(array_reverse(explode('', $xml)) as $element) { @@ -52,17 +55,25 @@ class Releases3DSBridge extends BridgeAbstract { $ignSearchUrl = 'https://www.ign.com/search?q=' . urlencode($name); if($ignResult = getSimpleHTMLDOMCached($ignSearchUrl)) { - $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; - $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; - $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; - $ignDate = strtotime(trim($ignResult->find('span.publish-date', 0)->plaintext)); - $ignDescription = '
' - . $ignDesc - . ' More at IGN
'; + $ignCoverArt = $ignResult->find('div.search-item-media', 0); + $ignDesc = $ignResult->find('div.search-item-description', 0); + $ignLink = $ignResult->find('div.search-item-sub-title', 0); + $ignDate = $ignResult->find('span.publish-date', 0); + if (is_object($ignCoverArt)) + $ignCoverArt = $ignCoverArt->find('img', 0); + if (is_object($ignLink)) + $ignLink = $ignLink->find('a', 1); + if (is_object($ignDate)) + $ignDate = strtotime(trim($ignDate->plaintext)); + if (is_object($ignCoverArt) && is_object($ignDesc) && is_object($ignLink)) { + $ignDescription = '
' + . $ignDesc->plaintext + . ' More at IGN
'; + } } //Main section : Release description from 3DS database @@ -111,7 +122,7 @@ class Releases3DSBridge extends BridgeAbstract { private function typeToString($type){ switch($type) { - case 1: return '3DS Game'; + case 1: return 'Card Game'; case 4: return 'eShop'; default: return '??? (' . $type . ')'; } diff --git a/bridges/ReleasesSwitchBridge.php b/bridges/ReleasesSwitchBridge.php new file mode 100644 index 00000000..3814f8ec --- /dev/null +++ b/bridges/ReleasesSwitchBridge.php @@ -0,0 +1,17 @@ +collectDataUrl(self::URI . 'xml.php'); + } +} From 46abc18e877d87ecffeedd35eb81fdda44205694 Mon Sep 17 00:00:00 2001 From: ORelio Date: Mon, 31 Aug 2020 19:04:56 +0200 Subject: [PATCH 107/192] [Anidex] Fix content retrieval (#1693) Anidex uses two separate domains, anidex.info and anidex.moe anidex.info has ddos-guard so we need to request anidex.moe with Host header set to anidex.info --- bridges/AnidexBridge.php | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/bridges/AnidexBridge.php b/bridges/AnidexBridge.php index ae387c90..ff9f5f96 100644 --- a/bridges/AnidexBridge.php +++ b/bridges/AnidexBridge.php @@ -3,7 +3,9 @@ class AnidexBridge extends BridgeAbstract { const MAINTAINER = 'ORelio'; const NAME = 'Anidex'; - const URI = 'https://anidex.info/'; + const URI = 'http://anidex.info/'; // anidex.info has ddos-guard so we need to use anidex.moe + const ALTERNATE_URI = 'https://anidex.moe/'; // anidex.moe returns 301 unless Host is set to anidex.info + const ALTERNATE_HOST = 'anidex.info'; // Correct host for requesting anidex.moe without 301 redirect const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.'; const PARAMETERS = array( array( @@ -108,7 +110,7 @@ class AnidexBridge extends BridgeAbstract { public function collectData() { // Build Search URL from user-provided parameters - $search_url = self::URI . '?s=upload_timestamp&o=desc'; + $search_url = self::ALTERNATE_URI . '?s=upload_timestamp&o=desc'; foreach (array('id', 'lang_id', 'group_id') as $param_name) { $param = $this->getInput($param_name); if (!empty($param) && intval($param) != 0 && ctype_digit(str_replace(',', '', $param))) { @@ -131,8 +133,16 @@ class AnidexBridge extends BridgeAbstract { $opt[CURLOPT_COOKIE] = 'anidex_h_toggle=' . $h; } + // We need to use a different Host HTTP header to reach the correct page on ALTERNATE_URI + $headers = array('Host: ' . self::ALTERNATE_HOST); + + // The HTTPS certificate presented by anidex.moe is for anidex.info. We need to ignore this. + // As a consequence, the bridge is intentionally marked as insecure by setting self::URI to http:// + $opt[CURLOPT_SSL_VERIFYHOST] = 0; + $opt[CURLOPT_SSL_VERIFYPEER] = 0; + // Retrieve torrent listing from search results, which does not contain torrent description - $html = getSimpleHTMLDOM($search_url, array(), $opt) + $html = getSimpleHTMLDOM($search_url, $headers, $opt) or returnServerError('Could not request Anidex: ' . $search_url); $links = $html->find('a'); $results = array(); @@ -156,10 +166,11 @@ class AnidexBridge extends BridgeAbstract { if ($torrent_id != 0 && ctype_digit($torrent_id)) { //Retrieve data for this torrent ID - $item_uri = self::URI . 'torrent/' . $torrent_id; + $item_browse_uri = self::URI . 'torrent/' . $torrent_id; + $item_fetch_uri = self::ALTERNATE_URI . 'torrent/' . $torrent_id; - //Retrieve full description from torrent page - if ($item_html = getSimpleHTMLDOMCached($item_uri)) { + //Retrieve full description from torrent page (cached for 24 hours: 86400 seconds) + if ($item_html = getSimpleHTMLDOMCached($item_fetch_uri, 86400, $headers, $opt)) { //Retrieve data from page contents $item_title = str_replace(' (Torrent) - AniDex ', '', $item_html->find('title', 0)->plaintext); @@ -191,7 +202,7 @@ class AnidexBridge extends BridgeAbstract { //Build and add final item $item = array(); - $item['uri'] = $item_uri; + $item['uri'] = $item_browse_uri; $item['title'] = $item_title; $item['author'] = $item_author; $item['timestamp'] = $item_date; From 68dd2d745f1bd992d43d9e1cf466af644ab82411 Mon Sep 17 00:00:00 2001 From: Bob Date: Wed, 2 Sep 2020 07:02:34 +0100 Subject: [PATCH 108/192] [InstagramBridge] Change TAG_QUERY_HASH (#1727) --- bridges/InstagramBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 58238e9d..43df4e45 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -47,7 +47,7 @@ class InstagramBridge extends BridgeAbstract { ); const USER_QUERY_HASH = '58b6785bea111c67129decbe6a448951'; - const TAG_QUERY_HASH = '174a5243287c5f3a7de741089750ab3b'; + const TAG_QUERY_HASH = '9b498c08113f1e09617a1703c22b2f32'; const SHORTCODE_QUERY_HASH = '865589822932d1b43dfe312121dd353a'; protected function getInstagramUserId($username) { From bb51a0d212fb978ae4a79e87d436f1b57e7c7c3e Mon Sep 17 00:00:00 2001 From: ggiessen Date: Thu, 3 Sep 2020 07:44:32 +0200 Subject: [PATCH 109/192] [MarktplaatsBridge] Improvements (#1722) - sometimes $listing->imageUrls is empty so moved after the if statement on line 91 - added price and location info - added function getName --- bridges/MarktplaatsBridge.php | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php index af6f28f5..d8fce6c4 100644 --- a/bridges/MarktplaatsBridge.php +++ b/bridges/MarktplaatsBridge.php @@ -86,18 +86,16 @@ class MarktplaatsBridge extends BridgeAbstract { $item['timestamp'] = $listing->date; $item['author'] = $listing->sellerInformation->sellerName; $item['content'] = $listing->description; - $item['enclosures'] = $listing->imageUrls; $item['categories'] = $listing->verticals; $item['uid'] = $listing->itemId; if(!is_null($this->getInput('i')) && !empty($listing->imageUrls)) { - if($this->getInput('i')) { - if(is_array($listing->imageUrls)) { - foreach($listing->imageUrls as $imgurl) { - $item['content'] .= "
\n"; - } - } else { - $item['content'] .= "
\n"; + $item['enclosures'] = $listing->imageUrls; + if(is_array($listing->imageUrls)) { + foreach($listing->imageUrls as $imgurl) { + $item['content'] .= "
\n"; } + } else { + $item['content'] .= "
\n"; } } if(!is_null($this->getInput('r'))) { @@ -105,8 +103,25 @@ class MarktplaatsBridge extends BridgeAbstract { $item['content'] .= "
\n
\n
\n" . json_encode($listing); } } + $item['content'] .= "
\n
\nPrice: " . $listing->priceInfo->priceCents/100; + $item['content'] .= "  (" . $listing->priceInfo->priceType .")"; + if(!empty($listing->location->cityName)) { + $item['content'] .= "

\n" . $listing->location->cityName; + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "
\n
\n
\n" . json_encode($listing); + } + } $this->items[] = $item; } } } + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Marktplaats'; + } + return parent::getName(); + } } From fec52418d5adb9eff2b10280110d6d9702dca055 Mon Sep 17 00:00:00 2001 From: Christian Jonak Date: Thu, 3 Sep 2020 07:46:35 +0200 Subject: [PATCH 110/192] [FM4Bridge] Add new bridge for FM4 news page (#1719) --- bridges/FM4Bridge.php | 66 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 bridges/FM4Bridge.php diff --git a/bridges/FM4Bridge.php b/bridges/FM4Bridge.php new file mode 100644 index 00000000..6f59e80f --- /dev/null +++ b/bridges/FM4Bridge.php @@ -0,0 +1,66 @@ + array( + 'name' => 'Tag (author, category, ...)', + 'title' => 'Tag to retrieve', + 'exampleValue' => 'musik' + ), + 'loadcontent' => array( + 'name' => 'Load Full Article Content', + 'title' => 'Retrieve full content of articles (may take longer)', + 'type' => 'checkbox' + ), + 'pages' => array( + 'name' => 'Pages', + 'title' => 'Amount of pages to load', + 'type' => 'number', + 'defaultValue' => 1 + ) + ) + ); + + public function getPageData($tag, $page) { + if($tag) + $uri = self::URI . "/tags/" . $tag; + else + $uri = self::URI; + + $uri = $uri . '?page=' . $page; + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Error while downloading the website content'); + + $page_items = array(); + + foreach ($html->find('div[class*=listItem]') as $article) { + $item = array(); + + $item['uri'] = $article->find('a', 0)->href; + $item['title'] = $article->find('h2', 0)->plaintext; + $item['author'] = $article->find('p[class*=keyword]', 0)->plaintext; + $item["timestamp"] = strtotime($article->find('p[class*=time]', 0)->plaintext); + + if ($this->getInput('loadcontent')) { + $item['content'] = getSimpleHTMLDOM($item['uri'])->find('div[class=storyText]', 0)->innertext + or returnServerError('Error while downloading the full article'); + } + + $page_items[] = $item; + } + return $page_items; + } + public function collectData() { + for ($cur_page = 1; $cur_page <= $this->getInput('pages'); $cur_page++) { + $this->items = array_merge($this->items,$this->getPageData($this->getInput('tag'), $cur_page)); + } + } +} From d33e090fe18ea1ca8ad52aa4d8dcce5fd7bb3b96 Mon Sep 17 00:00:00 2001 From: Joseph Date: Thu, 3 Sep 2020 05:49:19 +0000 Subject: [PATCH 111/192] [MastodonBridge] Update feed URL format (#1718) Changes feed URL from `https://instance/users/username.atom` to `https://instance/@username.rss`. --- bridges/MastodonBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 9e131b7d..de5e41f4 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -78,7 +78,7 @@ class MastodonBridge extends FeedExpander { public function getURI(){ if($this->getInput('canusername')) - return 'https://' . $this->getInstance() . '/users/' . $this->getUsername() . '.atom'; + return 'https://' . $this->getInstance() . '/@' . $this->getUsername() . '.rss'; return parent::getURI(); } From 747bb6ad9ca1b0d2d03b6d93a5ea99af5830e1c7 Mon Sep 17 00:00:00 2001 From: Joseph Date: Thu, 3 Sep 2020 06:18:15 +0000 Subject: [PATCH 112/192] [WosckerBridge] Add bridge (#1643) --- bridges/WosckerBridge.php | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bridges/WosckerBridge.php diff --git a/bridges/WosckerBridge.php b/bridges/WosckerBridge.php new file mode 100644 index 00000000..7f348531 --- /dev/null +++ b/bridges/WosckerBridge.php @@ -0,0 +1,51 @@ +getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $date = $html->find('h1', 0)->plaintext; + $timestamp = $html->find('span.dateFont', 0)->plaintext . ' ' . $html->find('span.dateFont', 1)->plaintext; + + $item = array(); + $item['title'] = $date; + $item['content'] = $this->formatContent($html); + $item['timestamp'] = $timestamp; + + $this->items[] = $item; + } + + private function formatContent($html) { + $html->find('h1', 0)->outertext = ''; + + foreach ($html->find('hr') as $hr) { + $hr->outertext = ''; + } + + foreach ($html->find('div.betweenHeadline') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('div.dividingBarrier') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('h2') as $h2) { + $h2->outertext = '
' . $h2->innertext . '
'; + } + + foreach ($html->find('h3') as $h3) { + $h3->outertext = $h3->innertext . '
'; + } + + return $html->find('div.fullContentPiece', 0)->innertext; + } +} From ab8e89a97f98915e1b326aca882bae756ab6b340 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Tue, 8 Sep 2020 07:55:21 +0200 Subject: [PATCH 113/192] [AllocineFRBridge] Update CSS class (#1585) Website has change one CSS class : updated the bridge to allow parsing. --- bridges/AllocineFRBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index 40ef9a98..00fd0e80 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -85,7 +85,7 @@ class AllocineFRBridge extends BridgeAbstract { self::PARAMETERS[$this->queriedContext]['category']['values'] ); - foreach($html->find('div[class=col-left]', 0)->find('div[class*=video-card]') as $element) { + foreach($html->find('div[class=gd-col-left]', 0)->find('div[class*=video-card]') as $element) { $item = array(); $title = $element->find('a[class*=meta-title-link]', 0); From d6f277d02928e26d6f9585044cbfe90c17fc2009 Mon Sep 17 00:00:00 2001 From: AxorPL <1163219+AxorPL@users.noreply.github.com> Date: Wed, 9 Sep 2020 14:11:19 +0200 Subject: [PATCH 114/192] [WorldCosplayBridge] Add new bridge (#1732) --- bridges/WorldCosplayBridge.php | 143 +++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 bridges/WorldCosplayBridge.php diff --git a/bridges/WorldCosplayBridge.php b/bridges/WorldCosplayBridge.php new file mode 100644 index 00000000..4fc31e67 --- /dev/null +++ b/bridges/WorldCosplayBridge.php @@ -0,0 +1,143 @@ +%s'; + + const ERR_CONTEXT = 'No context provided'; + const ERR_QUERY = 'Unable to query: %s'; + + const LIMIT_MIN = 1; + const LIMIT_MAX = 24; + + const PARAMETERS = array( + 'Character' => array( + 'cid' => array( + 'name' => 'Character ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay character ID', + 'exampleValue' => 18204 + ) + ), + 'Cosplayer' => array( + 'uid' => array( + 'name' => 'Cosplayer ID', + 'type' => 'number', + 'required' => true, + 'title' => 'Cosplayer\'s WorldCosplay profile ID', + 'exampleValue' => 406782 + ) + ), + 'Series' => array( + 'sid' => array( + 'name' => 'Series ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay series ID', + 'exampleValue' => 3139 + ) + ), + 'Tag' => array( + 'tid' => array( + 'name' => 'Tag ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay tag ID', + 'exampleValue' => 33643 + ) + ), + 'global' => array( + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Maximum number of photos to return', + 'exampleValue' => 5, + 'defaultValue' => 5 + ) + ) + ); + + public function collectData() { + $limit = $this->getInput('limit'); + $limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit)); + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + $url = self::API_CHARACTER; + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + $url = self::API_COSPLAYER; + break; + case 'Series': + $id = $this->getInput('sid'); + $url = self::API_SERIES; + break; + case 'Tag': + $id = $this->getInput('tid'); + $url = self::API_TAG; + break; + default: + returnClientError(self::ERR_CONTEXT); + } + $url = self::URI . sprintf($url, $id, $limit); + + $json = json_decode(getContents($url)) + or returnServerError(sprintf(self::ERR_QUERY, $url)); + if($json->has_error) + { + returnServerError($json->message); + } + $list = $json->list; + + foreach($list as $img) + { + $item = array(); + $item['uri'] = self::URI . substr($img->photo->url, 1); + $item['title'] = $img->photo->subject; + $item['timestamp'] = $img->photo->created_at; + $item['author'] = $img->member->global_name; + $item['enclosures'] = array($img->photo->large_url); + $item['uid'] = $img->photo->id; + $item['content'] = sprintf( + self::CONTENT_HTML, + $item['uri'], + $item['enclosures'][0], + $item['title'], + $item['title'] + ); + $this->items[] = $item; + } + } + + public function getName() { + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + break; + case 'Series': + $id = $this->getInput('sid'); + break; + case 'Tag': + $id = $this->getInput('tid'); + break; + default: + return parent::getName(); + } + return sprintf('%s %u - ', $this->queriedContext, $id) . self::NAME; + } +} From 6c244f4d9b0ca059748012b07f55e7422e5a67ed Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Fri, 11 Sep 2020 11:44:28 +0500 Subject: [PATCH 115/192] [TwitterBridge] Skip advertisment tweets (#1673) --- bridges/TwitterBridge.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 7b7bbe86..114b2feb 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -210,6 +210,17 @@ EOD $hidePictures = $this->getInput('nopic'); + $promotedTweetIds = array_reduce($data->timeline->instructions[0]->addEntries->entries, function($carry, $entry) { + if (!isset($entry->content->item)) { + return $carry; + } + $tweet = $entry->content->item->content->tweet; + if (isset($tweet->promotedMetadata)) { + $carry[] = $tweet->id; + } + return $carry; + }, array()); + foreach($data->globalObjects->tweets as $tweet) { /* Debug::log('>>> ' . json_encode($tweet)); */ @@ -218,6 +229,11 @@ EOD continue; } + // Skip promoted tweets + if (in_array($tweet->id_str, $promotedTweetIds)) { + continue; + } + $item = array(); // extract username and sanitize $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects); From cf606a3a6b8b1a27c0154c93d3ca6468354384f7 Mon Sep 17 00:00:00 2001 From: Michael Bemmerl Date: Fri, 11 Sep 2020 06:48:03 +0000 Subject: [PATCH 116/192] [OtrkeyFinderBridge] Add bridge for otrkeyfinder.com (#1712) --- bridges/OtrkeyFinderBridge.php | 173 +++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 bridges/OtrkeyFinderBridge.php diff --git a/bridges/OtrkeyFinderBridge.php b/bridges/OtrkeyFinderBridge.php new file mode 100644 index 00000000..887ccd1b --- /dev/null +++ b/bridges/OtrkeyFinderBridge.php @@ -0,0 +1,173 @@ + array( + 'name' => 'Search term', + 'exampleValue' => 'Terminator', + 'defaultValue' => '', + 'title' => 'The search term is case-insensitive', + ), + 'station' => array( + 'name' => 'Station name', + 'exampleValue' => 'ARD', + 'defaultValue' => '', + ), + 'type' => array( + 'name' => 'Media type', + 'type' => 'list', + 'values' => array( + 'any' => '', + 'Detail' => array( + 'HD' => 'HD.avi', + 'AC3' => 'HD.ac3', + 'HD & AC3' => 'HD.', + 'HQ' => 'HQ.avi', + 'AVI' => 'g.avi', // 'g.' to exclude HD.avi and HQ.avi (filename always contains 'mpg.') + 'MP4' => '.mp4', + ), + ), + ), + 'minTime' => array( + 'name' => 'Min. running time', + 'type' => 'number', + 'title' => 'The minimum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '90', + 'defaultValue' => '0', + ), + 'maxTime' => array( + 'name' => 'Max. running time', + 'type' => 'number', + 'title' => 'The maximum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '120', + 'defaultValue' => '0', + ), + 'pages' => array( + 'name' => 'Number of pages', + 'type' => 'number', + 'title' => 'Specifies the number of pages to fetch. Increase this value if you get an empty feed.', + 'exampleValue' => '5', + 'defaultValue' => '5', + ), + ) + ); + // Example: Terminator_20.04.13_02-25_sf2_100_TVOON_DE.mpg.avi.otrkey + // The first group is the running time in minutes + const FILENAME_REGEX = '/_(\d+)_TVOON_DE\.mpg\..+\.otrkey/'; + // year.month.day_hour-minute with leading zeros + const TIME_REGEX = '/\d{2}\.\d{2}\.\d{2}_\d{2}-\d{2}/'; + const CONTENT_TEMPLATE = '
    %s
'; + const MIRROR_TEMPLATE = '
  • %s
  • '; + + public function collectData() { + $pages = $this->getInput('pages'); + + for($page = 1; $page <= $pages; $page++) { + $uri = $this->buildUri($page); + + $html = getSimpleHTMLDOMCached($uri, self::CACHE_TIMEOUT) + or returnServerError('Could not request ' . $uri); + + $keys = $html->find('div.otrkey'); + + foreach($keys as $key) { + $temp = $this->buildItem($key); + + if ($temp != null) + $this->items[] = $temp; + } + + // Sleep for 0.5 seconds to don't hammer the server. + usleep(500000); + } + } + + private function buildUri($page) { + $searchterm = $this->getInput('searchterm'); + $station = $this->getInput('station'); + $type = $this->getInput('type'); + + // Combine all three parts to a search query by separating them with white space + $search = implode(' ', array($searchterm, $station, $type)); + $search = trim($search); + $search = urlencode($search); + + return sprintf(self::URI_TEMPLATE, $search, $page); + } + + private function buildItem(simple_html_dom_node $node) { + $file = $this->getFilename($node); + + if ($file == null) + return null; + + $minTime = $this->getInput('minTime'); + $maxTime = $this->getInput('maxTime'); + + // Do we need to check the running time? + if ($minTime != 0 || $maxTime != 0) { + if ($maxTime > 0 && $maxTime < $minTime) + returnClientError('The minimum running time must be less than the maximum running time.'); + + preg_match(self::FILENAME_REGEX, $file, $matches); + + if (!isset($matches[1])) + return null; + + $time = (integer)$matches[1]; + + // Check for minimum running time + if ($minTime > 0 && $minTime > $time) + return null; + + // Check for maximum running time + if ($maxTime > 0 && $maxTime < $time) + return null; + } + + $item = array(); + $item['title'] = $file; + + // The URI_TEMPLATE for querying the site can be reused here + $item['uri'] = sprintf(self::URI_TEMPLATE, $file, 1); + + $content = $this->buildContent($node); + + if ($content != null) + $item['content'] = $content; + + if (preg_match(self::TIME_REGEX, $file, $matches) === 1) { + $item['timestamp'] = DateTime::createFromFormat('y.m.d_H-i', $matches[0], new DateTimeZone('Europe/Berlin'))->getTimestamp(); + } + + return $item; + } + + private function getFilename(simple_html_dom_node $node) { + $file = $node->find('.file', 0); + + if ($file == null) + return null; + else + return trim($file->innertext); + } + + private function buildContent(simple_html_dom_node $node) { + $mirrors = $node->find('div.mirror'); + $list = ''; + + // Build list of available mirrors + foreach($mirrors as $mirror) { + $anchor = $mirror->find('a', 0); + $list .= sprintf(self::MIRROR_TEMPLATE, $anchor->href, $anchor->innertext); + } + + return sprintf(self::CONTENT_TEMPLATE, $list); + } +} From d299adb827ff2c0d7e62be0dbc2077650be19082 Mon Sep 17 00:00:00 2001 From: Alexander Date: Mon, 14 Sep 2020 11:14:35 +0300 Subject: [PATCH 117/192] [EpicgamesBridge] Add pinned posts to list (#1736) --- bridges/EpicgamesBridge.php | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/bridges/EpicgamesBridge.php b/bridges/EpicgamesBridge.php index 2264e70b..756efb3b 100644 --- a/bridges/EpicgamesBridge.php +++ b/bridges/EpicgamesBridge.php @@ -48,13 +48,22 @@ class EpicgamesBridge extends BridgeAbstract { )); public function collectData() { - // Example: https://store-content.ak.epicgames.com/api/ru/content/blog?limit=25 $api = 'https://store-content.ak.epicgames.com/api/'; - $url = $api . $this->getInput('language') . '/content/blog?limit=' . $this->getInput('postcount'); - $data = getContents($url) - or returnServerError('Unable to get the news pages from epicgames.com!'); - $decodedData = json_decode($data); + // Get sticky posts first + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog/sticky?locale=ru + $urlSticky = $api . $this->getInput('language') . '/content/blog/sticky'; + // Then get posts + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog?limit=25 + $urlBlog = $api . $this->getInput('language') . '/content/blog?limit=' . $this->getInput('postcount'); + + $dataSticky = getContents($urlSticky) + or returnServerError('Unable to get the sticky posts from epicgames.com!'); + $dataBlog = getContents($urlBlog) + or returnServerError('Unable to get the news posts from epicgames.com!'); + + // Merge data + $decodedData = array_merge(json_decode($dataSticky), json_decode($dataBlog)); foreach($decodedData as $key => $value) { $item = array(); @@ -76,5 +85,16 @@ class EpicgamesBridge extends BridgeAbstract { $this->items[] = $item; } + + // Sort data + usort($this->items, function ($item1, $item2) { + if ($item2['timestamp'] == $item1['timestamp']) { + return 0; + } + return ($item2['timestamp'] < $item1['timestamp']) ? -1 : 1; + }); + + // Limit data + $this->items = array_slice($this->items, 0, $this->getInput('postcount')); } } From 3a29347e600aa643c6f64c8aa6c9351c04034859 Mon Sep 17 00:00:00 2001 From: Jason Ghent Date: Mon, 14 Sep 2020 05:01:55 -0400 Subject: [PATCH 118/192] [ParameterValidator] Ignore cache-busting param (#1723) --- lib/ParameterValidator.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 149e8a40..12e07942 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -135,6 +135,9 @@ class ParameterValidator { return false; foreach($data as $name => $value) { + // Some RSS readers add a cache-busting parameter (_=) to feed URLs, detect and ignore them. + if ($name === '_') continue; + $registered = false; foreach($parameters as $context => $set) { if(array_key_exists($name, $set)) { From 2bb99c444817af322594344d667c06e15d528191 Mon Sep 17 00:00:00 2001 From: Christian Schabesberger Date: Fri, 18 Sep 2020 07:13:31 +0200 Subject: [PATCH 119/192] [NordbayernBridge] Fix images and newsblock order (#1741) --- bridges/NordbayernBridge.php | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php index a7ae72e3..ecc93c9d 100644 --- a/bridges/NordbayernBridge.php +++ b/bridges/NordbayernBridge.php @@ -48,12 +48,11 @@ class NordbayernBridge extends BridgeAbstract { )); private function getImageUrlFromScript($script) { - preg_match("#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9_]*\\.jpg)#", $script->innertext, $matches, PREG_OFFSET_CAPTURE); + preg_match("#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG))#", $script->innertext, $matches, PREG_OFFSET_CAPTURE); if(isset($matches[1][0])) { return stripcslashes($matches[1][0]) . '?w=800'; - } else { - return null; } + return null; } private function handleArticle($link) { @@ -105,16 +104,16 @@ class NordbayernBridge extends BridgeAbstract { private function handleNewsblock($listSite, $readPoliceReports) { $newsBlocks = $listSite->find('section[class*=newsblock]'); - $policeBlock = $newsBlocks[0]; - $regionalNewsBlock = $newsBlocks[1]; + $regionalNewsBlock = $newsBlocks[0]; + $policeBlock = $newsBlocks[1]; + foreach($regionalNewsBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } if($readPoliceReports === true) { foreach($policeBlock->find('h2') as $headline) { self::handleArticle(self::URI . $headline->find('a', 0)->href); } } - foreach($regionalNewsBlock->find('h2') as $headline) { - self::handleArticle(self::URI . $headline->find('a', 0)->href); - } } public function collectData() { From 80cc88ba784b95c45add05e0489a125ca87f7b3a Mon Sep 17 00:00:00 2001 From: Joseph Date: Fri, 25 Sep 2020 07:43:12 +0100 Subject: [PATCH 120/192] [SoundcloudBridge] Fix bridge not returning tracks (#1757) + Use artwork for enclosure --- bridges/SoundcloudBridge.php | 38 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 6f7783ce..45e6fed1 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -23,6 +23,7 @@ class SoundCloudBridge extends BridgeAbstract { ) )); + private $feedTitle = null; private $feedIcon = null; private $clientIDCache = null; @@ -31,33 +32,36 @@ class SoundCloudBridge extends BridgeAbstract { 'url' => 'https://soundcloud.com/' . $this->getInput('u') )) or returnServerError('No results for this query'); + $this->feedTitle = $res->username; $this->feedIcon = $res->avatar_url; - $tracks = $this->apiGet('users/' . urlencode($res->id) . '/' . $this->getInput('t'))->collection - or returnServerError('No results for this user/playlist'); + $tracks = $this->apiGet( + 'users/' . urlencode($res->id) . '/' . $this->getInput('t'), + array('limit' => 31) + ) or returnServerError('No results for this user/playlist'); - $numTracks = min(count($tracks), 10); - for($i = 0; $i < $numTracks; $i++) { + foreach ($tracks->collection as $index => $track) { $item = array(); - $item['author'] = $tracks[$i]->user->username; - $item['title'] = $tracks[$i]->user->username . ' - ' . $tracks[$i]->title; - $item['timestamp'] = strtotime($tracks[$i]->created_at); - $item['content'] = nl2br($tracks[$i]->description); - $item['enclosures'] = array($tracks[$i]->uri - . '/stream?client_id=' - . $this->getClientID()); + $item['author'] = $track->user->username; + $item['title'] = $track->user->username . ' - ' . $track->title; + $item['timestamp'] = strtotime($track->created_at); + $item['content'] = nl2br($track->description); + $item['enclosures'][] = $track->artwork_url; $item['id'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $item['uri'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $this->items[] = $item; - } + if (count($this->items) >= 10) { + break; + } + } } public function getIcon(){ @@ -73,8 +77,8 @@ class SoundCloudBridge extends BridgeAbstract { } public function getName(){ - if(!is_null($this->getInput('u'))) { - return $this->getInput('u') . ' - ' . self::NAME; + if($this->feedTitle) { + return $this->feedTitle . ' - ' . self::NAME; } return parent::getName(); @@ -132,7 +136,7 @@ class SoundCloudBridge extends BridgeAbstract { . http_build_query($parameters); } - private function apiGet($endpoint, $parameters = array()){ + private function apiGet($endpoint, $parameters = array()) { $parameters['client_id'] = $this->getClientID(); try { From 01985b7af738e57d480edfc6d15b2536d0de0434 Mon Sep 17 00:00:00 2001 From: sarnd <34441397+sarnd@users.noreply.github.com> Date: Mon, 28 Sep 2020 07:01:37 +0200 Subject: [PATCH 121/192] [TwitterBridge] URL to js file with apikey changed again (#1764) --- bridges/TwitterBridge.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 114b2feb..ba2f4b32 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -434,6 +434,10 @@ EOD; $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m'; preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); } + if (!$jsMainMatches) { + $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m'; + preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); + } if (!$jsMainMatches) { returnServerError('Could not locate main.js link'); } From 3df2de4c6f823291484c600e74f9fa594593bc44 Mon Sep 17 00:00:00 2001 From: Ololbu Date: Mon, 28 Sep 2020 14:02:40 +0500 Subject: [PATCH 122/192] [FicbookBridge] Fix data getting and months (#1765) --- bridges/FicbookBridge.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bridges/FicbookBridge.php b/bridges/FicbookBridge.php index 8b8a57fc..6a3761d5 100644 --- a/bridges/FicbookBridge.php +++ b/bridges/FicbookBridge.php @@ -84,7 +84,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectCommentsData($html) { - foreach($html->find('article.post') as $article) { + foreach($html->find('article.comment-container') as $article) { $this->items[] = array( 'uri' => $article->find('.comment_link_to_fic > a', 0)->href, 'title' => $article->find('.comment_author', 0)->plaintext, @@ -97,7 +97,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectUpdatesData($html) { - foreach($html->find('ul.table-of-contents > li') as $chapter) { + foreach($html->find('ul.list-of-fanfic-parts > li') as $chapter) { $item = array( 'uri' => $chapter->find('a', 0)->href, 'title' => $chapter->find('a', 0)->plaintext, @@ -130,10 +130,10 @@ class FicbookBridge extends BridgeAbstract { 'июня', 'июля', 'августа', - 'Сентября', + 'сентября', 'октября', - 'Ноября', - 'Декабря', + 'ноября', + 'декабря', ); $en_month = array( From 47dc26c775a2158950cffe085c1781b6e6a71e26 Mon Sep 17 00:00:00 2001 From: ORelio Date: Mon, 5 Oct 2020 08:57:13 +0200 Subject: [PATCH 123/192] [NextINpact] Fix subtitle extraction in #LeBrief (#1780) The bridge was taking another article abstract as subtitle for #LeBrief articles --- bridges/NextInpactBridge.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 879141cc..c3cca30d 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -97,9 +97,10 @@ class NextInpactBridge extends FeedExpander { return 'Failed to request NextInpact: ' . $url; // Filter premium and brief articles? + $brief_selector = 'div.brief-container'; foreach(array( 'filter_premium' => 'p.red-msg', - 'filter_brief' => 'div.brief-container' + 'filter_brief' => $brief_selector ) as $param_name => $selector) { $param_val = intval($this->getInput($param_name)); if ($param_val != 0) { @@ -119,7 +120,7 @@ class NextInpactBridge extends FeedExpander { // Subtitle $subtitle = $html->find('small.subtitle', 0); - if(!is_object($subtitle)) { + if(!is_object($subtitle) && !is_object($html->find($brief_selector, 0))) { $subtitle = $html->find('small', 0); } if(!is_object($subtitle)) { From d3455dd18adc22461c728974536bd5f930bb0c05 Mon Sep 17 00:00:00 2001 From: triatic <42704418+triatic@users.noreply.github.com> Date: Mon, 5 Oct 2020 08:07:39 +0100 Subject: [PATCH 124/192] [TwitterBridge] Optimise regular expression code (#1768) * [TwitterBridge] Optimise regular expression code Optimise regular expression search code so adding new URLs is cleaner --- bridges/TwitterBridge.php | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index ba2f4b32..0108170f 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -424,24 +424,22 @@ EOD; if($data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { $twitterPage = getContents('https://twitter.com'); - $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m'; - preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); - if (!$jsMainMatches) { - $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m'; - preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); + $jsLink = false; + $jsMainRegexArray = array( + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m', + ); + foreach ($jsMainRegexArray as $jsMainRegex) { + if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) { + $jsLink = $jsMainMatches[0][0]; + break; + } } - if (!$jsMainMatches) { - $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m'; - preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); - } - if (!$jsMainMatches) { - $jsMainRegex = '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m'; - preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0); - } - if (!$jsMainMatches) { + if (!$jsLink) { returnServerError('Could not locate main.js link'); } - $jsLink = $jsMainMatches[0][0]; $jsContent = getContents($jsLink); $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; From fe166d0216683754e85d32f48cd78a2ac4b8a7e3 Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Wed, 7 Oct 2020 08:16:26 +0200 Subject: [PATCH 125/192] [NasaApodBridge] Fix header being parsed as item (#1586) --- bridges/NasaApodBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8e293e0f..e5893dbc 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -14,8 +14,8 @@ class NasaApodBridge extends BridgeAbstract { $list = explode('
    ', $html->find('b', 0)->innertext); - for($i = 0; $i < 3; $i++) { - $line = $list[$i]; + // Start at 1 to skip the "APOD Full Archive" on top of the page + for($i = 1; $i < 4; $i++) { $item = array(); $uri_page = $html->find('a', $i + 3)->href; From ff98efe8dc13c978be3a715da44f825af194513c Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Fri, 9 Oct 2020 20:29:02 +0200 Subject: [PATCH 126/192] [core] Use Parsedown for Markdown parsing (#1783) --- README.md | 3 +- bridges/NyaaTorrentsBridge.php | 4 +- bridges/RainbowSixSiegeBridge.php | 7 +- lib/html.php | 40 +- lib/rssbridge.php | 3 +- vendor/parsedown/LICENSE.txt | 20 + vendor/parsedown/Parsedown.php | 1712 +++++++++++++++++++++++++++++ 7 files changed, 1744 insertions(+), 45 deletions(-) create mode 100644 vendor/parsedown/LICENSE.txt create mode 100644 vendor/parsedown/Parsedown.php diff --git a/README.md b/README.md index 2239826d..0f84108a 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [xurxof](https://github.com/xurxof) * [yardenac](https://github.com/yardenac) * [ZeNairolf](https://github.com/ZeNairolf) - + Licenses === @@ -243,6 +243,7 @@ The source code for RSS-Bridge is [Public Domain](UNLICENSE). RSS-Bridge uses third party libraries with their own license: + * [`Parsedown`](https://github.com/erusev/parsedown) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT) diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index b40b0f95..ab00ece8 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -100,7 +100,9 @@ class NyaaTorrentsBridge extends BridgeAbstract { //Retrieve data from page contents $item_title = str_replace(' :: Nyaa', '', $item_html->find('title', 0)->plaintext); - $item_desc = str_get_html(markdownToHtml($item_html->find('#torrent-description', 0)->innertext)); + $item_desc = str_get_html( + markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext)) + ); $item_author = extractFromDelimiters($item_html->outertext, 'href="/user/', '"'); $item_date = intval(extractFromDelimiters($item_html->outertext, 'data-timestamp="', '"')); diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php index 347438b0..067d3e77 100644 --- a/bridges/RainbowSixSiegeBridge.php +++ b/bridges/RainbowSixSiegeBridge.php @@ -27,16 +27,13 @@ class RainbowSixSiegeBridge extends BridgeAbstract { $uri = $uri . $jsonItem['button']['buttonUrl']; $thumbnail = 'Thumbnail'; - $content = $thumbnail . '
    ' . $jsonItem['content']; - - // Line breaks - $content = preg_replace("/\r\n|\r|\n/", '
    ', $content); + $content = $thumbnail . '
    ' . markdownToHtml($jsonItem['content']); $item = array(); $item['uri'] = $uri; $item['id'] = $jsonItem['id']; $item['title'] = $jsonItem['title']; - $item['content'] = markdownToHtml($content); + $item['content'] = $content; $item['timestamp'] = strtotime($jsonItem['date']); $this->items[] = $item; diff --git a/lib/html.php b/lib/html.php index c56140bf..892ecb17 100644 --- a/lib/html.php +++ b/lib/html.php @@ -195,7 +195,7 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ } /** - * Convert Markdown into HTML. Only a subset of the Markdown syntax is implemented. + * Convert Markdown into HTML with Parsedown. * * @link https://daringfireball.net/projects/markdown/ Markdown * @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec @@ -205,40 +205,6 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ */ function markdownToHtml($string) { - //For more details about how these regex work: - // https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702 - // Images: https://regex101.com/r/JW9Evr/2 - // Links: https://regex101.com/r/eRGVe7/1 - // Bold: https://regex101.com/r/2p40Y0/1 - // Italic: https://regex101.com/r/xJkET9/1 - // Separator: https://regex101.com/r/ZBEqFP/1 - // Plain URL: https://regex101.com/r/2JHYwb/1 - // Site name: https://regex101.com/r/qIuKYE/1 - - $string = preg_replace('/\!\[([^\]]*)\]\(([^\) ]+)(?: [^\)]+)?\)/', '$1', $string); - $string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '$1', $string); - $string = preg_replace('/\*\*(.*)\*\*/U', '$1', $string); - $string = preg_replace('/\*(.*)\*/U', '$1', $string); - $string = preg_replace('/__(.*)__/U', '$1', $string); - $string = preg_replace('/_(.*)_/U', '$1', $string); - $string = preg_replace('/[-]{6,99}/', '
    ', $string); - $string = str_replace(' ', '
    ', $string); - $string = preg_replace('/([^"])(https?:\/\/[^ "<]+)([^"])/', '$1$2$3', $string . ' '); - $string = preg_replace('/([^"\/])(www\.[^ "<]+)([^"])/', '$1$2$3', $string . ' '); - - //As the regex are not perfect, we need to fix and that are introduced in URLs - // Fixup regex : https://regex101.com/r/NTRPf6/1 - // Fixup regex : https://regex101.com/r/aNklRp/1 - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)<\/i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - return '
    ' . trim($string) . '
    '; + $Parsedown = new Parsedown(); + return $Parsedown->text($string); } diff --git a/lib/rssbridge.php b/lib/rssbridge.php index a025f229..25232986 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -82,5 +82,6 @@ require_once PATH_LIB . 'contents.php'; // Vendor define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */ -require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; +require_once PATH_LIB_VENDOR . 'parsedown/Parsedown.php'; require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; +require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; diff --git a/vendor/parsedown/LICENSE.txt b/vendor/parsedown/LICENSE.txt new file mode 100644 index 00000000..8e7c764d --- /dev/null +++ b/vendor/parsedown/LICENSE.txt @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/parsedown/Parsedown.php b/vendor/parsedown/Parsedown.php new file mode 100644 index 00000000..1b9d6d5b --- /dev/null +++ b/vendor/parsedown/Parsedown.php @@ -0,0 +1,1712 @@ +DefinitionData = array(); + + # standardize line breaks + $text = str_replace(array("\r\n", "\r"), "\n", $text); + + # remove surrounding line breaks + $text = trim($text, "\n"); + + # split text into lines + $lines = explode("\n", $text); + + # iterate through lines to identify blocks + $markup = $this->lines($lines); + + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; + } + + # + # Setters + # + + function setBreaksEnabled($breaksEnabled) + { + $this->breaksEnabled = $breaksEnabled; + + return $this; + } + + protected $breaksEnabled; + + function setMarkupEscaped($markupEscaped) + { + $this->markupEscaped = $markupEscaped; + + return $this; + } + + protected $markupEscaped; + + function setUrlsLinked($urlsLinked) + { + $this->urlsLinked = $urlsLinked; + + return $this; + } + + protected $urlsLinked = true; + + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + + # + # Lines + # + + protected $BlockTypes = array( + '#' => array('Header'), + '*' => array('Rule', 'List'), + '+' => array('List'), + '-' => array('SetextHeader', 'Table', 'Rule', 'List'), + '0' => array('List'), + '1' => array('List'), + '2' => array('List'), + '3' => array('List'), + '4' => array('List'), + '5' => array('List'), + '6' => array('List'), + '7' => array('List'), + '8' => array('List'), + '9' => array('List'), + ':' => array('Table'), + '<' => array('Comment', 'Markup'), + '=' => array('SetextHeader'), + '>' => array('Quote'), + '[' => array('Reference'), + '_' => array('Rule'), + '`' => array('FencedCode'), + '|' => array('Table'), + '~' => array('FencedCode'), + ); + + # ~ + + protected $unmarkedBlockTypes = array( + 'Code', + ); + + # + # Blocks + # + + protected function lines(array $lines) + { + $CurrentBlock = null; + + foreach ($lines as $line) + { + if (chop($line) === '') + { + if (isset($CurrentBlock)) + { + $CurrentBlock['interrupted'] = true; + } + + continue; + } + + if (strpos($line, "\t") !== false) + { + $parts = explode("\t", $line); + + $line = $parts[0]; + + unset($parts[0]); + + foreach ($parts as $part) + { + $shortage = 4 - mb_strlen($line, 'utf-8') % 4; + + $line .= str_repeat(' ', $shortage); + $line .= $part; + } + } + + $indent = 0; + + while (isset($line[$indent]) and $line[$indent] === ' ') + { + $indent ++; + } + + $text = $indent > 0 ? substr($line, $indent) : $line; + + # ~ + + $Line = array('body' => $line, 'indent' => $indent, 'text' => $text); + + # ~ + + if (isset($CurrentBlock['continuable'])) + { + $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + + if (isset($Block)) + { + $CurrentBlock = $Block; + + continue; + } + else + { + if ($this->isBlockCompletable($CurrentBlock['type'])) + { + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + } + } + } + + # ~ + + $marker = $text[0]; + + # ~ + + $blockTypes = $this->unmarkedBlockTypes; + + if (isset($this->BlockTypes[$marker])) + { + foreach ($this->BlockTypes[$marker] as $blockType) + { + $blockTypes []= $blockType; + } + } + + # + # ~ + + foreach ($blockTypes as $blockType) + { + $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + + if (isset($Block)) + { + $Block['type'] = $blockType; + + if ( ! isset($Block['identified'])) + { + $Blocks []= $CurrentBlock; + + $Block['identified'] = true; + } + + if ($this->isBlockContinuable($blockType)) + { + $Block['continuable'] = true; + } + + $CurrentBlock = $Block; + + continue 2; + } + } + + # ~ + + if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted'])) + { + $CurrentBlock['element']['text'] .= "\n".$text; + } + else + { + $Blocks []= $CurrentBlock; + + $CurrentBlock = $this->paragraph($Line); + + $CurrentBlock['identified'] = true; + } + } + + # ~ + + if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) + { + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + } + + # ~ + + $Blocks []= $CurrentBlock; + + unset($Blocks[0]); + + # ~ + + $markup = ''; + + foreach ($Blocks as $Block) + { + if (isset($Block['hidden'])) + { + continue; + } + + $markup .= "\n"; + $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); + } + + $markup .= "\n"; + + # ~ + + return $markup; + } + + protected function isBlockContinuable($Type) + { + return method_exists($this, 'block'.$Type.'Continue'); + } + + protected function isBlockCompletable($Type) + { + return method_exists($this, 'block'.$Type.'Complete'); + } + + # + # Code + + protected function blockCode($Line, $Block = null) + { + if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + { + return; + } + + if ($Line['indent'] >= 4) + { + $text = substr($Line['body'], 4); + + $Block = array( + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => array( + 'name' => 'code', + 'text' => $text, + ), + ), + ); + + return $Block; + } + } + + protected function blockCodeContinue($Line, $Block) + { + if ($Line['indent'] >= 4) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + $Block['element']['text']['text'] .= "\n"; + + $text = substr($Line['body'], 4); + + $Block['element']['text']['text'] .= $text; + + return $Block; + } + } + + protected function blockCodeComplete($Block) + { + $text = $Block['element']['text']['text']; + + $Block['element']['text']['text'] = $text; + + return $Block; + } + + # + # Comment + + protected function blockComment($Line) + { + if ($this->markupEscaped or $this->safeMode) + { + return; + } + + if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') + { + $Block = array( + 'markup' => $Line['body'], + ); + + if (preg_match('/-->$/', $Line['text'])) + { + $Block['closed'] = true; + } + + return $Block; + } + } + + protected function blockCommentContinue($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + $Block['markup'] .= "\n" . $Line['body']; + + if (preg_match('/-->$/', $Line['text'])) + { + $Block['closed'] = true; + } + + return $Block; + } + + # + # Fenced Code + + protected function blockFencedCode($Line) + { + if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) + { + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if (isset($matches[1])) + { + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r")); + + $class = 'language-'.$language; + + $Element['attributes'] = array( + 'class' => $class, + ); + } + + $Block = array( + 'char' => $Line['text'][0], + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => $Element, + ), + ); + + return $Block; + } + } + + protected function blockFencedCodeContinue($Line, $Block) + { + if (isset($Block['complete'])) + { + return; + } + + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) + { + $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + + $Block['complete'] = true; + + return $Block; + } + + $Block['element']['text']['text'] .= "\n".$Line['body']; + + return $Block; + } + + protected function blockFencedCodeComplete($Block) + { + $text = $Block['element']['text']['text']; + + $Block['element']['text']['text'] = $text; + + return $Block; + } + + # + # Header + + protected function blockHeader($Line) + { + if (isset($Line['text'][1])) + { + $level = 1; + + while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') + { + $level ++; + } + + if ($level > 6) + { + return; + } + + $text = trim($Line['text'], '# '); + + $Block = array( + 'element' => array( + 'name' => 'h' . min(6, $level), + 'text' => $text, + 'handler' => 'line', + ), + ); + + return $Block; + } + } + + # + # List + + protected function blockList($Line) + { + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + + if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'indent' => $Line['indent'], + 'pattern' => $pattern, + 'element' => array( + 'name' => $name, + 'handler' => 'elements', + ), + ); + + if($name === 'ol') + { + $listStart = stristr($matches[0], '.', true); + + if($listStart !== '1') + { + $Block['element']['attributes'] = array('start' => $listStart); + } + } + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $matches[2], + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + } + + protected function blockListContinue($Line, array $Block) + { + if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['li']['text'] []= ''; + + $Block['loose'] = true; + + unset($Block['interrupted']); + } + + unset($Block['li']); + + $text = isset($matches[1]) ? $matches[1] : ''; + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $text, + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + + if ($Line['text'][0] === '[' and $this->blockReference($Line)) + { + return $Block; + } + + if ( ! isset($Block['interrupted'])) + { + $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + return $Block; + } + + if ($Line['indent'] > 0) + { + $Block['li']['text'] []= ''; + + $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + unset($Block['interrupted']); + + return $Block; + } + } + + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) + { + foreach ($Block['element']['text'] as &$li) + { + if (end($li['text']) !== '') + { + $li['text'] []= ''; + } + } + } + + return $Block; + } + + # + # Quote + + protected function blockQuote($Line) + { + if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'element' => array( + 'name' => 'blockquote', + 'handler' => 'lines', + 'text' => (array) $matches[1], + ), + ); + + return $Block; + } + } + + protected function blockQuoteContinue($Line, array $Block) + { + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text'] []= ''; + + unset($Block['interrupted']); + } + + $Block['element']['text'] []= $matches[1]; + + return $Block; + } + + if ( ! isset($Block['interrupted'])) + { + $Block['element']['text'] []= $Line['text']; + + return $Block; + } + } + + # + # Rule + + protected function blockRule($Line) + { + if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + { + $Block = array( + 'element' => array( + 'name' => 'hr' + ), + ); + + return $Block; + } + } + + # + # Setext + + protected function blockSetextHeader($Line, array $Block = null) + { + if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + { + return; + } + + if (chop($Line['text'], $Line['text'][0]) === '') + { + $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; + + return $Block; + } + } + + # + # Markup + + protected function blockMarkup($Line) + { + if ($this->markupEscaped or $this->safeMode) + { + return; + } + + if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + { + $element = strtolower($matches[1]); + + if (in_array($element, $this->textLevelElements)) + { + return; + } + + $Block = array( + 'name' => $matches[1], + 'depth' => 0, + 'markup' => $Line['text'], + ); + + $length = strlen($matches[0]); + + $remainder = substr($Line['text'], $length); + + if (trim($remainder) === '') + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + $Block['closed'] = true; + + $Block['void'] = true; + } + } + else + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + return; + } + + if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) + { + $Block['closed'] = true; + } + } + + return $Block; + } + } + + protected function blockMarkupContinue($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open + { + $Block['depth'] ++; + } + + if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close + { + if ($Block['depth'] > 0) + { + $Block['depth'] --; + } + else + { + $Block['closed'] = true; + } + } + + if (isset($Block['interrupted'])) + { + $Block['markup'] .= "\n"; + + unset($Block['interrupted']); + } + + $Block['markup'] .= "\n".$Line['body']; + + return $Block; + } + + # + # Reference + + protected function blockReference($Line) + { + if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) + { + $id = strtolower($matches[1]); + + $Data = array( + 'url' => $matches[2], + 'title' => null, + ); + + if (isset($matches[3])) + { + $Data['title'] = $matches[3]; + } + + $this->DefinitionData['Reference'][$id] = $Data; + + $Block = array( + 'hidden' => true, + ); + + return $Block; + } + } + + # + # Table + + protected function blockTable($Line, array $Block = null) + { + if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + { + return; + } + + if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') + { + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + continue; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['text']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'text' => $headerCell, + 'handler' => 'line', + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'style' => 'text-align: '.$alignment.';', + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'handler' => 'elements', + ), + ); + + $Block['element']['text'] []= array( + 'name' => 'thead', + 'handler' => 'elements', + ); + + $Block['element']['text'] []= array( + 'name' => 'tbody', + 'handler' => 'elements', + 'text' => array(), + ); + + $Block['element']['text'][0]['text'] []= array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $HeaderElements, + ); + + return $Block; + } + } + + protected function blockTableContinue($Line, array $Block) + { + if (isset($Block['interrupted'])) + { + return; + } + + if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + { + $Elements = array(); + + $row = $Line['text']; + + $row = trim($row); + $row = trim($row, '|'); + + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + + foreach ($matches[0] as $index => $cell) + { + $cell = trim($cell); + + $Element = array( + 'name' => 'td', + 'handler' => 'line', + 'text' => $cell, + ); + + if (isset($Block['alignments'][$index])) + { + $Element['attributes'] = array( + 'style' => 'text-align: '.$Block['alignments'][$index].';', + ); + } + + $Elements []= $Element; + } + + $Element = array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $Elements, + ); + + $Block['element']['text'][1]['text'] []= $Element; + + return $Block; + } + } + + # + # ~ + # + + protected function paragraph($Line) + { + $Block = array( + 'element' => array( + 'name' => 'p', + 'text' => $Line['text'], + 'handler' => 'line', + ), + ); + + return $Block; + } + + # + # Inline Elements + # + + protected $InlineTypes = array( + '"' => array('SpecialCharacter'), + '!' => array('Image'), + '&' => array('SpecialCharacter'), + '*' => array('Emphasis'), + ':' => array('Url'), + '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), + '>' => array('SpecialCharacter'), + '[' => array('Link'), + '_' => array('Emphasis'), + '`' => array('Code'), + '~' => array('Strikethrough'), + '\\' => array('EscapeSequence'), + ); + + # ~ + + protected $inlineMarkerList = '!"*_&[:<>`~\\'; + + # + # ~ + # + + public function line($text, $nonNestables=array()) + { + $markup = ''; + + # $excerpt is based on the first occurrence of a marker + + while ($excerpt = strpbrk($text, $this->inlineMarkerList)) + { + $marker = $excerpt[0]; + + $markerPosition = strpos($text, $marker); + + $Excerpt = array('text' => $excerpt, 'context' => $text); + + foreach ($this->InlineTypes[$marker] as $inlineType) + { + # check to see if the current inline type is nestable in the current context + + if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + { + continue; + } + + $Inline = $this->{'inline'.$inlineType}($Excerpt); + + if ( ! isset($Inline)) + { + continue; + } + + # makes sure that the inline belongs to "our" marker + + if (isset($Inline['position']) and $Inline['position'] > $markerPosition) + { + continue; + } + + # sets a default inline position + + if ( ! isset($Inline['position'])) + { + $Inline['position'] = $markerPosition; + } + + # cause the new element to 'inherit' our non nestables + + foreach ($nonNestables as $non_nestable) + { + $Inline['element']['nonNestables'][] = $non_nestable; + } + + # the text that comes before the inline + $unmarkedText = substr($text, 0, $Inline['position']); + + # compile the unmarked text + $markup .= $this->unmarkedText($unmarkedText); + + # compile the inline + $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']); + + # remove the examined text + $text = substr($text, $Inline['position'] + $Inline['extent']); + + continue 2; + } + + # the marker does not belong to an inline + + $unmarkedText = substr($text, 0, $markerPosition + 1); + + $markup .= $this->unmarkedText($unmarkedText); + + $text = substr($text, $markerPosition + 1); + } + + $markup .= $this->unmarkedText($text); + + return $markup; + } + + # + # ~ + # + + protected function inlineCode($Excerpt) + { + $marker = $Excerpt['text'][0]; + + if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), + 'element' => array( + 'name' => 'code', + 'text' => $text, + ), + ); + } + } + + protected function inlineEmailTag($Excerpt) + { + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) + { + $url = $matches[1]; + + if ( ! isset($matches[2])) + { + $url = 'mailto:' . $url; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $matches[1], + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + protected function inlineEmphasis($Excerpt) + { + if ( ! isset($Excerpt['text'][1])) + { + return; + } + + $marker = $Excerpt['text'][0]; + + if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches)) + { + $emphasis = 'strong'; + } + elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) + { + $emphasis = 'em'; + } + else + { + return; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => $emphasis, + 'handler' => 'line', + 'text' => $matches[1], + ), + ); + } + + protected function inlineEscapeSequence($Excerpt) + { + if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) + { + return array( + 'markup' => $Excerpt['text'][1], + 'extent' => 2, + ); + } + } + + protected function inlineImage($Excerpt) + { + if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[') + { + return; + } + + $Excerpt['text']= substr($Excerpt['text'], 1); + + $Link = $this->inlineLink($Excerpt); + + if ($Link === null) + { + return; + } + + $Inline = array( + 'extent' => $Link['extent'] + 1, + 'element' => array( + 'name' => 'img', + 'attributes' => array( + 'src' => $Link['element']['attributes']['href'], + 'alt' => $Link['element']['text'], + ), + ), + ); + + $Inline['element']['attributes'] += $Link['element']['attributes']; + + unset($Inline['element']['attributes']['href']); + + return $Inline; + } + + protected function inlineLink($Excerpt) + { + $Element = array( + 'name' => 'a', + 'handler' => 'line', + 'nonNestables' => array('Url', 'Link'), + 'text' => null, + 'attributes' => array( + 'href' => null, + 'title' => null, + ), + ); + + $extent = 0; + + $remainder = $Excerpt['text']; + + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) + { + $Element['text'] = $matches[1]; + + $extent += strlen($matches[0]); + + $remainder = substr($remainder, $extent); + } + else + { + return; + } + + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches)) + { + $Element['attributes']['href'] = $matches[1]; + + if (isset($matches[2])) + { + $Element['attributes']['title'] = substr($matches[2], 1, - 1); + } + + $extent += strlen($matches[0]); + } + else + { + if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) + { + $definition = strlen($matches[1]) ? $matches[1] : $Element['text']; + $definition = strtolower($definition); + + $extent += strlen($matches[0]); + } + else + { + $definition = strtolower($Element['text']); + } + + if ( ! isset($this->DefinitionData['Reference'][$definition])) + { + return; + } + + $Definition = $this->DefinitionData['Reference'][$definition]; + + $Element['attributes']['href'] = $Definition['url']; + $Element['attributes']['title'] = $Definition['title']; + } + + return array( + 'extent' => $extent, + 'element' => $Element, + ); + } + + protected function inlineMarkup($Excerpt) + { + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) + { + return; + } + + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + } + + protected function inlineSpecialCharacter($Excerpt) + { + if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text'])) + { + return array( + 'markup' => '&', + 'extent' => 1, + ); + } + + $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); + + if (isset($SpecialCharacter[$Excerpt['text'][0]])) + { + return array( + 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', + 'extent' => 1, + ); + } + } + + protected function inlineStrikethrough($Excerpt) + { + if ( ! isset($Excerpt['text'][1])) + { + return; + } + + if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches)) + { + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'del', + 'text' => $matches[1], + 'handler' => 'line', + ), + ); + } + } + + protected function inlineUrl($Excerpt) + { + if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/') + { + return; + } + + if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) + { + $url = $matches[0][0]; + + $Inline = array( + 'extent' => strlen($matches[0][0]), + 'position' => $matches[0][1], + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + + return $Inline; + } + } + + protected function inlineUrlTag($Excerpt) + { + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + { + $url = $matches[1]; + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + # ~ + + protected function unmarkedText($text) + { + if ($this->breaksEnabled) + { + $text = preg_replace('/[ ]*\n/', "
    \n", $text); + } + else + { + $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
    \n", $text); + $text = str_replace(" \n", "\n", $text); + } + + return $text; + } + + # + # Handlers + # + + protected function element(array $Element) + { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + + $markup = '<'.$Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= ' '.$name.'="'.self::escape($value).'"'; + } + } + + $permitRawHtml = false; + + if (isset($Element['text'])) + { + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + if (isset($text)) + { + $markup .= '>'; + + if (!isset($Element['nonNestables'])) + { + $Element['nonNestables'] = array(); + } + + if (isset($Element['handler'])) + { + $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']); + } + elseif (!$permitRawHtml) + { + $markup .= self::escape($text, true); + } + else + { + $markup .= $text; + } + + $markup .= ''; + } + else + { + $markup .= ' />'; + } + + return $markup; + } + + protected function elements(array $Elements) + { + $markup = ''; + + foreach ($Elements as $Element) + { + $markup .= "\n" . $this->element($Element); + } + + $markup .= "\n"; + + return $markup; + } + + # ~ + + protected function li($lines) + { + $markup = $this->lines($lines); + + $trimmedMarkup = trim($markup); + + if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '

    ') + { + $markup = $trimmedMarkup; + $markup = substr($markup, 3); + + $position = strpos($markup, "

    "); + + $markup = substr_replace($markup, '', $position, 4); + } + + return $markup; + } + + # + # Deprecated Methods + # + + function parse($text) + { + $markup = $this->text($text); + + return $markup; + } + + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + + # + # Static Methods + # + + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + + static function instance($name = 'default') + { + if (isset(self::$instances[$name])) + { + return self::$instances[$name]; + } + + $instance = new static(); + + self::$instances[$name] = $instance; + + return $instance; + } + + private static $instances = array(); + + # + # Fields + # + + protected $DefinitionData; + + # + # Read-Only + + protected $specialCharacters = array( + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + ); + + protected $StrongRegex = array( + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + ); + + protected $EmRegex = array( + '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s', + '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', + ); + + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + + protected $voidElements = array( + 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', + ); + + protected $textLevelElements = array( + 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont', + 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', + 'i', 'rp', 'del', 'code', 'strike', 'marquee', + 'q', 'rt', 'ins', 'font', 'strong', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', + ); +} From f90c6b5bb98873c34e19e00e4f1053574eed641e Mon Sep 17 00:00:00 2001 From: csisoap <33269526+csisoap@users.noreply.github.com> Date: Sat, 10 Oct 2020 01:33:54 +0700 Subject: [PATCH 127/192] [NasaApodBridge] Fix broken image link (#1778) --- bridges/NasaApodBridge.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index e5893dbc..cf633415 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -12,8 +12,6 @@ class NasaApodBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI . 'archivepix.html') or returnServerError('Error while downloading the website content'); - $list = explode('
    ', $html->find('b', 0)->innertext); - // Start at 1 to skip the "APOD Full Archive" on top of the page for($i = 1; $i < 4; $i++) { $item = array(); @@ -26,9 +24,14 @@ class NasaApodBridge extends BridgeAbstract { $picture_html_string = $picture_html->innertext; //Extract image and explanation - $media = $picture_html->find('p', 1)->innertext; - $media = strstr($media, '
    '); - $media = preg_replace('/
    /', '', $media, 1); + $image_wrapper = $picture_html->find('a',1); + $image_path = $image_wrapper->href; + $img_placeholder = $image_wrapper->find('img', 0); + $img_alt = $img_placeholder->alt; + $img_style = $img_placeholder->style; + $image_uri = self::URI . $image_path; + $new_img_placeholder = "\"$img_alt\""; + $media = "$new_img_placeholder"; $explanation = $picture_html->find('p', 2)->innertext; //Extract date from the picture page From 55015f80cf129f35efe4e5bcd3e10ab33e0c5865 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 9 Oct 2020 21:48:40 +0300 Subject: [PATCH 128/192] [AlbionOnlineBridge] New bridge (#1769) --- bridges/AlbionOnlineBridge.php | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 bridges/AlbionOnlineBridge.php diff --git a/bridges/AlbionOnlineBridge.php b/bridges/AlbionOnlineBridge.php new file mode 100644 index 00000000..8a7a61b7 --- /dev/null +++ b/bridges/AlbionOnlineBridge.php @@ -0,0 +1,80 @@ + array( + 'name' => 'Limit', + 'type' => 'list', + 'values' => array( + '2' => 2, + '5' => 5, + '10' => 10, + '15' => 15, + ), + 'title' => 'Maximum number of items to return', + 'defaultValue' => 5, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'Deutsch' => 'de', + 'Polski' => 'pl', + 'Français' => 'fr', + 'Русский' => 'ru', + 'Português' => 'pt', + 'Español' => 'es', + ), + 'title' => 'Language of changelog posts', + 'defaultValue' => 'en', + ), + 'full' => array( + 'name' => 'Full changelog', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Enable to receive the full changelog post for each item' + ), + )); + + public function collectData() { + $api = 'https://albiononline.com/'; + // Example: https://albiononline.com/en/changelog/1/5 + $url = $api . $this->getInput('language') . '/changelog/1/' . $this->getInput('postcount'); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Unable to get changelog data from "' . $url . '"!'); + + foreach ($html->find('li') as $data) { + $item = array(); + $item['uri'] = self::URI . $data->find('a', 0)->getAttribute('href'); + $item['title'] = trim(explode('|', $data->find('span', 0)->plaintext)[0]); + // Time below work only with en lang. Need to think about solution. May be separate request like getFullChangelog, but to english list for all language + //print_r( date_parse_from_format( 'M j, Y' , 'Sep 9, 2020') ); + //$item['timestamp'] = $this->extractDate($a->plaintext); + $item['author'] = 'albiononline.com'; + if($this->getInput('full')) { + $item['content'] = $this->getFullChangelog($item['uri']); + } else { + //$item['content'] = trim(preg_replace('/\s+/', ' ', $data->find('span', 0)->plaintext)); + // Just use title, no info at all or use title and date, see above + $item['content'] = $item['title']; + } + $item['uid'] = hash('sha256', $item['title']); + $this->items[] = $item; + } + } + + private function getFullChangelog($url) { + $html = getSimpleHTMLDOMCached($url) + or returnServerError('Unable to load changelog post from "' . $url . '"!'); + $html = defaultLinkTo($html, self::URI); + return $html->find('div.small-12.columns', 1)->innertext; + } +} From 712f60e91066700d648ed9c60305adc022359e9e Mon Sep 17 00:00:00 2001 From: Joseph Date: Fri, 9 Oct 2020 18:55:28 +0000 Subject: [PATCH 129/192] [HeiseBridge] Fix multi-page article fetching (#1767) Fixes multi-page article fetching by adding '&seite=all' to article URL. --- bridges/HeiseBridge.php | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 0401a48b..7bc404c0 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -40,18 +40,13 @@ class HeiseBridge extends FeedExpander { protected function parseItem($feedItem) { $item = parent::parseItem($feedItem); - $uri = $item['uri']; + $uri = $item['uri'] . '&seite=all'; - do { - $article = getSimpleHTMLDOMCached($uri) - or returnServerError('Could not open article: ' . $uri); + $article = getSimpleHTMLDOMCached($uri) + or returnServerError('Could not open article: ' . $uri); - $article = defaultLinkTo($article, $uri); - $item = $this->addArticleToItem($item, $article); - - if($next = $article->find('.pagination a[rel="next"]', 0)) - $uri = $next->href; - } while ($next); + $article = defaultLinkTo($article, $uri); + $item = $this->addArticleToItem($item, $article); return $item; } From 84d5daaa032d94e3b9c634a182316148f012c8b1 Mon Sep 17 00:00:00 2001 From: Ololbu Date: Sat, 10 Oct 2020 00:39:35 +0500 Subject: [PATCH 130/192] [FicbookBridge] Add getName implementation (#1771) --- bridges/FicbookBridge.php | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/bridges/FicbookBridge.php b/bridges/FicbookBridge.php index 6a3761d5..b79ad257 100644 --- a/bridges/FicbookBridge.php +++ b/bridges/FicbookBridge.php @@ -35,6 +35,8 @@ class FicbookBridge extends BridgeAbstract { ), ); + protected $titleName; + public function getURI() { switch($this->queriedContext) { case 'Site News': { @@ -56,6 +58,21 @@ class FicbookBridge extends BridgeAbstract { } } + public function getName() { + switch($this->queriedContext) { + case 'Site News': { + return $this->queriedContext . ' | ' . self::NAME; + } + case 'Fiction Updates': { + return $this->titleName . ' | ' . self::NAME; + } + case 'Fiction Comments': { + return $this->titleName . ' | Comments | ' . self::NAME; + } + default: return self::NAME; + } + } + public function collectData() { $header = array('Accept-Language: en-US'); @@ -65,6 +82,10 @@ class FicbookBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::URI); + if ($this->queriedContext == 'Fiction Updates' or $this->queriedContext == 'Fiction Comments' ) { + $this->titleName = $html->find('.fanfic-main-info > h1', 0)->innertext; + } + switch($this->queriedContext) { case 'Site News': return $this->collectSiteNews($html); case 'Fiction Updates': return $this->collectUpdatesData($html); From 82acbbb421dc1091a166df295a4b137ca9baaf3e Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Fri, 9 Oct 2020 21:46:40 +0200 Subject: [PATCH 131/192] [DribbbleBridge] Fix picture parsing (#1787) --- bridges/DribbbleBridge.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index ae3a2819..f66608f4 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -13,7 +13,7 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . '/shots') + $html = getSimpleHTMLDOM(self::URI) or returnServerError('Error while downloading the website content'); $json = $this->loadEmbeddedJsonData($html); @@ -36,7 +36,7 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; $description = $shot->find('.comment', 0); $item['content'] = $description === null ? '' : $description->plaintext; - $preview_path = $shot->find('picture source', 0)->attr['srcset']; + $preview_path = $shot->find('figure img', 1)->attr['data-srcset']; $item['content'] .= $this->getImageTag($preview_path, $item['title']); $item['enclosures'] = array($this->getFullSizeImagePath($preview_path)); @@ -94,6 +94,6 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } private function getFullSizeImagePath($preview_path){ - return str_replace('_1x', '', $preview_path); + return explode("?compress=1", $preview_path)[0]; } } From 7b6ff786232bbb8d97abc6c7099cf495fef452a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Kol=C3=A1=C5=99?= Date: Mon, 12 Oct 2020 08:35:06 +0200 Subject: [PATCH 132/192] [CeskaTelevizeBridge] Add New bridge (#1784) --- bridges/CeskaTelevizeBridge.php | 84 +++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 bridges/CeskaTelevizeBridge.php diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php new file mode 100755 index 00000000..ead807b5 --- /dev/null +++ b/bridges/CeskaTelevizeBridge.php @@ -0,0 +1,84 @@ + array( + 'name' => 'url to the show', + 'required' => true, + 'exampleValue' => 'https://www.ceskatelevize.cz/porady/1097181328-udalosti/dily/' + ) + ) + ); + + private function fixChars($text) { + return html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + } + + private function getUploadTimeFromString($string) { + if (strpos($string, 'dnes') !== false) { + return strtotime('today'); + } elseif (strpos($string, 'včera') !== false) { + return strtotime('yesterday'); + } elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) { + returnServerError('Could not get date from Česká televize string'); + } + + $date = sprintf('%04d-%02d-%02d', isset($match[3]) ? $match[3] : date('Y'), $match[2], $match[1]); + return strtotime($date); + } + + public function collectData() { + $url = $this->getInput('url'); + + $validUrl = '/^(https:\/\/www\.ceskatelevize\.cz\/porady\/\d+-[a-z0-9-]+\/)(dily\/((nove|vysilani)\/)?)?$/'; + if (!preg_match($validUrl, $url, $match)) { + returnServerError('Invalid url'); + } + + $category = isset($match[4]) ? $match[4] : 'nove'; + $fixedUrl = "{$match[1]}dily/{$category}/"; + + $html = getSimpleHTMLDOM($fixedUrl) + or returnServerError('Could not request Česká televize'); + + $this->feedUri = $fixedUrl; + $this->feedName = str_replace('Přehled dílů — ', '', $this->fixChars($html->find('title', 0)->plaintext)); + if ($category !== 'nove') { + $this->feedName .= " ({$category})"; + } + + foreach ($html->find('.episodes-broadcast-content a.episode_list_item') as $element) { + $itemTitle = $element->find('.episode_list_item-title', 0); + $itemContent = $element->find('.episode_list_item-desc', 0); + $itemDate = $element->find('.episode_list_item-date', 0); + $itemThumbnail = $element->find('img', 0); + $itemUri = self::URI . $element->getAttribute('href'); + + $item = array( + 'title' => $this->fixChars($itemTitle->plaintext), + 'uri' => $itemUri, + 'content' => '
    ' + . $this->fixChars($itemContent->plaintext), + 'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext) + ); + + $this->items[] = $item; + } + } + + public function getUri() { + return isset($this->feedUri) ? $this->feedUri : parent::getUri(); + } + + public function getName() { + return isset($this->feedName) ? $this->feedName : parent::getName(); + } +} From 64ec488f7066a6efc3e346d090216aa81f7621cc Mon Sep 17 00:00:00 2001 From: ORelio Date: Tue, 13 Oct 2020 18:46:58 +0200 Subject: [PATCH 133/192] [ZDNet] Fix article layout (#1793) --- bridges/ZDNetBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 75df3b15..13478029 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -185,7 +185,8 @@ class ZDNetBridge extends FeedExpander { '
    Date: Thu, 15 Oct 2020 12:53:19 +0500 Subject: [PATCH 134/192] [.travis] Fix several phpcs and phpunit errors (#1799) --- bridges/AlbionOnlineBridge.php | 8 +------- bridges/CeskaTelevizeBridge.php | 4 ++-- bridges/DiarioDeNoticiasBridge.php | 1 + bridges/DribbbleBridge.php | 2 +- bridges/EpicgamesBridge.php | 9 +-------- bridges/FM4Bridge.php | 9 +++++---- bridges/FicbookBridge.php | 2 +- bridges/MarktplaatsBridge.php | 16 ++++++++-------- bridges/MondeDiploBridge.php | 8 ++++---- bridges/NasaApodBridge.php | 2 +- bridges/NordbayernBridge.php | 9 +++++++-- bridges/OtrkeyFinderBridge.php | 8 +++++--- bridges/ReleasesSwitchBridge.php | 2 +- bridges/WorldCosplayBridge.php | 8 +++----- 14 files changed, 41 insertions(+), 47 deletions(-) diff --git a/bridges/AlbionOnlineBridge.php b/bridges/AlbionOnlineBridge.php index 8a7a61b7..0a93901e 100644 --- a/bridges/AlbionOnlineBridge.php +++ b/bridges/AlbionOnlineBridge.php @@ -10,13 +10,7 @@ class AlbionOnlineBridge extends BridgeAbstract { const PARAMETERS = array( array( 'postcount' => array( 'name' => 'Limit', - 'type' => 'list', - 'values' => array( - '2' => 2, - '5' => 5, - '10' => 10, - '15' => 15, - ), + 'type' => 'number', 'title' => 'Maximum number of items to return', 'defaultValue' => 5, ), diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php index ead807b5..ea3a4bf2 100755 --- a/bridges/CeskaTelevizeBridge.php +++ b/bridges/CeskaTelevizeBridge.php @@ -74,8 +74,8 @@ class CeskaTelevizeBridge extends BridgeAbstract { } } - public function getUri() { - return isset($this->feedUri) ? $this->feedUri : parent::getUri(); + public function getURI() { + return isset($this->feedUri) ? $this->feedUri : parent::getURI(); } public function getName() { diff --git a/bridges/DiarioDeNoticiasBridge.php b/bridges/DiarioDeNoticiasBridge.php index 0aaac6f4..887eb117 100644 --- a/bridges/DiarioDeNoticiasBridge.php +++ b/bridges/DiarioDeNoticiasBridge.php @@ -42,6 +42,7 @@ class DiarioDeNoticiasBridge extends BridgeAbstract { } return $name; } + public function getURI() { switch($this->queriedContext) { case 'Tag': diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index f66608f4..01cfb21a 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -94,6 +94,6 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } private function getFullSizeImagePath($preview_path){ - return explode("?compress=1", $preview_path)[0]; + return explode('?compress=1', $preview_path)[0]; } } diff --git a/bridges/EpicgamesBridge.php b/bridges/EpicgamesBridge.php index 756efb3b..e6ba5421 100644 --- a/bridges/EpicgamesBridge.php +++ b/bridges/EpicgamesBridge.php @@ -10,14 +10,7 @@ class EpicgamesBridge extends BridgeAbstract { const PARAMETERS = array( array( 'postcount' => array( 'name' => 'Limit', - 'type' => 'list', - 'values' => array( - '5' => 5, - '10' => 10, - '15' => 15, - '20' => 20, - '25' => 25, - ), + 'type' => 'number', 'title' => 'Maximum number of items to return', 'defaultValue' => 10, ), diff --git a/bridges/FM4Bridge.php b/bridges/FM4Bridge.php index 6f59e80f..e129c5c8 100644 --- a/bridges/FM4Bridge.php +++ b/bridges/FM4Bridge.php @@ -28,9 +28,9 @@ class FM4Bridge extends BridgeAbstract ) ); - public function getPageData($tag, $page) { + private function getPageData($tag, $page) { if($tag) - $uri = self::URI . "/tags/" . $tag; + $uri = self::URI . '/tags/' . $tag; else $uri = self::URI; @@ -47,7 +47,7 @@ class FM4Bridge extends BridgeAbstract $item['uri'] = $article->find('a', 0)->href; $item['title'] = $article->find('h2', 0)->plaintext; $item['author'] = $article->find('p[class*=keyword]', 0)->plaintext; - $item["timestamp"] = strtotime($article->find('p[class*=time]', 0)->plaintext); + $item['timestamp'] = strtotime($article->find('p[class*=time]', 0)->plaintext); if ($this->getInput('loadcontent')) { $item['content'] = getSimpleHTMLDOM($item['uri'])->find('div[class=storyText]', 0)->innertext @@ -58,9 +58,10 @@ class FM4Bridge extends BridgeAbstract } return $page_items; } + public function collectData() { for ($cur_page = 1; $cur_page <= $this->getInput('pages'); $cur_page++) { - $this->items = array_merge($this->items,$this->getPageData($this->getInput('tag'), $cur_page)); + $this->items = array_merge($this->items, $this->getPageData($this->getInput('tag'), $cur_page)); } } } diff --git a/bridges/FicbookBridge.php b/bridges/FicbookBridge.php index b79ad257..7c897017 100644 --- a/bridges/FicbookBridge.php +++ b/bridges/FicbookBridge.php @@ -82,7 +82,7 @@ class FicbookBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::URI); - if ($this->queriedContext == 'Fiction Updates' or $this->queriedContext == 'Fiction Comments' ) { + if ($this->queriedContext == 'Fiction Updates' or $this->queriedContext == 'Fiction Comments') { $this->titleName = $html->find('.fanfic-main-info > h1', 0)->innertext; } diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php index d8fce6c4..ada65920 100644 --- a/bridges/MarktplaatsBridge.php +++ b/bridges/MarktplaatsBridge.php @@ -103,8 +103,8 @@ class MarktplaatsBridge extends BridgeAbstract { $item['content'] .= "
    \n
    \n
    \n" . json_encode($listing); } } - $item['content'] .= "
    \n
    \nPrice: " . $listing->priceInfo->priceCents/100; - $item['content'] .= "  (" . $listing->priceInfo->priceType .")"; + $item['content'] .= "
    \n
    \nPrice: " . $listing->priceInfo->priceCents / 100; + $item['content'] .= '  (' . $listing->priceInfo->priceType . ')'; if(!empty($listing->location->cityName)) { $item['content'] .= "

    \n" . $listing->location->cityName; } @@ -117,11 +117,11 @@ class MarktplaatsBridge extends BridgeAbstract { } } } - + public function getName(){ - if(!is_null($this->getInput('q'))) { - return $this->getInput('q') . ' - Marktplaats'; - } - return parent::getName(); - } + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Marktplaats'; + } + return parent::getName(); + } } diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index c5ed6169..cff84967 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -7,9 +7,9 @@ class MondeDiploBridge extends BridgeAbstract { const CACHE_TIMEOUT = 21600; //6h const DESCRIPTION = 'Returns most recent results from MondeDiplo.'; - private function cleanText($text) { - return trim(str_replace([' ', ' '], ' ', $text)); - } + private function cleanText($text) { + return trim(str_replace(array(' ', ' '), ' ', $text)); + } public function collectData(){ $html = getSimpleHTMLDOM(self::URI) @@ -22,7 +22,7 @@ class MondeDiploBridge extends BridgeAbstract { $item = array(); $item['uri'] = self::URI . $element->href; $item['title'] = $this->cleanText($title) . ' - ' . $this->cleanText($datesAuteurs); - $item['content'] = $this->cleanText(str_replace([$title, $datesAuteurs], '', $element->plaintext)); + $item['content'] = $this->cleanText(str_replace(array($title, $datesAuteurs), '', $element->plaintext)); $this->items[] = $item; } diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index cf633415..6e2674f0 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -24,7 +24,7 @@ class NasaApodBridge extends BridgeAbstract { $picture_html_string = $picture_html->innertext; //Extract image and explanation - $image_wrapper = $picture_html->find('a',1); + $image_wrapper = $picture_html->find('a', 1); $image_path = $image_wrapper->href; $img_placeholder = $image_wrapper->find('img', 0); $img_alt = $img_placeholder->alt; diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php index ecc93c9d..37fa3d5e 100644 --- a/bridges/NordbayernBridge.php +++ b/bridges/NordbayernBridge.php @@ -48,11 +48,16 @@ class NordbayernBridge extends BridgeAbstract { )); private function getImageUrlFromScript($script) { - preg_match("#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG))#", $script->innertext, $matches, PREG_OFFSET_CAPTURE); + preg_match( + "#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG))#", + $script->innertext, + $matches, + PREG_OFFSET_CAPTURE + ); if(isset($matches[1][0])) { return stripcslashes($matches[1][0]) . '?w=800'; } - return null; + return null; } private function handleArticle($link) { diff --git a/bridges/OtrkeyFinderBridge.php b/bridges/OtrkeyFinderBridge.php index 887ccd1b..32ce5c4f 100644 --- a/bridges/OtrkeyFinderBridge.php +++ b/bridges/OtrkeyFinderBridge.php @@ -11,13 +11,11 @@ class OtrkeyFinderBridge extends BridgeAbstract { 'searchterm' => array( 'name' => 'Search term', 'exampleValue' => 'Terminator', - 'defaultValue' => '', 'title' => 'The search term is case-insensitive', ), 'station' => array( 'name' => 'Station name', 'exampleValue' => 'ARD', - 'defaultValue' => '', ), 'type' => array( 'name' => 'Media type', @@ -143,7 +141,11 @@ class OtrkeyFinderBridge extends BridgeAbstract { $item['content'] = $content; if (preg_match(self::TIME_REGEX, $file, $matches) === 1) { - $item['timestamp'] = DateTime::createFromFormat('y.m.d_H-i', $matches[0], new DateTimeZone('Europe/Berlin'))->getTimestamp(); + $item['timestamp'] = DateTime::createFromFormat( + 'y.m.d_H-i', + $matches[0], + new DateTimeZone('Europe/Berlin') + )->getTimestamp(); } return $item; diff --git a/bridges/ReleasesSwitchBridge.php b/bridges/ReleasesSwitchBridge.php index 3814f8ec..89ca76d5 100644 --- a/bridges/ReleasesSwitchBridge.php +++ b/bridges/ReleasesSwitchBridge.php @@ -1,7 +1,7 @@ has_error) - { + if($json->has_error) { returnServerError($json->message); } $list = $json->list; - foreach($list as $img) - { + foreach($list as $img) { $item = array(); $item['uri'] = self::URI . substr($img->photo->url, 1); $item['title'] = $img->photo->subject; From 0a1ff10a527bdafd8abbf6817937dd91cac96021 Mon Sep 17 00:00:00 2001 From: Joseph Date: Thu, 15 Oct 2020 08:03:51 +0000 Subject: [PATCH 135/192] [KoreusBridge + VarietyBridge] Use HTTPS when fetching feedburner feeds (#1797) --- bridges/KoreusBridge.php | 4 ++-- bridges/VarietyBridge.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index a5e09cbd..4cfb8c21 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -3,7 +3,7 @@ class KoreusBridge extends FeedExpander { const MAINTAINER = 'pit-fgfjiudghdf'; const NAME = 'Koreus'; - const URI = 'http://www.koreus.com/'; + const URI = 'https://www.koreus.com/'; const DESCRIPTION = 'Returns the newest posts from Koreus (full text)'; protected function parseItem($item){ @@ -17,6 +17,6 @@ class KoreusBridge extends FeedExpander { } public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/Koreus-articles'); + $this->collectExpandableDatas('https://feeds.feedburner.com/Koreus-articles'); } } diff --git a/bridges/VarietyBridge.php b/bridges/VarietyBridge.php index a2e61700..8bc48f46 100644 --- a/bridges/VarietyBridge.php +++ b/bridges/VarietyBridge.php @@ -8,7 +8,7 @@ class VarietyBridge extends FeedExpander { const DESCRIPTION = 'RSS feed for Variety'; public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/variety/headlines', 15); + $this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15); } protected function parseItem($newsItem){ From 45e2f385b35f38c081bf5911d9b7460bbd789b15 Mon Sep 17 00:00:00 2001 From: Joshua Coales Date: Thu, 15 Oct 2020 10:08:03 +0100 Subject: [PATCH 136/192] [FacebookBridge] Handle mobile links and unify host validation (#1789) --- bridges/FacebookBridge.php | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index cb5e30f8..b7681114 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -215,16 +215,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($group); - if($urlparts['host'] !== parse_url(self::URI)['host'] - && 'www.' . $urlparts['host'] !== parse_url(self::URI)['host']) { - - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - - } + $this->validateHost($urlparts['host']); return explode('/', $urlparts['path'])[2]; @@ -236,6 +227,24 @@ class FacebookBridge extends BridgeAbstract { } + private function validateHost($provided_host) { + // Handle mobile links + if (strpos($provided_host, 'm.') === 0) { + $provided_host = substr($provided_host, strlen('m.')); + } + + $facebook_host = parse_url(self::URI)['host']; + + if ($provided_host !== $facebook_host + && 'www.' . $provided_host !== $facebook_host) { + returnClientError('The host you provided is invalid! Received "' + . $provided_host + . '", expected "' + . $facebook_host + . '"!'); + } + } + private function isPublicGroup($html) { // Facebook redirects to the groups about page for non-public groups @@ -348,13 +357,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($user); - if($urlparts['host'] !== parse_url(self::URI)['host']) { - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - } + $this->validateHost($urlparts['host']); if(!array_key_exists('path', $urlparts) || $urlparts['path'] === '/') { From a332a5a414fa9b9aa27f6a9f8d2d4bcc77f5d9e7 Mon Sep 17 00:00:00 2001 From: Gregor Santner Date: Thu, 15 Oct 2020 11:12:54 +0200 Subject: [PATCH 137/192] [NineGagBridge] In post URI replace scheme from "http" to "https" --- bridges/NineGagBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/NineGagBridge.php b/bridges/NineGagBridge.php index 939ff387..abaa7ad4 100644 --- a/bridges/NineGagBridge.php +++ b/bridges/NineGagBridge.php @@ -148,7 +148,7 @@ class NineGagBridge extends BridgeAbstract { } if (!$AvoidElement) { - $item['uri'] = $post['url']; + $item['uri'] = preg_replace("/^http:/i", "https:", $post['url']); $item['title'] = $post['title']; $item['content'] = self::getContent($post); $item['categories'] = self::getCategories($post); From 5e4f3c351e457db6b369b4ac6c3244cb55d9a0cb Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Thu, 15 Oct 2020 14:18:46 +0500 Subject: [PATCH 138/192] [NineGagBridge] Lint previous commit --- bridges/NineGagBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/NineGagBridge.php b/bridges/NineGagBridge.php index abaa7ad4..19c7e29c 100644 --- a/bridges/NineGagBridge.php +++ b/bridges/NineGagBridge.php @@ -148,7 +148,7 @@ class NineGagBridge extends BridgeAbstract { } if (!$AvoidElement) { - $item['uri'] = preg_replace("/^http:/i", "https:", $post['url']); + $item['uri'] = preg_replace('/^http:/i', 'https:', $post['url']); $item['title'] = $post['title']; $item['content'] = self::getContent($post); $item['categories'] = self::getCategories($post); From 364b5282a3c63816155ac0e35e900ca6441f4e8c Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 19 Oct 2020 16:22:37 +0500 Subject: [PATCH 139/192] [GoogleSearch] Use other class for content retreiving (#1803) --- bridges/GoogleSearchBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index 650696e2..10f0f121 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -38,7 +38,7 @@ class GoogleSearchBridge extends BridgeAbstract { $t = $element->find('a[href]', 0)->href; $item['uri'] = htmlspecialchars_decode($t); $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('span[class=st]', 0)->plaintext; + $item['content'] = $element->find('span[class=aCOpRe]', 0)->plaintext; $this->items[] = $item; } From 2714c3d816588192b7cec7b30bf53716543ac96d Mon Sep 17 00:00:00 2001 From: ORelio Date: Wed, 21 Oct 2020 11:59:04 +0200 Subject: [PATCH 140/192] [WordPress] Limit feed to 20 items (#1801) --- bridges/WordPressBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1589c723..548e46e1 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -92,9 +92,9 @@ class WordPressBridge extends FeedExpander { returnClientError('The url parameter must either refer to http or https protocol.'); } try{ - $this->collectExpandableDatas($this->getURI() . '/feed/atom/'); + $this->collectExpandableDatas($this->getURI() . '/feed/atom/', 20); } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI() . '/?feed=atom'); + $this->collectExpandableDatas($this->getURI() . '/?feed=atom', 20); } } From 164b407f28ac0088999cc52ee5bf4122e4715946 Mon Sep 17 00:00:00 2001 From: Joseph Date: Mon, 26 Oct 2020 07:11:58 +0000 Subject: [PATCH 141/192] [BridgeCard] Fix parameter layout issue (#1816) Fixes parameter layout issue on small screens. --- lib/BridgeCard.php | 2 +- static/style.css | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4353f643..0ed605bf 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -126,7 +126,7 @@ This bridge is not fetching its content through a secure connection
    '; if(isset($inputEntry['title'])) $form .= 'i'; else - $form .= ''; + $form .= ''; } $form .= ''; diff --git a/static/style.css b/static/style.css index 5df2c517..80591e43 100644 --- a/static/style.css +++ b/static/style.css @@ -360,7 +360,7 @@ h5 { margin: 3px auto 0; } - .info { + .info, .no-info { display: none; } From 93cdf5e342aa5548efd398d9e34a6ba0a177b371 Mon Sep 17 00:00:00 2001 From: Eugene Molotov Date: Mon, 26 Oct 2020 15:07:20 +0500 Subject: [PATCH 142/192] [core] Fixed passive XSS vulnerability Reference: https://www.openbugbounty.org/reports/1140367/ --- lib/BridgeList.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/BridgeList.php b/lib/BridgeList.php index dc545de9..7b2d5268 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -129,7 +129,7 @@ EOD; * @return string The searchbar */ private static function getSearchbar() { - $query = filter_input(INPUT_GET, 'q'); + $query = filter_input(INPUT_GET, 'q', FILTER_SANITIZE_SPECIAL_CHARS); return << From 6af87b2f326b5e52943e1413952ca40ea8570a49 Mon Sep 17 00:00:00 2001 From: Joshua Coales Date: Thu, 29 Oct 2020 03:42:49 +0000 Subject: [PATCH 143/192] [FacebookBridge] Use touch.facebook.com for groups (#1817) --- bridges/FacebookBridge.php | 90 ++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index b7681114..bb9d02eb 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -175,7 +175,13 @@ class FacebookBridge extends BridgeAbstract { $header = array(); } - $html = getSimpleHTMLDOM($this->getURI(), $header) + $touchURI = str_replace( + 'https://www.facebook', + 'https://touch.facebook', + $this->getURI() + ); + + $html = getSimpleHTMLDOM($touchURI, $header) or returnServerError('Failed loading facebook page: ' . $this->getURI()); if(!$this->isPublicGroup($html)) { @@ -186,19 +192,19 @@ class FacebookBridge extends BridgeAbstract { $this->groupName = $this->extractGroupName($html); - $posts = $html->find('div.userContentWrapper') + $posts = $html->find('div.story_body_container') or returnServerError('Failed finding posts!'); foreach($posts as $post) { $item = array(); - $item['uri'] = $this->extractGroupURI($post); - $item['title'] = $this->extractGroupTitle($post); - $item['author'] = $this->extractGroupAuthor($post); - $item['content'] = $this->extractGroupContent($post); - $item['timestamp'] = $this->extractGroupTimestamp($post); - $item['enclosures'] = $this->extractGroupEnclosures($post); + $item['uri'] = $this->extractGroupPostURI($post); + $item['title'] = $this->extractGroupPostTitle($post); + $item['author'] = $this->extractGroupPostAuthor($post); + $item['content'] = $this->extractGroupPostContent($post); + $item['timestamp'] = $this->extractGroupPostTimestamp($post); + $item['enclosures'] = $this->extractGroupPostEnclosures($post); $this->items[] = $item; @@ -232,6 +238,9 @@ class FacebookBridge extends BridgeAbstract { if (strpos($provided_host, 'm.') === 0) { $provided_host = substr($provided_host, strlen('m.')); } + if (strpos($provided_host, 'touch.') === 0) { + $provided_host = substr($provided_host, strlen('touch.')); + } $facebook_host = parse_url(self::URI)['host']; @@ -245,24 +254,26 @@ class FacebookBridge extends BridgeAbstract { } } + /** + * @param $html simple_html_dom + * @return bool + */ private function isPublicGroup($html) { - // Facebook redirects to the groups about page for non-public groups - $about = $html->find('#pagelet_group_about', 0); - - return !($about); - + // Facebook touch just presents a login page for non-public groups + $title = $html->find('title', 0); + return $title->plaintext !== 'Log in to Facebook | Facebook'; } private function extractGroupName($html) { - $ogtitle = $html->find('meta[property="og:title"]', 0) + $ogtitle = $html->find('._de1', 0) or returnServerError('Unable to find group title!'); - return html_entity_decode($ogtitle->content, ENT_QUOTES); + return html_entity_decode($ogtitle->plaintext, ENT_QUOTES); } - private function extractGroupURI($post) { + private function extractGroupPostURI($post) { $elements = $post->find('a') or returnServerError('Unable to find URI!'); @@ -280,57 +291,70 @@ class FacebookBridge extends BridgeAbstract { } - private function extractGroupContent($post) { + private function extractGroupPostContent($post) { - $content = $post->find('div.userContent', 0) + $content = $post->find('div._5rgt', 0) or returnServerError('Unable to find user content!'); - return $content->innertext . $content->next_sibling()->innertext; + $context_text = $content->innertext; + if ($content->next_sibling() !== null) { + $context_text .= $content->next_sibling()->innertext; + } + return $context_text; } - private function extractGroupTimestamp($post) { + private function extractGroupPostTimestamp($post) { - $element = $post->find('abbr[data-utime]', 0) + $element = $post->find('abbr', 0) or returnServerError('Unable to find timestamp!'); - return $element->getAttribute('data-utime'); + return $element->plaintext; } - private function extractGroupAuthor($post) { + private function extractGroupPostAuthor($post) { - $element = $post->find('img', 0) + $element = $post->find('h3 a', 0) or returnServerError('Unable to find author information!'); - return $element->{'aria-label'}; + return $element->plaintext; } - private function extractGroupEnclosures($post) { + private function extractGroupPostEnclosures($post) { - $elements = $post->find('div.userContent', 0)->next_sibling()->find('img'); + $elements = $post->find('span._6qdm'); + if ($post->find('div._5rgt', 0)->next_sibling() !== null) { + array_push($elements, ...$post->find('div._5rgt', 0)->next_sibling()->find('i.img')); + } $enclosures = array(); + $background_img_regex = '/background-image: ?url\\((.+?)\\);/'; + foreach($elements as $enclosure) { - $enclosures[] = $enclosure->src; + if(preg_match($background_img_regex, $enclosure, $matches) > 0) { + $bg_img_value = trim(html_entity_decode($matches[1], ENT_QUOTES), "'\""); + $bg_img_url = urldecode(preg_replace('/\\\([0-9a-z]{2}) /', '%$1', $bg_img_value)); + $enclosures[] = urldecode($bg_img_url); + } } return empty($enclosures) ? null : $enclosures; } - private function extractGroupTitle($post) { + private function extractGroupPostTitle($post) { - $element = $post->find('h5', 0) + $element = $post->find('h3', 0) or returnServerError('Unable to find title!'); if(strpos($element->plaintext, 'shared') === false) { - $content = strip_tags($this->extractGroupContent($post)); + $content = strip_tags($this->extractGroupPostContent($post)); - return $this->extractGroupAuthor($post) + return $this->extractGroupPostAuthor($post) . ' posted: ' . substr( $content, @@ -558,7 +582,7 @@ EOD; } // No captcha? We can carry on retrieving page contents :) - // First, we check wether the page is public or not + // First, we check whether the page is public or not $loginForm = $html->find('._585r', 0); if($loginForm != null) { From b48bc77c22e13d93aa9f8216b0503a77305fd078 Mon Sep 17 00:00:00 2001 From: Roliga Date: Fri, 30 Oct 2020 14:50:36 +0100 Subject: [PATCH 144/192] [TwitchBridge] Switch to unofficial GraphQL API (#1829) * [TwitchBridge] Switch to unofficial GraphQL API The GraphQL API that the twitch.tv website uses has a lot more information available than the official APIs. Hopefully it'll be stable. --- bridges/TwitchBridge.php | 188 ++++++++++++++++++++++++++------------- 1 file changed, 125 insertions(+), 63 deletions(-) diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index 39b46010..8b43a317 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -20,7 +20,9 @@ class TwitchBridge extends BridgeAbstract { 'All' => 'all', 'Archive' => 'archive', 'Highlights' => 'highlight', - 'Uploads' => 'upload' + 'Uploads' => 'upload', + 'Past Premieres' => 'past_premiere', + 'Premiere Uploads' => 'premiere_upload' ), 'defaultValue' => 'archive' ) @@ -32,43 +34,90 @@ class TwitchBridge extends BridgeAbstract { */ const CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'; + const API_ENDPOINT = 'https://gql.twitch.tv/gql'; + const BROADCAST_TYPES = array( + 'all' => array( + 'ARCHIVE', + 'HIGHLIGHT', + 'UPLOAD', + 'PAST_PREMIERE', + 'PREMIERE_UPLOAD' + ), + 'archive' => 'ARCHIVE', + 'highlight' => 'HIGHLIGHT', + 'upload' => 'UPLOAD', + 'past_premiere' => 'PAST_PREMIERE', + 'premiere_upload' => 'PREMIERE_UPLOAD' + ); + public function collectData(){ - // get channel user - $query_data = array( - 'login' => $this->getInput('channel') + $query = <<<'EOD' +query VODList($channel: String!, $types: [BroadcastType!]) { + user(login: $channel) { + displayName + videos(types: $types, sort: TIME) { + edges { + node { + id + title + publishedAt + lengthSeconds + viewCount + thumbnailURLs(width: 640, height: 360) + previewThumbnailURL(width: 640, height: 360) + description + tags + contentTags { + isLanguageTag + localizedName + } + game { + displayName + } + moments(momentRequestType: VIDEO_CHAPTER_MARKERS) { + edges { + node { + description + positionMilliseconds + } + } + } + } + } + } + } +} +EOD; + $variables = array( + 'channel' => $this->getInput('channel'), + 'types' => self::BROADCAST_TYPES[$this->getInput('type')] ); - $users = $this->apiGet('users', $query_data)->users; - if(count($users) === 0) - returnClientError('User "' - . $this->getInput('channel') - . '" could not be found'); - $user = $users[0]; + $data = $this->apiRequest($query, $variables); - // get video list - $query_endpoint = 'channels/' . $user->_id . '/videos'; - $query_data = array( - 'broadcast_type' => $this->getInput('type'), - 'limit' => 10 - ); - $videos = $this->apiGet($query_endpoint, $query_data)->videos; + $user = $data->user; + foreach($user->videos->edges as $edge) { + $video = $edge->node; + + $url = 'https://www.twitch.tv/videos/' . $video->id; - foreach($videos as $video) { $item = array( - 'uri' => $video->url, + 'uri' => $url, 'title' => $video->title, - 'timestamp' => $video->published_at, - 'author' => $video->channel->display_name, + 'timestamp' => $video->publishedAt, + 'author' => $user->displayName, ); // Add categories for tags and played game - $item['categories'] = array_filter(explode(' ', $video->tag_list)); - if(!empty($video->game)) - $item['categories'][] = $video->game; + $item['categories'] = $video->tags; + if(!is_null($video->game)) + $item['categories'][] = $video->game->displayName; + foreach($video->contentTags as $tag) + if(!$tag->isLanguageTag) + $item['categories'][] = $tag->localizedName; // Add enclosures for thumbnails from a few points in the video - $item['enclosures'] = array(); - foreach($video->thumbnails->large as $thumbnail) - $item['enclosures'][] = $thumbnail->url; + // Thumbnail list has duplicate entries sometimes so remove those + $item['enclosures'] = array_unique($video->thumbnailURLs); /* * Content format example: @@ -86,44 +135,45 @@ class TwitchBridge extends BridgeAbstract { * */ $item['content'] = '

    ' - . $video->description_html + . $video->description // in markdown format . '

    Duration: ' - . $this->formatTimestampTime($video->length) + . $this->formatTimestampTime($video->lengthSeconds) . '
    Views: ' - . $video->views + . $video->viewCount . '

    '; // Add played games list to content - $video_id = trim($video->_id, 'v'); // _id gives 'v1234' but API wants '1234' - $markers = $this->apiGet('videos/' . $video_id . '/markers')->markers; - $item['content'] .= '

    Played games: