diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e1f37170 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,35 @@ +name: Lint + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + phpcs: + runs-on: ubuntu-16.04 + strategy: + matrix: + php-versions: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4'] + steps: + - uses: actions/checkout@v2 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + tools: phpcs + - run: phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p + + phpcompatibility: + runs-on: ubuntu-16.04 + strategy: + matrix: + php-versions: ['5.6', '7.4'] + steps: + - uses: actions/checkout@v2 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + - run: composer global require dealerdirect/phpcodesniffer-composer-installer + - run: composer global require phpcompatibility/php-compatibility + - run: ~/.composer/vendor/bin/phpcs . 
--standard=phpcompatibility.xml --warning-severity=0 --extensions=php -p diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..29ca71c0 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,47 @@ +name: Tests + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + phpunit6: + runs-on: ubuntu-16.04 + strategy: + matrix: + php-versions: ['7.0', '7.1', '7.2'] + steps: + - uses: actions/checkout@v2 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + - run: composer global require phpunit/phpunit ^6 + - run: phpunit --configuration=phpunit.xml --include-path=lib/ + + phpunit7: + runs-on: ubuntu-16.04 + strategy: + matrix: + php-versions: ['7.1', '7.2', '7.3'] + steps: + - uses: actions/checkout@v2 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + - run: composer global require phpunit/phpunit ^7 + - run: phpunit --configuration=phpunit.xml --include-path=lib/ + + phpunit8: + runs-on: ubuntu-16.04 + strategy: + matrix: + php-versions: ['7.3', '7.4'] + steps: + - uses: actions/checkout@v2 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + - run: composer global require phpunit/phpunit ^8 + - run: phpunit --configuration=phpunit.xml --include-path=lib/ diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 841ac5db..00000000 --- a/.travis.yml +++ /dev/null @@ -1,46 +0,0 @@ -dist: trusty -language: php - -install: - - composer global require dealerdirect/phpcodesniffer-composer-installer; - - composer global require phpcompatibility/php-compatibility; - - if [[ "$PHPUNIT" ]]; then - composer global require phpunit/phpunit ^$PHPUNIT; - fi - -script: - - phpenv rehash - # Run PHP_CodeSniffer on all versions - - ~/.config/composer/vendor/bin/phpcs . 
--standard=phpcs.xml --warning-severity=0 --extensions=php -p; - # Check PHP compatibility for the lowest and highest supported version - - if [[ $TRAVIS_PHP_VERSION == "5.6" || $TRAVIS_PHP_VERSION == "7.3" ]]; then - ~/.config/composer/vendor/bin/phpcs . --standard=phpcompatibility.xml --extensions=php -p; - fi - # Run unit tests on highest major version - - if [[ ${TRAVIS_PHP_VERSION:0:1} == "7" ]]; then - ~/.config/composer/vendor/bin/phpunit --configuration=phpunit.xml --include-path=lib/; - fi - -php: - - 7.3 - -env: - - PHPUNIT=6 - - PHPUNIT=7 - - PHPUNIT=8 - -matrix: - fast_finish: true - - include: - - php: 5.6 - env: PHPUNIT= - - php: 7.0 - - php: 7.1 - - php: 7.2 - - allow_failures: - - php: 7.3 - env: PHPUNIT=7 - - php: 7.3 - env: PHPUNIT=8 diff --git a/Dockerfile b/Dockerfile index fa9979d6..4b844541 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,13 @@ FROM php:7-apache ENV APACHE_DOCUMENT_ROOT=/app RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini" \ - && apt-get --yes update && apt-get --yes install libxml2-dev \ - && docker-php-ext-install -j$(nproc) simplexml \ + && apt-get --yes update \ + && apt-get --yes --no-install-recommends install \ + zlib1g-dev \ + libmemcached-dev \ + && rm -rf /var/lib/apt/lists/* \ + && pecl install memcached \ + && docker-php-ext-enable memcached \ && sed -ri -e 's!/var/www/html!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/sites-available/*.conf \ && sed -ri -e 's!/var/www/!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/apache2.conf /etc/apache2/conf-available/*.conf \ && sed -ri -e 's/(MinProtocol\s*=\s*)TLSv1\.2/\1None/' /etc/ssl/openssl.cnf \ diff --git a/README.md b/README.md index a9db8eaf..202f3680 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ RSS-Bridge requires PHP 5.6 or higher with following extensions enabled: - [`simplexml`](https://secure.php.net/manual/en/book.simplexml.php) - [`curl`](https://secure.php.net/manual/en/book.curl.php) - 
[`json`](https://secure.php.net/manual/en/book.json.php) + - [`filter`](https://secure.php.net/manual/en/book.filter.php) - [`sqlite3`](http://php.net/manual/en/book.sqlite3.php) (only when using SQLiteCache) Find more information on our [Wiki](https://github.com/rss-bridge/rss-bridge/wiki) @@ -119,19 +120,24 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [alexAubin](https://github.com/alexAubin) * [AmauryCarrade](https://github.com/AmauryCarrade) * [AntoineTurmel](https://github.com/AntoineTurmel) +* [arnd-s](https://github.com/arnd-s) * [ArthurHoaro](https://github.com/ArthurHoaro) * [Astalaseven](https://github.com/Astalaseven) * [Astyan-42](https://github.com/Astyan-42) +* [AxorPL](https://github.com/AxorPL) +* [ayacoo](https://github.com/ayacoo) * [az5he6ch](https://github.com/az5he6ch) * [azdkj532](https://github.com/azdkj532) * [b1nj](https://github.com/b1nj) * [benasse](https://github.com/benasse) +* [Binnette](https://github.com/Binnette) * [captn3m0](https://github.com/captn3m0) * [chemel](https://github.com/chemel) * [ckiw](https://github.com/ckiw) * [cnlpete](https://github.com/cnlpete) * [corenting](https://github.com/corenting) * [couraudt](https://github.com/couraudt) +* [csisoap](https://github.com/csisoap) * [cyberjacob](https://github.com/cyberjacob) * [da2x](https://github.com/da2x) * [Daiyousei](https://github.com/Daiyousei) @@ -146,27 +152,36 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [em92](https://github.com/em92) * [eMerzh](https://github.com/eMerzh) * [EtienneM](https://github.com/EtienneM) +* [fanch317](https://github.com/fanch317) * [floviolleau](https://github.com/floviolleau) * [fluffy-critter](https://github.com/fluffy-critter) * [Frenzie](https://github.com/Frenzie) * [fulmeek](https://github.com/fulmeek) +* [ggiessen](https://github.com/ggiessen) * [Ginko-Aloe](https://github.com/Ginko-Aloe) * [Glandos](https://github.com/Glandos) * [gloony](https://github.com/gloony) * 
[GregThib](https://github.com/GregThib) * [griffaurel](https://github.com/griffaurel) * [Grummfy](https://github.com/Grummfy) +* [gsantner](https://github.com/gsantner) * [hunhejj](https://github.com/hunhejj) * [husim0](https://github.com/husim0) * [IceWreck](https://github.com/IceWreck) * [j0k3r](https://github.com/j0k3r) * [JackNUMBER](https://github.com/JackNUMBER) +* [jannyba](https://github.com/jannyba) +* [JasonGhent](https://github.com/JasonGhent) +* [jdesgats](https://github.com/jdesgats) * [jdigilio](https://github.com/jdigilio) * [JeremyRand](https://github.com/JeremyRand) * [Jocker666z](https://github.com/Jocker666z) * [johnnygroovy](https://github.com/johnnygroovy) -* [killruana](https://github.com/killruana) +* [johnpc](https://github.com/johnpc) +* [joni1993](https://github.com/joni1993) +* [joshcoales](https://github.com/joshcoales) * [klimplant](https://github.com/klimplant) +* [kolarcz](https://github.com/kolarcz) * [kranack](https://github.com/kranack) * [kraoc](https://github.com/kraoc) * [l1n](https://github.com/l1n) @@ -175,6 +190,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [lalannev](https://github.com/lalannev) * [ldidry](https://github.com/ldidry) * [Leomaradan](https://github.com/Leomaradan) +* [liamka](https://github.com/liamka) * [Limero](https://github.com/Limero) * [LogMANOriginal](https://github.com/LogMANOriginal) * [lorenzos](https://github.com/lorenzos) @@ -185,18 +201,25 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [mdemoss](https://github.com/mdemoss) * [melangue](https://github.com/melangue) * [metaMMA](https://github.com/metaMMA) +* [mibe](https://github.com/mibe) +* [mightymt](https://github.com/mightymt) * [mitsukarenai](https://github.com/mitsukarenai) * [MonsieurPoutounours](https://github.com/MonsieurPoutounours) * [mr-flibble](https://github.com/mr-flibble) * [mro](https://github.com/mro) +* [mschwld](https://github.com/mschwld) * 
[mxmehl](https://github.com/mxmehl) * [nel50n](https://github.com/nel50n) * [niawag](https://github.com/niawag) +* [Niehztog](https://github.com/Niehztog) * [Nono-m0le](https://github.com/Nono-m0le) * [ObsidianWitch](https://github.com/ObsidianWitch) * [OliverParoczai](https://github.com/OliverParoczai) -* [oratosquilla-oratoria](https://github.com/oratosquilla-oratoria) +* [Ololbu](https://github.com/Ololbu) * [ORelio](https://github.com/ORelio) +* [otakuf](https://github.com/otakuf) +* [Park0](https://github.com/Park0) +* [Paroleen](https://github.com/Paroleen) * [PaulVayssiere](https://github.com/PaulVayssiere) * [pellaeon](https://github.com/pellaeon) * [Piranhaplant](https://github.com/Piranhaplant) @@ -206,24 +229,31 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [Pofilo](https://github.com/Pofilo) * [prysme01](https://github.com/prysme01) * [quentinus95](https://github.com/quentinus95) +* [RawkBob](https://github.com/RawkBob) * [regisenguehard](https://github.com/regisenguehard) * [Riduidel](https://github.com/Riduidel) * [rogerdc](https://github.com/rogerdc) * [Roliga](https://github.com/Roliga) +* [ronansalmon](https://github.com/ronansalmon) +* [rremizov](https://github.com/rremizov) * [sebsauvage](https://github.com/sebsauvage) * [shutosg](https://github.com/shutosg) +* [Simounet](https://github.com/Simounet) * [somini](https://github.com/somini) * [squeek502](https://github.com/squeek502) * [stjohnjohnson](https://github.com/stjohnjohnson) * [Strubbl](https://github.com/Strubbl) * [sublimz](https://github.com/sublimz) * [sunchaserinfo](https://github.com/sunchaserinfo) +* [SuperSandro2000](https://github.com/SuperSandro2000) * [sysadminstory](https://github.com/sysadminstory) * [tameroski](https://github.com/tameroski) * [teromene](https://github.com/teromene) +* [tgkenney](https://github.com/tgkenney) * [thefranke](https://github.com/thefranke) * [ThePadawan](https://github.com/ThePadawan) * 
[TheRadialActive](https://github.com/TheRadialActive) +* [theScrabi](https://github.com/theScrabi) * [TitiTestScalingo](https://github.com/TitiTestScalingo) * [triatic](https://github.com/triatic) * [VerifiedJoseph](https://github.com/VerifiedJoseph) @@ -231,6 +261,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [wtuuju](https://github.com/wtuuju) * [xurxof](https://github.com/xurxof) * [yardenac](https://github.com/yardenac) +* [ymeister](https://github.com/ymeister) * [ZeNairolf](https://github.com/ZeNairolf) Licenses @@ -240,6 +271,7 @@ The source code for RSS-Bridge is [Public Domain](UNLICENSE). RSS-Bridge uses third party libraries with their own license: + * [`Parsedown`](https://github.com/erusev/parsedown) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT) diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 579630a1..16a67d54 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -131,6 +131,7 @@ class DisplayAction extends ActionAbstract { try { $bridge->setDatas($bridge_params); + $bridge->loadConfiguration(); $bridge->collectData(); $items = $bridge->getItems(); diff --git a/bridges/ASRockNewsBridge.php b/bridges/ASRockNewsBridge.php new file mode 100644 index 00000000..1f3f4dda --- /dev/null +++ b/bridges/ASRockNewsBridge.php @@ -0,0 +1,57 @@ +find('div.inner > a') as $index => $a) { + $item = array(); + + $articlePath = $a->href; + + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT) + or returnServerError('Could not request: ' . 
$articlePath); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.Contents', 0); + + $item['uri'] = $articlePath; + $item['title'] = $contents->find('h5', 0)->innertext; + + $contents->find('h5', 0)->outertext = ''; + + $item['content'] = $contents->innertext; + $item['timestamp'] = $this->extractDate($a->plaintext); + $item['enclosures'][] = $a->find('img', 0)->src; + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + private function extractDate($text) { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + return $matches[1]; + } + + return ''; + } +} diff --git a/bridges/AirBreizhBridge.php b/bridges/AirBreizhBridge.php new file mode 100644 index 00000000..2d852da5 --- /dev/null +++ b/bridges/AirBreizhBridge.php @@ -0,0 +1,54 @@ + array( + 'theme' => array( + 'name' => 'Thematique', + 'type' => 'list', + 'values' => array( + 'Tout' => '', + 'Rapport d\'activite' => 'rapport-dactivite', + 'Etude' => 'etudes', + 'Information' => 'information', + 'Autres documents' => 'autres-documents', + 'Plan Régional de Surveillance de la qualité de l’air' => 'prsqa', + 'Transport' => 'transport' + ) + ) + ) + ); + + public function getIcon() { + return 'https://www.airbreizh.asso.fr/voy_content/uploads/2017/11/favicon.png'; + } + + public function collectData(){ + $html = ''; + $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme')) + or returnClientError('No results for this query.'); + + foreach ($html->find('article') as $article) { + $item = array(); + // Title + $item['title'] = $article->find('h2', 0)->plaintext; + // Author + $item['author'] = 'Air Breizh'; + // Image + $imagelink = $article->find('.card__image', 0)->find('img', 0)->getAttribute('src'); + // Content preview + $item['content'] = ' +
' + . $article->find('.card__text', 0)->plaintext; + // URL + $item['uri'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + // ID + $item['id'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + $this->items[] = $item; + } + } +} diff --git a/bridges/AlbionOnlineBridge.php b/bridges/AlbionOnlineBridge.php new file mode 100644 index 00000000..0a93901e --- /dev/null +++ b/bridges/AlbionOnlineBridge.php @@ -0,0 +1,74 @@ + array( + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + 'defaultValue' => 5, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'Deutsch' => 'de', + 'Polski' => 'pl', + 'Français' => 'fr', + 'Русский' => 'ru', + 'Português' => 'pt', + 'Español' => 'es', + ), + 'title' => 'Language of changelog posts', + 'defaultValue' => 'en', + ), + 'full' => array( + 'name' => 'Full changelog', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Enable to receive the full changelog post for each item' + ), + )); + + public function collectData() { + $api = 'https://albiononline.com/'; + // Example: https://albiononline.com/en/changelog/1/5 + $url = $api . $this->getInput('language') . '/changelog/1/' . $this->getInput('postcount'); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Unable to get changelog data from "' . $url . '"!'); + + foreach ($html->find('li') as $data) { + $item = array(); + $item['uri'] = self::URI . $data->find('a', 0)->getAttribute('href'); + $item['title'] = trim(explode('|', $data->find('span', 0)->plaintext)[0]); + // Time below work only with en lang. Need to think about solution. 
May be separate request like getFullChangelog, but to english list for all language + //print_r( date_parse_from_format( 'M j, Y' , 'Sep 9, 2020') ); + //$item['timestamp'] = $this->extractDate($a->plaintext); + $item['author'] = 'albiononline.com'; + if($this->getInput('full')) { + $item['content'] = $this->getFullChangelog($item['uri']); + } else { + //$item['content'] = trim(preg_replace('/\s+/', ' ', $data->find('span', 0)->plaintext)); + // Just use title, no info at all or use title and date, see above + $item['content'] = $item['title']; + } + $item['uid'] = hash('sha256', $item['title']); + $this->items[] = $item; + } + } + + private function getFullChangelog($url) { + $html = getSimpleHTMLDOMCached($url) + or returnServerError('Unable to load changelog post from "' . $url . '"!'); + $html = defaultLinkTo($html, self::URI); + return $html->find('div.small-12.columns', 1)->innertext; + } +} diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index 17da9031..00fd0e80 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -8,14 +8,25 @@ class AllocineFRBridge extends BridgeAbstract { const DESCRIPTION = 'Bridge for allocine.fr'; const PARAMETERS = array( array( 'category' => array( - 'name' => 'category', + 'name' => 'Emission', 'type' => 'list', - 'exampleValue' => 'Faux Raccord', - 'title' => 'Select your category', + 'title' => 'Sélectionner l\'emission', 'values' => array( 'Faux Raccord' => 'faux-raccord', - 'Top 5' => 'top-5', - 'Tueurs en Séries' => 'tueurs-en-serie' + 'Fanzone' => 'fanzone', + 'Game In Ciné' => 'game-in-cine', + 'Pour la faire courte' => 'pour-la-faire-courte', + 'Home Cinéma' => 'home-cinema', + 'PILS - Par Ici Les Sorties' => 'pils-par-ici-les-sorties', + 'AlloCiné : l\'émission, sur LeStream' => 'allocine-lemission-sur-lestream', + 'Give Me Five' => 'give-me-five', + 'Aviez-vous remarqué ?' 
=> 'aviez-vous-remarque', + 'Et paf, il est mort' => 'et-paf-il-est-mort', + 'The Big Fan Theory' => 'the-big-fan-theory', + 'Clichés' => 'cliches', + 'Complètement...' => 'completement', + '#Fun Facts' => 'fun-facts', + 'Origin Story' => 'origin-story', ) ) )); @@ -23,19 +34,30 @@ class AllocineFRBridge extends BridgeAbstract { public function getURI(){ if(!is_null($this->getInput('category'))) { - switch($this->getInput('category')) { - case 'faux-raccord': - $uri = static::URI . 'video/programme-12284/saison-32180/'; - break; - case 'top-5': - $uri = static::URI . 'video/programme-12299/saison-29561/'; - break; - case 'tueurs-en-serie': - $uri = static::URI . 'video/programme-12286/saison-22938/'; - break; - } + $categories = array( + 'faux-raccord' => 'video/programme-12284/saison-37054/', + 'fanzone' => 'video/programme-12298/saison-37059/', + 'game-in-cine' => 'video/programme-12288/saison-22971/', + 'pour-la-faire-courte' => 'video/programme-20960/saison-29678/', + 'home-cinema' => 'video/programme-12287/saison-34703/', + 'pils-par-ici-les-sorties' => 'video/programme-25789/saison-37253/', + 'allocine-lemission-sur-lestream' => 'video/programme-25123/saison-36067/', + 'give-me-five' => 'video/programme-21919/saison-34518/', + 'aviez-vous-remarque' => 'video/programme-19518/saison-37084/', + 'et-paf-il-est-mort' => 'video/programme-25113/saison-36657/', + 'the-big-fan-theory' => 'video/programme-20403/saison-37419/', + 'cliches' => 'video/programme-24834/saison-35591/', + 'completement' => 'video/programme-23859/saison-34102/', + 'fun-facts' => 'video/programme-23040/saison-32686/', + 'origin-story' => 'video/programme-25667/saison-37041/' + ); - return $uri; + $category = $this->getInput('category'); + if(array_key_exists($category, $categories)) { + return static::URI . 
$categories[$category]; + } else { + returnClientError('Emission inconnue'); + } } return parent::getURI(); @@ -63,23 +85,23 @@ class AllocineFRBridge extends BridgeAbstract { self::PARAMETERS[$this->queriedContext]['category']['values'] ); - foreach($html->find('.media-meta-list figure.media-meta-fig') as $element) { + foreach($html->find('div[class=gd-col-left]', 0)->find('div[class*=video-card]') as $element) { $item = array(); - $title = $element->find('div.titlebar h3.title a', 0); - $content = trim($element->innertext); - $figCaption = strpos($content, $category); + $title = $element->find('a[class*=meta-title-link]', 0); + $content = trim($element->outertext); - if($figCaption !== false) { - $content = str_replace('src="/', 'src="' . static::URI, $content); - $content = str_replace('href="/', 'href="' . static::URI, $content); - $content = str_replace('src=\'/', 'src=\'' . static::URI, $content); - $content = str_replace('href=\'/', 'href=\'' . static::URI, $content); - $item['content'] = $content; - $item['title'] = trim($title->innertext); - $item['uri'] = static::URI . $title->href; - $this->items[] = $item; - } + // Replace image 'src' with the one in 'data-src' + $content = preg_replace('@src="data:image/gif;base64,[A-Za-z0-9+\/]*"@', '', $content); + $content = preg_replace('@data-src=@', 'src=', $content); + + // Remove date in the content to prevent content update while the video is getting older + $content = preg_replace('@
.*[^<]*[^<]*
@', '', $content); + + $item['content'] = $content; + $item['title'] = trim($title->innertext); + $item['uri'] = static::URI . substr($title->href, 1); + $this->items[] = $item; } } } diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index 950178a7..129ed57c 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -32,6 +32,7 @@ class AmazonPriceTrackerBridge extends BridgeAbstract { 'Mexico' => 'com.mx', 'Netherlands' => 'nl', 'Spain' => 'es', + 'Sweden' => 'se', 'United Kingdom' => 'co.uk', 'United States' => 'com', ), diff --git a/bridges/AnidexBridge.php b/bridges/AnidexBridge.php index ae387c90..ff9f5f96 100644 --- a/bridges/AnidexBridge.php +++ b/bridges/AnidexBridge.php @@ -3,7 +3,9 @@ class AnidexBridge extends BridgeAbstract { const MAINTAINER = 'ORelio'; const NAME = 'Anidex'; - const URI = 'https://anidex.info/'; + const URI = 'http://anidex.info/'; // anidex.info has ddos-guard so we need to use anidex.moe + const ALTERNATE_URI = 'https://anidex.moe/'; // anidex.moe returns 301 unless Host is set to anidex.info + const ALTERNATE_HOST = 'anidex.info'; // Correct host for requesting anidex.moe without 301 redirect const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.'; const PARAMETERS = array( array( @@ -108,7 +110,7 @@ class AnidexBridge extends BridgeAbstract { public function collectData() { // Build Search URL from user-provided parameters - $search_url = self::URI . '?s=upload_timestamp&o=desc'; + $search_url = self::ALTERNATE_URI . '?s=upload_timestamp&o=desc'; foreach (array('id', 'lang_id', 'group_id') as $param_name) { $param = $this->getInput($param_name); if (!empty($param) && intval($param) != 0 && ctype_digit(str_replace(',', '', $param))) { @@ -131,8 +133,16 @@ class AnidexBridge extends BridgeAbstract { $opt[CURLOPT_COOKIE] = 'anidex_h_toggle=' . 
$h; } + // We need to use a different Host HTTP header to reach the correct page on ALTERNATE_URI + $headers = array('Host: ' . self::ALTERNATE_HOST); + + // The HTTPS certificate presented by anidex.moe is for anidex.info. We need to ignore this. + // As a consequence, the bridge is intentionally marked as insecure by setting self::URI to http:// + $opt[CURLOPT_SSL_VERIFYHOST] = 0; + $opt[CURLOPT_SSL_VERIFYPEER] = 0; + // Retrieve torrent listing from search results, which does not contain torrent description - $html = getSimpleHTMLDOM($search_url, array(), $opt) + $html = getSimpleHTMLDOM($search_url, $headers, $opt) or returnServerError('Could not request Anidex: ' . $search_url); $links = $html->find('a'); $results = array(); @@ -156,10 +166,11 @@ class AnidexBridge extends BridgeAbstract { if ($torrent_id != 0 && ctype_digit($torrent_id)) { //Retrieve data for this torrent ID - $item_uri = self::URI . 'torrent/' . $torrent_id; + $item_browse_uri = self::URI . 'torrent/' . $torrent_id; + $item_fetch_uri = self::ALTERNATE_URI . 'torrent/' . 
$torrent_id; - //Retrieve full description from torrent page - if ($item_html = getSimpleHTMLDOMCached($item_uri)) { + //Retrieve full description from torrent page (cached for 24 hours: 86400 seconds) + if ($item_html = getSimpleHTMLDOMCached($item_fetch_uri, 86400, $headers, $opt)) { //Retrieve data from page contents $item_title = str_replace(' (Torrent) - AniDex ', '', $item_html->find('title', 0)->plaintext); @@ -191,7 +202,7 @@ class AnidexBridge extends BridgeAbstract { //Build and add final item $item = array(); - $item['uri'] = $item_uri; + $item['uri'] = $item_browse_uri; $item['title'] = $item_title; $item['author'] = $item_author; $item['timestamp'] = $item_date; diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index bc1dd7bc..c83d6ddb 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -102,7 +102,6 @@ class AnimeUltimeBridge extends BridgeAbstract { $item_description = defaultLinkTo($item_description, self::URI); $item_description = str_replace("\r", '', $item_description); $item_description = str_replace("\n", '', $item_description); - $item_description = utf8_encode($item_description); //Build and add final item $item = array(); diff --git a/bridges/AppleMusicBridge.php b/bridges/AppleMusicBridge.php index 30119777..6fc0c3dc 100644 --- a/bridges/AppleMusicBridge.php +++ b/bridges/AppleMusicBridge.php @@ -20,6 +20,8 @@ class AppleMusicBridge extends BridgeAbstract { )); const CACHE_TIMEOUT = 21600; // 6 hours + private $title; + public function collectData() { $url = $this->getInput('url'); $html = getSimpleHTMLDOM($url) @@ -27,6 +29,8 @@ class AppleMusicBridge extends BridgeAbstract { $imgSize = $this->getInput('imgSize'); + $this->title = $html->find('title', 0)->innertext; + // Grab the json data from the page $html = $html->find('script[id=shoebox-ember-data-store]', 0); $html = strstr($html, '{'); @@ -59,4 +63,8 @@ class AppleMusicBridge extends BridgeAbstract { return $a['timestamp'] < 
$b['timestamp']; }); } + + public function getName() { + return $this->title ?: parent::getName(); + } } diff --git a/bridges/Arte7Bridge.php b/bridges/Arte7Bridge.php index 562f648f..08390afa 100644 --- a/bridges/Arte7Bridge.php +++ b/bridges/Arte7Bridge.php @@ -1,7 +1,7 @@ array( @@ -27,7 +27,7 @@ class AtmoNouvelleAquitaineBridge extends BridgeAbstract { } public function collectData() { - $uri = self::URI . $this->getInput('cities'); + $uri = self::URI . '/monair/commune/' . $this->getInput('cities'); $html = getSimpleHTMLDOM($uri) or returnServerError('Could not request ' . $uri); diff --git a/bridges/AtmoOccitanieBridge.php b/bridges/AtmoOccitanieBridge.php new file mode 100644 index 00000000..24f63832 --- /dev/null +++ b/bridges/AtmoOccitanieBridge.php @@ -0,0 +1,58 @@ + array( + 'name' => 'Ville', + 'required' => true + ) + )); + const CACHE_TIMEOUT = 7200; + + public function collectData() { + $uri = self::URI . $this->getInput('city'); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request ' . $uri); + + $generalMessage = $html->find('.landing-ville .city-banner .iqa-avertissement', 0)->innertext; + $recommendationsDom = $html->find('.landing-ville .recommandations', 0); + $recommendationsItemDom = $recommendationsDom->find('.recommandation-item .label'); + + $recommendationsMessage = ''; + + $i = 0; + $len = count($recommendationsItemDom); + foreach ($recommendationsItemDom as $key => $value) { + if ($i == 0) { + $recommendationsMessage .= trim($value->innertext) . '.'; + } else { + $recommendationsMessage .= ' ' . trim($value->innertext) . 
'.'; + } + $i++; + } + + $lastRecommendationsDom = $recommendationsDom->find('.col-md-6', -1); + $informationHeaderMessage = $lastRecommendationsDom->find('.heading', 0)->innertext; + $indice = $lastRecommendationsDom->find('.current-indice .indice div', 0)->innertext; + $informationDescriptionMessage = $lastRecommendationsDom->find('.current-indice .description p', 0)->innertext; + + $message = "$generalMessage L'indice est de $indice/10. $informationDescriptionMessage. $recommendationsMessage"; + $city = $this->getInput('city'); + + $item['uri'] = $uri; + $today = date('d/m/Y'); + $item['title'] = "Bulletin de l'air du $today pour la ville : $city."; + //$item['title'] .= ' Retrouvez plus d\'informations en allant sur atmo-occitanie.org #QualiteAir. ' . $message; + $item['title'] .= ' #QualiteAir. ' . $message; + $item['author'] = 'floviolleau'; + $item['content'] = $message; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } +} diff --git a/bridges/AutoJMBridge.php b/bridges/AutoJMBridge.php index 25fb2cb8..b9825ca4 100644 --- a/bridges/AutoJMBridge.php +++ b/bridges/AutoJMBridge.php @@ -77,110 +77,69 @@ class AutoJMBridge extends BridgeAbstract { $model_url = self::URI . $this->getInput('url'); - // Get the session cookies and the form token - $this->getInitialParameters($model_url); + // Build the GET data + $get_data = 'form[energy]=' . $this->getInput('energy') . + '&form[transmission]=' . $this->getInput('transmission') . + '&form[priceMin]=' . $this->getInput('priceMin') . + '&form[priceMin]=' . 
$this->getInput('priceMin'); - // Build the form - $post_data = array( - 'form[energy]' => $this->getInput('energy'), - 'form[transmission]' => $this->getInput('transmission'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[_token]' => $this->token - ); - - // Set the Form request content type + // Set the header 'X-Requested-With' like the website does it $header = array( - 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8', - ); - - // Set the curl options (POST query and content, and session cookies - $curl_opts = array( - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => http_build_query($post_data), - CURLOPT_COOKIE => $this->cookies + 'X-Requested-With: XMLHttpRequest' ); // Get the JSON content of the form - $json = getContents($model_url, $header, $curl_opts) + $json = getContents($model_url . '?' . $get_data, $header) or returnServerError('Could not request AutoJM.'); // Extract the HTML content from the JSON result $data = json_decode($json); - $html = str_get_html($data->content); + $html = str_get_html($data->results); - // Go through every finisha of the model - $list = $html->find('h3'); - foreach ($list as $finish) { - $finish_name = $finish->plaintext; - $motorizations = $finish->next_sibling()->find('li'); - foreach ($motorizations as $element) { - $image = $element->find('div[class=block-product-image]', 0)->{'data-ga-banner'}; - $serie = $element->find('span[class=model]', 0)->plaintext; - $url = self::URI . 
substr($element->find('a', 0)->href, 1); - if ($element->find('span[class*=block-product-nbModel]', 0) != null) { - $availability = 'En Stock'; - } else { - $availability = 'Sur commande'; - } - $discount_html = $element->find('span[class*=tag--promo]', 0); - if ($discount_html != null) { - $discount = $discount_html->plaintext; - } else { - $discount = 'inconnue'; - } - $price = $element->find('span[class=price red h1]', 0)->plaintext; - $item = array(); - $item['title'] = $finish_name . ' ' . $serie; - $item['content'] = '

' - . $finish_name . ' ' . $serie . '

'; - $item['content'] .= ''; + // Go through every car of the model + $list = $html->find('div[class=car-card]'); + foreach ($list as $car) { - // Add a fictionnal anchor to the RSS element URL, based on the item content ; - // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items - $item['uri'] = $url . '#' . md5($item['content']); - - $this->items[] = $item; + // Get the Finish name if this car is the first of a new finish + $prev_tag = $car->prev_sibling(); + if($prev_tag->tag == 'div' && $prev_tag->class == 'results-title') { + $finish_name = $prev_tag->plaintext; } - } - } - /** - * Gets the session cookie and the form token - * - * @param string $pageURL The URL from which to get the values - */ - private function getInitialParameters($pageURL) { - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $pageURL); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - $data = curl_exec($ch); - - // Separate the response header and the content - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $headerSize); - $content = substr($data, $headerSize); - curl_close($ch); - - // Extract the cookies from the headers - $cookies = ''; - $http_response_header = explode("\r\n", $header); - foreach ($http_response_header as $hdr) { - if (strpos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . 
$cLine; + // Get the info about the car offer + $image = $car->find('div[class=car-card__visual]', 0)->find('img', 0)->src; + $serie = $car->find('div[class=car-card__title]', 0)->plaintext; + $url = $car->find('a', 0)->href; + // Check if the car model is in stock or available only on order + if($car->find('span[class*=tag--dispo]', 0) != null) { + $availability = 'En Stock'; + } else { + $availability = 'Sur commande'; } - } - $this->cookies = trim(substr($cookies, 1)); + $discount_html = $car->find('span[class=promo]', 0); + // Check if there is any discount dsiplayed + if ($discount_html != null) { + $discount = $discount_html->plaintext; + } else { + $discount = 'inconnue'; + } + $price = $car->find('span[class=price]', 0)->plaintext; - // Get the token from the content - $html = str_get_html($content); - $token = $html->find('input[type=hidden][id=form__token]', 0); - $this->token = $token->value; + // Construct the new item + $item = array(); + $item['title'] = $finish_name . ' ' . $serie; + $item['content'] = '

' + . $finish_name . ' ' . $serie . '

'; + $item['content'] .= ''; + + // Add a fictionnal anchor to the RSS element URL, based on the item content ; + // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items + $item['uri'] = $url . '#' . md5($item['content']); + + $this->items[] = $item; + } } } diff --git a/bridges/AwwwardsBridge.php b/bridges/AwwwardsBridge.php new file mode 100644 index 00000000..c1d1d320 --- /dev/null +++ b/bridges/AwwwardsBridge.php @@ -0,0 +1,55 @@ +find('li[data-model]') as $site) { + $decode = html_entity_decode($site->attr['data-model'], + ENT_QUOTES, 'utf-8'); + $decode = json_decode($decode, true); + $this->sites[] = $decode; + } + } + + public function collectData() { + $this->fetchSites(); + + Debug::log('Building RSS feed'); + foreach($this->sites as $site) { + $item = array(); + $item['title'] = $site['title']; + $item['timestamp'] = $site['createdAt']; + $item['categories'] = $site['tags']; + + $item['content'] = ''; + $item['uri'] = self::SITEURI . $site['slug']; + + $this->items[] = $item; + + if(count($this->items) >= 10) + break; + } + } +} diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php index 613005fd..14ad3e53 100644 --- a/bridges/BastaBridge.php +++ b/bridges/BastaBridge.php @@ -19,13 +19,11 @@ class BastaBridge extends BridgeAbstract { $item['title'] = $element->find('title', 0)->innertext; $item['uri'] = $element->find('guid', 0)->plaintext; $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); - // Replaces all relative image URLs by absolute URLs. - // Relative URLs always start with 'local/'! - $item['content'] = preg_replace( - '/src=["\']{1}([^"\']+)/ims', - 'src=\'' . self::URI . 
'$1\'', - getSimpleHTMLDOM($item['uri'])->find('div.texte', 0)->innertext - ); + + $html = getSimpleHTMLDOM($item['uri']); + $html = defaultLinkTo($html, self::URI); + + $item['content'] = $html->find('div.texte', 0)->innertext; $this->items[] = $item; $limit++; } diff --git a/bridges/BleepingComputerBridge.php b/bridges/BleepingComputerBridge.php new file mode 100644 index 00000000..78ec3125 --- /dev/null +++ b/bridges/BleepingComputerBridge.php @@ -0,0 +1,29 @@ +Could not request ' . $this->getName() . ': ' . $item['uri'] . '

'; + return $item; + } + + $article_content = $article_html->find('div.articleBody', 0)->innertext; + $article_content = stripRecursiveHTMLSection($article_content, 'div', '
getInput('locale'); + if('zh-cn' === $locale) { + return 'https://cn.news.blizzard.com'; + } + return 'https://news.blizzard.com/' . $locale; + } +} diff --git a/bridges/BrutBridge.php b/bridges/BrutBridge.php index 32265b69..d91ce97c 100644 --- a/bridges/BrutBridge.php +++ b/bridges/BrutBridge.php @@ -16,6 +16,7 @@ class BrutBridge extends BridgeAbstract { 'Entertainment' => 'entertainment', 'Sports' => 'sport', 'Nature' => 'nature', + 'Health' => 'health', ), 'defaultValue' => 'news', ), @@ -26,6 +27,7 @@ class BrutBridge extends BridgeAbstract { 'United States' => 'us', 'United Kingdom' => 'uk', 'France' => 'fr', + 'Spain' => 'es', 'India' => 'in', 'Mexico' => 'mx', ), diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php new file mode 100644 index 00000000..ea3a4bf2 --- /dev/null +++ b/bridges/CeskaTelevizeBridge.php @@ -0,0 +1,84 @@ + array( + 'name' => 'url to the show', + 'required' => true, + 'exampleValue' => 'https://www.ceskatelevize.cz/porady/1097181328-udalosti/dily/' + ) + ) + ); + + private function fixChars($text) { + return html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + } + + private function getUploadTimeFromString($string) { + if (strpos($string, 'dnes') !== false) { + return strtotime('today'); + } elseif (strpos($string, 'včera') !== false) { + return strtotime('yesterday'); + } elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) { + returnServerError('Could not get date from Česká televize string'); + } + + $date = sprintf('%04d-%02d-%02d', isset($match[3]) ? $match[3] : date('Y'), $match[2], $match[1]); + return strtotime($date); + } + + public function collectData() { + $url = $this->getInput('url'); + + $validUrl = '/^(https:\/\/www\.ceskatelevize\.cz\/porady\/\d+-[a-z0-9-]+\/)(dily\/((nove|vysilani)\/)?)?$/'; + if (!preg_match($validUrl, $url, $match)) { + returnServerError('Invalid url'); + } + + $category = isset($match[4]) ? 
$match[4] : 'nove'; + $fixedUrl = "{$match[1]}dily/{$category}/"; + + $html = getSimpleHTMLDOM($fixedUrl) + or returnServerError('Could not request Česká televize'); + + $this->feedUri = $fixedUrl; + $this->feedName = str_replace('Přehled dílů — ', '', $this->fixChars($html->find('title', 0)->plaintext)); + if ($category !== 'nove') { + $this->feedName .= " ({$category})"; + } + + foreach ($html->find('.episodes-broadcast-content a.episode_list_item') as $element) { + $itemTitle = $element->find('.episode_list_item-title', 0); + $itemContent = $element->find('.episode_list_item-desc', 0); + $itemDate = $element->find('.episode_list_item-date', 0); + $itemThumbnail = $element->find('img', 0); + $itemUri = self::URI . $element->getAttribute('href'); + + $item = array( + 'title' => $this->fixChars($itemTitle->plaintext), + 'uri' => $itemUri, + 'content' => '
' + . $this->fixChars($itemContent->plaintext), + 'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext) + ); + + $this->items[] = $item; + } + } + + public function getURI() { + return isset($this->feedUri) ? $this->feedUri : parent::getURI(); + } + + public function getName() { + return isset($this->feedName) ? $this->feedName : parent::getName(); + } +} diff --git a/bridges/ChristianDailyReporterBridge.php b/bridges/ChristianDailyReporterBridge.php deleted file mode 100644 index 85f664df..00000000 --- a/bridges/ChristianDailyReporterBridge.php +++ /dev/null @@ -1,28 +0,0 @@ -find('div.top p a,div.column p a') as $element) { - $item = array(); - // Title - $item['title'] = $element->innertext; - // URL - $item['uri'] = $element->href; - $this->items[] = $item; - } - } -} diff --git a/bridges/DarkReadingBridge.php b/bridges/DarkReadingBridge.php index 3baaad75..6ab83e9c 100644 --- a/bridges/DarkReadingBridge.php +++ b/bridges/DarkReadingBridge.php @@ -53,6 +53,8 @@ class DarkReadingBridge extends FeedExpander { protected function parseItem($newsItem){ $item = parent::parseItem($newsItem); + if (empty($item['content'])) + return null; //ignore dummy articles $article = getSimpleHTMLDOMCached($item['uri']) or returnServerError('Could not request Dark Reading: ' . $item['uri']); $item['content'] = $this->extractArticleContent($article); diff --git a/bridges/DaveRamseyBlogBridge.php b/bridges/DaveRamseyBlogBridge.php new file mode 100644 index 00000000..34c90446 --- /dev/null +++ b/bridges/DaveRamseyBlogBridge.php @@ -0,0 +1,24 @@ +find('.Post') as $element) { + $this->items[] = array( + 'uri' => 'https://www.daveramsey.com' . 
$element->find('header > a', 0)->href, + 'title' => $element->find('header > h2 > a', 0)->plaintext, + 'tags' => $element->find('.Post-topic', 0)->plaintext, + 'content' => $element->find('.Post-body', 0)->plaintext, + ); + } + } +} diff --git a/bridges/DevToBridge.php b/bridges/DevToBridge.php index 868ac976..def7e76a 100644 --- a/bridges/DevToBridge.php +++ b/bridges/DevToBridge.php @@ -45,29 +45,22 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' } public function collectData() { - $html = getSimpleHTMLDOMCached($this->getURI()) or returnServerError('Could not request ' . $this->getURI()); $html = defaultLinkTo($html, static::URI); - $articles = $html->find('div[class="single-article"]') + $articles = $html->find('div.crayons-story') or returnServerError('Could not find articles!'); foreach($articles as $article) { - - if($article->find('[class*="cta"]', 0)) { // Skip ads - continue; - } - $item = array(); $item['uri'] = $article->find('a[id*=article-link]', 0)->href; - $item['title'] = $article->find('h3', 0)->plaintext; + $item['title'] = $article->find('h2 > a', 0)->plaintext; - // i.e. "Charlie Harrington・Sep 21" - $item['timestamp'] = strtotime(explode('・', $article->find('h4 a', 0)->plaintext, 2)[1]); - $item['author'] = explode('・', $article->find('h4 a', 0)->plaintext, 2)[0]; + $item['timestamp'] = $article->find('time', 0)->datetime; + $item['author'] = $article->find('a.crayons-story__secondary.fw-medium', 0)->plaintext; // Profile image $item['enclosures'] = array($article->find('img', 0)->src); @@ -75,7 +68,6 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' if($this->getInput('full')) { $fullArticle = $this->getFullArticle($item['uri']); $item['content'] = <<

{$fullArticle}

EOD; } else { @@ -85,11 +77,21 @@ EOD; EOD; } - $item['categories'] = array_map(function($e){ return $e->plaintext; }, $article->find('div.tags span.tag')); + // categories + foreach ($article->find('a.crayons-tag') as $tag) { + $item['categories'][] = str_replace('#', '', $tag->plaintext); + } $this->items[] = $item; } + } + public function getName() { + if (!is_null($this->getInput('tag'))) { + return ucfirst($this->getInput('tag')) . ' - dev.to'; + } + + return parent::getName(); } private function getFullArticle($url) { @@ -98,6 +100,10 @@ EOD; $html = defaultLinkTo($html, static::URI); + if ($html->find('div.crayons-article__cover', 0)) { + return $html->find('div.crayons-article__cover', 0) . $html->find('[id="article-body"]', 0); + } + return $html->find('[id="article-body"]', 0); } } diff --git a/bridges/DiarioDeNoticiasBridge.php b/bridges/DiarioDeNoticiasBridge.php new file mode 100644 index 00000000..887eb117 --- /dev/null +++ b/bridges/DiarioDeNoticiasBridge.php @@ -0,0 +1,84 @@ + array( + 'n' => array( + 'name' => 'Tag Name', + 'exampleValue' => 'rogerio-casanova', + ) + ) + ); + + const MONPT = array( + 'jan', + 'fev', + 'mar', + 'abr', + 'mai', + 'jun', + 'jul', + 'ago', + 'set', + 'out', + 'nov', + 'dez', + ); + + public function getIcon() { + return 'https://static.globalnoticias.pt/dn/common/images/favicons/favicon-128.png'; + } + + public function getName() { + switch($this->queriedContext) { + case 'Tag': + $name = self::NAME . ' | Tag | ' . $this->getInput('n'); + break; + default: + $name = self::NAME; + } + return $name; + } + + public function getURI() { + switch($this->queriedContext) { + case 'Tag': + $url = self::URI . '/tag/' . $this->getInput('n') . 
'.html'; + break; + default: + $url = self::URI; + } + return $url; + } + + public function collectData() { + $archives = self::getURI(); + $html = getSimpleHTMLDOMCached($archives) + or returnServerError('Could not load content'); + + foreach($html->find('article') as $element) { + $item = array(); + + $title = $element->find('.t-am-title', 0); + $link = $element->find('a.t-am-text', 0); + + $item['title'] = $title->plaintext; + $item['uri'] = self::URI . $link->href; + + $snippet = $element->find('.t-am-lead', 0); + if ($snippet) { + $item['content'] = $snippet->plaintext; + } + preg_match('|edicao-do-dia\\/(?P\d\d)-(?P\w\w\w)-(?P\d\d\d\d)|', $link->href, $d); + if ($d) { + $item['timestamp'] = sprintf('%s-%s-%s', $d['year'], array_search($d['monpt'], self::MONPT) + 1, $d['day']); + } + + $this->items[] = $item; + } + + } +} diff --git a/bridges/DonnonsBridge.php b/bridges/DonnonsBridge.php new file mode 100644 index 00000000..77413499 --- /dev/null +++ b/bridges/DonnonsBridge.php @@ -0,0 +1,123 @@ + array( + 'name' => 'Url de recherche', + 'required' => true, + 'exampleValue' => '/Sport/Ile-de-France', + 'pattern' => '\/.*', + 'title' => 'Faites une recherche sur le site. Puis copiez ici la fin de l’url. 
Doit commencer par /', + ), + 'p' => array( + 'name' => 'Nombre de pages à scanner', + 'type' => 'number', + 'defaultValue' => 5, + 'title' => 'Indique le nombre de pages de donnons.org qui seront scannées' + ) + ) + ); + + public function collectData() { + $pages = $this->getInput('p'); + + for($i = 1; $i <= $pages; $i++) { + $this->collectDataByPage($i); + } + } + + private function collectDataByPage($page) { + $uri = $this->getPageURI($page); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('No results for this query.'); + + $searchDiv = $html->find('div[id=search]', 0); + + if(!is_null($searchDiv)) { + $elements = $searchDiv->find('a.lst-annonce'); + foreach($elements as $element) { + $item = array(); + + // Lien vers le don + $item['uri'] = self::URI . $element->href; + // Id de l'objet + $item['uid'] = $element->getAttribute('data-id'); + + // Grab info from json + $jsonString = $element->find('script', 0)->innertext; + $json = json_decode($jsonString, true); + + $name = $json['name']; + $category = $json['category']; + $date = $json['availabilityStarts']; + $description = $json['description']; + $city = $json['availableAtOrFrom']['address']['addressLocality']; + $region = $json['availableAtOrFrom']['address']['addressRegion']; + + // Grab info from HTML + $imageSrc = $element->find('img.ima-center', 0)->getAttribute('data-src'); + $image = self::URI . $imageSrc; + $author = $element->find('div.avatar-holder', 0)->plaintext; + + $content = ' + +
+

' . $name . '

+

' . $description . '

+

Lieu : ' . $city . ' - ' . $region . '

+

Par : ' . $author . '

+

Date : ' . $date . '

+
+ '; + + // Titre du don + $item['title'] = '[' . $category . '] ' . $name; + $item['timestamp'] = $date; + $item['author'] = $author; + $item['content'] = $content; + $item['enclosures'] = array($image); + + $this->items[] = $item; + } + } + } + + private function getPageURI($page) { + $uri = $this->getURI(); + $haveQueryParams = strpos($uri, '?') !== false; + + if($haveQueryParams) { + return $uri . '&page=' . $page; + } else { + return $uri . '?page=' . $page; + } + } + + public function getURI() { + if(!is_null($this->getInput('q'))) { + return self::URI . $this->getInput('q'); + } + + return parent::getURI(); + } + + public function getName() { + if(!is_null($this->getInput('q'))) { + return 'Donnons.org - ' . $this->getInput('q'); + } + + return parent::getName(); + } +} diff --git a/bridges/DownDetectorBridge.php b/bridges/DownDetectorBridge.php index 4aef3728..bfbce699 100644 --- a/bridges/DownDetectorBridge.php +++ b/bridges/DownDetectorBridge.php @@ -6125,9 +6125,16 @@ class DownDetectorBridge extends BridgeAbstract { $table = $html->find('table.table-striped', 0); $maxCount = 10; - foreach ($table->find('tr') as $downEvent) { - $downLink = $downEvent->find('td', 1)->find('a', 1); - $item = $this->collectArticleData($downLink->getAttribute('href')); + foreach ($table->find('tr') as $event) { + $td = $event->find('td', 0); + + if (is_null($td)) { + continue; + } + + $link = $event->find('td', 0)->find('a', 0); + + $item = $this->collectArticleData($link->getAttribute('href')); $this->items[] = $item; if($maxCount == 0) break; $maxCount -= 1; diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index b1193c90..e3452658 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -13,7 +13,7 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . 
'/shots') + $html = getSimpleHTMLDOM(self::URI) or returnServerError('Error while downloading the website content'); $json = $this->loadEmbeddedJsonData($html); @@ -24,19 +24,19 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; $additional_data = $this->findJsonForShot($shot, $json); if ($additional_data === null) { $item['uri'] = self::URI . $shot->find('a', 0)->href; - $item['title'] = $shot->find('.dribbble-over strong', 0)->plaintext; + $item['title'] = $shot->find('.shot-title', 0)->plaintext; } else { $item['timestamp'] = strtotime($additional_data['published_at']); $item['uri'] = self::URI . $additional_data['path']; $item['title'] = $additional_data['title']; } - $item['author'] = trim($shot->find('.attribution-user a', 0)->plaintext); + $item['author'] = trim($shot->find('.user-information .display-name', 0)->plaintext); $description = $shot->find('.comment', 0); $item['content'] = $description === null ? '' : $description->plaintext; - $preview_path = $shot->find('picture source', 0)->attr['srcset']; + $preview_path = $shot->find('figure img', 1)->attr['data-srcset']; $item['content'] .= $this->getImageTag($preview_path, $item['title']); $item['enclosures'] = array($this->getFullSizeImagePath($preview_path)); @@ -51,10 +51,13 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; foreach($scripts as $script) { if(strpos($script->innertext, 'newestShots') !== false) { // fix single quotes - $script->innertext = str_replace('\'', '"', $script->innertext); + $script->innertext = preg_replace('/\'(.*)\'(,?)$/im', '"\1"\2', $script->innertext); // fix JavaScript JSON (why do they not adhere to the standard?) - $script->innertext = preg_replace('/(\w+):/i', '"\1":', $script->innertext); + $script->innertext = preg_replace('/^(\s*)(\w+):/im', '\1"\2":', $script->innertext); + + // fix relative dates, so they are recognized by strtotime + $script->innertext = preg_replace('/"about ([0-9]+ hours? 
ago)"(,?)$/im', '"\1"\2', $script->innertext); // find beginning of JSON array $start = strpos($script->innertext, '['); @@ -83,7 +86,7 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; private function getImageTag($preview_path, $title){ return sprintf( - '
%s', + '
%s', $this->getFullSizeImagePath($preview_path), $preview_path, $title @@ -91,6 +94,11 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } private function getFullSizeImagePath($preview_path){ - return str_replace('_1x', '', $preview_path); + // Get last image from srcset + $src_set_urls = explode(',', $preview_path); + $url = end($src_set_urls); + $url = explode(' ', $url)[1]; + + return htmlspecialchars_decode($url); } } diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 94121ac3..b58c6672 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -14,17 +14,28 @@ class EconomistBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI . '/latest/') or returnServerError('Could not fetch latest updates form The Economist.'); - foreach($html->find('article') as $element) { + foreach($html->find('div.teaser') as $element) { + + $a = $element->find('a.headline-link', 0); + $href = $a->href; + + if (substr($href, 0, 4) != 'http') + $href = self::URI . $a->href; - $a = $element->find('a', 0); - $href = self::URI . 
$a->href; $full = getSimpleHTMLDOMCached($href); $article = $full->find('article', 0); + $header = $article->find('span[itemprop="headline"]', 0); + $headerimg = $article->find('div[itemprop="image"]', 0)->find('img', 0); + $author = $article->find('p[itemprop="byline"]', 0); + $time = $article->find('time', 0); + $content = $article->find('div[itemprop="text"]', 0); + $section = array( $article->find('strong[itemprop="articleSection"]', 0)->plaintext ); - $header = $article->find('h1', 0); - $author = $article->find('span[itemprop="author"]', 0); - $time = $article->find('time[itemprop="dateCreated"]', 0); - $content = $article->find('div[itemprop="description"]', 0); + // Author + if ($author) + $author = substr($author->innertext, 3, strlen($author)); + else + $author = 'The Economist'; // Remove newsletter subscription box $newsletter = $content->find('div[class="newsletter-form__message"]', 0); @@ -40,19 +51,15 @@ class EconomistBridge extends BridgeAbstract { if ($nextprev) $nextprev->outertext = ''; - $section = array( $article->find('h3[itemprop="articleSection"]', 0)->plaintext ); - $item = array(); - $item['title'] = $header->find('span', 0)->innertext . ': ' - . $header->find('span', 1)->innertext; - + $item['title'] = $header->innertext; $item['uri'] = $href; $item['timestamp'] = strtotime($time->datetime); - $item['author'] = $author->innertext; + $item['author'] = $author; $item['categories'] = $section; $item['content'] = '' . $content->innertext; + . $headerimg->src . '">' . 
$content->innertext; $this->items[] = $item; diff --git a/bridges/EpicgamesBridge.php b/bridges/EpicgamesBridge.php new file mode 100644 index 00000000..e6ba5421 --- /dev/null +++ b/bridges/EpicgamesBridge.php @@ -0,0 +1,93 @@ + array( + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + 'defaultValue' => 10, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'العربية' => 'ar', + 'Deutsch' => 'de', + 'Español (Spain)' => 'es-ES', + 'Español (LA)' => 'es-MX', + 'Français' => 'fr', + 'Italiano' => 'it', + '日本語' => 'ja', + '한국어' => 'ko', + 'Polski' => 'pl', + 'Português (Brasil)' => 'pt-BR', + 'Русский' => 'ru', + 'ไทย' => 'th', + 'Türkçe' => 'tr', + '简体中文' => 'zh-CN', + '繁體中文' => 'zh-Hant', + ), + 'title' => 'Language of blog posts', + 'defaultValue' => 'en', + ), + )); + + public function collectData() { + $api = 'https://store-content.ak.epicgames.com/api/'; + + // Get sticky posts first + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog/sticky?locale=ru + $urlSticky = $api . $this->getInput('language') . '/content/blog/sticky'; + // Then get posts + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog?limit=25 + $urlBlog = $api . $this->getInput('language') . '/content/blog?limit=' . $this->getInput('postcount'); + + $dataSticky = getContents($urlSticky) + or returnServerError('Unable to get the sticky posts from epicgames.com!'); + $dataBlog = getContents($urlBlog) + or returnServerError('Unable to get the news posts from epicgames.com!'); + + // Merge data + $decodedData = array_merge(json_decode($dataSticky), json_decode($dataBlog)); + + foreach($decodedData as $key => $value) { + $item = array(); + $item['uri'] = self::URI . 
$value->url; + $item['title'] = $value->title; + $item['timestamp'] = $value->date; + $item['author'] = 'Epic Games Store'; + if(!empty($value->author)) { + $item['author'] = $value->author; + } + if(!empty($value->content)) { + $item['content'] = defaultLinkTo($value->content, self::URI); + } + if(!empty($value->image)) { + $item['enclosures'][] = $value->image; + } + $item['uid'] = $value->_id; + $item['id'] = $value->_id; + + $this->items[] = $item; + } + + // Sort data + usort($this->items, function ($item1, $item2) { + if ($item2['timestamp'] == $item1['timestamp']) { + return 0; + } + return ($item2['timestamp'] < $item1['timestamp']) ? -1 : 1; + }); + + // Limit data + $this->items = array_slice($this->items, 0, $this->getInput('postcount')); + } +} diff --git a/bridges/ExtremeDownloadBridge.php b/bridges/ExtremeDownloadBridge.php index 1b4aa9a9..9859a2a6 100644 --- a/bridges/ExtremeDownloadBridge.php +++ b/bridges/ExtremeDownloadBridge.php @@ -1,7 +1,7 @@ array( + 'name' => 'Tag (author, category, ...)', + 'title' => 'Tag to retrieve', + 'exampleValue' => 'musik' + ), + 'loadcontent' => array( + 'name' => 'Load Full Article Content', + 'title' => 'Retrieve full content of articles (may take longer)', + 'type' => 'checkbox' + ), + 'pages' => array( + 'name' => 'Pages', + 'title' => 'Amount of pages to load', + 'type' => 'number', + 'defaultValue' => 1 + ) + ) + ); + + private function getPageData($tag, $page) { + if($tag) + $uri = self::URI . '/tags/' . $tag; + else + $uri = self::URI; + + $uri = $uri . '?page=' . 
$page; + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Error while downloading the website content'); + + $page_items = array(); + + foreach ($html->find('div[class*=listItem]') as $article) { + $item = array(); + + $item['uri'] = $article->find('a', 0)->href; + $item['title'] = $article->find('h2', 0)->plaintext; + $item['author'] = $article->find('p[class*=keyword]', 0)->plaintext; + $item['timestamp'] = strtotime($article->find('p[class*=time]', 0)->plaintext); + + if ($this->getInput('loadcontent')) { + $item['content'] = getSimpleHTMLDOM($item['uri'])->find('div[class=storyText]', 0)->innertext + or returnServerError('Error while downloading the full article'); + } + + $page_items[] = $item; + } + return $page_items; + } + + public function collectData() { + for ($cur_page = 1; $cur_page <= $this->getInput('pages'); $cur_page++) { + $this->items = array_merge($this->items, $this->getPageData($this->getInput('tag'), $cur_page)); + } + } +} diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 5ce67f94..c03de4ed 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -30,7 +30,7 @@ class FacebookBridge extends BridgeAbstract { 'type' => 'checkbox', 'required' => false, 'defaultValue' => false, - 'title' => 'Feed includes reviews when checked' + 'title' => 'Feed includes reviews when unchecked' ) ), 'Group' => array( @@ -175,7 +175,13 @@ class FacebookBridge extends BridgeAbstract { $header = array(); } - $html = getSimpleHTMLDOM($this->getURI(), $header) + $touchURI = str_replace( + 'https://www.facebook', + 'https://touch.facebook', + $this->getURI() + ); + + $html = getSimpleHTMLDOM($touchURI, $header) or returnServerError('Failed loading facebook page: ' . 
$this->getURI()); if(!$this->isPublicGroup($html)) { @@ -186,19 +192,18 @@ class FacebookBridge extends BridgeAbstract { $this->groupName = $this->extractGroupName($html); - $posts = $html->find('div.userContentWrapper') + $posts = $html->find('div.story_body_container') or returnServerError('Failed finding posts!'); foreach($posts as $post) { $item = array(); - $item['uri'] = $this->extractGroupURI($post); - $item['title'] = $this->extractGroupTitle($post); - $item['author'] = $this->extractGroupAuthor($post); - $item['content'] = $this->extractGroupContent($post); - $item['timestamp'] = $this->extractGroupTimestamp($post); - $item['enclosures'] = $this->extractGroupEnclosures($post); + $item['uri'] = $this->extractGroupPostURI($post); + $item['title'] = $this->extractGroupPostTitle($post); + $item['author'] = $this->extractGroupPostAuthor($post); + $item['content'] = $this->extractGroupPostContent($post); + $item['enclosures'] = $this->extractGroupPostEnclosures($post); $this->items[] = $item; @@ -215,16 +220,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($group); - if($urlparts['host'] !== parse_url(self::URI)['host'] - && 'www.' . $urlparts['host'] !== parse_url(self::URI)['host']) { - - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - - } + $this->validateHost($urlparts['host']); return explode('/', $urlparts['path'])[2]; @@ -236,24 +232,47 @@ class FacebookBridge extends BridgeAbstract { } + private function validateHost($provided_host) { + // Handle mobile links + if (strpos($provided_host, 'm.') === 0) { + $provided_host = substr($provided_host, strlen('m.')); + } + if (strpos($provided_host, 'touch.') === 0) { + $provided_host = substr($provided_host, strlen('touch.')); + } + + $facebook_host = parse_url(self::URI)['host']; + + if ($provided_host !== $facebook_host + && 'www.' . 
$provided_host !== $facebook_host) { + returnClientError('The host you provided is invalid! Received "' + . $provided_host + . '", expected "' + . $facebook_host + . '"!'); + } + } + + /** + * @param $html simple_html_dom + * @return bool + */ private function isPublicGroup($html) { - // Facebook redirects to the groups about page for non-public groups - $about = $html->find('#pagelet_group_about', 0); - - return !($about); - + // Facebook touch just presents a login page for non-public groups + $title = $html->find('title', 0); + return $title->plaintext !== 'Log in to Facebook | Facebook'; } private function extractGroupName($html) { - $ogtitle = $html->find('meta[property="og:title"]', 0) + $ogtitle = $html->find('._de1', 0) or returnServerError('Unable to find group title!'); - return html_entity_decode($ogtitle->content, ENT_QUOTES); + return html_entity_decode($ogtitle->plaintext, ENT_QUOTES); } - private function extractGroupURI($post) { + private function extractGroupPostURI($post) { $elements = $post->find('a') or returnServerError('Unable to find URI!'); @@ -262,7 +281,8 @@ class FacebookBridge extends BridgeAbstract { // Find the one that is a permalink if(strpos($anchor->href, 'permalink') !== false) { - return $anchor->href; + $arr = explode('?', $anchor->href, 2); + return $arr[0]; } } @@ -271,57 +291,61 @@ class FacebookBridge extends BridgeAbstract { } - private function extractGroupContent($post) { + private function extractGroupPostContent($post) { - $content = $post->find('div.userContent', 0) + $content = $post->find('div._5rgt', 0) or returnServerError('Unable to find user content!'); - return $content->innertext . 
$content->next_sibling()->innertext; + $context_text = $content->innertext; + if ($content->next_sibling() !== null) { + $context_text .= $content->next_sibling()->innertext; + } + return $context_text; } - private function extractGroupTimestamp($post) { + private function extractGroupPostAuthor($post) { - $element = $post->find('abbr[data-utime]', 0) - or returnServerError('Unable to find timestamp!'); - - return $element->getAttribute('data-utime'); - - } - - private function extractGroupAuthor($post) { - - $element = $post->find('img', 0) + $element = $post->find('h3 a', 0) or returnServerError('Unable to find author information!'); - return $element->{'aria-label'}; + return $element->plaintext; } - private function extractGroupEnclosures($post) { + private function extractGroupPostEnclosures($post) { - $elements = $post->find('div.userContent', 0)->next_sibling()->find('img'); + $elements = $post->find('span._6qdm'); + if ($post->find('div._5rgt', 0)->next_sibling() !== null) { + array_push($elements, ...$post->find('div._5rgt', 0)->next_sibling()->find('i.img')); + } $enclosures = array(); + $background_img_regex = '/background-image: ?url\\((.+?)\\);/'; + foreach($elements as $enclosure) { - $enclosures[] = $enclosure->src; + if(preg_match($background_img_regex, $enclosure, $matches) > 0) { + $bg_img_value = trim(html_entity_decode($matches[1], ENT_QUOTES), "'\""); + $bg_img_url = urldecode(preg_replace('/\\\([0-9a-z]{2}) /', '%$1', $bg_img_value)); + $enclosures[] = urldecode($bg_img_url); + } } return empty($enclosures) ? 
null : $enclosures; } - private function extractGroupTitle($post) { + private function extractGroupPostTitle($post) { - $element = $post->find('h5', 0) + $element = $post->find('h3', 0) or returnServerError('Unable to find title!'); if(strpos($element->plaintext, 'shared') === false) { - $content = strip_tags($this->extractGroupContent($post)); + $content = strip_tags($this->extractGroupPostContent($post)); - return $this->extractGroupAuthor($post) + return $this->extractGroupPostAuthor($post) . ' posted: ' . substr( $content, @@ -348,13 +372,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($user); - if($urlparts['host'] !== parse_url(self::URI)['host']) { - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - } + $this->validateHost($urlparts['host']); if(!array_key_exists('path', $urlparts) || $urlparts['path'] === '/') { @@ -555,7 +573,7 @@ EOD; } // No captcha? We can carry on retrieving page contents :) - // First, we check wether the page is public or not + // First, we check whether the page is public or not $loginForm = $html->find('._585r', 0); if($loginForm != null) { @@ -729,6 +747,7 @@ EOD; } } } + #endregion (User) } diff --git a/bridges/FicbookBridge.php b/bridges/FicbookBridge.php index 8b8a57fc..7c897017 100644 --- a/bridges/FicbookBridge.php +++ b/bridges/FicbookBridge.php @@ -35,6 +35,8 @@ class FicbookBridge extends BridgeAbstract { ), ); + protected $titleName; + public function getURI() { switch($this->queriedContext) { case 'Site News': { @@ -56,6 +58,21 @@ class FicbookBridge extends BridgeAbstract { } } + public function getName() { + switch($this->queriedContext) { + case 'Site News': { + return $this->queriedContext . ' | ' . self::NAME; + } + case 'Fiction Updates': { + return $this->titleName . ' | ' . self::NAME; + } + case 'Fiction Comments': { + return $this->titleName . ' | Comments | ' . 
self::NAME; + } + default: return self::NAME; + } + } + public function collectData() { $header = array('Accept-Language: en-US'); @@ -65,6 +82,10 @@ class FicbookBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::URI); + if ($this->queriedContext == 'Fiction Updates' or $this->queriedContext == 'Fiction Comments') { + $this->titleName = $html->find('.fanfic-main-info > h1', 0)->innertext; + } + switch($this->queriedContext) { case 'Site News': return $this->collectSiteNews($html); case 'Fiction Updates': return $this->collectUpdatesData($html); @@ -84,7 +105,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectCommentsData($html) { - foreach($html->find('article.post') as $article) { + foreach($html->find('article.comment-container') as $article) { $this->items[] = array( 'uri' => $article->find('.comment_link_to_fic > a', 0)->href, 'title' => $article->find('.comment_author', 0)->plaintext, @@ -97,7 +118,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectUpdatesData($html) { - foreach($html->find('ul.table-of-contents > li') as $chapter) { + foreach($html->find('ul.list-of-fanfic-parts > li') as $chapter) { $item = array( 'uri' => $chapter->find('a', 0)->href, 'title' => $chapter->find('a', 0)->plaintext, @@ -130,10 +151,10 @@ class FicbookBridge extends BridgeAbstract { 'июня', 'июля', 'августа', - 'Сентября', + 'сентября', 'октября', - 'Ноября', - 'Декабря', + 'ноября', + 'декабря', ); $en_month = array( diff --git a/bridges/FirstLookMediaTechBridge.php b/bridges/FirstLookMediaTechBridge.php new file mode 100644 index 00000000..114bf62d --- /dev/null +++ b/bridges/FirstLookMediaTechBridge.php @@ -0,0 +1,50 @@ + array( + 'type' => 'checkbox', + 'name' => 'Include Projects?', + ) + ) + ); + + public function collectData() { + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not load content'); + + if ($this->getInput('projects')) { + $top_projects = $html->find('.PromoList-ul', 0); + 
foreach($top_projects->find('li.PromoList-item') as $element) { + $item = array(); + + $item_uri = $element->find('a', 0); + $item['uri'] = $item_uri->href; + $item['title'] = strip_tags($item_uri->innertext); + $item['content'] = $element->find('div > div', 0); + + $this->items[] = $item; + } + } + + $top_articles = $html->find('.PromoList-ul', 1); + foreach($top_articles->find('li.PromoList-item') as $element) { + $item = array(); + + $item_left = $element->find('div > div', 0); + $item_date = $element->find('.PromoList-date', 0); + $item['timestamp'] = strtotime($item_date->innertext); + $item_date->outertext = ''; /* Remove */ + $item['author'] = $item_left->innertext; + $item_uri = $element->find('a', 0); + $item['uri'] = self::URI . $item_uri->href; + $item['title'] = strip_tags($item_uri); + + $this->items[] = $item; + } + } +} diff --git a/bridges/FlickrBridge.php b/bridges/FlickrBridge.php index cb9db722..feab191a 100644 --- a/bridges/FlickrBridge.php +++ b/bridges/FlickrBridge.php @@ -20,6 +20,27 @@ class FlickrBridge extends BridgeAbstract { 'required' => true, 'title' => 'Insert keyword', 'exampleValue' => 'bird' + ), + 'media' => array( + 'name' => 'Media', + 'type' => 'list', + 'values' => array( + 'All (Photos & videos)' => 'all', + 'Photos' => 'photos', + 'Videos' => 'videos', + ), + 'defaultValue' => 'all', + ), + 'sort' => array( + 'name' => 'Sort By', + 'type' => 'list', + 'values' => array( + 'Relevance' => 'relevance', + 'Date uploaded' => 'date-posted-desc', + 'Date taken' => 'date-taken-desc', + 'Interesting' => 'interestingness-desc', + ), + 'defaultValue' => 'relevance', ) ), 'By username' => array( @@ -29,30 +50,60 @@ class FlickrBridge extends BridgeAbstract { 'required' => true, 'title' => 'Insert username (as shown in the address bar)', 'exampleValue' => 'flickr' + ), + 'media' => array( + 'name' => 'Media', + 'type' => 'list', + 'values' => array( + 'All (Photos & videos)' => 'all', + 'Photos' => 'photos', + 'Videos' => 'videos', + ), 
+ 'defaultValue' => 'all', + ), + 'sort' => array( + 'name' => 'Sort By', + 'type' => 'list', + 'values' => array( + 'Relevance' => 'relevance', + 'Date uploaded' => 'date-posted-desc', + 'Date taken' => 'date-taken-desc', + 'Interesting' => 'interestingness-desc', + ), + 'defaultValue' => 'date-posted-desc', ) ) ); - public function collectData(){ + private $username = ''; + + public function collectData() { switch($this->queriedContext) { case 'Explore': $filter = 'photo-lite-models'; - $html = getSimpleHTMLDOM(self::URI . 'explore') + $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request Flickr.'); break; case 'By keyword': $filter = 'photo-lite-models'; - $html = getSimpleHTMLDOM(self::URI . 'search/?q=' . urlencode($this->getInput('q')) . '&s=rec') + $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('No results for this query.'); break; case 'By username': - $filter = 'photo-models'; - $html = getSimpleHTMLDOM(self::URI . 'photos/' . urlencode($this->getInput('u'))) + //$filter = 'photo-models'; + $filter = 'photo-lite-models'; + $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Requested username can\'t be found.'); + + $this->username = $this->getInput('u'); + + if ($html->find('span.search-pill-name', 0)) { + $this->username = $html->find('span.search-pill-name', 0)->plaintext; + } break; default: @@ -64,7 +115,6 @@ class FlickrBridge extends BridgeAbstract { $photo_models = $this->getPhotoModels($model_json, $filter); foreach($photo_models as $model) { - $item = array(); /* Author name depends on scope. On a keyword search the @@ -72,12 +122,12 @@ class FlickrBridge extends BridgeAbstract { * the author is part of the owner data. 
*/ if(array_key_exists('username', $model)) { - $item['author'] = $model['username']; + $item['author'] = urldecode($model['username']); } elseif (array_key_exists('owner', reset($model_json)[0])) { - $item['author'] = reset($model_json)[0]['owner']['username']; + $item['author'] = urldecode(reset($model_json)[0]['owner']['username']); } - $item['title'] = (array_key_exists('title', $model) ? $model['title'] : 'Untitled'); + $item['title'] = urldecode((array_key_exists('title', $model) ? $model['title'] : 'Untitled')); $item['uri'] = self::URI . 'photo.gne?id=' . $model['id']; $description = (array_key_exists('description', $model) ? $model['description'] : ''); @@ -87,7 +137,7 @@ class FlickrBridge extends BridgeAbstract { . '">

' - . $description + . urldecode($description) . '

'; $item['enclosures'] = $this->extractEnclosures($model); @@ -98,6 +148,46 @@ class FlickrBridge extends BridgeAbstract { } + public function getURI() { + + switch($this->queriedContext) { + case 'Explore': + return self::URI . 'explore'; + break; + case 'By keyword': + return self::URI . 'search/?q=' . urlencode($this->getInput('q')) + . '&sort=' . $this->getInput('sort') . '&media=' . $this->getInput('media'); + break; + case 'By username': + return self::URI . 'search/?user_id=' . urlencode($this->getInput('u')) + . '&sort=' . $this->getInput('sort') . '&media=' . $this->getInput('media'); + break; + + default: + return parent::getURI(); + } + } + + public function getName() { + + switch($this->queriedContext) { + case 'Explore': + return 'Explore - ' . self::NAME; + break; + case 'By keyword': + return $this->getInput('q') . ' - keyword - ' . self::NAME; + break; + case 'By username': + return $this->username . ' - ' . self::NAME; + break; + + default: + return parent::getName(); + } + + return parent::getName(); + } + private function extractJsonModel($html) { // Find SCRIPT containing JSON data diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php new file mode 100644 index 00000000..9a9717cb --- /dev/null +++ b/bridges/FolhaDeSaoPauloBridge.php @@ -0,0 +1,52 @@ + array( + 'name' => 'Feed sub-URL', + 'type' => 'text', + 'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)', + 'exampleValue' => 'emcimadahora/rss091.xml', + ) + ) + ); + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); + if($articleHTMLContent) { + foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) { + $toRemove->innertext = ''; + } + $item_content = $articleHTMLContent->find('div.c-news__body', 0); + if ($item_content) { + $text = $item_content->innertext; + $text = strip_tags($text, '

'); + $item['content'] = $text; + $item['uri'] = explode('*', $item['uri'])[1]; + } + } else { + Debug::log('???: ' . $item['uri']); + } + + return $item; + } + + public function collectData(){ + $feed_input = $this->getInput('feed'); + if (substr($feed_input, 0, strlen(self::URI)) === self::URI) { + Debug::log('Input:: ' . $feed_input); + $feed_url = $feed_input; + } else { + /* TODO: prepend `/` if missing */ + $feed_url = self::URI . '/' . $this->getInput('feed'); + } + Debug::log('URL: ' . $feed_url); + $this->collectExpandableDatas($feed_url); + } +} diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 772f4438..79c05880 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -96,7 +96,7 @@ class FuturaSciencesBridge extends FeedExpander { } private function extractArticleContent($article){ - $contents = $article->find('section.article-text-classic', 0)->innertext; + $contents = $article->find('section.article-text', 1)->innertext; $headline = trim($article->find('p.description', 0)->plaintext); if(!empty($headline)) $headline = '

' . $headline . '

'; @@ -129,6 +129,7 @@ class FuturaSciencesBridge extends FeedExpander { $contents = stripWithDelimiters($contents, 'fs:xt:clickname="', '"'); $contents = StripWithDelimiters($contents, '
.comment, - [id^="issuecomment-"] > .comment, - [id^="event-"], - [id^="ref-"] - '); + $comments = $issue->find( + '.comment, .TimelineItem-badge' + ); + foreach($comments as $comment) { - - if (!$comment->hasChildNodes()) { - continue; - } - - if (!$comment->hasClass('discussion-item-header')) { + if ($comment->hasClass('comment')) { + $comment = $comment->parent; $item = $this->extractIssueComment($issueNbr, $title, $comment); $items[] = $item; continue; - } - - while ($comment->hasClass('discussion-item-header')) { + } else { + $comment = $comment->parent; $item = $this->extractIssueEvent($issueNbr, $title, $comment); $items[] = $item; - $comment = $comment->nextSibling(); - if (null == $comment) { - break; - } - $classes = explode(' ', $comment->getAttribute('class')); } } @@ -180,9 +170,9 @@ class GithubIssueBridge extends BridgeAbstract { case 'Project Issues': foreach($html->find('.js-active-navigation-container .js-navigation-item') as $issue) { $info = $issue->find('.opened-by', 0); - $issueNbr = substr( - trim($info->plaintext), 1, strpos(trim($info->plaintext), ' ') - ); + + preg_match('/\/([0-9]+)$/', $issue->find('a', 0)->href, $match); + $issueNbr = $match[1]; $item = array(); $item['content'] = ''; diff --git a/bridges/GithubSearchBridge.php b/bridges/GithubSearchBridge.php index fd90934c..9c1face4 100644 --- a/bridges/GithubSearchBridge.php +++ b/bridges/GithubSearchBridge.php @@ -27,16 +27,16 @@ class GithubSearchBridge extends BridgeAbstract { foreach($html->find('li.repo-list-item') as $element) { $item = array(); - $uri = $element->find('h3 a', 0)->href; + $uri = $element->find('.f4 a', 0)->href; $uri = substr(self::URI, 0, -1) . 
$uri; $item['uri'] = $uri; - $title = $element->find('h3', 0)->plaintext; + $title = $element->find('.f4', 0)->plaintext; $item['title'] = $title; // Description - if (count($element->find('p.d-inline-block')) != 0) { - $content = $element->find('p.d-inline-block', 0)->innertext; + if (count($element->find('p.mb-1')) != 0) { + $content = $element->find('p.mb-1', 0)->innertext; } else{ $content = 'No description'; } diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php new file mode 100644 index 00000000..0b4a9078 --- /dev/null +++ b/bridges/GithubTrendingBridge.php @@ -0,0 +1,636 @@ + array( + 'language' => array( + 'name' => 'Select language', + 'type' => 'list', + 'values' => array( + 'All languages' => '', + 'C++' => 'c++', + 'HTML' => 'html', + 'Java' => 'java', + 'JavaScript' => 'javascript', + 'PHP' => 'php', + 'Python' => 'python', + 'Ruby' => 'ruby', + 'Unknown languages' => 'unknown languages', + '1C Enterprise' => '1c enterprise', + '4D' => '4d', + 'ABAP' => 'abap', + 'ABNF' => 'abnf', + 'ActionScript' => 'actionscript', + 'Ada' => 'ada', + 'Adobe Font Metrics' => 'adobe font metrics', + 'Agda' => 'agda', + 'AGS Script' => 'ags script', + 'Alloy' => 'alloy', + 'Alpine Abuild' => 'alpine abuild', + 'Altium Designer' => 'altium designer', + 'AMPL' => 'ampl', + 'AngelScript' => 'angelscript', + 'Ant Build System' => 'ant build system', + 'ANTLR' => 'antlr', + 'ApacheConf' => 'apacheconf', + 'Apex' => 'apex', + 'API Blueprint' => 'api blueprint', + 'APL' => 'apl', + 'Apollo Guidance Computer' => 'apollo guidance computer', + 'AppleScript' => 'applescript', + 'Arc' => 'arc', + 'AsciiDoc' => 'asciidoc', + 'ASN.1' => 'asn.1', + 'ASP' => 'asp', + 'AspectJ' => 'aspectj', + 'Assembly' => 'assembly', + 'Asymptote' => 'asymptote', + 'ATS' => 'ats', + 'Augeas' => 'augeas', + 'AutoHotkey' => 'autohotkey', + 'AutoIt' => 'autoit', + 'Awk' => 'awk', + 'Ballerina' => 'ballerina', + 'Batchfile' => 'batchfile', + 'Befunge' => 'befunge', + 'BibTeX' 
=> 'bibtex', + 'Bison' => 'bison', + 'BitBake' => 'bitbake', + 'Blade' => 'blade', + 'BlitzBasic' => 'blitzbasic', + 'BlitzMax' => 'blitzmax', + 'Bluespec' => 'bluespec', + 'Boo' => 'boo', + 'Brainfuck' => 'brainfuck', + 'Brightscript' => 'brightscript', + 'Zeek' => 'zeek', + 'C' => 'c', + 'C#' => 'c#', + 'C++' => 'c++', + 'C-ObjDump' => 'c-objdump', + 'C2hs Haskell' => 'c2hs haskell', + 'Cabal Config' => 'cabal config', + 'CartoCSS' => 'cartocss', + 'Ceylon' => 'ceylon', + 'Chapel' => 'chapel', + 'Charity' => 'charity', + 'ChucK' => 'chuck', + 'Cirru' => 'cirru', + 'Clarion' => 'clarion', + 'Clean' => 'clean', + 'Click' => 'click', + 'CLIPS' => 'clips', + 'Clojure' => 'clojure', + 'Closure Templates' => 'closure templates', + 'Cloud Firestore Security Rules' => 'cloud firestore security rules', + 'CMake' => 'cmake', + 'COBOL' => 'cobol', + 'CodeQL' => 'codeql', + 'CoffeeScript' => 'coffeescript', + 'ColdFusion' => 'coldfusion', + 'ColdFusion CFC' => 'coldfusion cfc', + 'COLLADA' => 'collada', + 'Common Lisp' => 'common lisp', + 'Common Workflow Language' => 'common workflow language', + 'Component Pascal' => 'component pascal', + 'CoNLL-U' => 'conll-u', + 'Cool' => 'cool', + 'Coq' => 'coq', + 'Cpp-ObjDump' => 'cpp-objdump', + 'Creole' => 'creole', + 'Crystal' => 'crystal', + 'CSON' => 'cson', + 'Csound' => 'csound', + 'Csound Document' => 'csound document', + 'Csound Score' => 'csound score', + 'CSS' => 'css', + 'CSV' => 'csv', + 'Cuda' => 'cuda', + 'cURL Config' => 'curl config', + 'CWeb' => 'cweb', + 'Cycript' => 'cycript', + 'Cython' => 'cython', + 'D' => 'd', + 'D-ObjDump' => 'd-objdump', + 'Darcs Patch' => 'darcs patch', + 'Dart' => 'dart', + 'DataWeave' => 'dataweave', + 'desktop' => 'desktop', + 'Dhall' => 'dhall', + 'Diff' => 'diff', + 'DIGITAL Command Language' => 'digital command language', + 'dircolors' => 'dircolors', + 'DirectX 3D File' => 'directx 3d file', + 'DM' => 'dm', + 'DNS Zone' => 'dns zone', + 'Dockerfile' => 'dockerfile', + 'Dogescript' => 
'dogescript', + 'DTrace' => 'dtrace', + 'Dylan' => 'dylan', + 'E' => 'e', + 'Eagle' => 'eagle', + 'Easybuild' => 'easybuild', + 'EBNF' => 'ebnf', + 'eC' => 'ec', + 'Ecere Projects' => 'ecere projects', + 'ECL' => 'ecl', + 'ECLiPSe' => 'eclipse', + 'EditorConfig' => 'editorconfig', + 'Edje Data Collection' => 'edje data collection', + 'edn' => 'edn', + 'Eiffel' => 'eiffel', + 'EJS' => 'ejs', + 'Elixir' => 'elixir', + 'Elm' => 'elm', + 'Emacs Lisp' => 'emacs lisp', + 'EmberScript' => 'emberscript', + 'EML' => 'eml', + 'EQ' => 'eq', + 'Erlang' => 'erlang', + 'F#' => 'f#', + 'F*' => 'f*', + 'Factor' => 'factor', + 'Fancy' => 'fancy', + 'Fantom' => 'fantom', + 'Faust' => 'faust', + 'FIGlet Font' => 'figlet font', + 'Filebench WML' => 'filebench wml', + 'Filterscript' => 'filterscript', + 'fish' => 'fish', + 'FLUX' => 'flux', + 'Formatted' => 'formatted', + 'Forth' => 'forth', + 'Fortran' => 'fortran', + 'FreeMarker' => 'freemarker', + 'Frege' => 'frege', + 'G-code' => 'g-code', + 'Game Maker Language' => 'game maker language', + 'GAML' => 'gaml', + 'GAMS' => 'gams', + 'GAP' => 'gap', + 'GCC Machine Description' => 'gcc machine description', + 'GDB' => 'gdb', + 'GDScript' => 'gdscript', + 'Genie' => 'genie', + 'Genshi' => 'genshi', + 'Gentoo Ebuild' => 'gentoo ebuild', + 'Gentoo Eclass' => 'gentoo eclass', + 'Gerber Image' => 'gerber image', + 'Gettext Catalog' => 'gettext catalog', + 'Gherkin' => 'gherkin', + 'Git Attributes' => 'git attributes', + 'Git Config' => 'git config', + 'GLSL' => 'glsl', + 'Glyph' => 'glyph', + 'Glyph Bitmap Distribution Format' => 'glyph bitmap distribution format', + 'GN' => 'gn', + 'Gnuplot' => 'gnuplot', + 'Go' => 'go', + 'Golo' => 'golo', + 'Gosu' => 'gosu', + 'Grace' => 'grace', + 'Gradle' => 'gradle', + 'Grammatical Framework' => 'grammatical framework', + 'Graph Modeling Language' => 'graph modeling language', + 'GraphQL' => 'graphql', + 'Graphviz (DOT)' => 'graphviz (dot)', + 'Groovy' => 'groovy', + 'Groovy Server Pages' => 'groovy 
server pages', + 'Hack' => 'hack', + 'Haml' => 'haml', + 'Handlebars' => 'handlebars', + 'HAProxy' => 'haproxy', + 'Harbour' => 'harbour', + 'Haskell' => 'haskell', + 'Haxe' => 'haxe', + 'HCL' => 'hcl', + 'HiveQL' => 'hiveql', + 'HLSL' => 'hlsl', + 'HolyC' => 'holyc', + 'HTML' => 'html', + 'HTML+Django' => 'html+django', + 'HTML+ECR' => 'html+ecr', + 'HTML+EEX' => 'html+eex', + 'HTML+ERB' => 'html+erb', + 'HTML+PHP' => 'html+php', + 'HTML+Razor' => 'html+razor', + 'HTTP' => 'http', + 'HXML' => 'hxml', + 'Hy' => 'hy', + 'HyPhy' => 'hyphy', + 'IDL' => 'idl', + 'Idris' => 'idris', + 'Ignore List' => 'ignore list', + 'IGOR Pro' => 'igor pro', + 'Inform 7' => 'inform 7', + 'INI' => 'ini', + 'Inno Setup' => 'inno setup', + 'Io' => 'io', + 'Ioke' => 'ioke', + 'IRC log' => 'irc log', + 'Isabelle' => 'isabelle', + 'Isabelle ROOT' => 'isabelle root', + 'J' => 'j', + 'Jasmin' => 'jasmin', + 'Java' => 'java', + 'Java Properties' => 'java properties', + 'Java Server Pages' => 'java server pages', + 'JavaScript' => 'javascript', + 'JavaScript+ERB' => 'javascript+erb', + 'JFlex' => 'jflex', + 'Jison' => 'jison', + 'Jison Lex' => 'jison lex', + 'Jolie' => 'jolie', + 'JSON' => 'json', + 'JSON with Comments' => 'json with comments', + 'JSON5' => 'json5', + 'JSONiq' => 'jsoniq', + 'JSONLD' => 'jsonld', + 'Jsonnet' => 'jsonnet', + 'JSX' => 'jsx', + 'Julia' => 'julia', + 'Jupyter Notebook' => 'jupyter notebook', + 'KiCad Layout' => 'kicad layout', + 'KiCad Legacy Layout' => 'kicad legacy layout', + 'KiCad Schematic' => 'kicad schematic', + 'Kit' => 'kit', + 'Kotlin' => 'kotlin', + 'KRL' => 'krl', + 'LabVIEW' => 'labview', + 'Lasso' => 'lasso', + 'Latte' => 'latte', + 'Lean' => 'lean', + 'Less' => 'less', + 'Lex' => 'lex', + 'LFE' => 'lfe', + 'LilyPond' => 'lilypond', + 'Limbo' => 'limbo', + 'Linker Script' => 'linker script', + 'Linux Kernel Module' => 'linux kernel module', + 'Liquid' => 'liquid', + 'Literate Agda' => 'literate agda', + 'Literate CoffeeScript' => 'literate 
coffeescript', + 'Literate Haskell' => 'literate haskell', + 'LiveScript' => 'livescript', + 'LLVM' => 'llvm', + 'Logos' => 'logos', + 'Logtalk' => 'logtalk', + 'LOLCODE' => 'lolcode', + 'LookML' => 'lookml', + 'LoomScript' => 'loomscript', + 'LSL' => 'lsl', + 'LTspice Symbol' => 'ltspice symbol', + 'Lua' => 'lua', + 'M' => 'm', + 'M4' => 'm4', + 'M4Sugar' => 'm4sugar', + 'Makefile' => 'makefile', + 'Mako' => 'mako', + 'Markdown' => 'markdown', + 'Marko' => 'marko', + 'Mask' => 'mask', + 'Mathematica' => 'mathematica', + 'MATLAB' => 'matlab', + 'Maven POM' => 'maven pom', + 'Max' => 'max', + 'MAXScript' => 'maxscript', + 'mcfunction' => 'mcfunction', + 'MediaWiki' => 'mediawiki', + 'Mercury' => 'mercury', + 'Meson' => 'meson', + 'Metal' => 'metal', + 'Microsoft Developer Studio Project' => 'microsoft developer studio project', + 'MiniD' => 'minid', + 'Mirah' => 'mirah', + 'mIRC Script' => 'mirc script', + 'MLIR' => 'mlir', + 'Modelica' => 'modelica', + 'Modula-2' => 'modula-2', + 'Modula-3' => 'modula-3', + 'Module Management System' => 'module management system', + 'Monkey' => 'monkey', + 'Moocode' => 'moocode', + 'MoonScript' => 'moonscript', + 'Motorola 68K Assembly' => 'motorola 68k assembly', + 'MQL4' => 'mql4', + 'MQL5' => 'mql5', + 'MTML' => 'mtml', + 'MUF' => 'muf', + 'mupad' => 'mupad', + 'Muse' => 'muse', + 'Myghty' => 'myghty', + 'nanorc' => 'nanorc', + 'NASL' => 'nasl', + 'NCL' => 'ncl', + 'Nearley' => 'nearley', + 'Nemerle' => 'nemerle', + 'nesC' => 'nesc', + 'NetLinx' => 'netlinx', + 'NetLinx+ERB' => 'netlinx+erb', + 'NetLogo' => 'netlogo', + 'NewLisp' => 'newlisp', + 'Nextflow' => 'nextflow', + 'Nginx' => 'nginx', + 'Nim' => 'nim', + 'Ninja' => 'ninja', + 'Nit' => 'nit', + 'Nix' => 'nix', + 'NL' => 'nl', + 'NPM Config' => 'npm config', + 'NSIS' => 'nsis', + 'Nu' => 'nu', + 'NumPy' => 'numpy', + 'ObjDump' => 'objdump', + 'Object Data Instance Notation' => 'object data instance notation', + 'Objective-C' => 'objective-c', + 'Objective-C++' => 
'objective-c++', + 'Objective-J' => 'objective-j', + 'ObjectScript' => 'objectscript', + 'OCaml' => 'ocaml', + 'Odin' => 'odin', + 'Omgrofl' => 'omgrofl', + 'ooc' => 'ooc', + 'Opa' => 'opa', + 'Opal' => 'opal', + 'Open Policy Agent' => 'open policy agent', + 'OpenCL' => 'opencl', + 'OpenEdge ABL' => 'openedge abl', + 'OpenQASM' => 'openqasm', + 'OpenRC runscript' => 'openrc runscript', + 'OpenSCAD' => 'openscad', + 'OpenStep Property List' => 'openstep property list', + 'OpenType Feature File' => 'opentype feature file', + 'Org' => 'org', + 'Ox' => 'ox', + 'Oxygene' => 'oxygene', + 'Oz' => 'oz', + 'P4' => 'p4', + 'Pan' => 'pan', + 'Papyrus' => 'papyrus', + 'Parrot' => 'parrot', + 'Parrot Assembly' => 'parrot assembly', + 'Parrot Internal Representation' => 'parrot internal representation', + 'Pascal' => 'pascal', + 'Pawn' => 'pawn', + 'Pep8' => 'pep8', + 'Perl' => 'perl', + 'PHP' => 'php', + 'Pic' => 'pic', + 'Pickle' => 'pickle', + 'PicoLisp' => 'picolisp', + 'PigLatin' => 'piglatin', + 'Pike' => 'pike', + 'PLpgSQL' => 'plpgsql', + 'PLSQL' => 'plsql', + 'Pod' => 'pod', + 'Pod 6' => 'pod 6', + 'PogoScript' => 'pogoscript', + 'Pony' => 'pony', + 'PostCSS' => 'postcss', + 'PostScript' => 'postscript', + 'POV-Ray SDL' => 'pov-ray sdl', + 'PowerBuilder' => 'powerbuilder', + 'PowerShell' => 'powershell', + 'Prisma' => 'prisma', + 'Processing' => 'processing', + 'Proguard' => 'proguard', + 'Prolog' => 'prolog', + 'Propeller Spin' => 'propeller spin', + 'Protocol Buffer' => 'protocol buffer', + 'Public Key' => 'public key', + 'Pug' => 'pug', + 'Puppet' => 'puppet', + 'Pure Data' => 'pure data', + 'PureBasic' => 'purebasic', + 'PureScript' => 'purescript', + 'Python' => 'python', + 'Python console' => 'python console', + 'Python traceback' => 'python traceback', + 'q' => 'q', + 'QMake' => 'qmake', + 'QML' => 'qml', + 'Quake' => 'quake', + 'R' => 'r', + 'Racket' => 'racket', + 'Ragel' => 'ragel', + 'Raku' => 'raku', + 'RAML' => 'raml', + 'Rascal' => 'rascal', + 'Raw token 
data' => 'raw token data', + 'RDoc' => 'rdoc', + 'Readline Config' => 'readline config', + 'REALbasic' => 'realbasic', + 'Reason' => 'reason', + 'Rebol' => 'rebol', + 'Red' => 'red', + 'Redcode' => 'redcode', + 'Regular Expression' => 'regular expression', + // 'Ren'Py' => 'ren'py', + 'RenderScript' => 'renderscript', + 'reStructuredText' => 'restructuredtext', + 'REXX' => 'rexx', + 'RHTML' => 'rhtml', + 'Rich Text Format' => 'rich text format', + 'Ring' => 'ring', + 'Riot' => 'riot', + 'RMarkdown' => 'rmarkdown', + 'RobotFramework' => 'robotframework', + 'Roff' => 'roff', + 'Roff Manpage' => 'roff manpage', + 'Rouge' => 'rouge', + 'RPC' => 'rpc', + 'RPM Spec' => 'rpm spec', + 'Ruby' => 'ruby', + 'RUNOFF' => 'runoff', + 'Rust' => 'rust', + 'Sage' => 'sage', + 'SaltStack' => 'saltstack', + 'SAS' => 'sas', + 'Sass' => 'sass', + 'Scala' => 'scala', + 'Scaml' => 'scaml', + 'Scheme' => 'scheme', + 'Scilab' => 'scilab', + 'SCSS' => 'scss', + 'sed' => 'sed', + 'Self' => 'self', + 'ShaderLab' => 'shaderlab', + 'Shell' => 'shell', + 'ShellSession' => 'shellsession', + 'Shen' => 'shen', + 'Slash' => 'slash', + 'Slice' => 'slice', + 'Slim' => 'slim', + 'Smali' => 'smali', + 'Smalltalk' => 'smalltalk', + 'Smarty' => 'smarty', + 'SmPL' => 'smpl', + 'SMT' => 'smt', + 'Solidity' => 'solidity', + 'SourcePawn' => 'sourcepawn', + 'SPARQL' => 'sparql', + 'Spline Font Database' => 'spline font database', + 'SQF' => 'sqf', + 'SQL' => 'sql', + 'SQLPL' => 'sqlpl', + 'Squirrel' => 'squirrel', + 'SRecode Template' => 'srecode template', + 'SSH Config' => 'ssh config', + 'Stan' => 'stan', + 'Standard ML' => 'standard ml', + 'Starlark' => 'starlark', + 'Stata' => 'stata', + 'STON' => 'ston', + 'Stylus' => 'stylus', + 'SubRip Text' => 'subrip text', + 'SugarSS' => 'sugarss', + 'SuperCollider' => 'supercollider', + 'Svelte' => 'svelte', + 'SVG' => 'svg', + 'Swift' => 'swift', + 'SWIG' => 'swig', + 'SystemVerilog' => 'systemverilog', + 'Tcl' => 'tcl', + 'Tcsh' => 'tcsh', + 'Tea' => 'tea', + 
'Terra' => 'terra', + 'TeX' => 'tex', + 'Texinfo' => 'texinfo', + 'Text' => 'text', + 'Textile' => 'textile', + 'Thrift' => 'thrift', + 'TI Program' => 'ti program', + 'TLA' => 'tla', + 'TOML' => 'toml', + 'TSQL' => 'tsql', + 'TSX' => 'tsx', + 'Turing' => 'turing', + 'Turtle' => 'turtle', + 'Twig' => 'twig', + 'TXL' => 'txl', + 'Type Language' => 'type language', + 'TypeScript' => 'typescript', + 'Unified Parallel C' => 'unified parallel c', + 'Unity3D Asset' => 'unity3d asset', + 'Unix Assembly' => 'unix assembly', + 'Uno' => 'uno', + 'UnrealScript' => 'unrealscript', + 'UrWeb' => 'urweb', + 'V' => 'v', + 'Vala' => 'vala', + 'VBA' => 'vba', + 'VBScript' => 'vbscript', + 'VCL' => 'vcl', + 'Verilog' => 'verilog', + 'VHDL' => 'vhdl', + 'Vim script' => 'vim script', + 'Vim Snippet' => 'vim snippet', + 'Visual Basic .NET' => 'visual basic .net', + 'Visual Basic .NET' => 'visual basic .net', + 'Volt' => 'volt', + 'Vue' => 'vue', + 'Wavefront Material' => 'wavefront material', + 'Wavefront Object' => 'wavefront object', + 'wdl' => 'wdl', + 'Web Ontology Language' => 'web ontology language', + 'WebAssembly' => 'webassembly', + 'WebIDL' => 'webidl', + 'WebVTT' => 'webvtt', + 'Wget Config' => 'wget config', + 'Windows Registry Entries' => 'windows registry entries', + 'wisp' => 'wisp', + 'Wollok' => 'wollok', + 'World of Warcraft Addon Data' => 'world of warcraft addon data', + 'X BitMap' => 'x bitmap', + 'X Font Directory Index' => 'x font directory index', + 'X PixMap' => 'x pixmap', + 'X10' => 'x10', + 'xBase' => 'xbase', + 'XC' => 'xc', + 'XCompose' => 'xcompose', + 'XML' => 'xml', + 'XML Property List' => 'xml property list', + 'Xojo' => 'xojo', + 'XPages' => 'xpages', + 'XProc' => 'xproc', + 'XQuery' => 'xquery', + 'XS' => 'xs', + 'XSLT' => 'xslt', + 'Xtend' => 'xtend', + 'Yacc' => 'yacc', + 'YAML' => 'yaml', + 'YANG' => 'yang', + 'YARA' => 'yara', + 'YASnippet' => 'yasnippet', + 'ZAP' => 'zap', + 'Zeek' => 'zeek', + 'ZenScript' => 'zenscript', + 'Zephir' => 'zephir', 
+ 'Zig' => 'zig', + 'ZIL' => 'zil', + 'Zimpl' => 'zimpl', + ), + 'defaultValue' => 'All languages' + ) + ), + + 'global' => array( + 'date_range' => array( + 'name' => 'Date range', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Today' => 'today', + 'Weekly' => 'weekly', + 'Monthly' => 'monthly', + ), + 'defaultValue' => 'today' + ) + ) + + ); + + public function collectData(){ + $params = array('since' => urlencode($this->getInput('date_range'))); + $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Error while downloading the website content'); + + $this->items = array(); + foreach($html->find('.Box-row') as $element) { + $item = array(); + + // URI + $item['uri'] = self::URI_ITEM . $element->find('h1 a', 0)->href; + + // Title + $item['title'] = str_replace(' ', '', trim(strip_tags($element->find('h1 a', 0)->plaintext))); + + // Description + $item['content'] = trim(strip_tags($element->find('p.text-gray', 0)->innertext)); + + // Time + $item['timestamp'] = time(); + + // TODO: Proxy? + $this->items[] = $item; + } + } + + public function getName(){ + if($this->getInput('language') == '') { + return self::NAME . ': all'; + } elseif (!is_null($this->getInput('language'))) { + return self::NAME . ': ' . 
$this->getInput('language'); + } + + return parent::getName(); + } +} diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php index 35f162b8..4b924a22 100644 --- a/bridges/GizmodoBridge.php +++ b/bridges/GizmodoBridge.php @@ -3,34 +3,78 @@ class GizmodoBridge extends FeedExpander { const MAINTAINER = 'polopollo'; const NAME = 'Gizmodo'; - const URI = 'http://gizmodo.com/'; + const URI = 'https://gizmodo.com'; const CACHE_TIMEOUT = 1800; // 30min - const DESCRIPTION = 'Returns the newest posts from Gizmodo (full text).'; + const DESCRIPTION = 'Returns the newest posts from Gizmodo.'; - protected function parseItem($item){ + protected function parseItem($item) { $item = parent::parseItem($item); - $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); - if(!$articleHTMLContent) { - $text = 'Could not load ' . $item['uri']; - } else { - $text = $articleHTMLContent->find('div.entry-content', 0)->innertext; - foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { - $text .= '

link to a iframe (could be a video): ' - . $element->src - . '


'; - } + $html = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request: ' . $item['uri']); - $text = strip_tags($text, '

'); - } + $html = defaultLinkTo($html, $this->getURI()); + $this->stripTags($html); + $this->handleFigureTags($html); + $this->handleIframeTags($html); + + // Get header image + $image = $html->find('meta[property="og:image"]', 0)->content; + + $item['content'] = $html->find('div.js_post-content', 0)->innertext; + + // Get categories + $categories = explode(',', $html->find('meta[name="keywords"]', 0)->content); + $item['categories'] = array_map('trim', $categories); + + $item['enclosures'][] = $html->find('meta[property="og:image"]', 0)->content; - $item['content'] = $text; return $item; } - public function collectData(){ - $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full'); + public function collectData() { + $this->collectExpandableDatas(self::URI . '/rss', 20); + } + + private function stripTags($html) { + foreach ($html->find('aside') as $aside) { + $aside->outertext = ''; + } + + foreach ($html->find('div.ad-unit') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('script') as $script) { + $script->outertext = ''; + } + } + + private function handleFigureTags($html) { + foreach ($html->find('figure') as $index => $figure) { + + if (isset($figure->attr['data-id'])) { + $id = $figure->attr['data-id']; + $format = $figure->attr['data-format']; + + } else { + $img = $figure->find('img', 0); + $id = $img->attr['data-chomp-id']; + $format = $img->attr['data-format']; + $figure->find('div.img-permalink-sub-wrapper', 0)->style = ''; + } + + $imageUrl = 'https://i.kinja-img.com/gawker-media/image/upload/' . $id . '.' . 
$format; + + $figure->find('span', 0)->outertext = << +EOD; + } + } + + private function handleIframeTags($html) { + foreach($html->find('iframe') as $iframe) { + $iframe->src = urljoin($this->getURI(), $iframe->src); + } } } diff --git a/bridges/GoComicsBridge.php b/bridges/GoComicsBridge.php index 3223d19a..7512d84a 100644 --- a/bridges/GoComicsBridge.php +++ b/bridges/GoComicsBridge.php @@ -28,7 +28,7 @@ class GoComicsBridge extends BridgeAbstract { $page = getSimpleHTMLDOM($link) or returnServerError('Could not request GoComics: ' . $link); - $imagelink = $page->find('.img-fluid', 1)->src; + $imagelink = $page->find('.comic.container', 0)->getAttribute('data-image'); $date = explode('/', $link); $item['id'] = $imagelink; diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index e02aaeba..10f0f121 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -35,16 +35,10 @@ class GoogleSearchBridge extends BridgeAbstract { $item = array(); - // Extract direct URL from google href (eg. /url?q=...) $t = $element->find('a[href]', 0)->href; - $item['uri'] = '' . $t; - parse_str(parse_url($t, PHP_URL_QUERY), $parameters); - if(isset($parameters['q'])) { - $item['uri'] = $parameters['q']; - } - + $item['uri'] = htmlspecialchars_decode($t); $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('span[class=st]', 0)->plaintext; + $item['content'] = $element->find('span[class=aCOpRe]', 0)->plaintext; $this->items[] = $item; } diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php index 16c08e75..ffb99425 100644 --- a/bridges/HDWallpapersBridge.php +++ b/bridges/HDWallpapersBridge.php @@ -32,7 +32,7 @@ class HDWallpapersBridge extends BridgeAbstract { $lastpage = 1; for($page = 1; $page <= $lastpage; $page++) { - $link = self::URI . '/' . $category . '/page/' . $page; + $link = self::URI . $category . '/page/' . 
$page; $html = getSimpleHTMLDOM($link) or returnServerError('No results for this query.'); @@ -41,13 +41,16 @@ class HDWallpapersBridge extends BridgeAbstract { $lastpage = min($matches[1], ceil($max / 14)); } + $html = defaultLinkTo($html, self::URI); + foreach($html->find('.wallpapers .wall a') as $element) { $thumbnail = $element->find('img', 0); + $search = array(self::URI, 'wallpapers.html'); + $replace = array(self::URI . 'download/', $this->getInput('r') . '.jpg'); + $item = array(); - $item['uri'] = self::URI - . '/download' - . str_replace('wallpapers.html', $this->getInput('r') . '.jpg', $element->href); + $item['uri'] = str_replace($search, $replace, $element->href); $item['timestamp'] = time(); $item['title'] = $element->find('em1', 0)->text(); @@ -55,7 +58,6 @@ class HDWallpapersBridge extends BridgeAbstract { . '
'; diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 1d9d8025..fd72fbb6 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -40,18 +40,15 @@ class HeiseBridge extends FeedExpander { protected function parseItem($feedItem) { $item = parent::parseItem($feedItem); - $uri = $item['uri']; + $uri = $item['uri'] . '&seite=all'; - do { - $article = getSimpleHTMLDOMCached($uri) - or returnServerError('Could not open article: ' . $uri); + $article = getSimpleHTMLDOMCached($uri) + or returnServerError('Could not open article: ' . $uri); + if ($article) { $article = defaultLinkTo($article, $uri); $item = $this->addArticleToItem($item, $article); - - if($next = $article->find('.pagination a[rel="next"]', 0)) - $uri = $next->href; - } while ($next); + } return $item; } @@ -62,6 +59,9 @@ class HeiseBridge extends FeedExpander { $content = $article->find('div[class*="article-content"]', 0); + if ($content == null) + $content = $article->find('#article_content', 0); + foreach($content->find('p, h3, ul, table, pre, img') as $element) { $item['content'] .= $element; } diff --git a/bridges/IGNBridge.php b/bridges/IGNBridge.php index 6a254b37..ef5088f2 100644 --- a/bridges/IGNBridge.php +++ b/bridges/IGNBridge.php @@ -19,6 +19,27 @@ class IGNBridge extends FeedExpander { // $articlePage gets the entire page's contents $articlePage = getSimpleHTMLDOM($newsItem->link); + // List of BS elements + $uselessElements = array( + '.wiki-page-tools', + '.feedback-container', + '.paging-container', + '.dropdown-wrapper', + '.mw-editsection', + '.jsx-4115608983', + '.jsx-4213937408', + '.commerce-container', + '.widget-container', + '.newsletter-signup-button' + ); + + // Remove useless elements + foreach($uselessElements as $uslElement) { + foreach($articlePage->find($uslElement) as $jsWidget) { + $jsWidget->remove(); + } + } + /* * NOTE: Though articles and wiki/howtos have seperate styles of pages, there is no mechanism * for handling them seperately as it 
just ignores the DOM querys which it does not find. @@ -33,19 +54,8 @@ class IGNBridge extends FeedExpander { } // For Wikis and HowTos - $uselessWikiElements = array( - '.wiki-page-tools', - '.feedback-container', - '.paging-container' - ); foreach($articlePage->find('.wiki-page') as $wikiContents) { - $copy = clone $wikiContents; - // Remove useless elements present in IGN wiki/howtos - foreach($uselessWikiElements as $uslElement) { - $toRemove = $wikiContents->find($uslElement, 0); - $copy = str_replace($toRemove, '', $copy); - } - $article = $article . $copy; + $article = $article . $wikiContents; } // Add content to feed diff --git a/bridges/IKWYDBridge.php b/bridges/IKWYDBridge.php new file mode 100644 index 00000000..b24ac75c --- /dev/null +++ b/bridges/IKWYDBridge.php @@ -0,0 +1,114 @@ + array( + 'name' => 'IP Address', + 'required' => true + ), + 'update' => array( + 'name' => 'Update last seen', + 'type' => 'checkbox', + 'title' => 'Update timestamp every time "last seen" changes' + ) + ) + ); + private $name; + private $uri; + + public function detectParameters($url) { + $params = array(); + + $regex = '/^(https?:\/\/)?iknowwhatyoudownload\.com\/'; + $regex .= '(?:en|ru)\/peer\/\?ip=(\d+\.\d+\.\d+\.\d+)/'; + if(preg_match($regex, $url, $matches) > 0) { + $params['ip'] = urldecode($matches[2]); + return $params; + } + + $regex = '/^(https?:\/\/)?iknowwhatyoudownload\.com\/'; + $regex .= '(?:(?:en|ru)\/peer\/)?/'; + if(preg_match($regex, $url, $matches) > 0) { + $params['ip'] = $_SERVER['REMOTE_ADDR']; + return $params; + } + + return null; + } + + public function getName() { + if($this->name) { + return $this->name; + } else { + return self::NAME; + } + } + + public function getURI() { + if($this->uri) { + return $this->uri; + } else { + return self::URI; + } + } + + public function collectData() { + $ip = $this->getInput('ip'); + $root = self::URI . 'en/peer/?ip=' . $ip; + $html = getSimpleHTMLDOM($root) + or returnServerError('Could not request ' . 
self::URI); + + $this->name = 'IKWYD: ' . $ip; + $this->uri = $root; + + foreach($html->find('.table > tbody > tr') as $download) { + $download = defaultLinkTo($download, self::URI); + $firstSeen = $download->find('.date-column', + 0)->innertext; + $lastSeen = $download->find('.date-column', + 1)->innertext; + $category = $download->find('.category-column', + 0)->innertext; + $torlink = $download->find('.name-column > div > a', + 0); + $tortitle = strip_tags($torlink); + $size = $download->find('td', 4)->innertext; + $title = $tortitle; + $author = $ip; + + if($this->getInput('update')) { + $timestamp = strtotime($lastSeen); + } else { + $timestamp = strtotime($firstSeen); + } + + $uri = $torlink->href; + + $content = 'IP address: '; + $content .= $ip . '
'; + $content .= 'First seen: ' . $firstSeen . '
'; + $content .= ($this->getInput('update') ? 'Last seen: ' . + $lastSeen . '
' : ''); + $content .= ($category ? 'Category: ' . + $category . '
' : ''); + $content .= 'Title: ' . $torlink . '
'; + $content .= 'Size: ' . $size; + + $item = array(); + $item['uri'] = $uri; + $item['title'] = $title; + $item['author'] = $author; + $item['timestamp'] = $timestamp; + $item['content'] = $content; + if($category) { + $item['categories'] = array($category); + } + $this->items[] = $item; + } + } +} diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 679c4c0e..bf2999b4 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -1,7 +1,7 @@ users as $user) { - if($user->user->username === $username) { + if(strtolower($user->user->username) === strtolower($username)) { $key = $user->user->pk; } } @@ -123,31 +123,33 @@ class InstagramBridge extends BridgeAbstract { $item['title'] = substr($item['title'], 0, $titleLinePos) . '...'; } + if($directLink) { + $mediaURI = $media->display_url; + } else { + $mediaURI = self::URI . 'p/' . $media->shortcode . '/media?size=l'; + } + switch($media->__typename) { case 'GraphSidecar': - $data = $this->getInstagramSidecarData($item['uri'], $item['title']); + $data = $this->getInstagramSidecarData($item['uri'], $item['title'], $media, $textContent); $item['content'] = $data[0]; $item['enclosures'] = $data[1]; break; case 'GraphImage': - if($directLink) { - $mediaURI = $media->display_url; - } else { - $mediaURI = self::URI . 'p/' . $media->shortcode . '/media?size=l'; - } $item['content'] = ''; $item['content'] .= '' . $item['title'] . ''; $item['content'] .= '

' . nl2br(htmlentities($textContent)); $item['enclosures'] = array($mediaURI); break; case 'GraphVideo': - $data = $this->getInstagramVideoData($item['uri']); + $data = $this->getInstagramVideoData($item['uri'], $mediaURI, $media, $textContent); $item['content'] = $data[0]; if($directLink) { $item['enclosures'] = $data[1]; } else { - $item['enclosures'] = array(self::URI . 'p/' . $media->shortcode . '/media?size=l'); + $item['enclosures'] = array($mediaURI); } + $item['thumbnail'] = $mediaURI; break; default: break; } @@ -158,11 +160,7 @@ class InstagramBridge extends BridgeAbstract { } // returns Sidecar(a post which has multiple media)'s contents and enclosures - protected function getInstagramSidecarData($uri, $postTitle) { - $mediaInfo = $this->getSinglePostData($uri); - - $textContent = $this->getTextContent($mediaInfo); - + protected function getInstagramSidecarData($uri, $postTitle, $mediaInfo, $textContent) { $enclosures = array(); $content = ''; foreach($mediaInfo->edge_sidecar_to_children->edges as $singleMedia) { @@ -185,11 +183,11 @@ class InstagramBridge extends BridgeAbstract { } // returns Video post's contents and enclosures - protected function getInstagramVideoData($uri) { - $mediaInfo = $this->getSinglePostData($uri); - - $textContent = $this->getTextContent($mediaInfo); - $content = '
'; + protected function getInstagramVideoData($uri, $mediaURI, $mediaInfo, $textContent) { + $content = '
'; $content .= '
' . nl2br(htmlentities($textContent)); return array($content, array($mediaInfo->video_url)); diff --git a/bridges/ItchioBridge.php b/bridges/ItchioBridge.php new file mode 100644 index 00000000..6ba724d2 --- /dev/null +++ b/bridges/ItchioBridge.php @@ -0,0 +1,46 @@ + array( + 'name' => 'Product URL', + 'exampleValue' => 'https://remedybg.itch.io/remedybg', + 'required' => true, + ) + )); + const CACHE_TIMEOUT = 21600; // 6 hours + + public function collectData() { + $url = $this->getInput('url'); + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request: ' . $url); + + $title = $html->find('.game_title', 0)->innertext; + $timestampOriginal = $html->find('span.icon-stopwatch', 0)->parent()->title; + $timestampFormatted = str_replace('@', '', $timestampOriginal); + + $content = 'The following files are available to download:
'; + foreach ($html->find('div.upload') as $element) { + $filename = $element->find('strong.name', 0)->innertext; + $filesize = $element->find('span.file_size', 0)->first_child()->innertext; + $content = $content . $filename . ' (' . $filesize . ')
'; + } + + // NOTE: At the time of writing it is not clear under which conditions + // itch updates the timestamp. In case they don't always update it, + // we include the file list as well when computing the UID hash. + $uidContent = $timestampFormatted . $content; + + $item = array(); + $item['uri'] = $url; + $item['uid'] = $uidContent; + $item['title'] = 'New release for ' . $title; + $item['content'] = $content; + $item['timestamp'] = $timestampFormatted; + $this->items[] = $item; + } +} diff --git a/bridges/JustETFBridge.php b/bridges/JustETFBridge.php index 8d5b3d5a..746f1c97 100644 --- a/bridges/JustETFBridge.php +++ b/bridges/JustETFBridge.php @@ -347,5 +347,6 @@ class JustETFBridge extends BridgeAbstract { return $element->plaintext; } + #endregion } diff --git a/bridges/KernelBugTrackerBridge.php b/bridges/KernelBugTrackerBridge.php index d617b80f..81321966 100644 --- a/bridges/KernelBugTrackerBridge.php +++ b/bridges/KernelBugTrackerBridge.php @@ -61,6 +61,8 @@ class KernelBugTrackerBridge extends BridgeAbstract { if($html === false) returnServerError('Failed to load page!'); + $html = defaultLinkTo($html, self::URI); + // Store header information into private members $this->bugid = $html->find('#bugzilla-body', 0)->find('a', 0)->innertext; $this->bugdesc = $html->find('table.bugfields', 0)->find('tr', 0)->find('td', 0)->innertext; @@ -93,7 +95,7 @@ class KernelBugTrackerBridge extends BridgeAbstract { $item['content'] = str_replace("\n", '
', $item['content']); // Fix relative URIs - $item['content'] = $this->replaceRelativeURI($item['content']); + $item['content'] = $item['content']; $this->items[] = $item; } @@ -125,17 +127,6 @@ class KernelBugTrackerBridge extends BridgeAbstract { } } - /** - * Replaces all relative URIs with absolute ones - * - * @param string $content The source string - * @return string Returns the source string with all relative URIs replaced - * by absolute ones. - */ - private function replaceRelativeURI($content){ - return preg_replace('/href="(?!http)/', 'href="' . self::URI . '/', $content); - } - /** * Adds styles as attributes to tags with known classes * diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index a5e09cbd..4cfb8c21 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -3,7 +3,7 @@ class KoreusBridge extends FeedExpander { const MAINTAINER = 'pit-fgfjiudghdf'; const NAME = 'Koreus'; - const URI = 'http://www.koreus.com/'; + const URI = 'https://www.koreus.com/'; const DESCRIPTION = 'Returns the newest posts from Koreus (full text)'; protected function parseItem($item){ @@ -17,6 +17,6 @@ class KoreusBridge extends FeedExpander { } public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/Koreus-articles'); + $this->collectExpandableDatas('https://feeds.feedburner.com/Koreus-articles'); } } diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 45aa6075..b85a9631 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -26,8 +26,8 @@ class LeMondeInformatiqueBridge extends FeedExpander { //No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail $content_node = $article_html->find('div.col-primary, div.col-sm-9', 0); - $item['content'] = utf8_encode($this->cleanArticle($content_node->innertext)); - $item['author'] = 
utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext); + $item['content'] = $this->cleanArticle($content_node->innertext); + $item['author'] = $article_html->find('div.author-infos', 0)->find('b', 0)->plaintext; return $item; } diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index 0957d921..c79b1114 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -11,7 +11,7 @@ class LesJoiesDuCodeBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request LesJoiesDuCode.'); - foreach($html->find('div.blog-post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); diff --git a/bridges/MallTvBridge.php b/bridges/MallTvBridge.php new file mode 100644 index 00000000..34b38e84 --- /dev/null +++ b/bridges/MallTvBridge.php @@ -0,0 +1,73 @@ + array( + 'name' => 'url to the show', + 'required' => true, + 'exampleValue' => 'https://www.mall.tv/zivot-je-hra' + ) + ) + ); + + private function fixChars($text) { + return html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + } + + private function getUploadTimeFromUrl($url) { + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request MALL.TV detail page'); + + $scriptLdJson = $html->find('script[type="application/ld+json"]', 0)->innertext; + if (!preg_match('/[\'"]uploadDate[\'"]\s*:\s*[\'"](\d{4}-\d{2}-\d{2})[\'"]/', $scriptLdJson, $match)) { + returnServerError('Could not get date from MALL.TV detail page'); + } + + return strtotime($match[1]); + } + + public function collectData() { + $url = $this->getInput('url'); + + if (!preg_match('/^https:\/\/www\.mall\.tv\/[a-z0-9-]+(\/[a-z0-9-]+)?\/?$/', $url)) { + returnServerError('Invalid url'); + } + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request MALL.TV'); + + $this->feedUri = $url; + 
$this->feedName = $this->fixChars($html->find('title', 0)->plaintext); + + foreach ($html->find('section.isVideo .video-card') as $element) { + $itemTitle = $element->find('.video-card__details-link', 0); + $itemThumbnail = $element->find('.video-card__thumbnail', 0); + $itemUri = self::URI . $itemTitle->getAttribute('href'); + + $item = array( + 'title' => $this->fixChars($itemTitle->plaintext), + 'uri' => $itemUri, + 'content' => '', + 'timestamp' => $this->getUploadTimeFromUrl($itemUri) + ); + + $this->items[] = $item; + } + } + + public function getURI() { + return isset($this->feedUri) ? $this->feedUri : parent::getURI(); + } + + public function getName() { + return isset($this->feedName) ? $this->feedName : parent::getName(); + } +} diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php new file mode 100644 index 00000000..ada65920 --- /dev/null +++ b/bridges/MarktplaatsBridge.php @@ -0,0 +1,127 @@ + array( + 'q' => array( + 'name' => 'query', + 'type' => 'text', + 'required' => true, + 'title' => 'The search string for marktplaats', + ), + 'z' => array( + 'name' => 'zipcode', + 'type' => 'text', + 'required' => false, + 'title' => 'Zip code for location limited searches', + ), + 'd' => array( + 'name' => 'distance', + 'type' => 'number', + 'required' => false, + 'title' => 'The distance in meters from the zipcode', + ), + 'f' => array( + 'name' => 'priceFrom', + 'type' => 'number', + 'required' => false, + 'title' => 'The minimal price in cents', + ), + 't' => array( + 'name' => 'priceTo', + 'type' => 'number', + 'required' => false, + 'title' => 'The maximal price in cents', + ), + 's' => array( + 'name' => 'showGlobal', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include result with negative distance', + ), + 'i' => array( + 'name' => 'includeImage', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include the image at the end of the content', + ), + 'r' => array( + 'name' => 'includeRaw', + 'type' => 
'checkbox', + 'required' => false, + 'title' => 'Include the raw data behind the content', + ) + ) + ); + const CACHE_TIMEOUT = 900; + + public function collectData() { + $query = ''; + $excludeGlobal = false; + if(!is_null($this->getInput('z')) && !is_null($this->getInput('d'))) { + $query = '&postcode=' . $this->getInput('z') . '&distanceMeters=' . $this->getInput('d'); + } + if(!is_null($this->getInput('f'))) { + $query .= '&PriceCentsFrom=' . $this->getInput('f'); + } + if(!is_null($this->getInput('t'))) { + $query .= '&PriceCentsTo=' . $this->getInput('t'); + } + if(!is_null($this->getInput('s'))) { + if(!$this->getInput('s')) { + $excludeGlobal = true; + } + } + $url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query; + $jsonString = getSimpleHTMLDOM($url, 900) or returnServerError('No contents received!'); + $jsonObj = json_decode($jsonString); + foreach($jsonObj->listings as $listing) { + if(!$excludeGlobal || $listing->location->distanceMeters >= 0) { + $item = array(); + $item['uri'] = 'https://marktplaats.nl' . $listing->vipUrl; + $item['title'] = $listing->title; + $item['timestamp'] = $listing->date; + $item['author'] = $listing->sellerInformation->sellerName; + $item['content'] = $listing->description; + $item['categories'] = $listing->verticals; + $item['uid'] = $listing->itemId; + if(!is_null($this->getInput('i')) && !empty($listing->imageUrls)) { + $item['enclosures'] = $listing->imageUrls; + if(is_array($listing->imageUrls)) { + foreach($listing->imageUrls as $imgurl) { + $item['content'] .= "
\n"; + } + } else { + $item['content'] .= "
\n"; + } + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "
\n
\n
\n" . json_encode($listing); + } + } + $item['content'] .= "
\n
\nPrice: " . $listing->priceInfo->priceCents / 100; + $item['content'] .= '  (' . $listing->priceInfo->priceType . ')'; + if(!empty($listing->location->cityName)) { + $item['content'] .= "

\n" . $listing->location->cityName; + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "
\n
\n
\n" . json_encode($listing); + } + } + $this->items[] = $item; + } + } + } + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Marktplaats'; + } + return parent::getName(); + } +} diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 9e131b7d..de5e41f4 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -78,7 +78,7 @@ class MastodonBridge extends FeedExpander { public function getURI(){ if($this->getInput('canusername')) - return 'https://' . $this->getInstance() . '/users/' . $this->getUsername() . '.atom'; + return 'https://' . $this->getInstance() . '/@' . $this->getUsername() . '.rss'; return parent::getURI(); } diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php new file mode 100644 index 00000000..40ae1f90 --- /dev/null +++ b/bridges/MediapartBlogsBridge.php @@ -0,0 +1,48 @@ + array( + 'name' => 'Blog Slug', + 'type' => 'text', + 'title' => 'Blog user name', + 'exampleValue' => 'jean-vincot', + ) + ) + ); + + public function getIcon() { + return 'https://static.mediapart.fr/favicon/favicon-club.ico?v=2'; + } + + public function collectData() { + $html = getSimpleHTMLDOM(self::BASE_URI . '/' . $this->getInput('slug') . '/blog') + or returnServerError('Could not load content'); + + foreach($html->find('ul.post-list li') as $element) { + $item = array(); + + $item_title = $element->find('h3.title a', 0); + $item_divs = $element->find('div'); + + $item['title'] = $item_title->innertext; + $item['uri'] = self::BASE_URI . trim($item_title->href); + $item['author'] = $element->find('.author .subscriber', 0)->innertext; + $item['content'] = $item_divs[count($item_divs) - 2] . $item_divs[count($item_divs) - 1]; + $item['timestamp'] = strtotime($element->find('.author time', 0)->datetime); + + $this->items[] = $item; + } + } + + public function getName() { + if ($this->getInput('slug')) { + return self::NAME . ' | ' . 
$this->getInput('slug'); + } + return parent::getName(); + } +} diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index 85f771e3..cff84967 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -3,22 +3,26 @@ class MondeDiploBridge extends BridgeAbstract { const MAINTAINER = 'Pitchoule'; const NAME = 'Monde Diplomatique'; - const URI = 'http://www.monde-diplomatique.fr/'; + const URI = 'https://www.monde-diplomatique.fr'; const CACHE_TIMEOUT = 21600; //6h const DESCRIPTION = 'Returns most recent results from MondeDiplo.'; + private function cleanText($text) { + return trim(str_replace(array(' ', ' '), ' ', $text)); + } + public function collectData(){ $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request MondeDiplo. for : ' . self::URI); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); + $title = $element->find('h3', 0)->plaintext; + $datesAuteurs = $element->find('div.dates_auteurs', 0)->plaintext; $item = array(); $item['uri'] = self::URI . $element->href; - $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('div.dates_auteurs', 0)->plaintext - . '
' - . strstr($element->find('div', 0)->plaintext, $element->find('div.dates_auteurs', 0)->plaintext, true); + $item['title'] = $this->cleanText($title) . ' - ' . $this->cleanText($datesAuteurs); + $item['content'] = $this->cleanText(str_replace(array($title, $datesAuteurs), '', $element->plaintext)); $this->items[] = $item; } diff --git a/bridges/MozillaBugTrackerBridge.php b/bridges/MozillaBugTrackerBridge.php index 356bedcf..439e148d 100644 --- a/bridges/MozillaBugTrackerBridge.php +++ b/bridges/MozillaBugTrackerBridge.php @@ -61,43 +61,44 @@ class MozillaBugTrackerBridge extends BridgeAbstract { if($html === false) returnServerError('Failed to load page!'); + // Fix relative URLs + defaultLinkTo($html, self::URI); + // Store header information into private members - $this->bugid = $html->find('#bugzilla-body', 0)->find('a', 0)->innertext; - $this->bugdesc = $html->find('table.bugfields', 0)->find('tr', 0)->find('td', 0)->innertext; + $this->bugid = $html->find('#field-value-bug_id', 0)->plaintext; + $this->bugdesc = $html->find('h1#field-value-short_desc', 0)->plaintext; // Get and limit comments - $comments = $html->find('.bz_comment_table div.bz_comment'); + $comments = $html->find('div.change-set'); if($limit > 0 && count($comments) > $limit) { $comments = array_slice($comments, count($comments) - $limit, $limit); } - // Order comments - switch($sorting) { - case 'lf': $comments = array_reverse($comments, true); - case 'of': - default: // Nothing to do, keep original order + if ($sorting === 'lf') { + $comments = array_reverse($comments, true); } foreach($comments as $comment) { $comment = $this->inlineStyles($comment); $item = array(); - $item['uri'] = $this->getURI() . '#' . 
$comment->id; - $item['author'] = $comment->find('span.bz_comment_user', 0)->innertext; - $item['title'] = $comment->find('span.bz_comment_number', 0)->find('a', 0)->innertext; - $item['timestamp'] = strtotime($comment->find('span.bz_comment_time', 0)->innertext); - $item['content'] = $comment->find('pre.bz_comment_text', 0)->innertext; + $item['uri'] = $comment->find('h3.change-name', 0)->find('a', 0)->href; + $item['author'] = $comment->find('td.change-author', 0)->plaintext; + $item['title'] = $comment->find('h3.change-name', 0)->plaintext; + $item['timestamp'] = strtotime($comment->find('span.rel-time', 0)->title); + $item['content'] = ''; - // Fix line breaks (they use LF) - $item['content'] = str_replace("\n", '
', $item['content']); + if ($comment->find('.comment-text', 0)) { + $item['content'] = $comment->find('.comment-text', 0)->outertext; + } - // Fix relative URIs - $item['content'] = $this->replaceRelativeURI($item['content']); + if ($comment->find('div.activity', 0)) { + $item['content'] .= $comment->find('div.activity', 0)->innertext; + } $this->items[] = $item; } - } public function getURI(){ @@ -114,9 +115,8 @@ class MozillaBugTrackerBridge extends BridgeAbstract { public function getName(){ switch($this->queriedContext) { case 'Bug comments': - return 'Bug ' - . $this->bugid - . ' tracker for ' + return $this->bugid + . ' - ' . $this->bugdesc . ' - ' . parent::getName(); @@ -125,17 +125,6 @@ class MozillaBugTrackerBridge extends BridgeAbstract { } } - /** - * Replaces all relative URIs with absolute ones - * - * @param string $content The source string - * @return string Returns the source string with all relative URIs replaced - * by absolute ones. - */ - private function replaceRelativeURI($content){ - return preg_replace('/href="(?!http)/', 'href="' . self::URI . '/', $content); - } - /** * Adds styles as attributes to tags with known classes * @@ -144,10 +133,14 @@ class MozillaBugTrackerBridge extends BridgeAbstract { * attributes. 
*/ private function inlineStyles($html){ - foreach($html->find('.bz_obsolete') as $element) { + foreach($html->find('.bz_closed') as $element) { $element->style = 'text-decoration:line-through;'; } + foreach($html->find('pre') as $element) { + $element->style = 'white-space: pre-wrap;'; + } + return $html; } } diff --git a/bridges/MozillaSecurityBridge.php b/bridges/MozillaSecurityBridge.php index 52672f56..1e7dc316 100644 --- a/bridges/MozillaSecurityBridge.php +++ b/bridges/MozillaSecurityBridge.php @@ -15,7 +15,7 @@ class MozillaSecurityBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::WEBROOT); $item = array(); - $articles = $html->find('div[itemprop="articleBody"] h2'); + $articles = $html->find('div[id="main-content"] h2'); foreach ($articles as $element) { $item['title'] = $element->innertext; diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8e293e0f..6e2674f0 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -12,10 +12,8 @@ class NasaApodBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI . 'archivepix.html') or returnServerError('Error while downloading the website content'); - $list = explode('
', $html->find('b', 0)->innertext); - - for($i = 0; $i < 3; $i++) { - $line = $list[$i]; + // Start at 1 to skip the "APOD Full Archive" on top of the page + for($i = 1; $i < 4; $i++) { $item = array(); $uri_page = $html->find('a', $i + 3)->href; @@ -26,9 +24,14 @@ class NasaApodBridge extends BridgeAbstract { $picture_html_string = $picture_html->innertext; //Extract image and explanation - $media = $picture_html->find('p', 1)->innertext; - $media = strstr($media, '
'); - $media = preg_replace('/
/', '', $media, 1); + $image_wrapper = $picture_html->find('a', 1); + $image_path = $image_wrapper->href; + $img_placeholder = $image_wrapper->find('img', 0); + $img_alt = $img_placeholder->alt; + $img_style = $img_placeholder->style; + $image_uri = self::URI . $image_path; + $new_img_placeholder = "\"$img_alt\""; + $media = "$new_img_placeholder"; $explanation = $picture_html->find('p', 2)->innertext; //Extract date from the picture page diff --git a/bridges/NewOnNetflixBridge.php b/bridges/NewOnNetflixBridge.php new file mode 100644 index 00000000..bb35e71d --- /dev/null +++ b/bridges/NewOnNetflixBridge.php @@ -0,0 +1,59 @@ + array( + 'name' => 'Country', + 'type' => 'list', + 'values' => array( + 'Australia/New Zealand' => 'anz', + 'Canada' => 'can', + 'United Kingdom' => 'uk', + 'United States' => 'usa', + ), + 'defaultValue' => 'uk', + ) + )); + const CACHE_TIMEOUT = 3600 * 24; + + public function collectData() { + $baseURI = 'https://' . $this->getInput('country') . '.newonnetflix.info'; + $html = getSimpleHTMLDOMCached($baseURI . '/lastchance', self::CACHE_TIMEOUT) + or returnServerError('Could not request NewOnNetflix (U FAILED LOL).'); + + foreach($html->find('article.oldpost') as $element) { + $title = $element->find('a.infopop[title]', 0); + $img = $element->find('img[lazy_src]', 0); + $date = $element->find('span[title]', 0); + + // format sholud be 'dd/mm/yy - dd/mm/yy' + // (the added date might be "unknown") + $fromTo = array(); + if (preg_match('/^\s*(.*?)\s*-\s*(.*?)\s*$/', $date->title, $fromTo)) { + $from = $fromTo[1]; + $to = $fromTo[2]; + } else { + $from = 'unknown'; + $to = 'unknown'; + } + $summary = << +
{$title->title}
+
Added on:$from
+
Removed on:$to
+EOD; + + $item = array(); + $item['uri'] = $baseURI . $title->href; + $item['title'] = $to . ' - ' . $title->plaintext; + $item['content'] = $summary; + // some movies are added and removed multiple times + $item['uid'] = $title->href . '-' . $to; + $this->items[] = $item; + } + } +} diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index c6bf2f53..c3cca30d 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -1,9 +1,10 @@ 'Feed', 'type' => 'list', 'values' => array( - 'Tous nos articles' => 'news', - 'Nos contenus en accès libre' => 'acces-libre', - 'Blog' => 'blog', - 'Bons plans' => 'bonsplans' + 'Nos actualités' => array( + 'Toutes nos publications' => 'news', + 'Toutes nos publications sauf #LeBrief' => 'nobrief', + 'Toutes nos publications sauf INpact Hardware' => 'noih', + 'Seulement les publications INpact Hardware' => 'hardware:news', + 'Seulement les publications Next INpact' => 'nobrief-noih', + 'Seulement les publications #LeBrief' => 'lebrief', + ), + 'Flux spécifiques' => array( + 'Le blog' => 'blog', + 'Les bons plans' => 'bonsplans', + 'Publications INpact Hardware en accès libre' => 'hardware:acces-libre', + 'Publications Next INpact en accès libre' => 'acces-libre', + ), + 'Flux thématiques' => array( + 'Tech' => 'category:1', + 'Logiciel' => 'category:2', + 'Internet' => 'category:3', + 'Mobilité' => 'category:4', + 'Droit' => 'category:5', + 'Économie' => 'category:6', + 'Culture numérique' => 'category:7', + 'Next INpact' => 'category:8', + ) ) ), 'filter_premium' => array( @@ -39,9 +60,27 @@ class NextInpactBridge extends FeedExpander { public function collectData(){ $feed = $this->getInput('feed'); - if (empty($feed)) + $base_uri = self::URI; + $args = ''; + + if (empty($feed)) { + // Default to All articles $feed = 'news'; - $this->collectExpandableDatas(self::URI . 'rss/' . $feed . 
'.xml'); + } + + if (strpos($feed, 'hardware:') === 0) { + // Feed hosted on Hardware domain + $base_uri = self::URI_HARDWARE; + $feed = str_replace('hardware:', '', $feed); + } + + if (strpos($feed, 'category:') === 0) { + // Feed with specific category parameter + $args = '?CategoryIds=' . str_replace('category:', '', $feed); + $feed = 'params'; + } + + $this->collectExpandableDatas($base_uri . 'rss/' . $feed . '.xml' . $args); } protected function parseItem($newsItem){ @@ -57,9 +96,11 @@ class NextInpactBridge extends FeedExpander { if (!is_object($html)) return 'Failed to request NextInpact: ' . $url; + // Filter premium and brief articles? + $brief_selector = 'div.brief-container'; foreach(array( - 'filter_premium' => 'h2.title_reserve_article', - 'filter_brief' => 'div.brief-inner-content' + 'filter_premium' => 'p.red-msg', + 'filter_brief' => $brief_selector ) as $param_name => $selector) { $param_val = intval($this->getInput($param_name)); if ($param_val != 0) { @@ -71,38 +112,71 @@ class NextInpactBridge extends FeedExpander { } } - if (is_object($html->find('div[itemprop=articleBody], div.brief-inner-content', 0))) { + $article_content = $html->find('div.article-content', 0); + if (!is_object($article_content)) { + $article_content = $html->find('div.content', 0); + } + if (is_object($article_content)) { - $subtitle = trim($html->find('span.sub_title, div.brief-head', 0)); - if(is_object($subtitle) && $subtitle->plaintext !== $item['title']) { - $subtitle = '

' . $subtitle->plaintext . '

'; + // Subtitle + $subtitle = $html->find('small.subtitle', 0); + if(!is_object($subtitle) && !is_object($html->find($brief_selector, 0))) { + $subtitle = $html->find('small', 0); + } + if(!is_object($subtitle)) { + $content_wrapper = $html->find('div.content-wrapper', 0); + if (is_object($content_wrapper)) { + $subtitle = $content_wrapper->find('h2.title', 0); + } + } + if(is_object($subtitle) && (!isset($item['title']) || $subtitle->plaintext != $item['title'])) { + $subtitle = '

' . trim($subtitle->plaintext) . '

'; } else { $subtitle = ''; } - $postimg = $html->find( - 'div.container_main_image_article, div.image-brief-container, div.image-brief-side-container', 0 - ); + // Image + $postimg = $html->find('div.article-image, div.image-container', 0); if(is_object($postimg)) { - $postimg = '

-

'; + $postimg = $postimg->find('img', 0); + if (!empty($postimg->src)) { + $postimg = $postimg->src; + } else { + $postimg = $postimg->srcset; //"url 355w, url 1003w, url 748w" + $postimg = explode(', ', $postimg); //split by ', ' to get each url separately + $postimg = end($postimg); //Get last item: "url 748w" which is of largest size + $postimg = explode(' ', $postimg); //split by ' ' to separate url from res + $postimg = array_reverse($postimg); //reverse array content to have url last + $postimg = end($postimg); //Get last item of array: "url" + } + $postimg = '

-

'; } else { $postimg = ''; } + // Paywall + $paywall = $html->find('div.paywall-restriction', 0); + if (is_object($paywall) && is_object($paywall->find('p.red-msg', 0))) { + $paywall = '

' . $paywall->find('span.head-mention', 0)->innertext . '

'; + } else { + $paywall = ''; + } + + // Content + $article_content = $article_content->outertext; + $article_content = str_replace('>Signaler une erreur', '>', $article_content); + + // Result $text = $subtitle . $postimg - . $html->find('div[itemprop=articleBody], div.brief-inner-content', 0)->outertext; + . $article_content + . $paywall; } else { - $text = $item['content'] - . '

Failed retrieve full article content

'; - } - - $premium_article = $html->find('h2.title_reserve_article', 0); - if (is_object($premium_article)) { - $text .= '

' . $premium_article->innertext . '

'; + $text = '

Failed to retrieve full article content

'; + if (isset($item['content'])) { + $text = $item['content'] . $text; + } } return $text; diff --git a/bridges/NineGagBridge.php b/bridges/NineGagBridge.php index 939ff387..19c7e29c 100644 --- a/bridges/NineGagBridge.php +++ b/bridges/NineGagBridge.php @@ -148,7 +148,7 @@ class NineGagBridge extends BridgeAbstract { } if (!$AvoidElement) { - $item['uri'] = $post['url']; + $item['uri'] = preg_replace('/^http:/i', 'https:', $post['url']); $item['title'] = $post['title']; $item['content'] = self::getContent($post); $item['categories'] = self::getCategories($post); diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php new file mode 100644 index 00000000..37fa3d5e --- /dev/null +++ b/bridges/NordbayernBridge.php @@ -0,0 +1,131 @@ + array( + 'name' => 'region', + 'type' => 'list', + 'exampleValue' => 'Nürnberg', + 'title' => 'Select a region', + 'values' => array( + 'Nürnberg' => 'nuernberg', + 'Fürth' => 'fuerth', + 'Altdorf' => 'altdorf', + 'Ansbach' => 'ansbach', + 'Bad Windsheim' => 'bad-windsheim', + 'Bamberg' => 'bamberg', + 'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen', + 'Feucht' => 'feucht', + 'Forchheim' => 'forchheim', + 'Gunzenhausen' => 'gunzenhausen', + 'Hersbruck' => 'hersbruck', + 'Herzogenaurach' => 'herzogenaurach', + 'Hilpolstein' => 'holpolstein', + 'Höchstadt' => 'hoechstadt', + 'Lauf' => 'lauf', + 'Neumarkt' => 'neumarkt', + 'Neustadt/Aisch' => 'neustadt-aisch', + 'Pegnitz' => 'pegnitz', + 'Roth' => 'roth', + 'Rothenburg o.d.T.' 
=> 'rothenburg-o-d-t', + 'Schwabach' => 'schwabach', + 'Treuchtlingen' => 'treuchtlingen', + 'Weißenburg' => 'weissenburg' + ) + ), + 'policeReports' => array( + 'name' => 'Police Reports', + 'type' => 'checkbox', + 'exampleValue' => 'checked', + 'title' => 'Read Police Reports', + ) + )); + + private function getImageUrlFromScript($script) { + preg_match( + "#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG))#", + $script->innertext, + $matches, + PREG_OFFSET_CAPTURE + ); + if(isset($matches[1][0])) { + return stripcslashes($matches[1][0]) . '?w=800'; + } + return null; + } + + private function handleArticle($link) { + $item = array(); + $article = getSimpleHTMLDOM($link); + $content = $article->find('div[class*=article-content]', 0); + $item['uri'] = $link; + $item['title'] = $article->find('h1', 0)->innertext; + $item['content'] = ''; + + //first get image from block/modul + $figure = $article->find('figure[class*=panorama]', 0); + if($figure !== null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 0)); + if($imgUrl === null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 1)); + } + $item['content'] .= ''; + } + + // get regular paragraphs + foreach($content->children() as $child) { + if($child->tag === 'p') { + $item['content'] .= $child; + } + } + + //get image divs + foreach($content->find('div[class*=article-slideshow]') as $slides) { + foreach($slides->children() as $child) { + switch($child->tag) { + case 'p': + $item['content'] .= $child; + break; + case 'h5': + $item['content'] .= '
' . $child->plaintext . '
'; + break; + case 'a': + $url = self::getImageUrlFromScript($child->find('script', 0)); + $item['content'] .= ''; + break; + } + } + } + $this->items[] = $item; + $article->clear(); + } + + private function handleNewsblock($listSite, $readPoliceReports) { + $newsBlocks = $listSite->find('section[class*=newsblock]'); + $regionalNewsBlock = $newsBlocks[0]; + $policeBlock = $newsBlocks[1]; + foreach($regionalNewsBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + if($readPoliceReports === true) { + foreach($policeBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + } + } + + public function collectData() { + $item = array(); + $region = $this->getInput('region'); + $listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region); + + self::handleNewsblock($listSite, $this->getInput('policeReports')); + } +} diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index b40b0f95..ab00ece8 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -100,7 +100,9 @@ class NyaaTorrentsBridge extends BridgeAbstract { //Retrieve data from page contents $item_title = str_replace(' :: Nyaa', '', $item_html->find('title', 0)->plaintext); - $item_desc = str_get_html(markdownToHtml($item_html->find('#torrent-description', 0)->innertext)); + $item_desc = str_get_html( + markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext)) + ); $item_author = extractFromDelimiters($item_html->outertext, 'href="/user/', '"'); $item_date = intval(extractFromDelimiters($item_html->outertext, 'data-timestamp="', '"')); diff --git a/bridges/OpenwrtSecurityBridge.php b/bridges/OpenwrtSecurityBridge.php new file mode 100644 index 00000000..7a7470fe --- /dev/null +++ b/bridges/OpenwrtSecurityBridge.php @@ -0,0 +1,37 @@ +find('div[class=plugin_nspages]', 0); + + foreach($advisories->find('a[class=wikilink1]') as $element) 
{ + $item = array(); + + $row = $element->innertext; + + $item['title'] = substr($row, 0, strpos($row, ' - ')); + $item['timestamp'] = $this->getDate($element->href); + $item['uri'] = self::WEBROOT . $element->href; + $item['uid'] = self::WEBROOT . $element->href; + $item['content'] = substr($row, strpos($row, ' - ') + 3); + $item['author'] = 'OpenWrt Project'; + + $this->items[] = $item; + } + } + + private function getDate($href) { + $date = substr($href, -12); + return $date; + } +} diff --git a/bridges/OtrkeyFinderBridge.php b/bridges/OtrkeyFinderBridge.php new file mode 100644 index 00000000..32ce5c4f --- /dev/null +++ b/bridges/OtrkeyFinderBridge.php @@ -0,0 +1,175 @@ + array( + 'name' => 'Search term', + 'exampleValue' => 'Terminator', + 'title' => 'The search term is case-insensitive', + ), + 'station' => array( + 'name' => 'Station name', + 'exampleValue' => 'ARD', + ), + 'type' => array( + 'name' => 'Media type', + 'type' => 'list', + 'values' => array( + 'any' => '', + 'Detail' => array( + 'HD' => 'HD.avi', + 'AC3' => 'HD.ac3', + 'HD & AC3' => 'HD.', + 'HQ' => 'HQ.avi', + 'AVI' => 'g.avi', // 'g.' to exclude HD.avi and HQ.avi (filename always contains 'mpg.') + 'MP4' => '.mp4', + ), + ), + ), + 'minTime' => array( + 'name' => 'Min. running time', + 'type' => 'number', + 'title' => 'The minimum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '90', + 'defaultValue' => '0', + ), + 'maxTime' => array( + 'name' => 'Max. running time', + 'type' => 'number', + 'title' => 'The maximum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '120', + 'defaultValue' => '0', + ), + 'pages' => array( + 'name' => 'Number of pages', + 'type' => 'number', + 'title' => 'Specifies the number of pages to fetch. 
Increase this value if you get an empty feed.', + 'exampleValue' => '5', + 'defaultValue' => '5', + ), + ) + ); + // Example: Terminator_20.04.13_02-25_sf2_100_TVOON_DE.mpg.avi.otrkey + // The first group is the running time in minutes + const FILENAME_REGEX = '/_(\d+)_TVOON_DE\.mpg\..+\.otrkey/'; + // year.month.day_hour-minute with leading zeros + const TIME_REGEX = '/\d{2}\.\d{2}\.\d{2}_\d{2}-\d{2}/'; + const CONTENT_TEMPLATE = '
    %s
'; + const MIRROR_TEMPLATE = '
  • %s
  • '; + + public function collectData() { + $pages = $this->getInput('pages'); + + for($page = 1; $page <= $pages; $page++) { + $uri = $this->buildUri($page); + + $html = getSimpleHTMLDOMCached($uri, self::CACHE_TIMEOUT) + or returnServerError('Could not request ' . $uri); + + $keys = $html->find('div.otrkey'); + + foreach($keys as $key) { + $temp = $this->buildItem($key); + + if ($temp != null) + $this->items[] = $temp; + } + + // Sleep for 0.5 seconds to don't hammer the server. + usleep(500000); + } + } + + private function buildUri($page) { + $searchterm = $this->getInput('searchterm'); + $station = $this->getInput('station'); + $type = $this->getInput('type'); + + // Combine all three parts to a search query by separating them with white space + $search = implode(' ', array($searchterm, $station, $type)); + $search = trim($search); + $search = urlencode($search); + + return sprintf(self::URI_TEMPLATE, $search, $page); + } + + private function buildItem(simple_html_dom_node $node) { + $file = $this->getFilename($node); + + if ($file == null) + return null; + + $minTime = $this->getInput('minTime'); + $maxTime = $this->getInput('maxTime'); + + // Do we need to check the running time? 
+ if ($minTime != 0 || $maxTime != 0) { + if ($maxTime > 0 && $maxTime < $minTime) + returnClientError('The minimum running time must be less than the maximum running time.'); + + preg_match(self::FILENAME_REGEX, $file, $matches); + + if (!isset($matches[1])) + return null; + + $time = (integer)$matches[1]; + + // Check for minimum running time + if ($minTime > 0 && $minTime > $time) + return null; + + // Check for maximum running time + if ($maxTime > 0 && $maxTime < $time) + return null; + } + + $item = array(); + $item['title'] = $file; + + // The URI_TEMPLATE for querying the site can be reused here + $item['uri'] = sprintf(self::URI_TEMPLATE, $file, 1); + + $content = $this->buildContent($node); + + if ($content != null) + $item['content'] = $content; + + if (preg_match(self::TIME_REGEX, $file, $matches) === 1) { + $item['timestamp'] = DateTime::createFromFormat( + 'y.m.d_H-i', + $matches[0], + new DateTimeZone('Europe/Berlin') + )->getTimestamp(); + } + + return $item; + } + + private function getFilename(simple_html_dom_node $node) { + $file = $node->find('.file', 0); + + if ($file == null) + return null; + else + return trim($file->innertext); + } + + private function buildContent(simple_html_dom_node $node) { + $mirrors = $node->find('div.mirror'); + $list = ''; + + // Build list of available mirrors + foreach($mirrors as $mirror) { + $anchor = $mirror->find('a', 0); + $list .= sprintf(self::MIRROR_TEMPLATE, $anchor->href, $anchor->innertext); + } + + return sprintf(self::CONTENT_TEMPLATE, $list); + } +} diff --git a/bridges/PcGamerBridge.php b/bridges/PcGamerBridge.php index e0e55ce4..c4bcccf4 100644 --- a/bridges/PcGamerBridge.php +++ b/bridges/PcGamerBridge.php @@ -2,22 +2,43 @@ class PcGamerBridge extends BridgeAbstract { const NAME = 'PC Gamer'; - const URI = 'https://www.pcgamer.com/'; + const URI = 'https://www.pcgamer.com/archive/'; const DESCRIPTION = 'PC Gamer Most Read Stories'; - const MAINTAINER = 'mdemoss'; + const CACHE_TIMEOUT = 3600; + 
const MAINTAINER = 'IceWreck, mdemoss'; public function collectData() { $html = getSimpleHTMLDOMCached($this->getURI(), 300); - $stories = $html->find('div#popularcontent li.most-popular-item'); + $stories = $html->find('ul.basic-list li.day-article'); + $i = 0; + // Find induvidual stories in the archive page foreach ($stories as $element) { + if($i == 15) break; $item['uri'] = $element->find('a', 0)->href; + // error_log(print_r($item['uri'], TRUE)); $articleHtml = getSimpleHTMLDOMCached($item['uri']); - $item['title'] = $element->find('h4 a', 0)->plaintext; + $item['title'] = $element->find('a', 0)->plaintext; $item['timestamp'] = strtotime($articleHtml->find('meta[name=pub_date]', 0)->content); - $item['content'] = $articleHtml->find('meta[name=description]', 0)->content; - $item['author'] = $articleHtml->find('a[itemprop=author]', 0)->plaintext; + $item['author'] = $articleHtml->find('span.by-author a', 0)->plaintext; + + // Get the article content + $articleContents = $articleHtml->find('#article-body', 0); + + /* + By default the img src has a link to an error image and then the actual image + is added in by JS. 
So we replace the error image with the actual full size image + whoose link is in one of the attributes of the img tag + */ + foreach($articleContents->find('img') as $img) { + $imgsrc = $img->getAttribute('data-original-mos'); + // error_log($imgsrc); + $img->src = $imgsrc; + } + + $item['content'] = $articleContents; $this->items[] = $item; + $i++; } } } diff --git a/bridges/PhoronixBridge.php b/bridges/PhoronixBridge.php new file mode 100644 index 00000000..c5ded27b --- /dev/null +++ b/bridges/PhoronixBridge.php @@ -0,0 +1,22 @@ +collectExpandableDatas('https://www.phoronix.com/rss.php', 15); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + // $articlePage gets the entire page's contents + $articlePage = getSimpleHTMLDOM($newsItem->link); + $article = $articlePage->find('.content', 0); + $item['content'] = $article; + return $item; + } +} diff --git a/bridges/PornhubBridge.php b/bridges/PornhubBridge.php new file mode 100644 index 00000000..b8da99a5 --- /dev/null +++ b/bridges/PornhubBridge.php @@ -0,0 +1,99 @@ + array( + 'name' => 'User name', + 'required' => true, + ), + 'type' => array( + 'name' => 'User type', + 'type' => 'list', + 'values' => array( + 'user' => 'users', + 'model' => 'model', + 'pornstar' => 'pornstar', + ), + 'defaultValue' => 'users', + ), + 'sort' => array( + 'name' => 'Sort by', + 'type' => 'list', + 'values' => array( + 'Most recent' => '?', + 'Most views' => '?o=mv', + 'Top rated' => '?o=tr', + 'Longest' => '?o=lg', + ), + 'defaultValue' => '?', + ), + 'show_images' => array( + 'name' => 'Show thumbnails', + 'type' => 'checkbox', + ), + )); + + public function getName(){ + if(!is_null($this->getInput('type')) && !is_null($this->getInput('q'))) { + return 'PornHub ' . $this->getInput('type') . ':' . $this->getInput('q'); + } + + return parent::getName(); + } + + public function collectData() { + + $uri = 'https://www.pornhub.com/' . $this->getInput('type') . 
'/'; + switch($this->getInput('type')) { // select proper permalink format per user type... + case 'model': + $uri .= urlencode($this->getInput('q')) . '/videos' . $this->getInput('sort'); break; + case 'users': + $uri .= urlencode($this->getInput('q')) . '/videos/public' . $this->getInput('sort'); break; + case 'pornstar': + $uri .= urlencode($this->getInput('q')) . '/videos/upload' . $this->getInput('sort'); break; + } + + $show_images = $this->getInput('show_images'); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request PornHub.'); + + foreach($html->find('div.videoUList ul.videos li.videoblock') as $element) { + + $item = array(); + + $item['author'] = $this->getInput('q'); + + // Title + $title = $element->find('a', 0)->getAttribute('title'); + if (is_null($title)) { + continue; + } + $item['title'] = $title; + + // Url + $url = $element->find('a', 0)->href; + $item['uri'] = 'https://www.pornhub.com' . $url; + + // Content + $image = $element->find('img', 0)->getAttribute('data-src'); + if($show_images === true) { + $item['content'] = ''; + } + + // date hack, guess upload YYYYMMDD from thumbnail URL (format: https://ci.phncdn.com/videos/201907/25/--- ) + $uploaded = explode('/', $image); + $uploaded = strtotime($uploaded[4] . $uploaded[5]); + $item['timestamp'] = $uploaded; + + $this->items[] = $item; + } + } +} diff --git a/bridges/RaceDepartmentBridge.php b/bridges/RaceDepartmentBridge.php new file mode 100644 index 00000000..b915dd59 --- /dev/null +++ b/bridges/RaceDepartmentBridge.php @@ -0,0 +1,56 @@ +collectExpandableDatas('https://www.racedepartment.com/news/archive.rss', 10); + } + + protected function parseItem($feedItem) { + $item = parent::parseItem($feedItem); + + //fetch page + $articlePage = getSimpleHTMLDOMCached($feedItem->link) + or returnServerError('Could not retrieve ' . $feedItem->link); + //extract article + $item['content'] = $articlePage->find('div.thfeature_firstPost', 0); + + //convert iframes to links. 
meant for embedded videos. + foreach($item['content']->find('iframe') as $found) { + + $iframeUrl = $found->getAttribute('src'); + + if ($iframeUrl) { + $found->outertext = '' . $iframeUrl . ''; + } + } + + //get rid of some elements we don't need + $to_remove_selectors = array( + 'div.p-title', //title + 'ul.listInline', //Thread starter, Start date + 'div.rd_news_article_share_buttons', + 'div.thfeature_firstPost-author', + 'div.reactionsBar', + 'footer', + 'div.message-lastEdit', + 'section.message-attachments' + ); + + foreach($to_remove_selectors as $selector) { + foreach($item['content']->find($selector) as $found) { + $found->outertext = ''; + } + } + + //category + $forumPath = $articlePage->find('div.breadcrumb', 0); + $pathElements = $forumPath->find('span'); + $item['categories'] = array(end($pathElements)->innertext); + + return $item; + } +} diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php index 724edc8d..067d3e77 100644 --- a/bridges/RainbowSixSiegeBridge.php +++ b/bridges/RainbowSixSiegeBridge.php @@ -2,19 +2,18 @@ class RainbowSixSiegeBridge extends BridgeAbstract { const MAINTAINER = 'corenting'; - const NAME = 'Rainbow Six Siege Blog'; - const URI = 'https://rainbow6.ubisoft.com/siege/en-us/news/'; + const NAME = 'Rainbow Six Siege News'; + const URI = 'https://www.ubisoft.com/en-us/game/rainbow-six/siege/news-updates'; const CACHE_TIMEOUT = 7200; // 2h - const DESCRIPTION = 'Latest articles from the Rainbow Six Siege blog'; + const DESCRIPTION = 'Latest news about Rainbow Six Siege'; public function getIcon() { - return 'https://ubistatic19-a.akamaihd.net/resource/en-us/game/rainbow6/siege-v3/r6s-favicon_316592.ico'; + return 'https://static-dm.akamaized.net/siege/prod/favicon-144x144.png'; } public function collectData(){ - $dlUrl = 'https://prod-tridionservice.ubisoft.com/live/v1/News/Latest?templateId=tcm%3A152-7677'; - $dlUrl .= '8-32&pageIndex=0&pageSize=10&language=en-US&detailPageId=tcm%3A150-194572-64'; - 
$dlUrl .= '&keywordList=233416%2C316144%2C233418%2C233417&siteId=undefined&useSeoFriendlyUrl=true'; + $dlUrl = 'https://www.ubisoft.com/api/updates/items?locale=en-us&categoriesFilter=all'; + $dlUrl = $dlUrl . '&limit=6&mediaFilter=news&skip=0&startIndex=undefined&tags=BR-rainbow-six%20GA-siege'; $jsonString = getContents($dlUrl) or returnServerError('Error while downloading the website content'); $json = json_decode($jsonString, true); @@ -22,17 +21,20 @@ class RainbowSixSiegeBridge extends BridgeAbstract { // Start at index 2 to remove highlighted articles for($i = 0; $i < count($json); $i++) { - $jsonItem = $json[$i]['Content']; - $article = str_get_html($jsonItem); + $jsonItem = $json[$i]; + + $uri = 'https://www.ubisoft.com/en-us/game/rainbow-six/siege'; + $uri = $uri . $jsonItem['button']['buttonUrl']; + + $thumbnail = 'Thumbnail'; + $content = $thumbnail . '
    ' . markdownToHtml($jsonItem['content']); $item = array(); - - $uri = $article->find('h3 a', 0)->href; - $uri = 'https://rainbow6.ubisoft.com' . $uri; $item['uri'] = $uri; - $item['title'] = $article->find('h3', 0)->plaintext; - $item['content'] = $article->find('img', 0)->outertext . '
    ' . $article->find('strong', 0)->plaintext; - $item['timestamp'] = strtotime($article->find('p.news_date', 0)->plaintext); + $item['id'] = $jsonItem['id']; + $item['title'] = $jsonItem['title']; + $item['content'] = $content; + $item['timestamp'] = strtotime($jsonItem['date']); $this->items[] = $item; } diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index 8de499f9..130dc662 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -1,10 +1,11 @@ array( @@ -25,16 +26,153 @@ class RedditBridge extends FeedExpander { ) ); - public function collectData(){ - - switch($this->queriedContext) { - case 'single': $subreddits[] = $this->getInput('r'); break; - case 'multi': $subreddits = explode(',', $this->getInput('rs')); break; + public function getIcon() { + return 'https://www.redditstatic.com/desktop2x/img/favicon/favicon-96x96.png'; } + public function getName() { + if ($this->queriedContext == 'single') { + return 'Reddit r/' . $this->getInput('r'); + } else { + return self::NAME; + } + } + + public function collectData() { + switch ($this->queriedContext) { + case 'single': + $subreddits[] = $this->getInput('r'); + break; + case 'multi': + $subreddits = explode(',', $this->getInput('rs')); + break; + } + foreach ($subreddits as $subreddit) { $name = trim($subreddit); - $this->collectExpandableDatas("https://www.reddit.com/r/$name/.rss"); + + $values = getContents(self::URI . '/r/' . $name . '.json') + or returnServerError('Unable to fetch posts!'); + $decodedValues = json_decode($values); + + foreach ($decodedValues->data->children as $post) { + $data = $post->data; + + $item = array(); + $item['author'] = $data->author; + $item['title'] = $data->title; + $item['uid'] = $data->id; + $item['timestamp'] = $data->created_utc; + $item['uri'] = $this->encodePermalink($data->permalink); + + $item['categories'] = array(); + $item['categories'][] = $data->link_flair_text; + $item['categories'][] = $data->pinned ? 
'Pinned' : null; + $item['categories'][] = $data->over_18 ? 'NSFW' : null; + $item['categories'][] = $data->spoiler ? 'Spoiler' : null; + $item['categories'] = array_filter($item['categories']); + + if ($data->is_self) { + // Text post + + $item['content'] + = htmlspecialchars_decode($data->selftext_html); + + } elseif (isset($data->post_hint) ? $data->post_hint == 'link' : false) { + // Link with preview + + if (isset($data->media)) { + // Reddit embeds content for some sites (e.g. Twitter) + $embed = htmlspecialchars_decode( + $data->media->oembed->html + ); + } else { + $embed = ''; + } + + $item['content'] = $this->template( + $data->url, + $data->thumbnail, + $data->domain + ) . $embed; + + } elseif (isset($data->post_hint) ? $data->post_hint == 'image' : false) { + // Single image + + $item['content'] = $this->link( + $this->encodePermalink($data->permalink), + '' + ); + + } elseif (isset($data->is_gallery) ? $data->is_gallery : false) { + // Multiple images + + $images = array(); + foreach ($data->gallery_data->items as $media) { + $id = $media->media_id; + $type = $data->media_metadata->$id->m == 'image/gif' ? 'gif' : 'u'; + $src = $data->media_metadata->$id->s->$type; + $images[] = ''; + } + + $item['content'] = implode('', $images); + + } elseif ($data->is_video) { + // Video + + // Higher index -> Higher resolution + end($data->preview->images[0]->resolutions); + $index = key($data->preview->images[0]->resolutions); + + $item['content'] = $this->template( + $data->url, + $data->preview->images[0]->resolutions[$index]->url, + 'Video' + ); + + } elseif (isset($data->media) ? $data->media->type == 'youtube.com' : false) { + // Youtube link + + $item['content'] = $this->template( + $data->url, + $data->media->oembed->thumbnail_url, + 'YouTube'); + + } elseif (explode('.', $data->domain)[0] == 'self') { + // Crossposted text post + // TODO (optionally?) Fetch content of the original post. 
+ + $item['content'] = $this->link( + $this->encodePermalink($data->permalink), + 'Crossposted from r/' + . explode('.', $data->domain)[1] + ); + + } else { + // Link WITHOUT preview + + $item['content'] = $this->link($data->url, $data->domain); + } + + $this->items[] = $item; + } } } + + private function encodePermalink($link) { + return self::URI . implode( + '/', + array_map('urlencode', explode('/', $link)) + ); + } + + private function template($href, $src, $caption) { + return '
    ' + . $caption . '
    '; + } + + private function link($href, $text) { + return '' . $text . ''; + } } diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index fe2df8ec..686e7c5d 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -5,13 +5,16 @@ class Releases3DSBridge extends BridgeAbstract { const NAME = '3DS Scene Releases'; const URI = 'http://www.3dsdb.com/'; const CACHE_TIMEOUT = 10800; // 3h - const DESCRIPTION = 'Returns the newest scene releases.'; + const DESCRIPTION = 'Returns the newest scene releases for Nintendo 3DS.'; public function collectData(){ + $this->collectDataUrl(self::URI . 'xml.php'); + } + + protected function collectDataUrl($dataUrl){ - $dataUrl = self::URI . 'xml.php'; $xml = getContents($dataUrl) - or returnServerError('Could not request 3dsdb: ' . $dataUrl); + or returnServerError('Could not request URL: ' . $dataUrl); $limit = 0; foreach(array_reverse(explode('', $xml)) as $element) { @@ -52,17 +55,25 @@ class Releases3DSBridge extends BridgeAbstract { $ignSearchUrl = 'https://www.ign.com/search?q=' . urlencode($name); if($ignResult = getSimpleHTMLDOMCached($ignSearchUrl)) { - $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; - $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; - $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; - $ignDate = strtotime(trim($ignResult->find('span.publish-date', 0)->plaintext)); - $ignDescription = '
    ' - . $ignDesc - . ' More at IGN
    '; + $ignCoverArt = $ignResult->find('div.search-item-media', 0); + $ignDesc = $ignResult->find('div.search-item-description', 0); + $ignLink = $ignResult->find('div.search-item-sub-title', 0); + $ignDate = $ignResult->find('span.publish-date', 0); + if (is_object($ignCoverArt)) + $ignCoverArt = $ignCoverArt->find('img', 0); + if (is_object($ignLink)) + $ignLink = $ignLink->find('a', 1); + if (is_object($ignDate)) + $ignDate = strtotime(trim($ignDate->plaintext)); + if (is_object($ignCoverArt) && is_object($ignDesc) && is_object($ignLink)) { + $ignDescription = '
    ' + . $ignDesc->plaintext + . ' More at IGN
    '; + } } //Main section : Release description from 3DS database @@ -111,7 +122,7 @@ class Releases3DSBridge extends BridgeAbstract { private function typeToString($type){ switch($type) { - case 1: return '3DS Game'; + case 1: return 'Card Game'; case 4: return 'eShop'; default: return '??? (' . $type . ')'; } diff --git a/bridges/ReleasesSwitchBridge.php b/bridges/ReleasesSwitchBridge.php new file mode 100644 index 00000000..89ca76d5 --- /dev/null +++ b/bridges/ReleasesSwitchBridge.php @@ -0,0 +1,17 @@ +collectDataUrl(self::URI . 'xml.php'); + } +} diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 41f0f70f..6148be13 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -8,6 +8,7 @@ class ReporterreBridge extends BridgeAbstract { private function extractContent($url){ $html2 = getSimpleHTMLDOM($url); + $html2 = defaultLinkTo($html2, self::URI); foreach($html2->find('div[style=text-align:justify]') as $e) { $text = $e->outertext; @@ -16,13 +17,6 @@ class ReporterreBridge extends BridgeAbstract { $html2->clear(); unset($html2); - // Replace all relative urls with absolute ones - $text = preg_replace( - '/(href|src)(\=[\"\'])(?!http)([^"\']+)/ims', - '$1$2' . self::URI . '$3', - $text - ); - $text = strip_tags($text, '


    '); return $text; } diff --git a/bridges/RobinhoodSnacksBridge.php b/bridges/RobinhoodSnacksBridge.php new file mode 100644 index 00000000..e123146c --- /dev/null +++ b/bridges/RobinhoodSnacksBridge.php @@ -0,0 +1,27 @@ +find('#root > div > div > div > div > div > a') as $element) { + if ($element->href === 'https://snacks.robinhood.com/newsletters/page/2/') { + continue; + } + + $this->items[] = array( + 'uri' => $element->href, + 'title' => $element->find('div > div', 3)->plaintext, + 'content' => $element->find('div > div', 4)->plaintext, + ); + } + } +} diff --git a/bridges/RoosterTeethBridge.php b/bridges/RoosterTeethBridge.php new file mode 100644 index 00000000..496c7de8 --- /dev/null +++ b/bridges/RoosterTeethBridge.php @@ -0,0 +1,107 @@ + array( + 'channel' => array( + 'type' => 'list', + 'name' => 'Channel', + 'title' => 'Select a channel to filter by', + 'values' => array( + 'All channels' => 'all', + 'Achievement Hunter' => 'achievement-hunter', + 'Cow Chop' => 'cow-chop', + 'Death Battle' => 'death-battle', + 'Funhaus' => 'funhaus', + 'Inside Gaming' => 'inside-gaming', + 'JT Music' => 'jt-music', + 'Kinda Funny' => 'kinda-funny', + 'Rooster Teeth' => 'rooster-teeth', + 'Sugar Pine 7' => 'sugar-pine-7' + ) + ), + 'sort' => array( + 'type' => 'list', + 'name' => 'Sort', + 'title' => 'Select a sort order', + 'values' => array( + 'Newest -> Oldest' => 'desc', + 'Oldest -> Newest' => 'asc' + ), + 'defaultValue' => 'desc' + ), + 'first' => array( + 'type' => 'list', + 'name' => 'RoosterTeeth First', + 'title' => 'Select whether to include "First" videos before they are public', + 'values' => array( + 'True' => true, + 'False' => false + ) + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Maximum number of items to return', + 'defaultValue' => 10 + ) + ) + ); + + public function collectData() { + if ($this->getInput('channel') !== 'all') { + $uri = self::API + . 'api/v1/episodes?per_page=' + . 
$this->getInput('limit') + . '&channel_id=' + . $this->getInput('channel') + . '&order=' . $this->getInput('sort') + . '&page=1'; + + $htmlJSON = getSimpleHTMLDOM($uri) + or returnServerError('Could not contact Rooster Teeth: ' . $uri); + } else { + $uri = self::API + . '/api/v1/episodes?per_page=' + . $this->getInput('limit') + . '&filter=all&order=' + . $this->getInput('sort') + . '&page=1'; + + $htmlJSON = getSimpleHTMLDOM($uri) + or returnServerError('Could not contact Rooster Teeth: ' . $uri); + } + + $htmlArray = json_decode($htmlJSON, true); + + foreach($htmlArray['data'] as $key => $value) { + $item = array(); + + if (!$this->getInput('first') && $value['attributes']['is_sponsors_only']) { + continue; + } + + $publicDate = date_create($value['attributes']['member_golive_at']); + $dateDiff = date_diff($publicDate, date_create(), false); + + if (!$this->getInput('first') && $dateDiff->invert == 1) { + continue; + } + + $item['uri'] = self::URI . $value['canonical_links']['self']; + $item['title'] = $value['attributes']['title']; + $item['timestamp'] = $value['attributes']['member_golive_at']; + $item['author'] = $value['attributes']['show_title']; + + $this->items[] = $item; + } + } +} diff --git a/bridges/Rule34pahealBridge.php b/bridges/Rule34pahealBridge.php index 0e13ed03..c12dfe00 100644 --- a/bridges/Rule34pahealBridge.php +++ b/bridges/Rule34pahealBridge.php @@ -8,12 +8,14 @@ class Rule34pahealBridge extends Shimmie2Bridge { const URI = 'https://rule34.paheal.net/'; const DESCRIPTION = 'Returns images from given page'; + const PATHTODATA = '.shm-thumb'; + protected function getItemFromElement($element){ $item = array(); - $item['uri'] = $this->getURI() . $element->href; + $item['uri'] = rtrim($this->getURI(), '/') . 
$element->find('.shm-thumb-link', 0)->href; $item['id'] = (int)preg_replace('/[^0-9]/', '', $element->getAttribute(static::IDATTRIBUTE)); $item['timestamp'] = time(); - $thumbnailUri = $element->find('img', 0)->src; + $thumbnailUri = $element->find('a', 1)->href; $item['tags'] = $element->getAttribute('data-tags'); $item['title'] = $this->getName() . ' | ' . $item['id']; $item['content'] = 'content; + $description = $pageHtml->find('meta[property="og:description"]', 0)->content; + + foreach ($pageHtml->find('ul.interest_pills li') as $pills) { + $item['categories'][] = $pills->plaintext; + } + + $item['content'] = <<{$description}

    +EOD; + + $item['enclosures'][] = $image; + + $this->items[] = $item; + + if (count($this->items) >= 15) { + break; + } + } + } + + public function getName() { + + if ($this->feedName) { + return $this->feedName . ' - Scribd'; + } + + return parent::getName(); + } + + public function getURI() { + + if (!is_null($this->getInput('profile'))) { + preg_match($this->profileUrlRegex, $this->getInput('profile'), $user) + or returnServerError('Could not extract user ID and name from given profile URL.'); + + return self::URI . '/' . $user[1] . '/uploads'; + } + + return parent::getURI(); + } +} diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 7ac35f2c..9126c316 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -3,15 +3,11 @@ class SensCritiqueBridge extends BridgeAbstract { const MAINTAINER = 'kranack'; const NAME = 'Sens Critique'; - const URI = 'http://www.senscritique.com/'; + const URI = 'https://www.senscritique.com/'; const CACHE_TIMEOUT = 21600; // 6h const DESCRIPTION = 'Sens Critique news'; const PARAMETERS = array( array( - 'm' => array( - 'name' => 'Movies', - 'type' => 'checkbox' - ), 's' => array( 'name' => 'Series', 'type' => 'checkbox' @@ -40,8 +36,6 @@ class SensCritiqueBridge extends BridgeAbstract { if($this->getInput($category)) { $uri = self::URI; switch($category) { - case 'm': $uri .= 'films/cette-semaine'; - break; case 's': $uri .= 'series/actualite'; break; case 'g': $uri .= 'jeuxvideo/actualite'; @@ -77,20 +71,25 @@ class SensCritiqueBridge extends BridgeAbstract { . ' ' . $movie->find('.elco-date', 0)->plaintext; - $item['content'] = '' - . $movie->find('.elco-original-title', 0)->plaintext - . '

    ' - . $movie->find('.elco-baseline', 0)->plaintext + $item['content'] = ''; + $originalTitle = $movie->find('.elco-original-title', 0); + $description = $movie->find('.elco-description', 0); + + if ($originalTitle) { + $item['content'] = '' . $originalTitle->plaintext . '

    '; + } + + $item['content'] .= $movie->find('.elco-baseline', 0)->plaintext . '
    ' . $movie->find('.elco-baseline', 1)->plaintext . '

    ' - . $movie->find('.elco-description', 0)->plaintext + . ($description ? $description->plaintext : '') . '

    ' . trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) . ' / 10'; - $item['id'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; - $item['uri'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); + $item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); $this->items[] = $item; } } diff --git a/bridges/SeznamZpravyBridge.php b/bridges/SeznamZpravyBridge.php new file mode 100644 index 00000000..bfd0f1d0 --- /dev/null +++ b/bridges/SeznamZpravyBridge.php @@ -0,0 +1,91 @@ + array( + 'author' => array( + 'name' => 'Author String', + 'type' => 'text', + 'required' => true, + 'title' => 'The dash-separated author string, as shown in the URL bar.', + 'pattern' => '[a-z]+-[a-z]+-[0-9]+', + 'exampleValue' => 'janek-rubes-506' + ), + ) + ); + + private $feedName; + + public function getName() { + if (isset($this->feedName)) { + return $this->feedName; + } + return parent::getName(); + } + + public function collectData() { + $ONE_DAY = 86500; + switch($this->queriedContext) { + case 'By Author': + $url = 'https://www.seznamzpravy.cz/autor/'; + $selectors = array( + 'breadcrumbs' => 'div[data-dot=ogm-breadcrumb-navigation]', + 'article_list' => 'ul.ogm-document-timeline-page.atm-list-ul li article[data-dot=mol-timeline-item]', + 'article_title' => 'a[data-dot=mol-article-card-title]', + 'article_dm' => 'span.mol-formatted-date__date', + 'article_time' => 'span.mol-formatted-date__time', + 'article_content' => 'div[data-dot=ogm-article-content]' + ); + + $html = getSimpleHTMLDOMCached($url . $this->getInput('author'), $ONE_DAY); + $main_breadcrumbs = $html->find($selectors['breadcrumbs'], 0); + $author = $main_breadcrumbs->last_child()->plaintext + or returnServerError('Could not get author on: ' . $this->getURI()); + $this->feedName = $author . 
' - Seznam Zprávy'; + + $articles = $html->find($selectors['article_list']) + or returnServerError('Could not find articles on: ' . $this->getURI()); + + foreach ($articles as $article) { + $title_link = $article->find($selectors['article_title'], 0) + or returnServerError('Could not find title on: ' . $this->getURI()); + + $article_url = $title_link->href; + $article_content_html = getSimpleHTMLDOMCached($article_url, $ONE_DAY); + $content_e = $article_content_html->find($selectors['article_content'], 0); + $content_text = $content_e->innertext + or returnServerError('Could not get article content for: ' . $article_url); + + $breadcrumbs_e = $article_content_html->find($selectors['breadcrumbs'], 0); + $breadcrumbs = $breadcrumbs_e->children(); + $num_breadcrumbs = count($breadcrumbs); + $categories = array(); + foreach ($breadcrumbs as $cat) { + if (--$num_breadcrumbs <= 0) { + break; + } + $categories[] = trim($cat->plaintext); + } + + $article_dm_e = $article->find($selectors['article_dm'], 0); + $article_dm_text = $article_dm_e->plaintext; + $article_dmy = preg_replace('/[^0-9\.]/', '', $article_dm_text) . date('Y'); + $article_time = $article->find($selectors['article_time'], 0)->plaintext; + $item = array( + 'title' => $title_link->plaintext, + 'uri' => $title_link->href, + 'timestamp' => strtotime($article_dmy . ' ' . 
$article_time), + 'author' => $author, + 'content' => $content_text, + 'categories' => $categories + ); + $this->items[] = $item; + } + break; + } + $this->items[] = $item; + } +} diff --git a/bridges/SkimfeedBridge.php b/bridges/SkimfeedBridge.php index 1b78baf2..3016f236 100644 --- a/bridges/SkimfeedBridge.php +++ b/bridges/SkimfeedBridge.php @@ -455,6 +455,35 @@ class SkimfeedBridge extends BridgeAbstract { } + public function detectParameters($url) { + + if (0 !== strpos($url, static::URI)) { + return null; + } + + foreach(self::PARAMETERS as $channels) { + + foreach($channels as $box_name => $box) { + + foreach($box['values'] as $name => $channel_url) { + + if (static::URI . $channel_url === $url) { + return array( + $box_name => $name, + ); + + } + + } + + } + + } + + return null; + + } + public function getName() { switch($this->queriedContext) { diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 9607d33d..45e6fed1 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -11,44 +11,57 @@ class SoundCloudBridge extends BridgeAbstract { 'u' => array( 'name' => 'username', 'required' => true + ), + 't' => array( + 'name' => 'type', + 'type' => 'list', + 'defaultValue' => 'tracks', + 'values' => array( + 'Tracks' => 'tracks', + 'Playlists' => 'playlists' + ) ) )); + private $feedTitle = null; private $feedIcon = null; private $clientIDCache = null; public function collectData(){ $res = $this->apiGet('resolve', array( - 'url' => 'http://www.soundcloud.com/' . $this->getInput('u') + 'url' => 'https://soundcloud.com/' . $this->getInput('u') )) or returnServerError('No results for this query'); + $this->feedTitle = $res->username; $this->feedIcon = $res->avatar_url; - $tracks = $this->apiGet('users/' . urlencode($res->id) . '/tracks') - or returnServerError('No results for this user'); + $tracks = $this->apiGet( + 'users/' . urlencode($res->id) . '/' . 
$this->getInput('t'), + array('limit' => 31) + ) or returnServerError('No results for this user/playlist'); - $numTracks = min(count($tracks), 10); - for($i = 0; $i < $numTracks; $i++) { + foreach ($tracks->collection as $index => $track) { $item = array(); - $item['author'] = $tracks[$i]->user->username; - $item['title'] = $tracks[$i]->user->username . ' - ' . $tracks[$i]->title; - $item['timestamp'] = strtotime($tracks[$i]->created_at); - $item['content'] = $tracks[$i]->description; - $item['enclosures'] = array($tracks[$i]->uri - . '/stream?client_id=' - . $this->getClientID()); + $item['author'] = $track->user->username; + $item['title'] = $track->user->username . ' - ' . $track->title; + $item['timestamp'] = strtotime($track->created_at); + $item['content'] = nl2br($track->description); + $item['enclosures'][] = $track->artwork_url; $item['id'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $item['uri'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $this->items[] = $item; - } + if (count($this->items) >= 10) { + break; + } + } } public function getIcon(){ @@ -59,9 +72,13 @@ class SoundCloudBridge extends BridgeAbstract { return parent::getIcon(); } + public function getURI(){ + return 'https://soundcloud.com/' . $this->getInput('u'); + } + public function getName(){ - if(!is_null($this->getInput('u'))) { - return self::NAME . ' - ' . $this->getInput('u'); + if($this->feedTitle) { + return $this->feedTitle . ' - ' . 
self::NAME; } return parent::getName(); @@ -95,14 +112,14 @@ class SoundCloudBridge extends BridgeAbstract { // Without url=http, this returns a 404 $playerHTML = getContents('https://w.soundcloud.com/player/?url=http') - or returnServerError('Unable to get player page.'); + or returnServerError('Unable to get player page.'); $regex = '/widget-.+?\.js/'; if(preg_match($regex, $playerHTML, $matches) == false) returnServerError('Unable to find widget JS URL.'); $widgetURL = 'https://widget.sndcdn.com/' . $matches[0]; $widgetJS = getContents($widgetURL) - or returnServerError('Unable to get widget JS page.'); + or returnServerError('Unable to get widget JS page.'); $regex = '/client_id.*?"(.+?)"/'; if(preg_match($regex, $widgetJS, $matches) == false) returnServerError('Unable to find client ID.'); @@ -113,13 +130,13 @@ class SoundCloudBridge extends BridgeAbstract { } private function buildAPIURL($endpoint, $parameters){ - return 'https://api.soundcloud.com/' + return 'https://api-v2.soundcloud.com/' . $endpoint . '?' . 
http_build_query($parameters); } - private function apiGet($endpoint, $parameters = array()){ + private function apiGet($endpoint, $parameters = array()) { $parameters['client_id'] = $this->getClientID(); try { diff --git a/bridges/TelegramBridge.php b/bridges/TelegramBridge.php index 3afc2831..a7296b8a 100644 --- a/bridges/TelegramBridge.php +++ b/bridges/TelegramBridge.php @@ -39,8 +39,8 @@ class TelegramBridge extends BridgeAbstract { $item = array(); $item['uri'] = $this->processUri($messageDiv); - $item['content'] = html_entity_decode($this->processContent($messageDiv), ENT_QUOTES); - $item['title'] = html_entity_decode($this->itemTitle, ENT_QUOTES); + $item['content'] = $this->processContent($messageDiv); + $item['title'] = $this->itemTitle; $item['timestamp'] = $this->processDate($messageDiv); $item['enclosures'] = $this->enclosures; $author = trim($messageDiv->find('a.tgme_widget_message_owner_name', 0)->plaintext); diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index 8060c947..54fd0d27 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -11,14 +11,14 @@ class TheCodingLoveBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request The Coding Love.'); - foreach($html->find('div.post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); - $temp = $element->find('h3 a', 0); + $temp = $element->find('h1 a', 0); - $titre = $temp->innertext; + $title = $temp->innertext; $url = $temp->href; - $temp = $element->find('div.bodytype', 0); + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); @@ -28,17 +28,13 @@ class TheCodingLoveBridge extends BridgeAbstract { } $content = $temp->innertext; - $auteur = $temp->find('i', 0); - $pos = strpos($auteur->innertext, 'by'); - - if($pos > 0) { - $auteur = trim(str_replace('*/', '', 
substr($auteur->innertext, ($pos + 2)))); - $item['author'] = $auteur; - } + $temp = $element->find('div.post-meta-info', 0); + $author = $temp->find('span', 0); + $item['author'] = $author->innertext; $item['content'] .= trim($content); $item['uri'] = $url; - $item['title'] = trim($titre); + $item['title'] = trim($title); $this->items[] = $item; } diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php index 687b620c..1e710b31 100644 --- a/bridges/TheHackerNewsBridge.php +++ b/bridges/TheHackerNewsBridge.php @@ -17,6 +17,7 @@ class TheHackerNewsBridge extends BridgeAbstract { $article_url = $element->find('a.story-link', 0)->href; $article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext); + $article_author = str_replace('', '', $article_author); $article_title = $element->find('h2.home-title', 0)->plaintext; //Date without time diff --git a/bridges/TinyLetterBridge.php b/bridges/TinyLetterBridge.php new file mode 100644 index 00000000..e9860b54 --- /dev/null +++ b/bridges/TinyLetterBridge.php @@ -0,0 +1,54 @@ + array( + 'name' => 'User Name', + 'exampleValue' => 'forwards', + ) + ) + ); + + public function getName() { + $username = $this->getInput('username'); + if (!is_null($username)) { + return static::NAME . ' | ' . $username; + } + + return parent::getName(); + } + + public function getURI() { + $username = $this->getInput('username'); + if (!is_null($username)) { + return static::URI . urlencode($username); + } + + return parent::getURI(); + } + + public function collectData() { + $archives = self::getURI() . 
'/archive'; + $html = getSimpleHTMLDOMCached($archives) + or returnServerError('Could not load content'); + + foreach($html->find('.message-list li') as $element) { + $item = array(); + + $snippet = $element->find('p.message-snippet', 0); + $link = $element->find('.message-link', 0); + + $item['title'] = $link->plaintext; + $item['content'] = $snippet->innertext; + $item['uri'] = $link->href; + $item['timestamp'] = strtotime($element->find('.message-date', 0)->plaintext); + + $this->items[] = $item; + } + + } +} diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index 39b46010..8b43a317 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -20,7 +20,9 @@ class TwitchBridge extends BridgeAbstract { 'All' => 'all', 'Archive' => 'archive', 'Highlights' => 'highlight', - 'Uploads' => 'upload' + 'Uploads' => 'upload', + 'Past Premieres' => 'past_premiere', + 'Premiere Uploads' => 'premiere_upload' ), 'defaultValue' => 'archive' ) @@ -32,43 +34,90 @@ class TwitchBridge extends BridgeAbstract { */ const CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'; + const API_ENDPOINT = 'https://gql.twitch.tv/gql'; + const BROADCAST_TYPES = array( + 'all' => array( + 'ARCHIVE', + 'HIGHLIGHT', + 'UPLOAD', + 'PAST_PREMIERE', + 'PREMIERE_UPLOAD' + ), + 'archive' => 'ARCHIVE', + 'highlight' => 'HIGHLIGHT', + 'upload' => 'UPLOAD', + 'past_premiere' => 'PAST_PREMIERE', + 'premiere_upload' => 'PREMIERE_UPLOAD' + ); + public function collectData(){ - // get channel user - $query_data = array( - 'login' => $this->getInput('channel') + $query = <<<'EOD' +query VODList($channel: String!, $types: [BroadcastType!]) { + user(login: $channel) { + displayName + videos(types: $types, sort: TIME) { + edges { + node { + id + title + publishedAt + lengthSeconds + viewCount + thumbnailURLs(width: 640, height: 360) + previewThumbnailURL(width: 640, height: 360) + description + tags + contentTags { + isLanguageTag + localizedName + } + game { + displayName + } + 
moments(momentRequestType: VIDEO_CHAPTER_MARKERS) { + edges { + node { + description + positionMilliseconds + } + } + } + } + } + } + } +} +EOD; + $variables = array( + 'channel' => $this->getInput('channel'), + 'types' => self::BROADCAST_TYPES[$this->getInput('type')] ); - $users = $this->apiGet('users', $query_data)->users; - if(count($users) === 0) - returnClientError('User "' - . $this->getInput('channel') - . '" could not be found'); - $user = $users[0]; + $data = $this->apiRequest($query, $variables); - // get video list - $query_endpoint = 'channels/' . $user->_id . '/videos'; - $query_data = array( - 'broadcast_type' => $this->getInput('type'), - 'limit' => 10 - ); - $videos = $this->apiGet($query_endpoint, $query_data)->videos; + $user = $data->user; + foreach($user->videos->edges as $edge) { + $video = $edge->node; + + $url = 'https://www.twitch.tv/videos/' . $video->id; - foreach($videos as $video) { $item = array( - 'uri' => $video->url, + 'uri' => $url, 'title' => $video->title, - 'timestamp' => $video->published_at, - 'author' => $video->channel->display_name, + 'timestamp' => $video->publishedAt, + 'author' => $user->displayName, ); // Add categories for tags and played game - $item['categories'] = array_filter(explode(' ', $video->tag_list)); - if(!empty($video->game)) - $item['categories'][] = $video->game; + $item['categories'] = $video->tags; + if(!is_null($video->game)) + $item['categories'][] = $video->game->displayName; + foreach($video->contentTags as $tag) + if(!$tag->isLanguageTag) + $item['categories'][] = $tag->localizedName; // Add enclosures for thumbnails from a few points in the video - $item['enclosures'] = array(); - foreach($video->thumbnails->large as $thumbnail) - $item['enclosures'][] = $thumbnail->url; + // Thumbnail list has duplicate entries sometimes so remove those + $item['enclosures'] = array_unique($video->thumbnailURLs); /* * Content format example: @@ -86,44 +135,45 @@ class TwitchBridge extends BridgeAbstract { * */ 
$item['content'] = '

    ' - . $video->description_html + . $video->description // in markdown format . '

    Duration: ' - . $this->formatTimestampTime($video->length) + . $this->formatTimestampTime($video->lengthSeconds) . '
    Views: ' - . $video->views + . $video->viewCount . '

    '; // Add played games list to content - $video_id = trim($video->_id, 'v'); // _id gives 'v1234' but API wants '1234' - $markers = $this->apiGet('videos/' . $video_id . '/markers')->markers; - $item['content'] .= '

    Played games:

    • 00:00:00 - ' - . $video->game - . '
    • '; - if(isset($markers->game_changes)) { - usort($markers->game_changes, function($a, $b) { - return $a->time - $b->time; - }); - foreach($markers->game_changes as $game_change) { - $item['categories'][] = $game_change->label; + $item['content'] .= '

      Played games:

      '; + $item['categories'] = array_unique($item['categories']); + $this->items[] = $item; } } @@ -144,25 +194,37 @@ class TwitchBridge extends BridgeAbstract { $seconds % 60); } - /* - * Ideally the new 'helix' API should be used as v5/'kraken' is deprecated. - * The new API however still misses many features (markers, played game..) of - * the old one, so let's use the old one for as long as it's available. - */ - private function apiGet($endpoint, $query_data = array()) { - $query_data['api_version'] = 5; - $url = 'https://api.twitch.tv/kraken/' - . $endpoint - . '?' - . http_build_query($query_data); + // GraphQL: https://graphql.org/ + // Tool for developing/testing queries: https://github.com/skevy/graphiql-app + private function apiRequest($query, $variables) { + $request = array( + 'query' => $query, + 'variables' => $variables + ); $header = array( 'Client-ID: ' . self::CLIENT_ID ); + $opts = array( + CURLOPT_CUSTOMREQUEST => 'POST', + CURLOPT_POSTFIELDS => json_encode($request) + ); - $data = json_decode(getContents($url, $header)) - or returnServerError('API request to "' . $url . '" failed.'); + Debug::log("Sending GraphQL query:\n" . $query); + Debug::log("Sending GraphQL variables:\n" + . json_encode($variables, JSON_PRETTY_PRINT)); - return $data; + $response = json_decode(getContents(self::API_ENDPOINT, $header, $opts)) + or returnServerError('API request to "' . self::API_ENDPOINT . '" failed.'); + + Debug::log("Got GraphQL response:\n" + . json_encode($response, JSON_PRETTY_PRINT)); + + if(isset($response->errors)) { + $messages = array_column($response->errors, 'message'); + returnServerError('API error(s): ' . 
implode("\n", $messages)); + } + + return $response->data; } public function getName(){ diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 2f5565b1..c62a6595 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -2,6 +2,9 @@ class TwitterBridge extends BridgeAbstract { const NAME = 'Twitter Bridge'; const URI = 'https://twitter.com/'; + const API_URI = 'https://api.twitter.com'; + const GUEST_TOKEN_USES = 100; + const GUEST_TOKEN_EXPIRY = 300; // 5min const CACHE_TIMEOUT = 300; // 5min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'pmaziere'; @@ -72,6 +75,12 @@ EOD 'required' => false, 'type' => 'checkbox', 'title' => 'Hide retweets' + ), + 'nopinned' => array( + 'name' => 'Without pinned tweet', + 'required' => false, + 'type' => 'checkbox', + 'title' => 'Hide pinned tweet' ) ), 'By list' => array( @@ -92,6 +101,20 @@ EOD 'required' => false, 'title' => 'Specify term to search for' ) + ), + 'By list ID' => array( + 'listid' => array( + 'name' => 'List ID', + 'exampleValue' => '31748', + 'required' => true, + 'title' => 'Insert the list id' + ), + 'filter' => array( + 'name' => 'Filter', + 'exampleValue' => '#rss-bridge', + 'required' => false, + 'title' => 'Specify term to search for' + ) ) ); @@ -142,6 +165,8 @@ EOD break; case 'By list': return $this->getInput('list') . ' - Twitter list by ' . $this->getInput('user'); + case 'By list ID': + return 'Twitter List #' . $this->getInput('listid'); default: return parent::getName(); } return 'Twitter ' . $specific . $this->getInput($param); @@ -164,22 +189,46 @@ EOD . urlencode($this->getInput('user')) . '/lists/' . str_replace(' ', '-', strtolower($this->getInput('list'))); + case 'By list ID': + return self::URI + . 'i/lists/' + . urlencode($this->getInput('listid')); default: return parent::getURI(); } } + private function getApiURI() { + switch($this->queriedContext) { + case 'By keyword or hashtag': + return self::API_URI + . '/2/search/adaptive.json?q=' + . 
urlencode($this->getInput('q')) + . '&tweet_mode=extended&tweet_search_mode=live'; + case 'By username': + return self::API_URI + . '/2/timeline/profile/' + . $this->getRestId($this->getInput('u')) + . '.json?tweet_mode=extended'; + case 'By list': + return self::API_URI + . '/2/timeline/list.json?list_id=' + . $this->getListId($this->getInput('user'), $this->getInput('list')) + . '&tweet_mode=extended'; + case 'By list ID': + return self::API_URI + . '/2/timeline/list.json?list_id=' + . $this->getInput('listid') + . '&tweet_mode=extended'; + default: returnServerError('Invalid query context !'); + } + } + public function collectData(){ $html = ''; $page = $this->getURI(); + $data = json_decode($this->getApiContents($this->getApiURI())); - if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) { - $cookies = $this->getCookies($page); - $html = getSimpleHTMLDOM($page, array("Cookie: $cookies")); - } else { - $html = getSimpleHTMLDOM($page, array(), array(CURLOPT_COOKIEFILE => '')); - } - - if(!$html) { + if(!$data) { switch($this->queriedContext) { case 'By keyword or hashtag': returnServerError('No results for this query.'); @@ -192,75 +241,93 @@ EOD $hidePictures = $this->getInput('nopic'); - foreach($html->find('div.js-stream-tweet') as $tweet) { + $promotedTweetIds = array_reduce($data->timeline->instructions[0]->addEntries->entries, function($carry, $entry) { + if (!isset($entry->content->item)) { + return $carry; + } + $tweet = $entry->content->item->content->tweet; + if (isset($tweet->promotedMetadata)) { + $carry[] = $tweet->id; + } + return $carry; + }, array()); - // Skip retweets? 
- if($this->getInput('noretweet') - && $tweet->find('div.context span.js-retweet-text a', 0)) { + $hidePinned = $this->getInput('nopinned'); + if ($hidePinned) { + $pinnedTweetId = null; + if (isset($data->timeline->instructions[1]) && isset($data->timeline->instructions[1]->pinEntry)) { + $pinnedTweetId = $data->timeline->instructions[1]->pinEntry->entry->content->item->content->tweet->id; + } + } + + foreach($data->globalObjects->tweets as $tweet) { + + /* Debug::log('>>> ' . json_encode($tweet)); */ + // Skip spurious retweets + if (isset($tweet->retweeted_status_id_str) && substr($tweet->full_text, 0, 4) === 'RT @') { continue; } - // remove 'invisible' content - foreach($tweet->find('.invisible') as $invisible) { - $invisible->outertext = ''; + // Skip promoted tweets + if (in_array($tweet->id_str, $promotedTweetIds)) { + continue; } - // Skip protmoted tweets - $heading = $tweet->previousSibling(); - if(!is_null($heading) && - $heading->getAttribute('class') === 'promoted-tweet-heading' - ) { + // Skip pinned tweet + if ($hidePinned && $tweet->id_str === $pinnedTweetId) { continue; } $item = array(); // extract username and sanitize - $item['username'] = htmlspecialchars_decode($tweet->getAttribute('data-screen-name'), ENT_QUOTES); - // extract fullname (pseudonym) - $item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES); - // get author + $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects); + + $item['username'] = $user_info->screen_name; + $item['fullname'] = $user_info->name; $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; - if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) { - $item['author'] .= ' RT: @' . $rt->plaintext; + if (null !== $this->getInput('u') && strtolower($item['username']) != strtolower($this->getInput('u'))) { + $item['author'] .= ' RT: @' . 
$this->getInput('u'); } - // get avatar link - $item['avatar'] = $tweet->find('img', 0)->src; - // get TweetID - $item['id'] = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item['uri'] = self::URI . substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1); + $item['avatar'] = $user_info->profile_image_url_https; + + $item['id'] = $tweet->id_str; + $item['uri'] = self::URI . $item['username'] . '/status/' . $item['id']; // extract tweet timestamp - $item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); - // generate the title - $item['title'] = strip_tags($this->fixAnchorSpacing(htmlspecialchars_decode( - $tweet->find('p.js-tweet-text', 0), ENT_QUOTES), '')); + $item['timestamp'] = $tweet->created_at; - switch($this->queriedContext) { - case 'By list': - // Check if filter applies to list (using raw content) - if($this->getInput('filter')) { - if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) { - continue 2; // switch + for-loop! - } - } - break; - default: + // Convert plain text URLs into HTML hyperlinks + $cleanedTweet = $tweet->full_text; + $foundUrls = false; + + if (isset($tweet->entities->media)) { + foreach($tweet->entities->media as $media) { + $cleanedTweet = str_replace($media->url, + '' . $media->display_url . '', + $cleanedTweet); + $foundUrls = true; + } } + if (isset($tweet->entities->urls)) { + foreach($tweet->entities->urls as $url) { + $cleanedTweet = str_replace($url->url, + '' . $url->display_url . 
'', + $cleanedTweet); + $foundUrls = true; + } + } + if ($foundUrls === false) { + // fallback to regex'es + $reg_ex = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/'; + if(preg_match($reg_ex, $tweet->full_text, $url)) { + $cleanedTweet = preg_replace($reg_ex, + "{$url[0]} ", + $cleanedTweet); + } + } + // generate the title + $item['title'] = strip_tags($cleanedTweet); - $this->processContentLinks($tweet); - $this->processEmojis($tweet); - - // get tweet text - $cleanedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $tweet->find('p.js-tweet-text', 0)->innertext - ); - - // fix anchors missing spaces in-between - $cleanedTweet = $this->fixAnchorSpacing($cleanedTweet); - - // Add picture to content + // Add avatar $picture_html = ''; if(!$hidePictures) { $picture_html = <<getImageURI($tweet); - if(!$this->getInput('noimg') && !is_null($images)) { + // Get images + $media_html = ''; + if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) { + foreach($tweet->extended_entities->media as $media) { + switch($media->type) { + case 'photo': + $image = $media->media_url_https . '?name=orig'; + $display_image = $media->media_url_https; + // add enclosures + $item['enclosures'][] = $image; - foreach ($images as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; - - // add enclosures - $item['enclosures'][] = $image_orig; - - $image_html .= << + $media_html .= << + referrerpolicy="no-referrer" + src="{$display_image}" /> EOD; + break; + case 'video': + case 'animated_gif': + if(isset($media->video_info)) { + $link = $media->expanded_url; + $poster = $media->media_url_https; + $video = null; + $maxBitrate = -1; + foreach($media->video_info->variants as $variant) { + $bitRate = isset($variant->bitrate) ? 
$variant->bitrate : -100; + if ($bitRate > $maxBitrate) { + $maxBitrate = $bitRate; + $video = $variant->url; + } + } + if(!is_null($video)) { + // add enclosures + $item['enclosures'][] = $video; + $item['enclosures'][] = $poster; + + $media_html .= <<Video +
    -
    {$image_html}
    +
    {$media_html}
    EOD; - // add quoted tweet - $quotedTweet = $tweet->find('div.QuoteTweet', 0); - if($quotedTweet) { - // get tweet text - $cleanedQuotedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $quotedTweet->find('div.tweet-text', 0)->innertext - ); - - $this->processContentLinks($quotedTweet); - $this->processEmojis($quotedTweet); - - // Add embeded image to content - $quotedImage_html = ''; - $quotedImages = $this->getQuotedImageURI($tweet); - - if(!$this->getInput('noimg') && !is_null($quotedImages)) { - - foreach ($quotedImages as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; - - // add enclosures - $item['enclosures'][] = $image_orig; - - $quotedImage_html .= << - - -EOD; - } - } - - $item['content'] = << -
    -
    {$cleanedQuotedTweet}
    -
    -
    -
    {$quotedImage_html}
    -
    -EOD; - } $item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES); // put out $this->items[] = $item; } + + usort($this->items, array('TwitterBridge', 'compareTweetId')); } - private function processEmojis($tweet){ - // process emojis (reduce size) - foreach($tweet->find('img.Emoji') as $img) { - $img->style .= ' height: 1em;'; + private static function compareTweetId($tweet1, $tweet2) { + return (intval($tweet1['id']) < intval($tweet2['id']) ? 1 : -1); + } + + //The aim of this function is to get an API key and a guest token + //This function takes 2 requests, and therefore is cached + private function getApiKey() { + + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $r_cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $r_cache->setScope(get_called_class()); + $r_cache->setKey(array('refresh')); + $data = $r_cache->loadData(); + + $refresh = null; + if($data === null) { + $refresh = time(); + $r_cache->saveData($refresh); + } else { + $refresh = $data; } - } - private function processContentLinks($tweet){ - // processing content links - foreach($tweet->find('a') as $link) { - if($link->hasAttribute('data-expanded-url')) { - $link->href = $link->getAttribute('data-expanded-url'); + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope(get_called_class()); + $cache->setKey(array('api_key')); + $data = $cache->loadData(); + + $apiKey = null; + if($data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { + $twitterPage = getContents('https://twitter.com'); + + $jsLink = false; + $jsMainRegexArray = array( + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m', + 
'/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m', + ); + foreach ($jsMainRegexArray as $jsMainRegex) { + if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) { + $jsLink = $jsMainMatches[0][0]; + break; + } } - $link->removeAttribute('data-expanded-url'); - $link->removeAttribute('data-query-source'); - $link->removeAttribute('rel'); - $link->removeAttribute('class'); - $link->removeAttribute('target'); - $link->removeAttribute('title'); + if (!$jsLink) { + returnServerError('Could not locate main.js link'); + } + + $jsContent = getContents($jsLink); + $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; + preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0); + $apiKey = $apiKeyMatches[0][0]; + $cache->saveData($apiKey); + } else { + $apiKey = $data; } + + $cacheFac2 = new CacheFactory(); + $cacheFac2->setWorkingDir(PATH_LIB_CACHES); + $gt_cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $gt_cache->setScope(get_called_class()); + $gt_cache->setKey(array('guest_token')); + $guestTokenUses = $gt_cache->loadData(); + + $guestToken = null; + if($guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2 + || $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { + $guestToken = $this->getGuestToken(); + $gt_cache->saveData(array(self::GUEST_TOKEN_USES, $guestToken)); + $r_cache->saveData(time()); + } else { + $guestTokenUses[0] -= 1; + $gt_cache->saveData($guestTokenUses); + $guestToken = $guestTokenUses[1]; + } + + return array($apiKey, $guestToken); + } - private function fixAnchorSpacing($content){ - // fix anchors missing spaces in-between - return str_replace( - 'getApiKey(); + $headers = array('authorization: Bearer ' . $apiKeys[0], + 'x-guest-token: ' . 
$apiKeys[1], + ); + return getContents($uri, $headers); + } - $container = $tweet->find('div.AdaptiveMedia-container', 0); + private function getRestId($username) { + $searchparams = urlencode('{"screen_name":"' . strtolower($username) . '", "withHighlightedLabel":true}'); + $searchURL = self::API_URI . '/graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName?variables=' . $searchparams; + $searchResult = $this->getApiContents($searchURL); + $searchResult = json_decode($searchResult); + return $searchResult->data->user->rest_id; + } - if($container && $container->find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; + private function getListId($username, $listName) { + $searchparams = urlencode('{"screenName":"' + . strtolower($username) + . '", "listSlug": "' + . $listName + . '", "withHighlightedLabel":false}'); + $searchURL = self::API_URI . '/graphql/ErWsz9cObLel1BF-HjuBlA/ListBySlug?variables=' . $searchparams; + $searchResult = $this->getApiContents($searchURL); + $searchResult = json_decode($searchResult); + return $searchResult->data->user_by_screen_name->list->id_str; + } + + private function getUserInformation($userId, $apiData) { + foreach($apiData->users as $user) { + if($user->id_str == $userId) { + return $user; } } - - if (!empty($images)) { - return $images; - } - - return null; - } - - private function getQuotedImageURI($tweet){ - // Find media in tweet - $images = array(); - - $container = $tweet->find('div.QuoteMedia-container', 0); - - if($container && $container->find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; - } - } - - if (!empty($images)) { - return $images; - } - - return null; - } - - private function getCookies($pageURL){ - - $ctx = stream_context_create(array( - 'http' => array( - 'follow_location' => false - ) - ) - ); - $a = file_get_contents($pageURL, 0, $ctx); - - //First request to get the cookie - $cookies = ''; - foreach($http_response_header as $hdr) { - 
if(stripos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . $cLine; - } - } - - return substr($cookies, 2); } } diff --git a/bridges/UnraidCommunityApplicationsBridge.php b/bridges/UnraidCommunityApplicationsBridge.php new file mode 100644 index 00000000..1ab06e3c --- /dev/null +++ b/bridges/UnraidCommunityApplicationsBridge.php @@ -0,0 +1,71 @@ +apps = getContents(self::APPSURI) + or returnServerError('Could not fetch JSON for apps.'); + $this->apps = json_decode($this->apps, true)['applist']; + } + + private function sortApps() { + Debug::log('Sorting applications/plugins'); + usort($this->apps, function($app1, $app2) { + return $app1['FirstSeen'] < $app2['FirstSeen'] ? 1 : -1; + }); + } + + public function collectData() { + $this->fetchApps(); + $this->sortApps(); + + Debug::log('Building RSS feed'); + foreach($this->apps as $app) { + if(!array_key_exists('Language', $app)) { + $item = array(); + $item['title'] = $app['Name']; + $item['timestamp'] = $app['FirstSeen']; + $item['author'] = explode('\'', $app['Repo'])[0]; + $item['categories'] = explode(' ', $app['Category']); + $item['content'] = ''; + + if(array_key_exists('Icon', $app)) + $item['content'] .= ''; + + if(array_key_exists('Overview', $app)) + $item['content'] .= '

    ' + . $app['Overview'] + . '

    '; + + if(array_key_exists('Project', $app)) + $item['uri'] = $app['Project']; + + if(array_key_exists('Registry', $app)) + $item['content'] .= '
    Docker Hub'; + + if(array_key_exists('Support', $app)) + $item['content'] .= '
    Support'; + + $this->items[] = $item; + + if(count($this->items) >= 15) + break; + } + } + } +} diff --git a/bridges/VarietyBridge.php b/bridges/VarietyBridge.php index a2e61700..8bc48f46 100644 --- a/bridges/VarietyBridge.php +++ b/bridges/VarietyBridge.php @@ -8,7 +8,7 @@ class VarietyBridge extends FeedExpander { const DESCRIPTION = 'RSS feed for Variety'; public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/variety/headlines', 15); + $this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15); } protected function parseItem($newsItem){ diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 713b86f3..89c11278 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -232,11 +232,16 @@ class VkBridge extends BridgeAbstract $div->outertext = ''; } - // get sign + // get sign / post author $post_author = $pageName; - foreach($post->find('a.wall_signed_by') as $a) { - $post_author = $a->innertext; - $a->outertext = ''; + $author_selectors = array('a.wall_signed_by', 'a.author'); + foreach($author_selectors as $author_selector) { + $a = $post->find($author_selector, 0); + if (is_object($a)) { + $post_author = $a->innertext; + $a->outertext = ''; + break; + } } // fix links and get post hashtags @@ -274,16 +279,24 @@ class VkBridge extends BridgeAbstract } } - if (is_object($post->find('div.copy_quote', 0))) { + $copy_quote = $post->find('div.copy_quote', 0); + if (is_object($copy_quote)) { if ($this->getInput('hide_reposts') === true) { continue; } - $copy_quote = $post->find('div.copy_quote', 0); if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) { $copy_post_header->outertext = ''; } + + $second_copy_quote = $copy_quote->find('div.published_sec_quote', 0); + if (is_object($second_copy_quote)) { + $second_copy_quote_author = $second_copy_quote->find('a.copy_author', 0)->outertext; + $second_copy_quote_content = $second_copy_quote->find('div.copy_post_date', 
0)->outertext; + $second_copy_quote->outertext = "
    Reposted ($second_copy_quote_author): $second_copy_quote_content"; + } + $copy_quote_author = $copy_quote->find('a.copy_author', 0)->outertext; $copy_quote_content = $copy_quote->innertext; - $copy_quote->outertext = "
    Reposted:
    $copy_quote_content"; + $copy_quote->outertext = "
    Reposted ($copy_quote_author):
    $copy_quote_content"; } $item = array(); @@ -333,7 +346,7 @@ class VkBridge extends BridgeAbstract $data = json_decode($arg, true); if ($data == null) return; - $thumb = $data['temp']['base'] . $data['temp']['x_'][0] . '.jpg'; + $thumb = $data['temp']['base'] . $data['temp']['x_'][0]; $original = ''; foreach(array('y_', 'z_', 'w_') as $key) { if (!isset($data['temp'][$key])) continue; @@ -343,7 +356,7 @@ class VkBridge extends BridgeAbstract } else { $base = $data['temp']['base']; } - $original = $base . $data['temp'][$key][0] . '.jpg'; + $original = $base . $data['temp'][$key][0]; } if ($original) { @@ -355,7 +368,7 @@ class VkBridge extends BridgeAbstract private function getTitle($content) { - preg_match('/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); + preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); if (count($result) == 0) return 'untitled'; return $result[0]; } @@ -374,6 +387,8 @@ class VkBridge extends BridgeAbstract } elseif (strstr($strdate, 'yesterday ') !== false) { $time = time() - 60 * 60 * 24; $strdate = date('d-m-Y', $time) . ' ' . $strdate; + } elseif ($date['month'] && intval(date('m')) < $date['month']) { + $strdate = $strdate . ' ' . (date('Y') - 1); } else { $strdate = $strdate . ' ' . date('Y'); } @@ -421,11 +436,11 @@ class VkBridge extends BridgeAbstract 'count' => 200 )); - if (isset($result['error'])) return; - - foreach($result['response']['items'] as $item) { - $video_id = strval($item['owner_id']) . '_' . strval($item['id']); - $this->videos[$video_id]['url'] = $item['player']; + if (!isset($result['error'])) { + foreach($result['response']['items'] as $item) { + $video_id = strval($item['owner_id']) . '_' . 
strval($item['id']); + $this->videos[$video_id]['url'] = $item['player']; + } } foreach($this->items as &$item) { diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1589c723..548e46e1 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -92,9 +92,9 @@ class WordPressBridge extends FeedExpander { returnClientError('The url parameter must either refer to http or https protocol.'); } try{ - $this->collectExpandableDatas($this->getURI() . '/feed/atom/'); + $this->collectExpandableDatas($this->getURI() . '/feed/atom/', 20); } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI() . '/?feed=atom'); + $this->collectExpandableDatas($this->getURI() . '/?feed=atom', 20); } } diff --git a/bridges/WorldCosplayBridge.php b/bridges/WorldCosplayBridge.php new file mode 100644 index 00000000..21776ff1 --- /dev/null +++ b/bridges/WorldCosplayBridge.php @@ -0,0 +1,141 @@ +%s'; + + const ERR_CONTEXT = 'No context provided'; + const ERR_QUERY = 'Unable to query: %s'; + + const LIMIT_MIN = 1; + const LIMIT_MAX = 24; + + const PARAMETERS = array( + 'Character' => array( + 'cid' => array( + 'name' => 'Character ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay character ID', + 'exampleValue' => 18204 + ) + ), + 'Cosplayer' => array( + 'uid' => array( + 'name' => 'Cosplayer ID', + 'type' => 'number', + 'required' => true, + 'title' => 'Cosplayer\'s WorldCosplay profile ID', + 'exampleValue' => 406782 + ) + ), + 'Series' => array( + 'sid' => array( + 'name' => 'Series ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay series ID', + 'exampleValue' => 3139 + ) + ), + 'Tag' => array( + 'tid' => array( + 'name' => 'Tag ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay tag ID', + 'exampleValue' => 33643 + ) + ), + 'global' => array( + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Maximum number of 
photos to return', + 'exampleValue' => 5, + 'defaultValue' => 5 + ) + ) + ); + + public function collectData() { + $limit = $this->getInput('limit'); + $limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit)); + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + $url = self::API_CHARACTER; + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + $url = self::API_COSPLAYER; + break; + case 'Series': + $id = $this->getInput('sid'); + $url = self::API_SERIES; + break; + case 'Tag': + $id = $this->getInput('tid'); + $url = self::API_TAG; + break; + default: + returnClientError(self::ERR_CONTEXT); + } + $url = self::URI . sprintf($url, $id, $limit); + + $json = json_decode(getContents($url)) + or returnServerError(sprintf(self::ERR_QUERY, $url)); + if($json->has_error) { + returnServerError($json->message); + } + $list = $json->list; + + foreach($list as $img) { + $item = array(); + $item['uri'] = self::URI . substr($img->photo->url, 1); + $item['title'] = $img->photo->subject; + $item['timestamp'] = $img->photo->created_at; + $item['author'] = $img->member->global_name; + $item['enclosures'] = array($img->photo->large_url); + $item['uid'] = $img->photo->id; + $item['content'] = sprintf( + self::CONTENT_HTML, + $item['uri'], + $item['enclosures'][0], + $item['title'], + $item['title'] + ); + $this->items[] = $item; + } + } + + public function getName() { + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + break; + case 'Series': + $id = $this->getInput('sid'); + break; + case 'Tag': + $id = $this->getInput('tid'); + break; + default: + return parent::getName(); + } + return sprintf('%s %u - ', $this->queriedContext, $id) . 
self::NAME; + } +} diff --git a/bridges/WosckerBridge.php b/bridges/WosckerBridge.php new file mode 100644 index 00000000..7f348531 --- /dev/null +++ b/bridges/WosckerBridge.php @@ -0,0 +1,51 @@ +getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $date = $html->find('h1', 0)->plaintext; + $timestamp = $html->find('span.dateFont', 0)->plaintext . ' ' . $html->find('span.dateFont', 1)->plaintext; + + $item = array(); + $item['title'] = $date; + $item['content'] = $this->formatContent($html); + $item['timestamp'] = $timestamp; + + $this->items[] = $item; + } + + private function formatContent($html) { + $html->find('h1', 0)->outertext = ''; + + foreach ($html->find('hr') as $hr) { + $hr->outertext = ''; + } + + foreach ($html->find('div.betweenHeadline') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('div.dividingBarrier') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('h2') as $h2) { + $h2->outertext = '
    ' . $h2->innertext . '
    '; + } + + foreach ($html->find('h3') as $h3) { + $h3->outertext = $h3->innertext . '
    '; + } + + return $html->find('div.fullContentPiece', 0)->innertext; + } +} diff --git a/bridges/XPathBridge.php b/bridges/XPathBridge.php new file mode 100644 index 00000000..5aa280e0 --- /dev/null +++ b/bridges/XPathBridge.php @@ -0,0 +1,251 @@ +XPath expressions'; + const MAINTAINER = 'Niehztog'; + const PARAMETERS = array( + '' => array( + + 'url' => array( + 'name' => 'Enter web page URL', + 'title' => <<<"EOL" +You can specify any website URL which serves data suited for display in RSS feeds +(for example a news blog). +EOL + , 'type' => 'text', + 'exampleValue' => 'https://news.blizzard.com/en-en', + 'defaultValue' => 'https://news.blizzard.com/en-en', + 'required' => true + ), + + 'item' => array( + 'name' => 'Item selector', + 'title' => <<<"EOL" +Enter an XPath expression matching a list of dom nodes, each node containing one +feed article item in total (usually a surrounding <div> or <span> tag). This will +be the context nodes for all of the following expressions. This expression usually +starts with a single forward slash. +EOL + , 'type' => 'text', + 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', + 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', + 'required' => true + ), + + 'title' => array( + 'name' => 'Item title selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item node +containing the article headline. It should start with a dot followed by two +forward slashes, referring to any descendant nodes of the article item node. +EOL + , 'type' => 'text', + 'exampleValue' => './/div/div[2]/h2', + 'defaultValue' => './/div/div[2]/h2', + 'required' => true + ), + + 'content' => array( + 'name' => 'Item description selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item node +containing the article content or description. 
It should start with a dot +followed by two forward slashes, referring to any descendant nodes of the +article item node. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', + 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', + 'required' => false + ), + + 'uri' => array( + 'name' => 'Item URL selector', + 'title' => <<<"EOL" +This expression should match a node's attribute containing the article URL +(usually the href attribute of an <a> tag). It should start with a dot +followed by two forward slashes, referring to any descendant nodes of +the article item node. Attributes can be selected by prepending an @ char +before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href', + 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href', + 'required' => false + ), + + 'author' => array( + 'name' => 'Item author selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item +node containing the article author's name. It should start with a dot +followed by two forward slashes, referring to any descendant nodes of +the article item node. +EOL + , 'type' => 'text', + 'required' => false + ), + + 'timestamp' => array( + 'name' => 'Item date selector', + 'title' => <<<"EOL" +This expression should match a node or node's attribute containing the +article timestamp or date (parsable by PHP's strtotime function). It +should start with a dot followed by two forward slashes, referring to +any descendant nodes of the article item node. Attributes can be +selected by prepending an @ char before the attributes name. 
+EOL + , 'type' => 'text', + 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', + 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', + 'required' => false + ), + + 'enclosures' => array( + 'name' => 'Item image selector', + 'title' => <<<"EOL" +This expression should match a node's attribute containing an article +image URL (usually the src attribute of an <img> tag or a style +attribute). It should start with a dot followed by two forward slashes, +referring to any descendant nodes of the article item node. Attributes +can be selected by prepending an @ char before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style', + 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style', + 'required' => false + ), + + 'categories' => array( + 'name' => 'Item category selector', + 'title' => <<<"EOL" +This expression should match a node or node's attribute contained +within each article item node containing the article category. This +could be inside <div> or <span> tags or sometimes be hidden +in a data attribute. It should start with a dot followed by two +forward slashes, referring to any descendant nodes of the article +item node. Attributes can be selected by prepending an @ char +before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-label"]', + 'defaultValue' => './/div[@class="ArticleListItem-label"]', + 'required' => false + ), + + 'fix_encoding' => array( + 'name' => 'Fix encoding', + 'title' => <<<"EOL" +Check this to fix feed encoding by invoking PHP's utf8_decode +function on all extracted texts. Try this in case you see "broken" or +"weird" characters in your feed where you'd normally expect umlauts +or any other non-ascii characters. 
+EOL + , 'type' => 'checkbox', + 'required' => false + ), + + ) + ); + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl(){ + return $this->encodeUri($this->getInput('url')); + } + + /** + * XPath expression for extracting the feed items from the source page + * @return string + */ + protected function getExpressionItem(){ + return urldecode($this->getInput('item')); + } + + /** + * XPath expression for extracting an item title from the item context + * @return string + */ + protected function getExpressionItemTitle(){ + return urldecode($this->getInput('title')); + } + + /** + * XPath expression for extracting an item's content from the item context + * @return string + */ + protected function getExpressionItemContent(){ + return urldecode($this->getInput('content')); + } + + /** + * XPath expression for extracting an item link from the item context + * @return string + */ + protected function getExpressionItemUri(){ + return urldecode($this->getInput('uri')); + } + + /** + * XPath expression for extracting an item author from the item context + * @return string + */ + protected function getExpressionItemAuthor(){ + return urldecode($this->getInput('author')); + } + + /** + * XPath expression for extracting an item timestamp from the item context + * @return string + */ + protected function getExpressionItemTimestamp(){ + return urldecode($this->getInput('timestamp')); + } + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * @return string + */ + protected function getExpressionItemEnclosures(){ + return urldecode($this->getInput('enclosures')); + } + + /** + * XPath expression for extracting an item category from the item context + * @return string + */ + protected function getExpressionItemCategories(){ + return urldecode($this->getInput('categories')); + } + + /** + * Fix encoding + * @return string + */ + 
protected function getSettingFixEncoding(){ + return $this->getInput('fix_encoding'); + } + + /** + * Fixes URL encoding issues in input URL's + * @param $uri + * @return string|string[] + */ + private function encodeUri($uri) + { + if (strpos($uri, 'https%3A%2F%2F') === 0 + || strpos($uri, 'http%3A%2F%2F') === 0) { + $uri = urldecode($uri); + } + + $uri = str_replace('|', '%7C', $uri); + + return $uri; + } +} diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 75df3b15..13478029 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -185,7 +185,8 @@ class ZDNetBridge extends FeedExpander { '
    'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://wwv.zone-telechargement.net/', - 'exampleValue' => 'telecharger-series/31079-halt-and-catch-fire-saison-4-french-hd720p.html' + 'title' => 'URL d\'une série sans le https://www.zt-za.com/', + 'exampleValue' => 'telecharger-series/31079-halt-and-catch-fire-saison-4-french-hd720p.html'), + 'filter' => array( + 'name' => 'Type de contenu', + 'type' => 'list', + 'title' => 'Type de contenu à suivre : Téléchargement, Streaming ou les deux', + 'values' => array( + 'Streaming et Téléchargement' => 'both', + 'Téléchargement' => 'download', + 'Streaming' => 'streaming' + ), + 'defaultValue' => 'both' ) ) ); + // This is an URL that is not protected by robot protection for Direct Download + const UNPROTECTED_URI = 'https://www.zone-annuaire.com/'; + + // This is an URL that is not protected by robot protection for Streaming Links + const UNPROTECTED_URI_STREAMING = 'https://zone-telechargement.stream/'; + public function getIcon() { - return self::URI . '/templates/Default/images/favicon.ico'; + return self::UNPROTECTED_URI . '/templates/Default/images/favicon.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')) + $html = getSimpleHTMLDOM(self::UNPROTECTED_URI . $this->getInput('url')) or returnServerError('Could not request Zone Telechargement.'); + $filter = $this->getInput('filter'); + // Get the TV show title $qualityselector = 'div[style=font-size: 18px;margin: 10px auto;color:red;font-weight:bold;text-align:center;]'; $show = trim($html->find('div[class=smallsep]', 0)->next_sibling()->plaintext); $quality = trim(explode("\n", $html->find($qualityselector, 0)->plaintext)[0]); $this->showTitle = $show . ' ' . 
$quality; - // Get the post content - $linkshtml = $html->find('div[class=postinfo]', 0); - $episodes = array(); - $list = $linkshtml->find('a'); - // Construct the tabble of episodes using the links - foreach($list as $element) { - // Retrieve episode number from link text - $epnumber = explode(' ', $element->plaintext)[1]; - $hoster = $this->findLinkHoster($element); + // Handle the Direct Download links + if($filter == 'both' || $filter == 'download') { + // Get the post content + $linkshtml = $html->find('div[class=postinfo]', 0); - // Format the link and add the link to the corresponding episode table - $episodes[$epnumber][] = '' . $hoster . ' - ' - . $this->showTitle . ' Episode ' . $epnumber . ''; + $list = $linkshtml->find('a'); + // Construct the table of episodes using the links + foreach($list as $element) { + // Retrieve episode number from link text + $epnumber = explode(' ', $element->plaintext)[1]; + $hoster = $this->findLinkHoster($element); + // Format the link and add the link to the corresponding episode table + $episodes[$epnumber]['ddl'][] = '' . $hoster . ' - ' + . $this->showTitle . ' Episode ' . $epnumber . ''; + + } + } + + // Handle the Streaming links + if($filter == 'both' || $filter == 'streaming') { + // Get the post content, on the dedicated streaming website + $htmlstreaming = getSimpleHTMLDOM(self::UNPROTECTED_URI_STREAMING . 
$this->getInput('url')) + or returnServerError('Could not request Zone Telechargement.'); + // Get the HTML element containing all the links + $streaminglinkshtml = $htmlstreaming->find('p[style=background-color: #FECC00;]', 1)->parent()->next_sibling(); + // Get all streaming Links + $liststreaming = $streaminglinkshtml->find('a'); + foreach($liststreaming as $elementstreaming) { + // Retrieve the episode number from the link text + $epnumber = explode(' ', $elementstreaming->plaintext)[1]; + + // Format the link and add the link to the corresponding episode table + $episodes[$epnumber]['streaming'][] = '' + . $this->showTitle . ' Episode ' . $epnumber . ''; + } } // Finally construct the items array foreach($episodes as $epnum => $episode) { - $item = array(); - // Add every link available in the episode table separated by a
    tag - $item['content'] = implode('
    ', $episode); - $item['title'] = $this->showTitle . ' Episode ' . $epnum; - // As RSS Bridge use the URI as GUID they need to be unique : adding a md5 hash of the title element - // should geneerate unique URI to prevent confusion for RSS readers - $item['uri'] = self::URI . $this->getInput('url') . '#' . hash('md5', $item['title']); - // Insert the episode at the beginning of the item list, to show the newest episode first - array_unshift($this->items, $item); + // Handle the Direct Download links + if(array_key_exists('ddl', $episode)) { + $item = array(); + // Add every link available in the episode table separated by a
    tag + $item['content'] = implode('
    ', $episode['ddl']); + $item['title'] = $this->showTitle . ' Episode ' . $epnum . ' - Téléchargement'; + // Generate an unique UID by hashing the item title to prevent confusion for RSS readers + $item['uid'] = hash('md5', $item['title']); + $item['uri'] = self::URI . $this->getInput('url'); + // Insert the episode at the beginning of the item list, to show the newest episode first + array_unshift($this->items, $item); + } + // Handle the streaming link + if(array_key_exists('streaming', $episode)) { + $item = array(); + // Add every link available in the episode table separated by a
    tag + $item['content'] = implode('
    ', $episode['streaming']); + $item['title'] = $this->showTitle . ' Episode ' . $epnum . ' - Streaming'; + // Generate an unique UID by hashing the item title to prevent confusion for RSS readers + $item['uid'] = hash('md5', $item['title']); + $item['uri'] = self::URI . $this->getInput('url'); + // Insert the episode at the beginning of the item list, to show the newest episode first + array_unshift($this->items, $item); + } } } @@ -82,8 +137,6 @@ class ZoneTelechargementBridge extends BridgeAbstract { private function findLinkHoster($element) { // The hoster name is one level higher than the link tag : get the parent element $element = $element->parent(); - //echo "PARENT : $element \n"; - $continue = true; // Walk through all elements in the reverse order until finding the one with a div and that is not a
    while(!($element->find('div', 0) != null && $element->tag != 'br')) { $element = $element->prev_sibling(); diff --git a/cache/pages/.gitkeep b/cache/pages/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/cache/server/.gitkeep b/cache/server/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index f69f10b0..b431279a 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -40,7 +40,7 @@ class MemcachedCache implements CacheInterface { if ($this->data) return $this->data; $result = $this->conn->get($this->getCacheKey()); if ($result === false) { - return false; + return null; } $this->time = $result['time']; diff --git a/composer.json b/composer.json index 3c03eeb0..7a386976 100644 --- a/composer.json +++ b/composer.json @@ -34,6 +34,7 @@ }, "suggest": { "ext-memcached": "Allows to use memcached as cache type", - "ext-sqlite3": "Allows to use an SQLite database for caching" + "ext-sqlite3": "Allows to use an SQLite database for caching", + "ext-dom": "Allows to use some bridges based on XPath expressions" } } diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index a1ecfcf4..80a388df 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -89,6 +89,10 @@ class AtomFormat extends FormatAbstract{ . PHP_EOL; } + $entryThumbnail = $item->thumbnail; + if (!empty($entryThumbnail)) + $entryThumbnail = ''; + $entryLinkAlternate = ''; if (!empty($entryUri)) { $entryLinkAlternate = '{$entryContent} {$entryEnclosures} {$entryCategories} + {$entryThumbnail} EOD; @@ -125,7 +130,7 @@ EOD; /* Data are prepared, now let's begin the "MAGIE !!!" 
*/ $toReturn = << - + {$title} {$feedUrl} diff --git a/index.php b/index.php index ab4dabab..1c7dab13 100644 --- a/index.php +++ b/index.php @@ -19,7 +19,7 @@ if (isset($argv)) { } define('USER_AGENT', - 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0(rss-bridge/' + 'Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0(rss-bridge/' . Configuration::$VERSION . ';+' . REPOSITORY diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index c8ad79c5..ee885bf9 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -61,6 +61,13 @@ abstract class BridgeAbstract implements BridgeInterface { */ const CACHE_TIMEOUT = 3600; + /** + * Configuration for the bridge + * + * Use {@see BridgeAbstract::getConfiguration()} to read this parameter + */ + const CONFIGURATION = array(); + /** * Parameters for the bridge * @@ -238,6 +245,36 @@ abstract class BridgeAbstract implements BridgeInterface { } + /** + * Loads configuration for the bridge + * + * Returns errors and aborts execution if the provided configuration is + * invalid. + * + * @return void + */ + public function loadConfiguration() { + foreach(static::CONFIGURATION as $optionName => $optionValue) { + + $configurationOption = Configuration::getConfig(get_class($this), $optionName); + + if($configurationOption !== null) { + $this->configuration[$optionName] = $configurationOption; + continue; + } + + if(isset($optionValue['required']) && $optionValue['required'] === true) { + returnServerError( + 'Missing configuration option: ' + . 
$optionName + ); + } elseif(isset($optionValue['defaultValue'])) { + $this->configuration[$optionName] = $optionValue['defaultValue']; + } + + } + } + /** * Returns the value for the provided input * @@ -251,6 +288,19 @@ abstract class BridgeAbstract implements BridgeInterface { return $this->inputs[$this->queriedContext][$input]['value']; } + /** + * Returns the value for the selected configuration + * + * @param string $input The option name + * @return mixed|null The option value or null if the input is not defined + */ + public function getOption($name){ + if(!isset($this->configuration[$name])) { + return null; + } + return $this->configuration[$name]; + } + /** {@inheritdoc} */ public function getDescription(){ return static::DESCRIPTION; @@ -268,7 +318,12 @@ abstract class BridgeAbstract implements BridgeInterface { /** {@inheritdoc} */ public function getIcon(){ - return ''; + return static::URI . '/favicon.ico'; + } + + /** {@inheritdoc} */ + public function getConfiguration(){ + return static::CONFIGURATION; } /** {@inheritdoc} */ diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4353f643..0ed605bf 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -126,7 +126,7 @@ This bridge is not fetching its content through a secure connection
    '; if(isset($inputEntry['title'])) $form .= 'i'; else - $form .= ''; + $form .= ''; } $form .= ''; diff --git a/lib/BridgeInterface.php b/lib/BridgeInterface.php index d0069180..e9309dbf 100644 --- a/lib/BridgeInterface.php +++ b/lib/BridgeInterface.php @@ -58,6 +58,19 @@ interface BridgeInterface { */ public function collectData(); + /** + * Get the user's supplied configuration for the bridge + */ + public function getConfiguration(); + + /** + * Returns the value for the selected configuration + * + * @param string $input The option name + * @return mixed|null The option value or null if the input is not defined + */ + public function getOption($name); + /** * Returns the description * diff --git a/lib/BridgeList.php b/lib/BridgeList.php index fc92dfb7..8d7eb546 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -130,7 +130,7 @@ EOD; * @return string The searchbar */ private static function getSearchbar() { - $query = filter_input(INPUT_GET, 'q'); + $query = filter_input(INPUT_GET, 'q', FILTER_SANITIZE_SPECIAL_CHARS); return << diff --git a/lib/Configuration.php b/lib/Configuration.php index fa32be1f..16849e17 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -28,7 +28,7 @@ final class Configuration { * * @todo Replace this property by a constant. */ - public static $VERSION = 'dev.2019-12-01'; + public static $VERSION = 'dev.2020-11-10'; /** * Holds the configuration data. 
@@ -145,10 +145,7 @@ final class Configuration { // Replace default configuration with custom settings foreach(parse_ini_file(FILE_CONFIG, true, INI_SCANNER_TYPED) as $header => $section) { foreach($section as $key => $value) { - // Skip unknown sections and keys - if(array_key_exists($header, Configuration::$config) && array_key_exists($key, Configuration::$config[$header])) { - Configuration::$config[$header][$key] = $value; - } + Configuration::$config[$header][$key] = $value; } } } @@ -218,13 +215,11 @@ final class Configuration { * @return mixed|null The parameter value. */ public static function getConfig($section, $key) { - if(array_key_exists($section, self::$config) && array_key_exists($key, self::$config[$section])) { return self::$config[$section][$key]; } return null; - } /** @@ -244,9 +239,13 @@ final class Configuration { if(@is_readable($headFile)) { $revisionHashFile = '.git/' . substr(file_get_contents($headFile), 5, -1); - $branchName = explode('/', $revisionHashFile)[3]; - if(file_exists($revisionHashFile)) { - return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); + $parts = explode('/', $revisionHashFile); + + if(isset($parts[3])) { + $branchName = $parts[3]; + if(file_exists($revisionHashFile)) { + return 'git.' . $branchName . '.' . 
substr(file_get_contents($revisionHashFile), 0, 7); + } } } diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 665620a4..bc6dc240 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -346,7 +346,7 @@ abstract class FeedExpander extends BridgeAbstract { if($attribute === 'isPermaLink' && ($value === 'true' || ( filter_var($feedItem->guid, FILTER_VALIDATE_URL) - && !filter_var($item['uri'], FILTER_VALIDATE_URL) + && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) ) ) ) { diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 149e8a40..12e07942 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -135,6 +135,9 @@ class ParameterValidator { return false; foreach($data as $name => $value) { + // Some RSS readers add a cache-busting parameter (_=) to feed URLs, detect and ignore them. + if ($name === '_') continue; + $registered = false; foreach($parameters as $context => $set) { if(array_key_exists($name, $set)) { diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php new file mode 100644 index 00000000..e08f48d1 --- /dev/null +++ b/lib/XPathAbstract.php @@ -0,0 +1,583 @@ + or tag). This will + * be the context nodes for all of the following expressions. This expression usually + * starts with a single forward slash. + * + * Use {@see XPathAbstract::getExpressionItem()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM = ''; + + /** + * XPath expression for extracting an item title from the item context + * This expression should match a node contained within each article item node + * containing the article headline. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article item node. 
+ * + * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TITLE = ''; + + /** + * XPath expression for extracting an item's content from the item context + * This expression should match a node contained within each article item node + * containing the article content or description. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of the + * article item node. + * + * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CONTENT = ''; + + /** + * XPath expression for extracting an item link from the item context + * This expression should match a node's attribute containing the article URL + * (usually the href attribute of an tag). It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_URI = ''; + + /** + * XPath expression for extracting an item author from the item context + * This expression should match a node contained within each article item + * node containing the article author's name. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. + * + * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_AUTHOR = ''; + + /** + * XPath expression for extracting an item timestamp from the item context + * This expression should match a node or node's attribute containing the + * article timestamp or date (parsable by PHP's strtotime function). It + * should start with a dot followed by two forward slashes, referring to + * any descendant nodes of the article item node. 
Attributes can be + * selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TIMESTAMP = ''; + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * This expression should match a node's attribute containing an article + * image URL (usually the src attribute of an tag or a style + * attribute). It should start with a dot followed by two forward slashes, + * referring to any descendant nodes of the article item node. Attributes + * can be selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; + + /** + * XPath expression for extracting an item category from the item context + * This expression should match a node or node's attribute contained + * within each article item node containing the article category. This + * could be inside
    or tags or sometimes be hidden + * in a data attribute. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article + * item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; + + /** + * Fix encoding + * Set this to true for fixing feed encoding by invoking PHP's utf8_decode + * function on all extracted texts. Try this in case you see "broken" or + * "weird" characters in your feed where you'd normally expect umlauts + * or any other non-ascii characters. + * + * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter + */ + const SETTING_FIX_ENCODING = false; + + /** + * Internal storage for resulting feed name, automatically detected + * @var string + */ + private $feedName; + + /** + * Internal storage for the feed URI, taken from the configured source URL + * @var string + */ + private $feedUri; + + /** + * Internal storage for resulting feed favicon, automatically detected + * @var string + */ + private $feedIcon; + + public function getName(){ + return $this->feedName ?: parent::getName(); + } + + public function getURI() { + return $this->feedUri ?: parent::getURI(); + } + + public function getIcon() { + return $this->feedIcon ?: parent::getIcon(); + } + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl(){ + return static::FEED_SOURCE_URL; + } + + /** + * XPath expression for extracting the feed title from the source page + * @return string + */ + protected function getExpressionTitle(){ + return static::XPATH_EXPRESSION_FEED_TITLE; + } + + /** + * XPath expression for extracting the feed favicon from the source page + * @return string + */ + protected function getExpressionIcon(){ + return static::XPATH_EXPRESSION_FEED_ICON; + } + + 
/** + * XPath expression for extracting the feed items from the source page + * @return string + */ + protected function getExpressionItem(){ + return static::XPATH_EXPRESSION_ITEM; + } + + /** + * XPath expression for extracting an item title from the item context + * @return string + */ + protected function getExpressionItemTitle(){ + return static::XPATH_EXPRESSION_ITEM_TITLE; + } + + /** + * XPath expression for extracting an item's content from the item context + * @return string + */ + protected function getExpressionItemContent(){ + return static::XPATH_EXPRESSION_ITEM_CONTENT; + } + + /** + * XPath expression for extracting an item link from the item context + * @return string + */ + protected function getExpressionItemUri(){ + return static::XPATH_EXPRESSION_ITEM_URI; + } + + /** + * XPath expression for extracting an item author from the item context + * @return string + */ + protected function getExpressionItemAuthor(){ + return static::XPATH_EXPRESSION_ITEM_AUTHOR; + } + + /** + * XPath expression for extracting an item timestamp from the item context + * @return string + */ + protected function getExpressionItemTimestamp(){ + return static::XPATH_EXPRESSION_ITEM_TIMESTAMP; + } + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * @return string + */ + protected function getExpressionItemEnclosures(){ + return static::XPATH_EXPRESSION_ITEM_ENCLOSURES; + } + + /** + * XPath expression for extracting an item category from the item context + * @return string + */ + protected function getExpressionItemCategories(){ + return static::XPATH_EXPRESSION_ITEM_CATEGORIES; + } + + /** + * Fix encoding + * @return bool + */ + protected function getSettingFixEncoding(){ + return static::SETTING_FIX_ENCODING; + } + + /** + * Internal helper method for quickly accessing all the user defined constants + * in derived classes + * + * @param $name + * @return bool|string + */ + private function 
getParam($name){ + switch($name) { + + case 'url': + return $this->getSourceUrl(); + case 'feed_title': + return $this->getExpressionTitle(); + case 'feed_icon': + return $this->getExpressionIcon(); + case 'item': + return $this->getExpressionItem(); + case 'title': + return $this->getExpressionItemTitle(); + case 'content': + return $this->getExpressionItemContent(); + case 'uri': + return $this->getExpressionItemUri(); + case 'author': + return $this->getExpressionItemAuthor(); + case 'timestamp': + return $this->getExpressionItemTimestamp(); + case 'enclosures': + return $this->getExpressionItemEnclosures(); + case 'categories': + return $this->getExpressionItemCategories(); + case 'fix_encoding': + return $this->getSettingFixEncoding(); + } + } + + /** + * Should provide the source website HTML content + * can be easily overwritten for example if special headers or auth infos are required + * @return string + */ + protected function provideWebsiteContent() { + return getContents($this->feedUri); + } + + /** + * Should provide the feeds title + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedTitle(DOMXPath $xpath) { + $title = $xpath->query($this->getParam('feed_title')); + if(count($title) === 1) { + return $this->getItemValueOrNodeValue($title); + } + } + + /** + * Should provide the URL of the feed's favicon + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedIcon(DOMXPath $xpath) { + $icon = $xpath->query($this->getParam('feed_icon')); + if(count($icon) === 1) { + return $this->cleanImageUrl($this->getItemValueOrNodeValue($icon)); + } + } + + /** + * Should provide the feed's items. 
+ * + * @param DOMXPath $xpath + * @return DOMNodeList + */ + protected function provideFeedItems(DOMXPath $xpath) { + return @$xpath->query($this->getParam('item')); + } + + public function collectData() { + + $this->feedUri = $this->getParam('url'); + + $webPageHtml = new DOMDocument(); + libxml_use_internal_errors(true); + $webPageHtml->loadHTML($this->provideWebsiteContent()); + libxml_clear_errors(); + libxml_use_internal_errors(false); + + $xpath = new DOMXPath($webPageHtml); + + $this->feedName = $this->provideFeedTitle($xpath); + $this->feedIcon = $this->provideFeedIcon($xpath); + + $entries = $this->provideFeedItems($xpath); + if($entries === false) { + return; + } + + foreach ($entries as $entry) { + $item = new \FeedItem(); + foreach(array('title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories') as $param) { + + $expression = $this->getParam($param); + if('' === $expression) { + continue; + } + + //can be a string or DOMNodeList, depending on the expression result + $typedResult = @$xpath->evaluate($expression, $entry); + if ($typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0) + || (is_string($typedResult) && strlen(trim($typedResult)) === 0)) { + continue; + } + + $item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult))); + + } + + $itemId = $this->generateItemId($item); + if(null !== $itemId) { + $item->setUid($itemId); + } + + $this->items[] = $item; + } + + } + + /** + * @param $param + * @param $value + * @return string|array + */ + protected function formatParamValue($param, $value) + { + $value = $this->fixEncoding($value); + switch ($param) { + case 'title': + return $this->formatItemTitle($value); + case 'content': + return $this->formatItemContent($value); + case 'uri': + return $this->formatItemUri($value); + case 'author': + return $this->formatItemAuthor($value); + case 'timestamp': + return $this->formatItemTimestamp($value); + case 
'enclosures': + return array($this->cleanImageUrl($value)); + case 'categories': + return array($this->fixEncoding($value)); + } + return $value; + } + + /** + * Formats the title of a feed item. Takes extracted raw title and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemTitle($value) { + return $value; + } + + /** + * Formats the content of a feed item. Takes extracted raw content and returns it formatted + * as string. + * Can be easily overwritten for example if the content needs to be cleaned up or + * transformed into something else. + * @param string $value + * @return string + */ + protected function formatItemContent($value) { + return $value; + } + + /** + * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemUri($value) { + if(strlen($value) === 0) { + return ''; + } + if(strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + return $value; + } + + return urljoin($this->feedUri, $value); + } + + /** + * Formats the author of a feed item. Takes extracted raw author and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemAuthor($value) { + return $value; + } + + /** + * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix + * timestamp as integer. + * Can be easily overwritten for example if a special format has to be expected on the + * source website. 
+ * @param string $value + * @return false|int + */ + protected function formatItemTimestamp($value) { + return strtotime($value); + } + + /** + * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. + * @param string $value + * @return array + */ + protected function formatItemEnclosures($value) { + return array($this->cleanImageUrl($value)); + } + + /** + * Formats the categories of a feed item. Takes extracted raw categories and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. + * @param string $value + * @return array + */ + protected function formatItemCategories($value) { + return array($value); + } + + /** + * @param $imageUrl + * @return string|void + */ + protected function cleanImageUrl($imageUrl) + { + $result = preg_match('~(?:http(?:s)?:)?[\/a-zA-Z0-9\-_\.]+\.(?:jpg|gif|png|jpeg|ico){1}~', $imageUrl, $matches); + if(1 !== $result) { + return; + } + return urljoin($this->feedUri, $matches[0]); + } + + /** + * @param $typedResult + * @return string + */ + protected function getItemValueOrNodeValue($typedResult) + { + if($typedResult instanceof DOMNodeList) { + $item = $typedResult->item(0); + if ($item instanceof DOMElement) { + return trim($item->nodeValue); + } elseif ($item instanceof DOMAttr) { + return trim($item->value); + } + } elseif(is_string($typedResult) && strlen($typedResult) > 0) { + return trim($typedResult); + } + returnServerError('Unknown type of XPath expression result.'); + } + + /** + * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts. + * Useful in case of "broken" or "weird" characters in the feed where you'd normally + * expect umlauts. 
+ * + * @param $input + * @return string + */ + protected function fixEncoding($input) + { + return $this->getParam('fix_encoding') ? utf8_decode($input) : $input; + } + + /** + * Allows overriding default mechanism determining items Uid's + * + * @param FeedItem $item + * @return string|null + */ + protected function generateItemId(\FeedItem $item) { + return null; //auto generation + } +} diff --git a/lib/contents.php b/lib/contents.php index b1e31285..21d81e60 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -41,7 +41,7 @@ * 'content' if enabled. * * For more information see http://php.net/manual/en/function.curl-setopt.php - * @return string The contents. + * @return string|array The contents. */ function getContents($url, $header = array(), $opts = array(), $returnHeader = false){ Debug::log('Reading contents from "' . $url . '"'); @@ -82,6 +82,7 @@ function getContents($url, $header = array(), $opts = array(), $returnHeader = f $errorCode = 500; } else { $errorCode = 200; + $retVal['header'] = implode("\r\n", $http_response_header); } $curlError = ''; @@ -197,8 +198,7 @@ EOD if($lastError !== null) $lastError = $lastError['message']; returnError(<<` * tags when returning plaintext. - * @return string Contents as simplehtmldom object. + * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOM($url, $header = array(), @@ -282,7 +282,7 @@ function getSimpleHTMLDOM($url, * when returning plaintext. * @param string $defaultSpanText Specifies the replacement text for `` * tags when returning plaintext. - * @return string Contents as simplehtmldom object. + * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOMCached($url, $duration = 86400, diff --git a/lib/html.php b/lib/html.php index 13db97a4..892ecb17 100644 --- a/lib/html.php +++ b/lib/html.php @@ -195,7 +195,7 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ } /** - * Convert Markdown into HTML. 
Only a subset of the Markdown syntax is implemented. + * Convert Markdown into HTML with Parsedown. * * @link https://daringfireball.net/projects/markdown/ Markdown * @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec @@ -205,40 +205,6 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ */ function markdownToHtml($string) { - //For more details about how these regex work: - // https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702 - // Images: https://regex101.com/r/JW9Evr/1 - // Links: https://regex101.com/r/eRGVe7/1 - // Bold: https://regex101.com/r/2p40Y0/1 - // Italic: https://regex101.com/r/xJkET9/1 - // Separator: https://regex101.com/r/ZBEqFP/1 - // Plain URL: https://regex101.com/r/2JHYwb/1 - // Site name: https://regex101.com/r/qIuKYE/1 - - $string = preg_replace('/\!\[([^\]]+)\]\(([^\) ]+)(?: [^\)]+)?\)/', '$1', $string); - $string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '$1', $string); - $string = preg_replace('/\*\*(.*)\*\*/U', '$1', $string); - $string = preg_replace('/\*(.*)\*/U', '$1', $string); - $string = preg_replace('/__(.*)__/U', '$1', $string); - $string = preg_replace('/_(.*)_/U', '$1', $string); - $string = preg_replace('/[-]{6,99}/', '
    ', $string); - $string = str_replace(' ', '
    ', $string); - $string = preg_replace('/([^"])(https?:\/\/[^ "<]+)([^"])/', '$1$2$3', $string . ' '); - $string = preg_replace('/([^"\/])(www\.[^ "<]+)([^"])/', '$1$2$3', $string . ' '); - - //As the regex are not perfect, we need to fix and that are introduced in URLs - // Fixup regex : https://regex101.com/r/NTRPf6/1 - // Fixup regex : https://regex101.com/r/aNklRp/1 - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)<\/i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - return '
    ' . trim($string) . '
    '; + $Parsedown = new Parsedown(); + return $Parsedown->text($string); } diff --git a/lib/rssbridge.php b/lib/rssbridge.php index a025f229..2e7fbf2a 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -74,6 +74,7 @@ require_once PATH_LIB . 'BridgeList.php'; require_once PATH_LIB . 'ParameterValidator.php'; require_once PATH_LIB . 'ActionFactory.php'; require_once PATH_LIB . 'ActionAbstract.php'; +require_once PATH_LIB . 'XPathAbstract.php'; // Functions require_once PATH_LIB . 'html.php'; @@ -82,5 +83,6 @@ require_once PATH_LIB . 'contents.php'; // Vendor define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */ -require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; +require_once PATH_LIB_VENDOR . 'parsedown/Parsedown.php'; require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; +require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; diff --git a/static/search.js b/static/search.js index daf32879..788286d8 100644 --- a/static/search.js +++ b/static/search.js @@ -36,7 +36,7 @@ function search() { } - if(textValue != null || uriValue != null) { + if(textValue != null && uriValue != null) { if(textValue.match(regexMatch) != null || uriValue.hostname.match(regexMatch) || diff --git a/static/style.css b/static/style.css index 5df2c517..80591e43 100644 --- a/static/style.css +++ b/static/style.css @@ -360,7 +360,7 @@ h5 { margin: 3px auto 0; } - .info { + .info, .no-info { display: none; } diff --git a/tests/samples/expectedAtomFormat/feed.common.xml b/tests/samples/expectedAtomFormat/feed.common.xml index 80cb0df4..aa6d0687 100644 --- a/tests/samples/expectedAtomFormat/feed.common.xml +++ b/tests/samples/expectedAtomFormat/feed.common.xml @@ -1,5 +1,5 @@ - + Sample feed with common data https://example.com/feed?type=common&items=4 diff --git a/tests/samples/expectedAtomFormat/feed.empty.xml b/tests/samples/expectedAtomFormat/feed.empty.xml index 371135b6..fc04304d 100644 --- 
a/tests/samples/expectedAtomFormat/feed.empty.xml +++ b/tests/samples/expectedAtomFormat/feed.empty.xml @@ -1,5 +1,5 @@ - + Sample feed with minimum data https://example.com/feed diff --git a/tests/samples/expectedAtomFormat/feed.emptyItems.xml b/tests/samples/expectedAtomFormat/feed.emptyItems.xml index 462a4e5c..989893fa 100644 --- a/tests/samples/expectedAtomFormat/feed.emptyItems.xml +++ b/tests/samples/expectedAtomFormat/feed.emptyItems.xml @@ -1,5 +1,5 @@ - + Sample feed with minimum data https://example.com/feed diff --git a/tests/samples/expectedAtomFormat/feed.microblog.xml b/tests/samples/expectedAtomFormat/feed.microblog.xml index a6264aee..32bc0273 100644 --- a/tests/samples/expectedAtomFormat/feed.microblog.xml +++ b/tests/samples/expectedAtomFormat/feed.microblog.xml @@ -1,5 +1,5 @@ - + Sample microblog feed https://example.com/feed diff --git a/vendor/parsedown/LICENSE.txt b/vendor/parsedown/LICENSE.txt new file mode 100644 index 00000000..8e7c764d --- /dev/null +++ b/vendor/parsedown/LICENSE.txt @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/parsedown/Parsedown.php b/vendor/parsedown/Parsedown.php new file mode 100644 index 00000000..1b9d6d5b --- /dev/null +++ b/vendor/parsedown/Parsedown.php @@ -0,0 +1,1712 @@ +DefinitionData = array(); + + # standardize line breaks + $text = str_replace(array("\r\n", "\r"), "\n", $text); + + # remove surrounding line breaks + $text = trim($text, "\n"); + + # split text into lines + $lines = explode("\n", $text); + + # iterate through lines to identify blocks + $markup = $this->lines($lines); + + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; + } + + # + # Setters + # + + function setBreaksEnabled($breaksEnabled) + { + $this->breaksEnabled = $breaksEnabled; + + return $this; + } + + protected $breaksEnabled; + + function setMarkupEscaped($markupEscaped) + { + $this->markupEscaped = $markupEscaped; + + return $this; + } + + protected $markupEscaped; + + function setUrlsLinked($urlsLinked) + { + $this->urlsLinked = $urlsLinked; + + return $this; + } + + protected $urlsLinked = true; + + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + + # + # Lines + # + + protected $BlockTypes = array( + '#' => array('Header'), + '*' => array('Rule', 'List'), + '+' => array('List'), + '-' => array('SetextHeader', 'Table', 'Rule', 'List'), + '0' => array('List'), + '1' => array('List'), + '2' => array('List'), + '3' => array('List'), + '4' => 
array('List'), + '5' => array('List'), + '6' => array('List'), + '7' => array('List'), + '8' => array('List'), + '9' => array('List'), + ':' => array('Table'), + '<' => array('Comment', 'Markup'), + '=' => array('SetextHeader'), + '>' => array('Quote'), + '[' => array('Reference'), + '_' => array('Rule'), + '`' => array('FencedCode'), + '|' => array('Table'), + '~' => array('FencedCode'), + ); + + # ~ + + protected $unmarkedBlockTypes = array( + 'Code', + ); + + # + # Blocks + # + + protected function lines(array $lines) + { + $CurrentBlock = null; + + foreach ($lines as $line) + { + if (chop($line) === '') + { + if (isset($CurrentBlock)) + { + $CurrentBlock['interrupted'] = true; + } + + continue; + } + + if (strpos($line, "\t") !== false) + { + $parts = explode("\t", $line); + + $line = $parts[0]; + + unset($parts[0]); + + foreach ($parts as $part) + { + $shortage = 4 - mb_strlen($line, 'utf-8') % 4; + + $line .= str_repeat(' ', $shortage); + $line .= $part; + } + } + + $indent = 0; + + while (isset($line[$indent]) and $line[$indent] === ' ') + { + $indent ++; + } + + $text = $indent > 0 ? 
substr($line, $indent) : $line; + + # ~ + + $Line = array('body' => $line, 'indent' => $indent, 'text' => $text); + + # ~ + + if (isset($CurrentBlock['continuable'])) + { + $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + + if (isset($Block)) + { + $CurrentBlock = $Block; + + continue; + } + else + { + if ($this->isBlockCompletable($CurrentBlock['type'])) + { + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + } + } + } + + # ~ + + $marker = $text[0]; + + # ~ + + $blockTypes = $this->unmarkedBlockTypes; + + if (isset($this->BlockTypes[$marker])) + { + foreach ($this->BlockTypes[$marker] as $blockType) + { + $blockTypes []= $blockType; + } + } + + # + # ~ + + foreach ($blockTypes as $blockType) + { + $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + + if (isset($Block)) + { + $Block['type'] = $blockType; + + if ( ! isset($Block['identified'])) + { + $Blocks []= $CurrentBlock; + + $Block['identified'] = true; + } + + if ($this->isBlockContinuable($blockType)) + { + $Block['continuable'] = true; + } + + $CurrentBlock = $Block; + + continue 2; + } + } + + # ~ + + if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted'])) + { + $CurrentBlock['element']['text'] .= "\n".$text; + } + else + { + $Blocks []= $CurrentBlock; + + $CurrentBlock = $this->paragraph($Line); + + $CurrentBlock['identified'] = true; + } + } + + # ~ + + if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) + { + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + } + + # ~ + + $Blocks []= $CurrentBlock; + + unset($Blocks[0]); + + # ~ + + $markup = ''; + + foreach ($Blocks as $Block) + { + if (isset($Block['hidden'])) + { + continue; + } + + $markup .= "\n"; + $markup .= isset($Block['markup']) ? 
$Block['markup'] : $this->element($Block['element']); + } + + $markup .= "\n"; + + # ~ + + return $markup; + } + + protected function isBlockContinuable($Type) + { + return method_exists($this, 'block'.$Type.'Continue'); + } + + protected function isBlockCompletable($Type) + { + return method_exists($this, 'block'.$Type.'Complete'); + } + + # + # Code + + protected function blockCode($Line, $Block = null) + { + if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + { + return; + } + + if ($Line['indent'] >= 4) + { + $text = substr($Line['body'], 4); + + $Block = array( + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => array( + 'name' => 'code', + 'text' => $text, + ), + ), + ); + + return $Block; + } + } + + protected function blockCodeContinue($Line, $Block) + { + if ($Line['indent'] >= 4) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + $Block['element']['text']['text'] .= "\n"; + + $text = substr($Line['body'], 4); + + $Block['element']['text']['text'] .= $text; + + return $Block; + } + } + + protected function blockCodeComplete($Block) + { + $text = $Block['element']['text']['text']; + + $Block['element']['text']['text'] = $text; + + return $Block; + } + + # + # Comment + + protected function blockComment($Line) + { + if ($this->markupEscaped or $this->safeMode) + { + return; + } + + if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') + { + $Block = array( + 'markup' => $Line['body'], + ); + + if (preg_match('/-->$/', $Line['text'])) + { + $Block['closed'] = true; + } + + return $Block; + } + } + + protected function blockCommentContinue($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + $Block['markup'] .= "\n" . 
$Line['body']; + + if (preg_match('/-->$/', $Line['text'])) + { + $Block['closed'] = true; + } + + return $Block; + } + + # + # Fenced Code + + protected function blockFencedCode($Line) + { + if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches)) + { + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if (isset($matches[1])) + { + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = substr($matches[1], 0, strcspn($matches[1], " \t\n\f\r")); + + $class = 'language-'.$language; + + $Element['attributes'] = array( + 'class' => $class, + ); + } + + $Block = array( + 'char' => $Line['text'][0], + 'element' => array( + 'name' => 'pre', + 'handler' => 'element', + 'text' => $Element, + ), + ); + + return $Block; + } + } + + protected function blockFencedCodeContinue($Line, $Block) + { + if (isset($Block['complete'])) + { + return; + } + + if (isset($Block['interrupted'])) + { + $Block['element']['text']['text'] .= "\n"; + + unset($Block['interrupted']); + } + + if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) + { + $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + + $Block['complete'] = true; + + return $Block; + } + + $Block['element']['text']['text'] .= "\n".$Line['body']; + + return $Block; + } + + protected function blockFencedCodeComplete($Block) + { + $text = $Block['element']['text']['text']; + + $Block['element']['text']['text'] = $text; + + return $Block; + } + + # + # Header + + protected 
function blockHeader($Line) + { + if (isset($Line['text'][1])) + { + $level = 1; + + while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') + { + $level ++; + } + + if ($level > 6) + { + return; + } + + $text = trim($Line['text'], '# '); + + $Block = array( + 'element' => array( + 'name' => 'h' . min(6, $level), + 'text' => $text, + 'handler' => 'line', + ), + ); + + return $Block; + } + } + + # + # List + + protected function blockList($Line) + { + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + + if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'indent' => $Line['indent'], + 'pattern' => $pattern, + 'element' => array( + 'name' => $name, + 'handler' => 'elements', + ), + ); + + if($name === 'ol') + { + $listStart = stristr($matches[0], '.', true); + + if($listStart !== '1') + { + $Block['element']['attributes'] = array('start' => $listStart); + } + } + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $matches[2], + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + } + + protected function blockListContinue($Line, array $Block) + { + if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['li']['text'] []= ''; + + $Block['loose'] = true; + + unset($Block['interrupted']); + } + + unset($Block['li']); + + $text = isset($matches[1]) ? $matches[1] : ''; + + $Block['li'] = array( + 'name' => 'li', + 'handler' => 'li', + 'text' => array( + $text, + ), + ); + + $Block['element']['text'] []= & $Block['li']; + + return $Block; + } + + if ($Line['text'][0] === '[' and $this->blockReference($Line)) + { + return $Block; + } + + if ( ! 
isset($Block['interrupted'])) + { + $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + return $Block; + } + + if ($Line['indent'] > 0) + { + $Block['li']['text'] []= ''; + + $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + + $Block['li']['text'] []= $text; + + unset($Block['interrupted']); + + return $Block; + } + } + + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) + { + foreach ($Block['element']['text'] as &$li) + { + if (end($li['text']) !== '') + { + $li['text'] []= ''; + } + } + } + + return $Block; + } + + # + # Quote + + protected function blockQuote($Line) + { + if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + $Block = array( + 'element' => array( + 'name' => 'blockquote', + 'handler' => 'lines', + 'text' => (array) $matches[1], + ), + ); + + return $Block; + } + } + + protected function blockQuoteContinue($Line, array $Block) + { + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + { + if (isset($Block['interrupted'])) + { + $Block['element']['text'] []= ''; + + unset($Block['interrupted']); + } + + $Block['element']['text'] []= $matches[1]; + + return $Block; + } + + if ( ! isset($Block['interrupted'])) + { + $Block['element']['text'] []= $Line['text']; + + return $Block; + } + } + + # + # Rule + + protected function blockRule($Line) + { + if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + { + $Block = array( + 'element' => array( + 'name' => 'hr' + ), + ); + + return $Block; + } + } + + # + # Setext + + protected function blockSetextHeader($Line, array $Block = null) + { + if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + { + return; + } + + if (chop($Line['text'], $Line['text'][0]) === '') + { + $Block['element']['name'] = $Line['text'][0] === '=' ? 
'h1' : 'h2'; + + return $Block; + } + } + + # + # Markup + + protected function blockMarkup($Line) + { + if ($this->markupEscaped or $this->safeMode) + { + return; + } + + if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + { + $element = strtolower($matches[1]); + + if (in_array($element, $this->textLevelElements)) + { + return; + } + + $Block = array( + 'name' => $matches[1], + 'depth' => 0, + 'markup' => $Line['text'], + ); + + $length = strlen($matches[0]); + + $remainder = substr($Line['text'], $length); + + if (trim($remainder) === '') + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + $Block['closed'] = true; + + $Block['void'] = true; + } + } + else + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + return; + } + + if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) + { + $Block['closed'] = true; + } + } + + return $Block; + } + } + + protected function blockMarkupContinue($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open + { + $Block['depth'] ++; + } + + if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close + { + if ($Block['depth'] > 0) + { + $Block['depth'] --; + } + else + { + $Block['closed'] = true; + } + } + + if (isset($Block['interrupted'])) + { + $Block['markup'] .= "\n"; + + unset($Block['interrupted']); + } + + $Block['markup'] .= "\n".$Line['body']; + + return $Block; + } + + # + # Reference + + protected function blockReference($Line) + { + if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) + { + $id = strtolower($matches[1]); + + $Data = array( + 'url' => $matches[2], + 'title' => null, + ); + + if (isset($matches[3])) + { + $Data['title'] = $matches[3]; + } + + $this->DefinitionData['Reference'][$id] = $Data; 
+ + $Block = array( + 'hidden' => true, + ); + + return $Block; + } + } + + # + # Table + + protected function blockTable($Line, array $Block = null) + { + if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + { + return; + } + + if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') + { + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + continue; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['text']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'text' => $headerCell, + 'handler' => 'line', + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'style' => 'text-align: '.$alignment.';', + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'handler' => 'elements', + ), + ); + + $Block['element']['text'] []= array( + 'name' => 'thead', + 'handler' => 'elements', + ); + + $Block['element']['text'] []= array( + 'name' => 'tbody', + 'handler' => 'elements', + 'text' => array(), + ); + + $Block['element']['text'][0]['text'] []= array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $HeaderElements, + ); + + return $Block; + } 
+ } + + protected function blockTableContinue($Line, array $Block) + { + if (isset($Block['interrupted'])) + { + return; + } + + if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + { + $Elements = array(); + + $row = $Line['text']; + + $row = trim($row); + $row = trim($row, '|'); + + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + + foreach ($matches[0] as $index => $cell) + { + $cell = trim($cell); + + $Element = array( + 'name' => 'td', + 'handler' => 'line', + 'text' => $cell, + ); + + if (isset($Block['alignments'][$index])) + { + $Element['attributes'] = array( + 'style' => 'text-align: '.$Block['alignments'][$index].';', + ); + } + + $Elements []= $Element; + } + + $Element = array( + 'name' => 'tr', + 'handler' => 'elements', + 'text' => $Elements, + ); + + $Block['element']['text'][1]['text'] []= $Element; + + return $Block; + } + } + + # + # ~ + # + + protected function paragraph($Line) + { + $Block = array( + 'element' => array( + 'name' => 'p', + 'text' => $Line['text'], + 'handler' => 'line', + ), + ); + + return $Block; + } + + # + # Inline Elements + # + + protected $InlineTypes = array( + '"' => array('SpecialCharacter'), + '!' 
=> array('Image'), + '&' => array('SpecialCharacter'), + '*' => array('Emphasis'), + ':' => array('Url'), + '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), + '>' => array('SpecialCharacter'), + '[' => array('Link'), + '_' => array('Emphasis'), + '`' => array('Code'), + '~' => array('Strikethrough'), + '\\' => array('EscapeSequence'), + ); + + # ~ + + protected $inlineMarkerList = '!"*_&[:<>`~\\'; + + # + # ~ + # + + public function line($text, $nonNestables=array()) + { + $markup = ''; + + # $excerpt is based on the first occurrence of a marker + + while ($excerpt = strpbrk($text, $this->inlineMarkerList)) + { + $marker = $excerpt[0]; + + $markerPosition = strpos($text, $marker); + + $Excerpt = array('text' => $excerpt, 'context' => $text); + + foreach ($this->InlineTypes[$marker] as $inlineType) + { + # check to see if the current inline type is nestable in the current context + + if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables)) + { + continue; + } + + $Inline = $this->{'inline'.$inlineType}($Excerpt); + + if ( ! isset($Inline)) + { + continue; + } + + # makes sure that the inline belongs to "our" marker + + if (isset($Inline['position']) and $Inline['position'] > $markerPosition) + { + continue; + } + + # sets a default inline position + + if ( ! isset($Inline['position'])) + { + $Inline['position'] = $markerPosition; + } + + # cause the new element to 'inherit' our non nestables + + foreach ($nonNestables as $non_nestable) + { + $Inline['element']['nonNestables'][] = $non_nestable; + } + + # the text that comes before the inline + $unmarkedText = substr($text, 0, $Inline['position']); + + # compile the unmarked text + $markup .= $this->unmarkedText($unmarkedText); + + # compile the inline + $markup .= isset($Inline['markup']) ? 
$Inline['markup'] : $this->element($Inline['element']); + + # remove the examined text + $text = substr($text, $Inline['position'] + $Inline['extent']); + + continue 2; + } + + # the marker does not belong to an inline + + $unmarkedText = substr($text, 0, $markerPosition + 1); + + $markup .= $this->unmarkedText($unmarkedText); + + $text = substr($text, $markerPosition + 1); + } + + $markup .= $this->unmarkedText($text); + + return $markup; + } + + # + # ~ + # + + protected function inlineCode($Excerpt) + { + $marker = $Excerpt['text'][0]; + + if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), + 'element' => array( + 'name' => 'code', + 'text' => $text, + ), + ); + } + } + + protected function inlineEmailTag($Excerpt) + { + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) + { + $url = $matches[1]; + + if ( ! isset($matches[2])) + { + $url = 'mailto:' . $url; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $matches[1], + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + protected function inlineEmphasis($Excerpt) + { + if ( ! 
isset($Excerpt['text'][1])) + { + return; + } + + $marker = $Excerpt['text'][0]; + + if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches)) + { + $emphasis = 'strong'; + } + elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) + { + $emphasis = 'em'; + } + else + { + return; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => $emphasis, + 'handler' => 'line', + 'text' => $matches[1], + ), + ); + } + + protected function inlineEscapeSequence($Excerpt) + { + if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) + { + return array( + 'markup' => $Excerpt['text'][1], + 'extent' => 2, + ); + } + } + + protected function inlineImage($Excerpt) + { + if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[') + { + return; + } + + $Excerpt['text']= substr($Excerpt['text'], 1); + + $Link = $this->inlineLink($Excerpt); + + if ($Link === null) + { + return; + } + + $Inline = array( + 'extent' => $Link['extent'] + 1, + 'element' => array( + 'name' => 'img', + 'attributes' => array( + 'src' => $Link['element']['attributes']['href'], + 'alt' => $Link['element']['text'], + ), + ), + ); + + $Inline['element']['attributes'] += $Link['element']['attributes']; + + unset($Inline['element']['attributes']['href']); + + return $Inline; + } + + protected function inlineLink($Excerpt) + { + $Element = array( + 'name' => 'a', + 'handler' => 'line', + 'nonNestables' => array('Url', 'Link'), + 'text' => null, + 'attributes' => array( + 'href' => null, + 'title' => null, + ), + ); + + $extent = 0; + + $remainder = $Excerpt['text']; + + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) + { + $Element['text'] = $matches[1]; + + $extent += strlen($matches[0]); + + $remainder = substr($remainder, $extent); + } + else + { + return; + } + + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ 
]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches)) + { + $Element['attributes']['href'] = $matches[1]; + + if (isset($matches[2])) + { + $Element['attributes']['title'] = substr($matches[2], 1, - 1); + } + + $extent += strlen($matches[0]); + } + else + { + if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) + { + $definition = strlen($matches[1]) ? $matches[1] : $Element['text']; + $definition = strtolower($definition); + + $extent += strlen($matches[0]); + } + else + { + $definition = strtolower($Element['text']); + } + + if ( ! isset($this->DefinitionData['Reference'][$definition])) + { + return; + } + + $Definition = $this->DefinitionData['Reference'][$definition]; + + $Element['attributes']['href'] = $Definition['url']; + $Element['attributes']['title'] = $Definition['title']; + } + + return array( + 'extent' => $extent, + 'element' => $Element, + ); + } + + protected function inlineMarkup($Excerpt) + { + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) + { + return; + } + + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + } + + protected function inlineSpecialCharacter($Excerpt) + { + if ($Excerpt['text'][0] === '&' and ! 
preg_match('/^&#?\w+;/', $Excerpt['text'])) + { + return array( + 'markup' => '&', + 'extent' => 1, + ); + } + + $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); + + if (isset($SpecialCharacter[$Excerpt['text'][0]])) + { + return array( + 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', + 'extent' => 1, + ); + } + } + + protected function inlineStrikethrough($Excerpt) + { + if ( ! isset($Excerpt['text'][1])) + { + return; + } + + if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches)) + { + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'del', + 'text' => $matches[1], + 'handler' => 'line', + ), + ); + } + } + + protected function inlineUrl($Excerpt) + { + if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/') + { + return; + } + + if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) + { + $url = $matches[0][0]; + + $Inline = array( + 'extent' => strlen($matches[0][0]), + 'position' => $matches[0][1], + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + + return $Inline; + } + } + + protected function inlineUrlTag($Excerpt) + { + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + { + $url = $matches[1]; + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), + ), + ); + } + } + + # ~ + + protected function unmarkedText($text) + { + if ($this->breaksEnabled) + { + $text = preg_replace('/[ ]*\n/', "
    \n", $text); + } + else + { + $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
    \n", $text); + $text = str_replace(" \n", "\n", $text); + } + + return $text; + } + + # + # Handlers + # + + protected function element(array $Element) + { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + + $markup = '<'.$Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= ' '.$name.'="'.self::escape($value).'"'; + } + } + + $permitRawHtml = false; + + if (isset($Element['text'])) + { + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + if (isset($text)) + { + $markup .= '>'; + + if (!isset($Element['nonNestables'])) + { + $Element['nonNestables'] = array(); + } + + if (isset($Element['handler'])) + { + $markup .= $this->{$Element['handler']}($text, $Element['nonNestables']); + } + elseif (!$permitRawHtml) + { + $markup .= self::escape($text, true); + } + else + { + $markup .= $text; + } + + $markup .= ''; + } + else + { + $markup .= ' />'; + } + + return $markup; + } + + protected function elements(array $Elements) + { + $markup = ''; + + foreach ($Elements as $Element) + { + $markup .= "\n" . $this->element($Element); + } + + $markup .= "\n"; + + return $markup; + } + + # ~ + + protected function li($lines) + { + $markup = $this->lines($lines); + + $trimmedMarkup = trim($markup); + + if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '

    ') + { + $markup = $trimmedMarkup; + $markup = substr($markup, 3); + + $position = strpos($markup, "

    "); + + $markup = substr_replace($markup, '', $position, 4); + } + + return $markup; + } + + # + # Deprecated Methods + # + + function parse($text) + { + $markup = $this->text($text); + + return $markup; + } + + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + + # + # Static Methods + # + + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? 
ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + + static function instance($name = 'default') + { + if (isset(self::$instances[$name])) + { + return self::$instances[$name]; + } + + $instance = new static(); + + self::$instances[$name] = $instance; + + return $instance; + } + + private static $instances = array(); + + # + # Fields + # + + protected $DefinitionData; + + # + # Read-Only + + protected $specialCharacters = array( + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + ); + + protected $StrongRegex = array( + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + ); + + protected $EmRegex = array( + '*' => '/^[*]((?:\\\\\*|[^*]|[*][*][^*]+?[*][*])+?)[*](?![*])/s', + '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', + ); + + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + + protected $voidElements = array( + 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', + ); + + protected $textLevelElements = array( + 'a', 'br', 'bdo', 'abbr', 'blink', 'nextid', 'acronym', 'basefont', + 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', + 'i', 'rp', 'del', 'code', 'strike', 'marquee', + 'q', 'rt', 'ins', 'font', 'strong', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', + ); +}