[Anidex] Fix content retrieval (#1693)

Anidex uses two separate domains: anidex.info and anidex.moe.
anidex.info is protected by DDoS-Guard, so we need to request
anidex.moe with the Host header set to anidex.info instead.
This commit is contained in:
ORelio 2020-08-31 19:04:56 +02:00 committed by GitHub
parent e00bbe353f
commit 46abc18e87
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -3,7 +3,9 @@ class AnidexBridge extends BridgeAbstract {
const MAINTAINER = 'ORelio'; const MAINTAINER = 'ORelio';
const NAME = 'Anidex'; const NAME = 'Anidex';
const URI = 'https://anidex.info/'; const URI = 'http://anidex.info/'; // anidex.info has ddos-guard so we need to use anidex.moe
const ALTERNATE_URI = 'https://anidex.moe/'; // anidex.moe returns 301 unless Host is set to anidex.info
const ALTERNATE_HOST = 'anidex.info'; // Correct host for requesting anidex.moe without 301 redirect
const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.'; const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.';
const PARAMETERS = array( const PARAMETERS = array(
array( array(
@ -108,7 +110,7 @@ class AnidexBridge extends BridgeAbstract {
public function collectData() { public function collectData() {
// Build Search URL from user-provided parameters // Build Search URL from user-provided parameters
$search_url = self::URI . '?s=upload_timestamp&o=desc'; $search_url = self::ALTERNATE_URI . '?s=upload_timestamp&o=desc';
foreach (array('id', 'lang_id', 'group_id') as $param_name) { foreach (array('id', 'lang_id', 'group_id') as $param_name) {
$param = $this->getInput($param_name); $param = $this->getInput($param_name);
if (!empty($param) && intval($param) != 0 && ctype_digit(str_replace(',', '', $param))) { if (!empty($param) && intval($param) != 0 && ctype_digit(str_replace(',', '', $param))) {
@ -131,8 +133,16 @@ class AnidexBridge extends BridgeAbstract {
$opt[CURLOPT_COOKIE] = 'anidex_h_toggle=' . $h; $opt[CURLOPT_COOKIE] = 'anidex_h_toggle=' . $h;
} }
// We need to use a different Host HTTP header to reach the correct page on ALTERNATE_URI
$headers = array('Host: ' . self::ALTERNATE_HOST);
// The HTTPS certificate presented by anidex.moe is for anidex.info. We need to ignore this.
// As a consequence, the bridge is intentionally marked as insecure by setting self::URI to http://
$opt[CURLOPT_SSL_VERIFYHOST] = 0;
$opt[CURLOPT_SSL_VERIFYPEER] = 0;
// Retrieve torrent listing from search results, which does not contain torrent description // Retrieve torrent listing from search results, which does not contain torrent description
$html = getSimpleHTMLDOM($search_url, array(), $opt) $html = getSimpleHTMLDOM($search_url, $headers, $opt)
or returnServerError('Could not request Anidex: ' . $search_url); or returnServerError('Could not request Anidex: ' . $search_url);
$links = $html->find('a'); $links = $html->find('a');
$results = array(); $results = array();
@ -156,10 +166,11 @@ class AnidexBridge extends BridgeAbstract {
if ($torrent_id != 0 && ctype_digit($torrent_id)) { if ($torrent_id != 0 && ctype_digit($torrent_id)) {
//Retrieve data for this torrent ID //Retrieve data for this torrent ID
$item_uri = self::URI . 'torrent/' . $torrent_id; $item_browse_uri = self::URI . 'torrent/' . $torrent_id;
$item_fetch_uri = self::ALTERNATE_URI . 'torrent/' . $torrent_id;
//Retrieve full description from torrent page //Retrieve full description from torrent page (cached for 24 hours: 86400 seconds)
if ($item_html = getSimpleHTMLDOMCached($item_uri)) { if ($item_html = getSimpleHTMLDOMCached($item_fetch_uri, 86400, $headers, $opt)) {
//Retrieve data from page contents //Retrieve data from page contents
$item_title = str_replace(' (Torrent) - AniDex ', '', $item_html->find('title', 0)->plaintext); $item_title = str_replace(' (Torrent) - AniDex ', '', $item_html->find('title', 0)->plaintext);
@ -191,7 +202,7 @@ class AnidexBridge extends BridgeAbstract {
//Build and add final item //Build and add final item
$item = array(); $item = array();
$item['uri'] = $item_uri; $item['uri'] = $item_browse_uri;
$item['title'] = $item_title; $item['title'] = $item_title;
$item['author'] = $item_author; $item['author'] = $item_author;
$item['timestamp'] = $item_date; $item['timestamp'] = $item_date;