diff --git a/bridges/IPBBridge.php b/bridges/IPBBridge.php new file mode 100644 index 00000000..f3fa14f4 --- /dev/null +++ b/bridges/IPBBridge.php @@ -0,0 +1,307 @@ + array( + 'name' => 'URI', + 'type' => 'text', + 'required' => true, + 'title' => 'Insert forum, subforum or topic URI', + 'exampleValue' => 'https://invisioncommunity.com/forums/forum/499-feedback-and-ideas/' + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Specify how many pages should be fetched (-1: all)', + 'defaultValue' => 1 + ) + ) + ); + const CACHE_TIMEOUT = 3600; + + // Constants for internal use + const FORUM_TYPE_LIST_FILTER = '.cForumTopicTable'; + const FORUM_TYPE_TABLE_FILTER = '#forum_table'; + + const TOPIC_TYPE_ARTICLE = 'article'; + const TOPIC_TYPE_DIV = 'div.post_block'; + + public function getURI(){ + return $this->getInput('uri') ?: parent::getURI(); + } + + public function collectData(){ + // The URI cannot be the mainpage (or anything related) + switch(parse_url($this->getInput('uri'), PHP_URL_PATH)) { + case null: + case '/index.php': + returnClientError('Provided URI is invalid!'); + break; + default: + break; + } + + // Sanitize the URI (because else it won't work) + $uri = rtrim($this->getInput('uri'), '/'); // No trailing slashes! + + // Forums might provide feeds, though that's optional *facepalm* + // Let's check if there is a valid feed available + $headers = get_headers($uri . '.xml'); + + if($headers[0] === 'HTTP/1.1 200 OK') { // Heureka! It's a valid feed! + return $this->collectExpandableDatas($uri); + } + + // No valid feed, so do it the hard way + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request ' . $this->getInput('uri') . '!'); + + $limit = $this->getInput('limit'); + + // Determine if this is a topic or a forum + switch(true) { + case $this->isTopic($html): + $this->collectTopic($html, $limit); + break; + case $this->isForum($html); + $this->collectForum($html); + break; + default: + returnClientError('Unknown type!'); + break; + } + } + + private function isForum($html){ + return !is_null($html->find('div[data-controller*=forums.front.forum.forumPage]', 0)) + || !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)); + } + + private function isTopic($html){ + return !is_null($html->find('div[data-controller*=core.front.core.commentFeed]', 0)) + || !is_null($html->find(static::TOPIC_TYPE_DIV, 0)); + } + + private function collectForum($html){ + // There are multiple forum designs in use (depends on version?) + // 1 - Uses an ordered list (based on https://invisioncommunity.com/forums) + // 2 - Uses a table (based on https://onehallyu.com) + + switch(true) { + case !is_null($html->find(static::FORUM_TYPE_LIST_FILTER, 0)): + $this->collectForumList($html); + break; + case !is_null($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)): + $this->collectForumTable($html); + break; + default: + returnClientError('Unknown forum format!'); + break; + } + } + + private function collectForumList($html){ + foreach($html->find(static::FORUM_TYPE_LIST_FILTER, 0)->children() as $row) { + // Columns: Title, Statistics, Last modified + $item = array(); + + $item['uri'] = $row->find('a', 0)->href; + $item['title'] = $row->find('a', 0)->title; + $item['author'] = $row->find('a', 1)->innertext; + $item['timestamp'] = strtotime($row->find('time', 0)->getAttribute('datetime')); + + $this->items[] = $item; + } + } + + private function collectForumTable($html){ + foreach($html->find(static::FORUM_TYPE_TABLE_FILTER, 0)->children() as $row) { + // Columns: Icon, Content, Preview, Statistics, Last modified + $item = array(); + + // Skip header row + if(!is_null($row->find('th', 0))) continue; + + $item['uri'] = $row->find('a', 0)->href; + $item['title'] = $row->find('.title', 0)->plaintext; + $item['timestamp'] = strtotime($row->find('[itemprop=dateCreated]', 0)->plaintext); + + $this->items[] = $item; + } + } + + private function collectTopic($html, $limit){ + // There are multiple topic designs in use (depends on version?) + // 1 - Uses articles (based on https://invisioncommunity.com/forums) + // 2 - Uses divs (based on https://onehallyu.com) + + switch(true) { + case !is_null($html->find(static::TOPIC_TYPE_ARTICLE, 0)): + $this->collectTopicHistory($html, $limit, 'collectTopicArticle'); + break; + case !is_null($html->find(static::TOPIC_TYPE_DIV, 0)): + $this->collectTopicHistory($html, $limit, 'collectTopicDiv'); + break; + default: + returnClientError('Unknown topic format!'); + break; + } + } + + private function collectTopicHistory($html, $limit, $callback){ + // Make sure the callback is valid! + if(!method_exists($this, $callback)) + returnServerError('Unknown function (\'' . $callback . '\')!'); + + $next = null; // Holds the URI of the next page + + do { + // Skip loading HTML on first iteration + if(!is_null($next)) { + $html = getSimpleHTMLDOMCached($next); + } + + $next = $this->$callback($html, is_null($next)); + $limit--; + } while(!is_null($next) && $limit <> 0); + } + + private function collectTopicArticle($html, $firstrun = true){ + $title = $html->find('h1.ipsType_pageTitle', 0)->plaintext; + + // Are we on last page? + if($firstrun && !is_null($html->find('.ipsPagination', 0))) { + $last = $html->find('.ipsPagination_last a', 0)->{'data-page'}; + $active = $html->find('.ipsPagination_active a', 0)->{'data-page'}; + + if($active !== $last) { + // Load last page into memory (cached) + $html = getSimpleHTMLDOMCached($html->find('.ipsPagination_last a', 0)->href); + } + } + + foreach(array_reverse($html->find(static::TOPIC_TYPE_ARTICLE)) as $article) { + $item = array(); + + $item['uri'] = $article->find('time', 0)->parent()->href; + $item['author'] = $article->find('aside a', 0)->plaintext; + $item['title'] = $item['author'] . ' - ' . $title; + $item['timestamp'] = strtotime($article->find('time', 0)->getAttribute('datetime')); + + $content = $article->find('[data-role=commentContent]', 0); + $content = $this->scaleImages($content); + $item['content'] = $this->fixContent($content); + $item['enclosures'] = $this->findImages($article->find('[data-role=commentContent]', 0)) ?: null; + + $this->items[] = $item; + } + + // Return whatever page comes next (previous, as we add in inverse order) + // Do we have a previous page? (inactive means no) + if(!is_null($html->find('li[class=ipsPagination_prev ipsPagination_inactive]', 0))) { + return null; // No, or no more + } elseif(!is_null($html->find('li[class=ipsPagination_prev]', 0))) { + return $html->find('.ipsPagination_prev a', 0)->href; + } + + return null; + } + + private function collectTopicDiv($html, $firstrun = true){ + $title = $html->find('h1.ipsType_pagetitle', 0)->plaintext; + + // Are we on last page? + if($firstrun && !is_null($html->find('.pagination', 0))) { + + $active = $html->find('li[class=page active]', 0)->plaintext; + + // There are two ways the 'last' page is displayed: + // - With a distict 'last' button (only if there are enough pages) + // - With a button for each page (use last button) + if(!is_null($html->find('li.last', 0))) { + $last = $html->find('li.last a', 0); + } else { + $last = $html->find('li[class=page] a', -1); + } + + if($active !== $last->plaintext) { + // Load last page into memory (cached) + $html = getSimpleHTMLDOMCached($last->href); + } + } + + foreach(array_reverse($html->find(static::TOPIC_TYPE_DIV)) as $article) { + $item = array(); + + $item['uri'] = $article->find('a[rel=bookmark]', 0)->href; + $item['author'] = $article->find('.author', 0)->plaintext; + $item['title'] = $item['author'] . ' - ' . $title; + $item['timestamp'] = strtotime($article->find('.published', 0)->getAttribute('title')); + + $content = $article->find('[itemprop=commentText]', 0); + $content = $this->scaleImages($content); + $item['content'] = $this->fixContent($content); + + $item['enclosures'] = $this->findImages($article->find('.post_body', 0)) ?: null; + + $this->items[] = $item; + } + + // Return whatever page comes next (previous, as we add in inverse order) + // Do we have a previous page? + if(!is_null($html->find('li.prev', 0))) { + return $html->find('li.prev a', 0)->href; + } + + return null; + } + + /** Returns all images from the provide HTML DOM */ + private function findImages($html){ + $images = array(); + + foreach($html->find('img') as $img) { + $images[] = $img->src; + } + + return $images; + } + + /** Sets the maximum width and height for all images */ + private function scaleImages($html, $width = 400, $height = 400){ + foreach($html->find('img') as $img) { + $img->style = "max-width: {$width}px; max-height: {$height}px;"; + } + + return $html; + } + + /** Removes all unnecessary tags and adds formatting */ + private function fixContent($html){ + + // Restore quote highlighting + foreach($html->find('blockquote') as $quote) { + $quote->style = <<innertext, + '