[FourchanBridge] use 'category' and 'thread' parameters instead of full URI

This breaks compatibility with previous versions of FourchanBridge.
Bridges should never take full URIs as input: validating a full URI is
always more complicated, and hence more prone to security issues,
than rebuilding a clean URI from simple validated inputs.

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-09-04 14:07:58 +02:00
parent 671703cd37
commit fec864e010

View file

@ -3,35 +3,35 @@ class FourchanBridge extends BridgeAbstract{
const MAINTAINER = "mitsukarenai"; const MAINTAINER = "mitsukarenai";
const NAME = "4chan"; const NAME = "4chan";
const URI = "https://www.4chan.org/"; const URI = "https://boards.4chan.org/";
const DESCRIPTION = "Returns posts from the specified thread"; const DESCRIPTION = "Returns posts from the specified thread";
const PARAMETERS = array( array( const PARAMETERS = array( array(
't'=>array( 'c'=>array(
'name'=>'Thread URL', 'name'=>'Thread category',
'pattern'=>'(https:\/\/)?boards\.4chan\.org\/.*thread\/.*',
'required'=>true 'required'=>true
) ),
't'=>array(
'name'=>'Thread number',
'type'=>'number',
'required'=>true
)
)); ));
/**
 * Builds the URI of the requested thread from the 'c' (category) and
 * 't' (thread number) inputs.
 *
 * Both inputs are percent-encoded before being appended to the base URI, so
 * characters such as '/', '?' or '#' in an input stay inside a single path
 * segment instead of altering the path, query or fragment of the result.
 *
 * @return string static::URI followed by "<category>/thread/<thread number>"
 */
public function getURI(){
    // No pattern or type constraint is declared for 'c' in PARAMETERS, so
    // treat it as free text and encode it rather than trusting its content.
    $category = rawurlencode((string)$this->getInput('c'));
    $thread = rawurlencode((string)$this->getInput('t'));

    return static::URI.$category.'/thread/'.$thread;
}
public function collectData(){ public function collectData(){
$thread = parse_url($this->getInput('t')) $html = $this->getSimpleHTMLDOM($this->getURI())
or $this->returnClientError('This URL seems malformed, please check it.'); or $this->returnServerError("Could not request 4chan, thread not found");
if($thread['host'] !== 'boards.4chan.org')
$this->returnClientError('4chan thread URL only.');
if(strpos($thread['path'], 'thread/') === FALSE)
$this->returnClientError('You must specify the thread URL.');
$url = 'https://boards.4chan.org'.$thread['path'];
$html = $this->getSimpleHTMLDOM($url)
or $this->returnServerError("Could not request 4chan, thread not found");
foreach($html->find('div.postContainer') as $element) { foreach($html->find('div.postContainer') as $element) {
$item = array(); $item = array();
$item['id'] = $element->find('.post', 0)->getAttribute('id'); $item['id'] = $element->find('.post', 0)->getAttribute('id');
$item['uri'] = $url.'#'.$item['id']; $item['uri'] = $this->getURI().'#'.$item['id'];
$item['timestamp'] = $element->find('span.dateTime', 0)->getAttribute('data-utc'); $item['timestamp'] = $element->find('span.dateTime', 0)->getAttribute('data-utc');
$item['author'] = $element->find('span.name', 0)->plaintext; $item['author'] = $element->find('span.name', 0)->plaintext;