Merge branch 'autodetectFeedExpander' of https://framagit.org/peetah/rss-bridge

This commit is contained in:
logmanoriginal 2016-09-17 18:09:05 +02:00
commit a333226733
16 changed files with 52 additions and 34 deletions

View file

@ -11,7 +11,7 @@ class AcrimedBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$hs = new HTMLSanitizer();
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);

View file

@ -10,7 +10,7 @@ class CADBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->CADExtractContent($item['uri']);
return $item;
}

View file

@ -11,7 +11,7 @@ class CommonDreamsBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->CommonDreamsExtractContent($item['uri']);
return $item;
}

View file

@ -41,7 +41,7 @@ class DauphineLibereBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']);
return $item;
}

View file

@ -11,7 +11,7 @@ class DeveloppezDotComBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->DeveloppezDotComExtractContent($item['uri']);
return $item;
}

View file

@ -11,8 +11,8 @@ class FreenewsBridge extends FeedExpander {
}
protected function parseItem($newsItem) {
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('.post-container', 0);
$item['content'] = $content->innertext;

View file

@ -84,9 +84,9 @@ class FuturaSciencesBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']);
$article = $this->getSimpleHTMLDOMCached($item['uri'])
$article = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']);
$item['content'] = $this->ExtractArticleContent($article);
$item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article);

View file

@ -11,7 +11,7 @@ class LeJournalDuGeekBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
return $item;
}

View file

@ -11,8 +11,8 @@ class LeMondeInformatiqueBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_1_0_Item($newsItem);
$article_html = $this->getSimpleHTMLDOMCached($item['uri'])
$item = parent::parseItem($newsItem);
$article_html = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);
$item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext);
$item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext;

View file

@ -11,7 +11,7 @@ class LichessBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseATOMItem($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->retrieve_lichess_post($item['uri']);
return $item;
}

View file

@ -11,7 +11,7 @@ class NextInpactBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']);
return $item;
}

View file

@ -30,14 +30,14 @@ class NextgovBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = '';
$namespaces = $newsItem->getNamespaces(true);
if(isset($namespaces['media'])){
$media = $newsItem->children($namespaces['media']);
if(isset($media->content)){
if(isset($media->content)){
$attributes = $media->content->attributes();
$item['content'] = '<img src="' . $attributes['url'] . '">';
}
@ -56,7 +56,7 @@ class NextgovBridge extends FeedExpander {
}
private function ExtractContent($url){
$article = $this->getSimpleHTMLDOMCached($url)
$article = $this->getSimpleHTMLDOMCached($url)
or $this->returnServerError('Could not request Nextgov: ' . $url);
$contents = $article->find('div.wysiwyg', 0)->innertext;

View file

@ -11,7 +11,7 @@ class NiceMatinBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->NiceMatinExtractContent($item['uri']);
return $item;
}

View file

@ -11,7 +11,7 @@ class NumeramaBridge extends FeedExpander {
}
protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']);
return $item;
}

View file

@ -11,7 +11,7 @@ class TheOatmealBridge extends FeedExpander{
}
protected function parseItem($newsItem) {
$item = $this->parseRSS_1_0_Item($newsItem);
$item = parent::parseItem($newsItem);
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('#comic', 0);

View file

@ -5,6 +5,7 @@ abstract class FeedExpander extends BridgeAbstract {
private $name;
private $uri;
private $description;
private $feedType;
public function collectExpandableDatas($url, $maxItems = -1){
if(empty($url)){
@ -21,22 +22,26 @@ abstract class FeedExpander extends BridgeAbstract {
$rssContent = simplexml_load_string($content);
$this->debugMessage('Detecting feed format/version');
if(isset($rssContent->channel[0])){
$this->debugMessage('Detected RSS format');
if(isset($rssContent->item[0])){
$this->debugMessage('Detected RSS 1.0 format');
$this->collect_RSS_1_0_data($rssContent, $maxItems);
} else {
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
$this->collect_RSS_2_0_data($rssContent, $maxItems);
}
} elseif(isset($rssContent->entry[0])){
switch(true){
case isset($rssContent->item[0]):
$this->debugMessage('Detected RSS 1.0 format');
$this->feedType="RSS_1_0";
break;
case isset($rssContent->channel[0]):
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
$this->feedType="RSS_2_0";
break;
case isset($rssContent->entry[0]):
$this->debugMessage('Detected ATOM format');
$this->collect_ATOM_data($rssContent, $maxItems);
} else {
$this->feedType="ATOM_1_0";
break;
default:
$this->debugMessage('Unknown feed format/version');
$this->returnServerError('The feed format is unknown!');
break;
}
$this->{'collect_'.$this->feedType.'_data'}($rssContent,$maxItems);
}
protected function collect_RSS_1_0_data($rssContent, $maxItems){
@ -62,7 +67,7 @@ abstract class FeedExpander extends BridgeAbstract {
}
}
protected function collect_ATOM_data($content, $maxItems){
protected function collect_ATOM_1_0_data($content, $maxItems){
$this->load_ATOM_feed_data($content);
foreach($content->entry as $item){
$this->debugMessage('parsing item ' . var_export($item, true));
@ -163,7 +168,20 @@ abstract class FeedExpander extends BridgeAbstract {
* @param $item the input rss item
* @return an RSS-Bridge item, with (hopefully) the whole content
*/
abstract protected function parseItem($item);
protected function parseItem($item){
    // Default item parser: dispatches to the format-specific parser that
    // matches $this->feedType. Bridges override this method and call
    // parent::parseItem() to get the basic item before expanding its content.
    //
    // $item is the raw feed element (RSS <item> or ATOM <entry>).
    // Returns whatever the selected format-specific parser returns.
    switch($this->feedType){
        case 'RSS_1_0':
            return $this->parseRSS_1_0_Item($item);
        case 'RSS_2_0':
            return $this->parseRSS_2_0_Item($item);
        case 'ATOM_1_0':
            return $this->parseATOMItem($item);
        default:
            // Report the value that was actually switched on. var_export()
            // keeps an unset feed type (null) visible in the message instead
            // of silently rendering it as an empty string.
            // NOTE(review): returnClientError() is presumably expected to
            // abort execution here — confirm; otherwise this branch returns null.
            $this->returnClientError('Unknown feed type ' . var_export($this->feedType, true) . '!');
    }
}
public function getURI(){
return $this->uri;