Merge branch 'autodetectFeedExpander' of https://framagit.org/peetah/rss-bridge

This commit is contained in:
logmanoriginal 2016-09-17 18:09:05 +02:00
commit a333226733
16 changed files with 52 additions and 34 deletions

View file

@ -11,7 +11,7 @@ class AcrimedBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$hs = new HTMLSanitizer(); $hs = new HTMLSanitizer();
$articlePage = $this->getSimpleHTMLDOM($newsItem->link); $articlePage = $this->getSimpleHTMLDOM($newsItem->link);

View file

@ -10,7 +10,7 @@ class CADBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->CADExtractContent($item['uri']); $item['content'] = $this->CADExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,7 +11,7 @@ class CommonDreamsBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->CommonDreamsExtractContent($item['uri']); $item['content'] = $this->CommonDreamsExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -41,7 +41,7 @@ class DauphineLibereBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']); $item['content'] = $this->ExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,7 +11,7 @@ class DeveloppezDotComBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->DeveloppezDotComExtractContent($item['uri']); $item['content'] = $this->DeveloppezDotComExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,8 +11,8 @@ class FreenewsBridge extends FeedExpander {
} }
protected function parseItem($newsItem) { protected function parseItem($newsItem) {
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']); $articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('.post-container', 0); $content = $articlePage->find('.post-container', 0);
$item['content'] = $content->innertext; $item['content'] = $content->innertext;

View file

@ -84,9 +84,9 @@ class FuturaSciencesBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']); $item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']);
$article = $this->getSimpleHTMLDOMCached($item['uri']) $article = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']); or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']);
$item['content'] = $this->ExtractArticleContent($article); $item['content'] = $this->ExtractArticleContent($article);
$item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article); $item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article);

View file

@ -11,7 +11,7 @@ class LeJournalDuGeekBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']); $item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,8 +11,8 @@ class LeMondeInformatiqueBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_1_0_Item($newsItem); $item = parent::parseItem($newsItem);
$article_html = $this->getSimpleHTMLDOMCached($item['uri']) $article_html = $this->getSimpleHTMLDOMCached($item['uri'])
or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']); or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);
$item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext); $item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext);
$item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext; $item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext;

View file

@ -11,7 +11,7 @@ class LichessBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseATOMItem($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->retrieve_lichess_post($item['uri']); $item['content'] = $this->retrieve_lichess_post($item['uri']);
return $item; return $item;
} }

View file

@ -11,7 +11,7 @@ class NextInpactBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']); $item['content'] = $this->ExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -30,14 +30,14 @@ class NextgovBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = ''; $item['content'] = '';
$namespaces = $newsItem->getNamespaces(true); $namespaces = $newsItem->getNamespaces(true);
if(isset($namespaces['media'])){ if(isset($namespaces['media'])){
$media = $newsItem->children($namespaces['media']); $media = $newsItem->children($namespaces['media']);
if(isset($media->content)){ if(isset($media->content)){
$attributes = $media->content->attributes(); $attributes = $media->content->attributes();
$item['content'] = '<img src="' . $attributes['url'] . '">'; $item['content'] = '<img src="' . $attributes['url'] . '">';
} }
@ -56,7 +56,7 @@ class NextgovBridge extends FeedExpander {
} }
private function ExtractContent($url){ private function ExtractContent($url){
$article = $this->getSimpleHTMLDOMCached($url) $article = $this->getSimpleHTMLDOMCached($url)
or $this->returnServerError('Could not request Nextgov: ' . $url); or $this->returnServerError('Could not request Nextgov: ' . $url);
$contents = $article->find('div.wysiwyg', 0)->innertext; $contents = $article->find('div.wysiwyg', 0)->innertext;

View file

@ -11,7 +11,7 @@ class NiceMatinBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->NiceMatinExtractContent($item['uri']); $item['content'] = $this->NiceMatinExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,7 +11,7 @@ class NumeramaBridge extends FeedExpander {
} }
protected function parseItem($newsItem){ protected function parseItem($newsItem){
$item = $this->parseRSS_2_0_Item($newsItem); $item = parent::parseItem($newsItem);
$item['content'] = $this->ExtractContent($item['uri']); $item['content'] = $this->ExtractContent($item['uri']);
return $item; return $item;
} }

View file

@ -11,7 +11,7 @@ class TheOatmealBridge extends FeedExpander{
} }
protected function parseItem($newsItem) { protected function parseItem($newsItem) {
$item = $this->parseRSS_1_0_Item($newsItem); $item = parent::parseItem($newsItem);
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']); $articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
$content = $articlePage->find('#comic', 0); $content = $articlePage->find('#comic', 0);

View file

@ -5,6 +5,7 @@ abstract class FeedExpander extends BridgeAbstract {
private $name; private $name;
private $uri; private $uri;
private $description; private $description;
private $feedType;
public function collectExpandableDatas($url, $maxItems = -1){ public function collectExpandableDatas($url, $maxItems = -1){
if(empty($url)){ if(empty($url)){
@ -21,22 +22,26 @@ abstract class FeedExpander extends BridgeAbstract {
$rssContent = simplexml_load_string($content); $rssContent = simplexml_load_string($content);
$this->debugMessage('Detecting feed format/version'); $this->debugMessage('Detecting feed format/version');
if(isset($rssContent->channel[0])){ switch(true){
$this->debugMessage('Detected RSS format'); case isset($rssContent->item[0]):
if(isset($rssContent->item[0])){ $this->debugMessage('Detected RSS 1.0 format');
$this->debugMessage('Detected RSS 1.0 format'); $this->feedType="RSS_1_0";
$this->collect_RSS_1_0_data($rssContent, $maxItems); break;
} else { case isset($rssContent->channel[0]):
$this->debugMessage('Detected RSS 0.9x or 2.0 format'); $this->debugMessage('Detected RSS 0.9x or 2.0 format');
$this->collect_RSS_2_0_data($rssContent, $maxItems); $this->feedType="RSS_2_0";
} break;
} elseif(isset($rssContent->entry[0])){ case isset($rssContent->entry[0]):
$this->debugMessage('Detected ATOM format'); $this->debugMessage('Detected ATOM format');
$this->collect_ATOM_data($rssContent, $maxItems); $this->feedType="ATOM_1_0";
} else { break;
default:
$this->debugMessage('Unknown feed format/version'); $this->debugMessage('Unknown feed format/version');
$this->returnServerError('The feed format is unknown!'); $this->returnServerError('The feed format is unknown!');
break;
} }
$this->{'collect_'.$this->feedType.'_data'}($rssContent,$maxItems);
} }
protected function collect_RSS_1_0_data($rssContent, $maxItems){ protected function collect_RSS_1_0_data($rssContent, $maxItems){
@ -62,7 +67,7 @@ abstract class FeedExpander extends BridgeAbstract {
} }
} }
protected function collect_ATOM_data($content, $maxItems){ protected function collect_ATOM_1_0_data($content, $maxItems){
$this->load_ATOM_feed_data($content); $this->load_ATOM_feed_data($content);
foreach($content->entry as $item){ foreach($content->entry as $item){
$this->debugMessage('parsing item ' . var_export($item, true)); $this->debugMessage('parsing item ' . var_export($item, true));
@ -163,7 +168,20 @@ abstract class FeedExpander extends BridgeAbstract {
* @param $item the input rss item * @param $item the input rss item
* @return a RSS-Bridge Item, with (hopefully) the whole content) * @return a RSS-Bridge Item, with (hopefully) the whole content)
*/ */
abstract protected function parseItem($item); protected function parseItem($item){
switch($this->feedType){
case 'RSS_1_0':
return $this->parseRSS_1_0_Item($item);
break;
case 'RSS_2_0':
return $this->parseRSS_2_0_Item($item);
break;
case 'ATOM_1_0':
return $this->parseATOMItem($item);
break;
default: $this->returnClientError('Unknown version ' . $this->getInput('version') . '!');
}
}
public function getURI(){ public function getURI(){
return $this->uri; return $this->uri;