Merge branch 'autodetectFeedExpander' of https://framagit.org/peetah/rss-bridge
This commit is contained in:
commit
a333226733
16 changed files with 52 additions and 34 deletions
|
@ -11,7 +11,7 @@ class AcrimedBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
|
|
||||||
$hs = new HTMLSanitizer();
|
$hs = new HTMLSanitizer();
|
||||||
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
$articlePage = $this->getSimpleHTMLDOM($newsItem->link);
|
||||||
|
|
|
@ -10,7 +10,7 @@ class CADBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->CADExtractContent($item['uri']);
|
$item['content'] = $this->CADExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ class CommonDreamsBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->CommonDreamsExtractContent($item['uri']);
|
$item['content'] = $this->CommonDreamsExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ class DauphineLibereBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
$item['content'] = $this->ExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ class DeveloppezDotComBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->DeveloppezDotComExtractContent($item['uri']);
|
$item['content'] = $this->DeveloppezDotComExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,8 +11,8 @@ class FreenewsBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem) {
|
protected function parseItem($newsItem) {
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
|
|
||||||
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
$content = $articlePage->find('.post-container', 0);
|
$content = $articlePage->find('.post-container', 0);
|
||||||
$item['content'] = $content->innertext;
|
$item['content'] = $content->innertext;
|
||||||
|
|
|
@ -84,9 +84,9 @@ class FuturaSciencesBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']);
|
$item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']);
|
||||||
$article = $this->getSimpleHTMLDOMCached($item['uri'])
|
$article = $this->getSimpleHTMLDOMCached($item['uri'])
|
||||||
or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']);
|
or $this->returnServerError('Could not request Futura-Sciences: ' . $item['uri']);
|
||||||
$item['content'] = $this->ExtractArticleContent($article);
|
$item['content'] = $this->ExtractArticleContent($article);
|
||||||
$item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article);
|
$item['author'] = empty($this->ExtractAuthor($article)) ? $item['author'] : $this->ExtractAuthor($article);
|
||||||
|
|
|
@ -11,7 +11,7 @@ class LeJournalDuGeekBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
|
$item['content'] = $this->LeJournalDuGeekExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,8 +11,8 @@ class LeMondeInformatiqueBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_1_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$article_html = $this->getSimpleHTMLDOMCached($item['uri'])
|
$article_html = $this->getSimpleHTMLDOMCached($item['uri'])
|
||||||
or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);
|
or $this->returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);
|
||||||
$item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext);
|
$item['content'] = $this->CleanArticle($article_html->find('div#article', 0)->innertext);
|
||||||
$item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext;
|
$item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext;
|
||||||
|
|
|
@ -11,7 +11,7 @@ class LichessBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseATOMItem($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->retrieve_lichess_post($item['uri']);
|
$item['content'] = $this->retrieve_lichess_post($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ class NextInpactBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
$item['content'] = $this->ExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,14 +30,14 @@ class NextgovBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
|
|
||||||
$item['content'] = '';
|
$item['content'] = '';
|
||||||
|
|
||||||
$namespaces = $newsItem->getNamespaces(true);
|
$namespaces = $newsItem->getNamespaces(true);
|
||||||
if(isset($namespaces['media'])){
|
if(isset($namespaces['media'])){
|
||||||
$media = $newsItem->children($namespaces['media']);
|
$media = $newsItem->children($namespaces['media']);
|
||||||
if(isset($media->content)){
|
if(isset($media->content)){
|
||||||
$attributes = $media->content->attributes();
|
$attributes = $media->content->attributes();
|
||||||
$item['content'] = '<img src="' . $attributes['url'] . '">';
|
$item['content'] = '<img src="' . $attributes['url'] . '">';
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,7 @@ class NextgovBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
private function ExtractContent($url){
|
private function ExtractContent($url){
|
||||||
$article = $this->getSimpleHTMLDOMCached($url)
|
$article = $this->getSimpleHTMLDOMCached($url)
|
||||||
or $this->returnServerError('Could not request Nextgov: ' . $url);
|
or $this->returnServerError('Could not request Nextgov: ' . $url);
|
||||||
|
|
||||||
$contents = $article->find('div.wysiwyg', 0)->innertext;
|
$contents = $article->find('div.wysiwyg', 0)->innertext;
|
||||||
|
|
|
@ -11,7 +11,7 @@ class NiceMatinBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->NiceMatinExtractContent($item['uri']);
|
$item['content'] = $this->NiceMatinExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ class NumeramaBridge extends FeedExpander {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem){
|
protected function parseItem($newsItem){
|
||||||
$item = $this->parseRSS_2_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
$item['content'] = $this->ExtractContent($item['uri']);
|
$item['content'] = $this->ExtractContent($item['uri']);
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ class TheOatmealBridge extends FeedExpander{
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function parseItem($newsItem) {
|
protected function parseItem($newsItem) {
|
||||||
$item = $this->parseRSS_1_0_Item($newsItem);
|
$item = parent::parseItem($newsItem);
|
||||||
|
|
||||||
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
$articlePage = $this->getSimpleHTMLDOMCached($item['uri']);
|
||||||
$content = $articlePage->find('#comic', 0);
|
$content = $articlePage->find('#comic', 0);
|
||||||
|
|
|
@ -5,6 +5,7 @@ abstract class FeedExpander extends BridgeAbstract {
|
||||||
private $name;
|
private $name;
|
||||||
private $uri;
|
private $uri;
|
||||||
private $description;
|
private $description;
|
||||||
|
private $feedType;
|
||||||
|
|
||||||
public function collectExpandableDatas($url, $maxItems = -1){
|
public function collectExpandableDatas($url, $maxItems = -1){
|
||||||
if(empty($url)){
|
if(empty($url)){
|
||||||
|
@ -21,22 +22,26 @@ abstract class FeedExpander extends BridgeAbstract {
|
||||||
$rssContent = simplexml_load_string($content);
|
$rssContent = simplexml_load_string($content);
|
||||||
|
|
||||||
$this->debugMessage('Detecting feed format/version');
|
$this->debugMessage('Detecting feed format/version');
|
||||||
if(isset($rssContent->channel[0])){
|
switch(true){
|
||||||
$this->debugMessage('Detected RSS format');
|
case isset($rssContent->item[0]):
|
||||||
if(isset($rssContent->item[0])){
|
$this->debugMessage('Detected RSS 1.0 format');
|
||||||
$this->debugMessage('Detected RSS 1.0 format');
|
$this->feedType="RSS_1_0";
|
||||||
$this->collect_RSS_1_0_data($rssContent, $maxItems);
|
break;
|
||||||
} else {
|
case isset($rssContent->channel[0]):
|
||||||
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
|
$this->debugMessage('Detected RSS 0.9x or 2.0 format');
|
||||||
$this->collect_RSS_2_0_data($rssContent, $maxItems);
|
$this->feedType="RSS_2_0";
|
||||||
}
|
break;
|
||||||
} elseif(isset($rssContent->entry[0])){
|
case isset($rssContent->entry[0]):
|
||||||
$this->debugMessage('Detected ATOM format');
|
$this->debugMessage('Detected ATOM format');
|
||||||
$this->collect_ATOM_data($rssContent, $maxItems);
|
$this->feedType="ATOM_1_0";
|
||||||
} else {
|
break;
|
||||||
|
default:
|
||||||
$this->debugMessage('Unknown feed format/version');
|
$this->debugMessage('Unknown feed format/version');
|
||||||
$this->returnServerError('The feed format is unknown!');
|
$this->returnServerError('The feed format is unknown!');
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$this->{'collect_'.$this->feedType.'_data'}($rssContent,$maxItems);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function collect_RSS_1_0_data($rssContent, $maxItems){
|
protected function collect_RSS_1_0_data($rssContent, $maxItems){
|
||||||
|
@ -62,7 +67,7 @@ abstract class FeedExpander extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected function collect_ATOM_data($content, $maxItems){
|
protected function collect_ATOM_1_0_data($content, $maxItems){
|
||||||
$this->load_ATOM_feed_data($content);
|
$this->load_ATOM_feed_data($content);
|
||||||
foreach($content->entry as $item){
|
foreach($content->entry as $item){
|
||||||
$this->debugMessage('parsing item ' . var_export($item, true));
|
$this->debugMessage('parsing item ' . var_export($item, true));
|
||||||
|
@ -163,7 +168,20 @@ abstract class FeedExpander extends BridgeAbstract {
|
||||||
* @param $item the input rss item
|
* @param $item the input rss item
|
||||||
* @return a RSS-Bridge Item, with (hopefully) the whole content)
|
* @return a RSS-Bridge Item, with (hopefully) the whole content)
|
||||||
*/
|
*/
|
||||||
abstract protected function parseItem($item);
|
protected function parseItem($item){
	// Default item parser: dispatches to the parser matching the feed type
	// recorded in $this->feedType. Bridges that override this method can call
	// parent::parseItem($item) and then enrich the returned item.
	//
	// $item is the input feed item; the return value is whatever the
	// selected parse*Item() helper returns for it.
	switch($this->feedType){
	case 'RSS_1_0':
		return $this->parseRSS_1_0_Item($item);
	case 'RSS_2_0':
		return $this->parseRSS_2_0_Item($item);
	case 'ATOM_1_0':
		return $this->parseATOMItem($item);
	default:
		// Report the value this switch actually dispatched on, so the
		// message shows what failed to match. var_export() keeps an unset
		// (null) feed type visible as "NULL" instead of an empty string.
		$this->returnClientError(
			'Unknown feed format/version ' . var_export($this->feedType, true) . '!'
		);
	}
}
|
||||||
|
|
||||||
public function getURI(){
|
public function getURI(){
|
||||||
return $this->uri;
|
return $this->uri;
|
||||||
|
|
Loading…
Reference in a new issue