Rss-Bridge/bridges/ElsevierBridge.php

<?php
/**
 * Bridge listing the recent articles of an Elsevier journal.
 *
 * The journal is chosen through the 'j' parameter, which is inserted into the
 * page address built from self::URI: "<URI><journal>/recent-articles/".
 */
class ElsevierBridge extends BridgeAbstract{
	const MAINTAINER = 'Pierre Mazière';
	const NAME = 'Elsevier journals recent articles';
	const URI = 'http://www.journals.elsevier.com/';
	const DESCRIPTION = 'Returns the recent articles published in Elsevier journals';

	const PARAMETERS = array( array(
		'j'=>array(
			'name'=>'Journal name',
			'required'=>true,
			'exampleValue'=>'academic-pediatrics',
			'title'=>'Insert html-part of your journal'
		)
	));

	/**
	 * Extracts the list of names from an article as string.
	 *
	 * @param object $article DOM node of one article listing; only its find()
	 *                        method is used here
	 * @return string trimmed text of the first <small> element, or '' if the
	 *                article has none
	 */
	private function ExtractArticleName ($article){
		$names = $article->find('small', 0);
		if($names)
			return trim($names->plaintext);
		return '';
	}

	/**
	 * Extracts the timestamp from an article.
	 *
	 * @param object $article DOM node of one article listing
	 * @return int Unix timestamp, or 0 when the article has no '.article-info'
	 *             element or its text holds no date that can be parsed
	 */
	private function ExtractArticleTimestamp ($article){
		$time = $article->find('.article-info', 0);
		if(!$time)
			return 0;

		$timestring = trim($time->plaintext);
		/*
			The format depends on the age of an article:
			- Available online 29 July 2016
			- July 2016
			- May–June 2016
		*/
		if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){
			// Use the whole match: the greedy \S* prefix can swallow the leading
			// digits of the day, so group 1 may hold "9 July 2016" for "29 July 2016"
			$date = $matches[0];
		} elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
			// Month range written with a hyphen: keep only the captured
			// "<last month> <year>", the full "May-June 2016" is not a date
			$date = $matches[1];
		} elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
			// Plain "<month> <year>"; also catches ranges written with an en dash,
			// since the match can only start after the dash
			$date = $matches[0];
		} else {
			return 0;
		}

		// strtotime() yields false on failure; report that as "no date" like the other paths
		$timestamp = strtotime($date);
		return $timestamp === false ? 0 : $timestamp;
	}

	/**
	 * Extracts the content from an article.
	 *
	 * @param object $article DOM node of one article listing
	 * @return string trimmed text of the first '.article-content' element, or ''
	 *                if the article has none
	 */
	private function ExtractArticleContent ($article){
		$content = $article->find('.article-content', 0);
		if($content){
			return trim($content->plaintext);
		}
		return '';
	}

	/**
	 * Loads the journal's recent-articles page and appends one item per
	 * '.pod-listing' element to $this->items.
	 *
	 * Each item carries the keys 'uri', 'title', 'author', 'timestamp' and
	 * 'content'. Listings without a header link are skipped.
	 */
	public function collectData(){
		$journal = $this->getInput('j');
		$uri = self::URI . $journal . '/recent-articles/';
		// NOTE(review): returnServerError() is defined outside this file; presumably it aborts the request — confirm
		$html = getSimpleHTMLDOM($uri) or returnServerError('No results for Elsevier journal '.$journal);

		foreach($html->find('.pod-listing') as $article){
			// find() gives no element when the selector does not match; without
			// a link there is neither a URI nor a title, so skip the listing
			$link = $article->find('.pod-listing-header>a', 0);
			if(!$link)
				continue;

			$item = array();
			$item['uri'] = $link->getAttribute('href').'?np=y';
			$item['title'] = trim($link->plaintext);
			$item['author'] = $this->ExtractArticleName($article);
			$item['timestamp'] = $this->ExtractArticleTimestamp($article);
			$item['content'] = $this->ExtractArticleContent($article);
			$this->items[] = $item;
		}
	}

	/**
	 * @return int cache lifetime for this bridge
	 */
	public function getCacheDuration(){
		return 43200; // 12h
	}
}
?>
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
+								<?php
 								class ElsevierBridge extends BridgeAbstract{
-												[bridges] use constants instead of variable members

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-30 11:23:55 +02:00
+									const MAINTAINER = 'Pierre Mazière';
 									const NAME = 'Elsevier journals recent articles';
 									const URI = 'http://www.journals.elsevier.com/';
 									const DESCRIPTION = 'Returns the recent articles published in Elsevier journals';
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
-												[bridges] use constants instead of variable members

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-30 11:23:55 +02:00
+								    const PARAMETERS = array( array(
-												[core + bridges] get rid of loadMetadata

if a bridge needs to modify some of the data that were initialized
there, ::__construct() should be used instead.

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-27 21:03:26 +02:00
+								        'j'=>array(
-												[bridges] migrate all bridges to an array based definition of parameters

see github issue 356

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-22 01:25:56 +02:00
+								            'name'=>'Journal name',
 								            'required'=>true,
 								            'exampleValue'=>'academic-pediactrics',
 								            'title'=>'Insert html-part of your journal'
-												[core + bridges] get rid of loadMetadata

if a bridge needs to modify some of the data that were initialized
there, ::__construct() should be used instead.

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-27 21:03:26 +02:00
+								        )
 								    ));
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+									// Extracts the list of names from an article as string
-												bridges: Remove unused functions and update scopes

Many bridges implemented getDescription() which is only applicable for
bridges extending RSSExpander.

Functions that are not part of the abstract class or interface should
be in private scope for better readability.

											
										
										
											2016-08-06 16:00:56 +02:00
+									private function ExtractArticleName ($article){
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+										$names = $article->find('small', 0);
 										if($names)
 											return trim($names->plaintext);
 										return '';
 									}
-												Create member functions to extract information from articles

The extractor function will handle many situations more specifically in
order to provide better results.

											
										
										
											2016-08-02 21:35:13 +02:00
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+									// Extracts the timestamp from an article
-												bridges: Remove unused functions and update scopes

Many bridges implemented getDescription() which is only applicable for
bridges extending RSSExpander.

Functions that are not part of the abstract class or interface should
be in private scope for better readability.

											
										
										
											2016-08-06 16:00:56 +02:00
+									private function ExtractArticleTimestamp ($article){
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+										$time = $article->find('.article-info', 0);
 										if($time){
 											$timestring = trim($time->plaintext);
-												bridges: use BridgeAbstract::getSimpleHTMLDOM

instead of BridgeAbstract::file_get_html

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-07-08 19:06:35 +02:00
+											/*
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+												The format depends on the age of an article:
 												- Available online 29 July 2016
 												- July 2016
 												- May–June 2016
 											*/
 											if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){
 												return strtotime($matches[0]);
 											} elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
 												return strtotime($matches[0]);
-												[ElsevierBridge] fix time extraction

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-19 18:50:50 +02:00
+											} elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){
 												return strtotime($matches[0]);
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+											} else {
 												return 0;
 											}
 										}
 										return 0;
 									}
-												Create member functions to extract information from articles

The extractor function will handle many situations more specifically in
order to provide better results.

											
										
										
											2016-08-02 21:35:13 +02:00
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+									// Extracts the content from an article
-												bridges: Remove unused functions and update scopes

Many bridges implemented getDescription() which is only applicable for
bridges extending RSSExpander.

Functions that are not part of the abstract class or interface should
be in private scope for better readability.

											
										
										
											2016-08-06 16:00:56 +02:00
+									private function ExtractArticleContent ($article){
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+										$content = $article->find('.article-content', 0);
 										if($content){
 											return trim($content->plaintext);
 										}
 										return '';
 									}
-												Create member functions to extract information from articles

The extractor function will handle many situations more specifically in
order to provide better results.

											
										
										
											2016-08-02 21:35:13 +02:00
-												[core] store parameters values in BridgeAbstract::parameters

This way, any BridgeAbstract method can now have access to these values,
not only collectData

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-25 01:24:53 +02:00
+									public function collectData(){
-												[bridges] use constants instead of variable members

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-08-30 11:23:55 +02:00
+										$uri = self::URI . $this->getInput('j') . '/recent-articles/';
-												[core] extract BridgeAbstract methods to make them functions

- returnError, returnServerError, returnClientError ,debugMessage are
  moved to lib/error.php

- getContents, getSimpleHTMLDOM, getSimpleHTMLDOMCached are moved to
  lib/contents.php

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-09-25 23:22:33 +02:00
+										$html = getSimpleHTMLDOM($uri) or returnServerError('No results for Elsevier journal '.$this->getInput('j'));
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+										foreach($html->find('.pod-listing') as $article){
-												[bridges] Change all occurrences of the Item object to array

											
										
										
											2016-08-22 18:55:59 +02:00
+											$item = array();
 											$item['uri'] = $article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y';
 											$item['title'] = $article->find('.pod-listing-header>a',0)->plaintext;
 											$item['author'] = $this->ExtractArticleName($article);
 											$item['timestamp'] = $this->ExtractArticleTimestamp($article);
 											$item['content'] = $this->ExtractArticleContent($article);
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+											$this->items[] = $item;
 										}
 									}
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
-												Fix indentation and improve code style

- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines

											
										
										
											2016-08-02 21:40:22 +02:00
+									public function getCacheDuration(){
 										return 43200; // 12h
 									}
-												add new bridge: Elsevier journals recent articles

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-06-26 11:52:54 +02:00
+								}
-												bridges: use BridgeAbstract::getSimpleHTMLDOM

instead of BridgeAbstract::file_get_html

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>

											
										
										
											2016-07-08 19:06:35 +02:00
+								?>