Rss-Bridge/bridges/ElsevierBridge.php
logmanoriginal a1c680f8e8 Fix indentation and improve code style
- Use tab instead of spaces
- Remove obsolete bridge description at start of the file
- Add spaces at the assignment operator ('=' -> ' = ')
- Remove unnecessary empty lines
2016-08-02 21:40:22 +02:00

91 lines
2.6 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
/**
 * Bridge that lists the recent articles of an Elsevier journal.
 *
 * The journal is selected through the "j" parameter, which is inserted into
 * the journals.elsevier.com URL. Every '.pod-listing' element found on the
 * journal's "recent-articles" page becomes one feed item.
 */
class ElsevierBridge extends BridgeAbstract{

	/**
	 * Fills in the bridge metadata and declares the single "j" parameter.
	 */
	public function loadMetadatas() {
		$this->maintainer = 'Pierre Mazière';
		$this->name = 'Elsevier journals recent articles';
		$this->uri = 'http://www.journals.elsevier.com';
		$this->description = 'Returns the recent articles published in Elsevier journals';
		$this->update = '2016-08-02';

		// The parameter description is a JSON string; its content is runtime data.
		$this->parameters[] =
'[
{
"name" : "Journal name",
"identifier" : "j",
"required" : "true",
"exampleValue" : "academic-pediatrics",
"title" : "Insert html-part of your journal"
}
]';
	}

	/**
	 * Extracts the list of names from an article as string.
	 *
	 * @param object $article Listing node; presumably a simple_html_dom node,
	 *                        it is only used through find() and ->plaintext.
	 * @return string Trimmed text of the first <small> element, or '' if the
	 *                article has none.
	 */
	public function ExtractArticleName ($article){
		$names = $article->find('small', 0);
		if($names)
			return trim($names->plaintext);
		return '';
	}

	/**
	 * Extracts the timestamp from an article.
	 *
	 * The text of the '.article-info' element depends on the age of an article:
	 * - Available online 29 July 2016
	 * - July 2016
	 * - May-June 2016
	 *
	 * @param object $article Listing node (see ExtractArticleName).
	 * @return int Result of strtotime() on the date fragment, or 0 when the
	 *             element is missing, no date is recognised, or strtotime()
	 *             cannot parse the fragment.
	 */
	public function ExtractArticleTimestamp ($article){
		$time = $article->find('.article-info', 0);
		if(!$time){
			return 0;
		}

		$timestring = trim($time->plaintext);

		// Try "day month year" first, then fall back to "month year". The
		// fallback also covers month ranges, because it matches the trailing
		// "June 2016" part of such a range; a dedicated range pattern could
		// therefore never be reached.
		if(preg_match('/\d+\s\S+\s\d{4}/', $timestring, $matches)
		|| preg_match('/[A-Za-z]+\s\d{4}/', $timestring, $matches)){
			// Only the date fragment itself is parsed, never leading text.
			$timestamp = strtotime($matches[0]);

			// strtotime() reports failure with false; keep the return type an int.
			return $timestamp === false ? 0 : $timestamp;
		}

		return 0;
	}

	/**
	 * Extracts the content from an article.
	 *
	 * @param object $article Listing node (see ExtractArticleName).
	 * @return string Trimmed text of the '.article-content' element, or '' if
	 *                the article has none.
	 */
	public function ExtractArticleContent ($article){
		$content = $article->find('.article-content', 0);
		if($content){
			return trim($content->plaintext);
		}
		return '';
	}

	/**
	 * Loads the journal's recent-articles page and appends one item per
	 * listing to $this->items.
	 *
	 * @param array $param Request parameters; $param['j'] is the journal's
	 *                     URL part and is inserted into the URL as given.
	 */
	public function collectData(array $param){
		$uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/';

		$html = file_get_html($uri);
		if(!$html){
			$this->returnError('No results for Elsevier journal '.$param['j'], 404);
			// returnError() is defined outside this file; should it hand
			// control back, stop here instead of calling find() on a
			// non-object.
			return;
		}

		foreach($html->find('.pod-listing') as $article){
			// Title and link come from the same anchor. A listing without
			// it cannot produce a usable item, so it is skipped rather than
			// dereferencing null.
			$link = $article->find('.pod-listing-header>a', 0);
			if(!$link){
				continue;
			}

			$item = new \Item();
			$item->uri = $link->getAttribute('href').'?np=y';
			// Trimmed like the name and content fields.
			$item->title = trim($link->plaintext);
			$item->name = $this->ExtractArticleName($article);
			$item->timestamp = $this->ExtractArticleTimestamp($article);
			$item->content = $this->ExtractArticleContent($article);
			$this->items[] = $item;
		}
	}

	/**
	 * @return string Fixed display name of this bridge.
	 */
	public function getName(){
		return 'Elsevier journals recent articles';
	}

	/**
	 * @return string Fixed base URI of the Elsevier journals site.
	 */
	public function getURI(){
		return 'http://www.journals.elsevier.com';
	}

	/**
	 * @return int Cache duration in seconds.
	 */
	public function getCacheDuration(){
		return 43200; // 12h
	}
}
?>