Merge pull request #308 from LogMANOriginal/ElsevierBridge

Elsevier bridge
This commit is contained in:
Mitsu 2016-08-02 22:38:34 +02:00 committed by GitHub
commit 84847bf85f

View file

@ -1,55 +1,91 @@
<?php <?php
/**
 * ElsevierBridge
 *
 * @name Elsevier Bridge
 * @description Returns the recent articles published in Elsevier journals
 *
 * Fetches the "recent articles" page of a journal on journals.elsevier.com
 * and turns every `.pod-listing` element found there into a feed item.
 */
class ElsevierBridge extends BridgeAbstract{

    /**
     * Declares the bridge metadata and its single request parameter `j`
     * (the journal identifier used in the journal URL).
     */
    public function loadMetadatas() {
        $this->maintainer = 'Pierre Mazière';
        $this->name = 'Elsevier journals recent articles';
        $this->uri = 'http://www.journals.elsevier.com';
        $this->description = 'Returns the recent articles published in Elsevier journals';
        $this->update = '2016-08-02';

        // The parameter description is stored as a JSON string.
        $this->parameters[] =
        '[
            {
                "name" : "Journal name",
                "identifier" : "j",
                "required" : "true",
                "exampleValue" : "academic-pediatrics",
                "title" : "Insert html-part of your journal"
            }
        ]';
    }

    /**
     * Extracts the list of author names from an article as a string.
     *
     * @param object $article Article node exposing find() (a `.pod-listing` element)
     * @return string Trimmed text of the first <small> element, or '' if there is none
     */
    public function ExtractArticleName($article){
        $names = $article->find('small', 0);
        if(!$names){
            return '';
        }

        return trim($names->plaintext);
    }

    /**
     * Extracts the publication timestamp from an article.
     *
     * The text of `.article-info` depends on the age of the article:
     *  - "Available online 29 July 2016"
     *  - "July 2016"
     *  - a month range such as "May-June 2016" (seen as "MayJune 2016"
     *    when the separator is lost); the last month of the range is used
     *
     * @param object $article Article node exposing find()
     * @return int Unix timestamp, or 0 when no parseable date is found
     */
    public function ExtractArticleTimestamp($article){
        $time = $article->find('.article-info', 0);
        if(!$time){
            return 0;
        }

        $timestring = trim($time->plaintext);

        // Month names are matched without a leading word boundary on purpose,
        // so that the last month of "MayJune 2016" / "May-June 2016" is found.
        $months = 'Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?'
            . '|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?';

        // Ordered from most to least specific. Only the captured group is
        // handed to strtotime(), never the surrounding text.
        $patterns = array(
            '/(\d{1,2}\s+\S+\s+\d{4})/',             // 29 July 2016
            '/((?:' . $months . ')\.?\s+\d{4})/i',   // July 2016, May-June 2016, MayJune 2016
            '/([A-Za-z]+\s+\d{4})/',                 // generic "<word> <year>" fallback
        );

        foreach($patterns as $pattern){
            if(!preg_match($pattern, $timestring, $matches)){
                continue;
            }

            $timestamp = strtotime($matches[1]);
            // strtotime() returns false on unparseable input; try the next
            // pattern instead of leaking false into the item.
            if($timestamp !== false){
                return $timestamp;
            }
        }

        return 0;
    }

    /**
     * Extracts the content (summary text) from an article.
     *
     * @param object $article Article node exposing find()
     * @return string Trimmed text of `.article-content`, or '' if there is none
     */
    public function ExtractArticleContent($article){
        $content = $article->find('.article-content', 0);
        if(!$content){
            return '';
        }

        return trim($content->plaintext);
    }

    /**
     * Downloads the recent-articles page of the requested journal and
     * appends one item per listed article to $this->items.
     *
     * @param array $param Request parameters; `j` is the journal identifier
     */
    public function collectData(array $param){
        if(!isset($param['j']) || $param['j'] === ''){
            $this->returnError('Missing Elsevier journal name', 400);
            // NOTE(review): returnError() presumably aborts the request;
            // return explicitly in case it does not.
            return;
        }

        // The journal name comes from the request: encode it so that it
        // stays a single path segment of the target URL.
        $uri = 'http://www.journals.elsevier.com/' . rawurlencode($param['j']) . '/recent-articles/';

        $html = file_get_html($uri);
        if(!$html){
            $this->returnError('No results for Elsevier journal '.$param['j'], 404);
            return;
        }

        foreach($html->find('.pod-listing') as $article){
            $link = $article->find('.pod-listing-header>a', 0);
            if(!$link){
                // A listing without a header link has neither URI nor title.
                continue;
            }

            $item = new \Item();
            $item->uri = $link->getAttribute('href').'?np=y';
            $item->title = $link->plaintext;
            $item->name = $this->ExtractArticleName($article);
            $item->timestamp = $this->ExtractArticleTimestamp($article);
            $item->content = $this->ExtractArticleContent($article);
            $this->items[] = $item;
        }
    }

    /**
     * @return string Human-readable name of the bridge
     */
    public function getName(){
        return 'Elsevier journals recent articles';
    }

    /**
     * @return string Base URI of the scraped site
     */
    public function getURI(){
        return 'http://www.journals.elsevier.com';
    }

    /**
     * @return int Cache lifetime in seconds
     */
    public function getCacheDuration(){
        return 43200; // 12h
    }
}
?>