2016-06-26 11:52:54 +02:00
|
|
|
|
<?php
|
|
|
|
|
/**
 * Bridge that lists the recent articles of an Elsevier journal.
 *
 * The journal is selected with the `j` parameter, which is inserted into the
 * URL path `<URI><j>/recent-articles/`. Every `.pod-listing` element found on
 * that page becomes one feed item.
 */
class ElsevierBridge extends BridgeAbstract{

    const MAINTAINER = 'Pierre Mazière';
    const NAME = 'Elsevier journals recent articles';
    const URI = 'http://www.journals.elsevier.com/';
    const DESCRIPTION = 'Returns the recent articles published in Elsevier journals';

    // 'j' is the journal identifier as it appears in the journal's URL path.
    const PARAMETERS = array( array(
        'j'=>array(
            'name'=>'Journal name',
            'required'=>true,
            'exampleValue'=>'academic-pediatrics',
            'title'=>'Insert html-part of your journal'
        )
    ));

    /**
     * Extracts the list of names from an article as string.
     *
     * @param object $article DOM element of one article listing
     * @return string Trimmed text of the first <small> element, or '' if absent
     */
    private function ExtractArticleName ($article){
        $names = $article->find('small', 0);
        if(!$names){
            return '';
        }

        return trim($names->plaintext);
    }

    /**
     * Extracts the timestamp from an article.
     *
     * The text of the `.article-info` element depends on the age of an
     * article:
     * - Available online 29 July 2016
     * - July 2016
     * - May–June 2016
     *
     * @param object $article DOM element of one article listing
     * @return int Unix timestamp, or 0 if no date could be extracted
     */
    private function ExtractArticleTimestamp ($article){
        $time = $article->find('.article-info', 0);
        if(!$time){
            return 0;
        }

        $timestring = trim($time->plaintext);

        // Most specific pattern first. The "Month YYYY" pattern also covers
        // month ranges ("May-June 2016", "May–June 2016"): only the trailing
        // "June 2016" is matched, so strtotime() never sees the range prefix.
        $patterns = array(
            '/\d+\s\S+\s\d{4}/',     // 29 July 2016
            '/[A-Za-z]+\s\d{4}/',    // July 2016
        );

        foreach($patterns as $pattern){
            if(!preg_match($pattern, $timestring, $matches)){
                continue;
            }

            $timestamp = strtotime($matches[0]);
            // strtotime() returns false on unparseable input; try the next
            // pattern instead of handing false to the caller.
            if($timestamp !== false){
                return $timestamp;
            }
        }

        return 0;
    }

    /**
     * Extracts the content from an article.
     *
     * @param object $article DOM element of one article listing
     * @return string Trimmed text of the `.article-content` element, or '' if absent
     */
    private function ExtractArticleContent ($article){
        $content = $article->find('.article-content', 0);
        if(!$content){
            return '';
        }

        return trim($content->plaintext);
    }

    /**
     * Loads the journal's recent-articles page and appends one item per
     * `.pod-listing` element to $this->items.
     *
     * Calls returnServerError() when the page cannot be loaded.
     */
    public function collectData(){
        $uri = self::URI . $this->getInput('j') . '/recent-articles/';
        $html = getSimpleHTMLDOM($uri)
            or returnServerError('No results for Elsevier journal '.$this->getInput('j'));

        foreach($html->find('.pod-listing') as $article){
            // The header link supplies both the URI and the title; a listing
            // without it cannot produce a usable item, so skip it rather than
            // dereference null.
            $link = $article->find('.pod-listing-header>a', 0);
            if(!$link){
                continue;
            }

            $item = array();
            $item['uri'] = $link->getAttribute('href').'?np=y';
            $item['title'] = trim($link->plaintext);
            $item['author'] = $this->ExtractArticleName($article);
            $item['timestamp'] = $this->ExtractArticleTimestamp($article);
            $item['content'] = $this->ExtractArticleContent($article);
            $this->items[] = $item;
        }
    }

    /**
     * @return int Cache duration in seconds
     */
    public function getCacheDuration(){
        return 43200; // 12h
    }
}
|
2016-07-08 19:06:35 +02:00
|
|
|
|
?>
|