diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php index bb734777..4f4cd574 100644 --- a/bridges/ElsevierBridge.php +++ b/bridges/ElsevierBridge.php @@ -1,55 +1,91 @@ maintainer = 'Pierre Mazière'; - $this->name = 'Elsevier journals recent articles'; - $this->uri = 'http://www.journals.elsevier.com'; - $this->description = 'Returns the recent articles published in Elsevier journals'; - $this->update = '2016-06-26'; + $this->maintainer = 'Pierre Mazière'; + $this->name = 'Elsevier journals recent articles'; + $this->uri = 'http://www.journals.elsevier.com'; + $this->description = 'Returns the recent articles published in Elsevier journals'; + $this->update = '2016-08-02'; - $this->parameters= - '[ - { - "name" : "Journal name", - "identifier" : "j" - } - ]'; - } + $this->parameters[] = + '[ + { + "name" : "Journal name", + "identifier" : "j", + "required" : "true", + "exampleValue" : "academic-pediatrics", + "title" : "Insert html-part of your journal" + } + ]'; + } - public function collectData(array $param){ - $uri = 'http://www.journals.elsevier.com/'.$param['j'].'/recent-articles/'; - $html = file_get_html($uri) - or $this->returnError('No results for Elsevier journal '.$param['j'], 404); + // Extracts the list of names from an article as string + function ExtractArticleName ($article){ + $names = $article->find('small', 0); + if($names) + return trim($names->plaintext); + return ''; + } - foreach($html->find('.pod-listing') as $article){ + // Extracts the timestamp from an article + function ExtractArticleTimestamp ($article){ + $time = $article->find('.article-info', 0); + if($time){ + $timestring = trim($time->plaintext); + /* + The format depends on the age of an article: + - Available online 29 July 2016 + - July 2016 + - May–June 2016 + */ + if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)){ + return strtotime($matches[0]); + } else { + return 0; + } + } + return 0; + } - $item = new \Item(); - $item->uri=$article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y'; - $item->title=$article->find('.pod-listing-header>a',0)->plaintext; - $item->name=trim($article->find('small',0)->plaintext); - $item->timestamp=strtotime($article->find('.article-info',0)->plaintext); - $item->content=trim($article->find('.article-content',0)->plaintext); + // Extracts the content from an article + function ExtractArticleContent ($article){ + $content = $article->find('.article-content', 0); + if($content){ + return trim($content->plaintext); + } + return ''; + } - $this->items[]=$item; - } - } + public function collectData(array $param){ + $uri = 'http://www.journals.elsevier.com/' . $param['j'] . '/recent-articles/'; + $html = file_get_html($uri) or $this->returnError('No results for Elsevier journal '.$param['j'], 404); - public function getName(){ - return 'Elsevier journals recent articles'; - } + foreach($html->find('.pod-listing') as $article){ + $item = new \Item(); + $item->uri = $article->find('.pod-listing-header>a',0)->getAttribute('href').'?np=y'; + $item->title = $article->find('.pod-listing-header>a',0)->plaintext; + $item->name = $this->ExtractArticleName($article); + $item->timestamp = $this->ExtractArticleTimestamp($article); + $item->content = $this->ExtractArticleContent($article); + $this->items[] = $item; + } + } - public function getURI(){ - return 'http://www.journals.elsevier.com'; - } + public function getName(){ + return 'Elsevier journals recent articles'; + } - public function getCacheDuration(){ - return 43200; // 12h - } + public function getURI(){ + return 'http://www.journals.elsevier.com'; + } + + public function getCacheDuration(){ + return 43200; // 12h + } } +?> \ No newline at end of file