2014-05-26 19:45:10 +02:00
< ? php
/**
 * Bridge that turns the Collège de France audio/video listing into feed items.
 *
 * The listing is fetched from the site's search-audiovideo endpoint, which
 * returns a list of partial HTML fragments (one <li><a data-target> per entry).
 */
class CollegeDeFranceBridge extends BridgeAbstract {

    /**
     * Sets the descriptive properties of this bridge (maintainer, name,
     * site URI, description and last update date).
     */
    public function loadMetadatas() {
        $this->maintainer = "pit-fgfjiudghdf";
        $this->name = "CollegeDeFrance";
        $this->uri = "http://www.college-de-france.fr/";
        $this->description = "Returns the latest audio and video from CollegeDeFrance";
        $this->update = "2016-08-09";
    }

    /**
     * Fetches the audio/video listing and appends one Item per entry to $this->items.
     *
     * Each item gets a title, a content string ("lecturer - title"), an absolute
     * URI and, when a date could be determined, a Unix timestamp.
     *
     * @param array $param Request parameters; not used by this bridge.
     */
    public function collectData(array $param) {
        // Maps two-digit month numbers to the abbreviated French month names
        // that appear in the listing, so a textual date can be made numeric.
        $months = array(
            '01' => 'janv.',
            '02' => 'févr.',
            '03' => 'mars',
            '04' => 'avr.',
            '05' => 'mai',
            '06' => 'juin',
            '07' => 'juil.',
            '08' => 'août',
            '09' => 'sept.',
            '10' => 'oct.',
            '11' => 'nov.',
            '12' => 'déc.'
        );

        // The "API" used by the site returns a list of partial HTML in this form
        /* <li>
         *   <a href="/site/thomas-romer/guestlecturer-2016-04-15-14h30.htm" data-target="after">
         *     <span class="date"><span class="list-icon list-icon-video"></span><span class="list-icon list-icon-audio"></span>15 avr. 2016</span>
         *     <span class="lecturer">Christopher Hays</span>
         *     <span class='title'>Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East</span>
         *   </a>
         * </li>
         */
        $html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);

        // Loop-invariant: all dates on the site are interpreted as Paris local time.
        $timezone = new DateTimeZone('Europe/Paris');

        foreach ($html->find('a[data-target]') as $element) {
            $titleNode = $element->find('.title', 0);
            if (!$titleNode) {
                // Without a title there is nothing meaningful to publish.
                continue;
            }
            $lecturerNode = $element->find('.lecturer', 0);
            $dateNode = $element->find('.date', 0);

            $item = new \Item();
            $item->title = $titleNode->plaintext;

            // Most relative URLs contain an hour in addition to the date, so let's use it
            // <a href="/site/yann-lecun/course-2016-04-08-11h00.htm" data-target="after">
            //
            // Sometimes there's an __1, perhaps it signifies an update "/site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm"
            //
            // But unfortunately some don't have any hours info
            // <a href="/site/institut-physique/The-Mysteries-of-Decoherence-Sebastien-Gleyzes-[Video-3-35].htm" data-target="after">
            //
            // Match the full "YYYY-MM-DD-HHhMM" stamp rather than searching for the
            // literal "201", so that dates from any decade are recognised.
            $d = false;
            if (preg_match('/\d{4}-\d{2}-\d{2}-\d{2}h\d{2}/', $element->href, $stamp)) {
                $d = DateTime::createFromFormat('!Y-m-d-H\hi', $stamp[0], $timezone);
            }

            // Fall back to the human-readable date ("15 avr. 2016" -> "15 04 2016").
            if (!$d && $dateNode) {
                $numericDate = trim(str_replace(array_values($months), array_keys($months), $dateNode->plaintext));
                $d = DateTime::createFromFormat('!d m Y', $numericDate, $timezone);
            }

            // Only set the timestamp when a date was actually parsed; calling
            // format() on a failed parse (false) would be a fatal error.
            if ($d) {
                $item->timestamp = $d->format('U');
            }

            $item->content = ($lecturerNode ? $lecturerNode->innertext . ' - ' : '') . $titleNode->innertext;
            $item->uri = 'http://www.college-de-france.fr' . $element->href;
            $this->items[] = $item;
        }
    }

    /**
     * Returns the cache lifetime for this bridge, in seconds.
     *
     * @return int 10800 (3 hours)
     */
    public function getCacheDuration() {
        return 3600 * 3; // 3 hours
    }
}