2013-08-11 13:30:41 +02:00
< ? php
/**
* RssBridgeGoogleMostRecent
* Search Google for the most recent pages regarding a specific topic.
* Returns the 100 most recent links in results from the past year, sorted by date (most recent first).
* Example:
* http://www.google.com/search?q=sebsauvage&num=100&complete=0&tbs=qdr:y,sbd:1
*    complete=0&num=100 : get 100 results
*    qdr:y : in past year
*    sbd:1 : sort by date (will only work if qdr: is specified)
*
* @name Google search
* @description Returns most recent results from Google search.
* @use1(q="keyword")
*/
class GoogleSearchBridge extends BridgeAbstract
{
    /**
     * Keyword of the current search; set by collectData() and reused by getName().
     * Stays unset when collectData() was not given a 'q' parameter.
     */
    private $request;

    /**
     * Fetch the Google result page for the requested keyword and turn every
     * result entry into an Item appended to $this->items.
     *
     * @param array $param Request parameters; 'q' (the search keyword) is required.
     */
    public function collectData(array $param)
    {
        if (!isset($param['q'])) {
            $this->returnError('You must specify a keyword (?q=...).', 400);
            // returnError() is defined outside this class; stop here in case it returns.
            return;
        }

        /* keyword search mode */
        $this->request = $param['q'];
        $html = file_get_html(
            'http://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1'
        );
        if (!$html) {
            $this->returnError('No results for this query.', 404);
            // Never call find() on a failed fetch.
            return;
        }

        $resultContainer = $html->find('div[id=ires]', 0);
        if (is_null($resultContainer)) {
            // No result container on the page: nothing to collect.
            return;
        }

        foreach ($resultContainer->find('li[class=g]') as $element) {
            // find(..., 0) yields null when nothing matches, so check before dereferencing.
            $link = $element->find('a[href]', 0);
            if (is_null($link)) {
                // An entry without a link cannot become a usable feed item.
                continue;
            }
            $title = $element->find('h3', 0);
            $snippet = $element->find('span[class=st]', 0);

            $item = new Item();
            $item->uri = $this->extractResultUri($link->href);
            $item->title = is_null($title) ? '' : $title->plaintext;
            $item->content = is_null($snippet) ? '' : $snippet->plaintext;
            $this->items[] = $item;
        }
    }

    /**
     * Resolve the target URL of a result link.
     *
     * Google wraps targets in redirect links (eg. /url?q=...); when a 'q'
     * query parameter is present its value is the direct URL. Otherwise the
     * href itself is used, prefixed with the Google host unless it already
     * carries a scheme.
     *
     * @param string $href The href attribute of the result's anchor.
     * @return string The URL to store on the item.
     */
    private function extractResultUri($href)
    {
        // parse_url() returns null when there is no query part and false on malformed input.
        $query = parse_url($href, PHP_URL_QUERY);
        if (is_string($query) && $query !== '') {
            $parameters = array();
            parse_str($query, $parameters);
            if (isset($parameters['q'])) {
                return $parameters['q'];
            }
        }

        // Already absolute: prefixing the host would produce a broken URL.
        if (is_string(parse_url($href, PHP_URL_SCHEME))) {
            return $href;
        }

        return 'http://google.com' . $href;
    }

    /**
     * Name of the feed: the searched keyword (when there is one) followed by 'Google search'.
     *
     * @return string
     */
    public function getName()
    {
        $prefix = !empty($this->request) ? $this->request . ' - ' : '';

        return $prefix . 'Google search';
    }

    /**
     * Base URI of the site this bridge reads from.
     *
     * @return string
     */
    public function getURI()
    {
        return 'http://google.com';
    }

    /**
     * Cache lifetime for this bridge.
     *
     * @return int
     */
    public function getCacheDuration()
    {
        return 1800; // 30 minutes
    }
}