bridges: rename file_get_html to getSimpleHTMLDOM
Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
86515a1560
commit
6e2c7ceaf4
12 changed files with 38 additions and 38 deletions
|
@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract {
|
||||||
|
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
#echo $url;
|
#echo $url;
|
||||||
$html2 = file_get_html($url);
|
$html2 = getSimpleHTMLDOM($url);
|
||||||
|
|
||||||
$text = $html2->find("section[id='article-guts']", 0);
|
$text = $html2->find("section[id='article-guts']", 0);
|
||||||
/*foreach ($text->find('<aside id="social-left">') as $node)
|
/*foreach ($text->find('<aside id="social-left">') as $node)
|
||||||
|
@ -43,7 +43,7 @@ class ArstechnicaBridge extends BridgeAbstract {
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = $this->file_get_html('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
|
$html = $this->getSimpleHTMLDOM('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -23,7 +23,7 @@ class BastaBridge extends BridgeAbstract{
|
||||||
$item->title = $element->find('title', 0)->innertext;
|
$item->title = $element->find('title', 0)->innertext;
|
||||||
$item->uri = $element->find('guid', 0)->plaintext;
|
$item->uri = $element->find('guid', 0)->plaintext;
|
||||||
$item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
|
$item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
|
||||||
$item->content = ReplaceImageUrl($this->file_get_html($item->uri)->find('div.texte', 0)->innertext);
|
$item->content = ReplaceImageUrl($this->getSimpleHTMLDOM($item->uri)->find('div.texte', 0)->innertext);
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
$limit++;
|
$limit++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,7 +85,7 @@ class CastorusBridge extends BridgeAbstract {
|
||||||
if(isset($params['city']))
|
if(isset($params['city']))
|
||||||
$city_filter = trim($params['city']);
|
$city_filter = trim($params['city']);
|
||||||
|
|
||||||
$html = $this->file_get_html($this->uri);
|
$html = $this->getSimpleHTMLDOM($this->uri);
|
||||||
|
|
||||||
if(!$html)
|
if(!$html)
|
||||||
$this->returnServerError('Could not load data from ' . $this->uri . '!');
|
$this->returnServerError('Could not load data from ' . $this->uri . '!');
|
||||||
|
|
|
@ -18,7 +18,7 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
|
||||||
} return false;
|
} return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
|
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
|
||||||
|
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->uri = $this->getURI().'#'.date('Y-m-d');
|
$item->uri = $this->getURI().'#'.date('Y-m-d');
|
||||||
|
|
|
@ -57,7 +57,7 @@ class NextgovBridge extends BridgeAbstract {
|
||||||
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32))
|
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32))
|
||||||
$this->returnClientError('Invalid "category" parameter.');
|
$this->returnClientError('Invalid "category" parameter.');
|
||||||
$url = $this->getURI().'rss/'.$category.'/';
|
$url = $this->getURI().'rss/'.$category.'/';
|
||||||
$html = $this->file_get_html($url) or $this->returnServerError('Could not request Nextgov: '.$url);
|
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Nextgov: '.$url);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
@ -69,7 +69,7 @@ class NextgovBridge extends BridgeAbstract {
|
||||||
$article_subtitle = $element->find('description', 0)->plaintext;
|
$article_subtitle = $element->find('description', 0)->plaintext;
|
||||||
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
||||||
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
|
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
|
||||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
|
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
|
||||||
|
|
||||||
$contents = $article->find('div.wysiwyg', 0)->innertext;
|
$contents = $article->find('div.wysiwyg', 0)->innertext;
|
||||||
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
|
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
|
||||||
|
|
|
@ -20,7 +20,7 @@ class NumeramaBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
$feed = $this->uri.'feed/';
|
$feed = $this->uri.'feed/';
|
||||||
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
|
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
@ -32,7 +32,7 @@ class NumeramaBridge extends BridgeAbstract{
|
||||||
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
||||||
|
|
||||||
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
|
||||||
$article_html = $this->file_get_html($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
$article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
|
||||||
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
||||||
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
||||||
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post
|
||||||
|
|
|
@ -65,7 +65,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectMoviesData() {
|
private function collectMoviesData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
@ -73,7 +73,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectSeriesData() {
|
private function collectSeriesData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
@ -81,7 +81,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectGamesData() {
|
private function collectGamesData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
@ -89,7 +89,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectBooksData() {
|
private function collectBooksData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
@ -97,7 +97,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectBDsData() {
|
private function collectBDsData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
@ -105,7 +105,7 @@ class SensCritiqueBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function collectMusicsData() {
|
private function collectMusicsData() {
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = $this->file_get_html('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
|
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
|
||||||
$list = $html->find('ul.elpr-list', 0);
|
$list = $html->find('ul.elpr-list', 0);
|
||||||
|
|
||||||
$this->extractDataFromList($list);
|
$this->extractDataFromList($list);
|
||||||
|
|
|
@ -11,7 +11,7 @@ class ShanaprojectBridge extends BridgeAbstract {
|
||||||
// Returns an html object for the Season Anime List (latest season)
|
// Returns an html object for the Season Anime List (latest season)
|
||||||
private function LoadSeasonAnimeList(){
|
private function LoadSeasonAnimeList(){
|
||||||
// First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List'
|
// First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List'
|
||||||
$html = $this->file_get_html($this->getURI() . '/seasons');
|
$html = $this->getSimpleHTMLDOM($this->getURI() . '/seasons');
|
||||||
if(!$html)
|
if(!$html)
|
||||||
$this->returnServerError('Could not load \'seasons\' page!');
|
$this->returnServerError('Could not load \'seasons\' page!');
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ class ShanaprojectBridge extends BridgeAbstract {
|
||||||
if(!$season)
|
if(!$season)
|
||||||
$this->returnServerError('Could not find \'Season Anime List\'!');
|
$this->returnServerError('Could not find \'Season Anime List\'!');
|
||||||
|
|
||||||
$html = $this->file_get_html($this->getURI() . $season->href);
|
$html = $this->getSimpleHTMLDOM($this->getURI() . $season->href);
|
||||||
if(!$html)
|
if(!$html)
|
||||||
$this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!');
|
$this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!');
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
|
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('article') as $element) {
|
foreach ($html->find('article') as $element) {
|
||||||
|
@ -55,7 +55,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
|
||||||
$article_author = trim($element->find('span.vcard', 0)->plaintext);
|
$article_author = trim($element->find('span.vcard', 0)->plaintext);
|
||||||
$article_title = $element->find('a.entry-title', 0)->plaintext;
|
$article_title = $element->find('a.entry-title', 0)->plaintext;
|
||||||
$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
|
$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
|
||||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
|
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
|
||||||
|
|
||||||
$contents = $article->find('div.articlebodyonly', 0)->innertext;
|
$contents = $article->find('div.articlebodyonly', 0)->innertext;
|
||||||
$contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\'');
|
$contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\'');
|
||||||
|
|
|
@ -28,7 +28,7 @@ class WeLiveSecurityBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$feed = $this->getURI().'feed/';
|
$feed = $this->getURI().'feed/';
|
||||||
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
|
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
|
|
@ -112,7 +112,7 @@ class WikipediaBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
// This will automatically send us to the correct main page in any language (try it!)
|
// This will automatically send us to the correct main page in any language (try it!)
|
||||||
$html = $this->file_get_html($this->uri . '/wiki');
|
$html = $this->getSimpleHTMLDOM($this->uri . '/wiki');
|
||||||
|
|
||||||
if(!$html)
|
if(!$html)
|
||||||
$this->returnServerError('Could not load site: ' . $this->uri . '!');
|
$this->returnServerError('Could not load site: ' . $this->uri . '!');
|
||||||
|
@ -209,7 +209,7 @@ class WikipediaBridge extends BridgeAbstract{
|
||||||
* Loads the full article from a given URI
|
* Loads the full article from a given URI
|
||||||
*/
|
*/
|
||||||
private function LoadFullArticle($uri){
|
private function LoadFullArticle($uri){
|
||||||
$content_html = $this->file_get_html($uri);
|
$content_html = $this->getSimpleHTMLDOM($uri);
|
||||||
|
|
||||||
if(!$content_html)
|
if(!$content_html)
|
||||||
$this->returnServerError('Could not load site: ' . $uri . '!');
|
$this->returnServerError('Could not load site: ' . $uri . '!');
|
||||||
|
|
|
@ -237,7 +237,7 @@ class ZDNetBridge extends BridgeAbstract {
|
||||||
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed > 64))
|
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed > 64))
|
||||||
$this->returnClientError('Invalid "feed" parameter.');
|
$this->returnClientError('Invalid "feed" parameter.');
|
||||||
$url = $baseUri.trim($feed, '/').'/rss.xml';
|
$url = $baseUri.trim($feed, '/').'/rss.xml';
|
||||||
$html = $this->file_get_html($url) or $this->returnServerError('Could not request ZDNet: '.$url);
|
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request ZDNet: '.$url);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
@ -247,7 +247,7 @@ class ZDNetBridge extends BridgeAbstract {
|
||||||
$article_title = StripCDATA($element->find('title', 0)->plaintext);
|
$article_title = StripCDATA($element->find('title', 0)->plaintext);
|
||||||
$article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
|
$article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
|
||||||
$article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
|
$article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
|
||||||
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
|
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
|
||||||
|
|
||||||
if (!empty($article_author))
|
if (!empty($article_author))
|
||||||
$author = $article_author;
|
$author = $article_author;
|
||||||
|
|
Loading…
Reference in a new issue