bridges: rename file_get_html to getSimpleHTMLDOM

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-08-09 14:57:42 +02:00
parent 86515a1560
commit 6e2c7ceaf4
12 changed files with 38 additions and 38 deletions

View file

@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract {
function ExtractContent($url) { function ExtractContent($url) {
#echo $url; #echo $url;
$html2 = file_get_html($url); $html2 = getSimpleHTMLDOM($url);
$text = $html2->find("section[id='article-guts']", 0); $text = $html2->find("section[id='article-guts']", 0);
/*foreach ($text->find('<aside id="social-left">') as $node) /*foreach ($text->find('<aside id="social-left">') as $node)
@ -43,7 +43,7 @@ class ArstechnicaBridge extends BridgeAbstract {
return $text; return $text;
} }
$html = $this->file_get_html('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.'); $html = $this->getSimpleHTMLDOM('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
$limit = 0; $limit = 0;
foreach($html->find('item') as $element) { foreach($html->find('item') as $element) {

View file

@ -23,7 +23,7 @@ class BastaBridge extends BridgeAbstract{
$item->title = $element->find('title', 0)->innertext; $item->title = $element->find('title', 0)->innertext;
$item->uri = $element->find('guid', 0)->plaintext; $item->uri = $element->find('guid', 0)->plaintext;
$item->timestamp = strtotime($element->find('dc:date', 0)->plaintext); $item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
$item->content = ReplaceImageUrl($this->file_get_html($item->uri)->find('div.texte', 0)->innertext); $item->content = ReplaceImageUrl($this->getSimpleHTMLDOM($item->uri)->find('div.texte', 0)->innertext);
$this->items[] = $item; $this->items[] = $item;
$limit++; $limit++;
} }

View file

@ -85,7 +85,7 @@ class CastorusBridge extends BridgeAbstract {
if(isset($params['city'])) if(isset($params['city']))
$city_filter = trim($params['city']); $city_filter = trim($params['city']);
$html = $this->file_get_html($this->uri); $html = $this->getSimpleHTMLDOM($this->uri);
if(!$html) if(!$html)
$this->returnServerError('Could not load data from ' . $this->uri . '!'); $this->returnServerError('Could not load data from ' . $this->uri . '!');

View file

@ -18,7 +18,7 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
} return false; } return false;
} }
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI()); $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
$item = new \Item(); $item = new \Item();
$item->uri = $this->getURI().'#'.date('Y-m-d'); $item->uri = $this->getURI().'#'.date('Y-m-d');

View file

@ -57,7 +57,7 @@ class NextgovBridge extends BridgeAbstract {
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32)) if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32))
$this->returnClientError('Invalid "category" parameter.'); $this->returnClientError('Invalid "category" parameter.');
$url = $this->getURI().'rss/'.$category.'/'; $url = $this->getURI().'rss/'.$category.'/';
$html = $this->file_get_html($url) or $this->returnServerError('Could not request Nextgov: '.$url); $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Nextgov: '.$url);
$limit = 0; $limit = 0;
foreach ($html->find('item') as $element) { foreach ($html->find('item') as $element) {
@ -69,7 +69,7 @@ class NextgovBridge extends BridgeAbstract {
$article_subtitle = $element->find('description', 0)->plaintext; $article_subtitle = $element->find('description', 0)->plaintext;
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext); $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"'); $article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url); $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
$contents = $article->find('div.wysiwyg', 0)->innertext; $contents = $article->find('div.wysiwyg', 0)->innertext;
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>'); $contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');

View file

@ -20,7 +20,7 @@ class NumeramaBridge extends BridgeAbstract{
} }
$feed = $this->uri.'feed/'; $feed = $this->uri.'feed/';
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request Numerama: '.$feed); $html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
$limit = 0; $limit = 0;
foreach($html->find('item') as $element) { foreach($html->find('item') as $element) {
@ -32,7 +32,7 @@ class NumeramaBridge extends BridgeAbstract{
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext); $item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext); $article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
$article_html = $this->file_get_html($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url); $article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
$contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block $contents = $article_html->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture $contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post $contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post

View file

@ -65,7 +65,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectMoviesData() { private function collectMoviesData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
@ -73,7 +73,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectSeriesData() { private function collectSeriesData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
@ -81,7 +81,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectGamesData() { private function collectGamesData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
@ -89,7 +89,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectBooksData() { private function collectBooksData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
@ -97,7 +97,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectBDsData() { private function collectBDsData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);
@ -105,7 +105,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectMusicsData() { private function collectMusicsData() {
$html = ''; $html = '';
$html = $this->file_get_html('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.'); $html = $this->getSimpleHTMLDOM('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0); $list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list); $this->extractDataFromList($list);

View file

@ -11,7 +11,7 @@ class ShanaprojectBridge extends BridgeAbstract {
// Returns an html object for the Season Anime List (latest season) // Returns an html object for the Season Anime List (latest season)
private function LoadSeasonAnimeList(){ private function LoadSeasonAnimeList(){
// First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List' // First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List'
$html = $this->file_get_html($this->getURI() . '/seasons'); $html = $this->getSimpleHTMLDOM($this->getURI() . '/seasons');
if(!$html) if(!$html)
$this->returnServerError('Could not load \'seasons\' page!'); $this->returnServerError('Could not load \'seasons\' page!');
@ -19,7 +19,7 @@ class ShanaprojectBridge extends BridgeAbstract {
if(!$season) if(!$season)
$this->returnServerError('Could not find \'Season Anime List\'!'); $this->returnServerError('Could not find \'Season Anime List\'!');
$html = $this->file_get_html($this->getURI() . $season->href); $html = $this->getSimpleHTMLDOM($this->getURI() . $season->href);
if(!$html) if(!$html)
$this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!'); $this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!');

View file

@ -45,7 +45,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
return $string; return $string;
} }
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI()); $html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
$limit = 0; $limit = 0;
foreach ($html->find('article') as $element) { foreach ($html->find('article') as $element) {
@ -55,7 +55,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
$article_author = trim($element->find('span.vcard', 0)->plaintext); $article_author = trim($element->find('span.vcard', 0)->plaintext);
$article_title = $element->find('a.entry-title', 0)->plaintext; $article_title = $element->find('a.entry-title', 0)->plaintext;
$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext); $article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url); $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
$contents = $article->find('div.articlebodyonly', 0)->innertext; $contents = $article->find('div.articlebodyonly', 0)->innertext;
$contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\''); $contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\'');

View file

@ -28,7 +28,7 @@ class WeLiveSecurityBridge extends BridgeAbstract {
} }
$feed = $this->getURI().'feed/'; $feed = $this->getURI().'feed/';
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed); $html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
$limit = 0; $limit = 0;
foreach ($html->find('item') as $element) { foreach ($html->find('item') as $element) {

View file

@ -112,7 +112,7 @@ class WikipediaBridge extends BridgeAbstract{
} }
// This will automatically send us to the correct main page in any language (try it!) // This will automatically send us to the correct main page in any language (try it!)
$html = $this->file_get_html($this->uri . '/wiki'); $html = $this->getSimpleHTMLDOM($this->uri . '/wiki');
if(!$html) if(!$html)
$this->returnServerError('Could not load site: ' . $this->uri . '!'); $this->returnServerError('Could not load site: ' . $this->uri . '!');
@ -209,7 +209,7 @@ class WikipediaBridge extends BridgeAbstract{
* Loads the full article from a given URI * Loads the full article from a given URI
*/ */
private function LoadFullArticle($uri){ private function LoadFullArticle($uri){
$content_html = $this->file_get_html($uri); $content_html = $this->getSimpleHTMLDOM($uri);
if(!$content_html) if(!$content_html)
$this->returnServerError('Could not load site: ' . $uri . '!'); $this->returnServerError('Could not load site: ' . $uri . '!');

View file

@ -237,7 +237,7 @@ class ZDNetBridge extends BridgeAbstract {
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed > 64)) if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed > 64))
$this->returnClientError('Invalid "feed" parameter.'); $this->returnClientError('Invalid "feed" parameter.');
$url = $baseUri.trim($feed, '/').'/rss.xml'; $url = $baseUri.trim($feed, '/').'/rss.xml';
$html = $this->file_get_html($url) or $this->returnServerError('Could not request ZDNet: '.$url); $html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request ZDNet: '.$url);
$limit = 0; $limit = 0;
foreach ($html->find('item') as $element) { foreach ($html->find('item') as $element) {
@ -247,7 +247,7 @@ class ZDNetBridge extends BridgeAbstract {
$article_title = StripCDATA($element->find('title', 0)->plaintext); $article_title = StripCDATA($element->find('title', 0)->plaintext);
$article_subtitle = StripCDATA($element->find('description', 0)->plaintext); $article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
$article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext)); $article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url); $article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
if (!empty($article_author)) if (!empty($article_author))
$author = $article_author; $author = $article_author;