bridges: rename file_get_html to getSimpleHTMLDOM

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière 2016-08-09 14:57:42 +02:00
parent 86515a1560
commit 6e2c7ceaf4
12 changed files with 38 additions and 38 deletions

View file

@ -29,7 +29,7 @@ class ArstechnicaBridge extends BridgeAbstract {
function ExtractContent($url) {
#echo $url;
$html2 = file_get_html($url);
$html2 = getSimpleHTMLDOM($url);
$text = $html2->find("section[id='article-guts']", 0);
/*foreach ($text->find('<aside id="social-left">') as $node)
@ -43,7 +43,7 @@ class ArstechnicaBridge extends BridgeAbstract {
return $text;
}
$html = $this->file_get_html('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request NextInpact.');
// Load the Ars Technica index feed; on a falsy result, hand off to returnServerError().
// The message previously named "NextInpact", a leftover from another bridge.
$html = $this->getSimpleHTMLDOM('http://feeds.arstechnica.com/arstechnica/index') or $this->returnServerError('Could not request Arstechnica.');
$limit = 0;
foreach($html->find('item') as $element) {

View file

@ -23,7 +23,7 @@ class BastaBridge extends BridgeAbstract{
$item->title = $element->find('title', 0)->innertext;
$item->uri = $element->find('guid', 0)->plaintext;
$item->timestamp = strtotime($element->find('dc:date', 0)->plaintext);
$item->content = ReplaceImageUrl($this->file_get_html($item->uri)->find('div.texte', 0)->innertext);
$item->content = ReplaceImageUrl($this->getSimpleHTMLDOM($item->uri)->find('div.texte', 0)->innertext);
$this->items[] = $item;
$limit++;
}

View file

@ -8,7 +8,7 @@ class CastorusBridge extends BridgeAbstract {
$this->update = '2016-08-17';
$this->parameters["Get latest changes"] = '[]';
$this->parameters["Get latest changes via ZIP code"] =
$this->parameters["Get latest changes via ZIP code"] =
'[
{
"name": "ZIP code",
@ -19,7 +19,7 @@ class CastorusBridge extends BridgeAbstract {
"title" : "Insert ZIP code (complete or partial)"
}
]';
$this->parameters["Get latest changes via city name"] =
$this->parameters["Get latest changes via city name"] =
'[
{
"name": "City name",
@ -38,7 +38,7 @@ class CastorusBridge extends BridgeAbstract {
if(!$title)
$this->returnServerError('Cannot find title!');
return htmlspecialchars(trim($title->plaintext));
}
@ -48,19 +48,19 @@ class CastorusBridge extends BridgeAbstract {
if(!$url)
$this->returnServerError('Cannot find url!');
return $this->uri . $url->href;
}
// Extracts the time from an activity
private function ExtractActivityTime($activity){
// Unfortunately the time is part of the parent node,
// Unfortunately the time is part of the parent node,
// so we have to clear all child nodes first
$nodes = $activity->find('*');
if(!$nodes)
$this->returnServerError('Cannot find nodes!');
foreach($nodes as $node){
$node->outertext = '';
}
@ -74,7 +74,7 @@ class CastorusBridge extends BridgeAbstract {
if(!$price)
$this->returnServerError('Cannot find price!');
return $price->innertext;
}
@ -85,23 +85,23 @@ class CastorusBridge extends BridgeAbstract {
if(isset($params['city']))
$city_filter = trim($params['city']);
$html = $this->file_get_html($this->uri);
$html = $this->getSimpleHTMLDOM($this->uri);
if(!$html)
$this->returnServerError('Could not load data from ' . $this->uri . '!');
$activities = $html->find('div#activite/li');
if(!$activities)
$this->returnServerError('Failed to find activities!');
foreach($activities as $activity){
$item = new \Item();
$item->title = $this->ExtractActivityTitle($activity);
$item->uri = $this->ExtractActivityUrl($activity);
$item->timestamp = $this->ExtractActivityTime($activity);
$item->content = '<a href="' . $item->uri . '">' . $item->title . '</a><br><p>'
$item->content = '<a href="' . $item->uri . '">' . $item->title . '</a><br><p>'
. $this->ExtractActivityPrice($activity) . '</p>';
if(isset($zip_filter) && !(substr($item->title, 0, strlen($zip_filter)) === $zip_filter)){

View file

@ -18,7 +18,7 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
} return false;
}
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request EstCeQuonMetEnProd: '.$this->getURI());
$item = new \Item();
$item->uri = $this->getURI().'#'.date('Y-m-d');
@ -33,4 +33,4 @@ class EstCeQuonMetEnProdBridge extends BridgeAbstract {
return 21600; // 6 hours
}
}
?>
?>

View file

@ -57,7 +57,7 @@ class NextgovBridge extends BridgeAbstract {
// Reject the category when it contains anything other than lowercase letters and hyphens,
// or when it is longer than 32 characters.
// The comparison must sit outside strlen(): the previous strlen($category > 32) measured the
// string form of a boolean, so the length limit was never actually checked.
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category) > 32)
    $this->returnClientError('Invalid "category" parameter.');
$url = $this->getURI().'rss/'.$category.'/';
$html = $this->file_get_html($url) or $this->returnServerError('Could not request Nextgov: '.$url);
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request Nextgov: '.$url);
$limit = 0;
foreach ($html->find('item') as $element) {
@ -69,7 +69,7 @@ class NextgovBridge extends BridgeAbstract {
$article_subtitle = $element->find('description', 0)->plaintext;
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Nextgov: '.$article_url);
$contents = $article->find('div.wysiwyg', 0)->innertext;
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
@ -91,4 +91,4 @@ class NextgovBridge extends BridgeAbstract {
}
}
}
}

View file

@ -20,7 +20,7 @@ class NumeramaBridge extends BridgeAbstract{
}
$feed = $this->uri.'feed/';
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request Numerama: '.$feed);
$limit = 0;
foreach($html->find('item') as $element) {
@ -32,7 +32,7 @@ class NumeramaBridge extends BridgeAbstract{
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
$article_url = NumeramaStripCDATA($element->find('guid', 0)->plaintext);
$article_html = $this->file_get_html($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
$article_html = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request Numerama: '.$article_url);
// Blank out the "related articles" section in place so it is not part of the extracted post.
// find(..., 0) yields no element when the section is absent, so guard before writing to it.
// (The old chained assignment also stored '' in $contents, which was overwritten immediately.)
$related_block = $article_html->find('section[class=related-article]', 0);
if ($related_block)
    $related_block->innertext = '';
$contents = '<img alt="" style="max-width:300px;" src="'.$article_html->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
$contents = $contents.$article_html->find('article[class=post-content]', 0)->innertext; // extract the post

View file

@ -65,7 +65,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectMoviesData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/films/cette-semaine') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);
@ -73,7 +73,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectSeriesData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/series/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);
@ -81,7 +81,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectGamesData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/jeuxvideo/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);
@ -89,7 +89,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectBooksData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/livres/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);
@ -97,7 +97,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectBDsData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/bd/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);
@ -105,7 +105,7 @@ class SensCritiqueBridge extends BridgeAbstract {
private function collectMusicsData() {
$html = '';
$html = $this->file_get_html('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
$html = $this->getSimpleHTMLDOM('http://www.senscritique.com/musique/actualite') or $this->returnServerError('No results for this query.');
$list = $html->find('ul.elpr-list', 0);
$this->extractDataFromList($list);

View file

@ -11,7 +11,7 @@ class ShanaprojectBridge extends BridgeAbstract {
// Returns an html object for the Season Anime List (latest season)
private function LoadSeasonAnimeList(){
// First we need to find the URI to the latest season from the 'seasons' page searching for 'Season Anime List'
$html = $this->file_get_html($this->getURI() . '/seasons');
$html = $this->getSimpleHTMLDOM($this->getURI() . '/seasons');
if(!$html)
$this->returnServerError('Could not load \'seasons\' page!');
@ -19,7 +19,7 @@ class ShanaprojectBridge extends BridgeAbstract {
if(!$season)
$this->returnServerError('Could not find \'Season Anime List\'!');
$html = $this->file_get_html($this->getURI() . $season->href);
$html = $this->getSimpleHTMLDOM($this->getURI() . $season->href);
if(!$html)
$this->returnServerError('Could not load \'Season Anime List\' from \'' . $season->innertext . '\'!');

View file

@ -45,7 +45,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
return $string;
}
$html = $this->file_get_html($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
$html = $this->getSimpleHTMLDOM($this->getURI()) or $this->returnServerError('Could not request TheHackerNews: '.$this->getURI());
$limit = 0;
foreach ($html->find('article') as $element) {
@ -55,7 +55,7 @@ class TheHackerNewsBridge extends BridgeAbstract {
$article_author = trim($element->find('span.vcard', 0)->plaintext);
$article_title = $element->find('a.entry-title', 0)->plaintext;
$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request TheHackerNews: '.$article_url);
$contents = $article->find('div.articlebodyonly', 0)->innertext;
$contents = StripRecursiveHTMLSection($contents, 'div', '<div class=\'clear\'');
@ -73,4 +73,4 @@ class TheHackerNewsBridge extends BridgeAbstract {
}
}
}
}

View file

@ -28,7 +28,7 @@ class WeLiveSecurityBridge extends BridgeAbstract {
}
$feed = $this->getURI().'feed/';
$html = $this->file_get_html($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
$html = $this->getSimpleHTMLDOM($feed) or $this->returnServerError('Could not request '.$this->getName().': '.$feed);
$limit = 0;
foreach ($html->find('item') as $element) {
@ -59,4 +59,4 @@ class WeLiveSecurityBridge extends BridgeAbstract {
}
}
}
}
}

View file

@ -112,7 +112,7 @@ class WikipediaBridge extends BridgeAbstract{
}
// This will automatically send us to the correct main page in any language (try it!)
$html = $this->file_get_html($this->uri . '/wiki');
$html = $this->getSimpleHTMLDOM($this->uri . '/wiki');
if(!$html)
$this->returnServerError('Could not load site: ' . $this->uri . '!');
@ -209,7 +209,7 @@ class WikipediaBridge extends BridgeAbstract{
* Loads the full article from a given URI
*/
private function LoadFullArticle($uri){
$content_html = $this->file_get_html($uri);
$content_html = $this->getSimpleHTMLDOM($uri);
if(!$content_html)
$this->returnServerError('Could not load site: ' . $uri . '!');

View file

@ -237,7 +237,7 @@ class ZDNetBridge extends BridgeAbstract {
// Reject the feed path when it contains characters outside [a-zA-Z0-9-/], has more than one
// slash, or is longer than 64 characters.
// The comparison must sit outside strlen(): the previous strlen($feed > 64) measured the
// string form of a boolean, so the length limit was never actually checked.
if ($feed !== preg_replace('/[^a-zA-Z0-9-\/]+/', '', $feed) || substr_count($feed, '/') > 1 || strlen($feed) > 64)
    $this->returnClientError('Invalid "feed" parameter.');
$url = $baseUri.trim($feed, '/').'/rss.xml';
$html = $this->file_get_html($url) or $this->returnServerError('Could not request ZDNet: '.$url);
$html = $this->getSimpleHTMLDOM($url) or $this->returnServerError('Could not request ZDNet: '.$url);
$limit = 0;
foreach ($html->find('item') as $element) {
@ -247,7 +247,7 @@ class ZDNetBridge extends BridgeAbstract {
$article_title = StripCDATA($element->find('title', 0)->plaintext);
$article_subtitle = StripCDATA($element->find('description', 0)->plaintext);
$article_timestamp = strtotime(StripCDATA($element->find('pubDate', 0)->plaintext));
$article = $this->file_get_html($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
$article = $this->getSimpleHTMLDOM($article_url) or $this->returnServerError('Could not request ZDNet: '.$article_url);
if (!empty($article_author))
$author = $article_author;