use BridgeAbstract::file_get_html in all bridges
instead of simple_html_dom function file_get_html

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
parent
36d39d3f59
commit
955eecc299
123 changed files with 187 additions and 187 deletions
|
@ -15,7 +15,7 @@ class ABCTabsBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404);
|
||||||
$table = $html->find('table#myTable', 0)->children(1);
|
$table = $html->find('table#myTable', 0)->children(1);
|
||||||
|
|
||||||
foreach ($table->find('tr') as $tab)
|
foreach ($table->find('tr') as $tab)
|
||||||
|
|
|
@ -29,7 +29,7 @@ class AcrimedBridge extends RssExpander{
|
||||||
$item->title = trim($newsItem->title);
|
$item->title = trim($newsItem->title);
|
||||||
$item->timestamp = strtotime($dc->date);
|
$item->timestamp = strtotime($dc->date);
|
||||||
|
|
||||||
$articlePage = file_get_html($newsItem->link);
|
$articlePage = $this->file_get_html($newsItem->link);
|
||||||
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||||
$article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");
|
$article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ class AllocineFRBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
||||||
|
|
||||||
foreach($html->find('figure.media-meta-fig') as $element)
|
foreach($html->find('figure.media-meta-fig') as $element)
|
||||||
{
|
{
|
||||||
|
|
|
@ -15,7 +15,7 @@ class AllocineT5Bridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
||||||
|
|
||||||
foreach($html->find('figure.media-meta-fig') as $element)
|
foreach($html->find('figure.media-meta-fig') as $element)
|
||||||
{
|
{
|
||||||
|
|
|
@ -15,7 +15,7 @@ class AllocineTueursEnSerieBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
|
||||||
|
|
||||||
foreach($html->find('figure.media-meta-fig') as $element)
|
foreach($html->find('figure.media-meta-fig') as $element)
|
||||||
{
|
{
|
||||||
|
|
|
@ -67,7 +67,7 @@ class AnimeUltimeBridge extends BridgeAbstract {
|
||||||
//Retrive page contents
|
//Retrive page contents
|
||||||
$website = 'http://www.anime-ultime.net/';
|
$website = 'http://www.anime-ultime.net/';
|
||||||
$url = $website.'history-0-1/'.$requestFilter;
|
$url = $website.'history-0-1/'.$requestFilter;
|
||||||
$html = file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500);
|
$html = $this->file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500);
|
||||||
|
|
||||||
//Relases are sorted by day : process each day individually
|
//Relases are sorted by day : process each day individually
|
||||||
foreach ($html->find('div.history', 0)->find('h3') as $daySection) {
|
foreach ($html->find('div.history', 0)->find('h3') as $daySection) {
|
||||||
|
|
|
@ -26,7 +26,7 @@ class BandcampBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
if (isset($param['tag'])) {
|
if (isset($param['tag'])) {
|
||||||
$this->request = $param['tag'];
|
$this->request = $param['tag'];
|
||||||
$html = file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must specify tag (/tag/...)', 400);
|
$this->returnError('You must specify tag (/tag/...)', 400);
|
||||||
|
|
|
@ -16,11 +16,11 @@ class BastaBridge extends BridgeAbstract{
|
||||||
|
|
||||||
|
|
||||||
function BastaExtractContent($url) {
|
function BastaExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.texte', 0)->innertext;
|
$text = $html2->find('div.texte', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnError('Could not request Bastamag.', 404);
|
$html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnError('Could not request Bastamag.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -12,7 +12,7 @@ class BlaguesDeMerdeBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404);
|
$html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404);
|
||||||
|
|
||||||
foreach($html->find('article.joke_contener') as $element) {
|
foreach($html->find('article.joke_contener') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
|
@ -43,7 +43,7 @@ class BooruprojectBridge extends BridgeAbstract{
|
||||||
if (empty($param['i'])) {
|
if (empty($param['i'])) {
|
||||||
$this->returnError('Please enter a ***.booru.org instance.', 404);
|
$this->returnError('Please enter a ***.booru.org instance.', 404);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404);
|
$html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -22,7 +22,7 @@ class CADBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function CADExtractContent($url) {
|
function CADExtractContent($url) {
|
||||||
$html3 = file_get_html($url);
|
$html3 = $this->file_get_html($url);
|
||||||
$htmlpart = explode("/", $url);
|
$htmlpart = explode("/", $url);
|
||||||
if ($htmlpart[3] == 'cad')
|
if ($htmlpart[3] == 'cad')
|
||||||
preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/cad-\S*png/", $html3, $url2);
|
preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/cad-\S*png/", $html3, $url2);
|
||||||
|
@ -36,7 +36,7 @@ class CADBridge extends BridgeAbstract{
|
||||||
return '<img src="'.$img.'"/>';
|
return '<img src="'.$img.'"/>';
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404);
|
$html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
if($limit < 5) {
|
if($limit < 5) {
|
||||||
|
|
|
@ -51,7 +51,7 @@ class CNETBridge extends BridgeAbstract {
|
||||||
$this->topicName = $param['topic'];
|
$this->topicName = $param['topic'];
|
||||||
|
|
||||||
$pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/');
|
$pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/');
|
||||||
$html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500);
|
$html = $this->file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('div.assetBody') as $element) {
|
foreach($html->find('div.assetBody') as $element) {
|
||||||
|
@ -65,7 +65,7 @@ class CNETBridge extends BridgeAbstract {
|
||||||
|
|
||||||
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) {
|
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) {
|
||||||
|
|
||||||
$article_html = file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500);
|
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500);
|
||||||
|
|
||||||
if (is_null($article_thumbnail))
|
if (is_null($article_thumbnail))
|
||||||
$article_thumbnail = $article_html->find('div.originalImage', 0);
|
$article_thumbnail = $article_html->find('div.originalImage', 0);
|
||||||
|
|
|
@ -19,12 +19,12 @@ class CoinDeskBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function CoinDeskExtractContent($url) {
|
function CoinDeskExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.single-content', 0)->innertext;
|
$text = $html2->find('div.single-content', 0)->innertext;
|
||||||
$text = strip_tags($text, '<p><a><img>');
|
$text = strip_tags($text, '<p><a><img>');
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404);
|
$html = $this->file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('entry') as $element) {
|
foreach($html->find('entry') as $element) {
|
||||||
|
|
|
@ -33,7 +33,7 @@ class CollegeDeFranceBridge extends BridgeAbstract{
|
||||||
* </a>
|
* </a>
|
||||||
* </li>
|
* </li>
|
||||||
*/
|
*/
|
||||||
$html = file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);
|
$html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);
|
||||||
foreach($html->find('a[data-target]') as $element) {
|
foreach($html->find('a[data-target]') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->title = $element->find('.title', 0)->plaintext;
|
$item->title = $element->find('.title', 0)->plaintext;
|
||||||
|
|
|
@ -20,14 +20,14 @@ class CommonDreamsBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function CommonDreamsExtractContent($url) {
|
function CommonDreamsExtractContent($url) {
|
||||||
$html3 = file_get_html($url);
|
$html3 = $this->file_get_html($url);
|
||||||
$text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
|
$text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
|
||||||
$html3->clear();
|
$html3->clear();
|
||||||
unset ($html3);
|
unset ($html3);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404);
|
$html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
if($limit < 4) {
|
if($limit < 4) {
|
||||||
|
|
|
@ -13,7 +13,7 @@ class CopieDoubleBridge extends BridgeAbstract{
|
||||||
|
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404);
|
$html = $this->file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404);
|
||||||
$table = $html->find('table table', 2);
|
$table = $html->find('table table', 2);
|
||||||
|
|
||||||
foreach($table->find('tr') as $element)
|
foreach($table->find('tr') as $element)
|
||||||
|
|
|
@ -15,7 +15,7 @@ class CourrierInternationalBridge extends BridgeAbstract{
|
||||||
|
|
||||||
$html = '';
|
$html = '';
|
||||||
|
|
||||||
$html = file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500);
|
$html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ class CourrierInternationalBridge extends BridgeAbstract{
|
||||||
$item->uri = "http://courrierinternational.fr/".$item->uri;
|
$item->uri = "http://courrierinternational.fr/".$item->uri;
|
||||||
}
|
}
|
||||||
|
|
||||||
$page = file_get_html($item->uri);
|
$page = $this->file_get_html($item->uri);
|
||||||
|
|
||||||
$cleaner = new HTMLSanitizer();
|
$cleaner = new HTMLSanitizer();
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ class CpasbienBridge extends HttpCachingBridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
if (isset($param['q'])) { /* keyword search mode */
|
if (isset($param['q'])) { /* keyword search mode */
|
||||||
$this->request = str_replace(" ","-",trim($param['q']));
|
$this->request = str_replace(" ","-",trim($param['q']));
|
||||||
$html = file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must specify a keyword (?q=...).', 400);
|
$this->returnError('You must specify a keyword (?q=...).', 400);
|
||||||
|
|
|
@ -28,7 +28,7 @@ class CryptomeBridge extends BridgeAbstract{
|
||||||
// If you want HTTPS access instead, uncomment the following line:
|
// If you want HTTPS access instead, uncomment the following line:
|
||||||
//$link = 'https://secure.netsolhost.com/cryptome.org/';
|
//$link = 'https://secure.netsolhost.com/cryptome.org/';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request Cryptome.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request Cryptome.', 404);
|
||||||
if (!empty($param['n'])) { /* number of documents */
|
if (!empty($param['n'])) { /* number of documents */
|
||||||
$num = min(max(1, $param['n']+0), $num);
|
$num = min(max(1, $param['n']+0), $num);
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ class DailymotionBridge extends BridgeAbstract{
|
||||||
|
|
||||||
function getMetadata($id) {
|
function getMetadata($id) {
|
||||||
$metadata=array();
|
$metadata=array();
|
||||||
$html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404);
|
$html2 = $this->file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404);
|
||||||
$metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content');
|
$metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content');
|
||||||
$metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') );
|
$metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') );
|
||||||
$metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content');
|
$metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content');
|
||||||
|
@ -63,15 +63,15 @@ class DailymotionBridge extends BridgeAbstract{
|
||||||
|
|
||||||
if (isset($param['u'])) { // user timeline mode
|
if (isset($param['u'])) { // user timeline mode
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnError('Could not request Dailymotion.', 404);
|
$html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnError('Could not request Dailymotion.', 404);
|
||||||
}
|
}
|
||||||
else if (isset($param['p'])) { // playlist mode
|
else if (isset($param['p'])) { // playlist mode
|
||||||
$this->request = strtok($param['p'], '_');
|
$this->request = strtok($param['p'], '_');
|
||||||
$html = file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404);
|
$html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404);
|
||||||
}
|
}
|
||||||
else if (isset($param['s'])) { // search mode
|
else if (isset($param['s'])) { // search mode
|
||||||
$this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']);
|
$this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']);
|
||||||
$html = file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404);
|
$html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400);
|
$this->returnError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400);
|
||||||
|
|
|
@ -31,7 +31,7 @@ class DanbooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404);
|
$html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404);
|
||||||
foreach($html->find('div[id=posts] article') as $element) {
|
foreach($html->find('div[id=posts] article') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->uri = 'http://donmai.us'.$element->find('a', 0)->href;
|
$item->uri = 'http://donmai.us'.$element->find('a', 0)->href;
|
||||||
|
|
|
@ -15,7 +15,7 @@ class DansTonChatBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
$link = 'http://danstonchat.com/latest.html';
|
$link = 'http://danstonchat.com/latest.html';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.item') as $element) {
|
foreach($html->find('div.item') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -87,17 +87,17 @@ class DauphineLibereBridge extends BridgeAbstract{
|
||||||
|
|
||||||
|
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.column', 0)->innertext;
|
$text = $html2->find('div.column', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
if (isset($param['u'])) { /* user timeline mode */
|
if (isset($param['u'])) { /* user timeline mode */
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
$html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$html = file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
$html = $this->file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
||||||
}
|
}
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
|
|
|
@ -37,13 +37,13 @@ class DeveloppezDotComBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function DeveloppezDotComExtractContent($url) {
|
function DeveloppezDotComExtractContent($url) {
|
||||||
$articleHTMLContent = file_get_html($url);
|
$articleHTMLContent = $this->file_get_html($url);
|
||||||
$text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext);
|
$text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext);
|
||||||
$text = utf8_encode($text);
|
$text = utf8_encode($text);
|
||||||
return trim($text);
|
return trim($text);
|
||||||
}
|
}
|
||||||
|
|
||||||
$rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404);
|
$rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($rssFeed->find('item') as $element) {
|
foreach($rssFeed->find('item') as $element) {
|
||||||
|
|
|
@ -13,7 +13,7 @@ class DilbertBridge extends BridgeAbstract {
|
||||||
|
|
||||||
public function collectData(array $param) {
|
public function collectData(array $param) {
|
||||||
|
|
||||||
$html = file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500);
|
$html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500);
|
||||||
|
|
||||||
foreach ($html->find('section.comic-item') as $element) {
|
foreach ($html->find('section.comic-item') as $element) {
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ class DollbooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404);
|
$html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=shm-image-list] a') as $element) {
|
foreach($html->find('div[class=shm-image-list] a') as $element) {
|
||||||
|
|
|
@ -22,7 +22,7 @@ class DuckDuckGoBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
$link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date';
|
$link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.results_links') as $element) {
|
foreach($html->find('div.results_links') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -44,7 +44,7 @@ class EZTVBridge extends BridgeAbstract{
|
||||||
foreach($showList as $showID){
|
foreach($showList as $showID){
|
||||||
|
|
||||||
// Get show page
|
// Get show page
|
||||||
$html = file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404);
|
$html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404);
|
||||||
|
|
||||||
// Loop on each element that look like an episode entry...
|
// Loop on each element that look like an episode entry...
|
||||||
foreach($html->find('.forum_header_border') as $element) {
|
foreach($html->find('.forum_header_border') as $element) {
|
||||||
|
|
|
@ -13,7 +13,7 @@ class EliteDangerousGalnetBridge extends BridgeAbstract
|
||||||
|
|
||||||
public function collectData(array $param)
|
public function collectData(array $param)
|
||||||
{
|
{
|
||||||
$html = file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404);
|
$html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404);
|
||||||
foreach($html->find('div.article') as $element) {
|
foreach($html->find('div.article') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
||||||
|
|
|
@ -19,12 +19,12 @@ class FSBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function FS_ExtractContent($url) {
|
function FS_ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.fiche-actualite', 0)->innertext;
|
$text = $html2->find('div.fiche-actualite', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404);
|
$html = $this->file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -109,9 +109,9 @@ class FacebookBridge extends BridgeAbstract{
|
||||||
if (is_null($html)) {
|
if (is_null($html)) {
|
||||||
if (isset($param['u'])) {
|
if (isset($param['u'])) {
|
||||||
if (!strpos($param['u'], "/")) {
|
if (!strpos($param['u'], "/")) {
|
||||||
$html = file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
} else {
|
} else {
|
||||||
$html = file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$this->returnError('You must specify a Facebook username.', 400);
|
$this->returnError('You must specify a Facebook username.', 400);
|
||||||
|
|
|
@ -14,7 +14,7 @@ Class FierPandaBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$link = 'http://www.fier-panda.fr/';
|
$link = 'http://www.fier-panda.fr/';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request Fier Panda.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request Fier Panda.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.container-content article') as $element) {
|
foreach($html->find('div.container-content article') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -12,7 +12,7 @@ class FlickrExploreBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404);
|
$html = $this->file_get_html('http://www.flickr.com/explore') or $this->returnError('Could not request Flickr.', 404);
|
||||||
|
|
||||||
foreach($html->find('span.photo_container') as $element) {
|
foreach($html->find('span.photo_container') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -27,14 +27,14 @@ class FlickrTagBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnError('Could not request Flickr.', 404);
|
$html = $this->file_get_html('http://www.flickr.com/search/?q=vendee&s=rec') or $this->returnError('Could not request Flickr.', 404);
|
||||||
if (isset($param['q'])) { /* keyword search mode */
|
if (isset($param['q'])) { /* keyword search mode */
|
||||||
$this->request = $param['q'];
|
$this->request = $param['q'];
|
||||||
$html = file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('http://www.flickr.com/search/?q='.urlencode($this->request).'&s=rec') or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
elseif (isset($param['u'])) { /* user timeline mode */
|
elseif (isset($param['u'])) { /* user timeline mode */
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404);
|
$html = $this->file_get_html('http://www.flickr.com/photos/'.urlencode($this->request).'/') or $this->returnError('Requested username can\'t be found.', 404);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -12,7 +12,7 @@ class FootitoBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.footito.fr/') or $this->returnError('Could not request Footito.', 404);
|
$html = $this->file_get_html('http://www.footito.fr/') or $this->returnError('Could not request Footito.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.post') as $element) {
|
foreach($html->find('div.post') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
|
@ -32,7 +32,7 @@ class FourchanBridge extends BridgeAbstract{
|
||||||
$this->returnError('You must specify the thread URL.', 400);
|
$this->returnError('You must specify the thread URL.', 400);
|
||||||
|
|
||||||
$url = 'https://boards.4chan.org'.$thread['path'].'';
|
$url = 'https://boards.4chan.org'.$thread['path'].'';
|
||||||
$html = file_get_html($url) or $this->returnError("Could not request 4chan, thread not found", 404);
|
$html = $this->file_get_html($url) or $this->returnError("Could not request 4chan, thread not found", 404);
|
||||||
|
|
||||||
foreach($html->find('div.postContainer') as $element) {
|
foreach($html->find('div.postContainer') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -22,7 +22,7 @@ class FrandroidBridge extends BridgeAbstract
|
||||||
}
|
}
|
||||||
function FrandroidExtractContent($url)
|
function FrandroidExtractContent($url)
|
||||||
{
|
{
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$html3 = $html2->find('div.post-content', 0);
|
$html3 = $html2->find('div.post-content', 0);
|
||||||
$html3->find('div.no-sidebar-ad-top', 0)->outertext = '';
|
$html3->find('div.no-sidebar-ad-top', 0)->outertext = '';
|
||||||
$ret = $html3->find('div.shortcode-container');
|
$ret = $html3->find('div.shortcode-container');
|
||||||
|
@ -35,7 +35,7 @@ class FrandroidBridge extends BridgeAbstract
|
||||||
$text = strip_tags($text, '<h1><span><h2><p><b><a><blockquote><img><em><ul><ol>');
|
$text = strip_tags($text, '<h1><span><h2><p><b><a><blockquote><img><em><ul><ol>');
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://feeds.feedburner.com/Frandroid?format=xml') or $this->returnError('Could not request Frandroid.', 404);
|
$html = $this->file_get_html('http://feeds.feedburner.com/Frandroid?format=xml') or $this->returnError('Could not request Frandroid.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
|
|
@ -155,13 +155,13 @@ class FuturaSciencesBridge extends BridgeAbstract {
|
||||||
$this->returnError('Invalid "feed" parameter.'.$url, 400);
|
$this->returnError('Invalid "feed" parameter.'.$url, 400);
|
||||||
|
|
||||||
$url = $this->getURI().'rss/'.$param['feed'].'.xml';
|
$url = $this->getURI().'rss/'.$param['feed'].'.xml';
|
||||||
$html = file_get_html($url) or $this->returnError('Could not request Futura-Sciences: '.$url, 500);
|
$html = $this->file_get_html($url) or $this->returnError('Could not request Futura-Sciences: '.$url, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
if ($limit < 10) {
|
if ($limit < 10) {
|
||||||
$article_url = str_replace('#xtor=RSS-8', '', StripCDATA($element->find('guid', 0)->plaintext));
|
$article_url = str_replace('#xtor=RSS-8', '', StripCDATA($element->find('guid', 0)->plaintext));
|
||||||
$article = file_get_html($article_url) or $this->returnError('Could not request Futura-Sciences: '.$article_url, 500);
|
$article = $this->file_get_html($article_url) or $this->returnError('Could not request Futura-Sciences: '.$article_url, 500);
|
||||||
$contents = $article->find('div.content', 0)->innertext;
|
$contents = $article->find('div.content', 0)->innertext;
|
||||||
$author = trim(str_replace(', Futura-Sciences', '', $article->find('span.author', 0)->plaintext));
|
$author = trim(str_replace(', Futura-Sciences', '', $article->find('span.author', 0)->plaintext));
|
||||||
if (empty($author))
|
if (empty($author))
|
||||||
|
|
|
@ -77,7 +77,7 @@ class GBAtempBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
function fetch_post_content($uri, $site_url) {
|
function fetch_post_content($uri, $site_url) {
|
||||||
$html = file_get_html($uri) or $this->returnError('Could not request GBAtemp: '.$uri, 500);
|
$html = $this->file_get_html($uri) or $this->returnError('Could not request GBAtemp: '.$uri, 500);
|
||||||
$content = $html->find('div.messageContent', 0)->innertext;
|
$content = $html->find('div.messageContent', 0)->innertext;
|
||||||
return cleanup_post_content($content, $site_url);
|
return cleanup_post_content($content, $site_url);
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ class GBAtempBridge extends BridgeAbstract {
|
||||||
} else $this->returnError('The provided type filter is invalid. Expecting N, R, T, or F.', 400);
|
} else $this->returnError('The provided type filter is invalid. Expecting N, R, T, or F.', 400);
|
||||||
} else $this->returnError('Please provide a type filter. Expecting N, R, T, or F.', 400);
|
} else $this->returnError('Please provide a type filter. Expecting N, R, T, or F.', 400);
|
||||||
|
|
||||||
$html = file_get_html($this->getURI()) or $this->returnError('Could not request GBAtemp.', 500);
|
$html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request GBAtemp.', 500);
|
||||||
|
|
||||||
if ($typeFilter == 'N') {
|
if ($typeFilter == 'N') {
|
||||||
foreach ($html->find('li[class=news_item full]') as $newsItem) {
|
foreach ($html->find('li[class=news_item full]') as $newsItem) {
|
||||||
|
@ -110,7 +110,7 @@ class GBAtempBridge extends BridgeAbstract {
|
||||||
$url = $this->getURI().$reviewItem->find('a', 0)->href;
|
$url = $this->getURI().$reviewItem->find('a', 0)->href;
|
||||||
$img = $this->getURI().ExtractFromDelimiters($reviewItem->find('a', 0)->style, 'image:url(', ')');
|
$img = $this->getURI().ExtractFromDelimiters($reviewItem->find('a', 0)->style, 'image:url(', ')');
|
||||||
$title = $reviewItem->find('span.review_title', 0)->plaintext;
|
$title = $reviewItem->find('span.review_title', 0)->plaintext;
|
||||||
$content = file_get_html($url) or $this->returnError('Could not request GBAtemp: '.$uri, 500);
|
$content = $this->file_get_html($url) or $this->returnError('Could not request GBAtemp: '.$uri, 500);
|
||||||
$author = $content->find('a.username', 0)->plaintext;
|
$author = $content->find('a.username', 0)->plaintext;
|
||||||
$time = intval(ExtractFromDelimiters($content->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
|
$time = intval(ExtractFromDelimiters($content->find('abbr.DateTime', 0)->outertext, 'data-time="', '"'));
|
||||||
$intro = '<p><b>'.($content->find('div#review_intro', 0)->plaintext).'</b></p>';
|
$intro = '<p><b>'.($content->find('div#review_intro', 0)->plaintext).'</b></p>';
|
||||||
|
|
|
@ -34,7 +34,7 @@ class GelbooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Gelbooru.', 404);
|
$html = $this->file_get_html("http://gelbooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Gelbooru.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -33,7 +33,7 @@ class GiphyBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
$base_url = 'http://giphy.com';
|
$base_url = 'http://giphy.com';
|
||||||
if (isset($param['s'])) { /* keyword search mode */
|
if (isset($param['s'])) { /* keyword search mode */
|
||||||
$html = file_get_html($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html($base_url.'/search/'.urlencode($param['s'].'/')) or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must specify a search worf (?s=...).', 400);
|
$this->returnError('You must specify a search worf (?s=...).', 400);
|
||||||
|
@ -51,7 +51,7 @@ class GiphyBridge extends BridgeAbstract{
|
||||||
$node = $entry->first_child();
|
$node = $entry->first_child();
|
||||||
$href = $node->getAttribute('href');
|
$href = $node->getAttribute('href');
|
||||||
|
|
||||||
$html2 = file_get_html($base_url . $href) or $this->returnError('No results for this query.', 404);
|
$html2 = $this->file_get_html($base_url . $href) or $this->returnError('No results for this query.', 404);
|
||||||
$figure = $html2->getElementByTagName('figure');
|
$figure = $html2->getElementByTagName('figure');
|
||||||
$img = $figure->firstChild();
|
$img = $figure->firstChild();
|
||||||
$caption = $figure->lastChild();
|
$caption = $figure->lastChild();
|
||||||
|
|
|
@ -14,7 +14,7 @@ class GizmodoFRBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
function GizmodoFRExtractContent($url) {
|
function GizmodoFRExtractContent($url) {
|
||||||
$articleHTMLContent = file_get_html($url);
|
$articleHTMLContent = $this->file_get_html($url);
|
||||||
$text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext;
|
$text = $articleHTMLContent->find('div.entry-thumbnail', 0)->innertext;
|
||||||
$text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext;
|
$text = $text.$articleHTMLContent->find('div.entry-excerpt', 0)->innertext;
|
||||||
$text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext;
|
$text = $text.$articleHTMLContent->find('div.entry-content', 0)->innertext;
|
||||||
|
@ -26,7 +26,7 @@ class GizmodoFRBridge extends BridgeAbstract{
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$rssFeed = file_get_html('http://www.gizmodo.fr/feed') or $this->returnError('Could not request http://www.gizmodo.fr/feed', 404);
|
$rssFeed = $this->file_get_html('http://www.gizmodo.fr/feed') or $this->returnError('Could not request http://www.gizmodo.fr/feed', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($rssFeed->find('item') as $element) {
|
foreach($rssFeed->find('item') as $element) {
|
||||||
|
|
|
@ -33,8 +33,8 @@ class GooglePlusPostBridge extends BridgeAbstract
|
||||||
|
|
||||||
$this->request = $param['username'];
|
$this->request = $param['username'];
|
||||||
// get content parsed
|
// get content parsed
|
||||||
// $html = file_get_html(__DIR__ . '/../posts2.html'
|
// $html = $this->file_get_html(__DIR__ . '/../posts2.html'
|
||||||
$html = file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts'
|
$html = $this->file_get_html(self::GOOGLE_PLUS_BASE_URL . urlencode($this->request) . '/posts'
|
||||||
// force language
|
// force language
|
||||||
, false, stream_context_create(array('http'=> array(
|
, false, stream_context_create(array('http'=> array(
|
||||||
'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n"
|
'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n"
|
||||||
|
|
|
@ -35,7 +35,7 @@ class GoogleSearchBridge extends BridgeAbstract{
|
||||||
|
|
||||||
if (isset($param['q'])) { /* keyword search mode */
|
if (isset($param['q'])) { /* keyword search mode */
|
||||||
$this->request = $param['q'];
|
$this->request = $param['q'];
|
||||||
$html = file_get_html('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html('https://www.google.com/search?q=' . urlencode($this->request) . '&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnError('No results for this query.', 404);
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
$this->returnError('You must specify a keyword (?q=...).', 400);
|
$this->returnError('You must specify a keyword (?q=...).', 400);
|
||||||
|
|
|
@ -19,11 +19,11 @@ class GuruMedBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function GurumedExtractContent($url) {
|
function GurumedExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.entry', 0)->innertext;
|
$text = $html2->find('div.entry', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://gurumed.org/feed') or $this->returnError('Could not request Gurumed.', 404);
|
$html = $this->file_get_html('http://gurumed.org/feed') or $this->returnError('Could not request Gurumed.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -48,7 +48,7 @@ class HDWallpapersBridge extends BridgeAbstract {
|
||||||
|
|
||||||
for ($page = 1; $page <= $lastpage; $page++) {
|
for ($page = 1; $page <= $lastpage; $page++) {
|
||||||
$link = $baseUri.'/'.$category.'/page/'.$page;
|
$link = $baseUri.'/'.$category.'/page/'.$page;
|
||||||
$html = file_get_html($link) or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('No results for this query.', 404);
|
||||||
|
|
||||||
if ($page === 1) {
|
if ($page === 1) {
|
||||||
preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches);
|
preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches);
|
||||||
|
|
|
@ -12,7 +12,7 @@ class HentaiHavenBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://hentaihaven.org/') or $this->returnError('Could not request Hentai Haven.', 404);
|
$html = $this->file_get_html('http://hentaihaven.org/') or $this->returnError('Could not request Hentai Haven.', 404);
|
||||||
foreach($html->find('div.zoe-grid') as $element) {
|
foreach($html->find('div.zoe-grid') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
$item->uri = $this->getURI().$element->find('div.brick-content h3 a', 0)->href;
|
$item->uri = $this->getURI().$element->find('div.brick-content h3 a', 0)->href;
|
||||||
|
|
|
@ -25,7 +25,7 @@ class IdenticaBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
if (isset($param['u'])) { /* user timeline mode */
|
if (isset($param['u'])) { /* user timeline mode */
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = file_get_html('https://identi.ca/'.urlencode($this->request)) or $this->returnError('Requested username can\'t be found.', 404);
|
$html = $this->file_get_html('https://identi.ca/'.urlencode($this->request)) or $this->returnError('Requested username can\'t be found.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must specify an Identica username (?u=...).', 400);
|
$this->returnError('You must specify an Identica username (?u=...).', 400);
|
||||||
|
|
|
@ -25,7 +25,7 @@ class InstagramBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
if (isset($param['u'])) { /* user timeline mode */
|
if (isset($param['u'])) { /* user timeline mode */
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = file_get_html('http://instagram.com/'.urlencode($this->request)) or $this->returnError('Could not request Instagram.', 404);
|
$html = $this->file_get_html('http://instagram.com/'.urlencode($this->request)) or $this->returnError('Could not request Instagram.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$this->returnError('You must specify a Instagram username (?u=...).', 400);
|
$this->returnError('You must specify a Instagram username (?u=...).', 400);
|
||||||
|
|
|
@ -59,7 +59,7 @@ class JapanExpoBridge extends BridgeAbstract{
|
||||||
};
|
};
|
||||||
|
|
||||||
$link = 'http://www.japan-expo-paris.com/fr/actualites';
|
$link = 'http://www.japan-expo-paris.com/fr/actualites';
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request JapanExpo: '.$link , 500);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request JapanExpo: '.$link , 500);
|
||||||
$fullcontent = (!empty($param['mode']) && $param['mode'] == 'full');
|
$fullcontent = (!empty($param['mode']) && $param['mode'] == 'full');
|
||||||
$count = 0;
|
$count = 0;
|
||||||
|
|
||||||
|
@ -73,7 +73,7 @@ class JapanExpoBridge extends BridgeAbstract{
|
||||||
|
|
||||||
if ($fullcontent) {
|
if ($fullcontent) {
|
||||||
if ($count < 5) {
|
if ($count < 5) {
|
||||||
$article_html = file_get_html($url) or $this->returnError('Could not request JapanExpo: '.$url , 500);
|
$article_html = $this->file_get_html($url) or $this->returnError('Could not request JapanExpo: '.$url , 500);
|
||||||
$header = $article_html->find('header.pageHeadBox', 0);
|
$header = $article_html->find('header.pageHeadBox', 0);
|
||||||
$timestamp = strtotime($header->find('time', 0)->datetime);
|
$timestamp = strtotime($header->find('time', 0)->datetime);
|
||||||
$title_html = $header->find('div.section', 0)->next_sibling();
|
$title_html = $header->find('div.section', 0)->next_sibling();
|
||||||
|
|
|
@ -31,7 +31,7 @@ class KonachanBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://konachan.com/post?page=$page&tags=$tags") or $this->returnError('Could not request Konachan.', 404);
|
$html = $this->file_get_html("http://konachan.com/post?page=$page&tags=$tags") or $this->returnError('Could not request Konachan.', 404);
|
||||||
$input_json = explode('Post.register(', $html);
|
$input_json = explode('Post.register(', $html);
|
||||||
foreach($input_json as $element)
|
foreach($input_json as $element)
|
||||||
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
||||||
|
|
|
@ -19,12 +19,12 @@ class KoreusBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function KoreusExtractContent($url) {
|
function KoreusExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('p[class=itemText]', 0)->innertext;
|
$text = $html2->find('p[class=itemText]', 0)->innertext;
|
||||||
$text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text));
|
$text = utf8_encode(preg_replace('/(Sur le m.+?)+$/i','',$text));
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnError('Could not request Koreus.', 404);
|
$html = $this->file_get_html('http://feeds.feedburner.com/Koreus-articles') or $this->returnError('Could not request Koreus.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -239,7 +239,7 @@ class LeBonCoinBridge extends BridgeAbstract{
|
||||||
else {
|
else {
|
||||||
$link = 'http://www.leboncoin.fr/' . $param['c'] . '/offres/' . $param['r'] . '/?f=a&th=1&q=' . urlencode($param['k']);
|
$link = 'http://www.leboncoin.fr/' . $param['c'] . '/offres/' . $param['r'] . '/?f=a&th=1&q=' . urlencode($param['k']);
|
||||||
}
|
}
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request LeBonCoin.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request LeBonCoin.', 404);
|
||||||
|
|
||||||
$list = $html->find('.tabsContent', 0);
|
$list = $html->find('.tabsContent', 0);
|
||||||
if($list === NULL) {
|
if($list === NULL) {
|
||||||
|
|
|
@ -20,7 +20,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function LeJournalDuGeekExtractContent($url) {
|
function LeJournalDuGeekExtractContent($url) {
|
||||||
$articleHTMLContent = file_get_html($url);
|
$articleHTMLContent = $this->file_get_html($url);
|
||||||
$text = $text.$articleHTMLContent->find('div.post-content', 0)->innertext;
|
$text = $text.$articleHTMLContent->find('div.post-content', 0)->innertext;
|
||||||
foreach($articleHTMLContent->find('a.more') as $element) {
|
foreach($articleHTMLContent->find('a.more') as $element) {
|
||||||
if ($element->innertext == "Source") {
|
if ($element->innertext == "Source") {
|
||||||
|
@ -38,7 +38,7 @@ class LeJournalDuGeekBridge extends BridgeAbstract{
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$rssFeed = file_get_html('http://www.journaldugeek.com/rss') or $this->returnError('Could not request http://www.journaldugeek.com/rss', 404);
|
$rssFeed = $this->file_get_html('http://www.journaldugeek.com/rss') or $this->returnError('Could not request http://www.journaldugeek.com/rss', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($rssFeed->find('item') as $element) {
|
foreach($rssFeed->find('item') as $element) {
|
||||||
|
|
|
@ -34,7 +34,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml';
|
$feedUrl = 'http://www.lemondeinformatique.fr/rss/rss.xml';
|
||||||
$html = file_get_html($feedUrl) or $this->returnError('Could not request LeMondeInformatique: '.$feedUrl, 500);
|
$html = $this->file_get_html($feedUrl) or $this->returnError('Could not request LeMondeInformatique: '.$feedUrl, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
@ -44,7 +44,7 @@ class LeMondeInformatiqueBridge extends BridgeAbstract {
|
||||||
$article_uri = $element->innertext;
|
$article_uri = $element->innertext;
|
||||||
$article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
|
$article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
|
||||||
$article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
|
$article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
|
||||||
$article_html = file_get_html($article_uri) or $this->returnError('Could not request LeMondeInformatique: '.$article_uri, 500);
|
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request LeMondeInformatique: '.$article_uri, 500);
|
||||||
$thumbnailUri = $article_html->find('div#article', 0)->find('img#illustration', 0)->src;
|
$thumbnailUri = $article_html->find('div#article', 0)->find('img#illustration', 0)->src;
|
||||||
$article_content = CleanArticle($article_html->find('div#article', 0)->innertext);
|
$article_content = CleanArticle($article_html->find('div#article', 0)->innertext);
|
||||||
$article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext;
|
$article_title = $article_html->find('h1.cleanprint-title', 0)->plaintext;
|
||||||
|
|
|
@ -19,11 +19,11 @@ class LeMotDuJourBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.single-contenu', 0)->innertext;
|
$text = $html2->find('div.single-contenu', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404);
|
$html = $this->file_get_html('http://feeds2.feedburner.com/lemotdujour/lemotdujour') or $this->returnError('Could not request LeMotDuJour.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -12,7 +12,7 @@ class LesJoiesDuCodeBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://lesjoiesducode.fr/') or $this->returnError('Could not request LesJoiesDuCode.', 404);
|
$html = $this->file_get_html('http://lesjoiesducode.fr/') or $this->returnError('Could not request LesJoiesDuCode.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.blog-post') as $element) {
|
foreach($html->find('div.blog-post') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
|
@ -13,7 +13,7 @@ class LichessBridge extends BridgeAbstract
|
||||||
|
|
||||||
public function collectData(array $param)
|
public function collectData(array $param)
|
||||||
{
|
{
|
||||||
$xml_feed = file_get_html('http://fr.lichess.org/blog.atom') or $this->returnError('Could not retrieve Lichess blog feed.', 404);
|
$xml_feed = $this->file_get_html('http://fr.lichess.org/blog.atom') or $this->returnError('Could not retrieve Lichess blog feed.', 404);
|
||||||
|
|
||||||
$posts_loaded = 0;
|
$posts_loaded = 0;
|
||||||
foreach($xml_feed->find('entry') as $entry)
|
foreach($xml_feed->find('entry') as $entry)
|
||||||
|
@ -37,7 +37,7 @@ class LichessBridge extends BridgeAbstract
|
||||||
|
|
||||||
private function retrieve_lichess_post($blog_post_uri)
|
private function retrieve_lichess_post($blog_post_uri)
|
||||||
{
|
{
|
||||||
$blog_post_html = file_get_html($blog_post_uri);
|
$blog_post_html = $this->file_get_html($blog_post_uri);
|
||||||
$blog_post_div = $blog_post_html->find('#lichess_blog', 0);
|
$blog_post_div = $blog_post_html->find('#lichess_blog', 0);
|
||||||
|
|
||||||
$post_chapo = $blog_post_div->find('.shortlede', 0)->innertext;
|
$post_chapo = $blog_post_div->find('.shortlede', 0)->innertext;
|
||||||
|
|
|
@ -22,7 +22,7 @@ class LinkedInCompany extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
$link = 'https://www.linkedin.com/company/'.$param[c];
|
$link = 'https://www.linkedin.com/company/'.$param[c];
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request LinkedIn.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request LinkedIn.', 404);
|
||||||
|
|
||||||
foreach($html->find('//*[@id="my-feed-post"]/li') as $element) {
|
foreach($html->find('//*[@id="my-feed-post"]/li') as $element) {
|
||||||
$title = $element->find('span.share-body', 0)->innertext;
|
$title = $element->find('span.share-body', 0)->innertext;
|
||||||
|
|
|
@ -32,7 +32,7 @@ class LolibooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnError('Could not request Lolibooru.', 404);
|
$html = $this->file_get_html("http://lolibooru.moe/post?page=$page&tags=$tags") or $this->returnError('Could not request Lolibooru.', 404);
|
||||||
$input_json = explode('Post.register(', $html);
|
$input_json = explode('Post.register(', $html);
|
||||||
foreach($input_json as $element)
|
foreach($input_json as $element)
|
||||||
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
||||||
|
|
|
@ -12,14 +12,14 @@ class MalikiBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://www.maliki.com/') or $this->returnError('Could not request Maliki.', 404);
|
$html = $this->file_get_html('http://www.maliki.com/') or $this->returnError('Could not request Maliki.', 404);
|
||||||
$count=0;
|
$count=0;
|
||||||
$latest=1; $latest_title="";
|
$latest=1; $latest_title="";
|
||||||
$latest = $html->find('div.conteneur_page a', 1)->href;
|
$latest = $html->find('div.conteneur_page a', 1)->href;
|
||||||
$latest_title = $html->find('div.conteneur_page img', 0)->title;
|
$latest_title = $html->find('div.conteneur_page img', 0)->title;
|
||||||
|
|
||||||
function MalikiExtractContent($url) {
|
function MalikiExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = 'http://www.maliki.com/'.$html2->find('img', 0)->src;
|
$text = 'http://www.maliki.com/'.$html2->find('img', 0)->src;
|
||||||
$text = '<img alt="" src="'.$text.'"/><br>'.$html2->find('div.imageetnews', 0)->plaintext;
|
$text = '<img alt="" src="'.$text.'"/><br>'.$html2->find('div.imageetnews', 0)->plaintext;
|
||||||
return $text;
|
return $text;
|
||||||
|
|
|
@ -20,7 +20,7 @@ class MemoLinuxBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.entry-content', 0)->innertext;
|
$text = $html2->find('div.entry-content', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
$text = preg_replace('@<div[^>]*?>.*?</div>@si', '', $text);
|
$text = preg_replace('@<div[^>]*?>.*?</div>@si', '', $text);
|
||||||
|
@ -28,7 +28,7 @@ class MemoLinuxBridge extends BridgeAbstract{
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404);
|
$html = $this->file_get_html('http://memo-linux.com/feed/') or $this->returnError('Could not request MemoLinux.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -31,7 +31,7 @@ class MilbooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnError('Could not request Milbooru.', 404);
|
$html = $this->file_get_html("http://sheslostcontrol.net/moe/shimmie/index.php?q=/post/list/$tags/$page") or $this->returnError('Could not request Milbooru.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) {
|
foreach($html->find('div[class=shm-image-list] span[class=thumb]') as $element) {
|
||||||
|
|
|
@ -14,7 +14,7 @@ class MondeDiploBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$link = 'http://www.monde-diplomatique.fr';
|
$link = 'http://www.monde-diplomatique.fr';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request MondeDiplo. for : ' . $link , 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request MondeDiplo. for : ' . $link , 404);
|
||||||
|
|
||||||
foreach($html->find('div.laune') as $element) {
|
foreach($html->find('div.laune') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
|
@ -14,12 +14,12 @@ class MsnMondeBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
function MsnMondeExtractContent($url, &$item) {
|
function MsnMondeExtractContent($url, &$item) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$item->content = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext;
|
$item->content = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext;
|
||||||
$item->timestamp = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime);
|
$item->timestamp = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime);
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.msn.com/fr-fr/actualite/monde') or $this->returnError('Could not request MsnMonde.', 404);
|
$html = $this->file_get_html('http://www.msn.com/fr-fr/actualite/monde') or $this->returnError('Could not request MsnMonde.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('.smalla') as $article) {
|
foreach($html->find('.smalla') as $article) {
|
||||||
if($limit < 10) {
|
if($limit < 10) {
|
||||||
|
|
|
@ -34,7 +34,7 @@ class MspabooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Mspabooru.', 404);
|
$html = $this->file_get_html("http://mspabooru.com/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Mspabooru.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -36,7 +36,7 @@ class NakedSecurityBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml';
|
$feedUrl = 'https://feeds.feedburner.com/nakedsecurity?format=xml';
|
||||||
$html = file_get_html($feedUrl) or $this->returnError('Could not request '.$this->getName().': '.$feedUrl, 500);
|
$html = $this->file_get_html($feedUrl) or $this->returnError('Could not request '.$this->getName().': '.$feedUrl, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
@ -44,7 +44,7 @@ class NakedSecurityBridge extends BridgeAbstract {
|
||||||
|
|
||||||
//Retrieve article Uri and get that page
|
//Retrieve article Uri and get that page
|
||||||
$article_uri = $element->find('guid', 0)->plaintext;
|
$article_uri = $element->find('guid', 0)->plaintext;
|
||||||
$article_html = file_get_html($article_uri) or $this->returnError('Could not request '.$this->getName().': '.$article_uri, 500);
|
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request '.$this->getName().': '.$article_uri, 500);
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
//Build article contents from corresponding elements
|
||||||
$article_title = trim($element->find('title', 0)->plaintext);
|
$article_title = trim($element->find('title', 0)->plaintext);
|
||||||
|
|
|
@ -13,7 +13,7 @@ class NasaApodBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param) {
|
public function collectData(array $param) {
|
||||||
|
|
||||||
$html = file_get_html('http://apod.nasa.gov/apod/archivepix.html') or $this->returnError('Error while downloading the website content', 404);
|
$html = $this->file_get_html('http://apod.nasa.gov/apod/archivepix.html') or $this->returnError('Error while downloading the website content', 404);
|
||||||
$list = explode("<br>", $html->find('b', 0)->innertext);
|
$list = explode("<br>", $html->find('b', 0)->innertext);
|
||||||
|
|
||||||
for($i = 0; $i < 3;$i++)
|
for($i = 0; $i < 3;$i++)
|
||||||
|
@ -25,7 +25,7 @@ class NasaApodBridge extends BridgeAbstract{
|
||||||
$uri = 'http://apod.nasa.gov/apod/'.$uri_page;
|
$uri = 'http://apod.nasa.gov/apod/'.$uri_page;
|
||||||
$item->uri = $uri;
|
$item->uri = $uri;
|
||||||
|
|
||||||
$picture_html = file_get_html($uri);
|
$picture_html = $this->file_get_html($uri);
|
||||||
$picture_html_string = $picture_html->innertext;
|
$picture_html_string = $picture_html->innertext;
|
||||||
|
|
||||||
//Extract image and explanation
|
//Extract image and explanation
|
||||||
|
|
|
@ -20,7 +20,7 @@ class NeuviemeArtBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$feedUrl = 'http://www.9emeart.fr/9emeart.rss';
|
$feedUrl = 'http://www.9emeart.fr/9emeart.rss';
|
||||||
$html = file_get_html($feedUrl) or $this->returnError('Could not request 9eme Art: '.$feedUrl, 500);
|
$html = $this->file_get_html($feedUrl) or $this->returnError('Could not request 9eme Art: '.$feedUrl, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach ($html->find('item') as $element) {
|
foreach ($html->find('item') as $element) {
|
||||||
|
@ -28,7 +28,7 @@ class NeuviemeArtBridge extends BridgeAbstract {
|
||||||
|
|
||||||
//Retrieve article Uri and get that page
|
//Retrieve article Uri and get that page
|
||||||
$article_uri = $element->find('guid', 0)->plaintext;
|
$article_uri = $element->find('guid', 0)->plaintext;
|
||||||
$article_html = file_get_html($article_uri) or $this->returnError('Could not request 9eme Art: '.$article_uri, 500);
|
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request 9eme Art: '.$article_uri, 500);
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
//Build article contents from corresponding elements
|
||||||
$article_title = trim($element->find('title', 0)->plaintext);
|
$article_title = trim($element->find('title', 0)->plaintext);
|
||||||
|
|
|
@ -20,7 +20,7 @@ class NextInpactBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
|
$text = '<p><em>'.$html2->find('span.sub_title', 0)->innertext.'</em></p>'
|
||||||
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
|
.'<p><img src="'.$html2->find('div.container_main_image_article', 0)->find('img.dedicated',0)->src.'" alt="-" /></p>'
|
||||||
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
|
.'<div>'.$html2->find('div[itemprop=articleBody]', 0)->innertext.'</div>';
|
||||||
|
@ -30,7 +30,7 @@ class NextInpactBridge extends BridgeAbstract {
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
|
$html = $this->file_get_html('http://www.nextinpact.com/rss/news.xml') or $this->returnError('Could not request NextInpact.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -22,13 +22,13 @@ class NiceMatinBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
function NiceMatinExtractContent($url) {
|
function NiceMatinExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('figure[itemprop=associatedMedia]', 0)->innertext;
|
$text = $html2->find('figure[itemprop=associatedMedia]', 0)->innertext;
|
||||||
$text .= $html2->find('div[id=content-article]', 0)->innertext;
|
$text .= $html2->find('div[id=content-article]', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);
|
$html = $this->file_get_html('http://www.nicematin.com/derniere-minute/rss') or $this->returnError('Could not request NiceMatin.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -26,7 +26,7 @@ class NovelUpdatesBridge extends BridgeAbstract{
|
||||||
if(strpos($thread['path'], 'series/') === FALSE)
|
if(strpos($thread['path'], 'series/') === FALSE)
|
||||||
$this->returnError('You must specify the novel URL.', 400);
|
$this->returnError('You must specify the novel URL.', 400);
|
||||||
$url = 'http://www.novelupdates.com'.$thread['path'].'';
|
$url = 'http://www.novelupdates.com'.$thread['path'].'';
|
||||||
$fullhtml = file_get_html($url) or $this->returnError("Could not request NovelUpdates, novel not found", 404);
|
$fullhtml = $this->file_get_html($url) or $this->returnError("Could not request NovelUpdates, novel not found", 404);
|
||||||
$this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext;
|
$this->request = $fullhtml->find('h4.seriestitle', 0)->plaintext;
|
||||||
// dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259
|
// dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259
|
||||||
// forcefully removes tbody
|
// forcefully removes tbody
|
||||||
|
|
|
@ -21,14 +21,14 @@ class NumeramaBridge extends BridgeAbstract{
|
||||||
|
|
||||||
function NumeramaExtractContent($url)
|
function NumeramaExtractContent($url)
|
||||||
{
|
{
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
$text = $html2->find('section[class=related-article]', 0)->innertext = ''; // remove related articles block
|
||||||
$text = '<img alt="" style="max-width:300px;" src="'.$html2->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
$text = '<img alt="" style="max-width:300px;" src="'.$html2->find('meta[property=og:image]', 0)->getAttribute('content').'">'; // add post picture
|
||||||
$text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
|
$text = $text.$html2->find('article[class=post-content]', 0)->innertext; // extract the post
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);
|
$html = $this->file_get_html('http://www.numerama.com/feed/') or $this->returnError('Could not request Numerama.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -68,7 +68,7 @@ class OpenClassroomsBridge extends BridgeAbstract{
|
||||||
$html = '';
|
$html = '';
|
||||||
$link = 'https://openclassrooms.com/courses?categories='.$param['u'].'&title=&sort=updatedAt+desc';
|
$link = 'https://openclassrooms.com/courses?categories='.$param['u'].'&title=&sort=updatedAt+desc';
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request OpenClassrooms.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request OpenClassrooms.', 404);
|
||||||
|
|
||||||
foreach($html->find('.courseListItem') as $element) {
|
foreach($html->find('.courseListItem') as $element) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -19,12 +19,12 @@ class OpenTheoryBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.entry-content', 0)->innertext;
|
$text = $html2->find('div.entry-content', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404);
|
$html = $this->file_get_html('http://open1theory.com/feed') or $this->returnError('Could not request OpenTheory.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -66,7 +66,7 @@ class ParuVenduImmoBridge extends BridgeAbstract
|
||||||
$link .= '&lo='.urlencode($param['lo']);
|
$link .= '&lo='.urlencode($param['lo']);
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request paruvendu.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request paruvendu.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div.annonce a') as $element) {
|
foreach($html->find('div.annonce a') as $element) {
|
||||||
|
|
|
@ -55,7 +55,7 @@ class PickyWallpapersBridge extends BridgeAbstract {
|
||||||
|
|
||||||
for ($page = 1; $page <= $lastpage; $page++) {
|
for ($page = 1; $page <= $lastpage; $page++) {
|
||||||
$link = $baseUri.'/'.$this->resolution.'/'.$this->category.'/'.(!empty($this->subcategory)?$this->subcategory.'/':'').'page-'.$page.'/';
|
$link = $baseUri.'/'.$this->resolution.'/'.$this->category.'/'.(!empty($this->subcategory)?$this->subcategory.'/':'').'page-'.$page.'/';
|
||||||
$html = file_get_html($link) or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('No results for this query.', 404);
|
||||||
|
|
||||||
if ($page === 1) {
|
if ($page === 1) {
|
||||||
preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches);
|
preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches);
|
||||||
|
|
|
@ -51,12 +51,12 @@ class PinterestBridge extends BridgeAbstract{
|
||||||
|
|
||||||
$this->username = $param['u'];
|
$this->username = $param['u'];
|
||||||
$this->board = $param['b'];
|
$this->board = $param['b'];
|
||||||
$html = file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Username and/or board not found', 404);
|
$html = $this->file_get_html($this->getURI().'/'.urlencode($this->username).'/'.urlencode($this->board)) or $this->returnError('Username and/or board not found', 404);
|
||||||
|
|
||||||
} else if (isset($param['q']))
|
} else if (isset($param['q']))
|
||||||
{
|
{
|
||||||
$this->query = $param['q'];
|
$this->query = $param['q'];
|
||||||
$html = file_get_html($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnError('Could not request Pinterest.', 404);
|
$html = $this->file_get_html($this->getURI().'/search/?q='.urlencode($this->query)) or $this->returnError('Could not request Pinterest.', 404);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -14,11 +14,11 @@ class PlanetLibreBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
function PlanetLibreExtractContent($url) {
|
function PlanetLibreExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div[class="post-text"]', 0)->innertext;
|
$text = $html2->find('div[class="post-text"]', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.planet-libre.org/') or $this->returnError('Could not request PlanetLibre.', 404);
|
$html = $this->file_get_html('http://www.planet-libre.org/') or $this->returnError('Could not request PlanetLibre.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('div.post') as $element) {
|
foreach($html->find('div.post') as $element) {
|
||||||
if($limit < 5) {
|
if($limit < 5) {
|
||||||
|
|
|
@ -14,7 +14,7 @@ class ProjectMGameBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = file_get_html('http://projectmgame.com/en/') or $this->returnError('Error while downloading the Project M homepage', 404);
|
$html = $this->file_get_html('http://projectmgame.com/en/') or $this->returnError('Error while downloading the Project M homepage', 404);
|
||||||
|
|
||||||
foreach($html->find('article') as $article) {
|
foreach($html->find('article') as $article) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -24,7 +24,7 @@ class RTBFBridge extends BridgeAbstract {
|
||||||
$count = 0;
|
$count = 0;
|
||||||
|
|
||||||
if (isset($param['c'])) {
|
if (isset($param['c'])) {
|
||||||
$html = file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnError('Could not request RTBF.', 404);
|
$html = $this->file_get_html('http://www.rtbf.be/auvio/emissions/detail?id='.$param['c']) or $this->returnError('Could not request RTBF.', 404);
|
||||||
|
|
||||||
foreach($html->find('.rtbf-media-grid article') as $element) {
|
foreach($html->find('.rtbf-media-grid article') as $element) {
|
||||||
if($count < $limit) {
|
if($count < $limit) {
|
||||||
|
|
|
@ -18,14 +18,14 @@ class RaymondBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function raymondExtractContent($url) {
|
function raymondExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.entry-content', 0)->innertext;
|
$text = $html2->find('div.entry-content', 0)->innertext;
|
||||||
$text = preg_replace('/class="ad".*/', '', $text);
|
$text = preg_replace('/class="ad".*/', '', $text);
|
||||||
$text = strip_tags($text, '<p><a><i><strong><em><img>');
|
$text = strip_tags($text, '<p><a><i><strong><em><img>');
|
||||||
$text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text);
|
$text = str_replace('(adsbygoogle = window.adsbygoogle || []).push({});', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404);
|
$html = $this->file_get_html('http://www.raymond.cc/blog/feed') or $this->returnError('Could not request raymond.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
if($limit < 3) {
|
if($limit < 3) {
|
||||||
|
|
|
@ -66,7 +66,7 @@ class Releases3DSBridge extends BridgeAbstract {
|
||||||
//Retrieve cover art and short desc from IGN?
|
//Retrieve cover art and short desc from IGN?
|
||||||
$ignResult = false; $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = '';
|
$ignResult = false; $ignDescription = ''; $ignLink = ''; $ignDate = time(); $ignCoverArt = '';
|
||||||
$ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name);
|
$ignSearchUrl = 'http://www.ign.com/search?q='.urlencode($name);
|
||||||
if ($ignResult = file_get_html($ignSearchUrl)) {
|
if ($ignResult = $this->file_get_html($ignSearchUrl)) {
|
||||||
$ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src;
|
$ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src;
|
||||||
$ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext;
|
$ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext;
|
||||||
$ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href;
|
$ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href;
|
||||||
|
|
|
@ -14,7 +14,7 @@ class ReporterreBridge extends BridgeAbstract{
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
function ExtractContentReporterre($url) {
|
function ExtractContentReporterre($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
foreach($html2->find('div[style=text-align:justify]') as $e) {
|
foreach($html2->find('div[style=text-align:justify]') as $e) {
|
||||||
$text = $e->outertext;
|
$text = $e->outertext;
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ class ReporterreBridge extends BridgeAbstract{
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = file_get_html('http://www.reporterre.net/spip.php?page=backend') or $this->returnError('Could not request Reporterre.', 404);
|
$html = $this->file_get_html('http://www.reporterre.net/spip.php?page=backend') or $this->returnError('Could not request Reporterre.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -22,7 +22,7 @@ class Rue89Bridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
$html = file_get_html('http://api.rue89.nouvelobs.com/feed') or $this->returnError('Could not request Rue89.', 404);
|
$html = $this->file_get_html('http://api.rue89.nouvelobs.com/feed') or $this->returnError('Could not request Rue89.', 404);
|
||||||
|
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -33,7 +33,7 @@ class Rule34Bridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Rule34.', 404);
|
$html = $this->file_get_html("http://rule34.xxx/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Rule34.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -32,7 +32,7 @@ class Rule34pahealBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnError('Could not request Rule34paheal.', 404);
|
$html = $this->file_get_html("http://rule34.paheal.net/post/list/$tags/$page") or $this->returnError('Could not request Rule34paheal.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=shm-image-list] div[class=shm-thumb]') as $element) {
|
foreach($html->find('div[class=shm-image-list] div[class=shm-thumb]') as $element) {
|
||||||
|
|
|
@ -34,7 +34,7 @@ class SafebooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Safebooru.', 404);
|
$html = $this->file_get_html("http://safebooru.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Safebooru.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -31,7 +31,7 @@ class SakugabooruBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnError('Could not request Sakugabooru.', 404);
|
$html = $this->file_get_html("http://sakuga.yshi.org/post?page=$page&tags=$tags") or $this->returnError('Could not request Sakugabooru.', 404);
|
||||||
$input_json = explode('Post.register(', $html);
|
$input_json = explode('Post.register(', $html);
|
||||||
foreach($input_json as $element)
|
foreach($input_json as $element)
|
||||||
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
$data[] = preg_replace('/}\)(.*)/', '}', $element);
|
||||||
|
|
|
@ -19,11 +19,11 @@ class ScilogsBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function ScilogsExtractContent($url) {
|
function ScilogsExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.entrybody', 0)->innertext;
|
$text = $html2->find('div.entrybody', 0)->innertext;
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://www.scilogs.fr/?wpmu-feed=posts') or $this->returnError('Could not request Scilogs.', 404);
|
$html = $this->file_get_html('http://www.scilogs.fr/?wpmu-feed=posts') or $this->returnError('Could not request Scilogs.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -13,7 +13,7 @@ class ScmbBridge extends BridgeAbstract{
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = '';
|
$html = '';
|
||||||
$html = file_get_html('http://secouchermoinsbete.fr/') or $this->returnError('Could not request Se Coucher Moins Bete.', 404);
|
$html = $this->file_get_html('http://secouchermoinsbete.fr/') or $this->returnError('Could not request Se Coucher Moins Bete.', 404);
|
||||||
|
|
||||||
foreach($html->find('article') as $article) {
|
foreach($html->find('article') as $article) {
|
||||||
$item = new \Item();
|
$item = new \Item();
|
||||||
|
|
|
@ -25,7 +25,7 @@ class ScoopItBridge extends BridgeAbstract{
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$link = 'http://scoop.it/search?q=' .urlencode($this->request);
|
$link = 'http://scoop.it/search?q=' .urlencode($this->request);
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request ScoopIt. for : ' . $link , 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request ScoopIt. for : ' . $link , 404);
|
||||||
|
|
||||||
foreach($html->find('div.post-view') as $element) {
|
foreach($html->find('div.post-view') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
|
@ -19,12 +19,12 @@ class SegfaultMintBridge extends BridgeAbstract{
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url) {
|
||||||
$html2 = file_get_html($url);
|
$html2 = $this->file_get_html($url);
|
||||||
$text = $html2->find('div.post-bodycopy', 0)->innertext;
|
$text = $html2->find('div.post-bodycopy', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
}
|
}
|
||||||
$html = file_get_html('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404);
|
$html = $this->file_get_html('http://segfault.linuxmint.com/feed/') or $this->returnError('Could not request segfault.', 404);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
|
|
@ -16,7 +16,7 @@ class Sexactu extends BridgeAbstract{
|
||||||
$find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'novembre', 'décembre');
|
$find = array('janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet', 'août', 'septembre', 'novembre', 'décembre');
|
||||||
$replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');
|
$replace = array('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December');
|
||||||
|
|
||||||
$html = file_get_html($this->getURI()) or $this->returnError('Could not request '.$this->getURI(), 404);
|
$html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request '.$this->getURI(), 404);
|
||||||
|
|
||||||
foreach($html->find('.content-holder') as $contentHolder) {
|
foreach($html->find('.content-holder') as $contentHolder) {
|
||||||
// only use first list as second one only contains pages numbers
|
// only use first list as second one only contains pages numbers
|
||||||
|
|
|
@ -20,7 +20,7 @@ class SiliconBridge extends BridgeAbstract {
|
||||||
}
|
}
|
||||||
|
|
||||||
$feedUrl = 'http://www.silicon.fr/feed';
|
$feedUrl = 'http://www.silicon.fr/feed';
|
||||||
$html = file_get_html($feedUrl) or $this->returnError('Could not request Silicon: '.$feedUrl, 500);
|
$html = $this->file_get_html($feedUrl) or $this->returnError('Could not request Silicon: '.$feedUrl, 500);
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
foreach($html->find('item') as $element) {
|
foreach($html->find('item') as $element) {
|
||||||
|
@ -30,7 +30,7 @@ class SiliconBridge extends BridgeAbstract {
|
||||||
$article_uri = $element->innertext;
|
$article_uri = $element->innertext;
|
||||||
$article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
|
$article_uri = substr($article_uri, strpos($article_uri, '<link>') + 6);
|
||||||
$article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
|
$article_uri = substr($article_uri, 0, strpos($article_uri, '</link>'));
|
||||||
$article_html = file_get_html($article_uri) or $this->returnError('Could not request Silicon: '.$article_uri, 500);
|
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request Silicon: '.$article_uri, 500);
|
||||||
|
|
||||||
//Build article contents from corresponding elements
|
//Build article contents from corresponding elements
|
||||||
$thumbnailUri = $element->find('enclosure', 0)->url;
|
$thumbnailUri = $element->find('enclosure', 0)->url;
|
||||||
|
|
|
@ -47,12 +47,12 @@ class SuperbWallpapersBridge extends BridgeAbstract {
|
||||||
|
|
||||||
// Get last page number
|
// Get last page number
|
||||||
$link = $baseUri.'/'.$this->category.'/9999.html';
|
$link = $baseUri.'/'.$this->category.'/9999.html';
|
||||||
$html = file_get_html($link);
|
$html = $this->file_get_html($link);
|
||||||
$lastpage = min($html->find('.paging .cpage', 0)->innertext(), ceil($max/36));
|
$lastpage = min($html->find('.paging .cpage', 0)->innertext(), ceil($max/36));
|
||||||
|
|
||||||
for ($page = 1; $page <= $lastpage; $page++) {
|
for ($page = 1; $page <= $lastpage; $page++) {
|
||||||
$link = $baseUri.'/'.$this->category.'/'.$page.'.html';
|
$link = $baseUri.'/'.$this->category.'/'.$page.'.html';
|
||||||
$html = file_get_html($link) or $this->returnError('No results for this query.', 404);
|
$html = $this->file_get_html($link) or $this->returnError('No results for this query.', 404);
|
||||||
|
|
||||||
foreach($html->find('.wpl .i a') as $element) {
|
foreach($html->find('.wpl .i a') as $element) {
|
||||||
$thumbnail = $element->find('img', 0);
|
$thumbnail = $element->find('img', 0);
|
||||||
|
|
|
@ -36,7 +36,7 @@ class T411Bridge extends BridgeAbstract {
|
||||||
|
|
||||||
//Retrieve torrent listing from search results, which does not contain torrent description
|
//Retrieve torrent listing from search results, which does not contain torrent description
|
||||||
$url = $this->getURI().'torrents/search/?'.$param['search'].'&order=added&type=desc';
|
$url = $this->getURI().'torrents/search/?'.$param['search'].'&order=added&type=desc';
|
||||||
$html = file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500);
|
$html = $this->file_get_html($url) or $this->returnError('Could not request t411: '.$url, 500);
|
||||||
$results = $html->find('table.results', 0);
|
$results = $html->find('table.results', 0);
|
||||||
if (is_null($results))
|
if (is_null($results))
|
||||||
$this->returnError('No results from t411: '.$url, 500);
|
$this->returnError('No results from t411: '.$url, 500);
|
||||||
|
@ -57,7 +57,7 @@ class T411Bridge extends BridgeAbstract {
|
||||||
$item_date = strtotime($element->find('dd', 0)->plaintext);
|
$item_date = strtotime($element->find('dd', 0)->plaintext);
|
||||||
|
|
||||||
//Retrieve full description from torrent page
|
//Retrieve full description from torrent page
|
||||||
if ($item_html = file_get_html($item_uri)) {
|
if ($item_html = $this->file_get_html($item_uri)) {
|
||||||
|
|
||||||
//Retrieve data from page contents
|
//Retrieve data from page contents
|
||||||
$item_desc = $item_html->find('div.description', 0);
|
$item_desc = $item_html->find('div.description', 0);
|
||||||
|
|
|
@ -24,7 +24,7 @@ class TagBoardBridge extends BridgeAbstract{
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$link = 'https://post-cache.tagboard.com/search/' .$this->request;
|
$link = 'https://post-cache.tagboard.com/search/' .$this->request;
|
||||||
|
|
||||||
$html = file_get_html($link) or $this->returnError('Could not request TagBoard for : ' . $link , 404);
|
$html = $this->file_get_html($link) or $this->returnError('Could not request TagBoard for : ' . $link , 404);
|
||||||
$parsed_json = json_decode($html);
|
$parsed_json = json_decode($html);
|
||||||
|
|
||||||
foreach($parsed_json->{'posts'} as $element) {
|
foreach($parsed_json->{'posts'} as $element) {
|
||||||
|
|
|
@ -33,7 +33,7 @@ class TbibBridge extends BridgeAbstract{
|
||||||
if (isset($param['t'])) {
|
if (isset($param['t'])) {
|
||||||
$tags = urlencode($param['t']);
|
$tags = urlencode($param['t']);
|
||||||
}
|
}
|
||||||
$html = file_get_html("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Tbib.', 404);
|
$html = $this->file_get_html("http://tbib.org/index.php?page=post&s=list&tags=$tags&pid=$page") or $this->returnError('Could not request Tbib.', 404);
|
||||||
|
|
||||||
|
|
||||||
foreach($html->find('div[class=content] span') as $element) {
|
foreach($html->find('div[class=content] span') as $element) {
|
||||||
|
|
|
@ -12,7 +12,7 @@ class TheCodingLoveBridge extends BridgeAbstract{
|
||||||
}
|
}
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
$html = file_get_html('http://thecodinglove.com/') or $this->returnError('Could not request The Coding Love.', 404);
|
$html = $this->file_get_html('http://thecodinglove.com/') or $this->returnError('Could not request The Coding Love.', 404);
|
||||||
|
|
||||||
foreach($html->find('div.post') as $element) {
|
foreach($html->find('div.post') as $element) {
|
||||||
$item = new Item();
|
$item = new Item();
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue