Add an HTTP stream context to simulate a Mozilla user agent
This fixes the error "Warning: file_get_contents(http://www.ledauphine.com/rss): failed to open stream: HTTP request failed! HTTP/1.1 403 Forbidden".
This commit is contained in:
parent
4420906a7a
commit
c8c3e9ef65
1 changed file with 21 additions and 11 deletions
|
@ -81,8 +81,8 @@ class DauphineLibereBridge extends BridgeAbstract {
|
||||||
]';
|
]';
|
||||||
}
|
}
|
||||||
|
|
||||||
function ExtractContent($url) {
|
function ExtractContent($url, $context) {
|
||||||
$html2 = $this->file_get_html($url);
|
$html2 = $this->file_get_html($url, false, $context);
|
||||||
$text = $html2->find('div.column', 0)->innertext;
|
$text = $html2->find('div.column', 0)->innertext;
|
||||||
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
|
||||||
return $text;
|
return $text;
|
||||||
|
@ -90,12 +90,22 @@ class DauphineLibereBridge extends BridgeAbstract {
|
||||||
|
|
||||||
public function collectData(array $param){
|
public function collectData(array $param){
|
||||||
|
|
||||||
|
// Simulate Mozilla user-agent to fix error 403 (Forbidden)
|
||||||
|
$opts = array('http' =>
|
||||||
|
array(
|
||||||
|
'method' => 'GET',
|
||||||
|
'header' => 'User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
$context = stream_context_create($opts);
|
||||||
|
|
||||||
if (isset($param['u'])) { /* user timeline mode */
|
if (isset($param['u'])) { /* user timeline mode */
|
||||||
$this->request = $param['u'];
|
$this->request = $param['u'];
|
||||||
$html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
$html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss', false, $context) or $this->returnError('Could not request DauphineLibere.', 404);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$html = $this->file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404);
|
$html = $this->file_get_html('http://www.ledauphine.com/rss', false, $context) or $this->returnError('Could not request DauphineLibere.', 404);
|
||||||
}
|
}
|
||||||
$limit = 0;
|
$limit = 0;
|
||||||
|
|
||||||
|
@ -105,7 +115,7 @@ class DauphineLibereBridge extends BridgeAbstract {
|
||||||
$item->title = $element->find('title', 0)->innertext;
|
$item->title = $element->find('title', 0)->innertext;
|
||||||
$item->uri = $element->find('guid', 0)->plaintext;
|
$item->uri = $element->find('guid', 0)->plaintext;
|
||||||
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
$item->timestamp = strtotime($element->find('pubDate', 0)->plaintext);
|
||||||
$item->content = $this->ExtractContent($item->uri);
|
$item->content = $this->ExtractContent($item->uri, $context);
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
$limit++;
|
$limit++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue