Correction de quelques problèmes avec RSSExpander suite à la migration dans lib/Bridge.

Correction de typos.
Ajout de la possibilité d'utiliser un proxy.
This commit is contained in:
Teromene 2015-11-27 14:20:33 +00:00 committed by Mitsukarenai
parent e582c887fb
commit 1a4a428449
6 changed files with 37 additions and 22 deletions

View file

@ -31,10 +31,10 @@ class Gawker extends RssExpander{
trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR); trigger_error("If no site is provided, nothing is gonna happen", E_USER_ERROR);
} else { } else {
$this->name = $param['site']; $this->name = $param['site'];
$param['url'] = $this->toURI(strtolower($param['site'])); $url = $this->toURI(strtolower($param['site']));
} }
// $this->message("loading feed from ".$this->getURI()); // $this->message("loading feed from ".$this->getURI());
parent::collectExpandableDatas($param, $name); parent::collectExpandableDatas($param, $url);
} }
protected function parseRSSItem($newsItem) { protected function parseRSSItem($newsItem) {
@ -49,7 +49,7 @@ class Gawker extends RssExpander{
$articlePage = str_get_html($this->get_cached($item->uri)); $articlePage = str_get_html($this->get_cached($item->uri));
if(is_object($articlePage)) { if(is_object($articlePage)) {
$content = $articlePage->find('.post-content', 0); $content = $articlePage->find('.post-content', 0);
$this->defaultImageSrcTo($content, $this->getURI()); HTMLSanitizer::defaultImageSrcTo($content, $this->getURI());
$vcard = $articlePage->find('.vcard', 0); $vcard = $articlePage->find('.vcard', 0);
if(is_object($vcard)) { if(is_object($vcard)) {
$authorLink = $vcard->find('a', 0); $authorLink = $vcard->find('a', 0);

View file

@ -86,7 +86,7 @@ class WorldOfTanks extends HttpCachingBridgeAbstract{
// $this->message("loading page ".$item->uri); // $this->message("loading page ".$item->uri);
$articlePage = str_get_html($this->get_cached($item->uri)); $articlePage = str_get_html($this->get_cached($item->uri));
$content = $articlePage->find('.l-content', 0); $content = $articlePage->find('.l-content', 0);
$this->defaultImageSrcTo($content, WORLD_OF_TANKS); HTMLSanitizer::defaultImageSrcTo($content, WORLD_OF_TANKS);
$item->title = $content->find('h1', 0)->innertext; $item->title = $content->find('h1', 0)->innertext;
$item->content = $content->find('.b-content', 0)->innertext; $item->content = $content->find('.b-content', 0)->innertext;
// $item->name = $auteur->innertext; // $item->name = $auteur->innertext;

View file

@ -11,6 +11,8 @@ TODO :
- implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile))); - implement header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile)));
*/ */
//define('PROXY_URL', 'tcp://192.168.0.0:28');
date_default_timezone_set('UTC'); date_default_timezone_set('UTC');
error_reporting(0); error_reporting(0);
//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only. //ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
@ -21,6 +23,7 @@ if (!extension_loaded('openssl'))
// FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites // FIXME : beta test UA spoofing, please report any blacklisting by PHP-fopen-unfriendly websites
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)'); ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20121202 Firefox/30.0 (rss-bridge/0.1; +https://github.com/sebsauvage/rss-bridge)');
// ------- // -------
// cache file purge - delete cache files older than 24 hours // cache file purge - delete cache files older than 24 hours

View file

@ -39,7 +39,7 @@ abstract class BridgeAbstract implements BridgeInterface{
} }
/** /**
* Return datas store in the bridge * Return datas stored in the bridge
* @return mixed * @return mixed
*/ */
public function getDatas(){ public function getDatas(){
@ -50,7 +50,7 @@ abstract class BridgeAbstract implements BridgeInterface{
/** /**
* Define datas with parameters depending on the chosen bridge * Define datas with parameters depending on the chosen bridge
* Note : you can defined a cache before with "setCache" * Note : you can define a cache before with "setCache"
* @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected parameters * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected parameters
*/ */
public function setDatas(array $param){ public function setDatas(array $param){
@ -90,16 +90,6 @@ abstract class BridgeAbstract implements BridgeInterface{
return $this; return $this;
} }
/**
 * Prefix root-relative image sources with the given server.
 *
 * Only images whose src begins with a single '/' are rewritten. Any other
 * src (absolute URL, protocol-relative '//host/...', document-relative path
 * such as 'img/a.png', or a missing/non-string value) is left untouched.
 *
 * @param object $content node exposing find('img'); presumably a simple_html_dom element (confirm against callers)
 * @param string $server  prefix prepended verbatim, e.g. 'http://example.com' (no trailing slash expected)
 * @return void
 */
public function defaultImageSrcTo($content, $server) {
    // Nothing to rewrite when the caller's lookup did not yield a node.
    if(!is_object($content)) {
        return;
    }
    foreach($content->find('img') as $image) {
        $src = $image->src;
        // strpos() returns false when '/' is absent, and false == 0 is true in PHP,
        // so the comparison must be strict or slash-less sources get prefixed too.
        // A leading '//' already names a host and must not be prefixed either.
        if(is_string($src) && strpos($src, '/') === 0 && strpos($src, '//') !== 0) {
            $image->src = $server.$src;
        }
    }
}
} }
/** /**
@ -311,7 +301,7 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
public function collectExpandableDatas(array $param, $name){ public function collectExpandableDatas(array $param, $name){
if (empty($name)) { if (empty($name)) {
$this->returnError('There is no $param[\'url\'] for this RSS expander', 404); $this->returnError('There is no $name for this RSS expander', 404);
} }
// $this->message("Loading from ".$param['url']); // $this->message("Loading from ".$param['url']);
// Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time // Notice WE DO NOT use cache here on purpose : we want a fresh view of the RSS stream each time
@ -363,3 +353,20 @@ abstract class RssExpander extends HttpCachingBridgeAbstract{
return $this->description; return $this->description;
} }
} }
/**
 * Read a URL or file, routing the request through PROXY_URL when that constant is defined.
 *
 * The optional arguments mirror file_get_contents() so that callers which
 * forward them (instead of passing only the URL) keep their effect.
 *
 * @param string        $url              location to read
 * @param bool          $use_include_path forwarded to file_get_contents()
 * @param resource|null $context          optional stream context; when PROXY_URL is defined its options are copied
 *                                        into a new context with the proxy settings layered on top
 * @param int           $offset           byte offset to start reading at; values <= 0 read from the beginning
 * @return string|false whatever file_get_contents() returns for the request
 */
function advanced_file_get_contents($url, $use_include_path = false, $context = null, $offset = 0) {
    // NOTE(review): callers appear to pass a negative offset as a "no offset" sentinel;
    // clamp it so it is never interpreted as "seek from the end" — confirm against callers.
    $offset = max(0, (int) $offset);

    if(defined('PROXY_URL')) {
        // Build a fresh context so the caller's resource is not modified.
        $options = is_resource($context) ? stream_context_get_options($context) : array();
        $options['http']['proxy'] = PROXY_URL;
        $options['http']['request_fulluri'] = true;
        $context = stream_context_create($options);
    }

    return file_get_contents($url, $use_include_path, $context, $offset);
}

View file

@ -91,7 +91,7 @@ CARD;
} else if($inputEntry['type'] == 'number') { } else if($inputEntry['type'] == 'number') {
$card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL; $card .= '<input '.$additionalInfoString.' id="' . $idArg . '" type="number" value="" placeholder="' . $inputEntry['exampleValue'] . '" name="' . $inputEntry['identifier'] . '" /><br />' . PHP_EOL;
} else if($inputEntry['type'] == 'list') { } else if($inputEntry['type'] == 'list') {
$card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['name'] . '" >'; $card .= '<select '.$additionalInfoString.' id="' . $idArg . '" name="' . $inputEntry['identifier'] . '" >';
foreach($inputEntry['values'] as $listValues) { foreach($inputEntry['values'] as $listValues) {
$card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>"; $card .= "<option $additionalInfoString value='" . $listValues['value'] . "'>" . $listValues['name'] . "</option>";
@ -157,9 +157,7 @@ class HTMLSanitizer {
$element->outertext = ''; $element->outertext = '';
} else { } else {
foreach($element->getAllAttributes() as $attributeName => $attribute) { foreach($element->getAllAttributes() as $attributeName => $attribute) {
if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName); if(!in_array($attributeName, $this->keptAttributes)) $element->removeAttribute($attributeName);
} }
} }
} }
@ -167,6 +165,13 @@ class HTMLSanitizer {
return $htmlContent; return $htmlContent;
} }
/**
 * Prefix root-relative image sources with the given server.
 *
 * Only images whose src begins with a single '/' are rewritten. Any other
 * src (absolute URL, protocol-relative '//host/...', document-relative path
 * such as 'img/a.png', or a missing/non-string value) is left untouched.
 *
 * @param object $content node exposing find('img'); presumably a simple_html_dom element (confirm against callers)
 * @param string $server  prefix prepended verbatim, e.g. 'http://example.com' (no trailing slash expected)
 * @return void
 */
public static function defaultImageSrcTo($content, $server) {
    // Nothing to rewrite when the caller's lookup did not yield a node.
    if(!is_object($content)) {
        return;
    }
    foreach($content->find('img') as $image) {
        $src = $image->src;
        // strpos() returns false when '/' is absent, and false == 0 is true in PHP,
        // so the comparison must be strict or slash-less sources get prefixed too.
        // A leading '//' already names a host and must not be prefixed either.
        if(is_string($src) && strpos($src, '/') === 0 && strpos($src, '//') !== 0) {
            $image->src = $server.$src;
        }
    }
}
} }
?> ?>

View file

@ -73,7 +73,7 @@ function file_get_html($url, $use_include_path = false, $context=null, $offset =
// We DO force the tags to be terminated. // We DO force the tags to be terminated.
$dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
// For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done. // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
$contents = file_get_contents($url, $use_include_path, $context, $offset); $contents = advanced_file_get_contents($url, $use_include_path, $context, $offset);
// Paperg - use our own mechanism for getting the contents as we want to control the timeout. // Paperg - use our own mechanism for getting the contents as we want to control the timeout.
//$contents = retrieve_url_contents($url); //$contents = retrieve_url_contents($url);
if (empty($contents) || strlen($contents) > MAX_FILE_SIZE) if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
@ -1094,7 +1094,7 @@ class simple_html_dom
function load_file() function load_file()
{ {
$args = func_get_args(); $args = func_get_args();
$this->load(call_user_func_array('file_get_contents', $args), true); $this->load(call_user_func_array('advanced_file_get_contents', $args), true);
// Throw an error if we can't properly load the dom. // Throw an error if we can't properly load the dom.
if (($error=error_get_last())!==null) { if (($error=error_get_last())!==null) {
$this->clear(); $this->clear();