New entrance page + new dedicated dir for bridges + some fixes and optimizations

This commit is contained in:
Yves ASTIER 2013-08-09 18:57:25 +02:00
parent 2797336bbe
commit 50b87448c6
6 changed files with 250 additions and 90 deletions

View File

@ -43,6 +43,9 @@ I'm sebsauvage, webmaster of [sebsauvage.net](http://sebsauvage.net), author of
Thanks to [Mitsukarenai](https://github.com/Mitsukarenai) for the inspiration.
Patch :
- Yves ASTIER (Draeli) : PHP optimizations, minor fixes, dynamic bridge list with all stuff behind
Licence
===
Code is public domain.

View File

@ -1,9 +1,10 @@
<?php
require_once('rss-bridge-lib.php');
/**
* RssBridgeFlickrExplore
* Returns the newest interesting images from http://www.flickr.com/explore
*
* @name Flickr Explore
* @description Returns the latest interesting images from Flickr
*/
class RssBridgeFlickrExplore extends RssBridgeAbstractClass
{
@ -12,7 +13,7 @@ class RssBridgeFlickrExplore extends RssBridgeAbstractClass
protected $bridgeDescription = 'Returns the latest interesting images from Flickr';
protected $cacheDuration = 360; // 6 hours. No need to get more.
protected function collectData($request) {
$html = file_get_html('http://www.flickr.com/explore') or $this->returnError('404 Not Found', 'ERROR: could not request Flickr.');
$html = file_get_html('http://www.flickr.com/explore') or $this->returnError(404, 'could not request Flickr.');
$this->items = Array();
foreach($html->find('span.photo_container') as $element) {
$item['uri'] = 'http://flickr.com'.$element->find('a',0)->href;
@ -25,5 +26,4 @@ class RssBridgeFlickrExplore extends RssBridgeAbstractClass
}
$bridge = new RssBridgeFlickrExplore();
$bridge->process();
?>
$bridge->process();

View File

@ -1,16 +1,17 @@
<?php
require_once('rss-bridge-lib.php');
/**
* RssBridgeGoogleMostRecent
* Search Google for most recent pages regarding a specific topic.
* Returns the 100 most recent links in results in past year,
* sorting by date (most recent first).
* Returns the 100 most recent links in results in past year, sorting by date (most recent first).
* Example:
* http://www.google.com/search?q=sebsauvage&num=100&complete=0&tbs=qdr:y,sbd:1
* complete=0&num=100 : get 100 results
* qdr:y : in past year
* sbd:1 : sort by date (will only work if qdr: is specified)
*
* @name Google search
* @description Returns most recent results from Google search.
* @use1(q="keyword search")
*/
class RssBridgeGoogleSearch extends RssBridgeAbstractClass
@ -22,9 +23,9 @@ class RssBridgeGoogleSearch extends RssBridgeAbstractClass
protected function collectData($request) {
$html = '';
if (isset($request['q'])) { /* keyword search mode */
$html = file_get_html('http://www.google.com/search?q='.urlencode($request['q']).'&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnError('404 Not Found', 'ERROR: no results for this query.');
$html = file_get_html('http://www.google.com/search?q='.urlencode($request['q']).'&num=100&complete=0&tbs=qdr:y,sbd:1') or $this->returnError(404, 'no results for this query.');
} else {
$this->returnError('400 Bad Request', 'ERROR: You must specify a keyword (?q=...).');
$this->returnError(400, 'You must specify a keyword (?q=...).');
}
$this->items = Array();
foreach($html->find('div[id=ires]',0)->find('li[class=g]') as $element) {
@ -37,5 +38,4 @@ class RssBridgeGoogleSearch extends RssBridgeAbstractClass
}
$bridge = new RssBridgeGoogleSearch();
$bridge->process();
?>
$bridge->process();

View File

@ -1,9 +1,12 @@
<?php
require_once('rss-bridge-lib.php');
/**
* RssBridgeTwitter
* Based on https://github.com/mitsukarenai/twitterbridge-noapi
*
* @name Twitter Bridge
* @description Returns user timelines or keyword search from http://twitter.com without using their API.
* @use1(q="keyword search")
* @use2(u="user timeline mode")
*/
class RssBridgeTwitter extends RssBridgeAbstractClass
{
@ -14,11 +17,11 @@ class RssBridgeTwitter extends RssBridgeAbstractClass
protected function collectData($request) {
$html = '';
if (isset($request['q'])) { /* keyword search mode */
$html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($request['q']).'+include:retweets&src=typd') or $this->returnError('404 Not Found', 'ERROR: no results for this query.');
$html = file_get_html('http://twitter.com/search/realtime?q='.urlencode($request['q']).'+include:retweets&src=typd') or $this->returnError(404, 'no results for this query.');
} elseif (isset($request['u'])) { /* user timeline mode */
$html = file_get_html('http://twitter.com/'.urlencode($request['u'])) or $this->returnError('404 Not Found', 'ERROR: requested username can\'t be found.');
$html = file_get_html('http://twitter.com/'.urlencode($request['u'])) or $this->returnError(404, 'requested username can\'t be found.');
} else {
$this->returnError('400 Bad Request', 'ERROR: You must specify a keyword (?q=...) or a Twitter username (?u=...).');
$this->returnError(400, 'You must specify a keyword (?q=...) or a Twitter username (?u=...).');
}
$this->items = Array();
foreach($html->find('div.tweet') as $tweet) {
@ -33,8 +36,7 @@ class RssBridgeTwitter extends RssBridgeAbstractClass
$this->items[] = $item;
}
}
}
}
$bridge = new RssBridgeTwitter();
$bridge->process();
?>
$bridge->process();

148
index.php Normal file
View File

@ -0,0 +1,148 @@
<?php
require_once('rss-bridge-lib.php');
define('PATH_BRIDGES_RELATIVE', 'bridges/');
define('PATH_BRIDGES', __DIR__ . DIRECTORY_SEPARATOR . 'bridges' . DIRECTORY_SEPARATOR);
/*
TODO :
- handle SSL detection
- create the object outside of the bridge
*/
/**
 * Scan a directory of bridge scripts and collect the metadata declared in their doc comments.
 *
 * Every "<reference>.php" file is tokenized and each doc comment (T_DOC_COMMENT) is searched for:
 *  - "@name <text>" and "@description <text>" (single-line values);
 *  - "@useN(arg="label", ...)" lines describing the arguments of usage number N.
 * Files whose doc comments do not declare a "@name" are left out of the result.
 *
 * @param string $pathDirBridge Path of the directory holding the bridges (trailing separator optional)
 * @return array Map of bridge reference (file name without ".php") to
 *               array('description' => ..., 'name' => ..., 'use' => array(N => array(argName => label))),
 *               sorted by bridge reference. Empty when the directory cannot be read.
 */
function searchBridgeInformation($pathDirBridge){
    $searchCommonPattern = array('description', 'name');
    $listBridge = array();

    // A missing or unreadable directory simply yields no bridges instead of raising a warning.
    if( !is_string($pathDirBridge) || $pathDirBridge === '' || !is_dir($pathDirBridge) ){
        return $listBridge;
    }

    // Accept the path with or without its trailing separator.
    $pathDirBridge = rtrim($pathDirBridge, '/' . DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;

    $handle = opendir($pathDirBridge);
    if( $handle === false ){
        return $listBridge;
    }

    while( false !== ($entry = readdir($handle)) ){
        // Anchored on both sides so that "Foo.php.bak" or "Foo.phps" are not mistaken for the bridge "Foo".
        if( !preg_match('@^([^.]+)\.php$@D', $entry, $out) ){
            continue;
        }

        $source = file_get_contents($pathDirBridge . $entry);
        if( $source === false ){ // Unreadable file: nothing to parse
            continue;
        }

        $infos = array(); // Information about the bridge
        foreach( token_get_all($source) as $token ){
            if( !is_array($token) || $token[0] !== T_DOC_COMMENT ){ // Only doc comments carry metadata
                continue;
            }
            $commentary = $token[1];

            // Single-line tags; rtrim() drops the "\r" left behind by CRLF files.
            foreach( $searchCommonPattern as $name ){
                if( preg_match('#@' . preg_quote($name, '#') . '\s+(.+)#', $commentary, $outComment) ){
                    $infos[$name] = rtrim($outComment[1]);
                }
            }

            // Initialise once per file: preg_match_all() always fills the named groups, so
            // resetting here for every doc comment would drop the uses found in an earlier one.
            if( !isset($infos['use']) ){
                $infos['use'] = array();
            }

            // Catch specific information about "use".
            preg_match_all('#@use(?<num>[1-9][0-9]*)\s?\((?<args>.+)\)(?:\r|\n)#', $commentary, $outComment);
            if( !isset($outComment['args']) || !is_array($outComment['args']) ){
                continue;
            }

            foreach( $outComment['args'] as $num => $args ){ // Each use
                preg_match_all('#(?<name>[a-z]+)="(?<value>.*)"(?:,|$)#U', $args, $outArg); // Arguments of the current use
                if( !isset($outArg['name']) ){
                    continue;
                }

                $usePos = $outComment['num'][$num]; // Number of the current use
                if( !isset($infos['use'][$usePos]) ){
                    $infos['use'][$usePos] = array();
                }

                foreach( $outArg['name'] as $numArg => $argName ){ // Each argument
                    $infos['use'][$usePos][$argName] = $outArg['value'][$numArg];
                }
            }
        }

        if( isset($infos['name']) ){ // Keep only bridges declaring at least a name
            $listBridge[$out[1]] = $infos;
        }
    }
    closedir($handle);

    // readdir() order depends on the filesystem; sort for a stable listing.
    ksort($listBridge, SORT_STRING);

    return $listBridge;
}
/**
 * Placeholder: the body is empty, so calling it does nothing and returns null.
 * NOTE(review): presumably meant to build the link to a bridge from its name
 * and arguments — TODO confirm the intent and implement.
 *
 * @param mixed $bridgeName Currently unused
 * @param mixed $arguments Currently unused
 */
function createNetworkLink($bridgeName, $arguments){
}
// Dispatch the request: "action=create&bridge=<reference>" hands over to the bridge script,
// anything else falls through to the listing page rendered below.
if( isset($_REQUEST['action']) ){
    switch($_REQUEST['action']){
        case 'create':
            if( isset($_REQUEST['bridge']) ){
                $bridge = $_REQUEST['bridge'];

                // Routing keys are removed so that the bridge only sees its own arguments in $_REQUEST.
                unset($_REQUEST['action'], $_REQUEST['bridge']);

                // The reference comes straight from the client and ends up in a require path:
                // refuse non-strings and anything containing a dot, a path separator or a NUL byte
                // so that it can only designate a "<reference>.php" file directly inside the bridge dir.
                $isSafeReference = is_string($bridge)
                    && $bridge !== ''
                    && strpbrk($bridge, "./\\\0") === false;

                if( $isSafeReference ){
                    $pathBridge = PATH_BRIDGES_RELATIVE . $bridge . '.php';
                    if( is_file($pathBridge) ){
                        require $pathBridge;
                        exit();
                    }
                }
            }
            break;
    }
}

// No bridge was executed: gather bridge metadata for the listing page.
$listBridge = searchBridgeInformation(PATH_BRIDGES);
// echo '<pre>';
// var_dump($listBridge);
// echo '</pre>';
?>
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8" />
    <title>Rss-bridge - Create your own network !</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="description" content="Rss-bridge" />
</head>
<body>
<?php
// Output formats offered by every form ("format" field value => button label).
$outputFormats = array('json' => 'Json', 'plaintext' => 'Text', 'html' => 'HTML', 'atom' => 'ATOM');
// Every dynamic value below comes from bridge file names and doc comments;
// it is HTML-escaped before being written into text nodes or attributes.
?>
    <ul class="list-bridge">
    <?php foreach($listBridge as $bridgeReference => $bridgeInformations): ?>
        <?php $bridgeRefHtml = htmlspecialchars((string) $bridgeReference, ENT_QUOTES, 'UTF-8'); ?>
        <li id="bridge-<?php echo $bridgeRefHtml ?>" data-ref="<?php echo $bridgeRefHtml ?>">
            <div class="name"><?php echo htmlspecialchars($bridgeInformations['name'], ENT_QUOTES, 'UTF-8') ?></div>
            <div class="informations">
                <p class="description">
                    <?php echo isset($bridgeInformations['description']) ? htmlspecialchars($bridgeInformations['description'], ENT_QUOTES, 'UTF-8') : 'No description provide' ?>
                </p>
                <?php if( isset($bridgeInformations['use']) && count($bridgeInformations['use']) > 0 ): ?>
                <ol class="list-use">
                    <?php foreach($bridgeInformations['use'] as $anUseNum => $anUse): ?>
                    <li data-use="<?php echo htmlspecialchars((string) $anUseNum, ENT_QUOTES, 'UTF-8') ?>">
                        <form method="POST" action="?">
                            <input type="hidden" name="action" value="create" />
                            <input type="hidden" name="bridge" value="<?php echo $bridgeRefHtml ?>" />
                            <?php foreach($anUse as $argName => $argDescription): ?>
                                <?php
                                    // One input per declared argument; the id ties the label to its input.
                                    $idArgHtml = htmlspecialchars('arg-' . $bridgeReference . '-' . $anUseNum . '-' . $argName, ENT_QUOTES, 'UTF-8');
                                ?>
                                <label for="<?php echo $idArgHtml ?>"><?php echo htmlspecialchars($argDescription, ENT_QUOTES, 'UTF-8') ?></label><input id="<?php echo $idArgHtml ?>" type="text" value="" name="<?php echo htmlspecialchars((string) $argName, ENT_QUOTES, 'UTF-8') ?>" /><br />
                            <?php endforeach; ?>
                            <?php foreach($outputFormats as $formatValue => $formatLabel): ?>
                            <button type="submit" name="format" value="<?php echo $formatValue ?>"><?php echo $formatLabel ?></button>
                            <?php endforeach; ?>
                        </form>
                    </li>
                    <?php endforeach; ?>
                </ol>
                <?php else: ?>
                <form method="POST" action="?">
                    <input type="hidden" name="action" value="create" />
                    <input type="hidden" name="bridge" value="<?php echo $bridgeRefHtml ?>" />
                    <?php foreach($outputFormats as $formatValue => $formatLabel): ?>
                    <button type="submit" name="format" value="<?php echo $formatValue ?>"><?php echo $formatLabel ?></button>
                    <?php endforeach; ?>
                </form>
                <?php endif; ?>
            </div>
        </li>
    <?php endforeach; ?>
    </ul>
</body>
</html>

View File

@ -7,32 +7,34 @@
ini_set('user_agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0');
date_default_timezone_set('UTC');
error_reporting(0);
//ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
ini_set('display_errors','1'); error_reporting(E_ALL); // For debugging only.
define('CACHEDIR','cache/'); // Directory containing cache files. Do not forget trailing slash.
define('CHARSET', 'UTF-8');
define('SimpleDomLib', 'vendor/simplehtmldom/simple_html_dom.php');
ob_start();
// Create cache directory if it does not exist.
if (!is_dir(CACHEDIR)) { mkdir(CACHEDIR,0705); chmod(CACHEDIR,0705); }
// Import DOM library.
if (!file_exists('simple_html_dom.php'))
if (!file_exists(SimpleDomLib))
{
header('HTTP/1.1 500 Internal Server Error');
header('HTTP/1.1 500 Internal Server Error');
header('Content-Type: text/plain');
die('"PHP Simple HTML DOM Parser" is missing. Get it from http://simplehtmldom.sourceforge.net/ and place the script "simple_html_dom.php" in the same folder to allow me to work.');
die('"PHP Simple HTML DOM Parser" is missing. Get it from http://simplehtmldom.sourceforge.net and place the script "simple_html_dom.php" in the same folder to allow me to work.');
}
require_once('simple_html_dom.php');
require_once(SimpleDomLib);
/**
* Abstract RSSBridge class on which all bridges are build upon.
* It provides utility methods (cache, ATOM feed building...)
*/
abstract class RssBridgeAbstractClass
{
abstract class RssBridgeAbstractClass {
/**
* $items is an array of dictionnaries. Each subclass must fill this array when collectData() is called.
* eg. $items = Array( Array('uri'=>'http://foo.bar', 'title'=>'My beautiful foobar', 'content'='Hello, <b>world !</b>','timestamp'=>'1375864834'),
* Array('uri'=>'http://toto.com', 'title'=>'Welcome to toto', 'content'='What is this website about ?','timestamp'=>'1375868313')
* eg. $items = array( array('uri'=>'http://foo.bar', 'title'=>'My beautiful foobar', 'content'='Hello, <b>world !</b>','timestamp'=>'1375864834'),
* array('uri'=>'http://toto.com', 'title'=>'Welcome to toto', 'content'='What is this website about ?','timestamp'=>'1375868313')
* )
* Keys in dictionnaries:
* uri (string;mandatory) = The URI the item points to.
@ -42,82 +44,83 @@ abstract class RssBridgeAbstractClass
* Other keys can be added, but will be ignored.
* $items will be used to build the ATOM feed, json and other outputs.
*/
var $items;
public $items;
private $contentType; // MIME type returned to browser.
/**
* Sets the content-type returns to browser.
* Example: $this->setContentType('text/html; charset=UTF-8')
*
* @param string Content-type returns to browser - Example: $this->setContentType('text/html; charset=UTF-8')
* @return this
*/
private function setContentType($value)
{
private function setContentType($value){
$this->contentType = $value;
header('Content-Type: '.$value);
return $this;
}
/**
* collectData() will be called to ask the bridge to go collect data on the net.
* All derived classes must implement this method.
* This method must fill $this->items with collected items.
* Input: $request : The incoming request (=$_GET). This can be used or ignored by the bridge.
* @param mixed $request : The incoming request (=$_GET). This can be used or ignored by the bridge.
*/
abstract protected function collectData($request);
/**
* Returns a HTTP error to user, with a message.
* Example: $this->returnError('404 Not Found', 'ERROR: no results.');
* Example: $this->returnError(404, 'no results.');
* @param integer $code
* @param string $message
*/
protected function returnError($code, $message)
{
header("HTTP/1.1 $code"); header('Content-Type: text/plain;charset=UTF-8');
die($message);
protected function returnError($code, $message){
$errors = array(
400 => 'Bad Request',
404 => 'Not Found',
501 => 'Not Implemented',
);
header('HTTP/1.1 ' . $code . ( isset($errors[$code]) ? ' ' . $errors[$code] : ''));
header('Content-Type: text/plain;charset=' . CHARSET);
die('ERROR : ' . $message);
}
/**
* Builds an ATOM feed from $this->items and return it to browser.
*/
private function returnATOM()
{
$this->setContentType('application/atom+xml; charset=UTF-8');
private function returnATOM(){
$this->setContentType('application/atom+xml; charset=' . CHARSET);
$https = ( isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on' ? 's' : '' );
$httpHost = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
$httpInfo = isset($_SERVER['PATH_INFO']) ? $_SERVER['PATH_INFO'] : '';
echo '<?xml version="1.0" encoding="UTF-8"?><feed xmlns="http://www.w3.org/2005/Atom" xmlns:thr="http://purl.org/syndication/thread/1.0" xml:lang="en-US">'."\n";
echo '<title type="text">'.htmlspecialchars($this->bridgeName).'</title>'."\n";
echo '<id>http'.(isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on' ? 's' : '')."://{$_SERVER['HTTP_HOST']}{$_SERVER['PATH_INFO']}".'/</id>'."\n";
echo '<id>http' . $https . '://' . $httpHost . $httpInfo . './</id>'."\n";
echo '<updated></updated>'."\n"; // FIXME
echo '<link rel="alternate" type="text/html" href="'.htmlspecialchars($this->bridgeURI).'" />'."\n";
echo '<link rel="self" href="http'.(isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on' ? 's' : '')."://{$_SERVER['HTTP_HOST']}".htmlentities($_SERVER['REQUEST_URI']).'" />'."\n"."\n";
echo '<link rel="self" href="http'.$https.'://' . $httpHost . htmlentities($_SERVER['REQUEST_URI']).'" />'."\n"."\n";
foreach($this->items as $item) {
echo '<entry><author><name>'.htmlspecialchars($this->bridgeName).'</name><uri>'.htmlspecialchars($this->bridgeURI).'</uri></author>'."\n";
echo '<title type="html"><![CDATA['.$item['title'].']]></title>'."\n";
echo '<link rel="alternate" type="text/html" href="'.$item['uri'].'" />'."\n";
echo '<id>'.$item['uri'].'</id>'."\n";
if (isset($item['timestamp']))
{
echo '<updated>'.date(DATE_ATOM, $item['timestamp']).'</updated>'."\n";
}
else
{
echo '<updated></updated>'."\n";
}
if (isset($item['content']))
{
echo '<content type="html"><![CDATA['.$item['content'].']]></content>'."\n";
}
else
{
echo '<content type="html"></content>'."\n";
}
echo '<updated>' . ( isset($item['timestamp']) ? date(DATE_ATOM, $item['timestamp']) : '' ) . '</updated>'."\n";
echo '<content type="html">' . ( isset($item['content']) ? '<![CDATA[' . $item['content'] . ']]>' : '') . '</content>'."\n";
// FIXME: Security: Disable Javascript ?
echo '</entry>'."\n\n";
}
}
echo '</feed>';
}
private function returnHTML()
{
$this->setContentType('text/html; charset=UTF-8');
private function returnHTML(){
$this->setContentType('text/html; charset=' . CHARSET);
echo '<html><head><title>'.htmlspecialchars($this->bridgeName).'</title>';
echo '<style>body{font-family:"Trebuchet MS",Verdana,Arial,Helvetica,sans-serif;font-size:10pt;background-color:#aaa;}div.rssitem{border:1px solid black;padding:5px;margin:10px;background-color:#fff;}</style></head><body>';
echo '<h1>'.htmlspecialchars($this->bridgeName).'</h1>';
@ -134,8 +137,7 @@ abstract class RssBridgeAbstractClass
/**
* Builds a JSON string from $this->items and return it to browser.
*/
private function returnJSON()
{
private function returnJSON(){
$this->setContentType('application/json');
echo json_encode($this->items);
}
@ -143,25 +145,23 @@ abstract class RssBridgeAbstractClass
/**
* Returns $this->items as raw php data.
*/
private function returnPlaintext()
{
$this->setContentType('text/plain;charset=UTF-8');
private function returnPlaintext(){
$this->setContentType('text/plain;charset=' . CHARSET);
print_r($this->items);
}
/**
* Start processing request and return response to browser.
*/
public function process()
{
public function process(){
$this->serveCachedVersion();
// Cache file does not exists or has expired: We re-fetch the results and cache it.
$this->collectData($_GET);
if (empty($this->items)) { $this->returnError('404 Not Found', 'ERROR: no results.'); }
$this->collectData($_REQUEST);
$format = 'atom';
if (!empty($_GET['format'])) { $format = $_GET['format']; }
if (empty($this->items)) { $this->returnError(404, 'no results.'); }
$format = isset($_REQUEST['format']) ? $_REQUEST['format'] : 'atom';
switch($format) {
case 'plaintext':
$this->returnPlaintext();
@ -179,15 +179,23 @@ abstract class RssBridgeAbstractClass
$this->storeReponseInCache();
}
/**
 * Builds the path of the cache file associated with the current request.
 * The file name is the SHA-1 of the request URI followed by the
 * URL-encoded request parameters, stored under CACHEDIR with a ".cache" suffix.
 *
 * @return string Cache path and filename
 */
private function getCacheName(){
    // Without request data there is nothing to derive a cache key from.
    if( isset($_REQUEST) === false ){
        $this->returnError(501, 'WTF ?');
    }

    $requestParameters = http_build_query($_REQUEST);
    $cacheKey = hash('sha1', $_SERVER['REQUEST_URI'] . $requestParameters);

    return CACHEDIR . $cacheKey . '.cache';
}
/**
* Returns the cached version of current request URI directly to the browser
* if it exists and if cache has not expired.
* Continues execution no cached version available.
*/
private function serveCachedVersion()
{
private function serveCachedVersion(){
// See if cache exists for this request
$cachefile = CACHEDIR.hash('sha1',$_SERVER['REQUEST_URI']).'.cache'; // Cache path and filename
$cachefile = $this->getCacheName(); // Cache path and filename
if (file_exists($cachefile)) { // The cache file exists.
if (time() - ($this->cacheDuration*60) < filemtime($cachefile)) { // Cache file has not expired. Serve it.
$data = json_decode(file_get_contents($cachefile),true);
@ -198,17 +206,16 @@ abstract class RssBridgeAbstractClass
}
}
}
/**
* Stores currently generated page in cache.
* @return this
*/
private function storeReponseInCache()
{
$cachefile = CACHEDIR.hash('sha1',$_SERVER['REQUEST_URI']).'.cache'; // Cache path and filename
$data = Array('data'=>ob_get_contents(), 'Content-Type'=>$this->contentType);
private function storeReponseInCache(){
$cachefile = $this->getCacheName(); // Cache path and filename
$data = array('data'=>ob_get_contents(), 'Content-Type'=>$this->contentType);
file_put_contents($cachefile,json_encode($data));
ob_end_flush();
return $this;
}
}
?>
}