diff --git a/.gitignore b/.gitignore index e14b1fbd..f9dc7066 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ ################# ## Eclipse ################# -simple_html_dom.php +vendor/* data/ *.pydevproject .project diff --git a/README.md b/README.md index 5ba12f66..5b07fb36 100644 --- a/README.md +++ b/README.md @@ -6,36 +6,30 @@ rss-bridge is a collection of independant php scripts capable of generating ATOM Supported sites/pages === - * `rss-bridge-flickr-explore.php` : [Latest interesting images](http://www.flickr.com/explore) from Flickr. - * `rss-bridge-googlesearch.php` : Most recent results from Google Search. Parameters: + * `FlickrExplore` : [Latest interesting images](http://www.flickr.com/explore) from Flickr. + * `GoogleSearch` : Most recent results from Google Search. Parameters: * q=keyword : Keyword search. - * `rss-bridge-twitter.php` : Twitter. Parameters: + * `Twitter` : Twitter. Parameters: * q=keyword : Keyword search. * u=username : Get user timeline. +Easy new bridge system (detail below) ! + Output format === Output format can be used in any rss-bridge: - * `format=atom` (default): ATOM Feed. - * `format=json` : jSon - * `format=html` : html page - * `format=plaintext` : raw text (php object, as returned by print_r) - -If format is not specified, ATOM format will be used. - -Examples -=== - * `rss-bridge-twitter.php?u=Dinnerbone` : Get Dinnerbone (Minecraft developer) timeline, in ATOM format. - * `rss-bridge-twitter.php?q=minecraft&format=html` : Everything Minecraft from Twitter, in html format. - * `rss-bridge-flickr-explore.php` : Latest interesting images from Flickr, in ATOM format. - + * `Atom` : ATOM Feed. 
+ * `Json` : Json + * `Html` : html page + * `Plaintext` : raw text (php object, as returned by print_r) Requirements === * php 5.3 - * [PHP Simple HTML DOM Parser](http://simplehtmldom.sourceforge.net/) + * [PHP Simple HTML DOM Parser](http://simplehtmldom.sourceforge.net) + * SSL library enabled in the PHP configuration Author === @@ -43,6 +37,9 @@ I'm sebsauvage, webmaster of [sebsauvage.net](http://sebsauvage.net), author of Thanks to [Mitsukarenai](https://github.com/Mitsukarenai) for the inspiration. +Patch : +- Yves ASTIER (Draeli) : PHP optimizations, fixes, dynamic bridge/format list with all stuff behind and extended cache system. Mail : contact@yves-astier.com + Licence === Code is public domain. @@ -51,7 +48,7 @@ Code is public domain. Technical notes === * There is a cache so that source services won't ban you even if you hammer the rss-bridge with requests. Each bridge has a different duration for the cache. The `cache` subdirectory will be automatically created. You can purge it whenever you want. - * To implement a new rss-bridge, import `rss-bridge-lib.php` and subclass `RssBridgeAbstractClass`. Look at existing bridges for examples. For items you generate in `$this->items`, only `uri` and `title` are mandatory in each item. `timestamp` and `content` are optional but recommended. Any additional key will be ignored by ATOM feed (but outputed to jSon). + * To implement a new rss-bridge, create a new class in the `bridges` directory that extends `BridgeAbstract`. Look at existing bridges for examples. For items you generate in `$this->items`, only `uri` and `title` are mandatory in each item. `timestamp` and `content` are optional but recommended. Any additional key will be ignored by the ATOM feed (but output to JSON). If you want your new bridge to appear in `index.php`, don't forget to add the docblock annotations (at least `@name`). 
Rant === diff --git a/bridges/FlickrExploreBridge.php b/bridges/FlickrExploreBridge.php new file mode 100644 index 00000000..c3b4976d --- /dev/null +++ b/bridges/FlickrExploreBridge.php @@ -0,0 +1,35 @@ +returnError('Could not request Flickr.', 404); + + foreach($html->find('span.photo_container') as $element) { + $item = new \Item(); + $item->uri = 'http://flickr.com'.$element->find('a',0)->href; + $item->thumbnailUri = $element->find('img',0)->getAttribute('data-defer-src'); + $item->content = ''; // FIXME: Filter javascript ? + $item->title = $element->find('a',0)->title; + $this->items[] = $item; + } + } + + public function getName(){ + return 'Flickr Explore'; + } + + public function getURI(){ + return 'http://www.flickr.com/explore'; + } + + public function getCacheDuration(){ + return 21600; // 6 hours + } +} \ No newline at end of file diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php new file mode 100644 index 00000000..79fcc40d --- /dev/null +++ b/bridges/GoogleSearchBridge.php @@ -0,0 +1,51 @@ +returnError('No results for this query.', 404); + } + else{ + $this->returnError('You must specify a keyword (?q=...).', 400); + } + + $emIsRes = $html->find('div[id=ires]',0); + if( !is_null($emIsRes) ){ + foreach($emIsRes->find('li[class=g]') as $element) { + $item = new \Item(); + $item->uri = $element->find('a[href]',0)->href; + $item->title = $element->find('h3',0)->plaintext; + $item->content = $element->find('span[class=st]',0)->plaintext; + $this->items[] = $item; + } + } + } + + public function getName(){ + return 'Google search'; + } + + public function getURI(){ + return 'http://google.com'; + } + + public function getCacheDuration(){ + return 1800; // 30 minutes + } +} \ No newline at end of file diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php new file mode 100644 index 00000000..53766f13 --- /dev/null +++ b/bridges/TwitterBridge.php @@ -0,0 +1,50 @@ +returnError('No results for this query.', 404); + } + 
elseif (isset($param['u'])) { /* user timeline mode */ + $html = file_get_html('http://twitter.com/'.urlencode($param['u'])) or $this->returnError('Requested username can\'t be found.', 404); + } + else { + $this->returnError('You must specify a keyword (?q=...) or a Twitter username (?u=...).', 400); + } + + foreach($html->find('div.tweet') as $tweet) { + $item = new \Item(); + $item->username = trim(substr($tweet->find('span.username', 0)->plaintext, 1)); // extract username and sanitize + $item->fullname = $tweet->getAttribute('data-name'); // extract fullname (pseudonym) + $item->avatar = $tweet->find('img', 0)->src; // get avatar link + $item->id = $tweet->getAttribute('data-tweet-id'); // get TweetID + $item->uri = 'https://twitter.com'.$tweet->find('a.details', 0)->getAttribute('href'); // get tweet link + $item->timestamp = $tweet->find('span._timestamp', 0)->getAttribute('data-time'); // extract tweet timestamp + $item->content = str_replace('href="/', 'href="https://twitter.com/', strip_tags($tweet->find('p.tweet-text', 0)->innertext, '')); // extract tweet text + $item->title = $item->fullname . ' (@'. $item->username . ') | ' . 
$item->content; + $this->items[] = $item; + } + } + + public function getName(){ + return 'Twitter Bridge'; + } + + public function getURI(){ + return 'http://twitter.com'; + } + + public function getCacheDuration(){ + return 300; // 5 minutes + } +} \ No newline at end of file diff --git a/caches/FileCache.php b/caches/FileCache.php new file mode 100644 index 00000000..594343d3 --- /dev/null +++ b/caches/FileCache.php @@ -0,0 +1,92 @@ +isPrepareCache(); + + $datas = json_decode(file_get_contents($this->getCacheFile()),true); + $items = array(); + foreach($datas as $aData){ + $item = new \Item(); + foreach($aData as $name => $value){ + $item->$name = $value; + } + $items[] = $item; + } + + return $items; + } + + public function saveData($datas){ + $this->isPrepareCache(); + + file_put_contents($this->getCacheFile(), json_encode($datas)); + + return $this; + } + + public function getTime(){ + $this->isPrepareCache(); + + $cacheFile = $this->getCacheFile(); + if( file_exists($cacheFile) ){ + return filemtime($cacheFile); + } + + return false; + } + + /** + * Cache is prepared ? + * Note : Cache name is based on request information, then cache must be prepare before use + * @return \Exception|true + */ + protected function isPrepareCache(){ + if( is_null($this->param) ){ + throw new \Exception('Please feed "prepare" method before try to load'); + } + + return true; + } + + /** + * Return cache path (and create if not exist) + * @return string Cache path + */ + protected function getCachePath(){ + $cacheDir = __DIR__ . '/../cache/'; // FIXME : configuration ? + + // FIXME : implement recursive dir creation + if( is_null($this->cacheDirCreated) && !is_dir($cacheDir) ){ + $this->cacheDirCreated = true; + + mkdir($cacheDir,0705); + chmod($cacheDir,0705); + } + + return $cacheDir; + } + + /** + * Get the file name use for cache store + * @return string Path to the file cache + */ + protected function getCacheFile(){ + return $this->getCachePath() . 
$this->getCacheName(); + } + + /** + * Determines file name for store the cache + * return string + */ + protected function getCacheName(){ + $this->isPrepareCache(); + + $stringToEncode = $_SERVER['REQUEST_URI'] . http_build_query($this->param); + return hash('sha1', $stringToEncode) . '.cache'; + } +} \ No newline at end of file diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php new file mode 100644 index 00000000..2df9090c --- /dev/null +++ b/formats/AtomFormat.php @@ -0,0 +1,79 @@ +getExtraInfos(); + $title = htmlspecialchars($extraInfos['name']); + $uri = htmlspecialchars($extraInfos['uri']); + + $entries = ''; + foreach($this->getDatas() as $data){ + $entryName = is_null($data->name) ? $title : $data->name; + $entryAuthor = is_null($data->author) ? $uri : $data->author; + $entryTitle = is_null($data->title) ? '' : $data->title; + $entryUri = is_null($data->uri) ? '' : $data->uri; + $entryTimestamp = is_null($data->timestamp) ? '' : date(DATE_ATOM, $data->timestamp); + $entryContent = is_null($data->content) ? '' : 'content) . ']]>'; + + $entries .= << + + {$entryName} + {$entryAuthor} + + <![CDATA[{$entryTitle}]]> + + {$entryUri} + {$entryTimestamp} + {$entryContent} + + +EOD; + } + + /* + TODO : + - Security: Disable Javascript ? + - : Define new extra info ? + - : RFC look with xhtml, keep this in spite of ? + */ + + /* Data are prepared, now let's begin the "MAGIE !!!" */ + $toReturn = ''; + $toReturn .= << + + {$title} + http{$https}://{$httpHost}{$httpInfo}/ + + + +{$entries} + +EOD; + + return $toReturn; + } + + public function display(){ + // $this + // ->setContentType('application/atom+xml; charset=' . 
$this->getCharset()) + // ->callContentType(); + + return parent::display(); + } +} \ No newline at end of file diff --git a/formats/HtmlFormat.php b/formats/HtmlFormat.php new file mode 100644 index 00000000..86c61737 --- /dev/null +++ b/formats/HtmlFormat.php @@ -0,0 +1,62 @@ +getExtraInfos(); + $title = htmlspecialchars($extraInfos['name']); + $uri = htmlspecialchars($extraInfos['uri']); + + $entries = ''; + foreach($this->getDatas() as $data){ + $entryUri = is_null($data->uri) ? $uri : $data->uri; + $entryTitle = is_null($data->title) ? '' : htmlspecialchars(strip_tags($data->title)); + $entryTimestamp = is_null($data->timestamp) ? '' : '' . date(DATE_ATOM, $data->timestamp) . ''; + $entryContent = is_null($data->content) ? '' : '

' . $data->content . '

'; + + $entries .= << +

{$entryTitle}

+ {$entryTimestamp} + {$entryContent} + + +EOD; + } + + $styleCss = <<<'EOD' +body{font-family:"Trebuchet MS",Verdana,Arial,Helvetica,sans-serif;font-size:10pt;background-color:#aaa;}div.rssitem{border:1px solid black;padding:5px;margin:10px;background-color:#fff;} +EOD; + + /* Data are prepared, now let's begin the "MAGIE !!!" */ + $toReturn = << + + {$title} + + + +

{$title}

+{$entries} + + +EOD; + + return $toReturn; + } + + public function display(){ + $this + ->setContentType('text/html; charset=' . $this->getCharset()) + ->callContentType(); + + return parent::display(); + } +} \ No newline at end of file diff --git a/formats/JsonFormat.php b/formats/JsonFormat.php new file mode 100644 index 00000000..400f91f7 --- /dev/null +++ b/formats/JsonFormat.php @@ -0,0 +1,24 @@ +items and return it to browser. +* +* @name Json +*/ +class JsonFormat extends FormatAbstract{ + + public function stringify(){ + // FIXME : sometime content can be null, transform to empty string + $datas = $this->getDatas(); + + return json_encode($datas); + } + + public function display(){ + $this + ->setContentType('application/json') + ->callContentType(); + + return parent::display(); + } +} \ No newline at end of file diff --git a/formats/PlaintextFormat.php b/formats/PlaintextFormat.php new file mode 100644 index 00000000..32b4e020 --- /dev/null +++ b/formats/PlaintextFormat.php @@ -0,0 +1,22 @@ +items as raw php data. +* +* @name Plaintext +*/ +class PlaintextFormat extends FormatAbstract{ + + public function stringify(){ + $datas = $this->getDatas(); + return print_r($datas, true); + } + + public function display(){ + $this + ->setContentType('text/plain;charset=' . $this->getCharset()) + ->callContentType(); + + return parent::display(); + } +} \ No newline at end of file diff --git a/index.php b/index.php new file mode 100644 index 00000000..4c5ba76e --- /dev/null +++ b/index.php @@ -0,0 +1,172 @@ +setCache($cache) // Comment this lign for avoid cache use + ->setDatas($_REQUEST); + + // Data transformation + $format = Format::create($format); + $format + ->setDatas($bridge->getDatas()) + ->setExtraInfos(array( + 'name' => $bridge->getName(), + 'uri' => $bridge->getURI(), + )) + ->display(); + die; + } + break; + } + } +} +catch(HttpException $e){ + header('HTTP/1.1 ' . $e->getCode() . ' ' . 
Http::getMessageForCode($e->getCode())); + header('Content-Type: text/plain'); + die($e->getMessage()); +} +catch(\Exception $e){ + die($e->getMessage()); +} + +function getHelperButtonFormat($value, $name){ + return ''; +} + +$bridges = Bridge::searchInformation(); +$formats = Format::searchInformation(); +?> + + + + + Rss-bridge - Create your own network ! + + + + + +
+

RSS-Bridge

+ +

+ RSS-Bridge +

+
+ + \ No newline at end of file diff --git a/lib/Bridge.php b/lib/Bridge.php new file mode 100644 index 00000000..7b6c4751 --- /dev/null +++ b/lib/Bridge.php @@ -0,0 +1,187 @@ +items; + } + + /** + * Defined datas with parameters depending choose bridge + * Note : you can defined a cache before with "setCache" + * @param array $param $_REQUEST, $_GET, $_POST, or array with bridge expected paramters + */ + public function setDatas(array $param){ + if( !is_null($this->cache) ){ + $this->cache->prepare($param); + $time = $this->cache->getTime(); + } + else{ + $time = false; // No cache ? No time ! + } + + if( $time !== false && ( time() - $this->getCacheDuration() < $time ) ){ // Cache file has not expired. Serve it. + $this->items = $this->cache->loadData(); + } + else{ + $this->collectData($param); + + if( !is_null($this->cache) ){ // Cache defined ? We go to refresh is memory :D + $this->cache->saveData($this->getDatas()); + } + } + } + + /** + * Define default duraction for cache + */ + public function getCacheDuration(){ + return 3600; + } + + /** + * Defined cache object to use + */ + public function setCache(\CacheAbstract $cache){ + $this->cache = $cache; + + return $this; + } +} + +class Bridge{ + + static protected $dirBridge; + + public function __construct(){ + throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.'); + } + + /** + * Create a new bridge object + * @param string $nameBridge Defined bridge name you want use + * @return Bridge object dedicated + */ + static public function create($nameBridge){ + if( !static::isValidNameBridge($nameBridge) ){ + throw new \InvalidArgumentException('Name bridge must be at least one uppercase follow or not by alphanumeric or dash characters.'); + } + + $pathBridge = self::getDir() . $nameBridge . 
'.php'; + + if( !file_exists($pathBridge) ){ + throw new \Exception('The bridge you looking for does not exist.'); + } + + require_once $pathBridge; + + return new $nameBridge(); + } + + static public function setDir($dirBridge){ + if( !is_string($dirBridge) ){ + throw new \InvalidArgumentException('Dir bridge must be a string.'); + } + + if( !file_exists($dirBridge) ){ + throw new \Exception('Dir bridge does not exist.'); + } + + self::$dirBridge = $dirBridge; + } + + static public function getDir(){ + $dirBridge = self::$dirBridge; + + if( is_null($dirBridge) ){ + throw new \LogicException(__CLASS__ . ' class need to know bridge path !'); + } + + return $dirBridge; + } + + static public function isValidNameBridge($nameBridge){ + return preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameBridge); + } + + /** + * Read bridge dir and catch informations about each bridge depending annotation + * @return array Informations about each bridge + */ + static public function searchInformation(){ + $pathDirBridge = self::getDir(); + + $listBridge = array(); + + $searchCommonPattern = array('description', 'name'); + + $dirFiles = scandir($pathDirBridge); + if( $dirFiles !== false ){ + foreach( $dirFiles as $fileName ){ + if( preg_match('@([^.]+)\.php@U', $fileName, $out) ){ // Is PHP file ? + $infos = array(); // Information about the bridge + $resParse = token_get_all(file_get_contents($pathDirBridge . $fileName)); // Parse PHP file + foreach($resParse as $v){ + if( is_array($v) && $v[0] == T_DOC_COMMENT ){ // Lexer node is COMMENT ? + $commentary = $v[1]; + foreach( $searchCommonPattern as $name){ // Catch information with common pattern + preg_match('#@' . preg_quote($name, '#') . '\s+(.+)#', $commentary, $outComment); + if( isset($outComment[1]) ){ + $infos[$name] = $outComment[1]; + } + } + + preg_match_all('#@use(?[1-9][0-9]*)\s?\((?.+)\)(?:\r|\n)#', $commentary, $outComment); // Catch specific information about "use". 
+ if( isset($outComment['args']) && is_array($outComment['args']) ){ + $infos['use'] = array(); + foreach($outComment['args'] as $num => $args){ // Each use + preg_match_all('#(?[a-z]+)="(?.*)"(?:,|$)#U', $args, $outArg); // Catch arguments for current use + if( isset($outArg['name']) ){ + $usePos = $outComment['num'][$num]; // Current use name + if( !isset($infos['use'][$usePos]) ){ // Not information actually for this "use" ? + $infos['use'][$usePos] = array(); + } + + foreach($outArg['name'] as $numArg => $name){ // Each arguments + $infos['use'][$usePos][$name] = $outArg['value'][$numArg]; + } + } + } + } + } + } + + if( isset($infos['name']) ){ // If informations containt at least a name + $listBridge[$out[1]] = $infos; + } + } + } + } + + return $listBridge; + } +} \ No newline at end of file diff --git a/lib/Cache.php b/lib/Cache.php new file mode 100644 index 00000000..9bf8ffa8 --- /dev/null +++ b/lib/Cache.php @@ -0,0 +1,72 @@ +param = $param; + + return $this; + } +} + +class Cache{ + + static protected $dirCache; + + public function __construct(){ + throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.'); + } + + static public function create($nameCache){ + if( !static::isValidNameCache($nameCache) ){ + throw new \InvalidArgumentException('Name cache must be at least one uppercase follow or not by alphanumeric or dash characters.'); + } + + $pathCache = self::getDir() . $nameCache . 
'.php'; + + if( !file_exists($pathCache) ){ + throw new \Exception('The cache you looking for does not exist.'); + } + + require_once $pathCache; + + return new $nameCache(); + } + + static public function setDir($dirCache){ + if( !is_string($dirCache) ){ + throw new \InvalidArgumentException('Dir cache must be a string.'); + } + + if( !file_exists($dirCache) ){ + throw new \Exception('Dir cache does not exist.'); + } + + self::$dirCache = $dirCache; + } + + static public function getDir(){ + $dirCache = self::$dirCache; + + if( is_null($dirCache) ){ + throw new \LogicException(__CLASS__ . ' class need to know cache path !'); + } + + return $dirCache; + } + + static public function isValidNameCache($nameCache){ + return preg_match('@^[A-Z][a-zA-Z0-9-]*$@', $nameCache); + } +} \ No newline at end of file diff --git a/lib/Exceptions.php b/lib/Exceptions.php new file mode 100644 index 00000000..202060f6 --- /dev/null +++ b/lib/Exceptions.php @@ -0,0 +1,61 @@ + 'OK', + 201 => 'Created', + 202 => 'Accepted', + 300 => 'Multiple Choices', + 301 => 'Moved Permanently', + 302 => 'Moved Temporarily', + 307 => 'Temporary Redirect', + 310 => 'Too many Redirects', + 400 => 'Bad Request', + 401 => 'Unauthorized', + 402 => 'Payment Required', + 403 => 'Forbidden', + 404 => 'Not Found', + 405 => 'Method Not', + 406 => 'Not Acceptable', + 407 => 'Proxy Authentication Required', + 408 => 'Request Time-out', + 409 => 'Conflict', + 410 => 'Gone', + 411 => 'Length Required', + 412 => 'Precondition Failed', + 413 => 'Request Entity Too Large', + 414 => 'Request-URI Too Long', + 415 => 'Unsupported Media Type', + 416 => 'Requested range unsatisfiable', + 417 => 'Expectation failed', + 500 => 'Internal Server Error', + 501 => 'Not Implemented', + 502 => 'Bad Gateway', + 503 => 'Service Unavailable', + 504 => 'Gateway Time-out', + 508 => 'Loop detected', + ); + } +} \ No newline at end of file diff --git a/lib/Format.php b/lib/Format.php new file mode 100644 index 00000000..70c8d7ab --- 
/dev/null +++ b/lib/Format.php @@ -0,0 +1,183 @@ +charset = $charset; + + return $this; + } + + public function getCharset(){ + $charset = $this->charset; + + return is_null($charset) ? self::DEFAULT_CHARSET : $charset; + } + + protected function setContentType($contentType){ + $this->contentType = $contentType; + + return $this; + } + + protected function callContentType(){ + header('Content-Type: ' . $this->contentType); + } + + public function display(){ + echo $this->stringify(); + + return $this; + } + + public function setDatas(array $datas){ + $this->datas = $datas; + + return $this; + } + + public function getDatas(){ + if( !is_array($this->datas) ){ + throw new \LogicException('Feed the ' . get_class($this) . ' with "setDatas" method before !'); + } + + return $this->datas; + } + + /** + * Define common informations can be required by formats and set default value for unknow values + * @param array $extraInfos array with know informations (there isn't merge !!!) + * @return this + */ + public function setExtraInfos(array $extraInfos = array()){ + foreach(array('name', 'uri') as $infoName){ + if( !isset($extraInfos[$infoName]) ){ + $extraInfos[$infoName] = ''; + } + } + + $this->extraInfos = $extraInfos; + + return $this; + } + + /** + * Return extra infos + * @return array See "setExtraInfos" detail method to know what extra are disponibles + */ + public function getExtraInfos(){ + if( is_null($this->extraInfos) ){ // No extra info ? + $this->setExtraInfos(); // Define with default value + } + + return $this->extraInfos; + } +} + +class Format{ + + static protected $dirFormat; + + public function __construct(){ + throw new \LogicException('Please use ' . __CLASS__ . '::create for new object.'); + } + + static public function create($nameFormat){ + if( !static::isValidNameFormat($nameFormat) ){ + throw new \InvalidArgumentException('Name format must be at least one uppercase follow or not by alphabetic characters.'); + } + + $pathFormat = self::getDir() . 
$nameFormat . '.php'; + + if( !file_exists($pathFormat) ){ + throw new \Exception('The format you looking for does not exist.'); + } + + require_once $pathFormat; + + return new $nameFormat(); + } + + static public function setDir($dirFormat){ + if( !is_string($dirFormat) ){ + throw new \InvalidArgumentException('Dir format must be a string.'); + } + + if( !file_exists($dirFormat) ){ + throw new \Exception('Dir format does not exist.'); + } + + self::$dirFormat = $dirFormat; + } + + static public function getDir(){ + $dirFormat = self::$dirFormat; + + if( is_null($dirFormat) ){ + throw new \LogicException(__CLASS__ . ' class need to know format path !'); + } + + return $dirFormat; + } + + static public function isValidNameFormat($nameFormat){ + return preg_match('@^[A-Z][a-zA-Z]*$@', $nameFormat); + } + + /** + * Read format dir and catch informations about each format depending annotation + * @return array Informations about each format + */ + static public function searchInformation(){ + $pathDirFormat = self::getDir(); + + $listFormat = array(); + + $searchCommonPattern = array('name'); + + $dirFiles = scandir($pathDirFormat); + if( $dirFiles !== false ){ + foreach( $dirFiles as $fileName ){ + if( preg_match('@([^.]+)\.php@U', $fileName, $out) ){ // Is PHP file ? + $infos = array(); // Information about the bridge + $resParse = token_get_all(file_get_contents($pathDirFormat . $fileName)); // Parse PHP file + foreach($resParse as $v){ + if( is_array($v) && $v[0] == T_DOC_COMMENT ){ // Lexer node is COMMENT ? + $commentary = $v[1]; + foreach( $searchCommonPattern as $name){ // Catch information with common pattern + preg_match('#@' . preg_quote($name, '#') . 
'\s+(.+)#', $commentary, $outComment); + if( isset($outComment[1]) ){ + $infos[$name] = $outComment[1]; + } + } + } + } + + if( isset($infos['name']) ){ // If informations containt at least a name + $listFormat[$out[1]] = $infos; + } + } + } + } + + return $listFormat; + } +} \ No newline at end of file diff --git a/lib/Item.php b/lib/Item.php new file mode 100644 index 00000000..806ecca8 --- /dev/null +++ b/lib/Item.php @@ -0,0 +1,16 @@ +$name = $value; + } + + public function __get($name){ + return isset($this->$name) ? $this->$name : null; + } +} \ No newline at end of file diff --git a/lib/RssBridge.php b/lib/RssBridge.php new file mode 100644 index 00000000..a7fea279 --- /dev/null +++ b/lib/RssBridge.php @@ -0,0 +1,42 @@ +collectData($_REQUEST); + + // Data transformation + Format::setDir(__DIR__ . '/formats/'); + $format = Format::create('Atom'); + $format + ->setDatas($bridge->getDatas()) + ->setExtraInfos(array( + 'name' => $bridge->getName(), + 'uri' => $bridge->getURI(), + )) + ->display(); + +*/ \ No newline at end of file diff --git a/rss-bridge-flickr-explore.php b/rss-bridge-flickr-explore.php deleted file mode 100644 index e7f153eb..00000000 --- a/rss-bridge-flickr-explore.php +++ /dev/null @@ -1,29 +0,0 @@ -returnError('404 Not Found', 'ERROR: could not request Flickr.'); - $this->items = Array(); - foreach($html->find('span.photo_container') as $element) { - $item['uri'] = 'http://flickr.com'.$element->find('a',0)->href; - $item['thumbnailUri'] = $element->find('img',0)->getAttribute('data-defer-src'); - $item['content'] = ''; // FIXME: Filter javascript ? 
- $item['title'] = $element->find('a',0)->title; - $this->items[] = $item; - } - } -} - -$bridge = new RssBridgeFlickrExplore(); -$bridge->process(); -?> \ No newline at end of file diff --git a/rss-bridge-googlesearch.php b/rss-bridge-googlesearch.php deleted file mode 100644 index c19a6ca3..00000000 --- a/rss-bridge-googlesearch.php +++ /dev/null @@ -1,41 +0,0 @@ -returnError('404 Not Found', 'ERROR: no results for this query.'); - } else { - $this->returnError('400 Bad Request', 'ERROR: You must specify a keyword (?q=...).'); - } - $this->items = Array(); - foreach($html->find('div[id=ires]',0)->find('li[class=g]') as $element) { - $item['uri'] = $element->find('a[href]',0)->href; - $item['title'] = $element->find('h3',0)->plaintext; - $item['content'] = $element->find('span[class=st]',0)->plaintext; - $this->items[] = $item; - } - } -} - -$bridge = new RssBridgeGoogleSearch(); -$bridge->process(); -?> \ No newline at end of file diff --git a/rss-bridge-lib.php b/rss-bridge-lib.php deleted file mode 100644 index b4ff7f40..00000000 --- a/rss-bridge-lib.php +++ /dev/null @@ -1,214 +0,0 @@ -'http://foo.bar', 'title'=>'My beautiful foobar', 'content'='Hello, world !','timestamp'=>'1375864834'), - * Array('uri'=>'http://toto.com', 'title'=>'Welcome to toto', 'content'='What is this website about ?','timestamp'=>'1375868313') - * ) - * Keys in dictionnaries: - * uri (string;mandatory) = The URI the item points to. - * title (string;mandatory) = Title of item - * content (string;optionnal) = item content (usually HTML code) - * timestamp (string;optionnal) = item date. Must be in EPOCH format. - * Other keys can be added, but will be ignored. - * $items will be used to build the ATOM feed, json and other outputs. - */ - var $items; - - private $contentType; // MIME type returned to browser. - - /** - * Sets the content-type returns to browser. 
- * Example: $this->setContentType('text/html; charset=UTF-8') - */ - private function setContentType($value) - { - $this->contentType = $value; - header('Content-Type: '.$value); - } - - /** - * collectData() will be called to ask the bridge to go collect data on the net. - * All derived classes must implement this method. - * This method must fill $this->items with collected items. - * Input: $request : The incoming request (=$_GET). This can be used or ignored by the bridge. - */ - abstract protected function collectData($request); - - /** - * Returns a HTTP error to user, with a message. - * Example: $this->returnError('404 Not Found', 'ERROR: no results.'); - */ - protected function returnError($code, $message) - { - header("HTTP/1.1 $code"); header('Content-Type: text/plain;charset=UTF-8'); - die($message); - } - - /** - * Builds an ATOM feed from $this->items and return it to browser. - */ - private function returnATOM() - { - $this->setContentType('application/atom+xml; charset=UTF-8'); - echo ''."\n"; - echo ''.htmlspecialchars($this->bridgeName).''."\n"; - echo 'http'.(isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on' ? 's' : '')."://{$_SERVER['HTTP_HOST']}{$_SERVER['PATH_INFO']}".'/'."\n"; - echo ''."\n"; // FIXME - echo ''."\n"; - echo ''."\n"."\n"; - - foreach($this->items as $item) { - echo ''.htmlspecialchars($this->bridgeName).''.htmlspecialchars($this->bridgeURI).''."\n"; - echo '<![CDATA['.$item['title'].']]>'."\n"; - echo ''."\n"; - echo ''.$item['uri'].''."\n"; - if (isset($item['timestamp'])) - { - echo ''.date(DATE_ATOM, $item['timestamp']).''."\n"; - - } - else - { - echo ''."\n"; - } - if (isset($item['content'])) - { - echo ''."\n"; - } - else - { - echo ''."\n"; - } - // FIXME: Security: Disable Javascript ? - echo ''."\n\n"; - } - echo ''; - } - - private function returnHTML() - { - $this->setContentType('text/html; charset=UTF-8'); - echo ''.htmlspecialchars($this->bridgeName).''; - echo ''; - echo '

'.htmlspecialchars($this->bridgeName).'

'; - foreach($this->items as $item) { - echo '

'.htmlspecialchars(strip_tags($item['title'])).'

'; - if (isset($item['timestamp'])) { echo ''.date(DATE_ATOM, $item['timestamp']).''; } - if (isset($item['content'])) { echo '

'.$item['content'].'

'; } - - echo "
\n\n"; - } - echo ''; - } - - /** - * Builds a JSON string from $this->items and return it to browser. - */ - private function returnJSON() - { - $this->setContentType('application/json'); - echo json_encode($this->items); - } - - /** - * Returns $this->items as raw php data. - */ - private function returnPlaintext() - { - $this->setContentType('text/plain;charset=UTF-8'); - print_r($this->items); - } - - /** - * Start processing request and return response to browser. - */ - public function process() - { - $this->serveCachedVersion(); - - // Cache file does not exists or has expired: We re-fetch the results and cache it. - $this->collectData($_GET); - if (empty($this->items)) { $this->returnError('404 Not Found', 'ERROR: no results.'); } - - $format = 'atom'; - if (!empty($_GET['format'])) { $format = $_GET['format']; } - switch($format) { - case 'plaintext': - $this->returnPlaintext(); - break; - case 'json': - $this->returnJSON(); - break; - case 'html': - $this->returnHTML(); - break; - default: - $this->returnATOM(); - } - - $this->storeReponseInCache(); - } - - /** - * Returns the cached version of current request URI directly to the browser - * if it exists and if cache has not expired. - * Continues execution no cached version available. - */ - private function serveCachedVersion() - { - // See if cache exists for this request - $cachefile = CACHEDIR.hash('sha1',$_SERVER['REQUEST_URI']).'.cache'; // Cache path and filename - if (file_exists($cachefile)) { // The cache file exists. - if (time() - ($this->cacheDuration*60) < filemtime($cachefile)) { // Cache file has not expired. Serve it. - $data = json_decode(file_get_contents($cachefile),true); - header('Content-Type: '.$data['Content-Type']); // Send proper MIME Type - header('X-Cached-Version: '.date(DATE_ATOM, filemtime($cachefile))); - echo $data['data']; - exit(); - } - } - } - - /** - * Stores currently generated page in cache. 
- */ - private function storeReponseInCache() - { - $cachefile = CACHEDIR.hash('sha1',$_SERVER['REQUEST_URI']).'.cache'; // Cache path and filename - $data = Array('data'=>ob_get_contents(), 'Content-Type'=>$this->contentType); - file_put_contents($cachefile,json_encode($data)); - ob_end_flush(); - } -} - -?> \ No newline at end of file diff --git a/rss-bridge-twitter.php b/rss-bridge-twitter.php deleted file mode 100644 index b08ac0d9..00000000 --- a/rss-bridge-twitter.php +++ /dev/null @@ -1,40 +0,0 @@ -returnError('404 Not Found', 'ERROR: no results for this query.'); - } elseif (isset($request['u'])) { /* user timeline mode */ - $html = file_get_html('http://twitter.com/'.urlencode($request['u'])) or $this->returnError('404 Not Found', 'ERROR: requested username can\'t be found.'); - } else { - $this->returnError('400 Bad Request', 'ERROR: You must specify a keyword (?q=...) or a Twitter username (?u=...).'); - } - $this->items = Array(); - foreach($html->find('div.tweet') as $tweet) { - $item['username'] = trim(substr($tweet->find('span.username', 0)->plaintext, 1)); // extract username and sanitize - $item['fullname'] = $tweet->getAttribute('data-name'); // extract fullname (pseudonym) - $item['avatar'] = $tweet->find('img', 0)->src; // get avatar link - $item['id'] = $tweet->getAttribute('data-tweet-id'); // get TweetID - $item['uri'] = 'https://twitter.com'.$tweet->find('a.details', 0)->getAttribute('href'); // get tweet link - $item['timestamp'] = $tweet->find('span._timestamp', 0)->getAttribute('data-time'); // extract tweet timestamp - $item['content'] = str_replace('href="/', 'href="https://twitter.com/', strip_tags($tweet->find('p.tweet-text', 0)->innertext, '')); // extract tweet text - $item['title'] = $item['fullname'] . ' (@'.$item['username'] . ') | ' . $item['content']; - $this->items[] = $item; - } - } -} - -$bridge = new RssBridgeTwitter(); -$bridge->process(); -?> \ No newline at end of file