diff --git a/0.1/README.md b/0.1/README.md new file mode 100755 index 0000000..d68b214 --- /dev/null +++ b/0.1/README.md @@ -0,0 +1,18 @@ +Projet Autoblog serie 0.1 +============== + +Auteur: Sebastien Sauvage + +Licence: Domaine Public + +- À propos du Projet Autoblog + +lire: http://sebsauvage.net/streisand.me/fr/ + +- Contraintes techniques + +voir: http://sebsauvage.net/streisand.me/fr/tech.html + +- Instructions + +Personnalisez vvb.ini. Envoyez index.php et vvb.ini sur votre site web, dans le répertoire de votre choix. Terminé ! diff --git a/0.1/index.php b/0.1/index.php new file mode 100755 index 0000000..7fe2161 --- /dev/null +++ b/0.1/index.php @@ -0,0 +1,403 @@ += 0) { libxml_disable_entity_loader(true); } + +$CONFIG=parse_ini_file('vvb.ini') or die('Missing or bad config file vvb.ini'); // Read config file. +$CONFIG['ARTICLES_PER_PAGE']=10; +$CONFIG['DOWNLOAD_MEDIA_TYPES']=array('jpeg','jpg','gif','png','pdf','txt','odt'); // Media types which will be downloaded. +$CONFIG['MEDIA_TO_DOWNLOAD']=array(); // List of media to download in background. +// ================================================================================================== +/* Callback for the preg_replace_callback() function in remapImageUrls() which remaps URLs to point to local cache. + (src=... and href=...) */ +function remap_callback($matches) +{ + global $CONFIG; + $attr = $matches[1]; $url = $matches[2]; $srchost=parse_url($url,PHP_URL_HOST); + if (!mediaAuthorized($url)) { return $attr.'="'.$url.'"'; } // Not authorized: do not remap URL. + if (!file_exists('media/'.sanitize($url)) ) { $CONFIG['MEDIA_TO_DOWNLOAD'][] = $url; } // If media not present in the cache, add URL to list of media to download in background. + return $attr.'="?m='.$url.'"'; // Return remapped URL. +} + +/* Remaps image URL to point to local cache (src= and href=) +eg. src="http://toto.com/..." --> src="?m=http://toto.com/..." +*/ +function remapImageUrls($html) +{ + return preg_replace_callback("@(src|href)=[\"\'](.+?)[\"\']@i",'remap_callback',$html); +} + +/* updateFeed(): Update articles database from a RSS2.0 feed. + Articles deleted from the feed are not deleted from the database. + You can force the refresh by passing ?force_the_refresh in URL. +*/ +function updateFeed() +{ + global $CONFIG; + // Only update feed if last check was > 60 minutes + // but you can force it with force_the_refresh in GET parameters. + if (@filemtime('store')>time()-(3600) && !isset($_GET['force_the_refresh'])) { return; } + + // Read database from disk + $feed_items=(file_exists('store') ? unserialize(file_get_contents('store')) : array() ); + + // Read the feed and update the database. + $xml = simplexml_load_file($CONFIG['FEED_URL']); + if (isset($xml->entry)) // ATOM feed. + { + foreach ($xml->entry as $item) + { + $pubDate=$item->published; if (!$pubDate) { $pubDate=$item->updated; } + $i=array('title'=>strval($item->title),'link'=>strval($item->link['href']),'guid'=>strval($item->id),'pubDate'=>strval($pubDate), + 'description'=>'','content'=>remapImageUrls(strval($item->content))); + $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate'])); + $feed_items[$i['dateiso']] = $i; + } + } + elseif (isset($xml->item)) // RSS 1.0 /RDF + { + foreach ($xml->item as $item) + { + $guid =$item->attributes('http://www.w3.org/1999/02/22-rdf-syntax-ns#')->about; + $date =$item->children('http://purl.org/dc/elements/1.1/')->date; + $content = $item->children('http://purl.org/rss/1.0/modules/content/'); + $i=array('title'=>strval($item->title),'link'=>strval($item->link),'guid'=>strval($guid),'pubDate'=>strval($date), + 'description'=>strval($item->description),'content'=>remapImageUrls(strval($content))); + $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate'])); + $feed_items[$i['dateiso']] = $i; + } + } + elseif (isset($xml->channel->item)) // RSS 2.0 + { + foreach ($xml->channel->item as $item) + { + $content = strval($item->children('http://purl.org/rss/1.0/modules/content/')); // Get + if (!$content) { $content = strval($item->description); } // Some feeds put content in the description. + $pubDate = $item->pubDate; + if (!$pubDate) { $pubDate=$item->children('http://purl.org/dc/elements/1.1/')->date; } // To read the tag content. + $i=array('title'=>strval($item->title),'link'=>strval($item->link),'guid'=>strval($item->guid),'pubDate'=>strval($pubDate), + 'description'=>strval($item->description),'content'=>remapImageUrls($content)); + $i['dateiso'] = date('Ymd_His', strtotime($i['pubDate'])); + $feed_items[$i['dateiso']] = $i; + } + } + krsort($feed_items); // Sort array, latest articles first. + file_put_contents('store', serialize($feed_items)); // Write database to disk +} + +/* feed(): Returns the feed as an associative array (latest articles first). + Key is timestamp in compact iso format (eg. '20110628_073208') + Value is an associative array (title,link,content,pubDate...) +*/ +function feed() +{ + $data=file_get_contents('store'); + if ($data===FALSE) { $feed_items=array(); } else { $feed_items = unserialize($data); } + return $feed_items; +} + +/* Remove accents (é-->e) */ +function replace_accents($str) { + $str = htmlentities($str, ENT_COMPAT, "UTF-8"); + $str = preg_replace('/&([a-zA-Z])(uml|acute|grave|circ|tilde);/','$1',$str); + return html_entity_decode($str); +} + +// Sanitize strings for use in filename or URLs +function sanitize($name) +{ + $fname=replace_accents($name); + $replace="_"; + $pattern="/([[:alnum:]_\.-]*)/"; // The autorized characters. + $fname=str_replace(str_split(preg_replace($pattern,$replace,$fname)),$replace,$fname); + return $fname; +} + +// Tells if a string start with a substring or not. +function startsWith($haystack,$needle,$case=true) { + if($case){return (strcmp(substr($haystack, 0, strlen($needle)),$needle)===0);} + return (strcasecmp(substr($haystack, 0, strlen($needle)),$needle)===0); +} +// Tells if a string ends with a substring or not. +function endsWith($haystack,$needle,$case=true) { + if($case){return (strcmp(substr($haystack, strlen($haystack) - strlen($needle)),$needle)===0);} + return (strcasecmp(substr($haystack, strlen($haystack) - strlen($needle)),$needle)===0); +} + +/* Returns the CSS stylesheet to include in HTML document */ +function css() +{ + return << + + +HTML; +} + +/* Render a single article + $article : the article itself (associative array with title,pubDate,content,dateiso keys.) +*/ +function renderArticle($article) +{ + echo '
'; + echo '

'.$article['title'].'

'.$article['pubDate']; + if ($article['link']!='') { echo ' - (source)'; } + echo '
'.$article['content'].'
'; + echo '
'; +} + +function rssHeaderLink() { return ''; } +function searchForm() { return ''; } +function powered() { return '
Powered by VroumVroumBlog 0.1.32 - RSS Feed
Download config articles
'; } +function canonical_metatag($url) { return ''; } + +/* Show a single article + $articleid = article identifier (eg.'20110629_010334') +*/ +function showArticle($articleid) +{ + global $CONFIG; + header('Content-Type: text/html; charset=utf-8'); + $feed=feed();if (!array_key_exists($articleid,$feed)) { die('Article not found.'); } + $a=$feed[$articleid]; + echo ''.$a['title'].' - '.$CONFIG['SITE_TITLE'].''.canonical_metatag($a['link']).css().rssHeaderLink().''; + echo '

'.$CONFIG['SITE_TITLE'].'

'.$CONFIG['SITE_DESCRIPTION'].searchForm().'
'; + renderArticle($a); + echo ''.powered().''; +} + +/* Show a list of articles, starting at a specific page. + $page = start page. First page is page 1. +*/ +function showArticles($page) +{ + global $CONFIG; + header('Content-Type: text/html; charset=utf-8'); + $feed=feed(); + $keys=array_keys($feed); + echo ''.$CONFIG['SITE_TITLE'].''.canonical_metatag($CONFIG['SITE_URL']).css().rssHeaderLink().''; + echo '

'.$CONFIG['SITE_TITLE'].'

'.$CONFIG['SITE_DESCRIPTION'].searchForm().'
'; + $i = ($page-1)*$CONFIG['ARTICLES_PER_PAGE']; // Start index. + $end = $i+$CONFIG['ARTICLES_PER_PAGE']; + while ($i<$end && $i
'; + if ($i!=count($keys)) { echo ''; } + echo ''; + if ($page>1) { echo ''; } + echo '
'.powered().''; +} + +/* Search for text in articles content and title. + $textpage = text to search. +*/ +function search($text) +{ + global $CONFIG; + header('Content-Type: text/html; charset=utf-8'); + $txt = urldecode($text); + echo ''.$CONFIG['SITE_TITLE'].''.css().rssHeaderLink().''; + echo '

'.$CONFIG['SITE_TITLE'].'

'.$CONFIG['SITE_DESCRIPTION'].searchForm().'
'; + echo '
Search for '.htmlspecialchars($txt).' :
'; + $feed=feed(); + foreach($feed as $article) + { + if (stripos($article['content'],$txt) || stripos($article['title'],$txt)) { renderArticle($article); } + } + echo ''.powered().''; +} + +/* Tells if a media URL should be downloaded or not. + Input: $url = absolute URL of a media (jpeg,pdf...) + Output: true= can download. false= should not download (wrong host, wrong file extension) */ +function mediaAuthorized($url) +{ + global $CONFIG; + $goodhost=false; $srchost=parse_url($url,PHP_URL_HOST); + foreach( explode(',',$CONFIG['DOWNLOAD_MEDIA_FROM']) as $host) // Does the URL point to an authorized host ? + { if ($srchost==$host) { $goodhost=true; } } + if (!$goodhost) { return false; } // Wrong host. + $ext = pathinfo($url, PATHINFO_EXTENSION); // Get file extension (eg.'png','gif'...) + if (!in_array(strtolower($ext),$CONFIG['DOWNLOAD_MEDIA_TYPES'])) { return false; } // Not in authorized file extensions. + return true; +} + +// Returns the MIME type corresponding to a file extension. +// (I do not trust mime_content_type() because of some dodgy hosting providers with ill-configured magic.mime file.) +function mime_type($filename) +{ + $MIME_TYPES=array('.jpg'=>'image/jpeg','.jpeg'=>'image/jpeg','.png'=>'image/png','.gif'=>'image/gif', + '.txt'=>'text/plain','.odt'=>'application/vnd.oasis.opendocument.text'); + foreach($MIME_TYPES as $extension=>$mime_type) { if (endswith($filename,$extension,false)) { return $mime_type; } } + return 'application/octet-stream'; // For an unkown extension. +} +// Returns a media from the local cache (and download it if not available). +function showMedia($imgurl) +{ + if (!mediaAuthorized($imgurl)) { header('HTTP/1.1 404 Not Found'); return; } + downloadMedia($imgurl); // Will only download if necessary. + $filename = 'media/'.sanitize($imgurl); + header('Content-Type: '.mime_type($filename)); + readfile($filename); +} + +// Download a media to local cache (if necessary) +function downloadMedia($imgurl) +{ + $filename = 'media/'.sanitize($imgurl); + if (!file_exists($filename) ) // Only download image if not present + { + if (!is_dir('media')) { mkdir('media',0705); file_put_contents('media/index.html',' '); } + file_put_contents($filename, file_get_contents($imgurl,NULL, NULL, 0, 4000000)); // We download at most 4 Mb from source. + } +} + +/* Output the whole feed in RSS 2.0 format with article content (BIG!) */ +function outputFeed() +{ + global $CONFIG; + header('Content-Type: application/xhtml+xml; charset=utf-8'); + echo ''; + echo ''.htmlspecialchars($CONFIG['SITE_TITLE']).''.htmlspecialchars($CONFIG['SITE_URL']).''; + echo ''.htmlspecialchars($CONFIG['SITE_URL']).''."\n\n"; + $feed=feed(); + foreach($feed as $a) + { + echo ''.$a['title'].''.$a['guid'].'http://'.$_SERVER["HTTP_HOST"].$_SERVER["SCRIPT_NAME"].'?'.$a['dateiso'].'_'.sanitize($a['title']).''.$a['pubDate'].''; + echo ''."\n\n"; + } + echo ''; +} + +// ================================================================================================== +// Update feed if necessary. (you can force refresh with ?force_the_refresh in URL) +updateFeed(); + +// Handle media download requests (eg. http://myserver.com/?m=http___anotherserver.net_images_myimage.jpg) +if (startswith($_SERVER["QUERY_STRING"],'m=')) { showMedia(substr($_SERVER["QUERY_STRING"],2)); } + +// Handle single article URI (eg. http://myserver.com/?20110506_224455-chit-chat) +elseif (preg_match('/^(\d{8}_\d{6})/',$_SERVER["QUERY_STRING"],$matches)) { showArticle($matches[1]); } + +// Handle page URI (eg. http://myserver.com/?page5) +elseif (preg_match('/^page(\d+)/',$_SERVER["QUERY_STRING"],$matches)) { showArticles($matches[1]); } + +// Handle RSS 2.0 feed request (http://myserver.com/?feed) +elseif (startswith($_SERVER["QUERY_STRING"],'feed')) { outputFeed(); } + +// Handle search request (eg. http://myserver.com/?s=tuto4pc) +elseif (startswith($_SERVER["QUERY_STRING"],'s=')) { search(substr($_SERVER["QUERY_STRING"],2)); } + +// Nothing ? Then render page1. +else { showArticles(1); } + +// Force flush, rendered page is fully sent to browser. +ob_end_flush(); +flush(); + +// Now we've finised rendering the page and sending to the user, +// it's time for some background tasks: Are there media to download ? +foreach($CONFIG['MEDIA_TO_DOWNLOAD'] as $url) { downloadMedia($url); } + +exit; +?> \ No newline at end of file diff --git a/0.1/vvb.ini b/0.1/vvb.ini new file mode 100644 index 0000000..60ebaa3 --- /dev/null +++ b/0.1/vvb.ini @@ -0,0 +1,6 @@ +[VroumVroumBlogConfig] +SITE_TITLE="Autoblog de Sebsauvage" +SITE_DESCRIPTION="Ce site n'est pas le site officiel de Sebsauvage
C'est un blog automatisé qui réplique les articles de sebsauvage.net" +SITE_URL=http://sebsauvage.net/rhaa/ +FEED_URL=http://sebsauvage.net/rhaa/rss_fulltext.php +DOWNLOAD_MEDIA_FROM=sebsauvage.net diff --git a/0.2/README.md b/0.2/README.md new file mode 100755 index 0000000..1042354 --- /dev/null +++ b/0.2/README.md @@ -0,0 +1,22 @@ +Projet Autoblog serie 0.2 +============== + +Auteurs: BohwaZ (VVB) & Arthur Hoaro, Mitsukarenai, Oros (index ferme d'autoblogs) + +Licence: Domaine Public + +- À propos du Projet Autoblog + +lire: http://sebsauvage.net/streisand.me/fr/ + +- Présentation et Instructions pour VVB 0.2 (par BohwaZ) + +voir: http://blogs.kd2.org/bohwaz/?2011/07/14/369-auto-blog-vroumvroumblog-et-effet-streisand + +- Présentation et Instructions pour la ferme d'autoblogs (par Arthur Hoaro) + +voir: http://wiki.hoa.ro/doku.php?id=web%3Aferme-autoblog + +- Améliorations pour la ferme d'autoblogs et XSAF (par Mitsukarenai et Oros) + +voir: https://www.suumitsu.eu/2012/08/autoblogs-petites-ameliorations/ diff --git a/autoblog.php b/0.2/autoblog.php similarity index 100% rename from autoblog.php rename to 0.2/autoblog.php diff --git a/automicroblog.php b/0.2/automicroblog.php similarity index 100% rename from automicroblog.php rename to 0.2/automicroblog.php diff --git a/config.php b/0.2/config.php similarity index 100% rename from config.php rename to 0.2/config.php diff --git a/_experimental/icon-logo.svg b/0.2/icon-logo.svg similarity index 100% rename from _experimental/icon-logo.svg rename to 0.2/icon-logo.svg diff --git a/index.php b/0.2/index.php similarity index 100% rename from index.php rename to 0.2/index.php diff --git a/xsaf2.php b/0.2/xsaf2.php similarity index 100% rename from xsaf2.php rename to 0.2/xsaf2.php diff --git a/0.3-beta/README.md b/0.3-beta/README.md new file mode 100755 index 0000000..9976a3a --- /dev/null +++ b/0.3-beta/README.md @@ -0,0 +1,18 @@ +Projet Autoblog serie 0.3 +============== + +PHASE BETA ! "git pullez" souvent, et merci pour vos rapports de bugs. + +Auteurs: Mitsu (https://www.suumitsu.eu/) & Oros (https://www.ecirtam.net/) + +Licence: Domaine Public + +- À propos du Projet Autoblog + +lire: http://sebsauvage.net/streisand.me/fr/ + +Instructions + +- uploader les fichiers sur un serveur avec PHP 5.3+ + +- ..c'est tout. Hackez le code pour apprendre comment ça marche et comment le personnaliser :) diff --git a/_experimental/autoblog-0.3.php b/0.3-beta/autoblog.php old mode 100644 new mode 100755 similarity index 98% rename from _experimental/autoblog-0.3.php rename to 0.3-beta/autoblog.php index 1856f9a..edcae2d --- a/_experimental/autoblog-0.3.php +++ b/0.3-beta/autoblog.php @@ -3,11 +3,6 @@ VroumVroumBlog 0.3.0 This blog automatically publishes articles from an external RSS 2.0 or ATOM feed. - Installation: - - copy this script (index.php) to a directory on your webserver. - - optionnaly copy the database ('articles.db'). Otherwise, it will be created automatically. - - tweak setting in vvb.ini - Requirement for the source RSS feed: - Source feed MUST be a valid RSS 2.0, RDF 1.0 or ATOM 1.0 feed. - Source feed MUST be valid UTF-8 @@ -802,7 +797,7 @@ else echo ' -Propulsé par Ferme d'Autoblogs 0.3.0 de Mitsu et Oros (Domaine Public) +Propulsé par Projet Autoblog 0.3 de Mitsu et Oros (Domaine Public) ".$HTML_footer; } ?> diff --git a/_experimental/xsaf3.php b/0.3-beta/xsaf3.php similarity index 99% rename from _experimental/xsaf3.php rename to 0.3-beta/xsaf3.php index 37f8a22..4ad9e0e 100755 --- a/_experimental/xsaf3.php +++ b/0.3-beta/xsaf3.php @@ -1,9 +1,8 @@