[add] show favicon of site

[add] fetch and cache favicon
This commit is contained in:
Knah Tsaeb 2015-01-29 16:59:59 +01:00
parent f945bb9b05
commit 268682859a
5 changed files with 410 additions and 39 deletions

40
inc/DataAccess.php Normal file
View File

@ -0,0 +1,40 @@
<?php
namespace Favicon;
/**
 * DataAccess is a thin wrapper used to read and write data locally or remotely.
 * Besides keeping I/O in one place, it lets unit tests replace remote resources with a mock.
 * Note: warnings raised by remote access are silenced on purpose, because an unreachable
 * website is not an error for the caller.
 **/
class DataAccess {
    /**
     * Fetch the body found at $url.
     * @param string $url
     * @return string|false whatever file_get_contents() returns (false on failure)
     */
    public function retrieveUrl($url) {
        $this->set_context();
        $body = @file_get_contents($url);
        return $body;
    }

    /**
     * Fetch the response headers of $url in get_headers() associative format.
     * @param string $url
     * @return array|false whatever get_headers() returns (false on failure)
     */
    public function retrieveHeader($url) {
        $this->set_context();
        $headers = @get_headers($url, true);
        return $headers;
    }

    /**
     * Write $data to the local file $file.
     * @param string $file
     * @param mixed  $data
     */
    public function saveCache($file, $data) {
        file_put_contents($file, $data);
    }

    /**
     * Read back the content of the local file $file.
     * @param string $file
     * @return string|false
     */
    public function readCache($file) {
        $content = file_get_contents($file);
        return $content;
    }

    /**
     * Install the default stream context (GET, 10 second timeout, custom User-Agent)
     * that the HTTP calls made by this class rely on.
     */
    private function set_context() {
        $httpOptions = array(
            'method' => 'GET',
            'timeout' => 10,
            'header' => "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:20.0; Favicon; +https://github.com/ArthurHoaro/favicon) Gecko/20100101 Firefox/32.0\r\n",
        );
        stream_context_set_default(array('http' => $httpOptions));
    }
}

293
inc/Favicon.php Normal file
View File

@ -0,0 +1,293 @@
<?php
namespace Favicon;
/**
 * Discovers the favicon URL of a web site and optionally caches the result on disk.
 *
 * Discovery order (see get()): the conventional /favicon.ico of the site root, then the
 * <link> tags found in the <head> of the site root, then the <link> tags of the original
 * URL including its path. All remote and file access goes through a DataAccess object so
 * that it can be replaced (see setDataAccess()).
 */
class Favicon
{
    /** @var string URL whose favicon is looked up when get() is called without argument */
    protected $url = '';
    /** @var string directory holding cache entries and the temporary file used for MIME detection */
    protected $cacheDir;
    /** @var int|null cache lifetime in seconds; a falsy value disables the cache */
    protected $cacheTimeout;
    /** @var DataAccess object used for every remote or file access */
    protected $dataAccess;

    /**
     * @param array $args optional settings; only the 'url' key is read
     */
    public function __construct($args = array())
    {
        if (isset($args['url'])) {
            $this->url = $args['url'];
        }
        $this->cacheDir = __DIR__ . '/../../resources/cache';
        $this->dataAccess = new DataAccess();
    }

    /**
     * Configure the cache.
     *
     * @param array $args 'dir' (cache directory) and 'timeout' (lifetime in seconds);
     *                    an empty or missing timeout disables the cache
     */
    public function cache($args = array())
    {
        if (isset($args['dir'])) {
            $this->cacheDir = $args['dir'];
        }
        $this->cacheTimeout = !empty($args['timeout']) ? $args['timeout'] : 0;
    }

    /**
     * Reduce a URL to "scheme://[user[:pass]@]host[:port]/", optionally keeping its path.
     *
     * @param string $url  URL to normalise
     * @param bool   $path when true the path component is kept (query and fragment never are)
     * @return string|false normalised URL ending with '/', or false when the URL cannot be
     *                      parsed, is not http(s), or has no host
     */
    public static function baseUrl($url, $path = false)
    {
        if (!$url = parse_url($url)) {
            return false;
        }
        // Scheme
        $scheme = isset($url['scheme']) ? strtolower($url['scheme']) : null;
        if ($scheme !== 'http' && $scheme !== 'https') {
            return false;
        }
        $return = "{$scheme}://";
        // Username and password
        if (isset($url['user'])) {
            $return .= $url['user'];
            if (isset($url['pass'])) {
                $return .= ":{$url['pass']}";
            }
            $return .= '@';
        }
        // Hostname
        if (!isset($url['host'])) {
            return false;
        }
        $return .= $url['host'];
        // Port
        if (isset($url['port'])) {
            $return .= ":{$url['port']}";
        }
        // Path
        if ($path && isset($url['path'])) {
            $return .= $url['path'];
        }
        $return .= '/';
        return $return;
    }

    /**
     * Discover the real status of a URL by following redirects (at most 5 requests).
     *
     * @param string $url
     * @return array|false array('status' => string, 'url' => string) describing the last
     *                     response examined, or false when the URL is empty, unreachable,
     *                     answers with an unreadable status line, or redirects without a
     *                     usable Location header
     */
    public function info($url)
    {
        if (empty($url)) {
            return false;
        }
        $redirectCodes = array('301', '302', '303', '307', '308');
        $status = null;
        $remaining = 5;
        while ($remaining-- > 0) {
            $headers = $this->dataAccess->retrieveHeader($url);
            // retrieveHeader() yields false when the host is unreachable.
            if (!is_array($headers) || !isset($headers[0]) || !is_string($headers[0])) {
                return false;
            }
            $statusLine = explode(' ', $headers[0]);
            if (!isset($statusLine[1])) {
                return false;
            }
            $status = $statusLine[1];
            if (!in_array($status, $redirectCodes, true)) {
                break;
            }
            // Header names are case-insensitive; numeric keys are left untouched.
            $headers = array_change_key_case($headers, CASE_LOWER);
            if (empty($headers['location'])) {
                return false;
            }
            // When several responses carried a Location header the value is an array,
            // one entry per hop: resolve each hop against the previous one.
            foreach ((array) $headers['location'] as $hop) {
                if (!is_string($hop) || trim($hop) === '') {
                    return false;
                }
                $url = $this->resolveUrl($url, $hop);
            }
        }
        return array('status' => $status, 'url' => $url);
    }

    /**
     * Follow redirects and return the final URL.
     *
     * @param string $url
     * @return string|false final URL, or false when info() fails
     */
    public function endRedirect($url)
    {
        $out = $this->info($url);
        return !empty($out['url']) ? $out['url'] : false;
    }

    /**
     * Find remote (or cached) favicon
     *
     * @param string $url optional URL; when given it replaces the one stored in the object
     * @return string|false favicon URL, false if nothing was found
     **/
    public function get($url = '')
    {
        // URLs passed to this method take precedence.
        if (!empty($url)) {
            $this->url = $url;
        }
        $withPath = static::baseUrl($this->url, true);
        $root = static::baseUrl($this->url, false);
        if ($withPath === false || $root === false) {
            // Not an http(s) URL: nothing to look up.
            return false;
        }
        // Drop the trailing slash for clearer concatenations.
        $original = rtrim($withPath, '/');
        $resolved = $this->endRedirect($root);
        $url = ($resolved === false) ? '' : rtrim($resolved, '/');

        if ($url !== '' && (($favicon = $this->checkCache($url)) || ($favicon = $this->getFavicon($url)))) {
            $cacheKey = $url;
        } elseif (($favicon = $this->checkCache($original)) || ($favicon = $this->getFavicon($original, false))) {
            $cacheKey = $original;
        } else {
            return false;
        }

        // Save cache if necessary: only when caching is enabled, and only when the entry is
        // missing or stale. The explicit nesting matters: without it a stale entry would be
        // rewritten even with caching disabled.
        if ($this->cacheTimeout) {
            $cache = $this->cacheDir . '/' . md5($cacheKey);
            if (!file_exists($cache) || (is_writable($cache) && time() - filemtime($cache) > $this->cacheTimeout)) {
                $this->dataAccess->saveCache($cache, $favicon);
            }
        }
        return $favicon;
    }

    /**
     * Look for the favicon of $url.
     *
     * @param string $url          base URL without trailing slash
     * @param bool   $checkDefault when true "$url/favicon.ico" is probed first
     * @return string|false absolute favicon URL or false
     */
    private function getFavicon($url, $checkDefault = true)
    {
        $favicon = false;
        if (empty($url)) {
            return false;
        }
        // Try /favicon.ico first.
        if ($checkDefault) {
            $info = $this->info("{$url}/favicon.ico");
            // info() returns false for unreachable hosts.
            if (is_array($info) && $info['status'] === '200') {
                $favicon = $info['url'];
            }
        }
        // See if it's specified in a link tag in domain url.
        if (!$favicon) {
            $favicon = $this->getInPage($url);
        }
        // Make sure the favicon is an absolute URL.
        if ($favicon) {
            $favicon = $this->resolveUrl($url, $favicon);
        }
        // Sometimes people lie, so check the status.
        // And sometimes, it's not even an image.
        // The check needs a temporary file in cacheDir, so it is skipped when cacheDir
        // isn't writable.
        if ($favicon && is_writable($this->cacheDir) && !$this->checkImageMType($favicon)) {
            $favicon = false;
        }
        return $favicon;
    }

    /**
     * Turn a reference found in a page or in a Location header into an absolute URL.
     *
     * Absolute http(s) URLs are returned unchanged, "//host/x" gets the scheme of $base,
     * "/x" is attached to the root of $base, and anything else is appended to $base.
     *
     * @param string $base      absolute URL the reference was found on
     * @param string $reference URL or path to resolve
     * @return string
     */
    private function resolveUrl($base, $reference)
    {
        $reference = trim($reference);
        if (filter_var($reference, FILTER_VALIDATE_URL) !== false || preg_match('!^https?://!i', $reference)) {
            return $reference;
        }
        if (strpos($reference, '//') === 0) {
            $scheme = parse_url($base, PHP_URL_SCHEME);
            return ($scheme ? strtolower($scheme) : 'http') . ':' . $reference;
        }
        if (strpos($reference, '/') === 0) {
            $root = static::baseUrl($base, false);
            if ($root !== false) {
                return rtrim($root, '/') . $reference;
            }
        }
        return rtrim($base, '/') . '/' . $reference;
    }

    /**
     * Download "$url/" and search the <link> tags of its <head> for a favicon.
     *
     * A link is accepted when its rel attribute contains the token "icon" (which covers
     * "icon" and "shortcut icon") or when its href contains "favicon".
     *
     * @param string $url base URL without trailing slash
     * @return string|false href of the first matching link (possibly relative) or false
     */
    private function getInPage($url)
    {
        $html = $this->dataAccess->retrieveUrl("{$url}/");
        if (!is_string($html) || $html === '') {
            return false;
        }
        // \b keeps <header> from being mistaken for <head>.
        if (!preg_match('!<head\b.*?>.*</head>!ims', $html, $match)) {
            return false;
        }
        $dom = new \DOMDocument();
        // Collect parser errors instead of emitting them, because the HTML might be malformed.
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($match[0]);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            return false;
        }
        foreach ($dom->getElementsByTagName('link') as $link) {
            $href = trim($link->getAttribute('href'));
            if ($href === '') {
                continue;
            }
            $rel = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
            if (in_array('icon', $rel, true) || strpos($href, 'favicon') !== false) {
                return $href;
            }
        }
        return false;
    }

    /**
     * Return the cached value stored for $url when the cache is enabled and still fresh.
     *
     * @param string $url
     * @return string|false
     */
    private function checkCache($url)
    {
        if (!$this->cacheTimeout) {
            return false;
        }
        $cache = $this->cacheDir . '/' . md5($url);
        if (file_exists($cache) && is_readable($cache) && (time() - filemtime($cache) < $this->cacheTimeout)) {
            return $this->dataAccess->readCache($cache);
        }
        return false;
    }

    /**
     * Download $url and tell whether its detected MIME type is an image type.
     *
     * @param string $url
     * @return bool
     */
    private function checkImageMType($url)
    {
        $fileContent = $this->dataAccess->retrieveUrl($url);
        if (!is_string($fileContent) || $fileContent === '') {
            return false;
        }
        // A unique temporary name avoids two concurrent lookups overwriting or deleting
        // each other's file.
        $tmpFile = tempnam($this->cacheDir, 'fav');
        if ($tmpFile === false) {
            return false;
        }
        $this->dataAccess->saveCache($tmpFile, $fileContent);
        $isImage = false;
        $finfo = finfo_open(FILEINFO_MIME_TYPE);
        if ($finfo !== false) {
            $mime = finfo_file($finfo, $tmpFile);
            $isImage = is_string($mime) && strpos($mime, 'image') !== false;
            finfo_close($finfo);
        }
        if (file_exists($tmpFile)) {
            unlink($tmpFile);
        }
        return $isImage;
    }

    /**
     * @return mixed
     */
    public function getCacheDir()
    {
        return $this->cacheDir;
    }

    /**
     * @param mixed $cacheDir
     */
    public function setCacheDir($cacheDir)
    {
        $this->cacheDir = $cacheDir;
    }

    /**
     * @return mixed
     */
    public function getCacheTimeout()
    {
        return $this->cacheTimeout;
    }

    /**
     * @param mixed $cacheTimeout
     */
    public function setCacheTimeout($cacheTimeout)
    {
        $this->cacheTimeout = $cacheTimeout;
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * @param string $url
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }

    /**
     * @param DataAccess $dataAccess
     */
    public function setDataAccess($dataAccess)
    {
        $this->dataAccess = $dataAccess;
    }
}

View File

@ -282,7 +282,7 @@ font-size:9pt;
.thumbnail { float:left; margin-right: 10px; }
.linkcontainer { position: static; margin-left:130px; }
*/
.favicon {width:16px;height:16px;margin-right:0.1em;}

View File

@ -22,9 +22,11 @@ $GLOBALS['config']['BAN_DURATION'] = 1800; // Ban duration for IP address after
$GLOBALS['config']['OPEN_SHAARLI'] = false; // If true, anyone can add/edit/delete links without having to login
$GLOBALS['config']['HIDE_TIMESTAMPS'] = false; // If true, the moment when links were saved are not shown to users that are not logged in.
$GLOBALS['config']['ENABLE_THUMBNAILS'] = true; // Enable thumbnails in links.
$GLOBALS['config']['ENABLE_FAVICON'] = true; // Enable favicon in links.
$GLOBALS['config']['CACHEDIR'] = 'cache'; // Cache directory for thumbnails for SLOW services (like flickr)
$GLOBALS['config']['PAGECACHE'] = 'pagecache'; // Page cache directory.
$GLOBALS['config']['ENABLE_LOCALCACHE'] = true; // Enable Shaarli to store thumbnail in a local cache. Disable to reduce webspace usage.
// Caution: if favicons are enabled while the local cache is disabled, pages can be slow to serve because favicons are fetched again on every request.
$GLOBALS['config']['PUBSUBHUB_URL'] = ''; // PubSubHubbub support. Put an empty string to disable, or put your hub url here to enable.
$GLOBALS['config']['UPDATECHECK_FILENAME'] = $GLOBALS['config']['DATADIR'].'/lastupdatecheck.txt'; // For updates check of Shaarli.
$GLOBALS['config']['UPDATECHECK_INTERVAL'] = 86400 ; // Updates check frequency for Shaarli. 86400 seconds=24 hours
@ -2052,6 +2054,42 @@ function lazyThumbnail($url,$href=false)
return $html;
}
/**
 * Build an inline <img> tag (base64 data URI) holding the favicon of the site behind $url.
 *
 * With ENABLE_LOCALCACHE the icon is stored under CACHEDIR/xx/yy/<md5>.ico, where xx and yy
 * are the first two pairs of characters of md5($url). When no favicon can be obtained, an
 * empty marker file CACHEDIR/xx/yy/<md5> is created so the lookup is not repeated.
 *
 * @param string $url URL of the link whose favicon is wanted
 * @return string|null the <img> tag, or null when favicons are disabled or none is available
 */
function returnFavicon($url){
    if (empty($GLOBALS['config']['ENABLE_FAVICON'])) {
        return;
    }
    $useLocalCache = $GLOBALS['config']['ENABLE_LOCALCACHE'] === true;
    $faviconHash = md5($url);
    $cacheSubDir = $GLOBALS['config']['CACHEDIR'].'/'.substr($faviconHash, 0, 2).'/'.substr($faviconHash, 2, 2);
    $faviconPath = $cacheSubDir.'/'.$faviconHash.'.ico';
    $missMarker = $cacheSubDir.'/'.$faviconHash;

    // Serve from the local cache when possible; an unreadable or empty file falls through
    // to a fresh lookup instead of producing an empty image.
    if ($useLocalCache && file_exists($faviconPath)) {
        $content = file_get_contents($faviconPath);
        if ($content !== false && $content !== '') {
            return '<img class="favicon" alt="favicon" src="data:image/ico;base64,'.base64_encode($content).'"/>';
        }
    }
    // A marker file records that an earlier lookup found nothing.
    if (file_exists($missMarker)) {
        return;
    }

    require_once 'inc/DataAccess.php';
    require_once 'inc/Favicon.php';
    $favicon = new \Favicon\Favicon();
    $urlOfFavicon = $favicon->get($url);
    // Warnings are silenced: an unreachable icon is handled below like a missing one.
    $content = $urlOfFavicon ? @file_get_contents($urlOfFavicon) : false;

    // Create the cache directory only when missing; mkdir() warns on an existing directory.
    // The trailing is_dir() covers a concurrent request creating it first.
    $cacheReady = $useLocalCache
        && (is_dir($cacheSubDir) || @mkdir($cacheSubDir, 0777, true) || is_dir($cacheSubDir));

    if ($content === false || $content === '') {
        // No favicon, or its download failed: remember it rather than caching an empty icon.
        if ($cacheReady) {
            touch($missMarker);
        }
        return;
    }
    if ($cacheReady) {
        file_put_contents($faviconPath, $content);
    }
    return '<img class="favicon" alt="favicon" src="data:image/ico;base64,'.base64_encode($content).'"/>';
}
// -----------------------------------------------------------------------------------------------
// Installation

View File

@ -40,7 +40,7 @@
<input type="hidden" name="token" value="{$token}"><input type="hidden" name="delete_link"><input type="image" alt="Delete" src="images/delete_icon.png#" title="Delete" class="button_delete" onClick="return confirmDeleteLink();"></form>
</div>
{/if}
<span class="linktitle"><a href="{$redirector}{$value.url|htmlspecialchars}">{$value.title|htmlspecialchars}</a></span>
<span class="linktitle">{function="returnFavicon($value.url)"}<a href="{$redirector}{$value.url|htmlspecialchars}">{$value.title|htmlspecialchars}</a></span>
<br>
{if="$value.description"}<div class="linkdescription"{if condition="$search_type=='permalink'"} style="max-height:none !important;"{/if}>{$value.description}</div>{/if}
{if="!$GLOBALS['config']['HIDE_TIMESTAMPS'] || isLoggedIn()"}