Rss-Bridge/bridges/KununuBridge.php
logmanoriginal 50eee7e7b3 [KununuBridge] Add feed item limit
This bridge currently takes a very long time to process
all news items on the page, when in many cases only one
or two had been added since the last check.

This commit adds a new parameter 'limit', which defines
the maximum number of items to add to the feed. This is
an optional paramter that defaults to 3.
2019-11-01 15:27:35 +01:00

243 lines
6.5 KiB
PHP

<?php
class KununuBridge extends BridgeAbstract {
const MAINTAINER = 'logmanoriginal';
const NAME = 'Kununu Bridge';
const URI = 'https://www.kununu.com/';
const CACHE_TIMEOUT = 86400; // 24h
const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.';
const PARAMETERS = array(
'global' => array(
'site' => array(
'name' => 'Site',
'type' => 'list',
'title' => 'Select your site',
'values' => array(
'Austria' => 'at',
'Germany' => 'de',
'Switzerland' => 'ch',
'United States' => 'us'
)
),
'full' => array(
'name' => 'Load full article',
'type' => 'checkbox',
'exampleValue' => 'checked',
'title' => 'Activate to load full article'
),
'include_ratings' => array(
'name' => 'Include ratings',
'type' => 'checkbox',
'title' => 'Activate to include ratings in the feed'
),
'include_benefits' => array(
'name' => 'Include benefits',
'type' => 'checkbox',
'title' => 'Activate to include benefits in the feed'
),
'limit' => array(
'name' => 'Limit',
'type' => 'number',
'defaultValue' => 3,
'title' => "Maximum number of items to return in the feed.\n0 = unlimited"
)
),
array(
'company' => array(
'name' => 'Company',
'required' => true,
'exampleValue' => 'kununu-us',
'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)'
)
)
);
private $companyName = '';
public function getURI(){
if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))) {
$company = $this->fixCompanyName($this->getInput('company'));
$site = $this->getInput('site');
$section = '';
switch($site) {
case 'at':
case 'de':
case 'ch':
$section = 'kommentare';
break;
case 'us':
$section = 'reviews';
break;
}
return self::URI . $site . '/' . $company . '/' . $section . '?sort=update_time_desc';
}
return parent::getURI();
}
public function getName(){
if(!is_null($this->getInput('company'))) {
$company = $this->fixCompanyName($this->getInput('company'));
return ($this->companyName ?: $company) . ' - ' . self::NAME;
}
return parent::getName();
}
public function getIcon() {
return 'https://www.kununu.com/favicon-196x196.png';
}
public function collectData(){
$full = $this->getInput('full');
// Load page
$html = getSimpleHTMLDOM($this->getURI())
or returnServerError('Unable to receive data from ' . $this->getURI() . '!');
$html = defaultLinkTo($html, static::URI);
// Update name for this request
$company = $html->find('span[class="company-name"]', 0)
or returnServerError('Cannot find company name!');
$this->companyName = $company->innertext;
// Find the section with all the panels (reviews)
$section = $html->find('section.kununu-scroll-element', 0)
or returnServerError('Unable to find panel section!');
// Find all articles (within the panels)
$articles = $section->find('article')
or returnServerError('Unable to find articles!');
$limit = $this->getInput('limit') ?: 0;
// Go through all articles
foreach($articles as $article) {
$anchor = $article->find('h1.review-title a', 0)
or returnServerError('Cannot find article URI!');
$date = $article->find('meta[itemprop=dateCreated]', 0)
or returnServerError('Cannot find article date!');
$rating = $article->find('span.rating', 0)
or returnServerError('Cannot find article rating!');
$summary = $article->find('[itemprop=name]', 0)
or returnServerError('Cannot find article summary!');
$item = array();
$item['author'] = $this->extractArticleAuthorPosition($article);
$item['timestamp'] = strtotime($date->content);
$item['title'] = $rating->getAttribute('aria-label')
. ' : '
. strip_tags($summary->innertext);
$item['uri'] = $anchor->href;
if($full) {
$item['content'] = $this->extractFullDescription($item['uri']);
} else {
$item['content'] = $this->extractArticleDescription($article);
}
$this->items[] = $item;
if ($limit > 0 && count($this->items) >= $limit) break;
}
}
/*
* Returns a fixed version of the provided company name
*/
private function fixCompanyName($company){
$company = trim($company);
$company = str_replace(' ', '-', $company);
$company = strtolower($company);
$umlauts = Array('/ä/','/ö/','/ü/','/Ä/','/Ö/','/Ü/','/ß/');
$replace = Array('ae','oe','ue','Ae','Oe','Ue','ss');
return preg_replace($umlauts, $replace, $company);
}
/**
* Returns the position of the author from a given article
*/
private function extractArticleAuthorPosition($article){
// We need to parse the user-content manually
$user_content = $article->find('div.user-content', 0)
or returnServerError('Cannot find user content!');
// Go through all h2 elements to find index of required span (I know... it's stupid)
$author_position = 'Unknown';
foreach($user_content->find('div') as $content) {
if(stristr(strtolower($content->plaintext), 'position')) { /* This works for at, ch, de, us */
$author_position = $content->next_sibling()->plaintext;
break;
}
}
return $author_position;
}
/**
* Returns the description from a given article
*/
private function extractArticleDescription($article){
$description = $article->find('[itemprop=reviewBody]', 0)
or returnServerError('Cannot find article description!');
$retVal = $description->innertext;
if($this->getInput('include_ratings')
&& ($ratings = $article->find('.review-ratings .rating-group'))) {
$retVal .= (empty($retVal) ? '' : '<hr>') . '<table>';
foreach($ratings as $rating) {
$retVal .= <<<EOD
<tr>
<td>{$rating->find('.rating-title', 0)->plaintext}
<td>{$rating->find('.rating-badge', 0)->plaintext}
</tr>
EOD;
}
$retVal .= '</table>';
}
if($this->getInput('include_benefits')
&& ($benefits = $article->find('benefit'))) {
$retVal .= (empty($retVal) ? '' : '<hr>') . '<ul>';
foreach($benefits as $benefit) {
$retVal .= "<li>{$benefit->plaintext}</li>";
}
$retVal .= '</ul>';
}
return $retVal;
}
/**
* Returns the full description from a given uri
*/
private function extractFullDescription($uri){
// Load full article
$html = getSimpleHTMLDOMCached($uri)
or returnServerError('Could not load full description!');
$html = defaultLinkTo($html, static::URI);
// Find the article
$article = $html->find('article', 0)
or returnServerError('Cannot find article!');
// Luckily they use the same layout for the review overview and full article pages :)
return $this->extractArticleDescription($article);
}
}