[KununuBridge] Fix broken bridge and simplify implementation
This commit is contained in:
parent
811e8d8c88
commit
e6476a600d
1 changed files with 51 additions and 100 deletions
|
@ -64,7 +64,7 @@ class KununuBridge extends BridgeAbstract {
|
||||||
return parent::getURI();
|
return parent::getURI();
|
||||||
}
|
}
|
||||||
|
|
||||||
function getName(){
|
public function getName(){
|
||||||
if(!is_null($this->getInput('company'))) {
|
if(!is_null($this->getInput('company'))) {
|
||||||
$company = $this->fixCompanyName($this->getInput('company'));
|
$company = $this->fixCompanyName($this->getInput('company'));
|
||||||
return ($this->companyName ?: $company) . ' - ' . self::NAME;
|
return ($this->companyName ?: $company) . ' - ' . self::NAME;
|
||||||
|
@ -73,52 +73,67 @@ class KununuBridge extends BridgeAbstract {
|
||||||
return parent::getName();
|
return parent::getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function getIcon() {
|
||||||
|
return 'https://www.kununu.com/favicon-196x196.png';
|
||||||
|
}
|
||||||
|
|
||||||
public function collectData(){
|
public function collectData(){
|
||||||
$full = $this->getInput('full');
|
$full = $this->getInput('full');
|
||||||
|
|
||||||
// Load page
|
// Load page
|
||||||
$html = getSimpleHTMLDOMCached($this->getURI());
|
$html = getSimpleHTMLDOM($this->getURI())
|
||||||
if(!$html)
|
or returnServerError('Unable to receive data from ' . $this->getURI() . '!');
|
||||||
returnServerError('Unable to receive data from ' . $this->getURI() . '!');
|
|
||||||
|
$html = defaultLinkTo($html, static::URI);
|
||||||
|
|
||||||
// Update name for this request
|
// Update name for this request
|
||||||
$this->companyName = $this->extractCompanyName($html);
|
$company = $html->find('span[class="company-name"]', 0)
|
||||||
|
or returnServerError('Cannot find company name!');
|
||||||
|
|
||||||
|
$this->companyName = $company->innertext;
|
||||||
|
|
||||||
// Find the section with all the panels (reviews)
|
// Find the section with all the panels (reviews)
|
||||||
$section = $html->find('section.kununu-scroll-element', 0);
|
$section = $html->find('section.kununu-scroll-element', 0)
|
||||||
if($section === false)
|
or returnServerError('Unable to find panel section!');
|
||||||
returnServerError('Unable to find panel section!');
|
|
||||||
|
|
||||||
// Find all articles (within the panels)
|
// Find all articles (within the panels)
|
||||||
$articles = $section->find('article');
|
$articles = $section->find('article')
|
||||||
if($articles === false || empty($articles))
|
or returnServerError('Unable to find articles!');
|
||||||
returnServerError('Unable to find articles!');
|
|
||||||
|
|
||||||
// Go through all articles
|
// Go through all articles
|
||||||
foreach($articles as $article) {
|
foreach($articles as $article) {
|
||||||
|
|
||||||
|
$anchor = $article->find('h1.review-title a', 0)
|
||||||
|
or returnServerError('Cannot find article URI!');
|
||||||
|
|
||||||
|
$date = $article->find('meta[itemprop=dateCreated]', 0)
|
||||||
|
or returnServerError('Cannot find article date!');
|
||||||
|
|
||||||
|
$rating = $article->find('span.rating', 0)
|
||||||
|
or returnServerError('Cannot find article rating!');
|
||||||
|
|
||||||
|
$summary = $article->find('[itemprop=name]', 0)
|
||||||
|
or returnServerError('Cannot find article summary!');
|
||||||
|
|
||||||
$item = array();
|
$item = array();
|
||||||
|
|
||||||
$item['author'] = $this->extractArticleAuthorPosition($article);
|
$item['author'] = $this->extractArticleAuthorPosition($article);
|
||||||
$item['timestamp'] = $this->extractArticleDate($article);
|
$item['timestamp'] = strtotime($date);
|
||||||
$item['title'] = $this->extractArticleRating($article)
|
$item['title'] = $rating->getAttribute('aria-label')
|
||||||
. ' : '
|
. ' : '
|
||||||
. $this->extractArticleSummary($article);
|
. strip_tags($summary->innertext);
|
||||||
|
|
||||||
$item['uri'] = $this->extractArticleUri($article);
|
$item['uri'] = $anchor->href;
|
||||||
|
|
||||||
if($full)
|
if($full) {
|
||||||
$item['content'] = $this->extractFullDescription($item['uri']);
|
$item['content'] = $this->extractFullDescription($item['uri']);
|
||||||
else
|
} else {
|
||||||
$item['content'] = $this->extractArticleDescription($article);
|
$item['content'] = $this->extractArticleDescription($article);
|
||||||
|
}
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
}
|
||||||
* Fixes relative URLs in the given text
|
|
||||||
*/
|
|
||||||
private function fixUrl($text){
|
|
||||||
return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -128,73 +143,11 @@ class KununuBridge extends BridgeAbstract {
|
||||||
$company = trim($company);
|
$company = trim($company);
|
||||||
$company = str_replace(' ', '-', $company);
|
$company = str_replace(' ', '-', $company);
|
||||||
$company = strtolower($company);
|
$company = strtolower($company);
|
||||||
return $this->encodeUmlauts($company);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Encodes unmlauts in the given text
|
|
||||||
*/
|
|
||||||
private function encodeUmlauts($text){
|
|
||||||
$umlauts = Array('/ä/','/ö/','/ü/','/Ä/','/Ö/','/Ü/','/ß/');
|
$umlauts = Array('/ä/','/ö/','/ü/','/Ä/','/Ö/','/Ü/','/ß/');
|
||||||
$replace = Array('ae','oe','ue','Ae','Oe','Ue','ss');
|
$replace = Array('ae','oe','ue','Ae','Oe','Ue','ss');
|
||||||
|
|
||||||
return preg_replace($umlauts, $replace, $text);
|
return preg_replace($umlauts, $replace, $company);
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the company name from the review html
|
|
||||||
*/
|
|
||||||
private function extractCompanyName($html){
|
|
||||||
$company_name = $html->find('h1[itemprop=name]', 0);
|
|
||||||
if(is_null($company_name))
|
|
||||||
returnServerError('Cannot find company name!');
|
|
||||||
|
|
||||||
return $company_name->plaintext;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the date from a given article
|
|
||||||
*/
|
|
||||||
private function extractArticleDate($article){
|
|
||||||
// They conviniently provide a time attribute for us :)
|
|
||||||
$date = $article->find('meta[itemprop=dateCreated]', 0);
|
|
||||||
if(is_null($date))
|
|
||||||
returnServerError('Cannot find article date!');
|
|
||||||
|
|
||||||
return strtotime($date->content);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the rating from a given article
|
|
||||||
*/
|
|
||||||
private function extractArticleRating($article){
|
|
||||||
$rating = $article->find('span.rating', 0);
|
|
||||||
if(is_null($rating))
|
|
||||||
returnServerError('Cannot find article rating!');
|
|
||||||
|
|
||||||
return $rating->getAttribute('aria-label');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the summary from a given article
|
|
||||||
*/
|
|
||||||
private function extractArticleSummary($article){
|
|
||||||
$summary = $article->find('[itemprop=name]', 0);
|
|
||||||
if(is_null($summary))
|
|
||||||
returnServerError('Cannot find article summary!');
|
|
||||||
|
|
||||||
return strip_tags($summary->innertext);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the URI from a given article
|
|
||||||
*/
|
|
||||||
private function extractArticleUri($article){
|
|
||||||
$anchor = $article->find('h1.review-title a', 0);
|
|
||||||
if(is_null($anchor))
|
|
||||||
returnServerError('Cannot find article URI!');
|
|
||||||
|
|
||||||
return self::URI . $anchor->href;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -202,9 +155,8 @@ class KununuBridge extends BridgeAbstract {
|
||||||
*/
|
*/
|
||||||
private function extractArticleAuthorPosition($article){
|
private function extractArticleAuthorPosition($article){
|
||||||
// We need to parse the user-content manually
|
// We need to parse the user-content manually
|
||||||
$user_content = $article->find('div.user-content', 0);
|
$user_content = $article->find('div.user-content', 0)
|
||||||
if(is_null($user_content))
|
or returnServerError('Cannot find user content!');
|
||||||
returnServerError('Cannot find user content!');
|
|
||||||
|
|
||||||
// Go through all h2 elements to find index of required span (I know... it's stupid)
|
// Go through all h2 elements to find index of required span (I know... it's stupid)
|
||||||
$author_position = 'Unknown';
|
$author_position = 'Unknown';
|
||||||
|
@ -222,11 +174,10 @@ class KununuBridge extends BridgeAbstract {
|
||||||
* Returns the description from a given article
|
* Returns the description from a given article
|
||||||
*/
|
*/
|
||||||
private function extractArticleDescription($article){
|
private function extractArticleDescription($article){
|
||||||
$description = $article->find('[itemprop=reviewBody]', 0);
|
$description = $article->find('[itemprop=reviewBody]', 0)
|
||||||
if(is_null($description))
|
or returnServerError('Cannot find article description!');
|
||||||
returnServerError('Cannot find article description!');
|
|
||||||
|
|
||||||
return $this->fixUrl($description->innertext);
|
return $description->innertext;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -234,14 +185,14 @@ class KununuBridge extends BridgeAbstract {
|
||||||
*/
|
*/
|
||||||
private function extractFullDescription($uri){
|
private function extractFullDescription($uri){
|
||||||
// Load full article
|
// Load full article
|
||||||
$html = getSimpleHTMLDOMCached($uri);
|
$html = getSimpleHTMLDOMCached($uri)
|
||||||
if($html === false)
|
or returnServerError('Could not load full description!');
|
||||||
returnServerError('Could not load full description!');
|
|
||||||
|
$html = defaultLinkTo($html, static::URI);
|
||||||
|
|
||||||
// Find the article
|
// Find the article
|
||||||
$article = $html->find('article', 0);
|
$article = $html->find('article', 0)
|
||||||
if(is_null($article))
|
or returnServerError('Cannot find article!');
|
||||||
returnServerError('Cannot find article!');
|
|
||||||
|
|
||||||
// Luckily they use the same layout for the review overview and full article pages :)
|
// Luckily they use the same layout for the review overview and full article pages :)
|
||||||
return $this->extractArticleDescription($article);
|
return $this->extractArticleDescription($article);
|
||||||
|
|
Loading…
Reference in a new issue