From d0c9397613d0830c28d0914de3bba92db2bee760 Mon Sep 17 00:00:00 2001
From: logmanoriginal
Date: Sat, 17 Dec 2016 18:04:21 +0100
Subject: [PATCH] [Kununu] Fix source layout changed

---
Review notes (text in this section is ignored by `git am`):

What the patch does:
- getURI() and getName() only build a company-specific value when the
  required inputs are present; otherwise they defer to the parent class.
- The trim / space-to-dash / lowercase / umlaut-encoding chain is moved into
  the new private helper fix_company_name().
- The page is fetched with getSimpleHTMLDOMCached() instead of
  getSimpleHTMLDOM().
- The selectors used for company name, review date, summary, review URI,
  author position and description are replaced.

Correction applied during review:
- getName(): the fallback was written as `paren::getName()`. `paren` is not a
  class, so that line fails as soon as getName() is called without a
  `company` input. It now reads `parent::getName()`, matching the
  `parent::getURI()` fallback in getURI().
- The post-image blob id on the `index` line was generated before this
  correction and was not recomputed.

 bridges/KununuBridge.php | 91 ++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 40 deletions(-)

diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php
index 4384fadf..9e00148e 100644
--- a/bridges/KununuBridge.php
+++ b/bridges/KununuBridge.php
@@ -39,36 +39,46 @@ class KununuBridge extends BridgeAbstract {
 		)
 	);
 
-	private $companyName='';
+	private $companyName = '';
 
 	public function getURI(){
-		$company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company')))));
-		$site=$this->getInput('site');
-		$section = '';
-		switch($site){
-			case 'at':
-			case 'de':
-			case 'ch':
-				$section = 'kommentare';
-				break;
-			case 'us':
-				$section = 'reviews';
-				break;
+		if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))){
+
+			$company = $this->fix_company_name($this->getInput('company'));
+			$site = $this->getInput('site');
+			$section = '';
+
+			switch($site){
+				case 'at':
+				case 'de':
+				case 'ch':
+					$section = 'kommentare';
+					break;
+				case 'us':
+					$section = 'reviews';
+					break;
+			}
+
+			return self::URI . $site . '/' . $company . '/' . $section;
 		}
 
-		return self::URI.$site.'/'.$company.'/'.$section;
+		return parent::getURI();
 	}
 
 	function getName(){
-		$company = $this->encode_umlauts(strtolower(str_replace(' ', '-', trim($this->getInput('company')))));
-		return ($this->companyName?:$company).' - '.self::NAME;
+		if(!is_null($this->getInput('company'))){
+			$company = $this->fix_company_name($this->getInput('company'));
+			return ($this->companyName?:$company).' - '.self::NAME;
+		}
+
+		return parent::getName();
 	}
 
 	public function collectData(){
 		$full = $this->getInput('full');
 
 		// Load page
-		$html = getSimpleHTMLDOM($this->getURI());
+		$html = getSimpleHTMLDOMCached($this->getURI());
 		if(!$html)
 			returnServerError('Unable to receive data from ' . $this->getURI() . '!');
 		// Update name for this request
@@ -109,6 +119,16 @@ class KununuBridge extends BridgeAbstract {
 		return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text);
 	}
 
+	/*
+	* Returns a fixed version of the provided company name
+	*/
+	private function fix_company_name($company){
+		$company = trim($company);
+		$company = str_replace(' ', '-', $company);
+		$company = strtolower($company);
+		return $this->encode_umlauts($company);
+	}
+
 	/**
 	* Encodes unmlauts in the given text
 	*/
@@ -123,11 +143,7 @@ class KununuBridge extends BridgeAbstract {
 	* Returns the company name from the review html
 	*/
 	private function extract_company_name($html){
-		$panel = $html->find('div.panel', 0);
-		if(is_null($panel))
-			returnServerError('Cannot find panel for company name!');
-
-		$company_name = $panel->find('h1', 0);
+		$company_name = $html->find('h1[itemprop=name]', 0);
 		if(is_null($company_name))
 			returnServerError('Cannot find company name!');
 
@@ -139,11 +155,11 @@ class KununuBridge extends BridgeAbstract {
 	*/
 	private function extract_article_date($article){
 		// They conviniently provide a time attribute for us :)
-		$date = $article->find('time[itemprop=dtreviewed]', 0);
+		$date = $article->find('meta[itemprop=dateCreated]', 0);
 		if(is_null($date))
 			returnServerError('Cannot find article date!');
 
-		return strtotime($date->datetime);
+		return strtotime($date->content);
 	}
 
 	/**
@@ -161,7 +177,7 @@ class KununuBridge extends BridgeAbstract {
 	* Returns the summary from a given article
 	*/
 	private function extract_article_summary($article){
-		$summary = $article->find('[itemprop=summary]', 0);
+		$summary = $article->find('[itemprop=name]', 0);
 		if(is_null($summary))
 			returnServerError('Cannot find article summary!');
 
@@ -172,32 +188,27 @@ class KununuBridge extends BridgeAbstract {
 	* Returns the URI from a given article
 	*/
 	private function extract_article_uri($article){
-		// Notice: This first part is the same as in extract_article_summary!
-		$summary = $article->find('[itemprop=summary]', 0);
-		if(is_null($summary))
-			returnServerError('Cannot find article summary!');
-
-		$anchor = $summary->find('a', 0);
+		$anchor = $article->find('ku-company-review-more', 0);
 		if(is_null($anchor))
 			returnServerError('Cannot find article URI!');
 
-		return self::URI . $anchor->href;
+		return self::URI . $anchor->{'review-url'};
 	}
 
 	/**
 	* Returns the position of the author from a given article
 	*/
 	private function extract_article_author_position($article){
-		// We need to parse the aside manually
-		$aside = $article->find('aside', 0);
-		if(is_null($aside))
-			returnServerError('Cannot find article author information!');
+		// We need to parse the user-content manually
+		$user_content = $article->find('div.user-content', 0);
+		if(is_null($user_content))
+			returnServerError('Cannot find user content!');
 
 		// Go through all h2 elements to find index of required span (I know... it's stupid)
 		$author_position = 'Unknown';
-		foreach($aside->find('h2') as $subject){
-			if(stristr(strtolower($subject->plaintext), 'position')){ /* This works for at, ch, de, us */
-				$author_position = $subject->next_sibling()->plaintext;
+		foreach($user_content->find('div') as $content){
+			if(stristr(strtolower($content->plaintext), 'position')){ /* This works for at, ch, de, us */
+				$author_position = $content->next_sibling()->plaintext;
 				break;
 			}
 		}
@@ -209,7 +220,7 @@ class KununuBridge extends BridgeAbstract {
 	* Returns the description from a given article
 	*/
 	private function extract_article_description($article){
-		$description = $article->find('div[itemprop=description]', 0);
+		$description = $article->find('[itemprop=reviewBody]', 0);
 		if(is_null($description))
 			returnServerError('Cannot find article description!');
 