From 3276d4e3d5767ca8ef361dc6496e93e1ea021de3 Mon Sep 17 00:00:00 2001 From: logmanoriginal Date: Sun, 26 Mar 2017 16:40:05 +0200 Subject: [PATCH] [GooglePlusPostBridge] Fix content loading - Do not force language via HTTP header The header enforced the language to be French, which caused problems parsing the exact time due to spellings (strtotime cannot work with 'semaines'). If further issues are experienced try forcing en-us instead. => This should really be done in the RSS-Bridge core - Fix loading problems due to pinned articles Pinned articles do not provide a timestamp. Building the timestamp step-by-step solves parsing errors. - Use class names instead of CSS paths CSS paths change based on the article. Pinned articles provide a different DOM structure which caused parsing errors. Reported via #499 --- bridges/GooglePlusPostBridge.php | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php index b7fc9673..4dcf801d 100644 --- a/bridges/GooglePlusPostBridge.php +++ b/bridges/GooglePlusPostBridge.php @@ -19,15 +19,8 @@ class GooglePlusPostBridge extends BridgeAbstract{ public function collectData(){ // get content parsed - $html = getSimpleHTMLDOMCached(self::URI . urlencode($this->getInput('username')) . '/posts', - // force language - 84600, - false, - stream_context_create(array( - 'http' => array( - 'header' => 'Accept-Language: fr,fr-be,fr-fr;q=0.8,en;q=0.4,en-us;q=0.2;*' . "\r\n" - ))) - ) or returnServerError('No results for this query.'); + $html = getSimpleHTMLDOMCached(self::URI . urlencode($this->getInput('username')) . '/posts') + or returnServerError('No results for this query.'); // get title, url, ... 
there is a lot of intresting stuff in meta $this->_title = $html->find('meta[property=og:title]', 0)->getAttribute('content'); @@ -41,10 +34,15 @@ class GooglePlusPostBridge extends BridgeAbstract{ $item['id'] = $post->find('div div div', 0)->getAttribute('id'); $item['avatar'] = $post->find('div img', 0)->src; $item['uri'] = self::URI . $post->find('div div div a', 1)->href; - $item['timestamp'] = strtotime( - '+' . preg_replace('/[^0-9A-Za-z]/', - '', - $post->find('div div div a span', 1)->getAttribute('aria-label'))); + + $timestamp = $post->find('a.qXj2He span', 0); + + if($timestamp){ + $item['timestamp'] = strtotime('+' . preg_replace( + '/[^0-9A-Za-z]/', + '', + $timestamp->getAttribute('aria-label'))); + } // hashtag to treat : https://plus.google.com/explore/tag // $hashtags = array(); @@ -65,7 +63,7 @@ class GooglePlusPostBridge extends BridgeAbstract{ . $item['avatar'] . '" />'; - $content = $post->find('div div[id^=body] div div', 0); + $content = $post->find('div[jsname=EjRJtf]', 0); // extract plaintext $item['content_simple'] = $content->plaintext; $item['title'] = substr($item['content_simple'], 0, 72) . '...';