diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php new file mode 100644 index 00000000..eac7aa10 --- /dev/null +++ b/bridges/NextgovBridge.php @@ -0,0 +1,110 @@ +maintainer = 'ORelio'; + $this->name = $this->getName(); + $this->uri = $this->getURI(); + $this->description = 'USA Federal technology news, best practices, and web 2.0 tools.'; + $this->update = '2016-07-20'; + + $this->parameters[] = + '[ + { + "name" : "Category", + "type" : "list", + "identifier" : "category", + "values" : + [ + { "name" : "All", "value" : "all" }, + { "name" : "Technology News", "value" : "technology-news" }, + { "name" : "CIO Briefing", "value" : "cio-briefing" }, + { "name" : "Emerging Tech", "value" : "emerging-tech" }, + { "name" : "Cloud", "value" : "cloud-computing" }, + { "name" : "Cybersecurity", "value" : "cybersecurity" }, + { "name" : "Mobile", "value" : "mobile" }, + { "name" : "Health", "value" : "health" }, + { "name" : "Defense", "value" : "defense" }, + { "name" : "Big Data", "value" : "big-data" } + ] + } + ]'; + + } + + public function collectData(array $param) { + + function ExtractFromDelimiters($string, $start, $end) { + if (strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } return false; + } + + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + $category = $param['category']; + if (empty($category)) + $category = 'all'; + if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32)) + $this->returnError('Invalid "category" parameter.', 400); + $url = $this->getURI().'rss/'.$category.'/'; + $html = $this->file_get_html($url) or $this->returnError('Could not request Nextgov: '.$url, 500); + $limit = 0; + + foreach ($html->find('item') as $element) { + if ($limit < 10) { + + $article_url = ExtractFromDelimiters($element->innertext, '', ''); + $article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', ''); + $article_title = $element->find('title', 0)->plaintext; + $article_subtitle = $element->find('description', 0)->plaintext; + $article_timestamp = strtotime($element->find('pubDate', 0)->plaintext); + $article_thumbnail = ExtractFromDelimiters($element->innertext, 'file_get_html($article_url) or $this->returnError('Could not request Nextgov: '.$article_url, 500); + + $contents = $article->find('div.wysiwyg', 0)->innertext; + $contents = StripWithDelimiters($contents, '
', '
'); + $contents = StripWithDelimiters($contents, ''); //ad outer div + $contents = StripWithDelimiters($contents, ''); + $contents = ($article_thumbnail == '' ? '' : '

') + .'

'.$article_subtitle.'

' + .trim($contents); + + if ($article_thumbnail == '') + $article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png'; + + $item = new \Item(); + $item->uri = $article_url; + $item->title = $article_title; + $item->author = $article_author; + $item->thumbnailUri = $article_thumbnail; + $item->timestamp = $article_timestamp; + $item->content = $contents; + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() { + return 'Nextgov Bridge'; + } + + public function getURI() { + return 'https://www.nextgov.com/'; + } + + public function getCacheDuration() { + return 3600; //1 hour + } +} \ No newline at end of file diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php new file mode 100644 index 00000000..4081b936 --- /dev/null +++ b/bridges/TheHackerNewsBridge.php @@ -0,0 +1,90 @@ +maintainer = 'ORelio'; + $this->name = $this->getName(); + $this->uri = $this->getURI(); + $this->description = 'Cyber Security, Hacking, Technology News.'; + $this->update = '2016-07-22'; + + } + + public function collectData(array $param) { + + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + function StripRecursiveHTMLSection($string, $tag_name, $tag_start) { + $open_tag = '<'.$tag_name; + $close_tag = ''; + $close_tag_length = strlen($close_tag); + if (strpos($tag_start, $open_tag) === 0) { + while (strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; + } + + $html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request TheHackerNews: '.$this->getURI(), 500); + $limit = 0; + + foreach ($html->find('article') as $element) { + if ($limit < 5) { + + $article_url = $element->find('a.entry-title', 0)->href; + $article_author = trim($element->find('span.vcard', 0)->plaintext); + $article_title = $element->find('a.entry-title', 0)->plaintext; + $article_timestamp = strtotime($element->find('span.updated', 0)->plaintext); + $article_thumbnail = $element->find('img', 0)->src; + $article = $this->file_get_html($article_url) or $this->returnError('Could not request TheHackerNews: '.$article_url, 500); + + $contents = $article->find('div.articlebodyonly', 0)->innertext; + $contents = StripRecursiveHTMLSection($contents, 'div', '
'); + + $item = new \Item(); + $item->uri = $article_url; + $item->title = $article_title; + $item->author = $article_author; + $item->thumbnailUri = $article_thumbnail; + $item->timestamp = $article_timestamp; + $item->content = trim($contents); + $this->items[] = $item; + $limit++; + } + } + + } + + public function getName() { + return 'The Hacker News Bridge'; + } + + public function getURI() { + return 'https://thehackernews.com/'; + } + + public function getCacheDuration() { + return 3600; //1 hour + } +} \ No newline at end of file diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 4267b983..79924aa0 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -6,8 +6,8 @@ class ZDNetBridge extends BridgeAbstract { $this->maintainer = 'ORelio'; $this->name = $this->getName(); $this->uri = $this->getURI(); - $this->description = 'Returns the newest articles.'; - $this->update = '2016-07-18'; + $this->description = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.'; + $this->update = '2016-07-20'; $this->parameters[] = // http://www.zdnet.com/zdnet.opml @@ -261,7 +261,7 @@ class ZDNetBridge extends BridgeAbstract { $thumbnail = $article->find('meta[itemprop=image]', 0); if (is_object($thumbnail)) $thumbnail = $thumbnail->content; - else $thumbnail = 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png'; + else $thumbnail = ''; $contents = $article->find('article', 0)->innertext; foreach (array( @@ -277,8 +277,17 @@ class ZDNetBridge extends BridgeAbstract { } $contents = StripWithDelimiters($contents, ''); $contents = StripWithDelimiters($contents, ''); - $contents = StripWithDelimiters($contents, '
')); + $content_img = strpos($contents, '

'; //Include thumbnail + $contents = $content_img + .'

'.$article_subtitle.'

' + .$contents; + + if ($thumbnail == '') + $thumbnail = 'http://zdnet1.cbsistatic.com/fly/bundles/zdnetcss/images/logos/logo-192x192.png'; $item = new \Item(); $item->author = $author; @@ -303,6 +312,6 @@ class ZDNetBridge extends BridgeAbstract { } public function getCacheDuration() { - return 3600; + return 3600; //1 hour } -} \ No newline at end of file +}