Rss-Bridge/bridges/NextgovBridge.php
logmanoriginal 5432cabef5 bridges: Put name/uri directly in bridge metadata
Some bridges used getName() and getURI() to put information into the
metadatas. Instead the metadatas should be initialized with data and
(not yet done) returned by default via getName() and getURI().
2016-08-06 18:07:42 +02:00

106 lines
4.6 KiB
PHP

<?php
class NextgovBridge extends BridgeAbstract {
public function loadMetadatas() {
$this->maintainer = 'ORelio';
$this->name = 'Nextgov Bridge';
$this->uri = 'https://www.nextgov.com/';
$this->description = 'USA Federal technology news, best practices, and web 2.0 tools.';
$this->update = '2016-08-06';
$this->parameters[] =
'[
{
"name" : "Category",
"type" : "list",
"identifier" : "category",
"values" :
[
{ "name" : "All", "value" : "all" },
{ "name" : "Technology News", "value" : "technology-news" },
{ "name" : "CIO Briefing", "value" : "cio-briefing" },
{ "name" : "Emerging Tech", "value" : "emerging-tech" },
{ "name" : "Cloud", "value" : "cloud-computing" },
{ "name" : "Cybersecurity", "value" : "cybersecurity" },
{ "name" : "Mobile", "value" : "mobile" },
{ "name" : "Health", "value" : "health" },
{ "name" : "Defense", "value" : "defense" },
{ "name" : "Big Data", "value" : "big-data" }
]
}
]';
}
public function collectData(array $param) {
function ExtractFromDelimiters($string, $start, $end) {
if (strpos($string, $start) !== false) {
$section_retrieved = substr($string, strpos($string, $start) + strlen($start));
$section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
return $section_retrieved;
} return false;
}
function StripWithDelimiters($string, $start, $end) {
while (strpos($string, $start) !== false) {
$section_to_remove = substr($string, strpos($string, $start));
$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
$string = str_replace($section_to_remove, '', $string);
} return $string;
}
$category = $param['category'];
if (empty($category))
$category = 'all';
if ($category !== preg_replace('/[^a-z-]+/', '', $category) || strlen($category > 32))
$this->returnError('Invalid "category" parameter.', 400);
$url = $this->getURI().'rss/'.$category.'/';
$html = $this->file_get_html($url) or $this->returnError('Could not request Nextgov: '.$url, 500);
$limit = 0;
foreach ($html->find('item') as $element) {
if ($limit < 10) {
$article_url = ExtractFromDelimiters($element->innertext, '<link>', '</link>');
$article_author = ExtractFromDelimiters($element->innertext, 'dc/elements/1.1/">', '</dc:creator>');
$article_title = $element->find('title', 0)->plaintext;
$article_subtitle = $element->find('description', 0)->plaintext;
$article_timestamp = strtotime($element->find('pubDate', 0)->plaintext);
$article_thumbnail = ExtractFromDelimiters($element->innertext, '<media:content url="', '"');
$article = $this->file_get_html($article_url) or $this->returnError('Could not request Nextgov: '.$article_url, 500);
$contents = $article->find('div.wysiwyg', 0)->innertext;
$contents = StripWithDelimiters($contents, '<div class="ad-container">', '</div>');
$contents = StripWithDelimiters($contents, '<div', '</div>'); //ad outer div
$contents = StripWithDelimiters($contents, '<script', '</script>');
$contents = ($article_thumbnail == '' ? '' : '<p><img src="'.$article_thumbnail.'" /></p>')
.'<p><b>'.$article_subtitle.'</b></p>'
.trim($contents);
if ($article_thumbnail == '')
$article_thumbnail = 'http://cdn.nextgov.com/nextgov/images/logo.png';
$item = new \Item();
$item->uri = $article_url;
$item->title = $article_title;
$item->author = $article_author;
$item->thumbnailUri = $article_thumbnail;
$item->timestamp = $article_timestamp;
$item->content = $contents;
$this->items[] = $item;
$limit++;
}
}
}
public function getName() {
return $this->name;
}
public function getURI() {
return $this->uri;
}
}