Rss-Bridge/bridges/DiarioDoAlentejoBridge.php

61 lines
1.9 KiB
PHP
Raw Permalink Normal View History

<?php
class DiarioDoAlentejoBridge extends BridgeAbstract {
const MAINTAINER = 'somini';
const NAME = 'Diário do Alentejo';
const URI = 'https://www.diariodoalentejo.pt';
const DESCRIPTION = 'Semanário Regionalista Independente';
const CACHE_TIMEOUT = 28800; // 8h
/* This is used to hack around obtaining a timestamp. It's just a list of Month names in Portuguese ... */
const PT_MONTH_NAMES = array(
'janeiro',
'fevereiro',
'março',
'abril',
'maio',
'junho',
'julho',
'agosto',
'setembro',
'outubro',
'novembro',
'dezembro');
public function getIcon() {
return 'https://www.diariodoalentejo.pt/images/favicon/apple-touch-icon.png';
}
public function collectData(){
/* This is slow as molasses (>30s!), keep the cache timeout high to avoid killing the host */
$html = getSimpleHTMLDOMCached($this->getURI() . '/pt/noticias-listagem.aspx')
or returnServerError('Could not load content');
foreach($html->find('.list_news .item') as $element) {
$item = array();
$item_link = $element->find('.body h2.title a', 0);
/* Another broken URL, see also `bridges/ComboiosDePortugalBridge.php` */
$item['uri'] = self::URI . implode('/', array_map('urlencode', explode('/', $item_link->href)));
$item['title'] = $item_link->innertext;
$item['timestamp'] = str_ireplace(
array_map(function($name) { return ' ' . $name . ' '; }, self::PT_MONTH_NAMES),
array_map(function($num) { return sprintf('-%02d-', $num); }, range(1, sizeof(self::PT_MONTH_NAMES))),
$element->find('span.date', 0)->innertext);
/* Fix the Image URL */
$item_image = $element->find('img.thumb', 0);
$item_image->src = preg_replace('/.*&img=([^&]+).*/', '\1', $item_image->getAttribute('data-src'));
/* Content: */
/* - Image */
/* - Category */
$content = $item_image .
'<center>' . $element->find('a.category', 0) . '</center>';
$item['content'] = defaultLinkTo($content, self::URI);
$this->items[] = $item;
}
}
}