Rss-Bridge/bridges/BakaUpdatesMangaReleasesBridge.php
fulmeek 9d85b951f7 [BakaUpdatesMangaReleasesBridge] rework to parse new layout (#1052)
* rework to parse new layout
* skip incomplete rows

The last row could have fewer columns if there are fewer rows than the items limit. This usually should not happen, though.

* use constant for skipping
2019-03-02 19:09:16 +01:00

91 lines
2.3 KiB
PHP

<?php
/**
 * Bridge that turns the release list of one series on mangaupdates.com
 * into feed items.
 *
 * The series is selected through the required `series_id` parameter. The
 * release page is read as a flat list of cells which is regrouped into rows
 * of LIMIT_COLS cells each: date, series title, volume, chapter, groups.
 */
class BakaUpdatesMangaReleasesBridge extends BridgeAbstract {
	const NAME = 'Baka Updates Manga Releases';
	const URI = 'https://www.mangaupdates.com/';
	const DESCRIPTION = 'Get the latest series releases';
	const MAINTAINER = 'fulmeek';
	const PARAMETERS = array(array(
		'series_id' => array(
			'name'			=> 'Series ID',
			'type'			=> 'number',
			'required'		=> true,
			'exampleValue'	=> '12345'
		)
	));

	/** Number of cells that make up one complete release row. */
	const LIMIT_COLS = 5;

	/** Maximum number of release rows that are turned into items. */
	const LIMIT_ITEMS = 10;

	/** Series title taken from the first release row; empty until collected. */
	private $feedName = '';

	/**
	 * Fetches the release page and appends one item per complete row
	 * to $this->items.
	 *
	 * Each item gets: 'timestamp' (when the date cell is parsable), 'title'
	 * (series title plus optional "Vol."/"Chp." parts), 'author' (release
	 * groups, when present), 'content', 'uri' and 'uid' (SHA-1 of the title).
	 *
	 * NOTE(review): getSimpleHTMLDOM() and returnServerError() are defined
	 * outside this file; the code below presumes returnServerError() aborts
	 * the request - confirm against the framework.
	 */
	public function collectData() {
		$html = getSimpleHTMLDOM($this->getURI())
			or returnServerError('Series not found');

		// content is an unstructured pile of divs, ugly to parse
		$cells = $html->find('div#main_content div.row > div.text');
		if (!$cells)
			returnServerError('No releases');

		// regroup the flat cell list into rows and keep only the first ones
		$rows = array_slice(
			array_chunk($cells, self::LIMIT_COLS), 0, self::LIMIT_ITEMS
		);

		if (isset($rows[0][1])) {
			$this->feedName = html_entity_decode($rows[0][1]->plaintext);
		}

		foreach($rows as $row) {
			// the last chunk may be incomplete, skip it
			if (count($row) < self::LIMIT_COLS) continue;

			$item = array();
			$title = array();

			$item['content'] = '';

			$objDate = $row[0];
			if ($objDate) {
				// strtotime() yields false for unparsable input; do not
				// store that as a timestamp
				$timestamp = strtotime($objDate->plaintext);
				if ($timestamp !== false)
					$item['timestamp'] = $timestamp;
			}

			$objTitle = $row[1];
			if ($objTitle) {
				$title[] = html_entity_decode($objTitle->plaintext);
				$item['content'] .= '<p>Series: ' . $objTitle->innertext . '</p>';
			}

			// volume and chapter may legitimately be "0", which empty()
			// would reject - hence the explicit text check
			$objVolume = $row[2];
			if ($this->hasText($objVolume))
				$title[] = 'Vol.' . $objVolume->plaintext;

			$objChapter = $row[3];
			if ($this->hasText($objChapter))
				$title[] = 'Chp.' . $objChapter->plaintext;

			$objAuthor = $row[4];
			if ($this->hasText($objAuthor)) {
				$item['author'] = html_entity_decode($objAuthor->plaintext);
				$item['content'] .= '<p>Groups: ' . $objAuthor->innertext . '</p>';
			}

			$item['title'] = implode(' ', $title);
			$item['uri'] = $this->getURI();
			$item['uid'] = hash('sha1', $item['title']);

			$this->items[] = $item;
		}
	}

	/**
	 * Returns the release search URL for the configured series, or the
	 * site root when no series id is set.
	 */
	public function getURI(){
		$series_id = $this->getInput('series_id');
		if (!empty($series_id)) {
			return self::URI . 'releases.html?search=' . $series_id . '&stype=series';
		}
		return self::URI;
	}

	/**
	 * Returns "<series title> - <bridge name>" once a series title has been
	 * collected, otherwise defers to the parent implementation.
	 */
	public function getName(){
		if(!empty($this->feedName)) {
			return $this->feedName . ' - ' . self::NAME;
		}
		return parent::getName();
	}

	/**
	 * Tells whether a cell carries visible text.
	 *
	 * Unlike empty(), this accepts the string "0"; cells that are missing or
	 * contain only whitespace are reported as having no text.
	 *
	 * @param object|null $cell element exposing a `plaintext` property
	 * @return bool
	 */
	private function hasText($cell){
		return $cell && trim((string)$cell->plaintext) !== '';
	}
}