diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php
new file mode 100644
index 00000000..a7ae72e3
--- /dev/null
+++ b/bridges/NordbayernBridge.php
@@ -0,0 +1,127 @@
+ array(
+            'name' => 'region',
+            'type' => 'list',
+            'exampleValue' => 'Nürnberg',
+            'title' => 'Select a region',
+            'values' => array(
+                'Nürnberg' => 'nuernberg',
+                'Fürth' => 'fuerth',
+                'Altdorf' => 'altdorf',
+                'Ansbach' => 'ansbach',
+                'Bad Windsheim' => 'bad-windsheim',
+                'Bamberg' => 'bamberg',
+                'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen',
+                'Feucht' => 'feucht',
+                'Forchheim' => 'forchheim',
+                'Gunzenhausen' => 'gunzenhausen',
+                'Hersbruck' => 'hersbruck',
+                'Herzogenaurach' => 'herzogenaurach',
+                'Hilpoltstein' => 'hilpoltstein',
+                'Höchstadt' => 'hoechstadt',
+                'Lauf' => 'lauf',
+                'Neumarkt' => 'neumarkt',
+                'Neustadt/Aisch' => 'neustadt-aisch',
+                'Pegnitz' => 'pegnitz',
+                'Roth' => 'roth',
+                'Rothenburg o.d.T.' => 'rothenburg-o-d-t',
+                'Schwabach' => 'schwabach',
+                'Treuchtlingen' => 'treuchtlingen',
+                'Weißenburg' => 'weissenburg'
+            )
+        ),
+        'policeReports' => array(
+            'name' => 'Police Reports',
+            'type' => 'checkbox',
+            'exampleValue' => 'checked',
+            'title' => 'Read Police Reports',
+        )
+    ));
+
+    /**
+     * Extract a JPEG URL from the inner text of a script element.
+     *
+     * Searches the script text for an escaped src=\'https:….jpg assignment,
+     * un-escapes the captured URL and appends the query string "?w=800".
+     *
+     * @param object|null $script Element whose innertext is searched; callers
+     *                            pass the result of find('script', n), which
+     *                            may be null when no such element exists.
+     * @return string|null The image URL, or null when there is no script
+     *                     element or it contains no matching URL.
+     */
+    private function getImageUrlFromScript($script) {
+        // Nothing to scan when the caller's lookup came back empty.
+        if($script === null) {
+            return null;
+        }
+
+        $pattern = "#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9_]*\\.jpg)#";
+        if(preg_match($pattern, $script->innertext, $matches) !== 1) {
+            return null;
+        }
+
+        // The URL is backslash-escaped inside the script source.
+        return stripcslashes($matches[1]) . '?w=800';
+    }
+
+    /**
+     * Load one article page and append it to $this->items.
+     *
+     * The item content is assembled from, in order: the image of the
+     * "panorama" figure (if any), the paragraphs that are direct children of
+     * the article-content container, and the paragraphs, captions and images
+     * of every article-slideshow container inside it.
+     *
+     * Pages without an h1 or without an article-content container are
+     * skipped instead of aborting the whole feed.
+     *
+     * @param string $link Absolute URL of the article page.
+     * @return void
+     */
+    private function handleArticle($link) {
+        $article = getSimpleHTMLDOM($link);
+        $content = $article->find('div[class*=article-content]', 0);
+        $headline = $article->find('h1', 0);
+
+        if($content === null || $headline === null) {
+            // Not laid out like a regular article; nothing usable to emit.
+            $article->clear();
+            return;
+        }
+
+        $item = array(
+            'uri' => $link,
+            'title' => $headline->innertext,
+            'content' => '',
+        );
+
+        // Lead image: try the first script of the panorama figure, then the second.
+        $figure = $article->find('figure[class*=panorama]', 0);
+        if($figure !== null) {
+            $imgUrl = $this->getImageUrlFromScript($figure->find('script', 0));
+            if($imgUrl === null) {
+                $imgUrl = $this->getImageUrlFromScript($figure->find('script', 1));
+            }
+            if($imgUrl !== null) {
+                $item['content'] .= '<img src="' . $imgUrl . '">';
+            }
+        }
+
+        // Regular paragraphs: direct children only, so nested blocks are not duplicated.
+        foreach($content->children() as $child) {
+            if($child->tag === 'p') {
+                $item['content'] .= $child;
+            }
+        }
+
+        // Slideshow blocks: paragraphs, captions and script-embedded images.
+        foreach($content->find('div[class*=article-slideshow]') as $slides) {
+            foreach($slides->children() as $child) {
+                switch($child->tag) {
+                    case 'p':
+                        $item['content'] .= $child;
+                        break;
+                    case 'h5':
+                        // NOTE(review): the caption is emitted as newline-delimited plain
+                        // text; wrapping markup may have been intended here - confirm.
+                        $item['content'] .= "\n" . $child->plaintext . "\n";
+                        break;
+                    case 'a':
+                        $url = $this->getImageUrlFromScript($child->find('script', 0));
+                        if($url !== null) {
+                            $item['content'] .= '<img src="' . $url . '">';
+                        }
+                        break;
+                }
+            }
+        }
+
+        $this->items[] = $item;
+        $article->clear();
+    }
+
+    /**
+     * Collect every article linked from the news blocks of a region page.
+     *
+     * The first "newsblock" section is treated as the police reports and the
+     * second as the regional news. Police reports are only read when
+     * requested and are processed before the regional news.
+     *
+     * @param object $listSite          Parsed region overview page.
+     * @param mixed  $readPoliceReports Police reports are included only when
+     *                                  this is strictly true.
+     * @return void
+     */
+    private function handleNewsblock($listSite, $readPoliceReports) {
+        $newsBlocks = $listSite->find('section[class*=newsblock]');
+
+        // Pick the sections to read; a section missing from the page is skipped.
+        $selected = array();
+        if($readPoliceReports === true && isset($newsBlocks[0])) {
+            $selected[] = $newsBlocks[0];
+        }
+        if(isset($newsBlocks[1])) {
+            $selected[] = $newsBlocks[1];
+        }
+
+        foreach($selected as $block) {
+            foreach($block->find('h2') as $headline) {
+                $anchor = $headline->find('a', 0);
+                if($anchor === null) {
+                    // Headline without a link: there is no article to fetch.
+                    continue;
+                }
+                $this->handleArticle(self::URI . $anchor->href);
+            }
+        }
+    }
+
+    /**
+     * Bridge entry point: load the overview page of the selected region and
+     * turn the articles linked from it into feed items.
+     *
+     * @return void
+     */
+    public function collectData() {
+        $region = $this->getInput('region');
+        $listSite = getSimpleHTMLDOM(self::URI . '/region/' . $region);
+
+        $this->handleNewsblock($listSite, $this->getInput('policeReports'));
+    }
+}