<?php
// Feed format identifiers; WordPressBridge::collectData() uses them to choose which tags to read from a feed entry.
define('WORDPRESS_TYPE_ATOM', 1); // Feed content is of type ATOM (posts are 'entry' elements)
define('WORDPRESS_TYPE_RSS', 2); // Feed content is of type RSS (posts are 'item' elements)
/**
 * Bridge that reads the feed of a WordPress blog and returns the newest posts
 * with their full content, which is scraped from each post's own page.
 *
 * The feed is requested as ATOM, but RSS is handled as well because some sites
 * answer the ATOM URL with an RSS document.
 */
class WordPressBridge extends HttpCachingBridgeAbstract {

	/** Maximum number of posts collected per request. */
	const MAX_POSTS = 3;

	public $sitename; // Name of the site, taken from the first 'title' element of the feed

	public $maintainer = "aledeg";
	public $name = "Wordpress Bridge";
	public $uri = "https://wordpress.org/";
	public $description = "Returns the 3 newest full posts of a Wordpress blog";

	public $parameters = array( array(
		'url'=>array(
			'name'=>'Blog URL',
			'required'=>true
		)
	));

	/**
	 * Re-parses a feed element with every 'link' tag renamed to 'url'.
	 *
	 * simplehtmldom cannot parse the 'link' tag of a feed, so the element is
	 * serialized, the tags are renamed and the text is loaded back as a DOM.
	 *
	 * @param object $element simplehtmldom node of a feed entry
	 * @return mixed          result of str_get_html() on the rewritten markup
	 */
	private function ReplaceLinkTagsWithUrlTags($element){
		return str_get_html(str_replace(
			array('<link>', '</link>', '<link '),
			array('<url>', '</url>', '<url '),
			$element->outertext
		));
	}

	/**
	 * Removes the CDATA opening and closing markers from a string.
	 *
	 * @param string $string text possibly wrapped in a CDATA section
	 * @return string        the text without the CDATA markers
	 */
	private function StripCDATA($string) {
		return str_replace(array('<![CDATA[', ']]>'), '', $string);
	}

	/**
	 * Strips markup that is not part of the post from scraped article HTML:
	 * script elements, everything from a 'wpa' div to the end of that line,
	 * and form elements that start and end on the same line.
	 *
	 * @param string $content raw inner HTML of the article container
	 * @return string|null    cleaned HTML (null if a preg_replace call fails)
	 */
	private function ClearContent($content) {
		$content = preg_replace('/<script[^>]*>[^<]*<\/script>/', '', $content);
		$content = preg_replace('/<div class="wpa".*/', '', $content);
		$content = preg_replace('/<form.*\/form>/', '', $content);
		return $content;
	}

	/**
	 * Builds the basic item (uri, title, author, timestamp) from one feed entry.
	 *
	 * Optional elements that are missing yield an empty string (title, author)
	 * or false (timestamp, the same value strtotime() gives for an unparsable
	 * date) instead of a fatal error on a null node.
	 *
	 * @param object $entry simplehtmldom node of the feed entry
	 * @param int    $type  WORDPRESS_TYPE_ATOM or WORDPRESS_TYPE_RSS
	 * @return array|null   the item, or null when the entry has no usable link
	 */
	private function parseFeedEntry($entry, $type){
		$entry = $this->ReplaceLinkTagsWithUrlTags($entry);
		if(empty($entry)){
			return null; // markup could not be loaded back as a DOM
		}

		$link = $entry->find('url', 0); // 'link' => 'url'!
		if(empty($link)){
			return null;
		}

		$title = $entry->find('title', 0);
		$item = array();

		if($type === WORDPRESS_TYPE_RSS){
			$author = $entry->find('dc:creator', 0);
			$date = $entry->find('pubDate', 0);

			$item['uri'] = $link->innertext;
			$item['title'] = empty($title) ? '' : $title->plaintext;
			$item['author'] = empty($author) ? '' : trim($this->StripCDATA($author->innertext));
		} else {
			$author = $entry->find('author', 0);
			$date = $entry->find('updated', 0);

			$item['uri'] = $link->getAttribute('href');
			$item['title'] = empty($title) ? '' : $this->StripCDATA($title->plaintext);
			$item['author'] = empty($author) ? '' : trim($author->innertext);
		}

		$item['timestamp'] = empty($date) ? false : strtotime($date->innertext);

		// Without a link the full article cannot be fetched at all
		if(empty($item['uri'])){
			return null;
		}

		return $item;
	}

	/**
	 * Looks for the post body in a fetched article page.
	 *
	 * Containers are tried in order: the HTML5 'article' element, the
	 * '.single-content' class, then '.post' for old themes without HTML5.
	 *
	 * @param mixed $article_html parsed article page, or an empty value when
	 *                            the page could not be fetched
	 * @return string|null        cleaned content, or null when none was found
	 */
	private function extractContent($article_html){
		if(empty($article_html)){
			return null;
		}

		foreach(array('article', '.single-content', '.post') as $selector){
			$container = $article_html->find($selector, 0);
			if(empty($container)){
				continue;
			}

			$content = $this->ClearContent($container->innertext);
			if($content !== null){
				return $content;
			}
		}

		return null;
	}

	/**
	 * Fetches the blog feed and fills $this->items with up to MAX_POSTS posts.
	 *
	 * Each item gets 'uri', 'title', 'author' and 'timestamp' from the feed and,
	 * when a known content container is found on the post page, 'content'.
	 * Entries without a usable link are skipped and do not count towards the limit.
	 */
	public function collectData(){

		// A blog URL entered with a trailing slash must not produce '//feed/atom'
		$feedUrl = rtrim((string)$this->getURI(), '/').'/feed/atom';

		$html = $this->getSimpleHTMLDOM($feedUrl)
				or $this->returnServerError("Could not request ".$feedUrl);

		// Notice: We requested an ATOM feed, however some sites return RSS feeds instead!
		$type = WORDPRESS_TYPE_ATOM; // ATOM is the default
		$posts = $html->find('entry');
		if(empty($posts)){
			$type = WORDPRESS_TYPE_RSS;
			$posts = $html->find('item');
		}

		if(empty($posts)){
			$this->returnServerError("Sorry, ".$this->getURI()." doesn't seem to be a Wordpress blog.");
			return;
		}

		$feedTitle = $html->find('title', 0);
		if(!empty($feedTitle)){
			$this->sitename = $feedTitle->plaintext;
		}

		$collected = 0;

		foreach($posts as $post){
			if($collected >= self::MAX_POSTS){
				break; // enough posts, no need to walk the rest of the feed
			}

			$item = $this->parseFeedEntry($post, $type);
			if($item === null){
				continue;
			}

			// Drop cached copies of the article that are older than 24 hours
			if($this->get_cached_time($item['uri']) <= strtotime('-24 hours'))
				$this->remove_from_cache($item['uri']);

			// get_cached() is provided by the parent class; presumably it returns
			// the parsed article page - extractContent() tolerates an empty result
			$content = $this->extractContent($this->get_cached($item['uri']));
			if($content !== null){
				$item['content'] = $content;
			}

			$this->items[] = $item;
			$collected++;
		}
	}

	/**
	 * @return mixed the blog URL given through the 'url' parameter
	 */
	public function getURI(){
		return $this->getInput('url');
	}

	/**
	 * @return string "<site name> - Wordpress Bridge", or just the bridge name
	 *                while no site name is known (e.g. before the feed was read)
	 */
	public function getName() {
		if(empty($this->sitename)){
			return $this->name;
		}

		return "{$this->sitename} - Wordpress Bridge";
	}

	/**
	 * @return int cache duration in seconds
	 */
	public function getCacheDuration() {
		return 3600*3; // 3 hours
	}
}