Rss-Bridge/bridges/YoutubeBridge.php
logmanoriginal 9a9ce30b16 [YoutubeBridge] Fix issues loading playlists
Videos that are part of a playlist have the playlist ID encoded in
the URI. When loading the video info the page contents change
unexpectedly due to the playlist being part of the page.

This removes any trailing parameters from the video ID in order to
ensure only pure videos are loaded at all times.
2017-08-19 18:51:30 +02:00

194 lines
6.5 KiB
PHP

<?php
/**
* RssBridgeYoutube
* Returns the newest videos
* WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
* change: define('MAX_FILE_SIZE', 600000);
* into: define('MAX_FILE_SIZE', 900000); (or more)
*/
class YoutubeBridge extends BridgeAbstract {
// Bridge metadata
const NAME = 'YouTube Bridge';
const URI = 'https://www.youtube.com/';
const CACHE_TIMEOUT = 10800; // seconds (3 hours)
const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';
const MAINTAINER = 'mitsukarenai';

// One entry per query mode. The inner keys (u, c, p, s, pa) are the input
// names that collectData() reads through getInput().
const PARAMETERS = array(
    'By username' => array(
        'u' => array(
            'name' => 'username',
            'exampleValue' => 'test',
            'required' => true,
        ),
    ),
    'By channel id' => array(
        'c' => array(
            'name' => 'channel id',
            'exampleValue' => '15',
            'required' => true,
        ),
    ),
    'By playlist Id' => array(
        'p' => array(
            'name' => 'playlist id',
            'exampleValue' => '15',
        ),
    ),
    'Search result' => array(
        's' => array(
            'name' => 'search keyword',
            'exampleValue' => 'test',
        ),
        'pa' => array(
            'name' => 'page',
            'type' => 'number',
            'exampleValue' => 1,
        ),
    ),
);
/**
 * Loads the watch page of a single video and extracts author, description
 * and publication time from it.
 *
 * Each out-parameter is only overwritten when the corresponding value could
 * be found in the page; otherwise it keeps the value the caller passed in.
 *
 * @param string $vid    Video id, appended to the "watch?v=" URI
 * @param string $author Receives the text between '"author=' and the next
 *                       literal '\u0026' sequence of the page source
 * @param string $desc   Receives the inner HTML of div#watch-description-text
 * @param int    $time   Receives the timestamp parsed from the content
 *                       attribute of meta[itemprop=datePublished]
 */
private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){
    $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid");
    if(!$html) {
        // The page could not be loaded; keep the caller's defaults
        return;
    }

    // The author name is embedded in the raw page source between two markers.
    // The end marker is the six literal characters \u0026 (single quotes).
    $source = $html->innertext;
    $startMarker = '"author=';
    $start = strpos($source, $startMarker);
    if($start !== false) {
        $start += strlen($startMarker);
        $end = strpos($source, '\u0026', $start);
        if($end !== false) {
            $author = substr($source, $start, $end - $start);
        }
    }

    $descNode = $html->find('div#watch-description-text', 0);
    if(!is_null($descNode)) {
        $desc = $descNode->innertext;
    }

    $dateNode = $html->find('meta[itemprop=datePublished]', 0);
    if(!is_null($dateNode)) {
        $published = strtotime($dateNode->getAttribute('content'));
        // strtotime() yields false for unparsable input; do not hand that on
        if($published !== false) {
            $time = $published;
        }
    }
}
/**
 * Appends one video to $this->items.
 *
 * The item content is a thumbnail image linking to the video, followed by a
 * line break and the description.
 *
 * @param string $vid    Video id
 * @param string $title  Video title
 * @param string $author Author name
 * @param string $desc   Description, inserted into the content as-is
 * @param int    $time   Publication timestamp
 */
private function ytBridgeAddItem($vid, $title, $author, $desc, $time){
    $videoUri = self::URI . 'watch?v=' . $vid;

    // Thumbnails are addressed on the "img." host instead of the "www." host
    $thumbnailBase = str_replace('/www.', '/img.', self::URI);
    $thumbnailUri = "{$thumbnailBase}vi/{$vid}/0.jpg";

    $this->items[] = array(
        'id' => $vid,
        'title' => $title,
        'author' => $author,
        'timestamp' => $time,
        'uri' => $videoUri,
        'content' => '<a href="' . $videoUri . '"><img src="' . $thumbnailUri . '" /></a><br />' . $desc,
    );
}
/**
 * Turns every <entry> of a parsed XML feed into an item and uses the feed
 * title as the request name.
 *
 * Entries without an <id> are skipped. Other missing elements fall back to
 * an empty string (title, author, description) or 0 (timestamp) instead of
 * being read from a null node.
 *
 * @param object $xml Parsed feed document offering find()
 */
private function ytBridgeParseXmlFeed($xml) {
    foreach($xml->find('entry') as $element) {
        $idNode = $element->find('id', 0);
        if(is_null($idNode)) {
            // Without an id there is no video to link to
            continue;
        }
        $vid = str_replace('yt:video:', '', $idNode->plaintext);

        $titleNode = $element->find('title', 0);
        $title = is_null($titleNode) ? '' : $this->ytBridgeFixTitle($titleNode->plaintext);

        $authorNode = $element->find('name', 0);
        $author = is_null($authorNode) ? '' : $authorNode->plaintext;

        $desc = '';
        $descNode = $element->find('media:description', 0);
        if(!is_null($descNode)) {
            // Make sure the description is easy on the eye :)
            $desc = htmlspecialchars($descNode->innertext);
            $desc = nl2br($desc);
            // Turn plain URLs into clickable links
            $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims',
                '<a href="$1" target="_blank">$1</a> ',
                $desc);
        }

        $time = 0;
        $publishedNode = $element->find('published', 0);
        if(!is_null($publishedNode)) {
            $published = strtotime($publishedNode->plaintext);
            // strtotime() yields false for unparsable input
            if($published !== false) {
                $time = $published;
            }
        }

        $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
    }

    // Only replace the request name when the feed actually carries a title
    $feedTitle = $xml->find('feed > title', 0);
    if(!is_null($feedTitle)) {
        $this->request = $this->ytBridgeFixTitle($feedTitle->plaintext);
    }
}
/**
 * Collects up to ten videos from a parsed HTML listing page.
 *
 * For every accepted element the video page itself is queried through
 * ytBridgeQueryVideoInfo() to obtain author, description and timestamp.
 * Elements titled "[Private Video]", elements without a link or title node
 * and elements whose first link is not a "watch?v=" link are skipped and do
 * not count towards the limit.
 *
 * @param object $html             Parsed listing page offering find()
 * @param string $element_selector Selector matching one element per video
 * @param string $title_selector   Selector of the title node, relative to a
 *                                 video element
 */
private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector){
    $limit = 10;
    $count = 0;

    foreach($html->find($element_selector) as $element) {
        if($count >= $limit) {
            // Enough items collected; the remaining elements are irrelevant
            break;
        }

        $link = $element->find('a', 0);
        $titleNode = $element->find($title_selector, 0);
        if(is_null($link) || is_null($titleNode)) {
            continue;
        }

        // Keep only the bare video id: links inside a playlist carry further
        // parameters (e.g. the playlist id) behind an ampersand
        if(!preg_match('/watch\?v=([^&]+)/', $link->href, $matches)) {
            // Not a link to a video
            continue;
        }
        $vid = $matches[1];

        $title = $this->ytBridgeFixTitle($titleNode->plaintext);
        if($title === '[Private Video]') {
            continue;
        }

        $author = '';
        $desc = '';
        $time = 0;
        $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time);
        $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
        $count++;
    }
}
/**
 * Decodes HTML entities in a title.
 *
 * Numeric entities (&#1234;) and named ones (&quot;), including both quote
 * styles, are converted to their UTF-8 characters.
 *
 * @param string $title Title as found in the markup
 * @return string Decoded title
 */
private function ytBridgeFixTitle($title) {
    $decoded = html_entity_decode($title, ENT_QUOTES, 'UTF-8');

    return $decoded;
}
/**
 * Passes $url to getSimpleHTMLDOM() together with a fixed set of options
 * and returns whatever that call returns.
 *
 * @param string $url Address of the document to load
 * @return mixed Result of getSimpleHTMLDOM(); callers in this class treat a
 *               falsy value as a failed request
 */
private function ytGetSimpleHTMLDOM($url){
    // All optional arguments are given positionally. The labels are
    // presumably the parameter names of getSimpleHTMLDOM() -- confirm
    // against its definition.
    return getSimpleHTMLDOM(
        $url,
        false,                  // use_include_path
        null,                   // context
        0,                      // offset
        null,                   // maxLen
        true,                   // lowercase
        true,                   // forceTagsClosed
        DEFAULT_TARGET_CHARSET, // target_charset
        false,                  // stripRN
        DEFAULT_BR_TEXT,        // defaultBRText
        DEFAULT_SPAN_TEXT       // defaultSpanText
    );
}
/**
 * Collects the items for the requested mode.
 *
 * Modes are checked in this order; the first input that is set wins:
 *  - u  (username)   XML feed, falling back to the user's HTML video listing
 *  - c  (channel id) XML feed, falling back to the channel's HTML video listing
 *  - p  (playlist)   HTML playlist page
 *  - s  (search)     HTML search results, optional page number in "pa"
 *
 * Failed requests are reported through returnServerError(), a missing mode
 * through returnClientError().
 */
public function collectData(){
    $url_feed = '';
    $url_listing = '';

    if($this->getInput('u')) { /* User mode */
        $this->request = $this->getInput('u');
        $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
        $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
    } elseif($this->getInput('c')) { /* Channel mode */
        $this->request = $this->getInput('c');
        $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
        $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
    }

    if(!empty($url_feed) && !empty($url_listing)) {
        // Prefer the XML feed; scrape the HTML listing only if it is unavailable
        if($xml = $this->ytGetSimpleHTMLDOM($url_feed)) {
            $this->ytBridgeParseXmlFeed($xml);
        } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) {
            $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3');
        } else {
            returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing");
        }
    } elseif($this->getInput('p')) { /* playlist mode */
        $this->request = $this->getInput('p');
        $url_listing = self::URI . 'playlist?list=' . urlencode($this->request);

        $html = $this->ytGetSimpleHTMLDOM($url_listing);
        if(!$html) {
            returnServerError("Could not request YouTube. Tried:\n - $url_listing");
        }

        $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a');

        // Name the feed after the page title; keep the playlist id if there is none
        $pageTitle = $html->find('title', 0);
        if(!is_null($pageTitle)) {
            $this->request = 'Playlist: ' . str_replace(' - YouTube', '', $pageTitle->plaintext);
        }
    } elseif($this->getInput('s')) { /* search mode */
        $this->request = $this->getInput('s');

        $page = 1;
        if($this->getInput('pa')) {
            // Keep digits only and never go below the first page
            $page = max(1, (int)preg_replace('/[^0-9]/', '', $this->getInput('pa')));
        }

        $url_listing = self::URI
            . 'results?search_query='
            . urlencode($this->request)
            . '&page='
            . $page
            . '&filters=video&search_sort=video_date_uploaded';

        $html = $this->ytGetSimpleHTMLDOM($url_listing);
        if(!$html) {
            returnServerError("Could not request YouTube. Tried:\n - $url_listing");
        }

        $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3');

        // Name the feed after the page title; keep the keyword if there is none
        $pageTitle = $html->find('title', 0);
        if(!is_null($pageTitle)) {
            $this->request = 'Search: ' . str_replace(' - YouTube', '', $pageTitle->plaintext);
        }
    } else { /* no valid mode */
        returnClientError("You must specify either:\n - YouTube username (?u=...)\n"
            . " - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
    }
}
/**
 * Builds the feed name: the current request followed by " - " when one is
 * set, then the fixed text "YouTube Bridge".
 *
 * @return string
 */
public function getName(){
    $prefix = '';
    if(!empty($this->request)) {
        $prefix = $this->request . ' - ';
    }

    return $prefix . 'YouTube Bridge';
}
}