[MediapartBridge] Fix article parsing

* Only process article items; fixes issue #1292
This commit is contained in:
killruana 2019-09-16 21:26:19 +02:00 committed by Lyra
parent f12f6a2dba
commit c694810d9a
1 changed file with 25 additions and 20 deletions

View File

@ -30,29 +30,34 @@ class MediapartBridge extends FeedExpander {
protected function parseItem($newsItem) { protected function parseItem($newsItem) {
$item = parent::parseItem($newsItem); $item = parent::parseItem($newsItem);
// Enable single page mode? // Mediapart provide multiple type of contents.
if ($this->getInput('single_page_mode') === true) { // We only process items relative to the newspaper
$item['uri'] .= '?onglet=full'; // See issue #1292 - https://github.com/RSS-Bridge/rss-bridge/issues/1292
} if (strpos($item['uri'], self::URI . 'journal/') === 0) {
// Enable single page mode?
if ($this->getInput('single_page_mode') === true) {
$item['uri'] .= '?onglet=full';
}
// If a session cookie is defined, get the full article // If a session cookie is defined, get the full article
$mpsessid = $this->getInput('mpsessid'); $mpsessid = $this->getInput('mpsessid');
if (!empty($mpsessid)) { if (!empty($mpsessid)) {
// Set the session cookie // Set the session cookie
$opt = array(); $opt = array();
$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid; $opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;
// Get the page // Get the page
$articlePage = getSimpleHTMLDOM( $articlePage = getSimpleHTMLDOM(
$newsItem->link . '?onglet=full', $newsItem->link . '?onglet=full',
array(), array(),
$opt); $opt);
// Extract the article content // Extract the article content
$content = $articlePage->find('div.content-article', 0)->innertext; $content = $articlePage->find('div.content-article', 0)->innertext;
$content = sanitize($content); $content = sanitize($content);
$content = defaultLinkTo($content, static::URI); $content = defaultLinkTo($content, static::URI);
$item['content'] .= $content; $item['content'] .= $content;
}
} }
return $item; return $item;