Revert "all: Use ->remove() instead of ->outertext = ''"
This reverts commit 052844f5e1.
There is a bug in ->remove() that causes the parser to incorrectly
identify elements in the DOM tree that shouldn't exist anymore.
References #1151
This commit is contained in:
parent
468d8be72d
commit
6c4098d655
21 changed files with 49 additions and 44 deletions
|
@ -50,18 +50,18 @@ class AsahiShimbunAJWBridge extends BridgeAbstract {
|
||||||
$e_lead = $element->find('span.Lead', 0);
|
$e_lead = $element->find('span.Lead', 0);
|
||||||
if ($e_lead) {
|
if ($e_lead) {
|
||||||
$item['content'] = $e_lead->innertext;
|
$item['content'] = $e_lead->innertext;
|
||||||
$e_lead->remove();
|
$e_lead->outertext = '';
|
||||||
} else {
|
} else {
|
||||||
$item['content'] = $element->innertext;
|
$item['content'] = $element->innertext;
|
||||||
}
|
}
|
||||||
$e_date = $element->find('span.EnDate', 0);
|
$e_date = $element->find('span.EnDate', 0);
|
||||||
if ($e_date) {
|
if ($e_date) {
|
||||||
$item['timestamp'] = strtotime($e_date->innertext);
|
$item['timestamp'] = strtotime($e_date->innertext);
|
||||||
$e_date->remove();
|
$e_date->outertext = '';
|
||||||
}
|
}
|
||||||
$e_video = $element->find('span.EnVideo', 0);
|
$e_video = $element->find('span.EnVideo', 0);
|
||||||
if ($e_video) {
|
if ($e_video) {
|
||||||
$e_video->remove();
|
$e_video->outertext = '';
|
||||||
$element->innertext = "VIDEO: $element->innertext";
|
$element->innertext = "VIDEO: $element->innertext";
|
||||||
}
|
}
|
||||||
$item['title'] = $element->innertext;
|
$item['title'] = $element->innertext;
|
||||||
|
|
|
@ -55,7 +55,7 @@ class BundesbankBridge extends BridgeAbstract {
|
||||||
$title = $study->find('.teasable__title div.h2', 0);
|
$title = $study->find('.teasable__title div.h2', 0);
|
||||||
|
|
||||||
foreach($title->children as &$child) {
|
foreach($title->children as &$child) {
|
||||||
$child->remove();
|
$child->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$item['title'] = $title->innertext;
|
$item['title'] = $title->innertext;
|
||||||
|
|
|
@ -58,7 +58,7 @@ class CastorusBridge extends BridgeAbstract {
|
||||||
returnServerError('Cannot find nodes!');
|
returnServerError('Cannot find nodes!');
|
||||||
|
|
||||||
foreach($nodes as $node) {
|
foreach($nodes as $node) {
|
||||||
$node->remove();
|
$node->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
return strtotime($activity->innertext);
|
return strtotime($activity->innertext);
|
||||||
|
|
|
@ -50,7 +50,7 @@ class DauphineLibereBridge extends FeedExpander {
|
||||||
private function extractContent($url){
|
private function extractContent($url){
|
||||||
$html2 = getSimpleHTMLDOMCached($url);
|
$html2 = getSimpleHTMLDOMCached($url);
|
||||||
foreach ($html2->find('.noprint, link, script, iframe, .shareTool, .contentInfo') as $remove) {
|
foreach ($html2->find('.noprint, link, script, iframe, .shareTool, .contentInfo') as $remove) {
|
||||||
$remove->remove();
|
$remove->outertext = '';
|
||||||
}
|
}
|
||||||
return $html2->find('div.content', 0)->innertext;
|
return $html2->find('div.content', 0)->innertext;
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,16 +29,16 @@ class EconomistBridge extends BridgeAbstract {
|
||||||
// Remove newsletter subscription box
|
// Remove newsletter subscription box
|
||||||
$newsletter = $content->find('div[class="newsletter-form__message"]', 0);
|
$newsletter = $content->find('div[class="newsletter-form__message"]', 0);
|
||||||
if ($newsletter)
|
if ($newsletter)
|
||||||
$newsletter->remove();
|
$newsletter->outertext = '';
|
||||||
|
|
||||||
$newsletterForm = $content->find('form', 0);
|
$newsletterForm = $content->find('form', 0);
|
||||||
if ($newsletterForm)
|
if ($newsletterForm)
|
||||||
$newsletterForm->remove();
|
$newsletterForm->outertext = '';
|
||||||
|
|
||||||
// Remove next and previous article URLs at the bottom
|
// Remove next and previous article URLs at the bottom
|
||||||
$nextprev = $content->find('div[class="blog-post__next-previous-wrapper"]', 0);
|
$nextprev = $content->find('div[class="blog-post__next-previous-wrapper"]', 0);
|
||||||
if ($nextprev)
|
if ($nextprev)
|
||||||
$nextprev->remove();
|
$nextprev->outertext = '';
|
||||||
|
|
||||||
$section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ];
|
$section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ];
|
||||||
|
|
||||||
|
|
|
@ -584,7 +584,7 @@ EOD;
|
||||||
|
|
||||||
foreach($content_filters as $filter) {
|
foreach($content_filters as $filter) {
|
||||||
foreach($content->find($filter) as $subject) {
|
foreach($content->find($filter) as $subject) {
|
||||||
$subject->remove();
|
$subject->outertext = '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
|
||||||
$permalink = $breach->find('p', 1)->find('a', 0)->href;
|
$permalink = $breach->find('p', 1)->find('a', 0)->href;
|
||||||
|
|
||||||
// Remove permalink
|
// Remove permalink
|
||||||
$breach->find('p', 1)->find('a', 0)->remove();
|
$breach->find('p', 1)->find('a', 0)->outertext = '';
|
||||||
|
|
||||||
$item['title'] = $breach->find('h3', 0)->plaintext . ' - ' . $accounts[1] . ' breached accounts';
|
$item['title'] = $breach->find('h3', 0)->plaintext . ' - ' . $accounts[1] . ' breached accounts';
|
||||||
$item['dateAdded'] = strtotime($dateAdded[1]);
|
$item['dateAdded'] = strtotime($dateAdded[1]);
|
||||||
|
|
|
@ -239,16 +239,16 @@ class JustETFBridge extends BridgeAbstract {
|
||||||
or returnServerError('Article body not found!');
|
or returnServerError('Article body not found!');
|
||||||
|
|
||||||
// Remove teaser image
|
// Remove teaser image
|
||||||
$element->find('img.teaser-img', 0)->remove();
|
$element->find('img.teaser-img', 0)->outertext = '';
|
||||||
|
|
||||||
// Remove self advertisements
|
// Remove self advertisements
|
||||||
foreach($element->find('.call-action') as $adv) {
|
foreach($element->find('.call-action') as $adv) {
|
||||||
$adv->remove();
|
$adv->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove tips
|
// Remove tips
|
||||||
foreach($element->find('.panel-edu') as $tip) {
|
foreach($element->find('.panel-edu') as $tip) {
|
||||||
$tip->remove();
|
$tip->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove inline scripts (used for i.e. interactive graphs) as they are
|
// Remove inline scripts (used for i.e. interactive graphs) as they are
|
||||||
|
@ -318,7 +318,7 @@ class JustETFBridge extends BridgeAbstract {
|
||||||
$description = $description->parent();
|
$description = $description->parent();
|
||||||
|
|
||||||
foreach($description->find('div') as $div) {
|
foreach($description->find('div') as $div) {
|
||||||
$div->remove();
|
$div->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$quote = $html->find('div.infobox div.val', 0)
|
$quote = $html->find('div.infobox div.val', 0)
|
||||||
|
|
|
@ -61,7 +61,7 @@ class NextgovBridge extends FeedExpander {
|
||||||
return 'Could not request Nextgov: ' . $url;
|
return 'Could not request Nextgov: ' . $url;
|
||||||
|
|
||||||
$contents = $article->find('div.wysiwyg', 0);
|
$contents = $article->find('div.wysiwyg', 0);
|
||||||
$contents->find('svg.content-tombstone', 0)->remove();
|
$contents->find('svg.content-tombstone', 0)->outertext = '';
|
||||||
$contents = $contents->innertext;
|
$contents = $contents->innertext;
|
||||||
$contents = stripWithDelimiters($contents, '<div class="ad-container">', '</div>');
|
$contents = stripWithDelimiters($contents, '<div class="ad-container">', '</div>');
|
||||||
$contents = stripWithDelimiters($contents, '<div', '</div>'); //ad outer div
|
$contents = stripWithDelimiters($contents, '<div', '</div>'); //ad outer div
|
||||||
|
|
|
@ -51,7 +51,7 @@ class OsmAndBlogBridge extends BridgeAbstract {
|
||||||
|
|
||||||
private function cleanupContent($content, ...$removeItems) {
|
private function cleanupContent($content, ...$removeItems) {
|
||||||
foreach ($removeItems as $obj) {
|
foreach ($removeItems as $obj) {
|
||||||
if ($obj) $obj->remove();
|
if ($obj) $obj->outertext = '';
|
||||||
}
|
}
|
||||||
foreach ($content->find('img') as $obj) {
|
foreach ($content->find('img') as $obj) {
|
||||||
$obj->src = $this->filterURL($obj->src);
|
$obj->src = $this->filterURL($obj->src);
|
||||||
|
|
|
@ -83,7 +83,7 @@ class PikabuBridge extends BridgeAbstract {
|
||||||
|
|
||||||
foreach($el_to_remove_selectors as $el_to_remove_selector) {
|
foreach($el_to_remove_selectors as $el_to_remove_selector) {
|
||||||
foreach($post->find($el_to_remove_selector) as $el) {
|
foreach($post->find($el_to_remove_selector) as $el) {
|
||||||
$el->remove();
|
$el->outertext = '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,17 +38,20 @@ class RadioMelodieBridge extends BridgeAbstract {
|
||||||
$imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]');
|
$imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]');
|
||||||
foreach($imgs as $img) {
|
foreach($imgs as $img) {
|
||||||
$img->src = $this->rewriteImage($img->src);
|
$img->src = $this->rewriteImage($img->src);
|
||||||
|
$article->save();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove Google Ads
|
// Remove Google Ads
|
||||||
$ads = $article->find('div[class=adInline]');
|
$ads = $article->find('div[class=adInline]');
|
||||||
foreach($ads as $ad) {
|
foreach($ads as $ad) {
|
||||||
$ad->remove();
|
$ad->outertext = '';
|
||||||
|
$article->save();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove Radio Melodie Logo
|
// Remove Radio Melodie Logo
|
||||||
$logoHTML = $article->find('div[id=logoArticleRM]', 0);
|
$logoHTML = $article->find('div[id=logoArticleRM]', 0);
|
||||||
$logoHTML->remove();
|
$logoHTML->outertext = '';
|
||||||
|
$article->save();
|
||||||
|
|
||||||
$author = $article->find('p[class=AuthorName]', 0)->plaintext;
|
$author = $article->find('p[class=AuthorName]', 0)->plaintext;
|
||||||
|
|
||||||
|
@ -62,7 +65,8 @@ class RadioMelodieBridge extends BridgeAbstract {
|
||||||
$header = '<img src="' . $picture[0] . '"/>';
|
$header = '<img src="' . $picture[0] . '"/>';
|
||||||
|
|
||||||
// Remove the Date and Author part
|
// Remove the Date and Author part
|
||||||
$textDOM->find('div[class=AuthorDate]', 0)->remove();
|
$textDOM->find('div[class=AuthorDate]', 0)->outertext = '';
|
||||||
|
$article->save();
|
||||||
$text = $textDOM->innertext;
|
$text = $textDOM->innertext;
|
||||||
$item['content'] = '<h1>' . $item['title'] . '</h1>' . $date . '<br/>' . $header . $text;
|
$item['content'] = '<h1>' . $item['title'] . '</h1>' . $date . '<br/>' . $header . $text;
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
|
|
|
@ -48,7 +48,7 @@ class SIMARBridge extends BridgeAbstract {
|
||||||
foreach($e_item->find('p') as $paragraph) {
|
foreach($e_item->find('p') as $paragraph) {
|
||||||
/* Remove empty paragraphs */
|
/* Remove empty paragraphs */
|
||||||
if (preg_match('/^(\W| )+$/', $paragraph->innertext) == 1) {
|
if (preg_match('/^(\W| )+$/', $paragraph->innertext) == 1) {
|
||||||
$paragraph->remove();
|
$paragraph->outertext = '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($e_item) {
|
if ($e_item) {
|
||||||
|
|
|
@ -18,7 +18,7 @@ class ScmbBridge extends BridgeAbstract {
|
||||||
$item['title'] = $article->find('header h1 a', 0)->innertext;
|
$item['title'] = $article->find('header h1 a', 0)->innertext;
|
||||||
|
|
||||||
// remove text "En savoir plus" from anecdote content
|
// remove text "En savoir plus" from anecdote content
|
||||||
$article->find('span.read-more', 0)->remove();
|
$article->find('span.read-more', 0)->outertext = '';
|
||||||
$content = $article->find('p.summary a', 0)->innertext;
|
$content = $article->find('p.summary a', 0)->innertext;
|
||||||
|
|
||||||
// remove superfluous spaces at the end
|
// remove superfluous spaces at the end
|
||||||
|
|
|
@ -171,7 +171,7 @@ class TwitterBridge extends BridgeAbstract {
|
||||||
|
|
||||||
// remove 'invisible' content
|
// remove 'invisible' content
|
||||||
foreach($tweet->find('.invisible') as $invisible) {
|
foreach($tweet->find('.invisible') as $invisible) {
|
||||||
$invisible->remove();
|
$invisible->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip protmoted tweets
|
// Skip protmoted tweets
|
||||||
|
|
|
@ -62,8 +62,9 @@ class VkBridge extends BridgeAbstract
|
||||||
$this->pageName = htmlspecialchars_decode($pageName);
|
$this->pageName = htmlspecialchars_decode($pageName);
|
||||||
}
|
}
|
||||||
foreach ($html->find('div.replies') as $comment_block) {
|
foreach ($html->find('div.replies') as $comment_block) {
|
||||||
$comment_block->remove();
|
$comment_block->outertext = '';
|
||||||
}
|
}
|
||||||
|
$html->load($html->save());
|
||||||
|
|
||||||
$pinned_post_item = null;
|
$pinned_post_item = null;
|
||||||
$last_post_id = 0;
|
$last_post_id = 0;
|
||||||
|
@ -81,7 +82,7 @@ class VkBridge extends BridgeAbstract
|
||||||
|
|
||||||
if (is_object($post->find('a.wall_post_more', 0))) {
|
if (is_object($post->find('a.wall_post_more', 0))) {
|
||||||
//delete link "show full" in content
|
//delete link "show full" in content
|
||||||
$post->find('a.wall_post_more', 0)->remove();
|
$post->find('a.wall_post_more', 0)->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$content_suffix = '';
|
$content_suffix = '';
|
||||||
|
@ -113,7 +114,7 @@ class VkBridge extends BridgeAbstract
|
||||||
|
|
||||||
foreach($external_link_selectors_to_remove as $sel) {
|
foreach($external_link_selectors_to_remove as $sel) {
|
||||||
if (is_object($post->find($sel, 0))) {
|
if (is_object($post->find($sel, 0))) {
|
||||||
$post->find($sel, 0)->remove();
|
$post->find($sel, 0)->outertext = '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -139,7 +140,7 @@ class VkBridge extends BridgeAbstract
|
||||||
$content_suffix .= "<br><img src='" . $matches[1] . "'>";
|
$content_suffix .= "<br><img src='" . $matches[1] . "'>";
|
||||||
}
|
}
|
||||||
$content_suffix .= "<br>Article: <a href='$article_link'>$article_title ($article_author)</a>";
|
$content_suffix .= "<br>Article: <a href='$article_link'>$article_title ($article_author)</a>";
|
||||||
$article->remove();
|
$article->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// get video on post
|
// get video on post
|
||||||
|
@ -149,7 +150,7 @@ class VkBridge extends BridgeAbstract
|
||||||
$video_title = $video->find('div.post_video_title', 0)->plaintext;
|
$video_title = $video->find('div.post_video_title', 0)->plaintext;
|
||||||
$video_link = $video->find('a.lnk', 0)->getAttribute('href');
|
$video_link = $video->find('a.lnk', 0)->getAttribute('href');
|
||||||
$this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
|
$this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
|
||||||
$video->remove();
|
$video->outertext = '';
|
||||||
$main_video_link = $video_link;
|
$main_video_link = $video_link;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -160,14 +161,14 @@ class VkBridge extends BridgeAbstract
|
||||||
if (count($temp) > 1) $video_title = $temp[1];
|
if (count($temp) > 1) $video_title = $temp[1];
|
||||||
$video_link = $a->getAttribute('href');
|
$video_link = $a->getAttribute('href');
|
||||||
if ($video_link != $main_video_link) $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
|
if ($video_link != $main_video_link) $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos);
|
||||||
$a->remove();
|
$a->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// get all photos
|
// get all photos
|
||||||
foreach($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) {
|
foreach($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) {
|
||||||
$result = $this->getPhoto($a);
|
$result = $this->getPhoto($a);
|
||||||
if ($result == null) continue;
|
if ($result == null) continue;
|
||||||
$a->remove();
|
$a->outertext = '';
|
||||||
$content_suffix .= "<br>$result";
|
$content_suffix .= "<br>$result";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,7 +177,7 @@ class VkBridge extends BridgeAbstract
|
||||||
$a = $el->find('.page_album_link', 0);
|
$a = $el->find('.page_album_link', 0);
|
||||||
$album_title = $a->find('.page_album_title_text', 0)->getAttribute('title');
|
$album_title = $a->find('.page_album_title_text', 0)->getAttribute('title');
|
||||||
$album_link = $a->getAttribute('href');
|
$album_link = $a->getAttribute('href');
|
||||||
$el->remove();
|
$el->outertext = '';
|
||||||
$content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
|
$content_suffix .= "<br>Album: <a href='$album_link'>$album_title</a>";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,7 +200,7 @@ class VkBridge extends BridgeAbstract
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$a->remove();
|
$a->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// get other documents
|
// get other documents
|
||||||
|
@ -216,7 +217,7 @@ class VkBridge extends BridgeAbstract
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$div->remove();
|
$div->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// get polls
|
// get polls
|
||||||
|
@ -226,14 +227,14 @@ class VkBridge extends BridgeAbstract
|
||||||
foreach($div->find('div.page_poll_text') as $poll_stat_title) {
|
foreach($div->find('div.page_poll_text') as $poll_stat_title) {
|
||||||
$content_suffix .= '<br>- ' . $poll_stat_title->innertext;
|
$content_suffix .= '<br>- ' . $poll_stat_title->innertext;
|
||||||
}
|
}
|
||||||
$div->remove();
|
$div->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
// get sign
|
// get sign
|
||||||
$post_author = $pageName;
|
$post_author = $pageName;
|
||||||
foreach($post->find('a.wall_signed_by') as $a) {
|
foreach($post->find('a.wall_signed_by') as $a) {
|
||||||
$post_author = $a->innertext;
|
$post_author = $a->innertext;
|
||||||
$a->remove();
|
$a->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_object($post->find('div.copy_quote', 0))) {
|
if (is_object($post->find('div.copy_quote', 0))) {
|
||||||
|
@ -242,7 +243,7 @@ class VkBridge extends BridgeAbstract
|
||||||
}
|
}
|
||||||
$copy_quote = $post->find('div.copy_quote', 0);
|
$copy_quote = $post->find('div.copy_quote', 0);
|
||||||
if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
|
if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) {
|
||||||
$copy_post_header->remove();
|
$copy_post_header->outertext = '';
|
||||||
}
|
}
|
||||||
$copy_quote_content = $copy_quote->innertext;
|
$copy_quote_content = $copy_quote->innertext;
|
||||||
$copy_quote->outertext = "<br>Reposted: <br>$copy_quote_content";
|
$copy_quote->outertext = "<br>Reposted: <br>$copy_quote_content";
|
||||||
|
|
|
@ -141,7 +141,7 @@ class WikipediaBridge extends BridgeAbstract {
|
||||||
$anchorFallbackIndex = 0){
|
$anchorFallbackIndex = 0){
|
||||||
// Clean the bottom of the featured article
|
// Clean the bottom of the featured article
|
||||||
if ($element->find('div', -1))
|
if ($element->find('div', -1))
|
||||||
$element->find('div', -1)->remove();
|
$element->find('div', -1)->outertext = '';
|
||||||
|
|
||||||
// The title and URI of the article can be found in an anchor containing
|
// The title and URI of the article can be found in an anchor containing
|
||||||
// the string '...' in most wikis ('full article ...')
|
// the string '...' in most wikis ('full article ...')
|
||||||
|
@ -202,10 +202,10 @@ class WikipediaBridge extends BridgeAbstract {
|
||||||
// Let's remove a couple of things from the article
|
// Let's remove a couple of things from the article
|
||||||
$table = $content->find('#toc', 0); // Table of contents
|
$table = $content->find('#toc', 0); // Table of contents
|
||||||
if(!$table === false)
|
if(!$table === false)
|
||||||
$table->remove();
|
$table->outertext = '';
|
||||||
|
|
||||||
foreach($content->find('ol.references') as $reference) // References
|
foreach($content->find('ol.references') as $reference) // References
|
||||||
$reference->remove();
|
$reference->outertext = '';
|
||||||
|
|
||||||
return str_replace('href="/', 'href="' . $this->getURI() . '/', $content->innertext);
|
return str_replace('href="/', 'href="' . $this->getURI() . '/', $content->innertext);
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,7 +50,7 @@ class WordPressBridge extends FeedExpander {
|
||||||
|
|
||||||
foreach ($article->find('h1.entry-title') as $title)
|
foreach ($article->find('h1.entry-title') as $title)
|
||||||
if ($title->plaintext == $item['title'])
|
if ($title->plaintext == $item['title'])
|
||||||
$title->remove();
|
$title->outertext = '';
|
||||||
|
|
||||||
$article_image = $article_html->find('img.wp-post-image', 0);
|
$article_image = $article_html->find('img.wp-post-image', 0);
|
||||||
if(!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) {
|
if(!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) {
|
||||||
|
|
|
@ -44,7 +44,7 @@ class WorldOfTanksBridge extends FeedExpander {
|
||||||
|
|
||||||
// Remove the scripts, please
|
// Remove the scripts, please
|
||||||
foreach($content->find('script') as $script) {
|
foreach($content->find('script') as $script) {
|
||||||
$script->remove();
|
$script->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
return $content->innertext;
|
return $content->innertext;
|
||||||
|
|
|
@ -193,7 +193,7 @@ class XenForoBridge extends BridgeAbstract {
|
||||||
|
|
||||||
// Remove script tags
|
// Remove script tags
|
||||||
foreach($content->find('script') as $script) {
|
foreach($content->find('script') as $script) {
|
||||||
$script->remove();
|
$script->outertext = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
$item['content'] = $content->innertext;
|
$item['content'] = $content->innertext;
|
||||||
|
|
|
@ -36,7 +36,7 @@ function sanitize($html,
|
||||||
if(in_array($element->tag, $text_to_keep)) {
|
if(in_array($element->tag, $text_to_keep)) {
|
||||||
$element->outertext = $element->plaintext;
|
$element->outertext = $element->plaintext;
|
||||||
} elseif(in_array($element->tag, $tags_to_remove)) {
|
} elseif(in_array($element->tag, $tags_to_remove)) {
|
||||||
$element->remove();
|
$element->outertext = '';
|
||||||
} else {
|
} else {
|
||||||
foreach($element->getAllAttributes() as $attributeName => $attribute) {
|
foreach($element->getAllAttributes() as $attributeName => $attribute) {
|
||||||
if(!in_array($attributeName, $attributes_to_keep))
|
if(!in_array($attributeName, $attributes_to_keep))
|
||||||
|
|
Loading…
Reference in a new issue