diff --git a/bridges/AsahiShimbunAJWBridge.php b/bridges/AsahiShimbunAJWBridge.php index 0ceb0381..62b9739d 100644 --- a/bridges/AsahiShimbunAJWBridge.php +++ b/bridges/AsahiShimbunAJWBridge.php @@ -50,18 +50,18 @@ class AsahiShimbunAJWBridge extends BridgeAbstract { $e_lead = $element->find('span.Lead', 0); if ($e_lead) { $item['content'] = $e_lead->innertext; - $e_lead->outertext = ''; + $e_lead->remove(); } else { $item['content'] = $element->innertext; } $e_date = $element->find('span.EnDate', 0); if ($e_date) { $item['timestamp'] = strtotime($e_date->innertext); - $e_date->outertext = ''; + $e_date->remove(); } $e_video = $element->find('span.EnVideo', 0); if ($e_video) { - $e_video->outertext = ''; + $e_video->remove(); $element->innertext = "VIDEO: $element->innertext"; } $item['title'] = $element->innertext; diff --git a/bridges/BundesbankBridge.php b/bridges/BundesbankBridge.php index b64a6425..d78873c6 100644 --- a/bridges/BundesbankBridge.php +++ b/bridges/BundesbankBridge.php @@ -55,7 +55,7 @@ class BundesbankBridge extends BridgeAbstract { $title = $study->find('.teasable__title div.h2', 0); foreach($title->children as &$child) { - $child->outertext = ''; + $child->remove(); } $item['title'] = $title->innertext; diff --git a/bridges/CastorusBridge.php b/bridges/CastorusBridge.php index 3ed1331e..48af9696 100644 --- a/bridges/CastorusBridge.php +++ b/bridges/CastorusBridge.php @@ -58,7 +58,7 @@ class CastorusBridge extends BridgeAbstract { returnServerError('Cannot find nodes!'); foreach($nodes as $node) { - $node->outertext = ''; + $node->remove(); } return strtotime($activity->innertext); diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index 20c82070..1ff25106 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -50,7 +50,7 @@ class DauphineLibereBridge extends FeedExpander { private function extractContent($url){ $html2 = getSimpleHTMLDOMCached($url); foreach ($html2->find('.noprint, link, script, iframe, .shareTool, .contentInfo') as $remove) { - $remove->outertext = ''; + $remove->remove(); } return $html2->find('div.content', 0)->innertext; } diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 1256be45..19b2a832 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -29,16 +29,16 @@ class EconomistBridge extends BridgeAbstract { // Remove newsletter subscription box $newsletter = $content->find('div[class="newsletter-form__message"]', 0); if ($newsletter) - $newsletter->outertext = ''; + $newsletter->remove(); $newsletterForm = $content->find('form', 0); if ($newsletterForm) - $newsletterForm->outertext = ''; + $newsletterForm->remove(); // Remove next and previous article URLs at the bottom $nextprev = $content->find('div[class="blog-post__next-previous-wrapper"]', 0); if ($nextprev) - $nextprev->outertext = ''; + $nextprev->remove(); $section = [ $article->find('h3[itemprop="articleSection"]', 0)->plaintext ]; diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index c0901072..a0331da9 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -584,7 +584,7 @@ EOD; foreach($content_filters as $filter) { foreach($content->find($filter) as $subject) { - $subject->outertext = ''; + $subject->remove(); } } diff --git a/bridges/HaveIBeenPwnedBridge.php b/bridges/HaveIBeenPwnedBridge.php index f256623a..8fac1e33 100644 --- a/bridges/HaveIBeenPwnedBridge.php +++ b/bridges/HaveIBeenPwnedBridge.php @@ -50,7 +50,7 @@ class HaveIBeenPwnedBridge extends BridgeAbstract { $permalink = $breach->find('p', 1)->find('a', 0)->href; // Remove permalink - $breach->find('p', 1)->find('a', 0)->outertext = ''; + $breach->find('p', 1)->find('a', 0)->remove(); $item['title'] = $breach->find('h3', 0)->plaintext . ' - ' . $accounts[1] . ' breached accounts'; $item['dateAdded'] = strtotime($dateAdded[1]); diff --git a/bridges/JustETFBridge.php b/bridges/JustETFBridge.php index 8d5b3d5a..c9201e4b 100644 --- a/bridges/JustETFBridge.php +++ b/bridges/JustETFBridge.php @@ -239,16 +239,16 @@ class JustETFBridge extends BridgeAbstract { or returnServerError('Article body not found!'); // Remove teaser image - $element->find('img.teaser-img', 0)->outertext = ''; + $element->find('img.teaser-img', 0)->remove(); // Remove self advertisements foreach($element->find('.call-action') as $adv) { - $adv->outertext = ''; + $adv->remove(); } // Remove tips foreach($element->find('.panel-edu') as $tip) { - $tip->outertext = ''; + $tip->remove(); } // Remove inline scripts (used for i.e. interactive graphs) as they are @@ -318,7 +318,7 @@ class JustETFBridge extends BridgeAbstract { $description = $description->parent(); foreach($description->find('div') as $div) { - $div->outertext = ''; + $div->remove(); } $quote = $html->find('div.infobox div.val', 0) diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php index 74bfc54a..5e393457 100644 --- a/bridges/NextgovBridge.php +++ b/bridges/NextgovBridge.php @@ -61,7 +61,7 @@ class NextgovBridge extends FeedExpander { return 'Could not request Nextgov: ' . $url; $contents = $article->find('div.wysiwyg', 0); - $contents->find('svg.content-tombstone', 0)->outertext = ''; + $contents->find('svg.content-tombstone', 0)->remove(); $contents = $contents->innertext; $contents = stripWithDelimiters($contents, '
', '
'); $contents = stripWithDelimiters($contents, ''); //ad outer div diff --git a/bridges/OsmAndBlogBridge.php b/bridges/OsmAndBlogBridge.php index 402c0301..25e765f5 100644 --- a/bridges/OsmAndBlogBridge.php +++ b/bridges/OsmAndBlogBridge.php @@ -51,7 +51,7 @@ class OsmAndBlogBridge extends BridgeAbstract { private function cleanupContent($content, ...$removeItems) { foreach ($removeItems as $obj) { - if ($obj) $obj->outertext = ''; + if ($obj) $obj->remove(); } foreach ($content->find('img') as $obj) { $obj->src = $this->filterURL($obj->src); diff --git a/bridges/PikabuBridge.php b/bridges/PikabuBridge.php index af603aca..1e1d5c8e 100644 --- a/bridges/PikabuBridge.php +++ b/bridges/PikabuBridge.php @@ -63,7 +63,7 @@ class PikabuBridge extends BridgeAbstract { foreach($el_to_remove_selectors as $el_to_remove_selector) { foreach($post->find($el_to_remove_selector) as $el) { - $el->outertext = ''; + $el->remove(); } } diff --git a/bridges/RadioMelodieBridge.php b/bridges/RadioMelodieBridge.php index fb5aca6e..8e2cf05d 100644 --- a/bridges/RadioMelodieBridge.php +++ b/bridges/RadioMelodieBridge.php @@ -38,20 +38,17 @@ class RadioMelodieBridge extends BridgeAbstract { $imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]'); foreach($imgs as $img) { $img->src = $this->rewriteImage($img->src); - $article->save(); } // Remove Google Ads $ads = $article->find('div[class=adInline]'); foreach($ads as $ad) { - $ad->outertext = ''; - $article->save(); + $ad->remove(); } // Remove Radio Melodie Logo $logoHTML = $article->find('div[id=logoArticleRM]', 0); - $logoHTML->outertext = ''; - $article->save(); + $logoHTML->remove(); $author = $article->find('p[class=AuthorName]', 0)->plaintext; @@ -65,8 +62,7 @@ class RadioMelodieBridge extends BridgeAbstract { $header = ''; // Remove the Date and Author part - $textDOM->find('div[class=AuthorDate]', 0)->outertext = ''; - $article->save(); + $textDOM->find('div[class=AuthorDate]', 0)->remove(); $text = $textDOM->innertext; $item['content'] = '

' . $item['title'] . '

' . $date . '
' . $header . $text; $this->items[] = $item; diff --git a/bridges/SIMARBridge.php b/bridges/SIMARBridge.php index 1e446cf5..41d517b4 100644 --- a/bridges/SIMARBridge.php +++ b/bridges/SIMARBridge.php @@ -48,7 +48,7 @@ class SIMARBridge extends BridgeAbstract { foreach($e_item->find('p') as $paragraph) { /* Remove empty paragraphs */ if (preg_match('/^(\W| )+$/', $paragraph->innertext) == 1) { - $paragraph->outertext = ''; + $paragraph->remove(); } } if ($e_item) { diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php index 2107aa3d..65fbbf01 100644 --- a/bridges/ScmbBridge.php +++ b/bridges/ScmbBridge.php @@ -18,7 +18,7 @@ class ScmbBridge extends BridgeAbstract { $item['title'] = $article->find('header h1 a', 0)->innertext; // remove text "En savoir plus" from anecdote content - $article->find('span.read-more', 0)->outertext = ''; + $article->find('span.read-more', 0)->remove(); $content = $article->find('p.summary a', 0)->innertext; // remove superfluous spaces at the end diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index b3b7bed4..f3ba39c1 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -171,7 +171,7 @@ class TwitterBridge extends BridgeAbstract { // remove 'invisible' content foreach($tweet->find('.invisible') as $invisible) { - $invisible->outertext = ''; + $invisible->remove(); } // Skip protmoted tweets diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 8653e7c9..5274180f 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -62,9 +62,8 @@ class VkBridge extends BridgeAbstract $this->pageName = htmlspecialchars_decode($pageName); } foreach ($html->find('div.replies') as $comment_block) { - $comment_block->outertext = ''; + $comment_block->remove(); } - $html->load($html->save()); $pinned_post_item = null; $last_post_id = 0; @@ -82,7 +81,7 @@ class VkBridge extends BridgeAbstract if (is_object($post->find('a.wall_post_more', 0))) { //delete link "show full" in content - $post->find('a.wall_post_more', 0)->outertext = ''; + $post->find('a.wall_post_more', 0)->remove(); } $content_suffix = ''; @@ -114,7 +113,7 @@ class VkBridge extends BridgeAbstract foreach($external_link_selectors_to_remove as $sel) { if (is_object($post->find($sel, 0))) { - $post->find($sel, 0)->outertext = ''; + $post->find($sel, 0)->remove(); } } @@ -140,7 +139,7 @@ class VkBridge extends BridgeAbstract $content_suffix .= "
"; } $content_suffix .= "
Article: $article_title ($article_author)"; - $article->outertext = ''; + $article->remove(); } // get video on post @@ -150,7 +149,7 @@ class VkBridge extends BridgeAbstract $video_title = $video->find('div.post_video_title', 0)->plaintext; $video_link = $video->find('a.lnk', 0)->getAttribute('href'); $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos); - $video->outertext = ''; + $video->remove(); $main_video_link = $video_link; } @@ -161,14 +160,14 @@ class VkBridge extends BridgeAbstract if (count($temp) > 1) $video_title = $temp[1]; $video_link = $a->getAttribute('href'); if ($video_link != $main_video_link) $this->appendVideo($video_title, $video_link, $content_suffix, $post_videos); - $a->outertext = ''; + $a->remove(); } // get all photos foreach($post->find('div.wall_text > a.page_post_thumb_wrap') as $a) { $result = $this->getPhoto($a); if ($result == null) continue; - $a->outertext = ''; + $a->remove(); $content_suffix .= "
$result"; } @@ -177,7 +176,7 @@ class VkBridge extends BridgeAbstract $a = $el->find('.page_album_link', 0); $album_title = $a->find('.page_album_title_text', 0)->getAttribute('title'); $album_link = $a->getAttribute('href'); - $el->outertext = ''; + $el->remove(); $content_suffix .= "
Album: $album_title"; } @@ -200,7 +199,7 @@ class VkBridge extends BridgeAbstract } - $a->outertext = ''; + $a->remove(); } // get other documents @@ -217,7 +216,7 @@ class VkBridge extends BridgeAbstract } - $div->outertext = ''; + $div->remove(); } // get polls @@ -227,14 +226,14 @@ class VkBridge extends BridgeAbstract foreach($div->find('div.page_poll_text') as $poll_stat_title) { $content_suffix .= '
- ' . $poll_stat_title->innertext; } - $div->outertext = ''; + $div->remove(); } // get sign $post_author = $pageName; foreach($post->find('a.wall_signed_by') as $a) { $post_author = $a->innertext; - $a->outertext = ''; + $a->remove(); } if (is_object($post->find('div.copy_quote', 0))) { @@ -243,7 +242,7 @@ class VkBridge extends BridgeAbstract } $copy_quote = $post->find('div.copy_quote', 0); if ($copy_post_header = $copy_quote->find('div.copy_post_header', 0)) { - $copy_post_header->outertext = ''; + $copy_post_header->remove(); } $copy_quote_content = $copy_quote->innertext; $copy_quote->outertext = "
Reposted:
$copy_quote_content"; diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php index 7ca763fc..a53652dd 100644 --- a/bridges/WikipediaBridge.php +++ b/bridges/WikipediaBridge.php @@ -141,7 +141,7 @@ class WikipediaBridge extends BridgeAbstract { $anchorFallbackIndex = 0){ // Clean the bottom of the featured article if ($element->find('div', -1)) - $element->find('div', -1)->outertext = ''; + $element->find('div', -1)->remove(); // The title and URI of the article can be found in an anchor containing // the string '...' in most wikis ('full article ...') @@ -202,10 +202,10 @@ class WikipediaBridge extends BridgeAbstract { // Let's remove a couple of things from the article $table = $content->find('#toc', 0); // Table of contents if(!$table === false) - $table->outertext = ''; + $table->remove(); foreach($content->find('ol.references') as $reference) // References - $reference->outertext = ''; + $reference->remove(); return str_replace('href="/', 'href="' . $this->getURI() . '/', $content->innertext); } diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1589c723..18045559 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -50,7 +50,7 @@ class WordPressBridge extends FeedExpander { foreach ($article->find('h1.entry-title') as $title) if ($title->plaintext == $item['title']) - $title->outertext = ''; + $title->remove(); $article_image = $article_html->find('img.wp-post-image', 0); if(!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) { diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index 46dd588d..a5fa0446 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -44,7 +44,7 @@ class WorldOfTanksBridge extends FeedExpander { // Remove the scripts, please foreach($content->find('script') as $script) { - $script->outertext = ''; + $script->remove(); } return $content->innertext; diff --git a/bridges/XenForoBridge.php b/bridges/XenForoBridge.php index 7bf1f15d..dc3a1a5e 100644 --- a/bridges/XenForoBridge.php +++ b/bridges/XenForoBridge.php @@ -193,7 +193,7 @@ class XenForoBridge extends BridgeAbstract { // Remove script tags foreach($content->find('script') as $script) { - $script->outertext = ''; + $script->remove(); } $item['content'] = $content->innertext; diff --git a/lib/html.php b/lib/html.php index 13db97a4..49c77f04 100644 --- a/lib/html.php +++ b/lib/html.php @@ -36,7 +36,7 @@ function sanitize($html, if(in_array($element->tag, $text_to_keep)) { $element->outertext = $element->plaintext; } elseif(in_array($element->tag, $tags_to_remove)) { - $element->outertext = ''; + $element->remove(); } else { foreach($element->getAllAttributes() as $attributeName => $attribute) { if(!in_array($attributeName, $attributes_to_keep))