From 8f9a385b4deedb8e5f3cd70d0404f562b937b94e Mon Sep 17 00:00:00 2001 From: Nemo Date: Wed, 1 Aug 2018 01:14:37 +0530 Subject: [PATCH] [AmazonPriceTrackerBridge] Improve Amazon scraper logic (#761) - Now works on all websites, and even with products with multiple prices - Closes #750 --- bridges/AmazonPriceTrackerBridge.php | 68 ++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index dd352af1..e31a03bb 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -92,6 +92,14 @@ class AmazonPriceTrackerBridge extends BridgeAbstract { } } + private function parseDynamicImage($attribute) { + $json = json_decode(html_entity_decode($attribute), true); + + if ($json and count($json) > 0) { + return array_keys($json)[0]; + } + } + /** * Returns a generated image tag for the product */ @@ -99,11 +107,15 @@ class AmazonPriceTrackerBridge extends BridgeAbstract { $imageSrc = $html->find('#main-image-container img', 0); if ($imageSrc) { - $imageSrc = $imageSrc ? $imageSrc->getAttribute('data-old-hires') : ''; - return << -EOT; + $hiresImage = $imageSrc->getAttribute('data-old-hires'); + $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); + $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); } + $image = $image ?: 'https://placekitten.com/200/300'; + + return << +EOT; } /** @@ -116,6 +128,39 @@ EOT; return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.'); } + private function scrapePriceFromMetrics($html) { + $asinData = $html->find('#cerberus-data-metrics', 0); + + //