From 291e8c2a2374a6f7ac34c352295bc0fdccba4992 Mon Sep 17 00:00:00 2001 From: sysadminstory Date: Thu, 4 Apr 2019 22:39:39 +0200 Subject: [PATCH 1/2] [AutoJMBridge] Fix bridge after website change (#1081) * [AutoJMBridge] Fix bridge after website change The website was totally reworked, so the bridge had to be reworked too. The bridge parameters changed, therefore old RSS feed will not work anymore, but it was impossible to do it in another way. --- bridges/AutoJMBridge.php | 194 +++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 37 deletions(-) diff --git a/bridges/AutoJMBridge.php b/bridges/AutoJMBridge.php index 598f0431..091d64a1 100644 --- a/bridges/AutoJMBridge.php +++ b/bridges/AutoJMBridge.php @@ -3,63 +3,183 @@ class AutoJMBridge extends BridgeAbstract { const NAME = 'AutoJM'; - const URI = 'http://www.autojm.fr/'; + const URI = 'https://www.autojm.fr/'; const DESCRIPTION = 'Suivre les offres de véhicules proposés par AutoJM en fonction des critères de filtrages'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = array( 'Afficher les offres de véhicules disponible en fonction des critères du site AutoJM' => array( 'url' => array( - 'name' => 'URL de la recherche', + 'name' => 'URL du modèle', 'type' => 'text', 'required' => true, 'title' => 'URL d\'une recherche avec filtre de véhicules sans le http://www.autojm.fr/', - 'exampleValue' => 'gammes/index/398?order_by=finition_asc&energie[]=3&transmission[]=2&dispo=all' + 'exampleValue' => 'achat-voitures-neuves-peugeot-nouvelle-308-5p' + ), + 'isDispo' => array( + 'name' => 'Disponibilité', + 'type' => 'list', + 'values' => array( + '-' => '', + 'En stock' => 1, + 'Sur commande' => 0 + ), + 'title' => 'Critère de disponibilité' + ), + 'energy' => array( + 'name' => 'Carburant', + 'type' => 'list', + 'values' => array( + '-' => '', + 'Diesel' => 1, + 'Essence' => 3, + 'Hybride' => 5 + ), + 'title' => 'Carburant' + ), + 'transmission' => array( + 'name' => 'Transmission', + 'type' => 'list', + 'values' => array( + '-' => '', + 'Automatique' => 1, + 'Manuelle' => 2 + ), + 'title' => 'Transmission' + ), + 'priceMin' => array( + 'name' => 'Prix minimum', + 'type' => 'number', + 'required' => false, + 'title' => 'Prix minimum du véhicule', + 'exampleValue' => '10000', + 'defaultValue' => '0' + ), + 'priceMax' => array( + 'name' => 'Prix maximum', + 'type' => 'number', + 'required' => false, + 'title' => 'Prix maximum du véhicule', + 'exampleValue' => '15000', + 'defaultValue' => '150000' ) ) ); const CACHE_TIMEOUT = 3600; public function getIcon() { - return self::URI . 'assets/images/favicon.ico'; + return self::URI . 'favicon.ico'; } public function collectData() { - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')) + + $model_url = self::URI . $this->getInput('url'); + + // Get the session cookies and the form token + $this->getInitialParameters($model_url); + + // Build the form + $post_data = array( + 'form[isDispo]' => $this->getInput('isDispo'), + 'form[energy]' => $this->getInput('energy'), + 'form[transmission]' => $this->getInput('transmission'), + 'form[priceMin]' => $this->getInput('priceMin'), + 'form[priceMin]' => $this->getInput('priceMin'), + 'form[_token]' => $this->token + ); + + // Set the Form request content type + $header = array( + 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8', + ); + + // Set the curl options (POST query and content, and session cookies + $curl_opts = array( + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => http_build_query($post_data), + CURLOPT_COOKIE => $this->cookies + ); + + // Get the JSON content of the form + $json = getContents($model_url, $header, $curl_opts) or returnServerError('Could not request AutoJM.'); - $list = $html->find('div[class*=ligne_modele]'); - foreach($list as $element) { - $image = $element->find('img[class=width-100]', 0)->src; - $serie = $element->find('div[class=serie]', 0)->find('span', 0)->plaintext; - $url = $element->find('div[class=serie]', 0)->find('a[class=btn_ligne color-black]', 0)->href; - if($element->find('div[class*=hasStock-info]', 0) != null) { - $dispo = 'Disponible'; - } else { - $dispo = 'Sur commande'; + + // Extract the HTML content from the JSON result + $data = json_decode($json); + $html = str_get_html($data->content); + + // Go through every finisha of the model + $list = $html->find('h2'); + foreach ($list as $finish) { + $finish_name = $finish->plaintext; + $motorizations = $finish->next_sibling()->find('li'); + foreach ($motorizations as $element) { + $image = $element->find('div[class=block-product-image]', 0)->{'data-ga-banner'}; + $serie = $element->find('span[class=model]', 0)->plaintext; + $url = self::URI . substr($element->find('a', 0)->href, 1); + if ($element->find('span[class*=block-product-nbModel]', 0) != null) { + $availability = 'En Stock'; + } else { + $availability = 'Sur commande'; + } + $discount_html = $element->find('span[class*=tag--promo]', 0); + if ($discount_html != null) { + $discount = $discount_html->plaintext; + } else { + $discount = 'inconnue'; + } + $price = $element->find('span[class=price red h1]', 0)->plaintext; + $item = array(); + $item['title'] = $finish_name . ' ' . $serie; + $item['content'] = '

' + . $finish_name . ' ' . $serie . '

'; + $item['content'] .= ''; + + // Add a fictionnal anchor to the RSS element URL, based on the item content ; + // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items + $item['uri'] = $url . '#' . md5($item['content']); + + $this->items[] = $item; } - $carburant = str_replace('dispo |', '', $element->find('div[class=carburant]', 0)->plaintext); - $transmission = $element->find('div[class*=bv]', 0)->plaintext; - $places = $element->find('div[class*=places]', 0)->plaintext; - $portes = $element->find('div[class*=nb_portes]', 0)->plaintext; - $carosserie = $element->find('div[class*=coloris]', 0)->plaintext; - $remise = $element->find('div[class*=remise]', 0)->plaintext; - $prix = $element->find('div[class*=prixjm]', 0)->plaintext; - - $item = array(); - $item['uri'] = $url; - $item['title'] = $serie; - $item['content'] = '

' . $serie . '

'; - $item['content'] .= ''; - - $this->items[] = $item; } + } + + /** + * Gets the session cookie and the form token + * + * @param string $pageURL The URL from which to get the values + */ + private function getInitialParameters($pageURL) { + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $pageURL); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + $data = curl_exec($ch); + + // Separate the response header and the content + $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + $header = substr($data, 0, $headerSize); + $content = substr($data, $headerSize); + curl_close($ch); + + // Extract the cookies from the headers + $cookies = ''; + $http_response_header = explode("\r\n", $header); + foreach ($http_response_header as $hdr) { + if (strpos($hdr, 'Set-Cookie') !== false) { + $cLine = explode(':', $hdr)[1]; + $cLine = explode(';', $cLine)[0]; + $cookies .= ';' . $cLine; + } + } + $this->cookies = trim(substr($cookies, 1)); + + // Get the token from the content + $html = str_get_html($content); + $token = $html->find('input[type=hidden][id=form__token]', 0); + $this->token = $token->value; } } From 966d450d2703a51028deaa79eb7f92d388294498 Mon Sep 17 00:00:00 2001 From: Thibault Couraud <1036233+couraudt@users.noreply.github.com> Date: Thu, 4 Apr 2019 20:44:44 +0000 Subject: [PATCH 2/2] [FindACrew] Update bridge according new findacrew.net website (#1080) * update bridge according new crewbay.com website --- bridges/FindACrewBridge.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bridges/FindACrewBridge.php b/bridges/FindACrewBridge.php index c245c84a..1dac775a 100644 --- a/bridges/FindACrewBridge.php +++ b/bridges/FindACrewBridge.php @@ -62,10 +62,10 @@ class FindACrewBridge extends BridgeAbstract { foreach ($annonces as $annonce) { $item = array(); - $img = parent::getURI() . $annonce->find('.css_LstPic img', 0)->getAttribute('src'); - $item['title'] = $annonce->find('.css_LstCtrls span', 0)->plaintext; - $item['uri'] = parent::getURI() . $annonce->find('.css_PnlCtrls a', 0)->href; - $content = $annonce->find('.css_LstDtl div', 2)->innertext; + $img = parent::getURI() . $annonce->find('.lst-pic img', 0)->getAttribute('src'); + $item['title'] = $annonce->find('.lst-tags span', 0)->plaintext; + $item['uri'] = parent::getURI() . $annonce->find('.lst-ctrls a', 0)->href; + $content = $annonce->find('.lst-dtl', 0)->innertext; $item['content'] = "
$content"; $item['enclosures'] = array($img); $item['categories'] = array($annonce->find('.css_AccLocCur', 0)->plaintext);