From c7479558fd790396f07a02c342aec8645132e06b Mon Sep 17 00:00:00 2001 From: Ryan Senanayake Date: Wed, 10 Mar 2021 23:44:19 -0500 Subject: [PATCH] update products search to new /s/query endpoint (old endpoint returns 503) --- bin/cli.js | 0 lib/Amazon.js | 17 +++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) mode change 100644 => 100755 bin/cli.js diff --git a/bin/cli.js b/bin/cli.js old mode 100644 new mode 100755 diff --git a/lib/Amazon.js b/lib/Amazon.js index 044da27..2e13122 100644 --- a/lib/Amazon.js +++ b/lib/Amazon.js @@ -259,12 +259,15 @@ class AmazonScraper { throw new Error('Done'); } if (this.scrapeType === 'products') { - let totalResultCount = body.match(/"totalResultCount":\w+(.[0-9])/gm); + let body_parts = body.split('"html" : "').slice(1); + body_parts = body_parts.map((part) => part.split('",\n')[0]); + let body_html = body_parts.join(''); + let totalResultCount = body_html.match(/"totalResultCount":\w+(.[0-9])/gm); if (totalResultCount) { this.totalProducts = totalResultCount[0].split('totalResultCount":')[1]; } - this.grabProduct(body, item); + this.grabProduct(body_html, item); } if (this.scrapeType === 'reviews') { this.grabReviews(body); @@ -352,7 +355,7 @@ class AmazonScraper { get setRequestEndpoint() { switch (this.scrapeType) { case 'products': - return 's'; + return 's/query'; case 'reviews': return `product-reviews/${this.asin}/ref=cm_cr_arp_d_viewopt_srt?formatType=${ CONST.reviewFilter.formatType[this.reviewFilter.formatType] @@ -1156,7 +1159,9 @@ class AmazonScraper { * @param {*} body */ grabProduct(body, p) { - const $ = cheerio.load(body.replace(/\s\s+/g, '').replace(/\n/g, '')); + const body_replaced = body.replace(/\s\s+/g, '').replace(/\\n/g, '').replace(/\\"/g,'"'); + const $ = cheerio.load(body_replaced); + let productList = $('div[data-index]'); const scrapingResult = {}; @@ -1246,12 +1251,12 @@ class AmazonScraper { } if (ratingSearch) { - scrapingResult[key].reviews.rating = parseFloat(ratingSearch.children[0].children[0].data); + const ratingText = ratingSearch.children[0].children[0].data; + scrapingResult[key].reviews.rating = parseFloat(ratingText.split(' out of')[0]); scrapingResult[key].reviews.total_reviews = parseInt( ratingSearch.parent.parent.parent.next.attribs['aria-label'].replace(/\,/g, ''), ); - scrapingResult[key].score = parseFloat(scrapingResult[key].reviews.rating * scrapingResult[key].reviews.total_reviews).toFixed(2); }