Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions lib/Amazon.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ const spinner = ora('Amazon Scraper Started');
const { Parser } = require('json2csv');
const moment = require('moment');
const { SocksProxyAgent } = require('socks-proxy-agent');

const CONST = require('./constant');

class AmazonScraper {
Expand Down Expand Up @@ -150,7 +149,7 @@ class AmazonScraper {
'user-agent': this.userAgent,
cookie: this.cookie,
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'accept-language': 'en-US,en;q=0.9',
'accept-language': this.geo.acceptLang,
'accept-encoding': 'gzip, deflate, br',
...(this.getReferer ? { referer: this.getReferer } : {}),
...(Math.round(Math.random()) ? { downlink: Math.floor(Math.random() * 30) + 10 } : {}),
Expand Down Expand Up @@ -254,11 +253,14 @@ class AmazonScraper {
/**
* Main loop that collects data
*/

async mainLoop() {
return new Promise((resolve, reject) => {
const iterable = Array.from({ length: this.asyncPage }, (_, k) => k + 1);

forEachLimit(
Array.from({ length: this.asyncPage }, (_, k) => k + 1),
this.asyncTasks,
iterable,
Math.min(iterable.length, this.asyncTasks),
async (item) => {
const body = await this.buildRequest(this.bulk ? item : this.searchPage);
if (this.scrapeType === 'asin') {
Expand Down Expand Up @@ -555,7 +557,8 @@ class AmazonScraper {
try {
const featureList = $('#feature-bullets > ul .a-list-item');
for (let item in featureList) {
if (featureList[item].children[0].data) featureBullets.push(featureList[item].children[0].data);
const feature = featureList[item].children[0].data && featureList[item].children[0].data.trim();
if (feature) featureBullets.push(feature.trim());
}
} catch {
// continue regardless of error
Expand Down Expand Up @@ -1073,6 +1076,7 @@ class AmazonScraper {
sponsored_products: [],
also_bought: [],
other_sellers: [],
rawBody: body,
};

const book_in_series = this.extractBookInSeries($);
Expand Down Expand Up @@ -1154,13 +1158,15 @@ class AmazonScraper {
? $($(`span.reviewCountTextLinkedHistogram.noUnderline`)[0]).text().split(/\s/g)[0]
: 0;

output.price.current_price = $(`span.a-price.priceToPay`)[0] ? this.geo.price_format($($(`span.a-price.priceToPay`)[0].children[0]).text()) : 0;
output.price.current_price = $(`span.a-price.priceToPay`)[0]
? this.geo.price_format($($(`span.a-price.priceToPay`)[0].children[0]).text())
: 0;
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is just auto-formatting; sorry about that.

if (!output.current_price) {
try {
output.price.current_price = this.geo.price_format($($(`span.a-price.apexPriceToPay`)[0].children[0]).text());
} catch {
try {
output.price.current_price = this.geo.price_format($($(".a-price")[0].children[0]).text());
output.price.current_price = this.geo.price_format($($('.a-price')[0].children[0]).text());
} catch {
// continue regardless of error
}
Expand Down
16 changes: 7 additions & 9 deletions lib/constant.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const moment = require('moment');
const { DateTime } = require('luxon');

module.exports = {
limit: {
Expand Down Expand Up @@ -30,6 +30,7 @@ module.exports = {
country: 'United States of America',
currency: 'USD',
symbol: '$',
acceptLang: 'en-US,en;q=0.9',
host: 'www.amazon.com',
variants: {
split_text: 'Click to select ',
Expand All @@ -56,8 +57,7 @@ module.exports = {
return '';
},
price_format: (price) => {
const formatedPrice = price.replace(/[^\d+\.]/g, '');
return parseFloat(formatedPrice);
return price.replace(/[$€+\.,]/g, '') / 100;
},
product_information: {
id: [
Expand Down Expand Up @@ -376,6 +376,7 @@ module.exports = {
country: 'Germany',
currency: 'EUR',
host: 'www.amazon.de',
acceptLang: 'de-DE,de;q=0.9',
symbol: '€',
variants: {
split_text: 'Click to select ',
Expand All @@ -390,20 +391,17 @@ module.exports = {
return '';
},
review_date: (date) => {
const dateRegex = /on (.+)$/.exec(date);
const dateRegex = /vom (.+)$/.exec(date);
if (dateRegex) {
return {
date: dateRegex[1],
unix: moment(new Date(`${dateRegex[1]} 02:00:00`))
.utc()
.unix(),
unix: DateTime.fromFormat(dateRegex[1], 'dd. MMMM yyyy', { locale: 'de-DE' }).toUnixInteger(),
};
}
return '';
},
price_format: (price) => {
const formatedPrice = price.replace(/[^\d+\.]/g, '');
return parseFloat(formatedPrice);
return price.replace(/[$€+\.,]/g, '') / 100;
},
product_information: {
// <<------ NOT CORRECT! The {fields} key values below still need to be translated into German. I have not had time to do this yet.
Expand Down
Loading