From a88cd4ac393d88c0897e302ae7c08cebbbf01597 Mon Sep 17 00:00:00 2001 From: dsiguero <26867647+dsiguero@users.noreply.github.com> Date: Wed, 5 Feb 2025 00:23:42 +0100 Subject: [PATCH 1/2] chore: generate fake users with faker and add script to do a batch insert in dynamodb --- package-lock.json | 18 +++++++ package.json | 1 + utils/generate-random-users.js | 87 ++++++++++++++++++++++++++++++++ utils/generate-random-users.sh | 33 ------------ utils/insert-batch-into-table.sh | 62 +++++++++++++++++++++++ 5 files changed, 168 insertions(+), 33 deletions(-) create mode 100644 utils/generate-random-users.js delete mode 100755 utils/generate-random-users.sh create mode 100755 utils/insert-batch-into-table.sh diff --git a/package-lock.json b/package-lock.json index d53550529..fa2e30e02 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "@aws-sdk/types": "^3.723.0", "@eslint/compat": "^1.2.5", "@eslint/js": "^9.18.0", + "@faker-js/faker": "^9.4.0", "@stoplight/spectral-cli": "^6.14.2", "@types/aws-lambda": "^8.10.147", "@types/http-errors": "^2.0.4", @@ -1665,6 +1666,23 @@ "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, + "node_modules/@faker-js/faker": { + "version": "9.4.0", + "resolved": "https://registry.npmjs.org/@faker-js/faker/-/faker-9.4.0.tgz", + "integrity": "sha512-85+k0AxaZSTowL0gXp8zYWDIrWclTbRPg/pm/V0dSFZ6W6D4lhcG3uuZl4zLsEKfEvs69xDbLN2cHQudwp95JA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/fakerjs" + } + ], + "license": "MIT", + "engines": { + "node": ">=18.0.0", + "npm": ">=9.0.0" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", diff --git a/package.json b/package.json index e107b723b..d5240ac53 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "@aws-sdk/types": "^3.723.0", "@eslint/compat": "^1.2.5", "@eslint/js": "^9.18.0", + "@faker-js/faker": "^9.4.0", 
"@stoplight/spectral-cli": "^6.14.2", "@types/aws-lambda": "^8.10.147", "@types/http-errors": "^2.0.4", diff --git a/utils/generate-random-users.js b/utils/generate-random-users.js new file mode 100644 index 000000000..625980e67 --- /dev/null +++ b/utils/generate-random-users.js @@ -0,0 +1,87 @@ +import { faker } from '@faker-js/faker'; + +const MAX_NUMBER_OF_CALENDARS = 10; + +function generateTimestamps() { + /** Generate a random timestamp within the last year */ + const now = Date.now(); + const past = now - Math.floor(Math.random() * 365 * 24 * 60 * 60 * 1000); + return { signedUpAt: past, lastSignInAt: now }; +} + +function generateRandomUserCalendars() { + const numberOfCalendars = Math.floor(Math.random() * MAX_NUMBER_OF_CALENDARS) + 1; + return Array.from({ length: numberOfCalendars }).map((_) => ({ + name: `${faker.person.fullName()} Calendar`, + id: `${faker.string.alphanumeric(20)}@google.com` + })); +} + +function generateUser() { + /** Generate a fake user */ + const timestamps = generateTimestamps(); + + return { + userId: faker.string.uuid(), + signedUpAt: timestamps.signedUpAt.toString(), + lastSignInAt: timestamps.lastSignInAt.toString(), + idp: faker.helpers.arrayElement(['google.com']), + idpId: faker.string.ulid(), + config: { + businessName: faker.company.name(), + businessAddress: faker.location.streetAddress(), + calendars: generateRandomUserCalendars() + }, + userStatus: faker.helpers.arrayElement(['live', 'onboarding', 'banned']), + idpAuthorization: { + refreshToken: faker.internet.jwt() + }, + email: faker.internet.email() + }; +} + +function generateItem(user) { + /** Map a generated user onto a DynamoDB item in attribute-value form; every attribute, including UserStatus, is copied from `user` */ + return { + Item: { + UserId: { S: user.userId }, + Idp: { S: user.idp }, + IdpId: { S: user.idpId }, + SignedUpAt: { N: user.signedUpAt }, + LastSignInAt: { N: user.lastSignInAt }, + Config: { + M: { + businessName: { S: user.config.businessName }, + businessAddress: { S: user.config.businessAddress }, + calendars: { + L: 
user.config.calendars.map((c) => ({ + M: { + name: { S: c.name }, + id: { S: c.id } + } + })) + } + } + }, + UserStatus: { S: user.userStatus }, + IdpAuthorization: { + M: { + refreshToken: { S: user.idpAuthorization.refreshToken } + } + }, + Email: { S: user.email } + } + }; +} + +function generateBatch(batchSize) { + /** Generate a batch of items */ + const users = Array.from({ length: batchSize }, generateUser); + return users.map(generateItem); +} + +// Generate the batch and print it to stdout as JSON +const batchSize = process.argv[2] ? parseInt(process.argv[2], 10) : 10; +const batchData = generateBatch(batchSize); + +console.log(JSON.stringify(batchData, null, 4)); diff --git a/utils/generate-random-users.sh b/utils/generate-random-users.sh deleted file mode 100755 index 7fa41cd34..000000000 --- a/utils/generate-random-users.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -if [ -z "$USERS_TABLE_NAME" ]; then - echo "Error: USERS_TABLE_NAME environment variable is not set." - exit 1 -fi - -# Number of random emails to generate (10 by default) -count="${1:-10}" -max_batch_size=10 - -# Use curl to fetch data from randomuser.me API -response=$(curl -s "https://randomuser.me/api/?results=${count}&inc=email") -batch_items=$(echo "${response}" | jq -c '[.results[].email | {PutRequest: {Item: {"UserId": {"S": .}}}}]') -total_emails=$(echo "${batch_items}" | jq '. | length') - -# Split the items into chunks and write to DynamoDB -for ((i=0; i " + exit 1 +fi + +TABLE_NAME="$1" +JSON_FILE="$2" + +# Check if the file exists +if [ ! -f "$JSON_FILE" ]; then + echo "Error: File '$JSON_FILE' not found!" 
+ exit 1 +fi + +# Check the table exists + +# Read the entire JSON array into a variable +ITEMS=$(jq -c '.[]' "$JSON_FILE") + +# Convert the multi-line string into an array +mapfile -t ITEM_ARRAY <<< "$ITEMS" + +# Initialize batch array +BATCH=() +COUNTER=0 + + +# Process items in batches of 10 +for ITEM in "${ITEM_ARRAY[@]}"; do + BATCH+=("$ITEM") + COUNTER=$((COUNTER + 1)) + + if [ "$COUNTER" -eq 10 ]; then + echo "Processing batch of 10 items..." + + # Format JSON correctly for DynamoDB + BATCH_JSON=$(printf '%s\n' "${BATCH[@]}" | jq -s --arg TABLE_NAME "$TABLE_NAME" '{ ($TABLE_NAME): [ .[] | { PutRequest: { Item: .Item } } ] }') + + # Send batch to DynamoDB + aws dynamodb batch-write-item --request-items "$BATCH_JSON" + + # Reset batch + BATCH=() + COUNTER=0 + fi +done + +# Process remaining items (if batch has fewer than 10 items) +if [ "$COUNTER" -gt 0 ]; then + echo "Processing final batch of $COUNTER items..." + + BATCH_JSON=$(printf '%s\n' "${BATCH[@]}" | jq -s --arg TABLE_NAME "$TABLE_NAME" '{ ($TABLE_NAME): [ .[] | { PutRequest: { Item: .Item } } ] }') + + echo "${BATCH_JSON}" > foo.json + + aws dynamodb batch-write-item --request-items "$BATCH_JSON" +fi + +echo "Upload complete!" From 3399e5edff3c1de270133f8d17bb3b4c81dfaba6 Mon Sep 17 00:00:00 2001 From: dsiguero <26867647+dsiguero@users.noreply.github.com> Date: Wed, 5 Feb 2025 00:33:53 +0100 Subject: [PATCH 2/2] improve script --- utils/insert-batch-into-table.sh | 65 ++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/utils/insert-batch-into-table.sh b/utils/insert-batch-into-table.sh index dacecbb2b..a6d2f96e9 100755 --- a/utils/insert-batch-into-table.sh +++ b/utils/insert-batch-into-table.sh @@ -2,61 +2,70 @@ # Ensure correct usage if [ "$#" -ne 2 ]; then - echo "Usage: $0 <table_name> <json_file>" - exit 1 + echo "Usage: $0 <table_name> <json_file>" + exit 1 fi TABLE_NAME="$1" JSON_FILE="$2" # Check if the file exists -if [ ! -f "$JSON_FILE" ]; then - echo "Error: File '$JSON_FILE' not found!" 
- exit 1 +if [ ! -f "${JSON_FILE}" ]; then + echo "Error: File '${JSON_FILE}' not found!" + exit 1 +fi + +if ! aws dynamodb describe-table --table-name "${TABLE_NAME}" >/dev/null 2>&1; then + echo "Error: Table '${TABLE_NAME}' not found!" + exit 1 fi # Check the table exists # Read the entire JSON array into a variable -ITEMS=$(jq -c '.[]' "$JSON_FILE") +ITEMS=$(jq -c '.[]' "${JSON_FILE}") # Convert the multi-line string into an array -mapfile -t ITEM_ARRAY <<< "$ITEMS" +mapfile -t ITEM_ARRAY <<< "${ITEMS}" # Initialize batch array BATCH=() COUNTER=0 +process_batch() { + local -n batch_ref=$1 # Nameref: the caller's batch array, read and cleared through this alias + local batch_size=${#batch_ref[@]} -# Process items in batches of 10 -for ITEM in "${ITEM_ARRAY[@]}"; do - BATCH+=("$ITEM") - COUNTER=$((COUNTER + 1)) + if [ "$batch_size" -eq 0 ]; then + return # Skip if the batch is empty + fi + + echo "Processing batch of $batch_size items..." - if [ "$COUNTER" -eq 10 ]; then - echo "Processing batch of 10 items..." + # Wrap each item of the referenced batch in a PutRequest keyed by the table name + BATCH_JSON=$(printf '%s\n' "${batch_ref[@]}" | jq -s --arg TABLE_NAME "$TABLE_NAME" '{ ($TABLE_NAME): [ .[] | { PutRequest: { Item: .Item } } ] }') - # Format JSON correctly for DynamoDB - BATCH_JSON=$(printf '%s\n' "${BATCH[@]}" | jq -s --arg TABLE_NAME "$TABLE_NAME" '{ ($TABLE_NAME): [ .[] | { PutRequest: { Item: .Item } } ] }') + # Send batch to DynamoDB; abort so a failed write is never reported as a completed upload + aws dynamodb batch-write-item --request-items "$BATCH_JSON" || { echo "Error: batch-write-item failed!" >&2; exit 1; } - # Send batch to DynamoDB - aws dynamodb batch-write-item --request-items "$BATCH_JSON" + # Clear batch + batch_ref=() +} - # Reset batch - BATCH=() - COUNTER=0 - fi +# Process items in batches of 10 +for ITEM in "${ITEM_ARRAY[@]}"; do + BATCH+=("$ITEM") + COUNTER=$((COUNTER + 1)) + + if [ "${COUNTER}" -eq 10 ]; then + process_batch BATCH + COUNTER=0 + fi done # Process remaining items (if batch has fewer than 10 items) if [ "$COUNTER" -gt 0 ]; then - echo "Processing final batch of $COUNTER items..." 
- - BATCH_JSON=$(printf '%s\n' "${BATCH[@]}" | jq -s --arg TABLE_NAME "$TABLE_NAME" '{ ($TABLE_NAME): [ .[] | { PutRequest: { Item: .Item } } ] }') - - echo "${BATCH_JSON}" > foo.json - - aws dynamodb batch-write-item --request-items "$BATCH_JSON" + process_batch BATCH fi echo "Upload complete!"