Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions example-github-username/README.md

This file was deleted.

4 changes: 4 additions & 0 deletions misha-z1nchuk/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
AWS_BUCKET_NAME=bucket-name
AWS_ACCESS_KEY=AFDFDFDDFfYUDFFDSSD
AWS_ACCESS_SECRET_KEY=2S23fdfdtfdfdgMfdfiDxWX/fEFa+Q9/o9Buqab9s
AWS_BUCKET_REGION=us-east-1
16 changes: 16 additions & 0 deletions misha-z1nchuk/.eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"env": {
"browser": true,
"commonjs": true,
"es2021": true
},
"extends": [
"airbnb-base"
],
"parserOptions": {
"ecmaVersion": "latest"
},
"rules": {
"indent": ["error", 4]
}
}
4 changes: 4 additions & 0 deletions misha-z1nchuk/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
folder_to_upload
.env
.idea
node_modules
74 changes: 74 additions & 0 deletions misha-z1nchuk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Multi-threaded file uploader (Backend)

> Ideal candidate: skilled python developer with solid knowledge of cloud and distributed systems.

# Overview

Create a python application that uploads a set of given files to a cloud object storage in parallel through the cloud provider's or third party API.

# Requirements

1. Support up to hundreds of thousands of files, all inside one directory, with arbitrary sizes. The root directory may contain subdirectories.
1. The object storage container which holds the objects is private and only credential-based access is allowed.
1. Each object inside object storage should have an associated metadata which contains file size, last modification time and file permissions.

# Expectations

- Fast (utilize full network bandwidth), low CPU (do not block all other processes) and low Memory (<25% tentatively) file uploader
- Support for AWS S3
- Modular and Object oriented implementation (to add other cloud providers)
- Clean and documented code
- Tests

# Timeline

We leave the exact timing to the candidate, but the work must fit within 5 days in total.

# Notes

- we can provide temporary credentials to access AWS/Azure.



# Multithreaded File Uploader (AWS S3)




## Environment Variables

To run this project, you will need to add the following environment variables to your .env file

`AWS_BUCKET_NAME` `AWS_ACCESS_KEY` `AWS_ACCESS_SECRET_KEY` `AWS_BUCKET_REGION`

See .env.example file.



## Run Locally

Clone the project

```bash
git clone https://github.com/misha-z1nchuk/multithread-s3-file-uploader
```

Go to the project directory

```bash
cd multithread-s3-file-uploader
```

Install dependencies

```bash
npm install
```

Add the folder that you want to upload to the root folder of the project.

And run

```bash
node lib/main.js -f ../<folder name>
```
31 changes: 31 additions & 0 deletions misha-z1nchuk/lib/CloudStorages/AWS_S3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/* eslint-disable no-throw-literal */
const S3 = require('aws-sdk/clients/s3');
const stream = require('stream');
const fs = require('fs');


if (!process.env.AWS_BUCKET_REGION ||
!process.env.AWS_ACCESS_KEY ||
!process.env.AWS_ACCESS_SECRET_KEY) throw "All credentials for S3 is required"


const s3 = new S3({
region: process.env.AWS_BUCKET_REGION,
accessKeyId: process.env.AWS_ACCESS_KEY,
secretAccessKey: process.env.AWS_ACCESS_SECRET_KEY,
});

/**
 * Upload one local file to S3 as a streamed object.
 *
 * The file's size, last-modification time and permission bits are attached as
 * user-defined object metadata (all values are strings).
 *
 * @param {string} Key - Object key to store the file under.
 * @param {string} file - Path of the local file to upload.
 * @param {string} Bucket - Name of the destination bucket.
 * @returns {Promise<object>} Whatever `s3.upload(...).promise()` resolves to.
 * @throws {Error} 'File not exists' when `file` is missing; any other
 *   `fs.stat` failure is rethrown unchanged.
 */
async function AwsUploadFile(Key, file, Bucket) {
    let stats;
    try {
        // A single stat call both proves the file exists and yields the metadata.
        stats = await fs.promises.stat(file);
    } catch (err) {
        if (err.code === 'ENOENT' || err.code === 'ENOTDIR') {
            throw new Error('File not exists');
        }
        throw err;
    }

    return s3.upload({
        Key,
        Bucket,
        // Pass the read stream itself as the body. `pipe()` does not forward
        // source errors to its destination, so an intermediate PassThrough
        // would hide read failures; this way they surface on the Body stream.
        Body: fs.createReadStream(file),
        Metadata: {
            size: String(stats.size),
            mtime: stats.mtime.toISOString(),
            // Keep only the permission bits, rendered in octal (e.g. "644").
            // eslint-disable-next-line no-bitwise
            mode: (stats.mode & 0o777).toString(8),
        },
    }).promise();
}
module.exports = AwsUploadFile;
54 changes: 54 additions & 0 deletions misha-z1nchuk/lib/FileUploader.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* eslint-disable no-console */
/* eslint-disable no-throw-literal */
const path = require('path');
const fs = require('fs');
const readdir = require('recursive-readdir');
const Workers = require('piscina');
const os = require('os');

const rootFolder = path.resolve(__dirname, '../');

/**
 * Collect the paths of the files that need to be uploaded.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Array|Promise} An empty array when `dirPath` does not exist,
 *   otherwise whatever `readdir(dirPath)` returns.
 */
function getFiles(dirPath) {
    if (!fs.existsSync(dirPath)) {
        return [];
    }

    return readdir(dirPath);
}

/**
 * Uploads every file found under a folder by handing one task per file to a
 * pool of worker threads.
 */
class FileUploader {
    workerPool; // worker threads pool, with queue

    /**
     * @param {object} [options] - May be omitted entirely.
     * @param {number} [options.threads] - Max amount of worker threads; the
     *   number of CPUs is used when this is missing or falsy.
     */
    constructor({ threads = undefined } = {}) {
        this.workerPool = new Workers({
            filename: path.resolve(__dirname, 'thread.js'), // path to worker instructions
            maxThreads: threads || os.cpus().length, // config max amount of threads
        });
    }

    /**
     * Derive the object key for a file path.
     *
     * The `baseDir` prefix is removed only when the path actually starts with
     * it, and platform path separators are converted to '/'.
     *
     * @param {string} file - Absolute path of the file.
     * @param {string} baseDir - Directory the key should be relative to.
     * @returns {string} Key using '/' as separator.
     */
    static toObjectKey(file, baseDir) {
        const prefix = `${baseDir}${path.sep}`;
        const relative = file.startsWith(prefix) ? file.slice(prefix.length) : file;

        return relative.split(path.sep).join('/');
    }

    /**
     * Upload every file found under `uploadFolder`.
     *
     * @param {string} uploadFolder - Folder path, resolved against this file's directory.
     * @returns {Promise<void>} Resolves once every queued task has resolved.
     * @throws {Error} 'Folder does not exist' when the folder is missing; a
     *   rejected worker task is logged and rethrown.
     */
    async uploadContent(uploadFolder) {
        const folderPath = path.resolve(__dirname, uploadFolder);

        if (!fs.existsSync(folderPath)) {
            throw new Error('Folder does not exist');
        }

        const filesToUpload = await getFiles(folderPath);

        await Promise.all(filesToUpload.map(async (file) => {
            try {
                const Key = FileUploader.toObjectKey(file, rootFolder);

                console.log(`uploading: [${Key}]`);
                await this.workerPool.run({
                    file,
                    Key,
                });

                console.log(`remaining files amount :${this.workerPool.queueSize}`);
            } catch (e) {
                console.log(e);
                throw e;
            }
        }));
    }
}
module.exports = {
FileUploader,
getFiles,
};
22 changes: 22 additions & 0 deletions misha-z1nchuk/lib/commander.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
const { program } = require('commander');

program
.name('Multi-threaded-s3-file-uploader')
.description('CLI')
.version('0.0.1');

program
.option('--threads <int>', 'max amount of threads to use')
.option('-f, --folder <string>', "path to folder for uploading, from lib folder");

program.parse();

const options = program.opts();
const threads = options.threads || undefined;
const pathFolder = options.folder || undefined;

module.exports = {
program,
threads: parseInt(threads, 10),
pathFolder,
};
16 changes: 16 additions & 0 deletions misha-z1nchuk/lib/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/* eslint-disable no-console */
require('dotenv').config();
const { threads, pathFolder } = require('./commander');
const { FileUploader } = require('./FileUploader');

// Folder used when the CLI does not supply one.
const uploadFolder = pathFolder || '../folder_to_upload';

const fileUploader = new FileUploader({ threads });

// Run the upload, then exit with 0 on success or 1 on any failure.
(async () => {
    try {
        await fileUploader.uploadContent(uploadFolder);
        console.log('task complete');
        process.exit(0);
    } catch (err) {
        console.error(err);
        process.exit(1);
    }
})();
14 changes: 14 additions & 0 deletions misha-z1nchuk/lib/thread.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* eslint-disable no-console */
/* eslint-disable consistent-return */
require('dotenv').config();
const AwsUploadFile = require('./CloudStorages/AWS_S3');

module.exports = async ({ Key, file }) => {
try {
await AwsUploadFile(Key, file, process.env.AWS_BUCKET_NAME);

return 'done';
} catch (e) {
console.log(e);
}
};
Loading