diff --git a/README.md b/README.md index a47f675a..596d96fb 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,10 @@ See [dev branch](https://github.com/Exabyte-io/rewotes/tree/dev) also. ## Notes -Examples listed here are only meant as guidelines and do not necessarily reflect on the type of work to be performed at the company. +Examples listed here are only meant as guidelines and do not necessarily reflect the type of work to be performed at the company. Modifications to the individual assignments, with advance notice, are encouraged. -Modifications to the individual assignments with an advance notice are encouraged. Candidates are free to share the results. +We will screen for the ability to (1) pick up new concepts quickly, (2) implement a working proof-of-concept solution, and (3) outline how the PoC can become more mature. We value attention to detail and modularity. -We will screen for the ability to pick up new concepts quickly and implement a working solution. We value attention to details and modularity. ## Hiring process diff --git a/jkolyer/.gitignore b/jkolyer/.gitignore new file mode 100644 index 00000000..44051d1a --- /dev/null +++ b/jkolyer/.gitignore @@ -0,0 +1,115 @@ +*.py[cod] + # C extensions +*.so + # Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + # Installer logs +pip-log.txt + # Unit test / coverage reports +.coverage +.tox +nosetests.xml + # Translations +*.mo + # Mr Developer +.mr.developer.cfg +.project +.pydevproject + # Complexity +output/*.html +output/*/index.html + # Sphinx +docs/_build + + # Created by https://www.toptal.com/developers/gitignore/api/osx,git,node,react,macos,elisp,emacs,dotenv,typings,librarian-chef ### Elisp ### # Compiled +*.elc + # Packaging +.cask + # Backup files +*~ + # Undo-tree save-files +*.~undo-tree + ### Emacs ### # -*- mode: gitignore; -*- +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +auto-save-list +tramp +.\#* + # Org-mode +.org-id-locations +*_archive + # flymake-mode +*_flymake.* + # eshell files +/eshell/history +/eshell/lastdir + # elpa packages +/elpa/ + # reftex files +*.rel + # AUCTeX auto folder +/auto/ + # cask packages +.cask/ +dist/ + # Flycheck +flycheck_*.el + # server auth directory +/server/ + # projectiles files +.projectile + # directory configuration +.dir-locals.el + # network security +/network-security.data + # sqlite +*.db + # sample files +samples/ + # virtualenv +env/ + diff --git a/jkolyer/.travis.yml b/jkolyer/.travis.yml new file mode 100644 index 00000000..4f685745 --- /dev/null +++ b/jkolyer/.travis.yml @@ -0,0 +1,14 @@ +# Config file for automatic testing at travis-ci.org + +language: python + +python: + - "3.9" + +# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors install: + - pip install -r frozen-requirements.txt + +# command to run tests, e.g. python setup.py test script: + - py.test diff --git a/jkolyer/AUTHORS.rst b/jkolyer/AUTHORS.rst new file mode 100644 index 00000000..72f220fa --- /dev/null +++ b/jkolyer/AUTHORS.rst @@ -0,0 +1,13 @@ +======= +Credits +======= + +Development Lead +---------------- + +* Jonathan Kolyer + +Contributors +------------ + +None yet. Why not be the first?
diff --git a/jkolyer/CONTRIBUTING.rst b/jkolyer/CONTRIBUTING.rst new file mode 100644 index 00000000..4f203a99 --- /dev/null +++ b/jkolyer/CONTRIBUTING.rst @@ -0,0 +1,109 @@ +============ +Contributing +============ + +Contributions are welcome, and they are greatly appreciated! Every +little bit helps, and credit will always be given. + +You can contribute in many ways: + +Types of Contributions +---------------------- + +Report Bugs +~~~~~~~~~~~ + +Report bugs at https://github.com/jkolyer/jkolyer/issues. + +If you are reporting a bug, please include: + +* Your operating system name and version. +* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +Fix Bugs +~~~~~~~~ + +Look through the GitHub issues for bugs. Anything tagged with "bug" +is open to whoever wants to implement it. + +Implement Features +~~~~~~~~~~~~~~~~~~ + +Look through the GitHub issues for features. Anything tagged with "feature" +is open to whoever wants to implement it. + +Write Documentation +~~~~~~~~~~~~~~~~~~~ + +FileUploader could always use more documentation, whether as part of the +official FileUploader docs, in docstrings, or even on the web in blog posts, +articles, and such. + +Submit Feedback +~~~~~~~~~~~~~~~ + +The best way to send feedback is to file an issue at https://github.com/jkolyer/jkolyer/issues. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +Get Started! +------------ + +Ready to contribute? Here's how to set up `jkolyer` for +local development. + +1. Fork_ the `jkolyer` repo on GitHub. +2. Clone your fork locally:: + + $ git clone git@github.com:your_name_here/jkolyer.git + +3. Create a branch for local development:: + + $ git checkout -b name-of-your-bugfix-or-feature + +Now you can make your changes locally. + +4. When you're done making changes, check that your changes pass style and unit + tests, including testing with tox:: + + $ tox + +To get tox, just pip install it. + +5. Commit your changes and push your branch to GitHub:: + + $ git add . + $ git commit -m "Your detailed description of your changes." + $ git push origin name-of-your-bugfix-or-feature + +6. Submit a pull request through the GitHub website. + +.. _Fork: https://github.com/jkolyer/jkolyer/fork + +Pull Request Guidelines +----------------------- + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include tests. +2. If the pull request adds functionality, the docs should be updated. Put + your new functionality into a function with a docstring, and add the + feature to the list in README.rst. +3. The pull request should work for Python 3.9, the version targeted by this + project. Check https://travis-ci.org/jkolyer/jkolyer + under pull requests for active pull requests or run the ``tox`` command and + make sure that the tests pass for all supported Python versions. + + Tips +---- + +To run a subset of tests:: + + $ py.test test/test_jkolyer.py diff --git a/jkolyer/HISTORY.rst b/jkolyer/HISTORY.rst new file mode 100644 index 00000000..c7dc9409 --- /dev/null +++ b/jkolyer/HISTORY.rst @@ -0,0 +1,9 @@ +.. :changelog: + +History +------- + +0.1.0 (2013-07-10) +++++++++++++++++++ + +* First release on PyPI.
diff --git a/jkolyer/LICENSE b/jkolyer/LICENSE new file mode 100644 index 00000000..ceeccad6 --- /dev/null +++ b/jkolyer/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013 Jonathan Kolyer + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/jkolyer/MANIFEST.in b/jkolyer/MANIFEST.in new file mode 100644 index 00000000..f7d4f773 --- /dev/null +++ b/jkolyer/MANIFEST.in @@ -0,0 +1,5 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.rst diff --git a/jkolyer/Makefile b/jkolyer/Makefile new file mode 100644 index 00000000..6be660f0 --- /dev/null +++ b/jkolyer/Makefile @@ -0,0 +1,56 @@ +.PHONY: help clean clean-pyc clean-build lint test test-all coverage docs release sdist + +help: + @echo "clean-build - remove build artifacts" + @echo "clean-pyc - remove Python file artifacts" + @echo "lint - check style with flake8" + @echo "test - run tests quickly with the default Python" + @echo "test-all - run tests on every Python version with tox" + @echo "coverage - check code coverage quickly with the default Python" + @echo "docs - generate Sphinx HTML documentation, including API docs" + @echo "release - package and upload a release" + @echo "sdist - package" + +clean: clean-build clean-pyc + +clean-build: + rm -fr build/ + rm -fr dist/ + rm -fr *.egg-info + +clean-pyc: + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + +lint: + flake8 jkolyer test + +test: + py.test + +test-all: + tox + +coverage: + coverage run --source jkolyer setup.py test + coverage report -m + coverage html + open htmlcov/index.html + +docs: + rm -f docs/jkolyer.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ jkolyer + $(MAKE) -C docs clean + $(MAKE) -C docs html + open docs/_build/html/index.html + +release: clean + python setup.py sdist upload + python setup.py bdist_wheel upload + +sdist: clean + python setup.py sdist + python setup.py bdist_wheel + ls -l dist diff --git a/jkolyer/README.md b/jkolyer/README.md new file mode 100644 index 00000000..2c79a50e --- /dev/null +++ b/jkolyer/README.md @@ -0,0 +1,137 @@ +# Parallel uploading service in Python + + +## Objective + +Create a Python application that uploads a set of given files to cloud object storage in parallel, through the cloud provider's or a third-party API. + +### Requirements + +1. Support up to 100,000s of files of arbitrary size, all inside one directory. The root directory may contain subdirectories. +1. 
The object storage container which holds the objects is private, and only credential-based access is allowed. +1. Each object inside object storage should have associated metadata containing file size, last modification time, and file permissions. ### Expectations +- Fast (utilize full network bandwidth), low CPU (do not block all other processes) and low memory (<25% tentatively) file uploader +- Support for AWS S3 +- Modular and object-oriented implementation (to add other cloud providers) +- Clean and documented code +- Tests ## PFU Implementation ### Overview This service ***pfu*** (parallel file upload) reads files from a root directory and loads a SQLite database with file metadata records. A batch job then iteratively uploads the files and their metadata to a given object storage provider. Parallelism is achieved either through [concurrency](https://docs.python.org/3/library/asyncio.html) or through [multiprocessing](https://docs.python.org/3/library/multiprocessing.html) – these are the two modes in which the service runs (a short code sketch of both modes follows the run instructions below). The service has a test suite which mocks object storage. In regular usage it leverages [localstack](https://localstack.cloud/) as the storage provider. Speed is achieved through parallelism, low CPU usage by running workers at the maximum `nice` setting, and low memory by paginating database result sets. A companion file generator utility creates a tree of sample files, with user-specified depth. This is for development and testing purposes only. ### Environment * Python 3.9 * SQLite 3 * [mkfile](https://ss64.com/bash/mkfile.html) (sample tests) * [localstack](https://localstack.cloud) * Virtualenv * Shell scripting #### Inputs * File root directory * Localstack docker instance * Object storage provider endpoint #### Outputs * Files and metadata uploaded into object storage * SQLite database file * file metadata * upload result flag (success/fail) * batch upload metadata ## Install & Run ### Installation Using a shell with Python 3.9: * `git clone git@github.com:jkolyer/rewotes.git` * `cd rewotes` * `git switch jkolyer` * `cd jkolyer` * `python3 -m venv env` * `source env/bin/activate` * `pip install -r frozen-requirements.txt` * if this fails, run `pip uninstall plux` and then re-run the install * `source scripts.sh` # utility shell scripts * `samplegen 2` # generates 2-level directory tree of sample files * `runtest` # runs the test suite The shell environment should now be ready to run the ***pfu*** service. See `localstack_p` and `localstack_c` below for the two upload modes. ### Run the service Shell scripts are provided as a convenience. The *file sample directory* is hardcoded as "samples". * `cd rewotes/jkolyer` * `source scripts.sh` * `activate` – for the virtualenv * Create the samples directory tree * `samplegen` script, providing a tree depth argument; e.g., `samplegen 3` will create a 3-level directory structure, each level containing 3 files (using the `mkfile` utility). * Run the localstack docker instance * follow the localstack instructions * note which port it's running on * Parallel mode * `localstack_p` with port number argument; e.g., `localstack_p 4566` * Concurrent mode * `localstack_c` with port number argument; e.g., `localstack_c 4566` * Tests use the `runtest` script.
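+For orientation, here is a minimal sketch (editor's addition) of roughly what the two modes drive under the hood, using the modules introduced below; localstack endpoint configuration is omitted here and handled by the `localstack_*` scripts:
+
+```python
+import asyncio
+from jkolyer.models.file_model import FileModel
+from jkolyer.models.batch_model import BatchJobModel, parallel_upload_files
+
+# Create tables (no-ops if they already exist), then index the sample tree.
+FileModel.create_tables()
+BatchJobModel.create_tables()
+batch = BatchJobModel.new_instance("samples")
+batch.generate_file_records()
+
+# Parallel mode (multiprocessing):
+parallel_upload_files(batch)
+
+# Concurrent mode (asyncio); reset statuses first if re-running the batch:
+# batch.reset_file_status()
+# asyncio.run(batch.async_upload_files())
+```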
## Design Components ### Data model #### Database Tables ##### FileStat Tracks file metadata with upload status. +> `CREATE TABLE FileStat ( id TEXT PRIMARY KEY, created_at INTEGER, file_size INTEGER, last_modified INTEGER, permissions TEXT, file_path TEXT, status INTEGER )` ##### BatchJob Tracks upload sessions and the root directory. > `CREATE TABLE BatchJob ( id TEXT PRIMARY KEY, status INTEGER, created_at INTEGER, root_dir TEXT )` #### Data Objects ##### BaseModel Abstract base class for the other models. Provides support for SQL generation. ##### FileModel Represents a file for upload and its upload status. Provides SQL generation and an upload state machine. ##### BatchJobModel Tracks and performs upload sessions. Creates `FileModel` instances for a given directory and provides iteration across the data. Handles the parallel and concurrent upload processes, plus SQL generation. ### Uploader The abstract class `Uploader` serves as an interface for the `S3Uploader`. It performs the basic unit of work for the service: uploading a file and its metadata to a third-party provider. Uploads are performed in order of ascending file size. Upload jobs can be run in concurrent mode (using [asyncio](https://docs.python.org/3/library/asyncio.html)) or parallel mode (using [multiprocessing](https://docs.python.org/3/library/multiprocessing.html)). ### File generator Bootstraps a test environment. Creates a small directory tree with files of varying sizes (under 1 GB). \ No newline at end of file diff --git a/jkolyer/README.rst b/jkolyer/README.rst new file mode 100644 index 00000000..e90c7ca3 --- /dev/null +++ b/jkolyer/README.rst @@ -0,0 +1,18 @@ +============================= +FileUploader +============================= + +.. image:: https://badge.fury.io/py/jkolyer.png + :target: http://badge.fury.io/py/jkolyer + +.. image:: https://travis-ci.org/jkolyer/jkolyer.png?branch=master + :target: https://travis-ci.org/jkolyer/jkolyer + +Upload sets of files to S3-compatible object storage in parallel. + + +Features +-------- + +* TODO + diff --git a/jkolyer/docs/Makefile b/jkolyer/docs/Makefile new file mode 100644 index 00000000..0e35bee9 --- /dev/null +++ b/jkolyer/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/complexity.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/complexity.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/complexity" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/complexity" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." 
+ +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/jkolyer/docs/authors.rst b/jkolyer/docs/authors.rst new file mode 100644 index 00000000..e122f914 --- /dev/null +++ b/jkolyer/docs/authors.rst @@ -0,0 +1 @@ +.. include:: ../AUTHORS.rst diff --git a/jkolyer/docs/conf.py b/jkolyer/docs/conf.py new file mode 100644 index 00000000..86e05990 --- /dev/null +++ b/jkolyer/docs/conf.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- +# +# complexity documentation build configuration file, created by +# sphinx-quickstart on Tue Jul 9 22:26:36 2013. +# +# This file is execfile()d with the current directory set to its containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+#sys.path.insert(0, os.path.abspath('.')) + +cwd = os.getcwd() +parent = os.path.dirname(cwd) +sys.path.insert(0, parent) + +import jkolyer + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'FileUploader' +copyright = u'2013, Jonathan Kolyer' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = jkolyer.__version__ +# The full version, including alpha/beta/rc tags. +release = jkolyer.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'jkolyerdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'jkolyer.tex', u'FileUploader Documentation', + u'Jonathan Kolyer', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'jkolyer', u'FileUploader Documentation', + [u'Jonathan Kolyer'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------------ + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'jkolyer', u'FileUploader Documentation', + u'Jonathan Kolyer', 'jkolyer', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/jkolyer/docs/contributing.rst b/jkolyer/docs/contributing.rst new file mode 100644 index 00000000..e582053e --- /dev/null +++ b/jkolyer/docs/contributing.rst @@ -0,0 +1 @@ +.. include:: ../CONTRIBUTING.rst diff --git a/jkolyer/docs/history.rst b/jkolyer/docs/history.rst new file mode 100644 index 00000000..25064996 --- /dev/null +++ b/jkolyer/docs/history.rst @@ -0,0 +1 @@ +.. include:: ../HISTORY.rst diff --git a/jkolyer/docs/index.rst b/jkolyer/docs/index.rst new file mode 100644 index 00000000..1918a5ff --- /dev/null +++ b/jkolyer/docs/index.rst @@ -0,0 +1,27 @@ +.. complexity documentation master file, created by + sphinx-quickstart on Tue Jul 9 22:26:36 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. include:: ../README.rst + +Contents: +========= + +.. toctree:: + :maxdepth: 2 + + installation + usage + contributing + authors + history + +Feedback +======== + +If you have any suggestions or questions about **FileUploader** feel free to email me +at jonathankolyer@gmail.com. + +If you encounter any errors or problems with **FileUploader**, please let me know! +Open an Issue at the GitHub http://github.com/jkolyer/jkolyer main repository. diff --git a/jkolyer/docs/installation.rst b/jkolyer/docs/installation.rst new file mode 100644 index 00000000..ccf7739a --- /dev/null +++ b/jkolyer/docs/installation.rst @@ -0,0 +1,13 @@ +============ +Installation +============ + +At the command line either via easy_install or pip:: + + $ easy_install jkolyer + $ pip install jkolyer + +Or, if you have virtualenvwrapper installed:: + + $ mkvirtualenv jkolyer + $ pip install jkolyer diff --git a/jkolyer/docs/make.bat b/jkolyer/docs/make.bat new file mode 100644 index 00000000..2df9a8cb --- /dev/null +++ b/jkolyer/docs/make.bat @@ -0,0 +1,242 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. 
man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\complexity.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\complexity.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
+ goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
+ goto end +) + +:end diff --git a/jkolyer/docs/usage.rst b/jkolyer/docs/usage.rst new file mode 100644 index 00000000..42755220 --- /dev/null +++ b/jkolyer/docs/usage.rst @@ -0,0 +1,7 @@ +======== +Usage +======== + +To use FileUploader in a project:: + + import jkolyer diff --git a/jkolyer/frozen-requirements.txt b/jkolyer/frozen-requirements.txt new file mode 100644 index 00000000..d9c25df6 --- /dev/null +++ b/jkolyer/frozen-requirements.txt @@ -0,0 +1,69 @@ +attrs==21.4.0 +boto3==1.21.8 +botocore==1.24.8 +cachetools==3.1.1 +certifi==2021.10.8 +cffi==1.15.0 +chardet==4.0.0 +charset-normalizer==2.0.12 +click==8.0.4 +colorama==0.4.4 +commonmark==0.9.1 +cryptography==36.0.1 +cuid==0.3 +deepdiff==5.7.0 +dill==0.3.2 +distlib==0.3.4 +dnslib==0.9.19 +dnspython==2.2.0 +dulwich==0.20.32 +execnet==1.9.0 +filelock==3.6.0 +flake8==4.0.1 +idna==2.10 +iniconfig==1.1.1 +Jinja2==3.0.3 +jmespath==0.10.0 +localstack==0.14.0.5 +localstack-client==1.32 +localstack-ext==0.14.0.18 +localstack-plugin-loader==1.1.1 +MarkupSafe==2.1.0 +mccabe==0.6.1 +moto==3.0.5 +ordered-set==4.0.2 +packaging==21.3 +pbr==5.8.1 +platformdirs==2.5.1 +pluggy==1.0.0 +psutil==5.9.0 +py==1.11.0 +pyaes==1.6.1 +pycodestyle==2.8.0 +pycparser==2.21 +pyflakes==2.4.0 +Pygments==2.11.2 +pyparsing==3.0.7 +pytest==7.0.1 +pytest-asyncio==0.18.1 +pytest-forked==1.4.0 +python-dateutil==2.8.2 +python-dotenv==0.19.2 +pytz==2021.3 +PyYAML==6.0 +requests==2.25.1 +responses==0.18.0 +rich==11.2.0 +s3transfer==0.5.2 +semver==2.13.0 +six==1.16.0 +stevedore==3.5.0 +tabulate==0.8.9 +tailer==0.4.1 +toml==0.10.2 +tomli==2.0.1 +tox==3.24.5 +urllib3==1.26.8 +virtualenv==20.13.2 +Werkzeug==2.0.3 +xmltodict==0.12.0 diff --git a/jkolyer/jkolyer/__init__.py b/jkolyer/jkolyer/__init__.py new file mode 100644 index 00000000..655344ba --- /dev/null +++ b/jkolyer/jkolyer/__init__.py @@ -0,0 +1,3 @@ +__author__ = 'Jonathan Kolyer' +__email__ = 'jonathankolyer@gmail.com' +__version__ = '0.1.0' diff --git a/jkolyer/jkolyer/directories.py b/jkolyer/jkolyer/directories.py new file mode 100644 index 00000000..fbf12a5a --- /dev/null +++ b/jkolyer/jkolyer/directories.py @@ -0,0 +1,96 @@ +"""DirectoryTree: utility for generating sample files for uploading. + Uses `mkfile` command line utility. +""" +import os +import subprocess +import pathlib +import argparse +import pathlib +import sys +import random + +class DirectoryTree: + def __init__(self, root_dir, tree_depth): + """Instance constructor. + :param root_dir: parent directory of files + :param tree_depth: number of tree levels + :return: type describe + """ + self.root_dir = root_dir + self.tree_depth = tree_depth + + def _generate_file(self, file_path): + """Utility for generating a file at the given path. + Selects a random file size under 100 of units + bytes, kilobytes, megabytes (chosen at random). + :param file_path: file location relative to root + """ + sizetypes = ['b','k','m'] + sizetype = sizetypes[random.randint(0,len(sizetypes)-1)] + size = random.randint(8,100) + args = f"{size}{sizetype}" + cmd = ['mkfile', "-n", args, file_path] + print(cmd) + process = subprocess.run(['mkfile', "-n", args, file_path]) + + def _generate(self, path, dir_level): + """Iterates over `tree_depth` levels. Calls itself + recursively if `dir_level` < `tree_depth`. + Creates directory at given path. 
+ :param path: directory to insert child files/dirs + :param dir_level: current level + """ + print(f"makedir: {path}") + os.makedirs(path) + + for file_num in range(self.tree_depth): + _path = f"{path}{os.sep}{file_num}" + self._generate_file(_path) + + if dir_level < self.tree_depth: + _path = f"{path}{os.sep}d{dir_level}" + self._generate(_path, dir_level + 1) + + def generate(self): + for level in range(self.tree_depth): + path = f"{self.root_dir}{os.sep}d{level}" + self._generate(path, 1) + +def parse_cmd_line_arguments(): + """Parses the command line for `root_dir` and `tree_depth`. + Expected usage: `python directories.py --tree_depth N ROOT_DIR` + :return: parsed arguments + """ + parser = argparse.ArgumentParser( + prog="directories", + description="Directory tree generator for pfu sample files", + epilog="Thanks for using pfu!", + ) + parser.add_argument( "root_dir", metavar="ROOT_DIR", nargs="?", default=".", help="Generate a full directory tree starting at ROOT_DIR", ) + parser.add_argument( "--tree_depth", metavar="TREE_DEPTH", nargs=1, default="3", type=int, required=True, help="How many directory levels to create", ) + return parser.parse_args() + +if __name__ == '__main__': + args = parse_cmd_line_arguments() + root_dir = pathlib.Path(args.root_dir) + tree_depth = args.tree_depth + if not root_dir.is_dir(): + print("The specified root directory doesn't exist") + sys.exit(1) + tree = DirectoryTree(root_dir, tree_depth[0]) + tree.generate()
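+
+# Example (editor's sketch): the generator can also be driven from Python
+# rather than the CLI; e.g. to build a 2-level tree under ./samples
+# (assumes the `mkfile` utility is on PATH):
+#
+#     tree = DirectoryTree('samples', 2)
+#     tree.generate()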
 diff --git a/jkolyer/jkolyer/models/base_model.py b/jkolyer/jkolyer/models/base_model.py new file mode 100644 index 00000000..1d303ce0 --- /dev/null +++ b/jkolyer/jkolyer/models/base_model.py @@ -0,0 +1,98 @@ +""" Abstract superclass for SQL data wrappers. Includes + utility functions and classes. +""" +from abc import ABC, abstractmethod +import sqlite3 +from datetime import datetime +from enum import Enum +import logging + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +def dateSinceEpoch(mydate=None): + """Seconds between the given date and the epoch date 1970-01-01. + :param mydate: source date; defaults to now + :return: float: seconds since epoch date + """ + if mydate is None: + # resolve the default at call time; a `datetime.now()` default + # argument would be evaluated once, at import time + mydate = datetime.now() + result = (mydate - datetime(1970, 1, 1)).total_seconds() + return result + +class UploadStatus(Enum): + """Enumerator for upload status state changes. + """ + FAILED = -1 + PENDING = 0 + IN_PROGRESS = 1 + COMPLETED = 2 + +class BaseModel(ABC): + """Holds class properties for database and object storage + :param db_name: SQLite database name + :param db_conn: database connection provided by `sqlite3` + :param bucket_name: object storage bucket name + """ + db_name = 'parallel-file-upload.db' + db_conn = sqlite3.connect(db_name) + bucket_name = 'rewotes-pfu-bucket' + + @classmethod + @abstractmethod + def table_name(cls): + """Name of the underlying SQL database table. + :return: string: receiver's database table name + """ + pass + + @classmethod + @abstractmethod + def create_table_sql(cls): + """All the sql create scripts needed by file objects for tables and indices + :return: string[] sql statements + """ + pass + + @classmethod + def create_tables(cls): + """Creates the receiver's tables and indices in the database if they don't already exist. + :return: None + """ + cursor = cls.db_conn.cursor() + try: + sqls = cls.create_table_sql() + for sql in sqls: cursor.execute(sql) + cls.db_conn.commit() + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}; {sql}") + finally: + cursor.close() + + @classmethod + def run_sql_query(cls, sql): + """Performs the given SQL query on the database and returns + results as provided by the database. + :param sql: SQL statement + :return: tuple[]: Array of tuples containing properties + """ + cursor = cls.db_conn.cursor() + try: + return cursor.execute(sql).fetchall() + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}; {sql}") + finally: + cursor.close() + + @classmethod + def run_sql_command(cls, sql): + """Executes the given SQL on the database and commits. + :param sql: SQL statement + :return: None + """ + cursor = cls.db_conn.cursor() + try: + cursor.execute(sql) + cls.db_conn.commit() + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}; {sql}") + finally: + cursor.close()
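+
+# Example (editor's sketch): a minimal concrete subclass only needs to supply
+# `table_name` and `create_table_sql`; `create_tables`, `run_sql_query`, and
+# `run_sql_command` are inherited. `NoteModel` here is hypothetical:
+#
+#     class NoteModel(BaseModel):
+#         @classmethod
+#         def table_name(cls):
+#             return 'Note'
+#         @classmethod
+#         def create_table_sql(cls):
+#             return [f"CREATE TABLE IF NOT EXISTS {cls.table_name()} (id TEXT PRIMARY KEY);"]
+#
+#     NoteModel.create_tables()
+#     rows = NoteModel.run_sql_query(f"SELECT * FROM {NoteModel.table_name()}")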
 diff --git a/jkolyer/jkolyer/models/batch_model.py b/jkolyer/jkolyer/models/batch_model.py new file mode 100644 index 00000000..8dcbdf06 --- /dev/null +++ b/jkolyer/jkolyer/models/batch_model.py @@ -0,0 +1,353 @@ +"""BatchJobModel: the data model for the BatchJob database table. + +Provides SQL wrapper around upload status for a set of files. +""" +import os +import stat +from pathlib import Path +import sqlite3 +import asyncio +import json +from multiprocessing import cpu_count, Process, Queue, Semaphore, Value +from cuid import cuid +import logging +import time + +from jkolyer.models.base_model import BaseModel, UploadStatus, dateSinceEpoch +from jkolyer.models.file_model import FileModel +from jkolyer.uploader import S3Uploader + +# silence noisy third-party loggers +for name in logging.Logger.manager.loggerDict.keys(): + if ('boto' in name) or \ ('urllib3' in name) or \ ('boto3' in name) or \ ('botocore' in name) or \ ('nose' in name): + logging.getLogger(name).setLevel(logging.CRITICAL) +logging.getLogger('s3transfer').setLevel(logging.CRITICAL) + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +class BatchJobModel(BaseModel): + + @classmethod + def table_name(cls): + """Returns the SQL table name 'BatchJob' + :return: string + """ + return 'BatchJob' + + @classmethod + def create_table_sql(cls): + """All the sql create scripts needed by batch objects + for tables and indices. + Does nothing if the tables/indices already exist. + :return: string[] SQL statements + """ + return [ """CREATE TABLE IF NOT EXISTS {table_name} ( id TEXT PRIMARY KEY, status INTEGER, created_at INTEGER, root_dir TEXT ); """.format(table_name=cls.table_name()), f"CREATE INDEX IF NOT EXISTS IdxCreatedAt ON {cls.table_name()}(created_at);", ] + + @classmethod + def new_record_sql(cls, root_dir): + """Provides the SQL for creating a new record with default values. + :param root_dir: the file directories root path + :return: string SQL INSERT statement + """ + return """INSERT INTO {table_name} ( id, status, created_at, root_dir ) VALUES ( '{idval}', {status}, {created_at}, '{root_dir}' ) """.format( table_name=cls.table_name(), idval=cuid(), status=UploadStatus.PENDING.value, created_at=dateSinceEpoch(), root_dir=root_dir,) + + @classmethod + def query_latest(cls): + """Fetches the most recent record from the database. + :return: BatchJobModel: latest instance or None + """ + sql = f"SELECT * FROM {cls.table_name()} ORDER BY created_at DESC LIMIT 1" + cursor = cls.db_conn.cursor() + try: + result = cursor.execute(sql).fetchall() + if len(result) == 0: return None + + # logger.debug(f"BatchJobModel.query_latest: {result}") + model = BatchJobModel(result[0]) + return model + + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}; {sql}") + finally: + cursor.close() + return None + + @classmethod + def new_instance(cls, root_dir): + """Creates and returns a new instance. + :param root_dir: the file directories root path + :return: BatchJobModel: the instance + """ + sql = cls.new_record_sql(root_dir) + cls.run_sql_command(sql) + return cls.query_latest() + + def __init__(self, props): + """Instance constructor, setting table properties + :param props: tuple of values ordered as in the create table script + """ + self.id = props[0] + self.status = props[1] + self.created_at = props[2] + self.root_dir = props[3] + + def generate_file_records(self): + """Loads all files from the receiver's `root_dir` and + creates a `FileModel` instance for each, + which then saves a new database record. + :return: int: the count of file records created + """ + cursor = self.db_conn.cursor() + file_count = 0 + try: + for file_path in Path(self.root_dir).rglob('*'): + fstat = os.stat(file_path) + fmode = fstat.st_mode + if stat.S_ISDIR(fmode): continue + + # logger.debug(file_path) + file_size = fstat.st_size + last_modified = fstat.st_mtime + permissions = oct(fstat.st_mode)[-3:] + status = UploadStatus.PENDING.value + + file_obj = FileModel(( cuid(), dateSinceEpoch(), file_size, last_modified, permissions, file_path, status )) + file_obj.save(cursor) + self.db_conn.commit() + + file_count += 1 + + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}") + finally: + cursor.close() + return file_count
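+
+    # Editor's note: `_fetch_files` below paginates with a subquery
+    # (id NOT IN (SELECT id ... LIMIT offset)) rather than SQL OFFSET over
+    # the status-filtered result. The excluded id set is independent of
+    # `status`, so rows whose status changes mid-iteration are not silently
+    # skipped; the linked gist also cites OFFSET's scan-and-discard cost.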
+ def _fetch_files(self, cursor, page_num, page_size): + """Convenience method for retrieving a page of database + records for `FileModel`. Orders by `file_size` (ascending). + :param cursor: used for SQL execution + :param page_num: page number for query offset + :param page_size: records per page + :return: tuple[]: an array of tuples for FileModel + """ + offset = page_num * page_size + + # paginate without using sql OFFSET https://gist.github.com/ssokolow/262503 + sql = """ SELECT * FROM {table_name} WHERE status = {status} AND (id NOT IN ( SELECT id FROM {table_name} ORDER BY file_size ASC LIMIT {offset} )) ORDER BY file_size ASC LIMIT {page_size} """.format( table_name = FileModel.table_name(), status = UploadStatus.PENDING.value, offset = offset, page_size = page_size ) + return cursor.execute(sql).fetchall() + + def file_iterator(self, cursor=None): + """Generator method for iterating over pages of `FileModel` data. + Yields to the caller with a model instance and cursor. Page size is 10 records. + :param cursor: for SQL execution; creates a cursor if not provided + :return: None + """ + _cursor = cursor if cursor else self.db_conn.cursor() + + page_num = 0 + page_size = 10 + try: + while True: + results = self._fetch_files(_cursor, page_num, page_size) + if len(results) == 0: break + + page_num += 1 + for result in results: + yield FileModel(result), _cursor + + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}") + finally: + """only close the cursor if created in this method""" + if cursor is None: + _cursor.close() + + def reset_file_status(self): + """Resets all the `FileModel` status values to `UploadStatus.PENDING`. + Useful for testing or restarting a previous batch upload. + :return: None + """ + cursor = self.db_conn.cursor() + try: + sql = f"UPDATE {FileModel.table_name()} SET status = {UploadStatus.PENDING.value}" + cursor.execute(sql) + self.db_conn.commit() + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}; {sql}") + finally: + cursor.close() + + async def async_upload_files(self): + """Performs asynchronous file upload across all pending `FileModel` instances. + Uses the `asyncio` module. Maximum 8 concurrent jobs. + :return: None + """ + cursor = self.db_conn.cursor() + max_concur = 8 + sem = asyncio.Semaphore(max_concur) + + async def task_wrapper(model, cursor): + try: + model.start_upload(cursor) + finally: + sem.release() + + for file_model, _cursor in self.file_iterator(cursor): + """ this blocks until concurrent jobs drops below max """ + await sem.acquire() + asyncio.create_task(task_wrapper(file_model, _cursor)) + + # wait for all tasks to complete + for i in range(max_concur): + await sem.acquire() + cursor.close() + + +MAX_NICENESS = 19 + +def parallel_upload(file_dto_string, queue, sema): + """Perform the upload command in multiprocessing mode. This runs in a + separate process from the main one. Sets the process nice level to 19. + Sets the uploader endpoint based on the dto. + :param file_dto_string: JSON-formatted string as FileModel dto + :param queue: inter-process queue to share upload results + :param sema: semaphore to limit the number of active processes + :return: None + """ + os.nice(MAX_NICENESS) + file_dto = json.loads(file_dto_string) + + # logger.debug(f"parallel_upload: file_dto_string = {file_dto_string}") + S3Uploader.set_endpoint_url(file_dto["endpoint_url"]) + + uploader = S3Uploader() + + metadata = file_dto["metadata"] + + completed = uploader.upload_file( file_dto["file_path"], file_dto["bucket_name"], file_dto["id"], metadata["file_size"], ) + if completed: + completed = uploader.upload_metadata( json.dumps(metadata), file_dto["bucket_name"], f"metadata-{file_dto['id']}" ) + + file_dto["status"] = UploadStatus.COMPLETED.value if completed else UploadStatus.FAILED.value + file_dto_string = json.dumps(file_dto) + + queue.put(file_dto_string) + + # `release` will add 1 to `sema`, allowing other + # processes blocked on it to continue + sema.release() + +
+def parallel_upload_files(batch_model): + """Uploads files using multiprocessing. + Limits the concurrent process count to half of `multiprocessing.cpu_count()`. + :param batch_model: the BatchJobModel instance + :return: None + """ + concurrency = cpu_count() // 2 + """ used to limit the total number of processes spawned""" + sema = Semaphore(concurrency) + active_processes = {} + queue = Queue() + """temporary store of file_models in progress""" + file_models_progress = {} + + def _read_queue(): + """Handle data in the queue, which is + populated by the `parallel_upload` function + run in a separate process. Here we update the database + using `FileModel` state change methods. + """ + cursor = BatchJobModel.db_conn.cursor() + try: + while not queue.empty(): + dto = queue.get() + dto = json.loads(dto) + fmodel = file_models_progress[dto["id"]] + try: + if dto["status"] == UploadStatus.COMPLETED.value: + fmodel.upload_complete(cursor) + elif dto["status"] == UploadStatus.FAILED.value: + fmodel.upload_failed(cursor) + + del file_models_progress[fmodel.id] + del active_processes[fmodel.id] + + except sqlite3.Error as error: + logger.error(f"Error running sql: {error}") + finally: + cursor.close() + + for file_model, _cursor in batch_model.file_iterator(): + """once max processes are running, the following `acquire` call + will block the main process since `sema` has been reduced to 0 + """ + sema.acquire() + + """ cache of `FileModel` used when queue is processed """ + file_models_progress[file_model.id] = file_model + + dtoStr = file_model.parallel_dto_string() + proc = Process(target=parallel_upload, args=(dtoStr, queue, sema)) + active_processes[file_model.id] = proc + proc.start() + """ check if queue has any data (also clears active_processes and file_models_progress) """ + _read_queue() + + # inside the main process, wait for all worker processes to finish + for proc in active_processes.values(): + proc.join() + + _read_queue() +
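+
+# Example (editor's sketch): the semaphore/queue pattern above in miniature --
+# `sema` caps the number of live workers, `queue` carries results back to the
+# main process:
+#
+#     from multiprocessing import Process, Queue, Semaphore
+#
+#     def work(n, q, sema):
+#         try:
+#             q.put(n * n)
+#         finally:
+#             sema.release()
+#
+#     q, sema, procs = Queue(), Semaphore(2), []
+#     for n in range(6):
+#         sema.acquire()          # blocks while 2 workers are running
+#         p = Process(target=work, args=(n, q, sema))
+#         procs.append(p)
+#         p.start()
+#     for p in procs:
+#         p.join()
+#     results = [q.get() for _ in range(6)]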
diff --git a/jkolyer/jkolyer/models/file_model.py b/jkolyer/jkolyer/models/file_model.py
new file mode 100644
index 00000000..f7854ba8
--- /dev/null
+++ b/jkolyer/jkolyer/models/file_model.py
@@ -0,0 +1,207 @@
+"""FileModel: the data model for the FileStat database table.
+
+Provides a SQL wrapper around file metadata and upload status.
+"""
+import sqlite3
+import json
+import logging
+from jkolyer.uploader import S3Uploader, Uploader
+from jkolyer.models.base_model import BaseModel, UploadStatus
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+class FileModel(BaseModel):
+
+    @classmethod
+    def table_name(cls):
+        """Returns the SQL table name 'FileStat'.
+        :return: string
+        """
+        return 'FileStat'
+
+    @classmethod
+    def create_table_sql(cls):
+        """All the SQL create statements needed by file objects,
+        for both tables and indices. The statements use IF NOT EXISTS,
+        so they are no-ops if the tables/indices already exist.
+        :return: string[] SQL statements
+        """
+        sql = """CREATE TABLE IF NOT EXISTS {table_name}
+                 ( id TEXT PRIMARY KEY,
+                   created_at INTEGER,
+                   file_size INTEGER,
+                   last_modified INTEGER,
+                   permissions TEXT,
+                   file_path TEXT,
+                   status INTEGER
+                 );""".format(table_name=cls.table_name())
+        return [sql,
+                f"CREATE UNIQUE INDEX IF NOT EXISTS IdxFilePath ON \
+                {cls.table_name()}(file_path);",
+                f"CREATE INDEX IF NOT EXISTS IdxStatus ON \
+                {cls.table_name()}(status);"]
+
+    @classmethod
+    def bootstrap_table(cls):
+        """Drops and recreates the SQL tables and indices.
+        :return: None
+        """
+        cursor = cls.db_conn.cursor()
+        sql = f"DROP TABLE IF EXISTS {cls.table_name()}"
+        try:
+            cursor.execute(sql)
+            cls.db_conn.commit()
+            for sql in cls.create_table_sql():
+                cursor.execute(sql)
+            cls.db_conn.commit()
+        except sqlite3.Error as error:
+            logger.error(f"Error running sql: {error}; {sql}")
+        finally:
+            cursor.close()
+
+    @classmethod
+    def fetch_record(cls, status):
+        """Retrieves the most recently created instance with the given status.
+        :param status (`UploadStatus`): target status value
+        :return: FileModel instance if found, otherwise None
+        """
+        sql = f"SELECT * FROM {FileModel.table_name()} WHERE status = {status} ORDER BY created_at DESC LIMIT 1"
+        cursor = cls.db_conn.cursor()
+        try:
+            result = cursor.execute(sql).fetchone()
+            return FileModel(result) if result is not None else None
+        except sqlite3.Error as error:
+            logger.error(f"Error running sql: {error}; {sql}")
+        finally:
+            cursor.close()
+        return None
+
+    def __init__(self, *args):
+        """Instance constructor, setting table properties and a local `S3Uploader` instance.
+        :param args: tuple of values ordered as in the create-table script
+        """
+        tpl = args[0]
+        self.id = tpl[0]
+        self.created_at = tpl[1]
+        self.file_size = tpl[2]
+        self.last_modified = tpl[3]
+        self.permissions = tpl[4]
+        self.file_path = tpl[5]
+        self.status = tpl[6]
+        self.uploader = S3Uploader()
+
+    def save(self, cursor):
+        """Saves the receiver's properties into the database using an
+        INSERT OR IGNORE statement. Uses a parameterized query so that
+        quoting in `file_path` or `permissions` cannot break the SQL.
+        Throws an exception on error.
+        :param cursor: active cursor to execute SQL
+        :return: None
+        """
+        sql = """INSERT OR IGNORE INTO {table_name}
+                 ( id, created_at, file_size, last_modified, permissions, file_path, status )
+                 VALUES ( ?, ?, ?, ?, ?, ?, ? )
+              """.format(table_name=self.__class__.table_name())
+        cursor.execute(sql, (self.id,
+                             self.created_at,
+                             self.file_size,
+                             self.last_modified,
+                             self.permissions,
+                             self.file_path,
+                             self.status))
+
+    def metadata(self):
+        """Data structure used to store file metadata with the storage provider.
+        Properties include `file_size`, `last_modified`, and `permissions`.
+        Callers serialize it to JSON for storage.
+        :return: dict of metadata properties
+        """
+        data = {
+            "file_size": self.file_size,
+            "last_modified": self.last_modified,
+            "permissions": self.permissions,
+        }
+        return data
+
+    def _update_status(self, cursor):
+        """Convenience method for the SQL UPDATE of the `status` property.
+        Executes the SQL and commits. Throws an exception on error.
+        :param cursor: used for SQL execution
+        :return: None
+        """
+        sql = f"UPDATE {self.table_name()} SET status = {self.status} WHERE id = '{self.id}'"
+        cursor.execute(sql)
+        self.db_conn.commit()
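+
+    # Status lifecycle driven by the methods below (a sketch; values come
+    # from `UploadStatus`, and `cursor` is any open cursor on db_conn):
+    #
+    #     model = FileModel.fetch_record(UploadStatus.PENDING.value)
+    #     model.start_upload(cursor)   # PENDING -> IN_PROGRESS -> COMPLETED or FAILED
+    #     model.status == UploadStatus.COMPLETED.value
+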
+    def start_upload(self, cursor):
+        """Status state change to initiate the upload. Calls `_update_status`.
+        Invokes `upload_file` and `upload_metadata` on the uploader property.
+        If either upload fails, calls `upload_failed`; otherwise calls `upload_complete`.
+        :param cursor: used for SQL execution
+        :return: None
+        """
+        self.status = UploadStatus.IN_PROGRESS.value
+        self._update_status(cursor)
+
+        completed = self.uploader.upload_file(
+            self.file_path,
+            S3Uploader.bucket_name,
+            self.id,
+            self.file_size,
+        )
+        if completed:
+            completed = self.uploader.upload_metadata(
+                self.metadata(),
+                S3Uploader.bucket_name,
+                f"metadata-{self.id}"
+            )
+        if completed:
+            self.upload_complete(cursor)
+        else:
+            self.upload_failed(cursor)
+
+    def upload_complete(self, cursor):
+        """Status state change on successful upload completion. Calls `_update_status`.
+        :param cursor: used for SQL execution
+        :return: None
+        """
+        self.status = UploadStatus.COMPLETED.value
+        self._update_status(cursor)
+
+    def upload_failed(self, cursor):
+        """Status state change on a failed upload. Calls `_update_status`.
+        :param cursor: used for SQL execution
+        :return: None
+        """
+        self.status = UploadStatus.FAILED.value
+        self._update_status(cursor)
+
+    def get_uploaded_file(self):
+        """For testing purposes, fetches the uploaded file from object storage.
+        :return: binary string: the uploaded file bytes, or None
+        """
+        return self.uploader.get_uploaded_data(Uploader.bucket_name, self.id)
+
+    def get_uploaded_metadata(self):
+        """For testing purposes, fetches the uploaded metadata from object storage.
+        :return: dict: the uploaded metadata, or None
+        """
+        metadata = self.uploader.get_uploaded_data(Uploader.bucket_name, f"metadata-{self.id}")
+        return json.loads(metadata)
+
+    def parallel_dto_string(self):
+        """For parallel uploading with the multiprocessing module,
+        provides the data transfer object needed for uploading
+        in a separate process: `id`, `file_path`, `metadata`,
+        `bucket_name`, `status`, `endpoint_url`.
+        :return: string: the JSON-formatted string of properties
+        """
+        dto = {
+            "id": self.id,
+            "file_path": self.file_path,
+            "metadata": self.metadata(),
+            "bucket_name": S3Uploader.bucket_name,
+            "status": self.status,
+            "endpoint_url": S3Uploader.endpoint_url
+        }
+        return json.dumps(dto)
+
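+
+# Round trip of the DTO handed to a worker process (illustrative; `model` is
+# any FileModel instance):
+#
+#     dto = json.loads(model.parallel_dto_string())
+#     dto["metadata"]["file_size"]     # metadata travels inside the dto
+#     # the worker updates dto["status"] and puts json.dumps(dto) on the queue
+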
diff --git a/jkolyer/jkolyer/uploader.py b/jkolyer/jkolyer/uploader.py
new file mode 100644
index 00000000..7df460bf
--- /dev/null
+++ b/jkolyer/jkolyer/uploader.py
@@ -0,0 +1,149 @@
+"""Uploader: abstract superclass for wrapping object storage providers.
+
+"""
+from abc import ABC, abstractmethod
+import json
+import boto3
+from botocore.exceptions import ClientError
+import logging
+from moto import mock_s3  # workaround for multiprocessing / pytest limits
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+
+class Uploader(ABC):
+    """ static bucket_name shared by all uploaders """
+    bucket_name = 'rewotes-pfu-bucket'
+
+    @abstractmethod
+    def get_uploaded_data(self, bucket_name, key):
+        """Retrieves stored data (either file or metadata) for the given key.
+        Used for testing and validation purposes.
+        :param bucket_name: bucket where the data was uploaded
+        :param key: lookup key for the uploaded data
+        :return: bytes
+        """
+        pass
+
+    @abstractmethod
+    def upload_metadata(self, metadata, bucket_name, key):
+        """Performs the metadata upload to the given bucket under the given key.
+        :param metadata: dict of file metadata; serialized to JSON for storage
+        :param bucket_name: destination bucket
+        :param key: lookup key for the uploaded data
+        :return: bool: True if no errors, False otherwise
+        """
+        pass
+
+    @abstractmethod
+    def upload_file(self, file_name, bucket, key, file_size):
+        """Uploads a file to the given bucket.
+        :param file_name: file path to upload
+        :param bucket: bucket to upload to
+        :param key: S3 object key
+        :param file_size: size of the file in bytes, used for logging
+        :return: True if the file was uploaded, else False
+        """
+        pass
+
+
+class S3Uploader(Uploader):
+    """S3Uploader: concrete Uploader for S3 object storage uploads.
+    Uses the `boto3` module.
+    :Properties:
+    : client: instance provided by `boto3` for S3
+    """
+    """ process-wide `boto3` client, which will vary based on environment and parallel algorithm """
+    boto3_client = None
+    """ used to initialize `boto3` clients """
+    endpoint_url = None
+    """ In multiprocessing tests, used in place of `endpoint_url` as a signal to use the mock `boto3` client. """
+    MOCK_ENDPOINT_KEY = 's3_mock'
+
+    @classmethod
+    def s3_mock(cls):
+        """Initializes a mock wrapper around boto3 for use in testing.
+        To support tests with multiprocessing, we need to define the mock
+        within this class (instead of keeping it with the tests).
+        This may be a candidate for refactoring.
+        :return: mocked boto3 client
+        """
+        mock = mock_s3()
+        mock.start()
+        s3 = boto3.client('s3', region_name='us-east-1')
+        s3.create_bucket(Bucket=cls.bucket_name)
+        cls.endpoint_url = cls.MOCK_ENDPOINT_KEY
+        return s3
+
+    @classmethod
+    def set_boto3_client(cls, client):
+        """Sets the boto3 client used for uploads. This is set one time and
+        used by all instances of the receiver. May be a mocked value in tests.
+        :param client: the `boto3` client to use
+        :return: None
+        """
+        cls.boto3_client = client
+        client.create_bucket(Bucket=cls.bucket_name)
+
+    @classmethod
+    def set_endpoint_url(cls, url):
+        """Sets the endpoint URL used when instantiating boto3 clients.
+        :param url: the endpoint, e.g. a localstack URL, or `MOCK_ENDPOINT_KEY`
+        :return: None
+        """
+        cls.endpoint_url = url
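+
+    # How `__init__` below picks its client (a sketch; the localstack URL is
+    # an example value):
+    #
+    #     S3Uploader.set_endpoint_url("http://localhost:4566")
+    #     uploader = S3Uploader()   # -> boto3.client("s3", endpoint_url=...)
+    #
+    #     S3Uploader.set_endpoint_url(S3Uploader.MOCK_ENDPOINT_KEY)
+    #     uploader = S3Uploader()   # -> moto-mocked client via s3_mock()
+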
+ """ + if self.boto3_client: + ''' here the client was instantiated elsewhere ''' + self.client = self.boto3_client + else: + ''' we instantiate client here, based on endpoint_url ''' + if self.endpoint_url: + ''' if we have endpoint that's not our mock value, use it in client ''' + if self.endpoint_url != self.MOCK_ENDPOINT_KEY: + self.client = boto3.client("s3", endpoint_url=self.endpoint_url) + else: + ''' in this case we're running in test ''' + self.client = self.s3_mock() + else: + ''' boto3 must be configured using environment variables ''' + self.client = boto3.client("s3") + try: + ''' make sure we have the expected bucket ''' + self.client.create_bucket(Bucket=self.bucket_name) + except: + logger.warn(f"Could not create bucket named {self.bucket_name}") + self.client = None + + def get_uploaded_data(self, bucket_name, key): + ''' see superclass ''' + response = self.client.get_object(Bucket=bucket_name, Key=key) + contents = response["Body"].read() + return contents + + def upload_metadata(self, metadata, bucket_name, key): + ''' see superclass ''' + if not self.client: + logger.warn(f"upload_metadadta: no client for {key}") + return False + try: + self.client.put_object(Bucket=bucket_name, Key=key, Body=json.dumps(metadata)) + except ClientError as err: + logging.error(err) + return False + return True + + def upload_file(self, file_name, bucket, key, file_size): + ''' see superclass ''' + if not self.client: + logger.warn(f"upload_file: no client for {key}: {file_name}") + return False + try: + logger.info(f"S3Uploader.upload_file: {file_name}; file_size={file_size}") + self.client.upload_file(file_name, bucket, key) + except ClientError as err: + logging.error(err) + return False + return True + + diff --git a/jkolyer/pfu.py b/jkolyer/pfu.py new file mode 100644 index 00000000..6911f6ab --- /dev/null +++ b/jkolyer/pfu.py @@ -0,0 +1,111 @@ +import os +import pathlib +import argparse +import pathlib +import sys +import logging +import boto3 + +import asyncio +from jkolyer.models.batch_model import BatchJobModel, parallel_upload_files +from jkolyer.models.file_model import FileModel +from jkolyer.uploader import S3Uploader + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.DEBUG) + +def parse_cmd_line_arguments(): + """Parses command line arguments + --root_dir: root file directory + --endpoint_url: upload destination url + --parallel: if present use parallel mode (default, optional) + --concurrent: if present use concurrent mode (optional) + :return: parsed arguments dictionary + """ + parser = argparse.ArgumentParser( + description="PFU: Parallel File Upload", + epilog="Thanks for using the service!", + ) + parser.add_argument( + "--parallel", + action='store_true', + help="Runs the uploads in multiple processes (up to CPU count), default is concurrent.", + ) + parser.add_argument( + "--concurrent", + action='store_true', + help="Runs the uploads in a single process using asyncio (default).", + ) + parser.add_argument( + "--endpoint_url", + nargs=1, + action="store", + metavar="ENDPOINT_URL", + help="Endpoint for localstack S3 in the form http://localhost:4566", + ) + parser.add_argument( + "--root_dir", + metavar="ROOT_DIR", + action="store", + required=True, + help="Directory to load files for upload", + ) + return parser.parse_args() + +def perform_file_upload(parallel, root_dir): + """Initializes database tables, creates new batch instance, + populates files database. Initiates upload in either + parallel or concurrent mode. 
diff --git a/jkolyer/pfu.py b/jkolyer/pfu.py
new file mode 100644
index 00000000..6911f6ab
--- /dev/null
+++ b/jkolyer/pfu.py
@@ -0,0 +1,111 @@
+import pathlib
+import argparse
+import sys
+import logging
+import boto3
+
+import asyncio
+from jkolyer.models.batch_model import BatchJobModel, parallel_upload_files
+from jkolyer.models.file_model import FileModel
+from jkolyer.uploader import S3Uploader
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+def parse_cmd_line_arguments():
+    """Parses the command line arguments:
+    --root_dir: root file directory (required)
+    --endpoint_url: upload destination url (optional)
+    --parallel: if present, use parallel mode (optional; the default)
+    --concurrent: if present, use concurrent mode (optional)
+    :return: parsed argparse.Namespace
+    """
+    parser = argparse.ArgumentParser(
+        description="PFU: Parallel File Upload",
+        epilog="Thanks for using the service!",
+    )
+    parser.add_argument(
+        "--parallel",
+        action='store_true',
+        help="Runs the uploads in multiple processes (up to half the CPU count); this is the default.",
+    )
+    parser.add_argument(
+        "--concurrent",
+        action='store_true',
+        help="Runs the uploads in a single process using asyncio.",
+    )
+    parser.add_argument(
+        "--endpoint_url",
+        nargs=1,
+        action="store",
+        metavar="ENDPOINT_URL",
+        help="Endpoint for localstack S3 in the form http://localhost:4566",
+    )
+    parser.add_argument(
+        "--root_dir",
+        metavar="ROOT_DIR",
+        action="store",
+        required=True,
+        help="Directory to load files for upload",
+    )
+    return parser.parse_args()
+
+def perform_file_upload(parallel, root_dir):
+    """Initializes the database tables, creates a new batch instance,
+    and populates the files database. Initiates the upload in either
+    parallel or concurrent mode.
+    :param parallel: if true, uses parallel mode, otherwise concurrent
+    :param root_dir: file root directory
+    :return: None
+    """
+    logger.info("initializing database")
+
+    FileModel.create_tables()
+    BatchJobModel.create_tables()
+
+    batch = BatchJobModel.new_instance(root_dir)
+    batch.generate_file_records()
+    batch.reset_file_status()
+
+    if parallel:
+        logger.info("performing upload: multiprocessing")
+        parallel_upload_files(batch)
+    else:
+        logger.info("performing upload: async")
+        asyncio.run(batch.async_upload_files())
+
+if __name__ == '__main__':
+    args = parse_cmd_line_arguments()
+    root_dir = pathlib.Path(args.root_dir)
+    if not root_dir.is_dir():
+        print("The specified root directory doesn't exist")
+        sys.exit(1)
+
+    concurrent = args.concurrent
+    parallel = args.parallel
+
+    # --parallel wins when both flags are given; parallel is the default
+    if concurrent and parallel:
+        concurrent = False
+    elif not concurrent and not parallel:
+        parallel = True
+
+    endpoint_url = args.endpoint_url
+    if endpoint_url:
+        client = boto3.client("s3", endpoint_url=endpoint_url[0], region_name='us-east-1')
+        S3Uploader.set_boto3_client(client)
+        S3Uploader.set_endpoint_url(endpoint_url[0])
+
+    logger.info(f"pfu: root_dir = {root_dir}; endpoint_url = {endpoint_url}")
+
+    perform_file_upload(parallel, root_dir)
+
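+
+# Example invocations (mirroring scripts.sh later in this patch; the
+# localstack port is an example value):
+#
+#     python pfu.py --root_dir ./samples --endpoint_url http://localhost:4566
+#     python pfu.py --root_dir ./samples --endpoint_url http://localhost:4566 --concurrent
+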
localstack_c 4566" + else + # run uploads with concurrency against localstack + nice python pfu.py --root_dir ./samples --endpoint_url "http://localhost:$1" --concurrent + fi +} + diff --git a/jkolyer/setup.cfg b/jkolyer/setup.cfg new file mode 100644 index 00000000..0a8df87a --- /dev/null +++ b/jkolyer/setup.cfg @@ -0,0 +1,2 @@ +[wheel] +universal = 1 \ No newline at end of file diff --git a/jkolyer/setup.py b/jkolyer/setup.py new file mode 100644 index 00000000..b8a533a1 --- /dev/null +++ b/jkolyer/setup.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +import os +import sys + +try: + from setuptools import setup +except ImportError: + from distutils.core import setup + + +if sys.argv[-1] == 'publish': + os.system('python setup.py sdist upload') + sys.exit() + +readme = open('README.rst').read() +doclink = """ +Documentation +------------- + +The full documentation is at http://jkolyer.rtfd.org.""" +history = open('HISTORY.rst').read().replace('.. :changelog:', '') + +setup( + name='jkolyer', + version='0.1.0', + description="Deploy static HTML sites to S3 with the simple 'alotofeffort' command.", + long_description=readme + '\n\n' + doclink + '\n\n' + history, + author='Jonathan Kolyer', + author_email='jonathankolyer@gmail.com', + url='https://github.com/jkolyer/jkolyer', + packages=[ + 'jkolyer', + ], + package_dir={'jkolyer': 'jkolyer'}, + include_package_data=True, + install_requires=[ + ], + license='MIT', + zip_safe=False, + keywords='jkolyer', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: Implementation :: PyPy', + ], +) diff --git a/jkolyer/test/__init__.py b/jkolyer/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/jkolyer/test/test_jkolyer.py b/jkolyer/test/test_jkolyer.py new file mode 100644 index 00000000..b74a4025 --- /dev/null +++ b/jkolyer/test/test_jkolyer.py @@ -0,0 +1,131 @@ +import os +import pytest +import sqlite3 +import boto3 +from moto import mock_s3 +from threading import Thread +import logging + +for name in logging.Logger.manager.loggerDict.keys(): + if ('boto' in name) or \ + ('urllib3' in name) or \ + ('boto3' in name) or \ + ('botocore' in name) or \ + ('nose' in name): + logging.getLogger(name).setLevel(logging.CRITICAL) +logging.getLogger('s3transfer').setLevel(logging.CRITICAL) + +from jkolyer.models.base_model import BaseModel, UploadStatus +from jkolyer.models.batch_model import BatchJobModel, parallel_upload_files +from jkolyer.models.file_model import FileModel +from jkolyer.uploader import S3Uploader + + +@pytest.fixture +def batch_job(): + return BatchJobModel.new_instance('./samples') + +class TestJkolyer(object): + @classmethod + def setup_class(cls): + S3Uploader.set_boto3_client(S3Uploader.s3_mock()) + + @classmethod + def teardown_class(cls): + pass + +class TestTables(TestJkolyer): + def test_create_tables(self): + FileModel.create_tables() + BatchJobModel.create_tables() + sql = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{FileModel.table_name()}'" + result = BaseModel.run_sql_query(sql) + assert result[0][0] == FileModel.table_name() + +class TestBatchJob(TestJkolyer): + + def test_create_table(self): + sql = f"SELECT name FROM 
+class TestTables(TestJkolyer):
+    def test_create_tables(self):
+        FileModel.create_tables()
+        BatchJobModel.create_tables()
+        sql = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{FileModel.table_name()}'"
+        result = BaseModel.run_sql_query(sql)
+        assert result[0][0] == FileModel.table_name()
+
+class TestBatchJob(TestJkolyer):
+
+    def test_create_table(self):
+        sql = f"SELECT name FROM sqlite_master WHERE type='table' AND name='{BatchJobModel.table_name()}'"
+        result = BaseModel.run_sql_query(sql)
+        assert result[0][0] == BatchJobModel.table_name()
+
+    def test_create_batch(self, batch_job):
+        result = BatchJobModel.query_latest()
+        assert result is not None
+
+class TestFileModel(TestJkolyer):
+
+    @classmethod
+    def setup_class(cls):
+        TestJkolyer.setup_class()
+        FileModel.bootstrap_table()
+
+    def test_create_file_records(self):
+        batch = BatchJobModel.query_latest()
+        file_count = batch.generate_file_records()
+
+        cursor = BaseModel.db_conn.cursor()
+        result = cursor.execute(f"SELECT COUNT(*) FROM {FileModel.table_name()}").fetchall()
+        assert result[0][0] == file_count
+
+        # ensure no duplicates are created on a second pass
+        batch.generate_file_records()
+        result = cursor.execute(f"SELECT COUNT(*) FROM {FileModel.table_name()}").fetchall()
+        assert result[0][0] == file_count
+
+        cursor.close()
+
+    def test_file_upload(self):
+        model = FileModel.fetch_record(UploadStatus.PENDING.value)
+        assert model is not None
+        cursor = FileModel.db_conn.cursor()
+        model.start_upload(cursor)
+        assert model.status == UploadStatus.COMPLETED.value
+        model2 = FileModel.fetch_record(UploadStatus.COMPLETED.value)
+        assert model2 is not None
+        assert model2.id == model.id
+        cursor.close()
+
+        file_contents = model.get_uploaded_file()
+        assert file_contents is not None
+        metadata = model.get_uploaded_metadata()
+        assert metadata is not None
+
+    def test_batch_uploads_sequential(self):
+        batch = BatchJobModel.query_latest()
+
+        for file_model, cursor in batch.file_iterator():
+            assert file_model.status == UploadStatus.PENDING.value
+            file_model.start_upload(cursor)
+            assert file_model.status == UploadStatus.COMPLETED.value
+
+class TestAsyncFileModel(TestJkolyer):
+
+    @classmethod
+    def setup_class(cls):
+        batch = BatchJobModel.query_latest()
+        # reset the file records
+        batch.reset_file_status()
+
+    @pytest.mark.asyncio
+    async def test_batch_uploads_async(self):
+        batch = BatchJobModel.query_latest()
+        await batch.async_upload_files()
+
+        for file_model, cursor in batch.file_iterator():
+            assert file_model.status == UploadStatus.COMPLETED.value
+
+class TestParallelFileModel(TestJkolyer):
+
+    @classmethod
+    def setup_class(cls):
+        batch = BatchJobModel.query_latest()
+        batch.reset_file_status()
+
+    def test_batch_uploads_parallel(self):
+        batch = BatchJobModel.query_latest()
+        parallel_upload_files(batch)
+
+        for file_model, cursor in batch.file_iterator():
+            assert file_model.status == UploadStatus.COMPLETED.value
+
diff --git a/jkolyer/tox.ini b/jkolyer/tox.ini
new file mode 100644
index 00000000..163db2d2
--- /dev/null
+++ b/jkolyer/tox.ini
@@ -0,0 +1,32 @@
+[tox]
+envlist = py39, style, docs
+
+[testenv]
+setenv =
+    PYTHONPATH = {toxinidir}:{toxinidir}/jkolyer
+deps =
+    -r{toxinidir}/requirements.txt
+    pytest
+commands =
+    py.test --basetemp={envtmpdir}
+
+[testenv:style]
+deps =
+    -r{toxinidir}/requirements.txt
+    flake8
+commands =
+    python setup.py flake8
+
+[testenv:docs]
+changedir = docs/
+deps =
+    -r{toxinidir}/requirements.txt
+    sphinx
+commands =
+    sphinx-build -b linkcheck ./ _build/
+    sphinx-build -b html ./ _build/
+
+[pytest]
+log_cli = 1
+log_cli_level = DEBUG
+
\ No newline at end of file