diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc11ef0..8dd8ffd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,48 +1,48 @@ -name: build - -on: - push: - branches: - - master - - release/* - pull_request: - branches: - - master - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - max-parallel: 4 - matrix: - python-version: [3.7] - - steps: - - uses: actions/checkout@v1 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pip install pytest pytest-cov - python -m pytest --cov=./ --cov-report=xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1.0.2 - with: - token: ${{secrets.CODECOV_TOKEN}} - file: ./coverage.xml - flags: unittests - name: codecov-umbrella +name: build + +on: + push: + branches: + - master + - release/* + pull_request: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.7] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pip install pytest pytest-cov + python -m pytest --cov=./ --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1.0.2 + with: + token: ${{secrets.CODECOV_TOKEN}} + file: ./coverage.xml + flags: unittests + name: codecov-umbrella diff --git a/.gitignore b/.gitignore index 894a44c..60fbcad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,104 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..cd5d5f7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "python.pythonPath": "venv\\Scripts\\python.exe", + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.nosetestsEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/LICENSE b/LICENSE index 35ce4ed..a63ed44 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2019 Umedzhon Abdumuminov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2019 Umedzhon Abdumuminov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 570e40c..c37d27f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -# xl2pgtable - -[![GitHub Actions status](https://github.com/umed/xl2pgtable/workflows/build/badge.svg)](https://github.com/umed/xl2pgtable/actions) -[![codecov](https://codecov.io/gh/umed/xl2pgtable/branch/master/graph/badge.svg)](https://codecov.io/gh/umed/xl2pgtable) - -It's an python library to upload directory of Excel files into PostgreSQL database. - -# How to run - -``` -python -m xl2pgtable/uploader list: -# if not os.path.exists(dir_name): -# logging.error('"{}" does not exist'.format(dir_name)) -# exit(1) -# if not exclude: -# exclude = [] -# files_info = [] -# files = get_excel_files_in_dir(dir_name, exclude) -# for file_path in files: -# try: -# files_info.append(create_file_info(file_path)) -# except ValueError: -# print("Error happened during '{}' reading. Will be skipped.".format(file_path)) -# return files_info - - -# def create_argparse(): -# argparse. - - -def main(): - if len(sys.argv) == 2: - files_path = sys.argv[1] - tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} - for f in get_excel_files_in_dir(files_path, [])] - elif len(sys.argv) == 3: - files_config_path = sys.argv[1] - mappings_path = sys.argv[2] - tables = read_config(files_config_path) - apply_column_mappings(mappings_path, tables) - else: - files_path = PATH_TO_FOLDER_WITH_EXCEL_FILES - tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} - for f in get_excel_files_in_dir(files_path, [])] - - settings = DatabaseSettings() - db = Database(settings) - file_uploader = FileUploader(db) - for table in tables: - file_uploader.upload(table['Link'], table['Table name'], table['mappings']) - sys.exit(0) - - -# for info in files_info: -# db.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) -# exit(0) - - -if __name__ == "__main__": - main() +from uploader.utils import create_table_name +import logging +import os +import sys + +from uploader.database.database import Database +from uploader.database.database_settings import DatabaseSettings +from uploader.excel_helper import get_excel_files_in_dir +from uploader.file_uploader import FileUploader +from uploader.config_reader import read_config, apply_column_mappings +import argparse + +# REPLACE PATH BY YOUR PATH TO EXCEL FILES +# use double slashes on windows +PATH_TO_FOLDER_WITH_EXCEL_FILES = 'C:\\Users\\uabdumum\\Desktop\\projects\\test_data' + + +# def create_files_info(dir_name: str, exclude: list = None) -> list: +# if not os.path.exists(dir_name): +# logging.error('"{}" does not exist'.format(dir_name)) +# exit(1) +# if not exclude: +# exclude = [] +# files_info = [] +# files = get_excel_files_in_dir(dir_name, exclude) +# for file_path in files: +# try: +# files_info.append(create_file_info(file_path)) +# except ValueError: +# print("Error happened during '{}' reading. Will be skipped.".format(file_path)) +# return files_info + + +# def create_argparse(): +# argparse. + + +def main(): + if len(sys.argv) == 2: + files_path = sys.argv[1] + tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} + for f in get_excel_files_in_dir(files_path, [])] + elif len(sys.argv) == 3: + files_config_path = sys.argv[1] + mappings_path = sys.argv[2] + tables = read_config(files_config_path) + apply_column_mappings(mappings_path, tables) + else: + files_path = PATH_TO_FOLDER_WITH_EXCEL_FILES + tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} + for f in get_excel_files_in_dir(files_path, [])] + + settings = DatabaseSettings() + db = Database(settings) + file_uploader = FileUploader(db) + for table in tables: + file_uploader.upload(table['Link'], table['Table name'], table['mappings']) + sys.exit(0) + + +# for info in files_info: +# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) +# exit(0) + + +if __name__ == "__main__": + main() diff --git a/uploader/base/__init__.py b/uploader/base/__init__.py new file mode 100644 index 0000000..5e50570 --- /dev/null +++ b/uploader/base/__init__.py @@ -0,0 +1,3 @@ +from .idata import * +from .ireader import * +from .iwriter import * diff --git a/uploader/base/idata.py b/uploader/base/idata.py new file mode 100644 index 0000000..e1b81b5 --- /dev/null +++ b/uploader/base/idata.py @@ -0,0 +1,27 @@ +import abc +from typing import List +from .itype_recognizer import ITypeRecognizer + +NULL = 'NULL' + + +class IData(metaclass=abc.ABCMeta): + @abc.abstractmethod + def to_dict(self) -> List[dict]: + pass + + @abc.abstractmethod + def columns(self) -> list: + pass + + @abc.abstractmethod + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + pass + + @abc.abstractmethod + def types(self) -> dict: + pass + + @abc.abstractmethod + def rows(self) -> List[list]: + pass diff --git a/uploader/base/ireader.py b/uploader/base/ireader.py new file mode 100644 index 0000000..3fbaaa7 --- /dev/null +++ b/uploader/base/ireader.py @@ -0,0 +1,10 @@ +import abc + +from uploader.base import IData + + +class IReader(metaclass=abc.ABCMeta): + @abc.abstractmethod + def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int, + right_offset: int) -> IData: + pass diff --git a/uploader/base/irepresenter.py b/uploader/base/irepresenter.py new file mode 100644 index 0000000..db19f4d --- /dev/null +++ b/uploader/base/irepresenter.py @@ -0,0 +1,5 @@ +import abc + + +class IRepresenter(metaclass=abc.ABCMeta): + pass diff --git a/uploader/base/itype_recognizer.py b/uploader/base/itype_recognizer.py new file mode 100644 index 0000000..d0efa8b --- /dev/null +++ b/uploader/base/itype_recognizer.py @@ -0,0 +1,15 @@ +import abc + + +class ITypeRecognizer(metaclass=abc.ABCMeta): + @abc.abstractmethod + def type(self, value) -> type: + pass + + @abc.abstractmethod + def default_type(self) -> type: + pass + + @abc.abstractmethod + def convert(self, value): + pass diff --git a/uploader/base/iwriter.py b/uploader/base/iwriter.py new file mode 100644 index 0000000..ed75899 --- /dev/null +++ b/uploader/base/iwriter.py @@ -0,0 +1,8 @@ +import abc +from uploader.base.idata import IData + + +class IWriter(metaclass=abc.ABCMeta): + @abc.abstractmethod + def write(self, data: IData, mapping, append: bool): + pass diff --git a/uploader/config_reader.py b/uploader/config_reader.py index f873a5a..d836d27 100644 --- a/uploader/config_reader.py +++ b/uploader/config_reader.py @@ -1,55 +1,55 @@ -import logging - -from uploader.excel_helper import is_excel_file, excel_to_list_of_dicts -from uploader.utils import create_table_name -from uploader import utils -from typing import List -import os - -COLUMNS_LIST = ["Link", "Table name", "Department name"] - - -def __absolute_path(base: str, file_path: str): - if os.path.isabs(file_path): - return file_path - return os.path.join(base, file_path) - - -def read_config(file_path: str) -> list: - configs = __read_excel(file_path, COLUMNS_LIST) - file_dir = os.path.dirname(file_path) - for config in configs: - table_name = config.get('Table name', None) - config['Link'] = __absolute_path(file_dir, config['Link']) - if not table_name or table_name == utils.NULL: - config['Table name'] = create_table_name(config['Link'], config['Department name']) - return configs - - -def __read_excel(file_path, columns_names_to_check: list) -> List[dict]: - if not is_excel_file(file_path): - raise FileExistsError('"{}" is not excel file'.format(file_path)) - configs = excel_to_list_of_dicts(file_path) - if not configs or len(configs) == 0: - raise ValueError('Config file is empty or could not parse it') - if not all(item in configs[0] for item in columns_names_to_check): - raise ValueError('File format is invalid') - return configs - - -def __get_table_columns(column_mappings: List[dict], table_name: str): - table_mappings = [] - for mapping in column_mappings: - if mapping.get('Table name', None) == table_name: - table_mappings.append(mapping) - if len(table_mappings) == 0: - logging.error('There is no columns mapping of {} table'.format(table_name)) - return table_mappings - - -def apply_column_mappings(file_path: str, configs: List[dict]): - column_mappings = __read_excel(file_path, []) - for config in configs: - table_name = config.get('Table name', None) - if table_name: - config['mappings'] = __get_table_columns(column_mappings, table_name) +import logging + +from uploader.excel_helper import is_excel_file, excel_to_list_of_dicts +from uploader.utils import create_table_name +from uploader import utils +from typing import List +import os + +COLUMNS_LIST = ["Link", "Table name", "Department name"] + + +def __absolute_path(base: str, file_path: str): + if os.path.isabs(file_path): + return file_path + return os.path.join(base, file_path) + + +def read_config(file_path: str) -> list: + configs = __read_excel(file_path, COLUMNS_LIST) + file_dir = os.path.dirname(file_path) + for config in configs: + table_name = config.get('Table name', None) + config['Link'] = __absolute_path(file_dir, config['Link']) + if not table_name or table_name == utils.NULL: + config['Table name'] = create_table_name(config['Link'], config['Department name']) + return configs + + +def __read_excel(file_path, columns_names_to_check: list) -> List[dict]: + if not is_excel_file(file_path): + raise FileExistsError('"{}" is not excel file'.format(file_path)) + configs = excel_to_list_of_dicts(file_path) + if not configs or len(configs) == 0: + raise ValueError('Config file is empty or could not parse it') + if not all(item in configs[0] for item in columns_names_to_check): + raise ValueError('File format is invalid') + return configs + + +def __get_table_columns(column_mappings: List[dict], table_name: str): + table_mappings = [] + for mapping in column_mappings: + if mapping.get('Table name', None) == table_name: + table_mappings.append(mapping) + if len(table_mappings) == 0: + logging.error('There is no columns mapping of {} table'.format(table_name)) + return table_mappings + + +def apply_column_mappings(file_path: str, configs: List[dict]): + column_mappings = __read_excel(file_path, []) + for config in configs: + table_name = config.get('Table name', None) + if table_name: + config['mappings'] = __get_table_columns(column_mappings, table_name) diff --git a/uploader/core/__init__.py b/uploader/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/common/__init__.py b/uploader/core/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/common/config.py b/uploader/core/common/config.py new file mode 100644 index 0000000..845655b --- /dev/null +++ b/uploader/core/common/config.py @@ -0,0 +1,6 @@ +from uploader.base import IData + + +class Config: + def __init__(self, data: IData): + pass diff --git a/uploader/core/common/file_data.py b/uploader/core/common/file_data.py new file mode 100644 index 0000000..c312c2c --- /dev/null +++ b/uploader/core/common/file_data.py @@ -0,0 +1,43 @@ +from typing import List, Type + +from uploader.base import IData, NULL, ITypeRecognizer + + +class FileData(IData): + def __init__(self, data: List[dict]): + self._data = data + self._items = [] + self._columns = [] + self._types = {} + self._type_recognizer: ITypeRecognizer = None + + def to_dict(self) -> List[dict]: + return self._data + + def columns(self) -> list: + if len(self._data) > 0 and len(self._columns) != len(self._data[0].keys()): + self._columns = list(self._data[0].keys()) + return self._columns + + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + self._type_recognizer = type_recognizer + + def types(self) -> dict: + if not self._type_recognizer: + raise ReferenceError("type recognizer is not set") + if not bool(self._types) and len(self._data) != 0: + for key in self.columns(): + for row in self._data: + value = row[key] + if value != NULL: + self._types[key] = self._type_recognizer.type(value) + break + if key not in self._types: + self._types[key] = self._type_recognizer.default_type() + return self._types + + def rows(self) -> List[list]: + if len(self._items) != len(self._data): + for row in self._data: + self._items.append(list(row.values())) + return self._items diff --git a/uploader/core/common/type_recognizer.py b/uploader/core/common/type_recognizer.py new file mode 100644 index 0000000..a31cdba --- /dev/null +++ b/uploader/core/common/type_recognizer.py @@ -0,0 +1,37 @@ +import datetime as dt + +from uploader.base import ITypeRecognizer + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} + + +class TypeRecognizer(ITypeRecognizer): + def type(self, value) -> type: + global TYPE_CONVERTERS + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return self.default_type() + + def default_type(self): + return str + + def convert(self, value): + global TYPE_CONVERTERS + for _, converter in TYPE_CONVERTERS.items(): + try: + return converter(value) + except Exception: + continue + return self.default_type()(value) diff --git a/uploader/core/excel/__init__.py b/uploader/core/excel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/excel/reader.py b/uploader/core/excel/reader.py new file mode 100644 index 0000000..aa9b3e3 --- /dev/null +++ b/uploader/core/excel/reader.py @@ -0,0 +1,58 @@ +import logging + +import pandas as pd +import xlrd + +from typing import List + +from xlrd import XLRDError + +from uploader.base import IReader, IData, NULL +from uploader.core.common.file_data import FileData + + +class ExcelReader(IReader): + def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0, + right_offset: int = 0) -> IData: + if not self.is_excel_file(path): + # TODO: log it, raise exception + pass + return FileData(self.__excel_to_list_of_dicts(path)) + + @staticmethod + def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + def __excel_to_data_frame(self, file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + @staticmethod + def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except XLRDError: + return False + except Exception: + return False + + def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]: + return self.__excel_to_data_frame(file_path).to_dict('records') diff --git a/uploader/core/postgres/__init__.py b/uploader/core/postgres/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/postgres/sql_executor.py b/uploader/core/postgres/sql_executor.py new file mode 100644 index 0000000..f17e320 --- /dev/null +++ b/uploader/core/postgres/sql_executor.py @@ -0,0 +1,33 @@ +import logging +import psycopg2 as pg +import sys +from typing import List + +from uploader.database.database_settings import DatabaseSettings + + +class SqlExecutor: + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def execute(self, command: str) -> List[tuple]: + connection = None + cursor = None + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + data = [] + if cursor.statusmessage.startswith('SELECT '): + data = cursor.fetchall() + connection.commit() + return data + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + sys.exit(1) + finally: + if cursor: + cursor.close() + if connection: + connection.close() diff --git a/uploader/core/postgres/sql_representer.py b/uploader/core/postgres/sql_representer.py new file mode 100644 index 0000000..b9cf943 --- /dev/null +++ b/uploader/core/postgres/sql_representer.py @@ -0,0 +1,16 @@ +from uploader.base import IData +from uploader.base.irepresenter import IRepresenter + + +class SqlRepresenter(IRepresenter): + def __init__(self, data: IData): + self._table: str = None + self._values: str = None + self._data = data + + def scheme(self) -> dict: + d = {'create': '', 'drop': 'mapping'} + return d + + def data(self) -> dict: + pass diff --git a/uploader/core/postgres/writer.py b/uploader/core/postgres/writer.py new file mode 100644 index 0000000..cb1a83e --- /dev/null +++ b/uploader/core/postgres/writer.py @@ -0,0 +1,33 @@ +from uploader.core.common.type_recognizer import TypeRecognizer +from uploader.base import IWriter, IData, ITypeRecognizer +from uploader.core.postgres.sql_executor import SqlExecutor +from uploader.core.postgres.sql_representer import SqlRepresenter + + +class Writer(IWriter): + def __init__(self, settings=None, type_recognizer: ITypeRecognizer = TypeRecognizer()): + # FIXME: inject type recognizer + self._type_recognizer = type_recognizer + self._executor = SqlExecutor(settings) + + def write(self, data: IData, mapping: dict, append: bool = False, drop_if_exists: bool = False): + data.set_type_recognizer(self._type_recognizer) + representer = SqlRepresenter(data) + self._executor.execute(representer.scheme()['create']) + self._executor.execute(representer.data()['query']) + + def dd(self, drop_if_exists: bool = False): + if drop_if_exists: + command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) + self.execute(command_to_drop_table) + self.create_table(table_name, columns, data) + else: + command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ + "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) + result = self.execute(command) + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: + self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) + self.__insert_rows(table_name, columns, data) + else: + self.create_table(table_name, columns, data) \ No newline at end of file diff --git a/uploader/database/database.py b/uploader/database/database.py index 7145a8d..61d0599 100644 --- a/uploader/database/database.py +++ b/uploader/database/database.py @@ -1,88 +1,89 @@ -import logging -import sys -from typing import List - -import psycopg2 as pg - -from uploader.database.database_settings import DatabaseSettings -from uploader.database.database_utils import py_type_to_pg_type, py_value_to_pg_value - - -class Database(object): - def __init__(self, settings: DatabaseSettings): - self._settings = settings - - def execute(self, command: str) -> List[tuple]: - connection = None - cursor = None - try: - connection = pg.connect(**dict(self._settings)) - cursor = connection.cursor() - cursor.execute(command) - data = [] - if cursor.statusmessage.startswith('SELECT '): - data = cursor.fetchall() - connection.commit() - return data - except Exception as e: - logging.error( - "Something goes wrong during SQL script execution: {}".format(str(e))) - sys.exit(1) - finally: - if cursor: - cursor.close() - if connection: - connection.close() - - @staticmethod - def __row_to_insert_str(columns: dict, row: dict) -> str: - values = ', '.join([py_value_to_pg_value(columns[key], value) for key, value in row.items()]) - return '({})'.format(values) - - @staticmethod - def __rows_to_insert_str(columns: dict, rows: list) -> str: - rows_to_insert_list = [Database.__row_to_insert_str(columns, row) for row in rows] - return ', '.join(rows_to_insert_list) - - def __create_insert_query(self, table_name: str, columns: dict, data: list) -> str: - rows_to_insert_str = Database.__rows_to_insert_str(columns, data) - return 'insert into {}.{} values {}'.format(self._settings.schema, table_name, rows_to_insert_str) - - def __create_table(self, name, columns: dict): - columns_definition_list = [] - for column_key, column_value in columns.items(): - column_mapping = column_value.get('mapping', None) - column_name = column_mapping['name'] if column_mapping and column_mapping['name'] else column_value['name'] - column_type = column_mapping['type'] if column_mapping and column_mapping['type'] else py_type_to_pg_type( - column_value['type']) - column_definition = '{} {}'.format(column_name, column_type) - columns_definition_list.append(column_definition) - columns_definition = ', '.join(columns_definition_list) - command = 'create table {}.{} ({})'.format(self._settings.schema, name, columns_definition) - print(command) - self.execute(command) - print('Table "{}" was created/updated'.format(name)) - - def __insert_rows(self, name: str, columns: dict, data: list): - insert_query = self.__create_insert_query(name, columns, data) - self.execute(insert_query) - print('Rows inserted to table {}'.format(name)) - - def create_table(self, name: str, columns: dict, data: list): - self.__create_table(name, columns) - self.__insert_rows(name, columns, data) - - def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exists: bool = False): - if drop_if_exists: - command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) - self.execute(command_to_drop_table) - self.create_table(table_name, columns, data) - else: - command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ - "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) - result = self.execute(command) - if len(result) > 0 and result[0]: - self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) - self.__insert_rows(table_name, columns, data) - else: - self.create_table(table_name, columns, data) +import logging +import sys +from typing import List + +import psycopg2 as pg + +from uploader.database.database_settings import DatabaseSettings +from uploader.database.database_utils import py_type_to_pg_type, py_value_to_pg_value + + +class Database(object): + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def execute(self, command: str) -> List[tuple]: + connection = None + cursor = None + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + data = [] + if cursor.statusmessage.startswith('SELECT '): + data = cursor.fetchall() + connection.commit() + return data + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + sys.exit(1) + finally: + if cursor: + cursor.close() + if connection: + connection.close() + + @staticmethod + def __row_to_insert_str(columns: dict, row: dict) -> str: + values = ', '.join([py_value_to_pg_value(columns[key], value) for key, value in row.items()]) + return '({})'.format(values) + + @staticmethod + def __rows_to_insert_str(columns: dict, rows: list) -> str: + rows_to_insert_list = [Database.__row_to_insert_str(columns, row) for row in rows] + return ', '.join(rows_to_insert_list) + + def __create_insert_query(self, table_name: str, columns: dict, data: list) -> str: + rows_to_insert_str = Database.__rows_to_insert_str(columns, data) + return 'insert into {}.{} values {}'.format(self._settings.schema, table_name, rows_to_insert_str) + + def __create_table(self, name, columns: dict): + columns_definition_list = [] + for column_key, column_value in columns.items(): + column_mapping = column_value.get('mapping', None) + column_name = column_mapping['name'] if column_mapping and column_mapping['name'] else column_value['name'] + column_type = column_mapping['type'] if column_mapping and column_mapping['type'] else py_type_to_pg_type( + column_value['type']) + column_definition = '{} {}'.format(column_name, column_type) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format(self._settings.schema, name, columns_definition) + print(command) + self.execute(command) + print('Table "{}" was created/updated'.format(name)) + + def __insert_rows(self, name: str, columns: dict, data: list): + insert_query = self.__create_insert_query(name, columns, data) + self.execute(insert_query) + print('Rows inserted to table {}'.format(name)) + + def create_table(self, name: str, columns: dict, data: list): + self.__create_table(name, columns) + self.__insert_rows(name, columns, data) + + def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exists: bool = False): + if drop_if_exists: + command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) + self.execute(command_to_drop_table) + self.create_table(table_name, columns, data) + else: + command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ + "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) + result = self.execute(command) + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: + self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) + self.__insert_rows(table_name, columns, data) + else: + self.create_table(table_name, columns, data) diff --git a/uploader/database/database_settings.py b/uploader/database/database_settings.py index 7f49336..5431efa 100644 --- a/uploader/database/database_settings.py +++ b/uploader/database/database_settings.py @@ -1,12 +1,12 @@ -class DatabaseSettings: - user = "postgres" - password = "pswd" - database = "postgres" - host = "127.0.0.1" - schema = "public" - - def __iter__(self): - yield "database", self.database - yield "host", self.host - yield "user", self.user - yield "password", self.password +class DatabaseSettings: + user = "postgres" + password = "pswd" + database = "postgres" + host = "127.0.0.1" + schema = "public" + + def __iter__(self): + yield "database", self.database + yield "host", self.host + yield "user", self.user + yield "password", self.password diff --git a/uploader/database/database_utils.py b/uploader/database/database_utils.py index be34a55..f2eaaf8 100644 --- a/uploader/database/database_utils.py +++ b/uploader/database/database_utils.py @@ -1,74 +1,74 @@ -import datetime as dt -from uploader.utils import NULL - -__ESCAPE_SYMBOLS_MAPPING = {"'": r"''"} - - -def __value_empty(value) -> bool: - return value == NULL or value is None or not value or (isinstance(value, str) and value.isspace()) - - -def __escaped_symbols() -> dict: - if not hasattr(__escaped_symbols, 'translation'): - __escaped_symbols.translation = str.maketrans(__ESCAPE_SYMBOLS_MAPPING) - return __escaped_symbols.translation - - -def convert_datetime_to_str(value, dt_format: str) -> str: - if type(value) == str: - return value - else: - return value.strftime(dt_format) - - -def null_or_format_str(value, str_format: str): - if __value_empty(value): - return NULL - else: - return str_format.format(str(value).translate(__escaped_symbols())) - - -def py_type_to_pg_type(py_type): - return PG_SQL_TYPES_TO_PYTHON_TYPES[py_type]['type'] - - -def py_value_to_pg_value(value_type, value) -> str: - current_type = value_type['type'] if type(value_type) is dict else value_type - return PG_SQL_TYPES_TO_PYTHON_TYPES[current_type]['converter'](value) - - -# def datetime_to_null_or_str_format(value, dt_format, str_format): -# result = convert_datetime_to_str(value, dt_format) -# result = null_or_format_str(result, str_format) -# return result - - -PG_SQL_TYPES_TO_PYTHON_TYPES = { - int: { - 'type': 'numeric', - 'converter': lambda value: null_or_format_str(value, '{}') - }, - float: { - 'type': 'real', - 'converter': lambda value: null_or_format_str(value, '{}') - }, - str: { - 'type': 'varchar', - 'converter': lambda value: null_or_format_str(value, "'{}'") - }, - dt.time: { - 'type': 'time', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%H:%M:%S'), - "'{}'") - }, - dt.datetime: { - 'type': 'timestamp', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y %H:%M:%S'), - "to_timestamp('{}', 'dd.mm.yyyy hh24:mi:ss')") - }, - dt.date: { - 'type': 'date', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y'), - "to_date('{}', 'dd.mm.yyyy')") - } -} +import datetime as dt +from uploader.utils import NULL + +__ESCAPE_SYMBOLS_MAPPING = {"'": r"''"} + + +def __value_empty(value) -> bool: + return value == NULL or value is None or not value or (isinstance(value, str) and value.isspace()) + + +def __escaped_symbols() -> dict: + if not hasattr(__escaped_symbols, 'translation'): + __escaped_symbols.translation = str.maketrans(__ESCAPE_SYMBOLS_MAPPING) + return __escaped_symbols.translation + + +def convert_datetime_to_str(value, dt_format: str) -> str: + if type(value) == str: + return value + else: + return value.strftime(dt_format) + + +def null_or_format_str(value, str_format: str): + if __value_empty(value): + return NULL + else: + return str_format.format(str(value).translate(__escaped_symbols())) + + +def py_type_to_pg_type(py_type): + return PG_SQL_TYPES_TO_PYTHON_TYPES[py_type]['type'] + + +def py_value_to_pg_value(value_type, value) -> str: + current_type = value_type['type'] if type(value_type) is dict else value_type + return PG_SQL_TYPES_TO_PYTHON_TYPES[current_type]['converter'](value) + + +# def datetime_to_null_or_str_format(value, dt_format, str_format): +# result = convert_datetime_to_str(value, dt_format) +# result = null_or_format_str(result, str_format) +# return result + + +PG_SQL_TYPES_TO_PYTHON_TYPES = { + int: { + 'type': 'numeric', + 'converter': lambda value: null_or_format_str(value, '{}') + }, + float: { + 'type': 'real', + 'converter': lambda value: null_or_format_str(value, '{}') + }, + str: { + 'type': 'varchar', + 'converter': lambda value: null_or_format_str(value, "'{}'") + }, + dt.time: { + 'type': 'time', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%H:%M:%S'), + "'{}'") + }, + dt.datetime: { + 'type': 'timestamp', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y %H:%M:%S'), + "to_timestamp('{}', 'dd.mm.yyyy hh24:mi:ss')") + }, + dt.date: { + 'type': 'date', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y'), + "to_date('{}', 'dd.mm.yyyy')") + } +} diff --git a/uploader/database/row.py b/uploader/database/row.py new file mode 100644 index 0000000..f24b5df --- /dev/null +++ b/uploader/database/row.py @@ -0,0 +1,6 @@ +class Row(object): + pass + + +class RowsContainer(object): + pass \ No newline at end of file diff --git a/uploader/database/table.py b/uploader/database/table.py new file mode 100644 index 0000000..35f6193 --- /dev/null +++ b/uploader/database/table.py @@ -0,0 +1,20 @@ +from uploader.database_utils import py_type_to_pg_type, py_value_to_pg_value +from uploader.database import Database + + +class Table(object): + def __init__(self, db: Database, name: str, columns: dict): + self._columns = columns + self._name = name + self._db = db + + def to_sql(self): + columns_definition_list = [] + for column_name, column_type in self._columns.items(): + column_definition = '{} {}'.format( + column_name, py_type_to_pg_type(column_type)) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format( + self._db._settings.schema, self._name, columns_definition) + return command diff --git a/uploader/database_new/database.py b/uploader/database_new/database.py index 72ac003..69da347 100644 --- a/uploader/database_new/database.py +++ b/uploader/database_new/database.py @@ -1,24 +1,24 @@ -import psycopg2 as pg -from uploader.database.database_settings import DatabaseSettings -import logging - - -class Database(object): - def __init__(self, settings: DatabaseSettings): - self._settings = settings - - def settings(self) -> DatabaseSettings: - return self._settings - - def execute(self, command: str): - try: - connection = pg.connect(**dict(self._settings)) - cursor = connection.cursor() - cursor.execute(command) - connection.commit() - cursor.close() - connection.close() - except Exception as e: - logging.error( - "Something goes wrong during SQL script execution: {}".format(str(e))) - exit(1) +import psycopg2 as pg +from uploader.database.database_settings import DatabaseSettings +import logging + + +class Database(object): + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def settings(self) -> DatabaseSettings: + return self._settings + + def execute(self, command: str): + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + connection.commit() + cursor.close() + connection.close() + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + exit(1) diff --git a/uploader/database_new/row.py b/uploader/database_new/row.py index 02a07c3..b84232a 100644 --- a/uploader/database_new/row.py +++ b/uploader/database_new/row.py @@ -1,2 +1,2 @@ -class Row(object): - pass +class Row(object): + pass diff --git a/uploader/database_new/table.py b/uploader/database_new/table.py index 698cf11..885440b 100644 --- a/uploader/database_new/table.py +++ b/uploader/database_new/table.py @@ -1,20 +1,20 @@ -from uploader.database.database_utils import py_type_to_pg_type -from uploader.database import Database - - -class Table(object): - def __init__(self, db: Database, name: str, columns: dict): - self._columns = columns - self._name = name - self._db = db - - def to_sql(self): - columns_definition_list = [] - for column_name, column_type in self._columns.items(): - column_definition = '{} {}'.format( - column_name, py_type_to_pg_type(column_type)) - columns_definition_list.append(column_definition) - columns_definition = ', '.join(columns_definition_list) - command = 'create table {}.{} ({})'.format( - self._db._settings.schema, self._name, columns_definition) - return command +from uploader.database.database_utils import py_type_to_pg_type +from uploader.database import Database + + +class Table(object): + def __init__(self, db: Database, name: str, columns: dict): + self._columns = columns + self._name = name + self._db = db + + def to_sql(self): + columns_definition_list = [] + for column_name, column_type in self._columns.items(): + column_definition = '{} {}'.format( + column_name, py_type_to_pg_type(column_type)) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format( + self._db._settings.schema, self._name, columns_definition) + return command diff --git a/uploader/excel_helper.py b/uploader/excel_helper.py index 74eea33..0cf23ca 100644 --- a/uploader/excel_helper.py +++ b/uploader/excel_helper.py @@ -1,97 +1,97 @@ -import datetime as dt -import logging -import os - -import pandas as pd -import xlrd - -from uploader.utils import create_adopted_columns_names, NULL - - -def get_type(value) -> type: - for converter_type, converter in TYPE_CONVERTERS.items(): - try: - converter(value) - return converter_type - except Exception: - continue - return str - - -def column_types(rows: list) -> dict: - if len(rows) == 0: - return {} - item_types = {} - for key in rows[0].keys(): - for row in rows: - value = row[key] - if value != NULL: - item_types[key] = get_type(value) - break - if key not in item_types: - item_types[key] = str - return item_types - - -def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: - (_, row_values) = next(df.iterrows()) - cols_number_to_skip = 0 - for cols_number_to_skip, value in enumerate(row_values): - if not pd.isna(value): - break - if cols_number_to_skip == len(row_values): - error_message = 'Cannot handle file. Probably, it is empty' - logging.error(error_message) - raise ValueError(error_message) - return list(range(0, cols_number_to_skip)) - - -def read_excel(file_path: str) -> list: - df = excel_to_data_frame(file_path) - df.columns = create_adopted_columns_names(df.columns) - return df.to_dict('records') - - -def excel_to_data_frame(file_path) -> pd.DataFrame: - df = pd.read_excel(file_path, header=None) - df.dropna(how='all', inplace=True) - # shift table if data are not placed in the first row/column - cols_indexes_to_skip = __get_cols_indexes_to_skip(df) - df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) - # first row as columns names - df.fillna(NULL, inplace=True) - df.rename(columns=df.iloc[0], inplace=True) - df.drop(df.index[0], inplace=True) - return df - - -def excel_to_list_of_dicts(file_path: str) -> list: - return excel_to_data_frame(file_path).to_dict('records') - - -def is_excel_file(file_path: str) -> bool: - try: - xlrd.open_workbook(file_path).release_resources() - return True - except Exception: - return False - - -def get_excel_files_in_dir(dir_path: str, exclude: list) -> list: - def is_acceptable_file(file_path): - return is_excel_file(file_path) and \ - os.path.basename(file_path) not in exclude - - files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)] - return [f for f in files if is_acceptable_file(f)] - - -TYPE_CONVERTERS = { - int: int, - float: float, - dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), - dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), - dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, - '%d.%m.%Y %H:%M:%S'), - str: str, -} +import datetime as dt +import logging +import os + +import pandas as pd +import xlrd + +from uploader.utils import create_adopted_columns_names, NULL + + +def get_type(value) -> type: + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return str + + +def column_types(rows: list) -> dict: + if len(rows) == 0: + return {} + item_types = {} + for key in rows[0].keys(): + for row in rows: + value = row[key] + if value != NULL: + item_types[key] = get_type(value) + break + if key not in item_types: + item_types[key] = str + return item_types + + +def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + +def read_excel(file_path: str) -> list: + df = excel_to_data_frame(file_path) + df.columns = create_adopted_columns_names(df.columns) + return df.to_dict('records') + + +def excel_to_data_frame(file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = __get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + +def excel_to_list_of_dicts(file_path: str) -> list: + return excel_to_data_frame(file_path).to_dict('records') + + +def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except Exception: + return False + + +def get_excel_files_in_dir(dir_path: str, exclude: list) -> list: + def is_acceptable_file(file_path): + return is_excel_file(file_path) and \ + os.path.basename(file_path) not in exclude + + files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)] + return [f for f in files if is_acceptable_file(f)] + + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} diff --git a/uploader/file_uploader.py b/uploader/file_uploader.py index ae2b1bb..9b91a18 100644 --- a/uploader/file_uploader.py +++ b/uploader/file_uploader.py @@ -1,36 +1,36 @@ -from uploader.database.database import Database -from uploader.excel_helper import column_types, excel_to_list_of_dicts -from typing import List - -from uploader.utils import create_column_name, NULL - - -class FileUploader: - def __init__(self, db: Database): - self._db = db - - def upload(self, file_path: str, table_name: str, mappings: List[dict] = None): - rows = excel_to_list_of_dicts(file_path) - columns = column_types(rows) - for key, value in columns.items(): - d = { - 'type': value, - 'name': create_column_name(key) - } - for mapping in mappings: - if key != mapping['Original name']: - continue - d.update({ - 'mapping': { - 'name': mapping['Code'] if mapping['Code'] != NULL else None, - 'type': mapping['Data type'] if mapping['Data type'] != NULL else None, - 'comment': mapping['Comment'] - } - }) - columns[key] = d - self._db.rewrite_data(table_name, columns, rows) - - -TABLE_NAME = 'table_name' -COLUMNS_INFO = 'columns_info' -ROWS = 'rows' +from uploader.database.database import Database +from uploader.excel_helper import column_types, excel_to_list_of_dicts +from typing import List + +from uploader.utils import create_column_name, NULL + + +class FileUploader: + def __init__(self, db: Database): + self._db = db + + def upload(self, file_path: str, table_name: str, mappings: List[dict] = None): + rows = excel_to_list_of_dicts(file_path) + columns = column_types(rows) + for key, value in columns.items(): + d = { + 'type': value, + 'name': create_column_name(key) + } + for mapping in mappings: + if key != mapping['Original name']: + continue + d.update({ + 'mapping': { + 'name': mapping['Code'] if mapping['Code'] != NULL else None, + 'type': mapping['Data type'] if mapping['Data type'] != NULL else None, + 'comment': mapping['Comment'] + } + }) + columns[key] = d + self._db.rewrite_data(table_name, columns, rows) + + +TABLE_NAME = 'table_name' +COLUMNS_INFO = 'columns_info' +ROWS = 'rows' diff --git a/uploader/main.py b/uploader/main.py new file mode 100644 index 0000000..7c84e08 --- /dev/null +++ b/uploader/main.py @@ -0,0 +1,9 @@ +# from uploader.core.common.type_recognizer import TypeRecognizer +# from uploader.core.excel.reader import ExcelReader +# +# +# def main(): +# reader = ExcelReader() +# type_recognizer = TypeRecognizer() +# data = reader.read("path") +# data.set_type_recognizer(type_recognizer) diff --git a/uploader/utils.py b/uploader/utils.py index 9b195dc..ee35ac9 100644 --- a/uploader/utils.py +++ b/uploader/utils.py @@ -1,31 +1,31 @@ -import os - -from transliterate import translit - -NULL = 'NULL' - - -def transliterate(text: str) -> str: - """Transliterate given text""" - return translit(text, 'ru', reversed=True) - - -def to_allowed_symbols(text: str) -> str: - result = ''.join([s for s in text if s.isalpha() or s in [' ', '_'] or s.isdigit()]) - result = ' '.join(result.split()) - return result.replace(' ', '_') - - -def create_column_name(text: str) -> str: - return to_allowed_symbols(text) - - -def create_table_name(file_path: str, department: str = None) -> str: - department_adopted = department + '_' if department else '' - file_name_with_ext = os.path.basename(file_path) - file_name = department_adopted + os.path.splitext(file_name_with_ext)[0] - return transliterate(to_allowed_symbols(file_name)) - - -def create_adopted_columns_names(columns) -> list: - return [create_column_name(column) for column in columns] +import os + +from transliterate import translit + +NULL = 'NULL' + + +def transliterate(text: str) -> str: + """Transliterate given text""" + return translit(text, 'ru', reversed=True) + + +def to_allowed_symbols(text: str) -> str: + result = ''.join([s for s in text if s.isalpha() or s in [' ', '_'] or s.isdigit()]) + result = ' '.join(result.split()) + return result.replace(' ', '_') + + +def create_column_name(text: str) -> str: + return to_allowed_symbols(text) + + +def create_table_name(file_path: str, department: str = None) -> str: + department_adopted = department + '_' if department else '' + file_name_with_ext = os.path.basename(file_path) + file_name = department_adopted + os.path.splitext(file_name_with_ext)[0] + return transliterate(to_allowed_symbols(file_name)) + + +def create_adopted_columns_names(columns) -> list: + return [create_column_name(column) for column in columns]