From fb62ef2665add74ab7c81213fbcd73414fcbe13c Mon Sep 17 00:00:00 2001 From: Umedzhon Abdumuminov Date: Mon, 30 Sep 2019 21:44:27 +0300 Subject: [PATCH 1/2] Added initial version of new interfaces, implemented some of them --- docker/dbeaver.Dockerfile | 20 +++++++ docker/docker-compose.yml | 4 +- uploader/__main__.py | 2 +- uploader/database/database.py | 3 +- uploader/implementations/__init__.py | 0 uploader/implementations/config.py | 6 ++ uploader/implementations/excel/__init__.py | 0 uploader/implementations/excel/reader.py | 56 +++++++++++++++++++ uploader/implementations/file_data.py | 47 ++++++++++++++++ uploader/implementations/postgres/__init__.py | 0 .../postgres/type_recognizer.py | 25 +++++++++ uploader/implementations/postgres/writer.py | 13 +++++ uploader/interfaces/__init__.py | 3 + uploader/interfaces/idata.py | 27 +++++++++ uploader/interfaces/ireader.py | 10 ++++ uploader/interfaces/itype_recognizer.py | 7 +++ uploader/interfaces/iwriter.py | 8 +++ uploader/main.py | 9 +++ 18 files changed, 236 insertions(+), 4 deletions(-) create mode 100644 docker/dbeaver.Dockerfile create mode 100644 uploader/implementations/__init__.py create mode 100644 uploader/implementations/config.py create mode 100644 uploader/implementations/excel/__init__.py create mode 100644 uploader/implementations/excel/reader.py create mode 100644 uploader/implementations/file_data.py create mode 100644 uploader/implementations/postgres/__init__.py create mode 100644 uploader/implementations/postgres/type_recognizer.py create mode 100644 uploader/implementations/postgres/writer.py create mode 100644 uploader/interfaces/__init__.py create mode 100644 uploader/interfaces/idata.py create mode 100644 uploader/interfaces/ireader.py create mode 100644 uploader/interfaces/itype_recognizer.py create mode 100644 uploader/interfaces/iwriter.py create mode 100644 uploader/main.py diff --git a/docker/dbeaver.Dockerfile b/docker/dbeaver.Dockerfile new file mode 100644 index 0000000..a2c2aa9 --- /dev/null +++ b/docker/dbeaver.Dockerfile @@ -0,0 +1,20 @@ +ARG http_proxy=http://proxy-chain.intel.com:911 +ARG https_proxy=http://proxy-chain.intel.com:912 +FROM openjdk:8-jre-slim + +ARG VERSION=6.1.5 +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + libswt-gtk-4-jni \ + libswt-gtk-4-java \ + wget \ + && rm -rf /var/lib/apt/lists/* + + +RUN wget https://github.com/dbeaver/dbeaver/releases/download/${VERSION}/dbeaver-ce_${VERSION}_amd64.deb \ + && dpkg -i dbeaver-ce_${VERSION}_amd64.deb \ + && rm dbeaver-ce_${VERSION}_amd64.deb + +ENV DBEAVER_VERSION=${VERSION} + +ENTRYPOINT dbeaver & tail -f temp.log diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f7861a2..39effba 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -6,10 +6,10 @@ services: network_mode: "host" environment: - DISPLAY=10.0.75.1:0.0 - volumes: + # volumes: # - $HOME/.Xauthority:/root/.Xauthority # - /tmp/.X11-unix:/tmp/.X11-unix - - dbeaver_home:/root + # - dbeaver_home:/root db: image: postgres ports: diff --git a/uploader/__main__.py b/uploader/__main__.py index 7250e68..d6fce4d 100644 --- a/uploader/__main__.py +++ b/uploader/__main__.py @@ -59,7 +59,7 @@ def main(): # for info in files_info: -# db.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) +# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) # exit(0) diff --git a/uploader/database/database.py b/uploader/database/database.py index 7145a8d..c58ce87 100644 --- a/uploader/database/database.py +++ b/uploader/database/database.py @@ -81,7 +81,8 @@ def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exist command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) result = self.execute(command) - if len(result) > 0 and result[0]: + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) self.__insert_rows(table_name, columns, data) else: diff --git a/uploader/implementations/__init__.py b/uploader/implementations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/implementations/config.py b/uploader/implementations/config.py new file mode 100644 index 0000000..ac14ab1 --- /dev/null +++ b/uploader/implementations/config.py @@ -0,0 +1,6 @@ +from uploader.interfaces import IData + + +class Config: + def __init__(self, data: IData): + pass diff --git a/uploader/implementations/excel/__init__.py b/uploader/implementations/excel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/implementations/excel/reader.py b/uploader/implementations/excel/reader.py new file mode 100644 index 0000000..d04bde6 --- /dev/null +++ b/uploader/implementations/excel/reader.py @@ -0,0 +1,56 @@ +import logging + +import pandas as pd +import xlrd + +from typing import List, Type + +from xlrd import XLRDError + +from uploader.interfaces import IReader, IData, NULL +from uploader.implementations.file_data import FileData + + +class ExcelReader(IReader): + def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0, + right_offset: int = 0) -> IData: + if not self.is_excel_file(path): + # TODO: log it, raise exception + pass + return FileData(self.__excel_to_list_of_dicts(path)) + + @staticmethod + def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + def __excel_to_data_frame(self, file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + @staticmethod + def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except XLRDError: + return False + + def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]: + return self.__excel_to_data_frame(file_path).to_dict('records') diff --git a/uploader/implementations/file_data.py b/uploader/implementations/file_data.py new file mode 100644 index 0000000..ebc8814 --- /dev/null +++ b/uploader/implementations/file_data.py @@ -0,0 +1,47 @@ +from typing import List, Type + +from uploader.interfaces import IData, NULL, ITypeRecognizer + + +class FileData(IData): + + def __init__(self, data: List[dict]): + self._data = data + self._items = None + self._columns = None + self._types = None + self._type_recognizer = None + + def to_dict(self) -> List[dict]: + return self._data + + def columns(self) -> list: + if not self._columns: + self._columns = list(self._data[0].keys()) + return self._columns + + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + self._type_recognizer = type_recognizer + + def types(self) -> dict: + if not self._type_recognizer: + raise ReferenceError("type recognizer is not set") + if not self._types: + if len(self._data) == 0: + return {} + self._types = {} + for key in self.columns(): + for row in self._data: + value = row[key] + if value != NULL: + self._types[key] = self._type_recognizer.type(value) + break + if key not in self._types: + self._types[key] = str + return self._types + + def rows(self) -> List[list]: + if not self._items: + for row in self._data: + self._items.append(list(row.values())) + return self._items diff --git a/uploader/implementations/postgres/__init__.py b/uploader/implementations/postgres/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/implementations/postgres/type_recognizer.py b/uploader/implementations/postgres/type_recognizer.py new file mode 100644 index 0000000..f1b45e9 --- /dev/null +++ b/uploader/implementations/postgres/type_recognizer.py @@ -0,0 +1,25 @@ +import datetime as dt + +from uploader.interfaces import ITypeRecognizer + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} + + +class TypeRecognizer(ITypeRecognizer): + def type(self, value): + global TYPE_CONVERTERS + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return str diff --git a/uploader/implementations/postgres/writer.py b/uploader/implementations/postgres/writer.py new file mode 100644 index 0000000..5a68ce2 --- /dev/null +++ b/uploader/implementations/postgres/writer.py @@ -0,0 +1,13 @@ +from uploader.implementations.postgres.type_recognizer import TypeRecognizer +from uploader.interfaces import IWriter, IData + + +class Writer(IWriter): + def __init__(self, settings=None): + # FIXME: inject type recognizer + self._type_recognizer = TypeRecognizer() + + def write(self, data: IData, append: bool): + data.set_type_recognizer(self._type_recognizer) + # FIXME: implement uploading data + raise NotImplemented() \ No newline at end of file diff --git a/uploader/interfaces/__init__.py b/uploader/interfaces/__init__.py new file mode 100644 index 0000000..2eae997 --- /dev/null +++ b/uploader/interfaces/__init__.py @@ -0,0 +1,3 @@ +from .idata import * +from .ireader import * +from .iwriter import * diff --git a/uploader/interfaces/idata.py b/uploader/interfaces/idata.py new file mode 100644 index 0000000..9fcce4e --- /dev/null +++ b/uploader/interfaces/idata.py @@ -0,0 +1,27 @@ +import abc +from typing import List +from .itype_recognizer import ITypeRecognizer + +NULL = 'NULL' + + +class IData(metaclass=abc.ABCMeta): + @abc.abstractmethod + def to_dict(self) -> List[dict]: + pass + + @abc.abstractmethod + def columns(self) -> list: + pass + + @abc.abstractmethod + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + pass + + @abc.abstractmethod + def types(self) -> dict: + pass + + @abc.abstractmethod + def rows(self) -> List[list]: + pass diff --git a/uploader/interfaces/ireader.py b/uploader/interfaces/ireader.py new file mode 100644 index 0000000..35ac6bf --- /dev/null +++ b/uploader/interfaces/ireader.py @@ -0,0 +1,10 @@ +import abc + +from uploader.interfaces import IData + + +class IReader(metaclass=abc.ABCMeta): + @abc.abstractmethod + def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int, + right_offset: int) -> IData: + pass diff --git a/uploader/interfaces/itype_recognizer.py b/uploader/interfaces/itype_recognizer.py new file mode 100644 index 0000000..070d179 --- /dev/null +++ b/uploader/interfaces/itype_recognizer.py @@ -0,0 +1,7 @@ +import abc + + +class ITypeRecognizer(metaclass=abc.ABCMeta): + @abc.abstractmethod + def type(self, value): + pass diff --git a/uploader/interfaces/iwriter.py b/uploader/interfaces/iwriter.py new file mode 100644 index 0000000..babf9f8 --- /dev/null +++ b/uploader/interfaces/iwriter.py @@ -0,0 +1,8 @@ +import abc +from uploader.interfaces.idata import IData + + +class IWriter(metaclass=abc.ABCMeta): + @abc.abstractmethod + def write(self, data: IData, append: bool): + pass diff --git a/uploader/main.py b/uploader/main.py new file mode 100644 index 0000000..d141b29 --- /dev/null +++ b/uploader/main.py @@ -0,0 +1,9 @@ +from uploader.implementations.postgres.type_recognizer import TypeRecognizer +from uploader.implementations.excel.reader import ExcelReader + + +def main(): + reader = ExcelReader() + type_recognizer = TypeRecognizer() + data = reader.read("path") + data.set_type_recognizer(type_recognizer) From 935b4db4f664a28eee3dfeb59e7f8d9aebc6f0bf Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2019 18:45:56 +0000 Subject: [PATCH 2/2] Bump psycopg2-binary from 2.8.2 to 2.8.3 Bumps [psycopg2-binary](https://github.com/psycopg/psycopg2) from 2.8.2 to 2.8.3. - [Release notes](https://github.com/psycopg/psycopg2/releases) - [Changelog](https://github.com/psycopg/psycopg2/blob/master/NEWS) - [Commits](https://github.com/psycopg/psycopg2/commits) Signed-off-by: dependabot-preview[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 96f993f..8fb6b69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ pandas==0.24.2 -psycopg2-binary==2.8.2 +psycopg2-binary==2.8.3 xlrd==1.2.0 transliterate==1.10.2