Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions docker/dbeaver.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Image that installs the DBeaver Community Edition .deb on top of a slim OpenJDK 8 JRE.
#
# NOTE(review): both proxy ARGs are declared before FROM and point at a
# site-specific proxy host; presumably ARGs declared before FROM are not in
# scope for the RUN steps below — confirm against the Dockerfile ARG reference.
ARG http_proxy=http://proxy-chain.intel.com:911
ARG https_proxy=http://proxy-chain.intel.com:912
FROM openjdk:8-jre-slim

# DBeaver release to download; used in the release URL and the .deb file name.
ARG VERSION=6.1.5
# Install the SWT/GTK packages and wget, then drop the apt lists to keep the layer small.
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
libswt-gtk-4-jni \
libswt-gtk-4-java \
wget \
&& rm -rf /var/lib/apt/lists/*


# Download the release package from GitHub, install it, and remove the downloaded file.
RUN wget https://github.com/dbeaver/dbeaver/releases/download/${VERSION}/dbeaver-ce_${VERSION}_amd64.deb \
&& dpkg -i dbeaver-ce_${VERSION}_amd64.deb \
&& rm dbeaver-ce_${VERSION}_amd64.deb

# Expose the installed version to the running container.
ENV DBEAVER_VERSION=${VERSION}

# Shell-form entrypoint: starts dbeaver in the background, then follows temp.log.
# NOTE(review): nothing in this Dockerfile creates temp.log; presumably tail is
# only here to keep the container in the foreground — confirm it does not exit
# immediately when the file is missing.
ENTRYPOINT dbeaver & tail -f temp.log
4 changes: 2 additions & 2 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ services:
network_mode: "host"
environment:
- DISPLAY=10.0.75.1:0.0
volumes:
# volumes:
# - $HOME/.Xauthority:/root/.Xauthority
# - /tmp/.X11-unix:/tmp/.X11-unix
- dbeaver_home:/root
# - dbeaver_home:/root
db:
image: postgres
ports:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pandas==0.24.2
psycopg2-binary==2.8.2
psycopg2-binary==2.8.3
xlrd==1.2.0
transliterate==1.10.2
2 changes: 1 addition & 1 deletion uploader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def main():


# for info in files_info:
# db.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS])
# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS])
# exit(0)


Expand Down
3 changes: 2 additions & 1 deletion uploader/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exist
command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \
"table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name)
result = self.execute(command)
if len(result) > 0 and result[0]:
print(table_name, result)
if len(result) > 0 and len(result[0]) > 0 and result[0][0]:
self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name))
self.__insert_rows(table_name, columns, data)
else:
Expand Down
Empty file.
6 changes: 6 additions & 0 deletions uploader/implementations/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from uploader.interfaces import IData


class Config:
    """Configuration placeholder built from an ``IData`` source."""

    def __init__(self, data: IData):
        """Accept *data*; nothing is read from it or stored yet."""
Empty file.
56 changes: 56 additions & 0 deletions uploader/implementations/excel/reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging

import pandas as pd
import xlrd

from typing import List, Type

from xlrd import XLRDError

from uploader.interfaces import IReader, IData, NULL
from uploader.implementations.file_data import FileData


class ExcelReader(IReader):
    """Reader that loads an Excel file into a ``FileData`` object.

    The file is read with ``pd.read_excel(..., header=None)``; fully empty
    rows and the leading empty columns of the first remaining row are dropped,
    empty cells are replaced with ``NULL`` and the first remaining row is used
    as the column names.
    """

    def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0,
             right_offset: int = 0) -> IData:
        """Read the file at *path* and wrap its rows in ``FileData``.

        The four offset parameters are accepted to match ``IReader.read`` but
        are not used by this implementation.

        Raises:
            XLRDError: if xlrd cannot open *path* as a workbook.
            ValueError: if no data rows are left after dropping empty rows.
        """
        if not self.is_excel_file(path):
            error_message = 'Cannot handle file. It is not a valid Excel workbook: {}'.format(path)
            logging.error(error_message)
            # Fail fast instead of handing a file that is known to be
            # unreadable to the parser; XLRDError is the error the check
            # itself detected.
            raise XLRDError(error_message)
        return FileData(self.__excel_to_list_of_dicts(path))

    @staticmethod
    def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list:
        """Return positional indexes of the leading empty columns.

        Only the first row of *df* is inspected: the result is
        ``[0, 1, ..., k - 1]`` where ``k`` is the position of the first
        non-missing value in that row.

        Raises:
            ValueError: if *df* has no rows, or its first row holds no value.
        """
        if not df.empty:
            for position, value in enumerate(df.iloc[0]):
                if not pd.isna(value):
                    return list(range(position))
        # Reached for an empty frame (the original raised StopIteration here)
        # or for a first row without any value.
        error_message = 'Cannot handle file. Probably, it is empty'
        logging.error(error_message)
        raise ValueError(error_message)

    def __excel_to_data_frame(self, file_path) -> pd.DataFrame:
        """Load *file_path* and normalise it into a header-plus-rows frame."""
        df = pd.read_excel(file_path, header=None)
        df.dropna(how='all', inplace=True)
        # shift table if data are not placed in the first row/column
        cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df)
        df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True)
        # first row as columns names
        df.fillna(NULL, inplace=True)
        df.rename(columns=df.iloc[0], inplace=True)
        df.drop(df.index[0], inplace=True)
        return df

    @staticmethod
    def is_excel_file(file_path: str) -> bool:
        """Return True if xlrd can open *file_path*, False on ``XLRDError``."""
        try:
            workbook = xlrd.open_workbook(file_path)
        except XLRDError:
            return False
        workbook.release_resources()
        return True

    def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]:
        """Return the sheet as a list of ``{column name: cell value}`` dicts."""
        return self.__excel_to_data_frame(file_path).to_dict('records')
47 changes: 47 additions & 0 deletions uploader/implementations/file_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import List, Type

from uploader.interfaces import IData, NULL, ITypeRecognizer


class FileData(IData):
    """Table held in memory as a list of row dictionaries.

    Each dictionary maps a column name to the cell value of that row. Column
    names, column types and row lists are computed on first use and cached.
    """

    def __init__(self, data: List[dict]):
        """Store *data* (one dict per row); nothing is computed eagerly."""
        self._data = data
        self._items = None
        self._columns = None
        self._types = None
        self._type_recognizer = None

    def to_dict(self) -> List[dict]:
        """Return the underlying list of row dictionaries (not a copy)."""
        return self._data

    def columns(self) -> list:
        """Return the column names, taken from the keys of the first row.

        Returns an empty list when there are no rows (the original indexed
        ``self._data[0]`` unconditionally and raised ``IndexError``).
        """
        if self._columns is None:
            self._columns = list(self._data[0].keys()) if self._data else []
        return self._columns

    def set_type_recognizer(self, type_recognizer: ITypeRecognizer):
        """Set the recognizer used by ``types()``."""
        self._type_recognizer = type_recognizer
        # Types cached from a previous recognizer would be stale.
        self._types = None

    def types(self) -> dict:
        """Return ``{column name: type}`` for every column.

        A column's type is whatever the recognizer reports for its first
        value that is not ``NULL``; a column with only ``NULL`` values is
        typed as ``str``.

        Raises:
            ReferenceError: if no type recognizer has been set.
        """
        if self._type_recognizer is None:
            raise ReferenceError("type recognizer is not set")
        if not self._types:
            if not self._data:
                return {}
            recognized = {}
            for key in self.columns():
                for row in self._data:
                    value = row[key]
                    if value != NULL:
                        recognized[key] = self._type_recognizer.type(value)
                        break
                else:
                    # No usable sample in this column.
                    recognized[key] = str
            self._types = recognized
        return self._types

    def rows(self) -> List[list]:
        """Return every row as a list of its values, in dict order.

        The original appended to ``self._items`` while it was still ``None``,
        which raised ``AttributeError`` on the first call.
        """
        if self._items is None:
            self._items = [list(row.values()) for row in self._data]
        return self._items
Empty file.
25 changes: 25 additions & 0 deletions uploader/implementations/postgres/type_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import datetime as dt

from uploader.interfaces import ITypeRecognizer

def _as_time(value):
    """Return *value* if it is exactly a ``dt.time``, else parse ``HH:MM:SS`` text."""
    if type(value) == dt.time:
        return value
    return dt.datetime.strptime(value, '%H:%M:%S').time()


def _as_date(value):
    """Return *value* if it is exactly a ``dt.date``, else parse ``DD.MM.YYYY`` text."""
    # Exact type comparison on purpose: dt.datetime is a subclass of dt.date.
    if type(value) == dt.date:
        return value
    return dt.datetime.strptime(value, '%d.%m.%Y').date()


def _as_datetime(value):
    """Return *value* if it is exactly a ``dt.datetime``, else parse ``DD.MM.YYYY HH:MM:SS`` text."""
    if type(value) == dt.datetime:
        return value
    return dt.datetime.strptime(value, '%d.%m.%Y %H:%M:%S')


# Maps a target type to a callable that converts a raw value to it.
# Insertion order matters to code that iterates the mapping in order.
TYPE_CONVERTERS = {
    int: int,
    float: float,
    dt.time: _as_time,
    dt.date: _as_date,
    dt.datetime: _as_datetime,
    str: str,
}


class TypeRecognizer(ITypeRecognizer):
    """Guess the type of a raw value by probing ``TYPE_CONVERTERS`` in order."""

    def type(self, value):
        """Return the first type in ``TYPE_CONVERTERS`` whose converter accepts *value*.

        Falls back to ``str`` when every converter raises.
        """
        for converter_type, converter in TYPE_CONVERTERS.items():
            if converter_type is int and isinstance(value, float) and not value.is_integer():
                # int() truncates instead of failing, so 3.7 used to be
                # reported as int; let such values fall through to float.
                continue
            try:
                converter(value)
            except Exception:
                # Deliberate probe: any failure just means "not this type".
                continue
            return converter_type
        return str
13 changes: 13 additions & 0 deletions uploader/implementations/postgres/writer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from uploader.implementations.postgres.type_recognizer import TypeRecognizer
from uploader.interfaces import IWriter, IData


class Writer(IWriter):
    """Writer stub: wires a ``TypeRecognizer`` into the data but cannot upload yet."""

    def __init__(self, settings=None):
        """Create the writer; *settings* is accepted but currently unused."""
        # FIXME: inject type recognizer
        self._type_recognizer = TypeRecognizer()

    def write(self, data: IData, append: bool):
        """Attach the type recognizer to *data*, then fail: uploading is not implemented.

        Raises:
            NotImplementedError: always.
        """
        data.set_type_recognizer(self._type_recognizer)
        # FIXME: implement uploading data
        # NotImplemented is a non-callable constant; calling it raised
        # TypeError instead of signalling a missing implementation.
        raise NotImplementedError("uploading data is not implemented yet")
3 changes: 3 additions & 0 deletions uploader/interfaces/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .idata import *
from .ireader import *
from .iwriter import *
27 changes: 27 additions & 0 deletions uploader/interfaces/idata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import abc
from typing import List
from .itype_recognizer import ITypeRecognizer

NULL = 'NULL'


class IData(metaclass=abc.ABCMeta):
    """Abstract interface of a tabular data container."""

    @abc.abstractmethod
    def to_dict(self) -> List[dict]:
        """Return the data as a list of dictionaries."""

    @abc.abstractmethod
    def columns(self) -> list:
        """Return the columns as a list."""

    @abc.abstractmethod
    def set_type_recognizer(self, type_recognizer: ITypeRecognizer):
        """Set the ``ITypeRecognizer`` this container should use."""

    @abc.abstractmethod
    def types(self) -> dict:
        """Return the types as a dictionary."""

    @abc.abstractmethod
    def rows(self) -> List[list]:
        """Return the rows as a list of lists."""
10 changes: 10 additions & 0 deletions uploader/interfaces/ireader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import abc

from uploader.interfaces import IData


class IReader(metaclass=abc.ABCMeta):
    """Abstract interface of a reader that produces ``IData`` from a path."""

    @abc.abstractmethod
    def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int,
             right_offset: int) -> IData:
        """Read the source at *path* and return it as ``IData``.

        NOTE(review): the meaning of the four offsets is not defined here;
        presumably they trim the edges of the table — confirm with implementers.
        """
7 changes: 7 additions & 0 deletions uploader/interfaces/itype_recognizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import abc


class ITypeRecognizer(metaclass=abc.ABCMeta):
    """Abstract interface of an object that reports the type of a value."""

    @abc.abstractmethod
    def type(self, value):
        """Return the type recognized for *value*."""
8 changes: 8 additions & 0 deletions uploader/interfaces/iwriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import abc
from uploader.interfaces.idata import IData


class IWriter(metaclass=abc.ABCMeta):
    """Abstract interface of a writer that consumes ``IData``."""

    @abc.abstractmethod
    def write(self, data: IData, append: bool):
        """Write *data*; *append* is a boolean flag interpreted by the implementation."""
9 changes: 9 additions & 0 deletions uploader/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from uploader.implementations.postgres.type_recognizer import TypeRecognizer
from uploader.implementations.excel.reader import ExcelReader


def main():
    """Read the placeholder path with ``ExcelReader`` and attach a ``TypeRecognizer``."""
    excel_reader = ExcelReader()
    recognizer = TypeRecognizer()
    file_data = excel_reader.read("path")
    file_data.set_type_recognizer(recognizer)