From 37169897ea44a4109d0c3c414046d83fc542016f Mon Sep 17 00:00:00 2001 From: "Abdumuminov, Umedzhon" Date: Sun, 6 Oct 2019 20:57:16 +0300 Subject: [PATCH 1/2] Redesigning of tool, to make it more extensible --- docker/dbeaver.Dockerfile | 18 ++++++ docker/docker-compose.yml | 4 +- .../common/test_type_recognizer.py | 16 +++++ tests_data/data/column-mappings.xlsx | Bin 0 -> 9222 bytes tests_data/data/examples/test1.xlsx | Bin 0 -> 8929 bytes tests_data/data/examples/test2.xlsx | Bin 0 -> 8754 bytes tests_data/data/files.xlsx | Bin 0 -> 8703 bytes tests_data/test_configuration_files.py | 0 uploader/__main__.py | 2 +- uploader/base/__init__.py | 3 + uploader/base/idata.py | 27 ++++++++ uploader/base/ireader.py | 10 +++ uploader/base/irepresenter.py | 5 ++ uploader/base/itype_recognizer.py | 15 +++++ uploader/base/iwriter.py | 8 +++ uploader/core/__init__.py | 0 uploader/core/common/__init__.py | 0 uploader/core/common/config.py | 6 ++ uploader/core/common/file_data.py | 43 +++++++++++++ uploader/core/common/type_recognizer.py | 37 +++++++++++ uploader/core/excel/__init__.py | 0 uploader/core/excel/reader.py | 58 ++++++++++++++++++ uploader/core/postgres/__init__.py | 0 uploader/core/postgres/sql_executor.py | 33 ++++++++++ uploader/core/postgres/sql_representer.py | 16 +++++ uploader/core/postgres/writer.py | 33 ++++++++++ uploader/database/database.py | 3 +- uploader/database/row.py | 6 ++ uploader/database/table.py | 20 ++++++ uploader/main.py | 9 +++ 30 files changed, 368 insertions(+), 4 deletions(-) create mode 100644 docker/dbeaver.Dockerfile create mode 100644 tests/implementations/common/test_type_recognizer.py create mode 100644 tests_data/data/column-mappings.xlsx create mode 100644 tests_data/data/examples/test1.xlsx create mode 100644 tests_data/data/examples/test2.xlsx create mode 100644 tests_data/data/files.xlsx create mode 100644 tests_data/test_configuration_files.py create mode 100644 uploader/base/__init__.py create mode 100644 uploader/base/idata.py create mode 100644 uploader/base/ireader.py create mode 100644 uploader/base/irepresenter.py create mode 100644 uploader/base/itype_recognizer.py create mode 100644 uploader/base/iwriter.py create mode 100644 uploader/core/__init__.py create mode 100644 uploader/core/common/__init__.py create mode 100644 uploader/core/common/config.py create mode 100644 uploader/core/common/file_data.py create mode 100644 uploader/core/common/type_recognizer.py create mode 100644 uploader/core/excel/__init__.py create mode 100644 uploader/core/excel/reader.py create mode 100644 uploader/core/postgres/__init__.py create mode 100644 uploader/core/postgres/sql_executor.py create mode 100644 uploader/core/postgres/sql_representer.py create mode 100644 uploader/core/postgres/writer.py create mode 100644 uploader/database/row.py create mode 100644 uploader/database/table.py create mode 100644 uploader/main.py diff --git a/docker/dbeaver.Dockerfile b/docker/dbeaver.Dockerfile new file mode 100644 index 0000000..4577740 --- /dev/null +++ b/docker/dbeaver.Dockerfile @@ -0,0 +1,18 @@ +FROM openjdk:8-jre-slim + +ARG VERSION=6.1.5 +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + libswt-gtk-4-jni \ + libswt-gtk-4-java \ + wget \ + && rm -rf /var/lib/apt/lists/* + + +RUN wget https://github.com/dbeaver/dbeaver/releases/download/${VERSION}/dbeaver-ce_${VERSION}_amd64.deb \ + && dpkg -i dbeaver-ce_${VERSION}_amd64.deb \ + && rm dbeaver-ce_${VERSION}_amd64.deb + +ENV DBEAVER_VERSION=${VERSION} + +ENTRYPOINT dbeaver & tail -f temp.log diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index f7861a2..39effba 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -6,10 +6,10 @@ services: network_mode: "host" environment: - DISPLAY=10.0.75.1:0.0 - volumes: + # volumes: # - $HOME/.Xauthority:/root/.Xauthority # - /tmp/.X11-unix:/tmp/.X11-unix - - dbeaver_home:/root + # - dbeaver_home:/root db: image: postgres ports: diff --git a/tests/implementations/common/test_type_recognizer.py b/tests/implementations/common/test_type_recognizer.py new file mode 100644 index 0000000..d72a727 --- /dev/null +++ b/tests/implementations/common/test_type_recognizer.py @@ -0,0 +1,16 @@ +import pytest +import datetime as dt +from uploader.core.common.type_recognizer import TypeRecognizer + + +@pytest.mark.parametrize("test_input, expected", [(123, int), ("cool text here", str), ("12.04.2018", dt.date)]) +def test_type(test_input, expected): + recognizer = TypeRecognizer() + assert recognizer.type(test_input) == expected + + +@pytest.mark.parametrize("test_input, expected", + [(123, 123), ("cool text here", "cool text here"), ("12.04.2018", dt.date(2018, 4, 12))]) +def test_convert(test_input, expected): + recognizer = TypeRecognizer() + assert recognizer.convert(test_input) == expected diff --git a/tests_data/data/column-mappings.xlsx b/tests_data/data/column-mappings.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bb5b6be6ed690891e826f380cdd8b24a8fa34950 GIT binary patch literal 9222 zcmeHNgmv^y+0fIGH z&-LmMF7-6vmKYR?^dUNyRdp5#g$Ihq3BHUHPuSVALXD5Va4MojGeGCbJdF#~fqGi3 zW||FdS83o@ca{%)lh4iXEb_qOTkb6kEnzgZ3el?YQ&ty8os<~##M`sC`91UVZDV;E zeWHHVC`f8!!fBW-6$}g+n*pfXN7s^-^EwUtcS(}wD-Rxgq+j^Xv_eeXwM1pnm>m~y zp?aUekz=`_H(O3kM2IaU;9Xz;2jO_{-Wv=6;Pw_3p#B%LglqFWI6-1f8EJJmNV7C?vvhFh=K6X3 zpE>>)+u)xby)02ht&;~Awy*FbyzhK+A)Y`6>?N(x1biA0q&SZckIiSKT4;MnP4JZR z1?rQ)mVh4vhy}6Otv=w{Dt}ot5sB!7ID6$(1LRl&f}HoNlG{i z;bz#;-p0r5uVqCr1Nj0@mZ;bojr69#+^EhlpZ(9hp63GhR#AyhJJ^e&h|qkA9ri`h zoj)IcM#VJSEvF56f=*1dv$b8ii*xUlJ8^?VVjo8~iSWh+3ll|nej$V0Ny8Y7-)@M< zgV)Y|gk}JVnZeq%psP@=o$`>{`g;0TtXAu{4YG(cj5i@Zbfd=(r%S+7bbAKNy5QyzdEJAZw!gdR|78_WlY)=KBC^8{&v# z@MPa=t-p722|3e$hfJWywtfr}E>-9N06y{#BxL>!ma?ZB&Y3&}0dHz=gMO%11;>+w zJuM*M923;GpO(N+zxtrR1BF~(KN zaxCR&$<6~8(iB8p6`zIx;II!|fiKQdUBdCq(GIw2j#VwnTlE;L4d4`F>e{QY>`Pv< znn9vec1j^7in{4_h2@FqlM=G$e@_CoCYreWp?D=B1y`bmHX!-Ochi1vz?QVPf{H+Kddu(8;PCr zxRLdGwx*J`+cCdqS{mH}SbzK=O7W7Sb!sf+tM(C1-uwtn7no9?1EnlpYC|Sx)5wV63b!+C-z)Wj4{06A+DY5j;2PX0S=Qamv7(Q1>tv? z4jvQ7KK$V}+taEKTB=DfzqdzY4fP;$s?+fr-hQ?NY@Y}hqv_$qoIlToW^dmiX`~HC zcTif{4bt!rOybRQ%2*FsZ$^p+Wn`igJ^cBl1P&@XCC0cme}lg zZ5i8@o&n4x%+pA2m->)kwIMR@+6dhVFrP<+MAEi+3x;yfpkamrZe-5sW}DK}PudSB zOG_#sE#9Z4HhttHF!qd*^^SO=_%ODHGNQ($_mck7kKRjx+e-)p} z3Y0R#L@YVksqKDRoB=~kt@oN*Ad<3Ed*19&@kQSseiv`8T^xpYH-o%i7(yL~7fyhO z-)IlHZ&wpu6MB7{BtP5XQjnCou`+iQrE_+GZ7`+F9FzGUwpcurm`xFdJn6crs(0FI zZJ-0Pnji+D_eE`j6qtRx9oSJ?ce~XRDftNk>CnFZ7+HIo-RPv9WZt z~OKqLX&Cz6;gqOFS6EnQR&v#N7D%)srpYpxD)<^3=DL41p`Siy;6 zrv>T?U=wWrZCO@4{*jTioAj_y!D~WszQpp3Q{l|P!dfPKy|<%YLw@cHEopez>0_OrE=Ewb!tM%^8;kU#C1|eT>oeh;y1PC5QS!;s@0bGZ(1;VUngq+23iRMl1f<=fZul-pz8xNV;FS9(viS z8|Z)DaQu~fsmlDYQ&t2kpt|&6PwK()!2Am z-~+APrVIgiST}zLu*U~#;(%i;m+XL}4_p#H?Yw@6L*qr9`@nDss1$>CPVOGBYvxV? zn#u8bktdf!p<3y(#^)JPWeM@Ze6-rA*2DV6fh|k4Ym>nYuRlH}Mush>BL4nSbuD4& zQ>v=jwp|DXJhm}BPMNWpEW)4<|B=8)ZwgMdC`YbvmO|h5^Y{n{M*P%Gz24J?Amw2m zOVz*-k^0$ZYNpKwWA8&g&WyLJrBy{Lql?sD&dbJZ_R)&sJ|Gz`&r&4XoKd>~CbPIX z@tD5Y=R3wrIFG-Z?4WY)7!nJ4!--Ze^~^&_KDh5;^hDy|V0y;>mqb-U9X8ZCX9Zv7 zM84DO<|LwRQIGXxm~-d#-IS(qvVJ^V?!k>ur_vuJG3&p# zk?%R~A6*W5X%o{9?B|WNQ2hH%G&fZDPo+B_Z>PONKKs z?mUP5K#yX^MNGsudKq`UW!*_ZNRRt8-`*#RaE~@S9V#=wf%-Xdj#Yob{h(CU1P%dI%gS~?N5vv_3<`klt7&K47ngrE)D}S zGL_AMloD;S>0E%_c3Jz!{@`^T#A(Y8HK6Wk#A@UxElH^$|yZ; zsf5kcwMGwbH;8_53#br}$eacC>j{)pQ9z?U33od=Q;Gk55>PDw-dC zg76ANM79^pD$aYZA52e55f=cX4B;3d2k)7t@91CFC%kx@sn|2lN{b`DYY-g+@x;M? zV;go_%o_-;thK;VIG5%Z@dQB<7Uk5<0`l2Vg@M#B155PzAEFKMP0Q#^EEPwxrWsvJ zX-uk5z5FI7a~YEt?SrAq&{&wiH}z;_@92@Nl#s0OtA4Dr*VYR)@9Zc+qUK8mE-DbJWXgx+rS)?3FPo;dy*7URgA=c52of>m<#I zaADRepc$^#Wj`oO@J3|N?WpROXVL3yD`{VBb%x&{(YF`keX>mAEsd^Q9LkN)8X|Av z`7)~?qnY7ct`!u@Zk>==2W?yRYOdS}FHjoaPit<}WfizqZJ9W{_J!~3vbQ+xq9}7P zM3n}djamZkYCtjqeKmHe-k7XT>LbD)MGq?b8?&;nZXJW+b=SwyvQm(}yKw2!#Vw2i zofbY8pBAT@3G5e6#pI!}i#g<1pQo0Cx2ahxr;EdlN1Rhb?Cz&F=P+yv85Y2FOFf!F z?`F>*;~!b##)u`wiIEW)ciW#k|6?*eFk3+$i_Ay(umJ$P-*Xa(kAvlJ$;exs!Iua@ zk_GZUNrHL%lghO@*jswqxFCR{I;2plzE*iw?t*~Dm*HVwWAqQ%+uT8}D}27d39v?; zK+>lNw497bQRnl$4b2u;jil5@S_?c(eALjQY++ut z9q*5I+up{`y81TcPS6L%tH6YMCis?|2k=PWif9R>9K6#sAVAe>ImX zGdCe%u98iUJQAS~`4)$mV^1lHGzIr$%zEhEk90fpaxIRaTmSnbYLz?8idJ z_IEIK>?Vr_vQR4CmqewUK)v4GOgvmC#EMsw#U_@F*y~!{B2F+f8Mn9E?DR2UwWWMo zQqE8DvO^;?WcEq1ME}Q|JFxHCK68Gc>ChL9T>;stQS7m}>UT4?851$Lbh3ztQeVwl zk`l)3X9-t_yxa|V{X8DZeo{_1lu3se-L`~9Gkl{hJrpxvhke7qt~eIOUVT%wUbX=D z=tgrKxqF3mU4fQxjtYtN3I6K4mO;Y|K|JfOA*F3THTHf}TR@<@Hl-ixp;+SBS#CPnV zDJ3E&iY5CKRLpm8vrOig53N1gX;C~4feoZ#JkGilFi&%POUY2FtK3d^;`TrVRB#BpaW|_z2F)4k51KWvIh1s#~;b!}Fx-v@9=Ph))Xh&t&V>&ht8@ zi;ei1tZeLO20FDZ+;|)>?bc@o624t3z7toT?MSf`#$92A7JT$sfmz`6}NZfS~_>csk@-?>7T^ZhyWD|cKqE+)>bLER(gz=5!KiciJhIS)9lJ6fR0s!v+%Wi)zg57N_Eg|mQzjePQpjqig zNqNE~u>E?wThwlURB80uiQ)`T=&4z0;fDSD2cQB<%uKo6isH+CC=n$ICT9khp)}oM zmitk*LRO%6zvi~`eKnib%7iboWc}ui;G@ouH#fVRz-mq|m)1_!&;1_*nC5RPe;mhM zhE-b!Fv{uW#2<(GljNm0aK*(jcFyOb_pc<0TT+m@4BX@5zTYA0p^@6y`W4*?mNd^r zYUL1tv9O<%US{JrpunX>2-4)B4+v1kHZHb2Bxt~``KonDFkS|TbWCJk)sCFgAsZG9 z&*E)X0q&FY_f(yJTwdwCEusXqEK#t6%|IdxICqm5csHD`w7j-fKj>_2NGf2&8i9r) zv)W6OZ=t|k#qxENZcrO5*@Rj3GY3y4Pn_@X&d<&P?-e9`rr&MVn4Gz_f}g`Ib;urk z54$G(ET}c?ndWYpLxB7{WMVz5PNT6$rXn4JH6J>y8GYB50T zEy?ofQ6Y~f=)n>dFt7+CCWn6+J|<}U9;$Wpp&F_W@V?bq&Ha?-@yE~7>tv=CnDrjEDEGwb(t7_ zbS6?WGMiw$*(3VGT(w7(#9YOS#UpK$U@!fw$i>J?3L{OwdLR&9v^21ngdtu2*ejt@ zdxLBARfLCpQE|n4Cq|47nyhq-ea0#Ik=hhJa$7?7ScSo`ZET;1d^Qyw8H#7BR+MiS zD2>qIRvrb?28(*S3VZ^S9aNv!b#?hSw7JY=r_uWt`&*-PbR;G0;&o&z`1{}wGFJwn z@7Myqjd@MOBSXp0^!!m4Ja^G_x>neTri#&A%!TyiI}B0~DObiI=>{GTK5QrQye2lw zTl=R@O*a)vv-XrJhb z;#+94nbe%Kt_OgX=V?v{TAVHD@QqN=_|4V}RSFR}_7f_v$K5nF1>d?m{15DS1XYp* z#<@VB)DC;EF5!j(+J!ACfTtUpJS#!rb15RURYjz(oqewN}rvtSfJik^rCKYS#`^fG7_Wa6ipNlk3w7KHu;txOJ_tfQIdn9KL! zxk%+$*F43l;J&2;rS*#7T-v=m{iW5-sV?-G0uyPQBrJtp+G=!o<@q!h2d-YqxMh>| z$;~yei`1)kH6msXMHc~;%PCpWQJZzoO{(+rxl*#~?d)U21@3P5c%v~bVQkTUs;rH5 zrGRhQPQEBe$AJWTJL=|LI1*rFDV;Ej5=T^FASPnqY6|+2-9KVD0lA3|=`eGV^CA0l zdG05uZI5X@?IZCNF=bJ7iLSaWfj1%U%=;Z^vGA%g{A%*JjYsU}%Sq?yk*`Jo z*QJp)AJfuJ_04~HKa@M1NY3`3cU=5?y8hk&!~P3(@Lv`DwKn{3!JmB^(kA{?DgIUP zul2G&3pOK}$p623_N$&>izt6++DC3G`K6@ttMIQx_8-DU$QOnb{u{0RtA<~x(?2wL zBUhnF4Zjkpzbg23-TH?DF(kGA^Dq2o75l5`uc`ANq9^44dGx;}(!Xl?YwY|(2LK>K q4FLQtlKv|GR~Pta@haLsiT~~p)xqe<*#!Wwk)I%Buyz6d{Puqc+b5I& literal 0 HcmV?d00001 diff --git a/tests_data/data/examples/test1.xlsx b/tests_data/data/examples/test1.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1598724070d5d249065a88e83496f646ec50669a GIT binary patch literal 8929 zcmeHt1y@|j)^_6^EV#S7yF0->35~ln4#Az^Zovs2f+Rq23GTrmxI=Jve>*dCznRI* z_Y3Z=UVUoyT4(P*r|NmOmZ|~_EEWJB00aO4-tf?Oe?4 zTnscl9L${cSln%G$ns#J>2d(jko*5P{)<czd}2DAJ^l^BrwAg~vM zK~1EsE3v1M1+?4oxB#+nTo$V`RaL<7CLP%FebnFs!FZ9XT?fuT4vzke9W) zcb~cy7eB!Xq^a8#$Fv=)p?gj-c26=%!B4 z;mNxlKaNLu0O08f2B7*kTGnW>QJzDvrU+pj5`>lpPG&aFtSmpT|D)r7F$e$h>SYN^ zDxGY|AxAInL;J3#7UR$)6x_sLG?8oi`pYdmuZhm5B3x{xB|_6A2!fIIYw^7wm|ql% z-t8m5T;nQ>#K01ytaU37Nx5}=gTO%J^iIaHY_$vBb?Rd3GEG+AgU+=jmZ7x1I9H*6 znNnurM5+>Pgyj{%Gt5Gw5FFvO08o#T_LA}SC#We2jf3)#iaOq$-Nca$zv*{HJ1C-I z{EGYIY1nnM93oFIG*4}eqe>MF) z4LPUp`7$9xqu%T93al{pAs$CVy{=c>sB18o7wwG25g4$Zm^Md(aP~uILoh(&gL0BU zSvX9L{oTFN10>XRXUqnXgg&NhEWRJtbkz8v`9%~`=MCc_T-L*EZ#b--M~Md@m>I0z z@V|Ylv0oloT~|kzPH(>X)F3&33I9IOgLLc+|CyW+iU(k9dy<^-x?TDbc-nk#uH-Qc zZqL6(-XtnjASvM%_6~6&3+V6Z=FA78azKT-{|k1T88ZO zKOsYjmQv*e0|4wH007S+cR(QXSFn_6>DlM=Vfh$XJ;AJH8Do<9r^i6U+7_m1P3?SZ zmMS4)Yu9NjvrX5Wy1#01mAI{WzBnw?Wq^yiwd8fUc|Y+LT<*m{wf@l2W6@V@X>b~G zF(3SJP~<={pAd15ZRMeYdkK?#`5|)IPjK1Mv2OK#7}{T-Wnbnaxuc#oPc`Lf)bnZt zg&}EKESv7{$w4^<2`GgnV7=$LqX)pf0~iU4rJ(k`^klBB&6zQMVli0TaOm4AqoB;u z##3PjVsy9lm851ufN>|Nhy=OV_KlfS!$AXC2F}9LdfOaQy~MQ%Y!GkDGevv-sY-Q+ ze()Rk(Zhgq!Y6hoc3{$x4skVv7VAoex^#6_`~gh#iZB z;bhi(Ty}d5fo>nj^dgZv-xo}zS=*LUtCuqrS=Ll+Rcs30u8BoS3RK~gk))5r9q4Cn zcvF`}d^fe3LIHXNh7JHbXOPab$fC?ji?_q`@Q<*!kGH2kTMrvrpdbY6OrIacKSKHx!$ z$L%FtanHweV0|k`&Cz`&707nS}dMK=F#O6!E7FWyC?U#58kfK7tl%pnOs?9UvXJw7tMF8VoU=B2z-r&?|%$jtg9G;Oui7!Rn9 z+D3_OJCzC5)S98$-=f)N^vjNNye_mEID^>mM>|D8!)p72mfT20q7lNhrxw|XP!n2g z2Aic^qYYyQF&j_Y?~6ndb`lG;d8CR7#RmkhEVf5$0lJtPZQ! zd@{=EAzv{!x|vefg>>M$F|`kTAKDMG8-7^HPXd$6Bj@%pg>iZ(CX9Ue_;C6cX~Qo8abJnYPdLA*;zRMH0U;Q z?2s@NE5wcJ(9pw(!c`)e%xM8C9J39otcxoiW4?x%PBAzjIT*VOog%q}*q^g`0hmug zP91gRwfrQyQ@NA(ajd5L(>Oz?qZE4r!?bjvIsVsXK2f3eK95{PH83@&HzRTO>4Wf( zOj+_$-4KN%CHb&Hg;^l36{XgUuGKt{R7Q|;_%%J^EBT#_bgCqhL>W!QeBHW%&?a`7 z@{%9rLU(Xj&OaE2h2Af}kP))juvj2B`G&U0eCjva9;Xcs1?t~Xm*oVh%yhsvKBCt? zO!RpMahFlRvL3^cRve~KuJfCc=14yHsPY_tEhT!@Ehj~*Cb6!;C6}0=G;TE27xsjV zK{C?%uRgII7wxMuWNmxMe&%z(88{b9Gg~v(->&RG7jR#DD3XvDs}=WF6xrGBo_jr( zdTC|ECUKF$fnD@vs7W6OsRSJ}RvXD%n>EvcTesYo3O?+m`Go(gA1&7@LIyz>L|(Q$dJ zyhw^dH+yDxgT!>NF#q1y3Ej3I*gFGrBW08Oj#Q3+o&l#D7!qIKX3iI|OcT}nqJ#21 zPA?I0&{8T6W1)IE9%aI{exR|cmk$Ryz!2+ zVZqz#hs5M_r$*WAf+TBR<5zST7`~P_)DK&@b*wmJFKMJU;k700a-KLE6b-#!k2#<2 zv|Epp0fRtZg$6Liy1f^B51`iC@efdzT!G0&wKD>H$#!u@D5|$pxlON*v<7jOMh*17 zG!U1>22bZ``c;IGA~&u`BYhU97t!qu%`*RBu{WKAWuVhJt#F{PT6Ydsm6CI+4`B(e zv36^i!#3^Mj=@|)4iaxkpdV3-Ln+-?*HC;-!Mh2HI^+-m_(Q8x$*uKwBdd%8tLh`^ z_*c5-sV9IK_8}NFhV`2CDDqm@pG5*5izp*%**FT4^$f#b6RKaj8+Y|9*4{JbO!1Pj zW(#{g9`BrmwhH^5Z{KtpR5jEvDA*xTYiJx!#Tq<4Jm4-Fv_2m1p78EW(w=m5wcg(h zrnEkt2wYUGEnu^L^*KG=NyJ_Dx!6*PC7zonB2GOf53fA_v}$rh6KNEHdUpaPn$^W7 z(Ul;EzU^9kIM@fRuS2%m@uuRSW1i@B4_vcQD|oh%^rV3*ChfCKCES8Zlq;T_z)DH_ z;PJ6Tw&G&qSwnbJ_-H)%kVU=>aJ9vm^J-0fHoC&>2TBA+coARwxJGNGWE62}lVsTD zv@pk-Vnz7*A=>AlunB5OCqrs=mTF1ku)z*9vbS@mLrb=wJBz_n!rrEoMv&)V^9}pB z*)wR#qPDNjFU8JH{58^agTu+MU6$q(E0vUI1pV2q3DaAJ9|EBl1mdlT$m^(gBzj#u z2?|cz;|-B`obkf0GtsI-&U1 z9NcOcdqrWGOuR2&#@IKkb&+WZWtLC^{!Z_d0%SD1I!Usvu&{RwbjP6!M_(?uA&^>> z*W=sF)bz}XK~-AehTbIIh@7CGhTCRKXlj+)iw0_2qSdh0nb`r3c#H8fZfKKKfY z?`@4wc3F2hgr)`#6fz0{A|m5128Llj>Plsmf@}_xGG;P(p2J|tyllZt+=%WT39q8P zcN-vagK~z4i`6-~OA^SSGux61`10(ElnLx>V&bb9B0$tB;n=*+dLM9z*ao3)=1l64K+-0 zoz9)gw2(x}yzWRNO3N`9JBkdFG+S)Tj8MGLbmyeWF)0Xr!-)NUwERx_%sn=GLfN9B ztTFu~5?sQKJH2kSWQu&zHc0qrseE$JcU#jurvj2eEtb0KGiF!i`jtLFByevj402R@s&TC2xvWBjFB|M5zR%g&z+;4(kf&T zPc2)SG+;V}S*zub; zZfk}Emeb7<BpT3Q$Fp}b_%%->J?y(4^mcU%Ls4Vi5P7OH2 zTm7&o>SoxTbV=L^72H56J(ZlSVkoqGP-b@e1ZHVK#hCJtZ(g1BiG*oqV}AdQLrJ?? zxijL+7q$6nP9`9-$|TRV?Hd_B;ge6aqJ3H4KySpD_T;m!ZwD5dr_2%D0)#vr{P;eo zZ>vh!r%=+}juZAJ@fb!fBx*+kmz()%nV)>=Z4egaXPA=+j9Sq&gVb};YfFP&@m>x- zROs%$VR*h`7_X*_IPrE4@Q?P~S8cbesJeUo`87(Fsl(cl|y&FbyiSKV1}$0KyI zth<_l9Bo@y>tOR5VLI3r<^0k8?*hU4-5OmLpM{ATz#3AiHtNB=)NeWItcJKzy0+YFcW!*n`Ib+TO{n zK8YYVw0+;fR7pUtO-SAF0nXbQr$MS3T+d!C?~gU{lkMMa!WQvm826sbiLbF`V#qag z2zN-ldxEC;=wah3{&6!#;OmdTCdts?cqZIB!a@{1q?|b!Oi&(AdOBXskJN%QkwHoi ztLmwmCV+0!+Gkw0|BXG zlXZqC#pk?Q*D6c1vYQ_tVlFlPX*;M?6qu{UQ44~G-;hXC?@@mGxB~9Nickioe2lkkzBdVSOq}jfq2Sqt_elRHS;`_xZo9=m9Qf2RmygBZYZ6y z48n^PHkIi)O}5!BUm`sZhSvxF#j6?R23bq(M<79j-7Tx`9T`RjZ+hXret#!H#9^_bf#=x&A#g=y_Uk4!0)_!Pey7w?~e@}34Cm;P257J znu*QxOpxwnl-$PyHQ4)9R6;~=23ZYJ5hN@Z9aigam2UjCYqml{^i{T4ou+p+aJIqS z)^orshuhTEFAwor&G9GnyD>*Kdt@BmwW98e`5#BxL09b^6&(=-ot*dU<~Md<8|l{! zaab(cHxj7Zg|t5fnmV=UmPdHtg-(mgj`TB{eeI~s8AK8j-=BZ@7I(*+;eqi$kA{Y zn-ZzphSF3L_d`0uK8PxkU7H2;aF63{ul&7l{NlO3qdETukj?2-m+@koh;*e7L25#P zPimlS<|OKsR>T)}x_kQ!n0Imh$oR_BKDBo*N0+$TejPl>lHKSvIGW;fZ!$82;e^4_uFf)3DMU|y$(G>SjRaLY<5d3&0OQ_nAlRvf=k(URlCHFOF)myrL$GN1-(Wbn-!F+ z!alypbWKuLZk>6bHU51du;Nbd*)kNWykmXb% zAz8|46DWdZtPX@FZp+j5^gusLaI#(L-^Pdk;0WyUXFomUt^ zCDd9d6`AB#QH7Po?@!0pj!n*b?nZBTx!+33EfNVBoX3rItA=mpVZp%?f^jdC-Dei{ ze6isYp4FVNi&4k;ugMsAt7i~lnmYNz$-e5mnMJdlGzCLzzQ$#MCs!MAA5_0|S_QMx`W1He@ z--?yw%*cqHT0?`X{Ct+=%sOjubSO8%UXMF0^)mb}$+wE?Xh-~-U8|{}!c-&|KX=<# zdDpgR@R{=GG=i96mGJYEQ8g+rv_-q`qnH7?2{x%ACj3{!;45iXSs2`{9&$ryqg5iu ztv;AXYaW^j!LC96lRmZ+Wxn|UdjaQRUi~zcTUsyTCex1iV2z8;U<>aTP3~Zz{SJrR^iVSA1{a?{21iqPAsvsN~t$My5hi&IZEUzQs^3r_0g$ErW$0w`!^ zNTKfE4-@=nyZ$r(%h3W=g}*EKd)xCrfdKY=vO_z z_6PpdbObps@k_VhSK(i)mwyV!Apa)(k6Pxh8h)+b{i$ITl1)JV{?}UGuL^$6KmJsp zg8lm={J(7ESJ7X?x<5rZAyM8Rul|o9?^i8x|)@|Gg?gY2s7MuhRkl+v?I5e(}y9IX(8iG3n2o8_`` zGn1L`7ra-!R^8QW)j8F5Ywsibw30j=JRSfMfD8Ztr~pPsAI!h#dZVz9*-P zzPUc@yUdsVQ~{G;0ZLX8H3a2cwqxGiFYt3;4;^<>F5l6uAQ^5K-H( ztmXG*N-7HQJqz$D>>p_0Z&y4tCiAi_m_YglFLnKhcZt2*bk;78HPB8)>m72q*1s#{ zc`&~9F7W8&!>%va10n$M_y`A3`U@><)j4R+p;&tjWgQxnmii!|wIe&*&-4H2_+QMy zKRtR`{2RqC4)kEis~;i#S2N48SmN@oVy~L1)O`G8m!8x{<IcaB0v441)wYpyYwKDw+6U+o&6lcA69-FzrrrTxbP0TC2ME6@z}car#vCMKUjccckQ5)uG1 zjGHz4zj)$m2eLAOFl0IwF)tr@QhqDWIhm7>OT=8OVz~NqWFc*d6z+VWv6aHj2jgvt;7&f4^p=BM!+epq4yuoDAl%9OZE4vUD6L zAB19NsOFpB?R(XO@_?H9ddf5=)2+uwiN#CAPXX=}6KBLIvceedfQg-HD(34BsY~S3 zmLH~X+()fCa&OVM$V%m(mhg&r1v^oO^vh8AGQ~S?<)k*NL)M+d?*^IL7Ef7*&LO7X z2Kr`Jp>O){kdYsco{@nA04k9HfG5x^ppf}9SjyCO?b12$yg$`E`neT;g-6bsWO_ON z%tm#_c~L%Z07kV1U|MIPqJ9Tyrl-1JkeFkY`{BHPz2rFt7VxgOBHau|9dIK|zlKjm zJVe#2J`#N1rn9ACK%x>?L5l$~w_Z`PXch9j`|dQ)uv3-D9uH7!@8n6G28T)=Vf z^jz@I-^igut=m}m0M5!wuHKCYNVWemL0@qF&e;KnN_&Q7waL|=%Y@!DVe%6 z)#CO8%+9dmmzn3>kDS$|6kUQ4=w zH89SI5Xf=(%Tze;`MzEg;_jVd-b||H>s%@oR`~*R;&jKar>x#il)<9Jns6FOBWC?I zW6_{iENNM0E}lKq_re_#z-yl!w_91P-W+8$C&`IKS5zmr<9&TzKBGcjq#)Ad^X+!{ zpiWqC|IZxp)YMk)Vrn+%IbkC_+TL+%50;esAoCEHB;`is7X6EW$E=|EQX~?(F|-YK zH96G7CX80d&EcHlCV)qZhv`Hd1#WhG9;v_A35wbA3JyJ`_#i^cQLQoo`Eq<-;J-_g=*MV?B9uyp|iL$qBbT9LmixN#8Q}BpCQcW+ei=tw^wz?Cqff7fV1D0sN zqMGkU!{LDow~clj>Y=KR?{;NnA2M4ZGuxdUf(%$*mA)r*~Hc7o!9rn z-b1+T5-CJjq@;d_iNYjwkyYi(~MiZxNP7d3#^EkCty>VJGkmVGmU3pcRyCj zFlEbkBarq>Tb}aAD$uZrA$ad4RnU@*p!W5{t=DpZ(aH23C$4q_b=Qi>3u05{a(Qb* zEGD6H0rV8%&mb|&giksN$prh#j>S8kPqFBtJf@r>f;X?S#4}&9c=@FbVt`8yqhs}D{Euc z3Cpx59}~%H>kgDPxVhq6NO5851TIt<^YW9$?L-nMU&3L4&tp(%Yj!Cn7)HL*lyA|d zTgz#?U_{6~fC~$+GJQPGoK7aeDIPK@Xr1vN^jnoqMKU74rqE7(@U`M+6xTW9#4#cnSKpRY(PdX{c2$dq2JNUE zky9<48fJ0|J>Bq}ykf+`@iD)lyWb|PXD67@pqJc2)RM5xdgN++ZQ!Lj;dr{+VL3^O z9H{M?uMbzG(|56duiaKRSqNj!6OdF;_f>E|$u`yyL+N%VyZJ3deTZOXTwnJ~BY8d&o|BxOMj!E8abmadHp z_$Hk@(YPzV^nLfwk^M zx+!EFyI>r8g9bGw3_11d&!YYh1vKGx99(%xx&~pIq$-zgM%@Fi>wYk2&G1pOXNq_} z9Pgfmw2Anh@7#3hS2xx^lea~pQ&l~hiP3+&zb9PLZ+kf2JK@`%W;p5WZu@aFl-%}s zB6v}`v4qdw>V0~;n?ShgeX*?=L%y&`MxJs^6;^dzv2F~Zk1+JdygPw;@v)mjygU9S z_Kr)@(NI6E-aE>@PUp(Q&P6iKUW69mHmmt2ijzj#=+w{BRR~MQkuF58f@>vdL&wMV znXi`<&Kkp-!^Y#Rj@abN09V`GS#LK~=A$Zs-!Z~*!V361CRN+2BqGU6nlA7OnC44tBr02$D!u+>Nyg${KBDc>)gj;z>x?kcjH5%DsiF@*jO4xiBCt==JX zHsu2q0ZC3?^418GZ(JbX4jK9ic3Me~aHg|cW0v>w-vvW1NW|Ju(Kj*gp6Yf#kCzAS zOg2UkamR_ce2r2Pc3gNJ-{0*+k4YsJ=Nc$W=oK?BBq<_6rc}+jRF6{Ed2dSHfbh*S zEs-Jg7C1^?)e@NsrXX|*sEhzE2-khB9SP$W5kdBQpc9P7-Y@&FmZk+z1%MuvhbLNFAAh25g~7BR&%hp@vD-`pm|^((&p>}g9sIBJ!8 zrrmpSc_C(*+T!IuNDn#P$y98q+Tx~4JQ_miX%=_&hHsgOMB*q~RBmEuw1gR@zE0y! zVOdI~Vcm3~f5E`D5HpUhEdg9^&j^2gq2|U-pJkjE;>?WyX}tVS;mj>2YD&SZv8*Yr z7!4u*#*IlQN+MaVU`Ja7vQj?1@3W)ks+Ek$I6p;X_}Y|I*_lXLA5$Dp&+NV4e5&Sz zljeldUUIg!w8IuEhYdcy+t%fp6nHB{Y(Xs8lq_-E7{9P4wI#9wbu?AlBZ0d(6-LR3 zotrYoNp-%-!QIl?q693J$4V3F^d*X8k|Tg{okeKm!<|-dQ)Ug)1?M34)Vstqp0vYEEmu7)3o39CaS z69sh=6b2DF<5D~myIJ*FeUmVFCdhJZnBV$giE3}Ay=10HcffKI>UZPnps@&ECQ{B+ zpzvQ;t+0P6=~*Q&qn#(~1XD!CfZ+tU`LOdeHQAYwqQm?m0zrEtoc`&R_;Fh|t{Vn} z=5sCKlLCp+G=dRIE;%VCnVr006~hyX%oIe5^JyK~`)tA~C2*Hu#brKvo=QwtKg+khS6~HISg|!?;>@_qwX9EjN5FM(uznimdZX2%&=vmW zi}GR(Hw!Yl;`H-t8)s>Lk&_CB7yTcHwVhwG?8|+;z8zd@nK4Cj^%wTA_vJ5C*-;X= zOQvDGoh0o~d~Oi2l%N%byxJnb!20OJWR0{e_mwq~#IOxZEl?#JyRJ0Ig-B!QzEWq; z`Pq{-gE(a!%&Glb7#+iLDHn^+eRFI_n_H|SLD&%yul0usYu9f#T6N~V93a?~AMa`h zv$SknEQ3sIMHu113c2G4vw}f-3!BcC9|=?7L&J0E8xnqx$f~F)2|SPcW)!KKg@!bCK$gLA5QMpz~8El zJoQD}MGkatib`Ls^P@qRO%@buLU~m=E zOHNF$Q$5Mgds~iYJ{Gy94dib^<}@mq#($fR?(4%CKrB70tgf7mKXmM`<(1UtouKWC zb>K6UA_2&@4z3?PBzXT7G(=lV=+TGgRa_f4-7#w&x=bw1y#GX2Y=a{MN4Bw3q*MIE zBP?~XyS0m0@m928>-T_WiIAW;7Q%Ycd<{H8{jKt@y=`6!*X}3$@fHc+j^oIg|!-^IVyaE{wXh~>kUqd z&iT}@6<6kEwuR_m;y#*?kCA6Ggc+CoTs+ZH{$(#+4im_+&q{6KQ_-QZ6T>=(EP zb>51NLT;L4 zjP^tNjoV~^Drdib;lo0v8X2-wUz zzQxmZ2y0aYn1H}K<>Br`A#*Qe#s-+5CLCSBh2;)(qPl^Gl@63P?&p)&CkhHz-w2OC zb^O#fb?l83B<4UY-C-hU)?w|D3NMAzQ@U&7o`5$H>Ur8J!$7M%{X##}tJ+?Nt7AsF zG*{f1JrsWN1)f;6T?`~%uQHMJy345)fgj6Wg(07U=7_|K-+(HsbLa%BH)`K`{( zNL&nG)s8E!^WGP zkMX6gDjX>Q#W1s?)MhenC1P-j-Vd78rcOatpCYQtMQ*A0qN#UeWOj@@&w zhG7v48XVr8T@W-K4rNVTLRZJYmj>m_+d7o;4NsG}&5}JsBXC<{Ha zeBp>72p*}c(NU9AOY(fi!|4ayRW#{!O4Nc)F}yc{9pHL&q!lCH)aa+C4{lzM=>@X% z4xJUuIaHsff+*N&1}F1Q#-5R3t8zi%H{K+ONj=8-Fb#7n2Hl%l4pJ#w_`>s>8c#|PEy}YM-f}>OVqw2;%3oOle zj;05Z%M!D#dz!g3Ex%1S53egEnxdKXcCLd5o6UJ0l#a`2UYRVh$8g~1w_(>SV(L`2 z2DEl3M{6dMh{iFE5YjPiG(#aiaei2^htdqQ!E4b|pM?2MH_ zcJ_|!#&#g!?^C7!NpjGwi;UNm@8ZAW2|Kn@8d{3Yi|koJ~2=cis{JOM@g!hDU` zToHaArqMAjQ{*Xv%(CYOU;rUPJm`HYSvi;w-neg;vX~S;1a(8@ove4m0{-(v?{WM! z!+0HIluXbbQ-y!kZs?BRw=#*eC{|K{8IfFUR!i3|WzVYK6OAT)G?M)7#7@YYlwGX) zvF~O0LRX_2ki5>LoTLD4MHl@ZKhEj1%8Il&Xns ze=(;GY}hWU*HYOwGdz)p`CY#3GXWt25t?6!gy?-85NHiP)8?^&dfKD2sv>1_dEDRPn?5GYAds z?f-{C=$8F{WWX+z864(4L& zCZ^{-_M*Ody4_03E|UrBpT|!0D1~k1;32@1S`l6*xqV&M^}$DoN2xvGd`TDWw;`?X zrIJpDYXb5^kby6gluy)9$+5}iZZHt2oa|mA-gpiHT9cTqKVL}3gdHfYZuwwOiOf5l zyo<+J(5O=J(c^w9p6)KYb^K|~-uj}9IKg{Zt z!j0Qg&4{nh-hA@I-U%Vd8t|2-fo$s<7DE&zZE{rEvM KcLv4JZ~q4>MK22g literal 0 HcmV?d00001 diff --git a/tests_data/data/files.xlsx b/tests_data/data/files.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..df8456d0e79bd1c9cd0700cd1ad76cf76cefbb1a GIT binary patch literal 8703 zcmeHNgLBA*Wb;?)AXUAvlC+xCOTUI03CJz|BnCS8K_JfQSap;kw+^1 zkX`3`46l{K72FFSBxBc-={k%YFF#x+!!7^ z(56k05H{4UXLd)JyMJ((wTqet;$dWnD4@6_#YghzWD*q6 z#g?3Kta;HYL}_QvWt=A!#uWZ(7NF^vP)}9G=Q94RPm;1&b>!d!bIGlIhRWY^W-!@!tCL}Z-$`FBHbE$R&+rwr7)azO!>1Lic`|<)}wA25@rfVwO7#T@UC; zbZH|n6hk_Ke+&kJM-CWa>0q z@v%RfPseliisrY4D$u2S3ZD<<-jj?!1k>V?m(WF1J<1L<8c;J_v^=Xuo08GltBS5^ z5-#}mVmv2gCZlwlP%;*zy89`ca>&!t_DglpkQ39%Ijx?K9mu}MGS5Sl+27pSsryJS zr;Ge5h)X?pNSj%J?2K<*agep(B*ds*;Be%Xf1fa3Z^h67G(3Uf?IJ2d{hcIX5^0%7 zs2OQRk%Sh2i{|IV{V$&Qx_UTTy1F|4j9$NU1`QR$P+tDKk4kMdl|CMl4*csF9^V`v zO42W0+|0XrJH)uqdf@zhCjQ{#r914crUuh0+!(IW{)o{*AEY4BDhByU569bhGRy#S zCxjT5>*&!a2Cn5^6@9oO7CG7Ox1EYTe4>}0}*&Bw%^ zsRA<Wuzp*yb#Y$5{aib}Wedhhy+ zj`~B7qbmKe@)~qm`D^s-1?b4bwHZS7@XCwXJ^1AX=jmWr)N%2z$O*R}%a zRZ}OA380rOMt-XbFza?ItS+&}>fUb`4>+7lZmm+I>WwuFd z-71IOgF`vFbLU1NfMav3zUAO2j9k1O7d^P`*&G{A9{7kO)}z;lggniLo2vy~b5&Be zEFRkXnQXlDI#k;iL|p2^^CnE}m0G6DZ3{AOK3qlXLjrp(t=z}cSwG~HqtsJe@eig? zA{shkmYYexWDk%Bx0pCCadsfS48O-gsvmSn;5Qk+>7drTk7TqE3B{H?HYRCCh$ET# zL9dfOZ5{86QRmQYk=!cuZvKh8HS3LH<$0sAH^IaqW@5V&+z@P7k@#7LO zJGK$X5?bAnt*Wh-FuO4vj;C<2Q$r5MCdaz69!;Cre>Qxhg}#*v(7^GdX!cu)ap9rU z^1XdFd4=t56dBPU#Eb9jz8jQqGYE_kVzz)2tB>&64#U|qTrA+%t2;EGC)59WM`i7hBJtd!17%FDfLEMESB@ zM{K+j*$yS~fTK;uy zt4yWijZOveyEN@IOQn9Z zeQEh}s^IOD48>-Kf1Dm~iER59s8lME_GcaQ8?n9YY@BVle>?O3oSj`m-v{ z4Cgxp>C#?S_hLQ1A6tvrw7|N#U$XFAvNhI6F4Af@^w|er&H1FrEJWH!5W#4y`4i9F z?mD1~70a^FtMmszQS2^Wt{L|90=j#UX7zAOx4>tjtX8`OB`UtY8|@k^LZl`_iS_}E zrdGW&77t?d;+9qDKTU#U>i1+M$$iB8kdZr@eOTglZImZpGkm!-VCpV%{Zz)IE(O;f zZSW@^20G75>tL-q<3}9@b*JwDx=yPXNx>7u)7KP$q(Pofi3$6k5sj^nBnKS|c0aAQ z&1_9oV4EG8FD=6lH^#d;G&h&#yNR)x{`$?IfKSvV_9V9(|^Hq?b9bG<+ zCFagnJi}z~l-*1UuozrB zk_zKx<)Cb=WAxebntq;XJVc}EMmCydy&>mElj-reb3ICR8SSBXOLmYTItg{CN&SLG zPep}vo@AiU_k(R{uek>OBT@e|&&4JWn^{w7sA^N-c`rER8M5W*EB8`Y&=2qF6MONQ zL+;><)qdBmuHi?C&A9zXu6gZ#7whLu=GTX7RtZS;cm>fQCCiVm9M5K3pq94_q9o}{ zT%YnLlJIhBVs>&>Uo!KB&Sb>SxtTgeANbs2x56+pg9|p(%Wcb$sEmV)X94^C3T>PX z*vsX+DhUT}sULS=y}`fhOI~=-c!}w8A|{gBGX-qvNuw}Z;2%*WS3sj)SbT7ye=P zbEih;`#4oBQ2jYvHgS81UX0)##duY>HX5#u-x@n}-XJMFfKXh%gnIf|KT?hI9^3 z^2+DJY?3nZek>n0^dMmpGBT2tjaY-Ilj^f$%(*HBs6zNpuF`5RZxU9Y* z@R7j6V<}GH;+n(Wn1$I(f&wjP_Q^4ij?^4Qw`^B(0ywAq@PKmmSz_!Kql~A)GI&bJ zYru1cfBz#*jCYrV{vFGp;ikDqoU6}-pr;j6oIDz?fs*E8w*S6YN{xM+yA4Nq|VFApp^SOx%DX4@{0G1wpf6RTJ@S8+#QWC6v zPu*gPIP;LsA!Y=NyNF*z{7!;E;L4JM;U=+_Ubjcy*h5`>T$M|Pt^e30f;((c^S5jJ2n$yW!;xgxnMG5}cVCL4+ z;{9pfiT&@qvQombA}^tMr>|@m>fSiefYbyMK%X@!L5_3%$5(3KX)7$r1PyFHRW%Z} zPqa=y-YL+ms_Ubv+jmsOrEMtUk9cX_4(!#gJJNqK2YQHGuY_s5+L(8*GSzQBn_*WS zT$RhHzw=n{>|1ZlH41(E;eN-<6n@fJ;Ic?=VqmzuxleK~6uX@nDc7kG$zhucX$g=XJ$Z-)o({C4UJbv39?dN`#25K6^q}nOY}SV+59_P>LzbRxRg;xCM_0F zUyM#Khwab-Yi7z~%qCnj!X0jBv==aM3mcb2gDbq-72bS7iV}afAxIQ|@m!pW#H`;D z>H3c`jMk6fT@nTWFpLKP5dJ&Fd-*%r{1)3^>yNt3i;#43*B}@Vs<$3MW`tNMz3I@f z=8EiUyUpz5YU92S7HsJ*7Ja3^AbJ+_XulAV(heh1hU(9~_O6o{TDJ@CJbxOkds?~n zv5l#`M0=f06hBo_j@6NU8gy~gdi-X{QIJaTsdft8o^nDSW!FtcL%{gZR5Zz67(Q&% zuK2`<9@DvjiLS}kLj|+l{`IOsuYOg8PX%tZu~nZTNz4k%a@MVN@sIxA@#M)JDYxF;mf9=A1e;N->F#=x*QO^)V048!(Hf;9 zR>r~&Lo13O9Q+W_(4vhz?0}1%$Z04S3=s-$$jVPE6&k;Th!uKz5lInZ9r?<1QkVE+ zQSLXyS6uIxzRP4ZM@}_z2v8QgH<79&@kafb|Bc`Ior(#8d^0s`zT*P4f{haj>@I);@Cu&`c#P8#wiZw=psK-TF#MO>Ld-XYx?Zsqof%^MohWF9BrcMUS zdl`G9yjDacO~$iCkM{HVTd8tI(iwQp`rMvQsuR0#puwNFu9|7k-P*|h3{-qKsx}ZE z88$wZ%%|2LcHv6s(gnZvM4s<|-kWI~Z@LLcnGCB?AN8d$+KUiF@12N|ccAy#pl5=o zY$9P}*F9o@>Ncy(=*=o2aGvw7>@7geeZocFXV3hyi`OnF%eIp--kR+YcBIa=4#C{7 zKIJmP`uZ4O1%LT4)Z5jhL(y&i@H&f&dt^30I-x8-_r+v>NkU2fed>gb+FWULY6eAW z#7(o$o-g`Gg|v^x*qL?TUhMUl-?ahJI@1M>Fm%&f<@dih@TZLm_NP|}=IhZ35d~>n z@g=Qpuo#+@+K7J0lQ;|@ieU4g?m){Q6Ep!O#uw_#Mh$4QkvWB=$noehiv&kf-u$Bu zQDNt9+JY)7WpMz2+yCP2&!W=P&c?>ell!;fw*)j-59%_Hs=leWBuUy8+c=GD=w7=g zZfX{61(X7ZnVI9*mAR!;Ll7yo6x{Y}11a+Hx&8=3TZNZqh;O%?vvf@aXio1E-<#G< zJ$jnHf>@#gTP3F5S5s$))euPXMZL2{EU7igNW6vcCIJeyXdX7W=*;NVTMKF>+Zker zMWqPB4%ooe4y^ZDyvsR+586|W$rqQ2n9?(!#1<{5jbfS3RKo7YT2sOb%+uTT9vP`| zhgHM{88m@)C%L3J{rgXfvHIVZZeNv?LO3~M1HURp7QKq(2o0J2JbS`l`cem&##WCp zRjjtcK)=;=ix7`0DEJh~}x;bALoi*;O1(jggawpfPXi-g%=>mWUc zf}~trbis7U=5xKw#bj0E0V0R2JQ=I z+`(N7(M~?i<2lVMsbVKV(+Jt*65M0$jA~-%SWF}*bX}qEEOIQ z>UFD65yGOZ%l6mt#jue=QZbBi{jf@y_e@xxwY)|jz#Cm?QT8LK-Qr}-jF_~mabr4W z3f5W790aT&r?#Q?fgs4J1F9U4L_~E)z!oynQY01JR#cEr4nUP)OcX&h%ueHTqN}=8Sh151TxxUw-1fNSJ`cg zao?e$nYd)Zs<)qd2_uh$Z}66QCfpN_ZpBXmqXG7Z3M@5#186^aD;0K1^f_`>@*PeE zd=%#upGWzpeAU@)I^T~OMbtP*SbD^J`pRteF<;*#*;JN-PLo!y&H}YjWtS zf_ad34js9*M<|veW~H1;n!a|Cb0L586Hv{kJ~+*)5W>cZ)^7t(=>dupNc5S!uJaCa|RckIg?=LK0dxLMjrbf zdww#pK|{$zK_gmLX4bpNwh-w5b% zi04pUWIoJQ)N@SOEL-E^LEzms>kp|>Iw!qRwn0j5evY_iGyW2~^V8OE&#}~ZBnT%T zx$<2ySueb-iMg0M>f#ZMJ9!2H9w3^idWd0_m^jf87HAY+g?HC-Z-Ah?*)`sMQ4kZlOM* Ms90_PQ#l3vAB21legFUf literal 0 HcmV?d00001 diff --git a/tests_data/test_configuration_files.py b/tests_data/test_configuration_files.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/__main__.py b/uploader/__main__.py index 7250e68..d6fce4d 100644 --- a/uploader/__main__.py +++ b/uploader/__main__.py @@ -59,7 +59,7 @@ def main(): # for info in files_info: -# db.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) +# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) # exit(0) diff --git a/uploader/base/__init__.py b/uploader/base/__init__.py new file mode 100644 index 0000000..2eae997 --- /dev/null +++ b/uploader/base/__init__.py @@ -0,0 +1,3 @@ +from .idata import * +from .ireader import * +from .iwriter import * diff --git a/uploader/base/idata.py b/uploader/base/idata.py new file mode 100644 index 0000000..9fcce4e --- /dev/null +++ b/uploader/base/idata.py @@ -0,0 +1,27 @@ +import abc +from typing import List +from .itype_recognizer import ITypeRecognizer + +NULL = 'NULL' + + +class IData(metaclass=abc.ABCMeta): + @abc.abstractmethod + def to_dict(self) -> List[dict]: + pass + + @abc.abstractmethod + def columns(self) -> list: + pass + + @abc.abstractmethod + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + pass + + @abc.abstractmethod + def types(self) -> dict: + pass + + @abc.abstractmethod + def rows(self) -> List[list]: + pass diff --git a/uploader/base/ireader.py b/uploader/base/ireader.py new file mode 100644 index 0000000..e9fcc18 --- /dev/null +++ b/uploader/base/ireader.py @@ -0,0 +1,10 @@ +import abc + +from uploader.base import IData + + +class IReader(metaclass=abc.ABCMeta): + @abc.abstractmethod + def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int, + right_offset: int) -> IData: + pass diff --git a/uploader/base/irepresenter.py b/uploader/base/irepresenter.py new file mode 100644 index 0000000..e2e5997 --- /dev/null +++ b/uploader/base/irepresenter.py @@ -0,0 +1,5 @@ +import abc + + +class IRepresenter(metaclass=abc.ABCMeta): + pass diff --git a/uploader/base/itype_recognizer.py b/uploader/base/itype_recognizer.py new file mode 100644 index 0000000..109f4f4 --- /dev/null +++ b/uploader/base/itype_recognizer.py @@ -0,0 +1,15 @@ +import abc + + +class ITypeRecognizer(metaclass=abc.ABCMeta): + @abc.abstractmethod + def type(self, value) -> type: + pass + + @abc.abstractmethod + def default_type(self) -> type: + pass + + @abc.abstractmethod + def convert(self, value): + pass diff --git a/uploader/base/iwriter.py b/uploader/base/iwriter.py new file mode 100644 index 0000000..91fe859 --- /dev/null +++ b/uploader/base/iwriter.py @@ -0,0 +1,8 @@ +import abc +from uploader.base.idata import IData + + +class IWriter(metaclass=abc.ABCMeta): + @abc.abstractmethod + def write(self, data: IData, mapping, append: bool): + pass diff --git a/uploader/core/__init__.py b/uploader/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/common/__init__.py b/uploader/core/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/common/config.py b/uploader/core/common/config.py new file mode 100644 index 0000000..cda1f02 --- /dev/null +++ b/uploader/core/common/config.py @@ -0,0 +1,6 @@ +from uploader.base import IData + + +class Config: + def __init__(self, data: IData): + pass diff --git a/uploader/core/common/file_data.py b/uploader/core/common/file_data.py new file mode 100644 index 0000000..4802545 --- /dev/null +++ b/uploader/core/common/file_data.py @@ -0,0 +1,43 @@ +from typing import List, Type + +from uploader.base import IData, NULL, ITypeRecognizer + + +class FileData(IData): + def __init__(self, data: List[dict]): + self._data = data + self._items = [] + self._columns = [] + self._types = {} + self._type_recognizer: ITypeRecognizer = None + + def to_dict(self) -> List[dict]: + return self._data + + def columns(self) -> list: + if len(self._data) > 0 and len(self._columns) != len(self._data[0].keys()): + self._columns = list(self._data[0].keys()) + return self._columns + + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + self._type_recognizer = type_recognizer + + def types(self) -> dict: + if not self._type_recognizer: + raise ReferenceError("type recognizer is not set") + if not bool(self._types) and len(self._data) != 0: + for key in self.columns(): + for row in self._data: + value = row[key] + if value != NULL: + self._types[key] = self._type_recognizer.type(value) + break + if key not in self._types: + self._types[key] = self._type_recognizer.default_type() + return self._types + + def rows(self) -> List[list]: + if len(self._items) != len(self._data): + for row in self._data: + self._items.append(list(row.values())) + return self._items diff --git a/uploader/core/common/type_recognizer.py b/uploader/core/common/type_recognizer.py new file mode 100644 index 0000000..afa9a96 --- /dev/null +++ b/uploader/core/common/type_recognizer.py @@ -0,0 +1,37 @@ +import datetime as dt + +from uploader.base import ITypeRecognizer + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} + + +class TypeRecognizer(ITypeRecognizer): + def type(self, value) -> type: + global TYPE_CONVERTERS + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return self.default_type() + + def default_type(self): + return str + + def convert(self, value): + global TYPE_CONVERTERS + for _, converter in TYPE_CONVERTERS.items(): + try: + return converter(value) + except Exception: + continue + return self.default_type()(value) diff --git a/uploader/core/excel/__init__.py b/uploader/core/excel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/excel/reader.py b/uploader/core/excel/reader.py new file mode 100644 index 0000000..bcc7eb4 --- /dev/null +++ b/uploader/core/excel/reader.py @@ -0,0 +1,58 @@ +import logging + +import pandas as pd +import xlrd + +from typing import List + +from xlrd import XLRDError + +from uploader.base import IReader, IData, NULL +from uploader.core.common.file_data import FileData + + +class ExcelReader(IReader): + def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0, + right_offset: int = 0) -> IData: + if not self.is_excel_file(path): + # TODO: log it, raise exception + pass + return FileData(self.__excel_to_list_of_dicts(path)) + + @staticmethod + def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + def __excel_to_data_frame(self, file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + @staticmethod + def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except XLRDError: + return False + except Exception: + return False + + def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]: + return self.__excel_to_data_frame(file_path).to_dict('records') diff --git a/uploader/core/postgres/__init__.py b/uploader/core/postgres/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/uploader/core/postgres/sql_executor.py b/uploader/core/postgres/sql_executor.py new file mode 100644 index 0000000..9cc6185 --- /dev/null +++ b/uploader/core/postgres/sql_executor.py @@ -0,0 +1,33 @@ +import logging +import psycopg2 as pg +import sys +from typing import List + +from uploader.database.database_settings import DatabaseSettings + + +class SqlExecutor: + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def execute(self, command: str) -> List[tuple]: + connection = None + cursor = None + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + data = [] + if cursor.statusmessage.startswith('SELECT '): + data = cursor.fetchall() + connection.commit() + return data + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + sys.exit(1) + finally: + if cursor: + cursor.close() + if connection: + connection.close() diff --git a/uploader/core/postgres/sql_representer.py b/uploader/core/postgres/sql_representer.py new file mode 100644 index 0000000..86139b1 --- /dev/null +++ b/uploader/core/postgres/sql_representer.py @@ -0,0 +1,16 @@ +from uploader.base import IData +from uploader.base.irepresenter import IRepresenter + + +class SqlRepresenter(IRepresenter): + def __init__(self, data: IData): + self._table: str = None + self._values: str = None + self._data = data + + def scheme(self) -> dict: + d = {'create': '', 'drop': 'mapping'} + return d + + def data(self) -> dict: + pass diff --git a/uploader/core/postgres/writer.py b/uploader/core/postgres/writer.py new file mode 100644 index 0000000..55db11c --- /dev/null +++ b/uploader/core/postgres/writer.py @@ -0,0 +1,33 @@ +from uploader.core.common.type_recognizer import TypeRecognizer +from uploader.base import IWriter, IData, ITypeRecognizer +from uploader.core.postgres.sql_executor import SqlExecutor +from uploader.core.postgres.sql_representer import SqlRepresenter + + +class Writer(IWriter): + def __init__(self, settings=None, type_recognizer: ITypeRecognizer = TypeRecognizer()): + # FIXME: inject type recognizer + self._type_recognizer = type_recognizer + self._executor = SqlExecutor(settings) + + def write(self, data: IData, mapping: dict, append: bool = False, drop_if_exists: bool = False): + data.set_type_recognizer(self._type_recognizer) + representer = SqlRepresenter(data) + self._executor.execute(representer.scheme()['create']) + self._executor.execute(representer.data()['query']) + + def dd(self, drop_if_exists: bool = False): + if drop_if_exists: + command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) + self.execute(command_to_drop_table) + self.create_table(table_name, columns, data) + else: + command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ + "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) + result = self.execute(command) + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: + self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) + self.__insert_rows(table_name, columns, data) + else: + self.create_table(table_name, columns, data) \ No newline at end of file diff --git a/uploader/database/database.py b/uploader/database/database.py index 7145a8d..c58ce87 100644 --- a/uploader/database/database.py +++ b/uploader/database/database.py @@ -81,7 +81,8 @@ def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exist command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) result = self.execute(command) - if len(result) > 0 and result[0]: + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) self.__insert_rows(table_name, columns, data) else: diff --git a/uploader/database/row.py b/uploader/database/row.py new file mode 100644 index 0000000..353e959 --- /dev/null +++ b/uploader/database/row.py @@ -0,0 +1,6 @@ +class Row(object): + pass + + +class RowsContainer(object): + pass \ No newline at end of file diff --git a/uploader/database/table.py b/uploader/database/table.py new file mode 100644 index 0000000..803a050 --- /dev/null +++ b/uploader/database/table.py @@ -0,0 +1,20 @@ +from uploader.database_utils import py_type_to_pg_type, py_value_to_pg_value +from uploader.database import Database + + +class Table(object): + def __init__(self, db: Database, name: str, columns: dict): + self._columns = columns + self._name = name + self._db = db + + def to_sql(self): + columns_definition_list = [] + for column_name, column_type in self._columns.items(): + column_definition = '{} {}'.format( + column_name, py_type_to_pg_type(column_type)) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format( + self._db._settings.schema, self._name, columns_definition) + return command diff --git a/uploader/main.py b/uploader/main.py new file mode 100644 index 0000000..f9bd2c6 --- /dev/null +++ b/uploader/main.py @@ -0,0 +1,9 @@ +# from uploader.core.common.type_recognizer import TypeRecognizer +# from uploader.core.excel.reader import ExcelReader +# +# +# def main(): +# reader = ExcelReader() +# type_recognizer = TypeRecognizer() +# data = reader.read("path") +# data.set_type_recognizer(type_recognizer) From 8bb12950897e3b5e683e9bf95ddf24117e2ef093 Mon Sep 17 00:00:00 2001 From: Umedzhon Abdumuminov Date: Tue, 15 Oct 2019 03:00:03 +0300 Subject: [PATCH 2/2] Some new stuff --- .github/workflows/ci.yml | 96 ++++---- .gitignore | 208 +++++++++--------- .vscode/settings.json | 9 + LICENSE | 42 ++-- README.md | 24 +- docker/dbeaver.Dockerfile | 36 +-- docker/docker-compose.yml | 42 ++-- requirements.txt | 8 +- .../common/test_type_recognizer.py | 32 +-- tests/test_database_utils.py | 20 +- uploader/__main__.py | 134 +++++------ uploader/base/__init__.py | 6 +- uploader/base/idata.py | 54 ++--- uploader/base/ireader.py | 20 +- uploader/base/irepresenter.py | 10 +- uploader/base/itype_recognizer.py | 30 +-- uploader/base/iwriter.py | 16 +- uploader/config_reader.py | 110 ++++----- uploader/core/common/config.py | 12 +- uploader/core/common/file_data.py | 86 ++++---- uploader/core/common/type_recognizer.py | 74 +++---- uploader/core/excel/reader.py | 116 +++++----- uploader/core/postgres/sql_executor.py | 66 +++--- uploader/core/postgres/sql_representer.py | 32 +-- uploader/core/postgres/writer.py | 64 +++--- uploader/database/database.py | 178 +++++++-------- uploader/database/database_settings.py | 24 +- uploader/database/database_utils.py | 148 ++++++------- uploader/database/row.py | 10 +- uploader/database/table.py | 40 ++-- uploader/database_new/database.py | 48 ++-- uploader/database_new/row.py | 4 +- uploader/database_new/table.py | 40 ++-- uploader/excel_helper.py | 194 ++++++++-------- uploader/file_uploader.py | 72 +++--- uploader/main.py | 18 +- uploader/utils.py | 62 +++--- 37 files changed, 1097 insertions(+), 1088 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc11ef0..8dd8ffd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,48 +1,48 @@ -name: build - -on: - push: - branches: - - master - - release/* - pull_request: - branches: - - master - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - max-parallel: 4 - matrix: - python-version: [3.7] - - steps: - - uses: actions/checkout@v1 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pip install pytest pytest-cov - python -m pytest --cov=./ --cov-report=xml - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1.0.2 - with: - token: ${{secrets.CODECOV_TOKEN}} - file: ./coverage.xml - flags: unittests - name: codecov-umbrella +name: build + +on: + push: + branches: + - master + - release/* + pull_request: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + max-parallel: 4 + matrix: + python-version: [3.7] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pip install pytest pytest-cov + python -m pytest --cov=./ --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1.0.2 + with: + token: ${{secrets.CODECOV_TOKEN}} + file: ./coverage.xml + flags: unittests + name: codecov-umbrella diff --git a/.gitignore b/.gitignore index 894a44c..60fbcad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,104 +1,104 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..cd5d5f7 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "python.pythonPath": "venv\\Scripts\\python.exe", + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.nosetestsEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/LICENSE b/LICENSE index 35ce4ed..a63ed44 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,21 @@ -MIT License - -Copyright (c) 2019 Umedzhon Abdumuminov - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +MIT License + +Copyright (c) 2019 Umedzhon Abdumuminov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 570e40c..c37d27f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ -# xl2pgtable - -[![GitHub Actions status](https://github.com/umed/xl2pgtable/workflows/build/badge.svg)](https://github.com/umed/xl2pgtable/actions) -[![codecov](https://codecov.io/gh/umed/xl2pgtable/branch/master/graph/badge.svg)](https://codecov.io/gh/umed/xl2pgtable) - -It's an python library to upload directory of Excel files into PostgreSQL database. - -# How to run - -``` -python -m xl2pgtable/uploader list: -# if not os.path.exists(dir_name): -# logging.error('"{}" does not exist'.format(dir_name)) -# exit(1) -# if not exclude: -# exclude = [] -# files_info = [] -# files = get_excel_files_in_dir(dir_name, exclude) -# for file_path in files: -# try: -# files_info.append(create_file_info(file_path)) -# except ValueError: -# print("Error happened during '{}' reading. Will be skipped.".format(file_path)) -# return files_info - - -# def create_argparse(): -# argparse. - - -def main(): - if len(sys.argv) == 2: - files_path = sys.argv[1] - tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} - for f in get_excel_files_in_dir(files_path, [])] - elif len(sys.argv) == 3: - files_config_path = sys.argv[1] - mappings_path = sys.argv[2] - tables = read_config(files_config_path) - apply_column_mappings(mappings_path, tables) - else: - files_path = PATH_TO_FOLDER_WITH_EXCEL_FILES - tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} - for f in get_excel_files_in_dir(files_path, [])] - - settings = DatabaseSettings() - db = Database(settings) - file_uploader = FileUploader(db) - for table in tables: - file_uploader.upload(table['Link'], table['Table name'], table['mappings']) - sys.exit(0) - - -# for info in files_info: -# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) -# exit(0) - - -if __name__ == "__main__": - main() +from uploader.utils import create_table_name +import logging +import os +import sys + +from uploader.database.database import Database +from uploader.database.database_settings import DatabaseSettings +from uploader.excel_helper import get_excel_files_in_dir +from uploader.file_uploader import FileUploader +from uploader.config_reader import read_config, apply_column_mappings +import argparse + +# REPLACE PATH BY YOUR PATH TO EXCEL FILES +# use double slashes on windows +PATH_TO_FOLDER_WITH_EXCEL_FILES = 'C:\\Users\\uabdumum\\Desktop\\projects\\test_data' + + +# def create_files_info(dir_name: str, exclude: list = None) -> list: +# if not os.path.exists(dir_name): +# logging.error('"{}" does not exist'.format(dir_name)) +# exit(1) +# if not exclude: +# exclude = [] +# files_info = [] +# files = get_excel_files_in_dir(dir_name, exclude) +# for file_path in files: +# try: +# files_info.append(create_file_info(file_path)) +# except ValueError: +# print("Error happened during '{}' reading. Will be skipped.".format(file_path)) +# return files_info + + +# def create_argparse(): +# argparse. + + +def main(): + if len(sys.argv) == 2: + files_path = sys.argv[1] + tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} + for f in get_excel_files_in_dir(files_path, [])] + elif len(sys.argv) == 3: + files_config_path = sys.argv[1] + mappings_path = sys.argv[2] + tables = read_config(files_config_path) + apply_column_mappings(mappings_path, tables) + else: + files_path = PATH_TO_FOLDER_WITH_EXCEL_FILES + tables = [{'Link': f, 'Table name': create_table_name(f), 'mappings': None} + for f in get_excel_files_in_dir(files_path, [])] + + settings = DatabaseSettings() + db = Database(settings) + file_uploader = FileUploader(db) + for table in tables: + file_uploader.upload(table['Link'], table['Table name'], table['mappings']) + sys.exit(0) + + +# for info in files_info: +# postgres.rewrite_data(info[TABLE_NAME], info[COLUMNS_INFO], info[ROWS]) +# exit(0) + + +if __name__ == "__main__": + main() diff --git a/uploader/base/__init__.py b/uploader/base/__init__.py index 2eae997..5e50570 100644 --- a/uploader/base/__init__.py +++ b/uploader/base/__init__.py @@ -1,3 +1,3 @@ -from .idata import * -from .ireader import * -from .iwriter import * +from .idata import * +from .ireader import * +from .iwriter import * diff --git a/uploader/base/idata.py b/uploader/base/idata.py index 9fcce4e..e1b81b5 100644 --- a/uploader/base/idata.py +++ b/uploader/base/idata.py @@ -1,27 +1,27 @@ -import abc -from typing import List -from .itype_recognizer import ITypeRecognizer - -NULL = 'NULL' - - -class IData(metaclass=abc.ABCMeta): - @abc.abstractmethod - def to_dict(self) -> List[dict]: - pass - - @abc.abstractmethod - def columns(self) -> list: - pass - - @abc.abstractmethod - def set_type_recognizer(self, type_recognizer: ITypeRecognizer): - pass - - @abc.abstractmethod - def types(self) -> dict: - pass - - @abc.abstractmethod - def rows(self) -> List[list]: - pass +import abc +from typing import List +from .itype_recognizer import ITypeRecognizer + +NULL = 'NULL' + + +class IData(metaclass=abc.ABCMeta): + @abc.abstractmethod + def to_dict(self) -> List[dict]: + pass + + @abc.abstractmethod + def columns(self) -> list: + pass + + @abc.abstractmethod + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + pass + + @abc.abstractmethod + def types(self) -> dict: + pass + + @abc.abstractmethod + def rows(self) -> List[list]: + pass diff --git a/uploader/base/ireader.py b/uploader/base/ireader.py index e9fcc18..3fbaaa7 100644 --- a/uploader/base/ireader.py +++ b/uploader/base/ireader.py @@ -1,10 +1,10 @@ -import abc - -from uploader.base import IData - - -class IReader(metaclass=abc.ABCMeta): - @abc.abstractmethod - def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int, - right_offset: int) -> IData: - pass +import abc + +from uploader.base import IData + + +class IReader(metaclass=abc.ABCMeta): + @abc.abstractmethod + def read(self, path: str, top_offset: int, bottom_offset: int, left_offset: int, + right_offset: int) -> IData: + pass diff --git a/uploader/base/irepresenter.py b/uploader/base/irepresenter.py index e2e5997..db19f4d 100644 --- a/uploader/base/irepresenter.py +++ b/uploader/base/irepresenter.py @@ -1,5 +1,5 @@ -import abc - - -class IRepresenter(metaclass=abc.ABCMeta): - pass +import abc + + +class IRepresenter(metaclass=abc.ABCMeta): + pass diff --git a/uploader/base/itype_recognizer.py b/uploader/base/itype_recognizer.py index 109f4f4..d0efa8b 100644 --- a/uploader/base/itype_recognizer.py +++ b/uploader/base/itype_recognizer.py @@ -1,15 +1,15 @@ -import abc - - -class ITypeRecognizer(metaclass=abc.ABCMeta): - @abc.abstractmethod - def type(self, value) -> type: - pass - - @abc.abstractmethod - def default_type(self) -> type: - pass - - @abc.abstractmethod - def convert(self, value): - pass +import abc + + +class ITypeRecognizer(metaclass=abc.ABCMeta): + @abc.abstractmethod + def type(self, value) -> type: + pass + + @abc.abstractmethod + def default_type(self) -> type: + pass + + @abc.abstractmethod + def convert(self, value): + pass diff --git a/uploader/base/iwriter.py b/uploader/base/iwriter.py index 91fe859..ed75899 100644 --- a/uploader/base/iwriter.py +++ b/uploader/base/iwriter.py @@ -1,8 +1,8 @@ -import abc -from uploader.base.idata import IData - - -class IWriter(metaclass=abc.ABCMeta): - @abc.abstractmethod - def write(self, data: IData, mapping, append: bool): - pass +import abc +from uploader.base.idata import IData + + +class IWriter(metaclass=abc.ABCMeta): + @abc.abstractmethod + def write(self, data: IData, mapping, append: bool): + pass diff --git a/uploader/config_reader.py b/uploader/config_reader.py index f873a5a..d836d27 100644 --- a/uploader/config_reader.py +++ b/uploader/config_reader.py @@ -1,55 +1,55 @@ -import logging - -from uploader.excel_helper import is_excel_file, excel_to_list_of_dicts -from uploader.utils import create_table_name -from uploader import utils -from typing import List -import os - -COLUMNS_LIST = ["Link", "Table name", "Department name"] - - -def __absolute_path(base: str, file_path: str): - if os.path.isabs(file_path): - return file_path - return os.path.join(base, file_path) - - -def read_config(file_path: str) -> list: - configs = __read_excel(file_path, COLUMNS_LIST) - file_dir = os.path.dirname(file_path) - for config in configs: - table_name = config.get('Table name', None) - config['Link'] = __absolute_path(file_dir, config['Link']) - if not table_name or table_name == utils.NULL: - config['Table name'] = create_table_name(config['Link'], config['Department name']) - return configs - - -def __read_excel(file_path, columns_names_to_check: list) -> List[dict]: - if not is_excel_file(file_path): - raise FileExistsError('"{}" is not excel file'.format(file_path)) - configs = excel_to_list_of_dicts(file_path) - if not configs or len(configs) == 0: - raise ValueError('Config file is empty or could not parse it') - if not all(item in configs[0] for item in columns_names_to_check): - raise ValueError('File format is invalid') - return configs - - -def __get_table_columns(column_mappings: List[dict], table_name: str): - table_mappings = [] - for mapping in column_mappings: - if mapping.get('Table name', None) == table_name: - table_mappings.append(mapping) - if len(table_mappings) == 0: - logging.error('There is no columns mapping of {} table'.format(table_name)) - return table_mappings - - -def apply_column_mappings(file_path: str, configs: List[dict]): - column_mappings = __read_excel(file_path, []) - for config in configs: - table_name = config.get('Table name', None) - if table_name: - config['mappings'] = __get_table_columns(column_mappings, table_name) +import logging + +from uploader.excel_helper import is_excel_file, excel_to_list_of_dicts +from uploader.utils import create_table_name +from uploader import utils +from typing import List +import os + +COLUMNS_LIST = ["Link", "Table name", "Department name"] + + +def __absolute_path(base: str, file_path: str): + if os.path.isabs(file_path): + return file_path + return os.path.join(base, file_path) + + +def read_config(file_path: str) -> list: + configs = __read_excel(file_path, COLUMNS_LIST) + file_dir = os.path.dirname(file_path) + for config in configs: + table_name = config.get('Table name', None) + config['Link'] = __absolute_path(file_dir, config['Link']) + if not table_name or table_name == utils.NULL: + config['Table name'] = create_table_name(config['Link'], config['Department name']) + return configs + + +def __read_excel(file_path, columns_names_to_check: list) -> List[dict]: + if not is_excel_file(file_path): + raise FileExistsError('"{}" is not excel file'.format(file_path)) + configs = excel_to_list_of_dicts(file_path) + if not configs or len(configs) == 0: + raise ValueError('Config file is empty or could not parse it') + if not all(item in configs[0] for item in columns_names_to_check): + raise ValueError('File format is invalid') + return configs + + +def __get_table_columns(column_mappings: List[dict], table_name: str): + table_mappings = [] + for mapping in column_mappings: + if mapping.get('Table name', None) == table_name: + table_mappings.append(mapping) + if len(table_mappings) == 0: + logging.error('There is no columns mapping of {} table'.format(table_name)) + return table_mappings + + +def apply_column_mappings(file_path: str, configs: List[dict]): + column_mappings = __read_excel(file_path, []) + for config in configs: + table_name = config.get('Table name', None) + if table_name: + config['mappings'] = __get_table_columns(column_mappings, table_name) diff --git a/uploader/core/common/config.py b/uploader/core/common/config.py index cda1f02..845655b 100644 --- a/uploader/core/common/config.py +++ b/uploader/core/common/config.py @@ -1,6 +1,6 @@ -from uploader.base import IData - - -class Config: - def __init__(self, data: IData): - pass +from uploader.base import IData + + +class Config: + def __init__(self, data: IData): + pass diff --git a/uploader/core/common/file_data.py b/uploader/core/common/file_data.py index 4802545..c312c2c 100644 --- a/uploader/core/common/file_data.py +++ b/uploader/core/common/file_data.py @@ -1,43 +1,43 @@ -from typing import List, Type - -from uploader.base import IData, NULL, ITypeRecognizer - - -class FileData(IData): - def __init__(self, data: List[dict]): - self._data = data - self._items = [] - self._columns = [] - self._types = {} - self._type_recognizer: ITypeRecognizer = None - - def to_dict(self) -> List[dict]: - return self._data - - def columns(self) -> list: - if len(self._data) > 0 and len(self._columns) != len(self._data[0].keys()): - self._columns = list(self._data[0].keys()) - return self._columns - - def set_type_recognizer(self, type_recognizer: ITypeRecognizer): - self._type_recognizer = type_recognizer - - def types(self) -> dict: - if not self._type_recognizer: - raise ReferenceError("type recognizer is not set") - if not bool(self._types) and len(self._data) != 0: - for key in self.columns(): - for row in self._data: - value = row[key] - if value != NULL: - self._types[key] = self._type_recognizer.type(value) - break - if key not in self._types: - self._types[key] = self._type_recognizer.default_type() - return self._types - - def rows(self) -> List[list]: - if len(self._items) != len(self._data): - for row in self._data: - self._items.append(list(row.values())) - return self._items +from typing import List, Type + +from uploader.base import IData, NULL, ITypeRecognizer + + +class FileData(IData): + def __init__(self, data: List[dict]): + self._data = data + self._items = [] + self._columns = [] + self._types = {} + self._type_recognizer: ITypeRecognizer = None + + def to_dict(self) -> List[dict]: + return self._data + + def columns(self) -> list: + if len(self._data) > 0 and len(self._columns) != len(self._data[0].keys()): + self._columns = list(self._data[0].keys()) + return self._columns + + def set_type_recognizer(self, type_recognizer: ITypeRecognizer): + self._type_recognizer = type_recognizer + + def types(self) -> dict: + if not self._type_recognizer: + raise ReferenceError("type recognizer is not set") + if not bool(self._types) and len(self._data) != 0: + for key in self.columns(): + for row in self._data: + value = row[key] + if value != NULL: + self._types[key] = self._type_recognizer.type(value) + break + if key not in self._types: + self._types[key] = self._type_recognizer.default_type() + return self._types + + def rows(self) -> List[list]: + if len(self._items) != len(self._data): + for row in self._data: + self._items.append(list(row.values())) + return self._items diff --git a/uploader/core/common/type_recognizer.py b/uploader/core/common/type_recognizer.py index afa9a96..a31cdba 100644 --- a/uploader/core/common/type_recognizer.py +++ b/uploader/core/common/type_recognizer.py @@ -1,37 +1,37 @@ -import datetime as dt - -from uploader.base import ITypeRecognizer - -TYPE_CONVERTERS = { - int: int, - float: float, - dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), - dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), - dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, - '%d.%m.%Y %H:%M:%S'), - str: str, -} - - -class TypeRecognizer(ITypeRecognizer): - def type(self, value) -> type: - global TYPE_CONVERTERS - for converter_type, converter in TYPE_CONVERTERS.items(): - try: - converter(value) - return converter_type - except Exception: - continue - return self.default_type() - - def default_type(self): - return str - - def convert(self, value): - global TYPE_CONVERTERS - for _, converter in TYPE_CONVERTERS.items(): - try: - return converter(value) - except Exception: - continue - return self.default_type()(value) +import datetime as dt + +from uploader.base import ITypeRecognizer + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} + + +class TypeRecognizer(ITypeRecognizer): + def type(self, value) -> type: + global TYPE_CONVERTERS + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return self.default_type() + + def default_type(self): + return str + + def convert(self, value): + global TYPE_CONVERTERS + for _, converter in TYPE_CONVERTERS.items(): + try: + return converter(value) + except Exception: + continue + return self.default_type()(value) diff --git a/uploader/core/excel/reader.py b/uploader/core/excel/reader.py index bcc7eb4..aa9b3e3 100644 --- a/uploader/core/excel/reader.py +++ b/uploader/core/excel/reader.py @@ -1,58 +1,58 @@ -import logging - -import pandas as pd -import xlrd - -from typing import List - -from xlrd import XLRDError - -from uploader.base import IReader, IData, NULL -from uploader.core.common.file_data import FileData - - -class ExcelReader(IReader): - def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0, - right_offset: int = 0) -> IData: - if not self.is_excel_file(path): - # TODO: log it, raise exception - pass - return FileData(self.__excel_to_list_of_dicts(path)) - - @staticmethod - def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: - (_, row_values) = next(df.iterrows()) - cols_number_to_skip = 0 - for cols_number_to_skip, value in enumerate(row_values): - if not pd.isna(value): - break - if cols_number_to_skip == len(row_values): - error_message = 'Cannot handle file. Probably, it is empty' - logging.error(error_message) - raise ValueError(error_message) - return list(range(0, cols_number_to_skip)) - - def __excel_to_data_frame(self, file_path) -> pd.DataFrame: - df = pd.read_excel(file_path, header=None) - df.dropna(how='all', inplace=True) - # shift table if data are not placed in the first row/column - cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df) - df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) - # first row as columns names - df.fillna(NULL, inplace=True) - df.rename(columns=df.iloc[0], inplace=True) - df.drop(df.index[0], inplace=True) - return df - - @staticmethod - def is_excel_file(file_path: str) -> bool: - try: - xlrd.open_workbook(file_path).release_resources() - return True - except XLRDError: - return False - except Exception: - return False - - def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]: - return self.__excel_to_data_frame(file_path).to_dict('records') +import logging + +import pandas as pd +import xlrd + +from typing import List + +from xlrd import XLRDError + +from uploader.base import IReader, IData, NULL +from uploader.core.common.file_data import FileData + + +class ExcelReader(IReader): + def read(self, path: str, top_offset: int = 0, bottom_offset: int = 0, left_offset: int = 0, + right_offset: int = 0) -> IData: + if not self.is_excel_file(path): + # TODO: log it, raise exception + pass + return FileData(self.__excel_to_list_of_dicts(path)) + + @staticmethod + def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + def __excel_to_data_frame(self, file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = self.__get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + @staticmethod + def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except XLRDError: + return False + except Exception: + return False + + def __excel_to_list_of_dicts(self, file_path: str) -> List[dict]: + return self.__excel_to_data_frame(file_path).to_dict('records') diff --git a/uploader/core/postgres/sql_executor.py b/uploader/core/postgres/sql_executor.py index 9cc6185..f17e320 100644 --- a/uploader/core/postgres/sql_executor.py +++ b/uploader/core/postgres/sql_executor.py @@ -1,33 +1,33 @@ -import logging -import psycopg2 as pg -import sys -from typing import List - -from uploader.database.database_settings import DatabaseSettings - - -class SqlExecutor: - def __init__(self, settings: DatabaseSettings): - self._settings = settings - - def execute(self, command: str) -> List[tuple]: - connection = None - cursor = None - try: - connection = pg.connect(**dict(self._settings)) - cursor = connection.cursor() - cursor.execute(command) - data = [] - if cursor.statusmessage.startswith('SELECT '): - data = cursor.fetchall() - connection.commit() - return data - except Exception as e: - logging.error( - "Something goes wrong during SQL script execution: {}".format(str(e))) - sys.exit(1) - finally: - if cursor: - cursor.close() - if connection: - connection.close() +import logging +import psycopg2 as pg +import sys +from typing import List + +from uploader.database.database_settings import DatabaseSettings + + +class SqlExecutor: + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def execute(self, command: str) -> List[tuple]: + connection = None + cursor = None + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + data = [] + if cursor.statusmessage.startswith('SELECT '): + data = cursor.fetchall() + connection.commit() + return data + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + sys.exit(1) + finally: + if cursor: + cursor.close() + if connection: + connection.close() diff --git a/uploader/core/postgres/sql_representer.py b/uploader/core/postgres/sql_representer.py index 86139b1..b9cf943 100644 --- a/uploader/core/postgres/sql_representer.py +++ b/uploader/core/postgres/sql_representer.py @@ -1,16 +1,16 @@ -from uploader.base import IData -from uploader.base.irepresenter import IRepresenter - - -class SqlRepresenter(IRepresenter): - def __init__(self, data: IData): - self._table: str = None - self._values: str = None - self._data = data - - def scheme(self) -> dict: - d = {'create': '', 'drop': 'mapping'} - return d - - def data(self) -> dict: - pass +from uploader.base import IData +from uploader.base.irepresenter import IRepresenter + + +class SqlRepresenter(IRepresenter): + def __init__(self, data: IData): + self._table: str = None + self._values: str = None + self._data = data + + def scheme(self) -> dict: + d = {'create': '', 'drop': 'mapping'} + return d + + def data(self) -> dict: + pass diff --git a/uploader/core/postgres/writer.py b/uploader/core/postgres/writer.py index 55db11c..cb1a83e 100644 --- a/uploader/core/postgres/writer.py +++ b/uploader/core/postgres/writer.py @@ -1,33 +1,33 @@ -from uploader.core.common.type_recognizer import TypeRecognizer -from uploader.base import IWriter, IData, ITypeRecognizer -from uploader.core.postgres.sql_executor import SqlExecutor -from uploader.core.postgres.sql_representer import SqlRepresenter - - -class Writer(IWriter): - def __init__(self, settings=None, type_recognizer: ITypeRecognizer = TypeRecognizer()): - # FIXME: inject type recognizer - self._type_recognizer = type_recognizer - self._executor = SqlExecutor(settings) - - def write(self, data: IData, mapping: dict, append: bool = False, drop_if_exists: bool = False): - data.set_type_recognizer(self._type_recognizer) - representer = SqlRepresenter(data) - self._executor.execute(representer.scheme()['create']) - self._executor.execute(representer.data()['query']) - - def dd(self, drop_if_exists: bool = False): - if drop_if_exists: - command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) - self.execute(command_to_drop_table) - self.create_table(table_name, columns, data) - else: - command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ - "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) - result = self.execute(command) - print(table_name, result) - if len(result) > 0 and len(result[0]) > 0 and result[0][0]: - self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) - self.__insert_rows(table_name, columns, data) - else: +from uploader.core.common.type_recognizer import TypeRecognizer +from uploader.base import IWriter, IData, ITypeRecognizer +from uploader.core.postgres.sql_executor import SqlExecutor +from uploader.core.postgres.sql_representer import SqlRepresenter + + +class Writer(IWriter): + def __init__(self, settings=None, type_recognizer: ITypeRecognizer = TypeRecognizer()): + # FIXME: inject type recognizer + self._type_recognizer = type_recognizer + self._executor = SqlExecutor(settings) + + def write(self, data: IData, mapping: dict, append: bool = False, drop_if_exists: bool = False): + data.set_type_recognizer(self._type_recognizer) + representer = SqlRepresenter(data) + self._executor.execute(representer.scheme()['create']) + self._executor.execute(representer.data()['query']) + + def dd(self, drop_if_exists: bool = False): + if drop_if_exists: + command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) + self.execute(command_to_drop_table) + self.create_table(table_name, columns, data) + else: + command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ + "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) + result = self.execute(command) + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: + self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) + self.__insert_rows(table_name, columns, data) + else: self.create_table(table_name, columns, data) \ No newline at end of file diff --git a/uploader/database/database.py b/uploader/database/database.py index c58ce87..61d0599 100644 --- a/uploader/database/database.py +++ b/uploader/database/database.py @@ -1,89 +1,89 @@ -import logging -import sys -from typing import List - -import psycopg2 as pg - -from uploader.database.database_settings import DatabaseSettings -from uploader.database.database_utils import py_type_to_pg_type, py_value_to_pg_value - - -class Database(object): - def __init__(self, settings: DatabaseSettings): - self._settings = settings - - def execute(self, command: str) -> List[tuple]: - connection = None - cursor = None - try: - connection = pg.connect(**dict(self._settings)) - cursor = connection.cursor() - cursor.execute(command) - data = [] - if cursor.statusmessage.startswith('SELECT '): - data = cursor.fetchall() - connection.commit() - return data - except Exception as e: - logging.error( - "Something goes wrong during SQL script execution: {}".format(str(e))) - sys.exit(1) - finally: - if cursor: - cursor.close() - if connection: - connection.close() - - @staticmethod - def __row_to_insert_str(columns: dict, row: dict) -> str: - values = ', '.join([py_value_to_pg_value(columns[key], value) for key, value in row.items()]) - return '({})'.format(values) - - @staticmethod - def __rows_to_insert_str(columns: dict, rows: list) -> str: - rows_to_insert_list = [Database.__row_to_insert_str(columns, row) for row in rows] - return ', '.join(rows_to_insert_list) - - def __create_insert_query(self, table_name: str, columns: dict, data: list) -> str: - rows_to_insert_str = Database.__rows_to_insert_str(columns, data) - return 'insert into {}.{} values {}'.format(self._settings.schema, table_name, rows_to_insert_str) - - def __create_table(self, name, columns: dict): - columns_definition_list = [] - for column_key, column_value in columns.items(): - column_mapping = column_value.get('mapping', None) - column_name = column_mapping['name'] if column_mapping and column_mapping['name'] else column_value['name'] - column_type = column_mapping['type'] if column_mapping and column_mapping['type'] else py_type_to_pg_type( - column_value['type']) - column_definition = '{} {}'.format(column_name, column_type) - columns_definition_list.append(column_definition) - columns_definition = ', '.join(columns_definition_list) - command = 'create table {}.{} ({})'.format(self._settings.schema, name, columns_definition) - print(command) - self.execute(command) - print('Table "{}" was created/updated'.format(name)) - - def __insert_rows(self, name: str, columns: dict, data: list): - insert_query = self.__create_insert_query(name, columns, data) - self.execute(insert_query) - print('Rows inserted to table {}'.format(name)) - - def create_table(self, name: str, columns: dict, data: list): - self.__create_table(name, columns) - self.__insert_rows(name, columns, data) - - def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exists: bool = False): - if drop_if_exists: - command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) - self.execute(command_to_drop_table) - self.create_table(table_name, columns, data) - else: - command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ - "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) - result = self.execute(command) - print(table_name, result) - if len(result) > 0 and len(result[0]) > 0 and result[0][0]: - self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) - self.__insert_rows(table_name, columns, data) - else: - self.create_table(table_name, columns, data) +import logging +import sys +from typing import List + +import psycopg2 as pg + +from uploader.database.database_settings import DatabaseSettings +from uploader.database.database_utils import py_type_to_pg_type, py_value_to_pg_value + + +class Database(object): + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def execute(self, command: str) -> List[tuple]: + connection = None + cursor = None + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + data = [] + if cursor.statusmessage.startswith('SELECT '): + data = cursor.fetchall() + connection.commit() + return data + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + sys.exit(1) + finally: + if cursor: + cursor.close() + if connection: + connection.close() + + @staticmethod + def __row_to_insert_str(columns: dict, row: dict) -> str: + values = ', '.join([py_value_to_pg_value(columns[key], value) for key, value in row.items()]) + return '({})'.format(values) + + @staticmethod + def __rows_to_insert_str(columns: dict, rows: list) -> str: + rows_to_insert_list = [Database.__row_to_insert_str(columns, row) for row in rows] + return ', '.join(rows_to_insert_list) + + def __create_insert_query(self, table_name: str, columns: dict, data: list) -> str: + rows_to_insert_str = Database.__rows_to_insert_str(columns, data) + return 'insert into {}.{} values {}'.format(self._settings.schema, table_name, rows_to_insert_str) + + def __create_table(self, name, columns: dict): + columns_definition_list = [] + for column_key, column_value in columns.items(): + column_mapping = column_value.get('mapping', None) + column_name = column_mapping['name'] if column_mapping and column_mapping['name'] else column_value['name'] + column_type = column_mapping['type'] if column_mapping and column_mapping['type'] else py_type_to_pg_type( + column_value['type']) + column_definition = '{} {}'.format(column_name, column_type) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format(self._settings.schema, name, columns_definition) + print(command) + self.execute(command) + print('Table "{}" was created/updated'.format(name)) + + def __insert_rows(self, name: str, columns: dict, data: list): + insert_query = self.__create_insert_query(name, columns, data) + self.execute(insert_query) + print('Rows inserted to table {}'.format(name)) + + def create_table(self, name: str, columns: dict, data: list): + self.__create_table(name, columns) + self.__insert_rows(name, columns, data) + + def rewrite_data(self, table_name: str, columns: dict, data: list, drop_if_exists: bool = False): + if drop_if_exists: + command_to_drop_table = 'DROP TABLE IF EXISTS {}.{};'.format(self._settings.schema, table_name) + self.execute(command_to_drop_table) + self.create_table(table_name, columns, data) + else: + command = "SELECT exists(SELECT 1 FROM information_schema.tables WHERE " \ + "table_schema = '{}' AND table_name = '{}')".format(self._settings.schema, table_name) + result = self.execute(command) + print(table_name, result) + if len(result) > 0 and len(result[0]) > 0 and result[0][0]: + self.execute('TRUNCATE {}.{}'.format(self._settings.schema, table_name)) + self.__insert_rows(table_name, columns, data) + else: + self.create_table(table_name, columns, data) diff --git a/uploader/database/database_settings.py b/uploader/database/database_settings.py index 7f49336..5431efa 100644 --- a/uploader/database/database_settings.py +++ b/uploader/database/database_settings.py @@ -1,12 +1,12 @@ -class DatabaseSettings: - user = "postgres" - password = "pswd" - database = "postgres" - host = "127.0.0.1" - schema = "public" - - def __iter__(self): - yield "database", self.database - yield "host", self.host - yield "user", self.user - yield "password", self.password +class DatabaseSettings: + user = "postgres" + password = "pswd" + database = "postgres" + host = "127.0.0.1" + schema = "public" + + def __iter__(self): + yield "database", self.database + yield "host", self.host + yield "user", self.user + yield "password", self.password diff --git a/uploader/database/database_utils.py b/uploader/database/database_utils.py index be34a55..f2eaaf8 100644 --- a/uploader/database/database_utils.py +++ b/uploader/database/database_utils.py @@ -1,74 +1,74 @@ -import datetime as dt -from uploader.utils import NULL - -__ESCAPE_SYMBOLS_MAPPING = {"'": r"''"} - - -def __value_empty(value) -> bool: - return value == NULL or value is None or not value or (isinstance(value, str) and value.isspace()) - - -def __escaped_symbols() -> dict: - if not hasattr(__escaped_symbols, 'translation'): - __escaped_symbols.translation = str.maketrans(__ESCAPE_SYMBOLS_MAPPING) - return __escaped_symbols.translation - - -def convert_datetime_to_str(value, dt_format: str) -> str: - if type(value) == str: - return value - else: - return value.strftime(dt_format) - - -def null_or_format_str(value, str_format: str): - if __value_empty(value): - return NULL - else: - return str_format.format(str(value).translate(__escaped_symbols())) - - -def py_type_to_pg_type(py_type): - return PG_SQL_TYPES_TO_PYTHON_TYPES[py_type]['type'] - - -def py_value_to_pg_value(value_type, value) -> str: - current_type = value_type['type'] if type(value_type) is dict else value_type - return PG_SQL_TYPES_TO_PYTHON_TYPES[current_type]['converter'](value) - - -# def datetime_to_null_or_str_format(value, dt_format, str_format): -# result = convert_datetime_to_str(value, dt_format) -# result = null_or_format_str(result, str_format) -# return result - - -PG_SQL_TYPES_TO_PYTHON_TYPES = { - int: { - 'type': 'numeric', - 'converter': lambda value: null_or_format_str(value, '{}') - }, - float: { - 'type': 'real', - 'converter': lambda value: null_or_format_str(value, '{}') - }, - str: { - 'type': 'varchar', - 'converter': lambda value: null_or_format_str(value, "'{}'") - }, - dt.time: { - 'type': 'time', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%H:%M:%S'), - "'{}'") - }, - dt.datetime: { - 'type': 'timestamp', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y %H:%M:%S'), - "to_timestamp('{}', 'dd.mm.yyyy hh24:mi:ss')") - }, - dt.date: { - 'type': 'date', - 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y'), - "to_date('{}', 'dd.mm.yyyy')") - } -} +import datetime as dt +from uploader.utils import NULL + +__ESCAPE_SYMBOLS_MAPPING = {"'": r"''"} + + +def __value_empty(value) -> bool: + return value == NULL or value is None or not value or (isinstance(value, str) and value.isspace()) + + +def __escaped_symbols() -> dict: + if not hasattr(__escaped_symbols, 'translation'): + __escaped_symbols.translation = str.maketrans(__ESCAPE_SYMBOLS_MAPPING) + return __escaped_symbols.translation + + +def convert_datetime_to_str(value, dt_format: str) -> str: + if type(value) == str: + return value + else: + return value.strftime(dt_format) + + +def null_or_format_str(value, str_format: str): + if __value_empty(value): + return NULL + else: + return str_format.format(str(value).translate(__escaped_symbols())) + + +def py_type_to_pg_type(py_type): + return PG_SQL_TYPES_TO_PYTHON_TYPES[py_type]['type'] + + +def py_value_to_pg_value(value_type, value) -> str: + current_type = value_type['type'] if type(value_type) is dict else value_type + return PG_SQL_TYPES_TO_PYTHON_TYPES[current_type]['converter'](value) + + +# def datetime_to_null_or_str_format(value, dt_format, str_format): +# result = convert_datetime_to_str(value, dt_format) +# result = null_or_format_str(result, str_format) +# return result + + +PG_SQL_TYPES_TO_PYTHON_TYPES = { + int: { + 'type': 'numeric', + 'converter': lambda value: null_or_format_str(value, '{}') + }, + float: { + 'type': 'real', + 'converter': lambda value: null_or_format_str(value, '{}') + }, + str: { + 'type': 'varchar', + 'converter': lambda value: null_or_format_str(value, "'{}'") + }, + dt.time: { + 'type': 'time', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%H:%M:%S'), + "'{}'") + }, + dt.datetime: { + 'type': 'timestamp', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y %H:%M:%S'), + "to_timestamp('{}', 'dd.mm.yyyy hh24:mi:ss')") + }, + dt.date: { + 'type': 'date', + 'converter': lambda value: null_or_format_str(convert_datetime_to_str(value, '%d.%m.%Y'), + "to_date('{}', 'dd.mm.yyyy')") + } +} diff --git a/uploader/database/row.py b/uploader/database/row.py index 353e959..f24b5df 100644 --- a/uploader/database/row.py +++ b/uploader/database/row.py @@ -1,6 +1,6 @@ -class Row(object): - pass - - -class RowsContainer(object): +class Row(object): + pass + + +class RowsContainer(object): pass \ No newline at end of file diff --git a/uploader/database/table.py b/uploader/database/table.py index 803a050..35f6193 100644 --- a/uploader/database/table.py +++ b/uploader/database/table.py @@ -1,20 +1,20 @@ -from uploader.database_utils import py_type_to_pg_type, py_value_to_pg_value -from uploader.database import Database - - -class Table(object): - def __init__(self, db: Database, name: str, columns: dict): - self._columns = columns - self._name = name - self._db = db - - def to_sql(self): - columns_definition_list = [] - for column_name, column_type in self._columns.items(): - column_definition = '{} {}'.format( - column_name, py_type_to_pg_type(column_type)) - columns_definition_list.append(column_definition) - columns_definition = ', '.join(columns_definition_list) - command = 'create table {}.{} ({})'.format( - self._db._settings.schema, self._name, columns_definition) - return command +from uploader.database_utils import py_type_to_pg_type, py_value_to_pg_value +from uploader.database import Database + + +class Table(object): + def __init__(self, db: Database, name: str, columns: dict): + self._columns = columns + self._name = name + self._db = db + + def to_sql(self): + columns_definition_list = [] + for column_name, column_type in self._columns.items(): + column_definition = '{} {}'.format( + column_name, py_type_to_pg_type(column_type)) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format( + self._db._settings.schema, self._name, columns_definition) + return command diff --git a/uploader/database_new/database.py b/uploader/database_new/database.py index 72ac003..69da347 100644 --- a/uploader/database_new/database.py +++ b/uploader/database_new/database.py @@ -1,24 +1,24 @@ -import psycopg2 as pg -from uploader.database.database_settings import DatabaseSettings -import logging - - -class Database(object): - def __init__(self, settings: DatabaseSettings): - self._settings = settings - - def settings(self) -> DatabaseSettings: - return self._settings - - def execute(self, command: str): - try: - connection = pg.connect(**dict(self._settings)) - cursor = connection.cursor() - cursor.execute(command) - connection.commit() - cursor.close() - connection.close() - except Exception as e: - logging.error( - "Something goes wrong during SQL script execution: {}".format(str(e))) - exit(1) +import psycopg2 as pg +from uploader.database.database_settings import DatabaseSettings +import logging + + +class Database(object): + def __init__(self, settings: DatabaseSettings): + self._settings = settings + + def settings(self) -> DatabaseSettings: + return self._settings + + def execute(self, command: str): + try: + connection = pg.connect(**dict(self._settings)) + cursor = connection.cursor() + cursor.execute(command) + connection.commit() + cursor.close() + connection.close() + except Exception as e: + logging.error( + "Something goes wrong during SQL script execution: {}".format(str(e))) + exit(1) diff --git a/uploader/database_new/row.py b/uploader/database_new/row.py index 02a07c3..b84232a 100644 --- a/uploader/database_new/row.py +++ b/uploader/database_new/row.py @@ -1,2 +1,2 @@ -class Row(object): - pass +class Row(object): + pass diff --git a/uploader/database_new/table.py b/uploader/database_new/table.py index 698cf11..885440b 100644 --- a/uploader/database_new/table.py +++ b/uploader/database_new/table.py @@ -1,20 +1,20 @@ -from uploader.database.database_utils import py_type_to_pg_type -from uploader.database import Database - - -class Table(object): - def __init__(self, db: Database, name: str, columns: dict): - self._columns = columns - self._name = name - self._db = db - - def to_sql(self): - columns_definition_list = [] - for column_name, column_type in self._columns.items(): - column_definition = '{} {}'.format( - column_name, py_type_to_pg_type(column_type)) - columns_definition_list.append(column_definition) - columns_definition = ', '.join(columns_definition_list) - command = 'create table {}.{} ({})'.format( - self._db._settings.schema, self._name, columns_definition) - return command +from uploader.database.database_utils import py_type_to_pg_type +from uploader.database import Database + + +class Table(object): + def __init__(self, db: Database, name: str, columns: dict): + self._columns = columns + self._name = name + self._db = db + + def to_sql(self): + columns_definition_list = [] + for column_name, column_type in self._columns.items(): + column_definition = '{} {}'.format( + column_name, py_type_to_pg_type(column_type)) + columns_definition_list.append(column_definition) + columns_definition = ', '.join(columns_definition_list) + command = 'create table {}.{} ({})'.format( + self._db._settings.schema, self._name, columns_definition) + return command diff --git a/uploader/excel_helper.py b/uploader/excel_helper.py index 74eea33..0cf23ca 100644 --- a/uploader/excel_helper.py +++ b/uploader/excel_helper.py @@ -1,97 +1,97 @@ -import datetime as dt -import logging -import os - -import pandas as pd -import xlrd - -from uploader.utils import create_adopted_columns_names, NULL - - -def get_type(value) -> type: - for converter_type, converter in TYPE_CONVERTERS.items(): - try: - converter(value) - return converter_type - except Exception: - continue - return str - - -def column_types(rows: list) -> dict: - if len(rows) == 0: - return {} - item_types = {} - for key in rows[0].keys(): - for row in rows: - value = row[key] - if value != NULL: - item_types[key] = get_type(value) - break - if key not in item_types: - item_types[key] = str - return item_types - - -def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: - (_, row_values) = next(df.iterrows()) - cols_number_to_skip = 0 - for cols_number_to_skip, value in enumerate(row_values): - if not pd.isna(value): - break - if cols_number_to_skip == len(row_values): - error_message = 'Cannot handle file. Probably, it is empty' - logging.error(error_message) - raise ValueError(error_message) - return list(range(0, cols_number_to_skip)) - - -def read_excel(file_path: str) -> list: - df = excel_to_data_frame(file_path) - df.columns = create_adopted_columns_names(df.columns) - return df.to_dict('records') - - -def excel_to_data_frame(file_path) -> pd.DataFrame: - df = pd.read_excel(file_path, header=None) - df.dropna(how='all', inplace=True) - # shift table if data are not placed in the first row/column - cols_indexes_to_skip = __get_cols_indexes_to_skip(df) - df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) - # first row as columns names - df.fillna(NULL, inplace=True) - df.rename(columns=df.iloc[0], inplace=True) - df.drop(df.index[0], inplace=True) - return df - - -def excel_to_list_of_dicts(file_path: str) -> list: - return excel_to_data_frame(file_path).to_dict('records') - - -def is_excel_file(file_path: str) -> bool: - try: - xlrd.open_workbook(file_path).release_resources() - return True - except Exception: - return False - - -def get_excel_files_in_dir(dir_path: str, exclude: list) -> list: - def is_acceptable_file(file_path): - return is_excel_file(file_path) and \ - os.path.basename(file_path) not in exclude - - files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)] - return [f for f in files if is_acceptable_file(f)] - - -TYPE_CONVERTERS = { - int: int, - float: float, - dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), - dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), - dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, - '%d.%m.%Y %H:%M:%S'), - str: str, -} +import datetime as dt +import logging +import os + +import pandas as pd +import xlrd + +from uploader.utils import create_adopted_columns_names, NULL + + +def get_type(value) -> type: + for converter_type, converter in TYPE_CONVERTERS.items(): + try: + converter(value) + return converter_type + except Exception: + continue + return str + + +def column_types(rows: list) -> dict: + if len(rows) == 0: + return {} + item_types = {} + for key in rows[0].keys(): + for row in rows: + value = row[key] + if value != NULL: + item_types[key] = get_type(value) + break + if key not in item_types: + item_types[key] = str + return item_types + + +def __get_cols_indexes_to_skip(df: pd.DataFrame) -> list: + (_, row_values) = next(df.iterrows()) + cols_number_to_skip = 0 + for cols_number_to_skip, value in enumerate(row_values): + if not pd.isna(value): + break + if cols_number_to_skip == len(row_values): + error_message = 'Cannot handle file. Probably, it is empty' + logging.error(error_message) + raise ValueError(error_message) + return list(range(0, cols_number_to_skip)) + + +def read_excel(file_path: str) -> list: + df = excel_to_data_frame(file_path) + df.columns = create_adopted_columns_names(df.columns) + return df.to_dict('records') + + +def excel_to_data_frame(file_path) -> pd.DataFrame: + df = pd.read_excel(file_path, header=None) + df.dropna(how='all', inplace=True) + # shift table if data are not placed in the first row/column + cols_indexes_to_skip = __get_cols_indexes_to_skip(df) + df.drop(df.columns[cols_indexes_to_skip], axis=1, inplace=True) + # first row as columns names + df.fillna(NULL, inplace=True) + df.rename(columns=df.iloc[0], inplace=True) + df.drop(df.index[0], inplace=True) + return df + + +def excel_to_list_of_dicts(file_path: str) -> list: + return excel_to_data_frame(file_path).to_dict('records') + + +def is_excel_file(file_path: str) -> bool: + try: + xlrd.open_workbook(file_path).release_resources() + return True + except Exception: + return False + + +def get_excel_files_in_dir(dir_path: str, exclude: list) -> list: + def is_acceptable_file(file_path): + return is_excel_file(file_path) and \ + os.path.basename(file_path) not in exclude + + files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)] + return [f for f in files if is_acceptable_file(f)] + + +TYPE_CONVERTERS = { + int: int, + float: float, + dt.time: lambda value: value if type(value) == dt.time else dt.datetime.strptime(value, '%H:%M:%S').time(), + dt.date: lambda value: value if type(value) == dt.date else dt.datetime.strptime(value, '%d.%m.%Y').date(), + dt.datetime: lambda value: value if type(value) == dt.datetime else dt.datetime.strptime(value, + '%d.%m.%Y %H:%M:%S'), + str: str, +} diff --git a/uploader/file_uploader.py b/uploader/file_uploader.py index ae2b1bb..9b91a18 100644 --- a/uploader/file_uploader.py +++ b/uploader/file_uploader.py @@ -1,36 +1,36 @@ -from uploader.database.database import Database -from uploader.excel_helper import column_types, excel_to_list_of_dicts -from typing import List - -from uploader.utils import create_column_name, NULL - - -class FileUploader: - def __init__(self, db: Database): - self._db = db - - def upload(self, file_path: str, table_name: str, mappings: List[dict] = None): - rows = excel_to_list_of_dicts(file_path) - columns = column_types(rows) - for key, value in columns.items(): - d = { - 'type': value, - 'name': create_column_name(key) - } - for mapping in mappings: - if key != mapping['Original name']: - continue - d.update({ - 'mapping': { - 'name': mapping['Code'] if mapping['Code'] != NULL else None, - 'type': mapping['Data type'] if mapping['Data type'] != NULL else None, - 'comment': mapping['Comment'] - } - }) - columns[key] = d - self._db.rewrite_data(table_name, columns, rows) - - -TABLE_NAME = 'table_name' -COLUMNS_INFO = 'columns_info' -ROWS = 'rows' +from uploader.database.database import Database +from uploader.excel_helper import column_types, excel_to_list_of_dicts +from typing import List + +from uploader.utils import create_column_name, NULL + + +class FileUploader: + def __init__(self, db: Database): + self._db = db + + def upload(self, file_path: str, table_name: str, mappings: List[dict] = None): + rows = excel_to_list_of_dicts(file_path) + columns = column_types(rows) + for key, value in columns.items(): + d = { + 'type': value, + 'name': create_column_name(key) + } + for mapping in mappings: + if key != mapping['Original name']: + continue + d.update({ + 'mapping': { + 'name': mapping['Code'] if mapping['Code'] != NULL else None, + 'type': mapping['Data type'] if mapping['Data type'] != NULL else None, + 'comment': mapping['Comment'] + } + }) + columns[key] = d + self._db.rewrite_data(table_name, columns, rows) + + +TABLE_NAME = 'table_name' +COLUMNS_INFO = 'columns_info' +ROWS = 'rows' diff --git a/uploader/main.py b/uploader/main.py index f9bd2c6..7c84e08 100644 --- a/uploader/main.py +++ b/uploader/main.py @@ -1,9 +1,9 @@ -# from uploader.core.common.type_recognizer import TypeRecognizer -# from uploader.core.excel.reader import ExcelReader -# -# -# def main(): -# reader = ExcelReader() -# type_recognizer = TypeRecognizer() -# data = reader.read("path") -# data.set_type_recognizer(type_recognizer) +# from uploader.core.common.type_recognizer import TypeRecognizer +# from uploader.core.excel.reader import ExcelReader +# +# +# def main(): +# reader = ExcelReader() +# type_recognizer = TypeRecognizer() +# data = reader.read("path") +# data.set_type_recognizer(type_recognizer) diff --git a/uploader/utils.py b/uploader/utils.py index 9b195dc..ee35ac9 100644 --- a/uploader/utils.py +++ b/uploader/utils.py @@ -1,31 +1,31 @@ -import os - -from transliterate import translit - -NULL = 'NULL' - - -def transliterate(text: str) -> str: - """Transliterate given text""" - return translit(text, 'ru', reversed=True) - - -def to_allowed_symbols(text: str) -> str: - result = ''.join([s for s in text if s.isalpha() or s in [' ', '_'] or s.isdigit()]) - result = ' '.join(result.split()) - return result.replace(' ', '_') - - -def create_column_name(text: str) -> str: - return to_allowed_symbols(text) - - -def create_table_name(file_path: str, department: str = None) -> str: - department_adopted = department + '_' if department else '' - file_name_with_ext = os.path.basename(file_path) - file_name = department_adopted + os.path.splitext(file_name_with_ext)[0] - return transliterate(to_allowed_symbols(file_name)) - - -def create_adopted_columns_names(columns) -> list: - return [create_column_name(column) for column in columns] +import os + +from transliterate import translit + +NULL = 'NULL' + + +def transliterate(text: str) -> str: + """Transliterate given text""" + return translit(text, 'ru', reversed=True) + + +def to_allowed_symbols(text: str) -> str: + result = ''.join([s for s in text if s.isalpha() or s in [' ', '_'] or s.isdigit()]) + result = ' '.join(result.split()) + return result.replace(' ', '_') + + +def create_column_name(text: str) -> str: + return to_allowed_symbols(text) + + +def create_table_name(file_path: str, department: str = None) -> str: + department_adopted = department + '_' if department else '' + file_name_with_ext = os.path.basename(file_path) + file_name = department_adopted + os.path.splitext(file_name_with_ext)[0] + return transliterate(to_allowed_symbols(file_name)) + + +def create_adopted_columns_names(columns) -> list: + return [create_column_name(column) for column in columns]