From 0935f77fd591d6b7cee4f030707f13093bd536a1 Mon Sep 17 00:00:00 2001 From: DimitriBez Date: Mon, 30 Mar 2026 12:54:30 +0200 Subject: [PATCH 1/2] changes integrated --- bitepy/__pycache__/__init__.cpython-311.pyc | Bin 0 -> 832 bytes bitepy/__pycache__/simulation.cpython-311.pyc | Bin 0 -> 59879 bytes bitepy/data.py | 276 +++++++++++++++++- pyproject.toml | 1 + 4 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 bitepy/__pycache__/__init__.cpython-311.pyc create mode 100644 bitepy/__pycache__/simulation.cpython-311.pyc diff --git a/bitepy/__pycache__/__init__.cpython-311.pyc b/bitepy/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e121ed25251de06bd5c22af12e80bde29145219b GIT binary patch literal 832 zcmZWny>AmS6t{DiTzoBr4 zH$%dAPEdzjxljk^!AvME)8xu!S)ZK9cp5QdL)P<;m|-sKozTvs9T# zj(V{$wqJ%R_Z)IkA&UOzezE4JkOyUn%{T%|Srk#4wjP*|^LQCD>jP6EJDidia?Pc@p=0TkDX(XPj2Nvs|iJn=p@-W7$J%pbw(UZG; zi5@Sv`2y`NxA_7+T5j{Ifm+X&q-%K*T(%#69)FqqZtwqT?=M;_pS1i%;68QFZv77w C75Hrc literal 0 HcmV?d00001 diff --git a/bitepy/__pycache__/simulation.cpython-311.pyc b/bitepy/__pycache__/simulation.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2942a77266678000d28080474f75c79e94175a7 GIT binary patch literal 59879 zcmdtL32<9!o*xGA1PPD;!TS&i-h@O_q;65veRu0jWi3?7* z*Atu5>B?#*+8XYx*2`II+nlt&<)Ewv7e(*y)+d zWD-~M`+whi-+K=SsqUFwuLDuvyT0@PUH|+02Nf0NHvGQ4^go6FpMPny{qOXk|BAGm zFMkg=-?Ig6LHmsDnw?z>t`)Lt(X}FWExuNaYr#y(Z0WU9JH;)Wam+fdIoQ2@rtDf- znJripES@Q!bzXDYZIQ-KTgcUAld7&&6yPpc@~Pcs`waj2FI4gC1ws3@igzmR*zefx z7GAa8u@~4*+k&OvvIQNV79cME^WU|~pnc5dFZ&5SUhvz~W%2NAC>WZF2k=l7|3+~3 zCzLTBD#yaJ^D}{XI2xIloSR#C|5PA66AJp`QD1m=E-J-+6F0)~(A+)WY&1AO6B_iL zjKt=pkS{(Rh$9I3j&IxM3*C)}A~EFQ3&(tuQAr9-#%J#NZp?>g;=Vv6=nF?;@xTl+ z9-QP=FSga)*k8o>0kDd9dnR z#n(!M-jHLeAXxpaqHATr8pLEhPBp>XTD)}z>q3r=@}DXR)+3%PcsSUIm`$OIsiI&r z@~#ZFWaPI6@!Y{y#3%^1;ocMUeam*OD%g(D8|*+>O<#kZp^{)%sODC!1`!*|=tlgy ztQz*2eH0DLxSl#6xLEC=%p%-nbM9h9fs8OkbUo_aCB4h%#)`pE2c}ttWCIr6}PG9G9g)+m8wFdvi7GtD`sV$I(xEW zy7R=W%8FU8myX^k4|;Qstm>>(-oI6w@a8F_CM$Neh*_Jc&I6a)%v5m(J%XUk_9^~< z#_lLc*e1kYlqgErZi%N`BF%(Y4HCt9Vz4>H@+-*`qeL$Ad*?^id0JU}=R-3rb9oHwm+i$gt z``mFm#P@={P^`DY0Z=UXC7@W4DA*K=ou*oC2E{H@+|8gUMl9;E85Da=`ECZqUJ+`b}{+gW^_G+|8i4%@lVtDE8}dg9V^r8s|U7;#%(0 zerF@$csMWv49pks83-b$FCvqn!EzDxsB|+nBAzI{f#3Q1=7NLcpo#EmcFsTI8wVl7 zKKlqcBx=bQonquoOwdRDzVIOEpF7jx$!Xu51afFH9`gl23(Z6WLC{R$2&g6^iHtOZ zzVq|3xbFt&CFHpfjiBClkYvzOF&!1nSuK404^pXYtk_z;eqVHs$i~3ThVav}5x@&Y z7>Kmj9O$vUVDg=v@?8Mb`g0`mX#{92fZk$V7Km~13oPFX=2$!`1#X5WW&?M9eN*VP zcvc(m3H?2bYO$!k$-rD-G914rMuYZ#9^=s0cmB$>f6#X#G!>W!oeR=;cxZ6QTILK$ zhhXUD#AFo1AWxx)6ADN^L<1qhNO926h^~)7OPT-j*B@`sDtA{_vC&v;A{@zAVmNXO z6gtuCp{c3xWH=O=%+ti7f#K_xCJqhm-<{D!Dl0mlw|RHM@#&z1VL$V$C`})hQ=w44 z*6Cx5qD)04HYP%MC#M6En@E!rfatgV)>cL4XF(7~C87%Bp_s1^vpZ|JAkvMHL>;9M z7%esx8@v|@%!VgvsX)ihf{46nf#mS8wM4yT;L`-|zF@D3o@=bMhsOEJH7 z&}WSJcxVzMK7SKJlhcv#BnCs?M)>+>W4s0%7Bm&U8wyUuqB9?aCfM}P*+ndtkHzAP zM4qlO3iYAy28P+lkwVyOVsUh$kwI+3O7ZGUNg?nGXj0{w@2D(-RxVxHONA<4%YH&blxAq7m>XF+q@%&yo`s;DEmJ&IY90 zA9Na?f|-4L%I8QEDv>@=a&N$BOh@&40lqC^TfwR^8)M5J%|14- zP%~{yJ>(m*4i~YKeY{S9WGDhaP6CjYHei@%u-tf4@SbhZMk}IH5TGSs z-VTN4fP}GDf?WsjO=3>bd=KFc(;5+djE9HHvgI6A#h_u+9qEJNSm4GCFf?FX^D!)m z)HxqSXXa-^12+OOV0e@xpwf@Ei|zR%SSSqGFLW>zl5rJ%vl`xaBZLZs&`Qp2ys)rU zTa8WZ#(a?(H*=xEjJN37()zs6Xu&SbpCsa);Jbzf=$`)I#UwXNUqsX?zG z?VtKrR$g=FrYOFmGIhqnFJ7aP67drj4reXa<~Y8-&6eDSmdu6KoPMD}U&q)IBtB8C zOiPpLefT9(?gEq~}KaeheD=;%3Iw?tz*AOEu?Eq_gmZU)GlDPmdmKZV2 z86JE-a5r6!!FD2a^EfcmbO{ZtvypTuT`$hZ)8$&hr$V801xCpj(v301DP1;)8FPXV zf^;Pt1!J^kjDs9XmjvfdVW&(x_3oo`%6N~_trJk3kB4|-HZ!1Nf*UJGgA_= z@gy$=#E_%v3i2rJGOsMWn?X*b9c)3ts*!eTg->9SNV_xEg>;4fl}#*&A9b8C?a5pS z((a6zop$M?fhKdh+?c>=Pv*X#uFBe8CE^`QBu0zToyIq?Tz)aI{mmHmjP0Qh0^7%@ zL$PpdI~Q{57ZU+Lht8is+r#mRiG}Jh!3`MHUlLCx_71q&AKBL5x7%EG%MD3qyXRPQm_bu8uFB0IZN?b~EmEACei%P%|qEY?}Y`EY1Z7VyH?rz!L{i4n%x8G3eCe^x0*)y4PHZ5ODI$LFDD@%Sr z@s6tAQQ0+0O&m@-TV!Vo`(kcFhuxxp)85%gNruaxaJe8;bXm>b)epF8$17 zYwT4T`qYNLrJ_$NUT)n@7uC_CI(k!E_n01f2E;>Ws&~71=t^1A42+5|TT}iW;-M!s zct|{KOZ6D#bf&iL6<_*N{+;5XC$(*#c<4)QKP(>lQ`?W29vCEE4DNjLx-xh~9Xzsh z;*;~L%cr`wrM4e6;;ol&G>B_e>Y*+Mxjs>H`TY}aiw_dMjIZtiN~JI$8EdSZ3dM3QqJm64x^J;!G{#@VbyzB zb{$StH?LLqB&&OVKmO>hQaz+r4=p)TeM6RldIlfOh=QnF9xaN;uFOaOu(?+2B{o;9 z>gL}^2q1m7 z-NKdsR)FMW{}LH;XlSI6wn)LkZwUFnE#5V>|jODLQ)$^l*W693Q1O z5yS{IKA&`%V%H)z)K?-C@#qB0ogx1VCdTn#&KUm%W#mJECB4#CR`E&2nqzCyv2}Hy z;@G7+cFFpmG)~171g9)L0){p3k$y-YNgp6+7Xl+w@7xl-^U+SkDcmGZJK_{=62~1E zGE?XqWa|WMT9eJ|ahr`jsb}+6ZC+oVSIk=LZ2g2hwO*l7pY;otS|u!0&r!t6*Rog6 zrBKTyXCJ(BE=5`{IY-0_u!!tmu)x|P)BHB|wvi6nU>I4SiP5zw1b#K8X6Sm{60-SV zL9qNkDKJKE(7AxhQh#g+(f?4j(tF5d)ObZ`5g1Um+2TS{ZnNDIIs%U?tk7Sf)gbHl zDc%_R5{u9R3ReC#mRE@K+?$mb^n9(h6w!G5E6Tf=M$KPQ-X+M}G`Dm2`PYO=De|t` ztlpOKB=kkTRv$P}p7*b@Jkp#I`YS6!PsKd!69v$6D}t_133PQFiLy_jck`Lhy#WqF z(%$O85o|A3G{koT7|$uNBbZ2*ZR3pF_lkH3=sQymjKwnzmH|g&DAP$|@Iktv3lH%z z=sQ047DIR-hahT{aX3NF&(6h&!T9F*aZ$&2b3BT&|W_ib?Z!%Tw?42i*7LSu0=syPOkB0 zg}|2N;RcdNH2Q$=cr*fr4w%i>OBb`oTwqKJ zy)9{n-japkqt?73M+-;hzzRjD=sZAcI~Vr8LXupqOBbTnzwmZn9q67N=njr|pBd>s zKhiyR?H4s!?*_Z4e)8{0P1SxaT@Vib(4H<1#_!FA(xt?GfYt-}HefEM3xPhTi|3^9 zB(zw+F&~J;(LMf3prP?}L3}}~$Fu`OZUHi9212^;x+08yNF}n7^E(-&ZNW!f-H_c4!rj zp(jw|Sh|FDNG$C#4K8pOwH^sm4AvgJ(;9?#G1@t4Q1Bl|>4^ZC2u@{S^lc`-Os)w3 z6>^HXfG+}}Z?#ppm+pM>`X?7u*B05;z1p#QUEO*>-FisfdQ{$e_W2F@(mQ_=Q7=rW z7jDQGZZWdYU&Th*hN z<+t9JuTH9O1=Y81s&Cy&9=#Xq+g=EVG`C>?EnNsQRmV7%R&qNTaEm2vTS6b$C)M-z$anF-Duanw%c&+hF zvhj?3c0y@%ERR@+zQuU3?18RNWTK#ace)wszTtBSTA5!ZNt<|4Q z)}K`BPpkE(mt3jZ`sMNm@2%DLCu{qk?3Qc$mD>Gk?f$jeBgxt$O6@VV_82%M<|2P| z?D1=?(7kHa-nFVj$*Mz2)e*Jo2nsdl_6IwDJi_ujq}ColJ*m{5R%=f$mA`bj zh*lj)ItErJpX_`3=Cf1J&&k(<^3;qRxvMztsg8TH{>O>eGLmdf5FI{g#<+>HU|fao zgN^_VTd;x`*}qJk_Qj$^;YxNiyvd4LoG8xCp8&m37%Y5T^eM{!Owf3XC2>>z3_ar9 zG!yWjVyjII>1_r}7J!>9+HcwxOA|$Lfn%X|rH?bHrUk4%iw?6kM;WA*tXiRJ#jvJ8lAyWvRA zn2@6LfcZSI2{!7|uE}{AghjaBjr47lkaK5mx*1|o0>rV4O#sWAAW)+7?k#ki_V@hJ z{yk;=mk6-SH`%uIuC)v&TZW%(=lht94YR3Zt!W_HG_V>~nhvQ=hnC#n?{@86>l{gT zjyx-PzJvDc&Jm^as@i!~cKAN8se5qxd*{A;Zl!Vcm{PM{t=Ya*mU2|xe^+s|FO?y2 zRrONw%lf__?SI-XZyQy%je=>d)Q@5VsBDtA>`qqhmMeFs>e}S$Gb=4??R%2#d!8&P z?Z?#iV|c~?mQJKxRljlW<8#Y33Yg8VR+1388h+>O>ag50pb!EuptuH>&aIc)+|43a z@aa9ZOD?vIkEhFM8nbmgT?z#HMiiq)ES`V~F(~Ny%O7JI{hrNim}D>_bE~r(W_EeL z3pP}q?_$Z!^Ifo4@_ZL$V$lA$;8W!GnaFXm@GG@Muxav?B$zdMzKbm*&v!ut=lPCB zod0sP1covz!hl1MM^_j@fOO8-4vQ`A7G@ovBK~J0F*eP1@nB12FodxQ7D!d`P9sLJ zFafhJYe{yS#l)jDSR90rSfapX!z@(0PZd3)cSoe zgKcTIeX<4v4N7Ih>dVY3)^6?;x};q{J@D5w4<7mXWJEsmDh~^7?@86FDFOU%=n77cI$e6CLMdB z(Fi>?Z0kU&NU<1nGAS`aGti`mPMVLVDsRRi0-7U*LmE>|M{BftW}uIRyBXRMqy`y| zW~lBB_|8HUSI^B3Shk3ybv|%UqKwa*UKo=nw&j8L7Kw6i2#zjEjZL zOzq2wroqDFgw8qKkhx~&Nstm(rjiM%-f~saD5@jf0fUd(B$z57y@z@9!xCvHF4A@i zhA9v@oHT>S1s{z(6Y%bUTL=Y94y!0SexZbo%fSWRDj%cr1n|3G*t>sn7diNg)5Y;9 z4C#UPg<_NGA~Hir7e~OtjHOF8^~HtS;~d|D@}gcCX2dV-+qo)Uv2Sp2(8ut{3kx*{ zVud*o3CE^^gZo?41#`i4*|^RoWc(kA)+lKg1w$0yY`TU7r+ z)qghCy>;DLT-X1F4Z%+-SStUS%LXiUbY=X}b;gJ~YR;vr@6ma+YoFS6IMwYJ5lne^ z_94TXt;n!uD>AIvy5v}Q*=iftYIY`Tc0M`#bm93vrDj~M8DA>3#eCT`5n^nx`x2>5{vz zJRSeh~vC{Xd92iahy-(z7qwzK^qfWY7Lob^U{bYIUz#y?5PauZr4Vbap@5zdG~u#M3(HyF9! zvw>j(5#A5WO8r&C{-sBd_Tk$2ZXiChvT60(cZu?5Plqbkt0WD`XsW^eB z1*{Db2ozDvf;=oL5{5w#wjoqXA7PktE$yH;s%T+^j(2%(opaWW9PcZEQ9*VVqucGlfjOoY0lpXg>O#B3BN)- znBM_yf#kxBkY!_3=5_n4$jL0}fLB0T#>N~MPGE1F@rKNMxL8Klo&j7<)|^~^muH@ zG2juF2pEtJE^uP9%e4ZL=l{W-z#PwZkP?FG9kRe z6a@qbp@?{^SrwYU>6R@SgM|Bb0-tVnE?uF6hJlcwc+7-E3S(VLm+;E^i=}@=%M{7P z(nSOmh~)^W)EblaWY)@Gnl5Ff-nb|IfTapUoCrDO{H*k!QdWPDf)xaQ7e}fX*5snR zCqSUUI2PzB%OMmr6<*9s^aw$;->MdqKr{o@an zo>ObxuP3`-SGv!u-REUbM{4xU^Wo&^IT?TT{?&6z{XQ9gm&fiGy>vCKx!RJhw$zrM zRQq76p(W+(f6>*uTKt389=*2iENQO<2Gw5q1#l_%PbnaTN(($TSHrrkunhXx4G(5k zMwPlDwQgwX^wR0i>BV;<4*8+3sqJ51`}lM~ekDOryNW2pTh<=u*?N zxf#Wrhxx$iQ`+LV9S~I>asZyWK4x`gR_O^K`9n>2^`GM2th*}F2E9;c8q+4~=kL&9 z_8|fWbB(5-+L&}SF83;q4%N{i>wl~(ClOoxh?UBEunSDrSJ4)>A`0&!)uMf|V6kwK zp>&I-W{p!rDwnBaylBr8FB4%Y$P+Jvxd*ndU^_zlj#|WiV3VS?pNSSP7UhYTfmSUR z=ZTkrrUHr0U2cZfDUemW!8VV*X-Oq=fJD-yDHkkwTxeyr(RU)GvO*?PK8muq=3{}W-fem64Nrsy;+>=3_)hhPcM^sYQypT7Jb4wGQT7~p z;X8wYfEr*R?h}Q<(hX~Z?;ECeY?9isVd}C?QkO9(6q%rKJr5L$Hh}`ZZ%~t>VEKkM z!S@YQZ`_+jT5lG=LOUQF*svz}zClfjH)#jHZ=Ew%@pk6qRk%~DGvX2C1R zEPUmdMXwyQBs*rX;$bmqTYxC83^>eU2P>}}jCbEnP3I!$BN7qVh0b*a@6XWE%t?p= zL1C%{bR}0hprcLF0wW*B;ny@#QGh|5-45N$&`uyMV*vK)lVS453iKQ9Sc7mA03)6m zQ_Xxa*v2!5t^yI$@sj~gp^K!)&z|6K7#`yzMs-FreZhIKZ;98LBeTHdWM~dlD!0G^ ziNh>&b(L2As}8%%XLIEd;<6qikBi*sKL)h|-F!%Rc+q+xcTTs#b2rsR%ZDL|aDM@N z_i&61{WV2dJx`-fjHf^TL%T$-;=q6=H=(hg5W@<7bxEu89sWK5XH+w3hfa{C-Gp+Q zoRyGQUT9keLwCW{cV~20+Qms9-g#-a4kQhP(7~|6BVA!>7L+-du~aEo;O8(x4f;$X z(J=Huqf8}+)%P3prWh=bNHAT3Izr)8>ccyK87JlbIqs!DrQm;|Kt_;u1J4ajQiGV# zCMGHL)^j5;gJuJ3*XvnCf~gqE-1V`g8D=?%IZq!g7Pb1z?-F`ToG75TEkHMq7W|a3 z&esb5Pr6(#_&@mN3%ZQ+Z{r0}Zd-CMx2$~l)cw3ozIIa%-+R&2^00s9L#1i2+O&7c z{esxnD+;c_gxuW)5S-QA*xPdNRi*c;+Itmco>p94 zRK3b4A3nRQ?tMdk>zdq$D!rrjy`xp>#n%4EgHPL@H>vx_=hU+gGCE3jwJg6yJ+)bcpMe3<@i4J^ z@!2l9?TpfPMr}LuqNVlWxz(1Z<8sR}t{3&9tvxeN)|=0_v^{)xb>L}S-g1JQx~^9^ zJmu>)1WQE_P?E|$tN)tfU*c>I{zFi(o`RWmi}83u|;tZSf(T02D@b2ZRZo9vWEO^l?BYdkLf4_HSS zl9eDYAQ7iBJAcw*nA{F*%Jg~#k*?S}QQ%ah)xS89{= z^?A~_>*+TfI}LeKcf3;S#)Ky`_h1vybjaA@1Z%DvsGdW7k0bK})E-bKt5zoxi z2`${6?RK)-VR{DrhTC24t`d<+bM%7%v{L7x7fU|ixMemuR5t6JY%bsfA}|@ZaLmX; zx}OY+cqxcrREk%~HwBM027+!h!oXw*%Ny{W&{TK{-IhW#a6|?WXb#wj1vnMHsj<{x zI7PPa1c}T{#-^ipnBD_{hT07`YnpFeWC1&0Sdderb;OcssY<_zzFVjoqsI8g!Ljo7 zL2tu9vfw_69Lz5o$F&A#$MukZKhouamp*dGIg&1fPwIt+bKH|P+yugwi~C9?e}Na~Vu zEmO2L)NA(xI9#)qDcTa(ovYU#qPxQs9HHPS1;;3${gb1^ze{(&M**$b5-q(Ft)CJt zf)cG{5-mrPkAiLrXmyZil1p6_bW+emK`#P-4d)F0itDPmP&`2@y5tg(-B>&t(wn?a z{g@_5+5>V8mS)DBvEcWaAO97Wp;!qvZs7M`TV2zG>uRl^yDk%Yq2x}irs2VPwWd$4 z*#YY>%LCHPs&!k*WZP-zl2V$g+6Px(5TQw@393E|ppzEV;G#9-fD> zH!QrFv0CfH0kvt6nW?3k+a4Z-nylJ<1l-1qr!DOdud7>zH3ww1jYc!be(l^!_{qYv zyYgET@^nnTyC~OuQ>po;TJz1NvKQW(2mLFjpX_^9_xz^(-c5Pt8?skYyprmbmP$F3 zbKszS@-6w@+v@<8{dfT`=L+7V%VfbcU0T8gV8kK>j}{?n_qn4&u54e~uQ&!&$AIh@ zctJ1PR_YapUv>Cpho8Oh%i9hpj#1SyDmzA58sAFSqXD&JkJ@qc*>1&gR&|_}9cNkO zzSXYB1I$GK>~m!Nrs{Z8cD%_xbqdYrBZ}jw>NqMpjEG0LiO!ZeMg>kD~>a&I43*KWt4T=S_vb&Gv>?yhFhXT&?op~ z^k%wD_$G(i6?1+Z%WWAxjoD*0B!s*l@Axl6+rTc)+7fgl@cs<#4?0r@ioh%nm=c*| zX6SVwrb+8#MNq4H>2zgOPN5aMf{dF7B_ zQ-Nx8-6(BDir+djEQJa3G3iN`xj1i?tnoUxMs;D+1eSML+K{y|g zq`*DqI%oI{CJg)i7 zVMsCeQl$CI(W*l*Ae0c6>vSr?*XI;7{ooB4zPhP5)dUXNm};)X9GDh%tI_QQQa(R0tavp5On!Kam{3#+`su>&sri{1 zo*&(5>H8q_oQ6-FJAR>d%u8mcisanAbV6 z;;<4?QSRAYjBeduQNU-N0o-+btr#-G))6l64U2s0CNo#HjO+In$EE{-J8>sCo3)P5 z7t#>cv~t-}lat$8%(26NN{K(G;9nz1R~kvlz%WK@I|Dew+oXSs$AxwiD>iq54F$kL z{xRahj;i10B0oNhigSx<`0!=kdJZezBdYg^>^cHh#mn}O4=o*H?{_KQ-KuxD?AraZ zam&LkYmEn!jR&9Je0ER1d{t?@rZ!$%s(jJd{IF#uqBQPS8+R{NzI1O<-JK+no3_6g z*#7vvrx%_dQb(`J?+4|9kTMWb2SQ61mJcbetuF?L9!H+O_xz%I@Ll=FlstG-8N8_u z-o)p_ifbEkk{w$(PZ!M)KO&8Qg4({E!rkAs|J`yjw+Y&R4_cN>_9+6v@f4TQ# zYn+TzJu~ztS*fPo@fGTw;ZY?shrgnFXPA;_mTIcEz-EF@jthMaC}o{!ioU5ZFpxaH zX);bs{Tk(For(Hwdv3or);|6?W9%5vSP@u|K4$0&UL0`0&jb!hBaiSWTyu_vSI&ju zQPZ}d=kMD;E=O)=UyMS2V6KU^9g9Uv<(P?H%*3LsnOKx4f;pC`Gg?udD1t+Rns1k5 zK4#&q*3yl$r&mlkqEQoTJt(aP50j@#6I6wi!pH&j$3d<`+dFGjrv%KKfgPj9;o91k8$0lDhZ<68(z4jF$*E0G6 zw4dQ;6zi_xq6B_Mi$P7b%s(zMQBz~{H^3qkVHFzoSZpq*ggPDhC`&1neoBFY?265da=0&{zy*`^TFS}&Wh z*cjCg{ym|LU?B$xc{7n0<3Mw{Iv#2Ulohs*EF~%jCs*^}i*a9~5u#8ms-Une5siT( zh?_Uz)B%wXjm&U5c*=U>70Q=uNj7Y6wx@sv)@O?u~DnC8?fa2ajaVm&O_nbgN726GH{w1S&iy%NZBH9WyyIbhY{WB zlpKdpUoSG_YA2LOF0B*ZVnNsu_;_ymD^Ar>#W1sUk|z_&#EXX)xgcsOz_FS1MyJ&+ z^9<$aq`97_Zro}eZ@M2_%tuR)N66O-%~qIqfjrV^eRfP5^&;XjGfdN*{-->y#Hd36KziG(W_74%ema!(P?eeO~$IFEl%3vasx#K zqi7GEP{+wMz9eEz;~>td2!50uHHoE0I5jpJim3$I2vh&1#ZS{y;-qk_&rh=q1#uTO z>@D=$;U0MgkQPJYLNp$-+9<;W8#;2Pb|4$p6kqgCL_2?uw~3uD8_DHx21zsk(-56y zy^lp^22Kt2eZbl`I9OcO%1}e`J%J#`)jMPB;$n{z;R3XbgYLJNXqMqo5a>&eN8NT`F@bkNBj& zL{=9T%#N)2`Jg(l#5$`NE@r|NUJN4a_(kr7+jk)_PCGZv;Q{WK+UN*A?U`$Ay(ZGX zL-`9``XaYs*=Mb>HY-?o(oE>J( z#?pdU<7w&O0>_9+jt>E4f`jK{Z(c<(e=l8#4Io`KcJkaggxCVuv0^i9DCQn}El4lM zo`a7)j5s*n$A~K%vq=yq@c{lWzodIMtep&N!r63i3}hL1($5g*>kPO>8gQLROarcA zjQ_24J>B)}*W?42@wZ&VuL${Yt34ZGwSYbVHbOoaMtloiOCR5P9R_Qea+dKbapDvZq?bn=JeyRIK{b5b#9|nJehGkb6 z*}|#$hpMYhb#o0D;mlrZ$Y> zgA+at>}4U|vdJgcx+2VYa{M)3w!Z%}pxQ3%|jmpr&eF1ZJjwS!9SkXk#0D(v}o zIX-rDnObow*?tP*Rn`i%{o-2t+sSsga*rzQb1MDC<@vkv2X_&w?f2xwhf4d0IU9Q< z*?Qzz4R5U4dUmb#a)^ah~ za#3lyq_#j3-T2Dh!o4;3-+s`)=Iu>-dw+D}@h#cg%a03O^BzfhacUbKNeE4ZS96OF zFY_1G#(wT&{^?1%aktWVOl>^2)_6AAcvfkAU2S}QsS-fs2W9O_;&iOtr=FL{Z(NbD zz9+vQQ5;d#5ta2n&H^_v%xr|65JK_gDBk`R+KCh-%*O49%>rhVN%<=px93f5Vn^k$ z6Di=#vWVMk)StIq!mcT6iNY8BYHX&TVET;qT6gX=_zRlEvj;Vv`L0gQODquK=Ao49JII2m4 zU^2lJ6pd(xiF;wHBxs32W^|wtCPWRfTB1og8Zb2zfCAw?NI=ase`AxwQ$Qjyx~p(u9+foJ3?ss~T}R5V%8CC)qM#roQNnTj-Z8D)KlP z0C6f3Do)%2h(lP{8|2i52LHf}sejOlY(M6&WDjJb>3o43L|$@Px@km#6UHoOHOXb* zCe#k$UWLaC-369U!-jW~6*L4M;)EEC9SEuE+#BA^SLW-8A*E4{Ic!Q8hmQuksp8p{ z;r+#Dh-Pd&k)4^fP1?xC@j+U;4YOr!d9WaKlZEvat^X}{L(ebAGj zvukGV`K>kCoXf@Q#EB7JDUC4UON9aOe6oDro0)?nH&aW*(%De2%HGlkgEsVX=dMTE zJab^&G>?p7ViM0Uv|ost$FBxZOcur;!3-aS=YNLv*MON~p#0}KuAfU>&-6mmbY*@Xy6=cYHboSpysqW71(?puIr~&0o>JP+`nc!rXXpo$R!sEwW)#M1af@H(M2b-{e7Q?3Y4Y246gXWF}-nokzj+}o4x?TUL?bq~W~ zD(|o|Y>)5_OdK&9vieWpoW!j`pV@#>+E=K@vwm@6kZ3={HH%g72**_> zK`#!=F{RPd+A@?qf`#OVG$S7}x+B#Q_RB2@hly8#RUr}LFS1WA3Oq&gMc0o3OAHwJ z2h;H~on(iVU6X3iLPOAGW6y+%!3(qLR1=XeZSAI5Oe)2a)1mD?N`(D`4Rh|0jcqXe zcsn0D9Cc!=G8cVO2a23IrbC+&uMWNq`Y!5*)21>h8;UX)g3*R0KDmMm%*Ufx(cmQx z`;*9=t%VdHZ@CN1d(nB;bOxtL=nm?htG#asOAV2iHsx>;uc$hlJ2&D33O0_qGXy|H zpqY47h#oZTkeaXI33MJ|?(V_yI$18$HAxngYD`Zy+4J>|xl05t&UY^Ovdica0^=b- z78j!{L{nh&KnIcdM(CuWnSLfBg_|4es&i!;!cp=mL%epNw&-`XHAdzoIU9tQn2_Vk z0r+E&i4rrGxVMOHds333l88ZVKRd;0%ew(pCvGo6*pTiSuo4ML!jMsr_%^`sSOEco z3e@;O4ZxKQOqFXe@^we^`op7sq0Lwhz(ShXqnQ7&3Q zsyTevY_M~80->NOgR22836K?FFa5Z>qi z5j`?=$%#gfq0%%N;UD!leW=F<#T>(@D+my5tW3#&5QW8*t|w{A{4Wc_ag2F$sy$$&}N zWAzq>2;k3uT$d5Y_8{laXwPTAF8Y1?aOEe1n=S0k?f2ZOYmbbn(8 zQbyVuO)Lx=+d5&zoPw}Jy)+%+Y${(Arfojr*-Y?@m$V#iaPn+y9_k?AnZUS^xzpA? zPl5JeD$Hymn{5uFGas1tM#}xTcEBz-L^I&V!RqOtNt9$W|1@L~aHyg58f6bFAYeC1=|N-3W-nqOMQZ=Im}JqlB+D^fgSC^uA7)^IPg`3HE3$G-`OzNbO7%9i zdfSqNO!5w`_C497ZXJ16BsZT_nop|DCzl-e>l8-=cbN6IeCVROoB8hQc}MAaN9}oM z$st#DD2~pJlfQJ=FS$9*RCMxIT>-3Mh??7hw5t}sAL!f#%UHyHm zp+YUb1Y1f0)Z@v;E+X(BRB*+X1H&$K1^*>V;8Ztb>%nOj4ohs2GEt0UM=h~&K7>7C zTJBfGCZd->*G&HX3EO@9^^@%6i*h&&DThlC7tVx0Ot}8QSql~?sipkn3u8Nj^N&iS zWwF1Vs9YgPWc@zHo6p4C+>~U{@i;><9*#rYw*=(~=N!xxY0!x~w&^9@xv9yZD_9Y> zXF?OX6(D6LZI^H?;tsmmoeOs!c2|KrrnC%d)E)G)caD{uO4kW@!n7@0`vLNeOt@8N z!OfHKSm5UQmBOtmQDuT#l?Jz(M3n}&s;>mMD!{Euz^zKatxCYHO2DmZ6S!4v1h@P4 zPfjNsv9B;cEOVn=A7BXw#!{`7pRnjnc)?Go3w#gLLTpy!q!Yk5Ifm=c$4S=}TT3Pb zkR7zfH=aK7*8V{_I1d#lj#C(01#tB!G_OEF=qFx4t&zCR18|%Wni-&zX6c|TO*_=I zQN0>PZeG4Xs^&$L#<87>QZ&^!6byf(`5R`?k%S7JpNC*XlX2*}haI+8?c_`~K?CC%;nYs#-y-|1c>PdUG8VF`0>nEcKeSxOc z`)d@VCEu(R(RSJuzD6;6`lCp`Sy_6EtXmEW#R$s-!V|h14o$G3WkE0_DGQKHs4g<( z7}z2^`o=(usK@o{!MVnp@kwsuslVgwhdwT00at+K1csIV9o;9zkt#z!*L2R`uxYGs z#_i2Jtj*ZJ##ctc7}7Lkz#e0YrKZWp=bn$vJ+2R{X$x7^FW)f+8XL(B6`--4**x<* z4yIl!+AmVDK1L4u^k7x6%#&}-!;wljey{hsSdG92=*O0xF4ej&?a8cwp`*l$PrLLO zR3KMED`sVJjZ4~N{6$R7k9{=hVG`;|o_*QaFva*e-jB26nxQ%@?WXz^-UtV|@+RfS z>L?B4gS3N!ofPb%;Qz)~sD{OEBe^Sl6YBdRP(b83*vZ`a8fYv(u@7}e_dTUN?MRPf z^FqXVU`eDuq0E3_)12mKxBLUV@mF(}$2KaDfF%u3Fi6361nEjXAcPiJWej6~GGsi1 zQqmdt5Dk$V4vzWhGmU{FT`pcvrM)_g33HU!x&+ePCzR*F+2g+ z#lvX_X`J)dfQ#{((*USIH5x^;FXN}lsu+)n#x`6K-=G}%P`1db^2DhHsu+6{*d=iM z*X)H&aDl1^P=RsTnoil}MySY~WC}5^23V$&g3`Vhgz>Ld%GG<7>b*E@PIm2m+1Lzg zQ|3r`@Pt6+p|%G0&DK3nBC7x7n*V&#fBtz?@xQ0~-&?nBD?4ugf`xD)2#>QPFWU#{ ztmw93dDjW04eDbpNL=5((zJR?ZQuE1KG{C1)*W0rovLnnaB$t`sW@(b+1mZPGvA;2 zt;oa3TI;@K>%ON!xpkk?dR%Qie&31YeLL0OefOPk2JGp=Sv9yjrFc41t^T#vp=9gO z)4pe~$@?$LtwT!d8*1wtEX}r^i1wP|=}q}}s&L8B}|YDxPDi=a}p{hAK34tZZFE4MIT1jTK6Yg_roo1>wcy6gxZQC{|2<+|3V!+t@-*L+&g$> z*}weW(=NICuu^?ktv*9&%LHTZ^U!8>A4BdHKylgJlC3@ zTku?GdftNPded_&o*PWhZ5Ts7%NS}*G$oo7Er~6O)dDixyR7`f=?zbbIc<~VTOhFVtb+;WOY+u08>~XvZsNO69K3BSTQ)ZK)!smUw{Q{ z(rzP>>uuoYng%FNYZw3?$V6w1^KKA&qJ#`JPEr{J6CHYFOnM2FC>Vfzg2WVLF5>Ic zQ8-2l`8mJYM8}&dW5`W43Ili@Go~U;PPcw#iU-sfv=Vl=3k;P0c186%J zkfN!YG8pJGW4|=oM6^1YOz`tcvxNOj$_cIl!IOCFn}X)mI0;5gKvcp}OduawN!3GS z@|~EYO{f^JJAifmim&e=!fDnK!vjpWPwRnL24x7509o%*$4muq>LR)XG$k29l1AwS z3iNGb)Lc*s++mN*=a3==3flf^;(nfncY#_fFKT&lUI&P2D6|-d>adYTvBZdFLKfssFL8(t8;f*r z8nYSopsy_VtV(b#C=<{rYIc1D>&VDJ%)HB7L03FyN>t3mJNRR)Z6Y$k!c4?#3?`B& zW;BT#t+ThUpj=DG2yrh=j8l=JWNH#q87qS22v~!IM>VA10JEP0$BU*tHy*}9(Ch1C z%YY_J1S5zPKM<1M4XGs1VnHLdpWWq+#xjXVa}~?wIJzn?H>cu!M!2lE0FqZzlZpxD zUtLPZ@*ywxeY7lMMdUj1{Pk=!2uEEwauS$(ELSZ!rAvSt@j>gwtiyK)V^I^`k&7(0 zLL>U@T&{co6PzxjbxKA?aBu}{=@|2swQ(+txyU=!JnGqN;%{l^8{~kFStpk_U0aOV zdNrLR1rF&1z?dZgYdXdyX(wZzvZ<wY(o?s`#J9CIUtC5+$Xb;Cbj6GNY83qxe8}(hBG-epCV%kCV z1(n#ddG;@Jt4{`?r;ek9vGPJ;kxm998Mk#sN59jG$Oi%8N1{^V=2c|doxV#9wJ`G0 zwkx*JXq&GIEg>(6o3?<2=0Xyo3C;7%6-<(JN~qU}RbUcQ2DlP5Kq(sc70WPkIJjeI zaL9l*FY*){1za-{gV!{eX>o-1Cih&(bX*8wv{XZ91yZsYnk$irIKOPI>%Hzf z2xWrkY=}MjeLX&NT*eh0|3~@6@%s*J@DN}|HR;?f8yIf zEM3VW>P?DC#Hk1$Ip*fvY@AO=O4ppRuBljrgXwak+;q9IRi>S$?apAer=2EHq^pcA zKtINuSjVhUZgvxa7Zm6PQ7g;oh1Mh|-Rmu4AG8Ls4|;nlD3;M2;^zCSm@bJ#RJ+Mb zHW_xO>o9N634`lnSZBsay9NjT!T3x&j*ko;(uE7*IjNC2?1kvDbTPv((}foT;0*E?uTzjDm3r-b4V~5V2bG*}#D!y+tvuQ1A}wC;g8Ukaas(&~fO{N|))~7n9zl z*w-m|kAevb-lrfy!3_lIT2pBRbgi(f$T-%4fY7szf1JtRlz-`5=r(EY!$exi@o1cGgYSrCN%7-o}jj*e|2p^NqI${=gq^jFxS35C}y-P)@o?UA_Bgvi- zrDs&_fws_rvg$7=P+T3zYdOeHa+Cf0pA|i?mB~u@qS|tC={&K|Tb5r_>wCd8Z)3*U zZ4fuKq?%fn7uBYLrPtv-e5De;#Fx(He7B`t?%1Jj*|~J#Wp|(K>UisvB#gf=-rwxYVo!GeaN+R+gIC8zrQ@R7aWU1|pW4!qYVCT_*}Lla z!J$WoQvSp1wazWxbsK^&Y|g6cpHhHFJ!rAjH!tsh5QSK!wL|XQrMB)~x|nL*vOJ?U zZd-cI1k_ybTF4r{Wq9fQ%Pv?SwKM3}FSo63eRBKR$n#&5-wl5S*!HZ}LB1k;y8oJB z+y2YK_Rz+#{h|Ux>1RFgF!0H{%Uw!En_AHZ9|P{Hr82IxEQA?Y3NSUf2!u{aconOw zOyy#-y9%6w{6uW~X45MTO9TQ7qbSmKhb$OaIAs06Ac34Sehi~2f&q9`))ysl!dV5@XD1q=bg$>huWwxJ^#ysNk!yw6uC@JRMOXYAz^G3mr#*VT!7>zmWSl zAmn{9(jJMys0(uWNucx@iLFNLHeyhAczEkj(k%WG7XZNsP6sqC=?SycBF-oIW*ZJ(07x*^FyLySIo; z5X&~Og9X|5$QG-d_|2Wfzy_rXu- z>v+R5!;N`unbZBWBn7!ArwIBF!}lH-5;TZAtn~pKkJku24a}F>!%YNChAMsjyj2%bc_vnIHtMo% zv;;6{xPti;3ehsmnrjg++ry`83cVAjfjlJ&8x0V)^aGz6`= z_`<wc6bPp@;ovM4M?AV#T;0S|-he-A+D5+J;gjgB#V+iH*0j{ihP6p7?cfGQT7LyP39R1HNM+;y2-=b}%*uAsz*tgvE|Z z)`uL2sHm5?dDXQx5DZS-2uDbDluZMMqxW^p1>)1&14-`z#d}co9$YG2ci3FjzZrY*Yxif8 z&JNkxL5jJ1md-;d)8qXlv3z>9>&b4}yGD?u>HlhyanWb|WiZOgPnz->Xf+|r1`IPdn7nJcnE^n{RzNUx%uvj+Q~k9s-NNs8AczxdvDni>ZW>Pqh)UPY zoO6x~vAQtC85YuvbD-nRL?a0#-vFy%`H1ygWFv0=YL{-ROXgzC-m$iGSyKLmSfu{d zt-|DQ>jElukyha`8gt`VRnNowE-kl<6j)bZ>6>_GS#ld`AcFL&%oZSlbuF9a2kP^7lB!*g${&6jF!Np*D6JwCOxraF34oqegEt*LDT>z(Hd ziqF~CZS1nL;}L{V?Dpx7Khkeg-Mx?AT`%MjeYUo)^>%*WVe9c*Gtd(2m-mWFi+8V1 zuG?_Q47^>%3-%T1QDWUjcR9lGg0|w?C&$-qxIDZ3$M3QmnF){BZDw%HK3wcuw;@;_ zW0%!k?2;Kw*>@J#uS$o#1T#@OZgDd;MOTWb-VvzHa` zSV0-MWCnG8#rxN72so5+^Ar*Fm!`XMdrxu0edz%Jf}6|$GT_FxRLekW%l1@H?>g+m zDGr4%>lz-+tQWHvCAR9i^-^~0u+=xMm$BP&II&rGvRjv}y-Q16X)AYq(z5Pm?^#Yf zKP`N#@JvB5WOEcgd6Qq|H^%u*j&QuFqj=A17lCxo6LcIdnZX%*eeo`ef=gzwqqBH~ zj|SYV+pNJEdpo}Fcs#mp!%g0RPf(hces{Wrlc8*3Nf)ydOS!{#=9*{;kl8(Y=WrJyqR?B9A(vArTy z)GKHG{XA8Qt}C=x7(uGMc4=G6aSJj}X9>i~&XSi-*Lrb5amCN^TQ5Xpx|U;c1cY;k fU$FN-&GR)z+rjUZ_-#KdK3w<@T_c761lRu$4zj(k literal 0 HcmV?d00001 diff --git a/bitepy/data.py b/bitepy/data.py index ba8bf52..9fc9e44 100644 --- a/bitepy/data.py +++ b/bitepy/data.py @@ -10,11 +10,16 @@ import pandas as pd import numpy as np +import polars as pl from zipfile import ZipFile import os +import time +import zipfile +from io import BytesIO, StringIO +from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm from pathlib import Path -from datetime import datetime +from datetime import datetime, date, timedelta try: from ._bitepy import Simulation_cpp @@ -24,6 +29,151 @@ ) from e +# ── Constants for fast EPEX parsing ────────────────────────────────────────── + +_EPEX_DROP_COLS = [ + "LinkedBasketId", "DeliveryArea", "ParentId", "DeliveryEnd", "Currency", + "Product", "UserDefinedBlock", "RevisionNo", "ExecutionRestriction", + "CreationTime", "QuantityUnit", "Volume", "VolumeUnit", +] + +_EPEX_RENAME_MAP = { + "OrderId": "order", + "InitialId": "initial", + "DeliveryStart": "start", + "Side": "side", + "Price": "price", + "ValidityTime": "validity", + "ActionCode": "action", + "TransactionTime": "transaction", + "Quantity": "quantity", +} + +_EPEX_DEDUP_COLS = ["OrderId", "InitialId", "ActionCode", "ValidityTime", "Price", "Quantity"] + + +def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: + """Read and process a single raw EPEX zip file using Polars.""" + year = timestamp.strftime("%Y") + month = timestamp.strftime("%m") + datestr = f"Continuous_Orders-DE-{timestamp.strftime('%Y%m%d')}" + + folder = f"{datapath}/{year}/{month}" + zip_file_name = next(f for f in os.listdir(folder) if datestr in f) + csv_file_name = zip_file_name[:-4] + + with ZipFile(f"{folder}/{zip_file_name}") as zf: + raw_bytes = zf.read(csv_file_name) + + df = pl.read_csv(BytesIO(raw_bytes), skip_rows=1, infer_schema_length=10000) + + # Filter and clean + df = ( + df.unique(subset=_EPEX_DEDUP_COLS, keep="first", maintain_order=True) + .filter(pl.col("UserDefinedBlock") == "N") + .filter(pl.col("Product").is_in(["Intraday_Hour_Power", "XBID_Hour_Power"])) + .filter(pl.col("ActionCode").is_in(["A", "D", "C", "I"])) + .drop(_EPEX_DROP_COLS) + .rename(_EPEX_RENAME_MAP) + .with_columns( + pl.col("start").str.strptime(pl.Datetime("ms"), "%Y-%m-%dT%H:%M:%SZ"), + pl.col("validity").str.strptime(pl.Datetime("ms"), "%Y-%m-%dT%H:%M:%SZ", strict=False), + pl.col("transaction").str.strptime(pl.Datetime("ms"), "%Y-%m-%dT%H:%M:%S%.fZ"), + ) + ) + + # Remove iceberg orders + iceberg_ids = df.filter(pl.col("action") == "I")["initial"].unique().to_list() + df = df.filter(~pl.col("initial").is_in(iceberg_ids)) + + df = df.with_row_index("_idx") + + # Process change messages (shift instead of while loop) + c_orders = df.filter(pl.col("action") == "C")["order"].unique().to_list() + a_orders = df.filter(pl.col("action") == "A")["order"].unique().to_list() + orders_to_process = list(set(c_orders) & set(a_orders)) + + if orders_to_process: + chain = ( + df.filter( + pl.col("order").is_in(orders_to_process) + & pl.col("action").is_in(["A", "C"]) + ) + .sort("_idx") + ) + + chain = chain.with_columns( + pl.col("transaction").shift(-1).over("order").alias("_new_validity") + ) + + updates = chain.filter(pl.col("_new_validity").is_not_null()) + update_map = dict(zip(updates["_idx"].to_list(), updates["_new_validity"].to_list())) + + if update_map: + update_indices = list(update_map.keys()) + df = df.with_columns( + pl.when(pl.col("_idx").is_in(update_indices)) + .then( + pl.col("_idx").replace_strict( + update_map, default=None, return_dtype=pl.Datetime("ms") + ) + ) + .otherwise(pl.col("validity")) + .alias("validity") + ) + + c_indices = df.filter( + pl.col("order").is_in(orders_to_process) & (pl.col("action") == "C") + )["_idx"].to_list() + df = df.with_columns( + pl.when(pl.col("_idx").is_in(c_indices)) + .then(pl.lit("A")) + .otherwise(pl.col("action")) + .alias("action") + ) + + # Process cancel messages + cancel_messages = df.filter(pl.col("action") == "D") + a_orders_for_cancel = df.filter(pl.col("action") == "A")["order"].unique().to_list() + cancel_messages = cancel_messages.filter(pl.col("order").is_in(a_orders_for_cancel)) + + if not cancel_messages.is_empty(): + a_rows = ( + df.filter( + (pl.col("action") == "A") & pl.col("order").is_in(cancel_messages["order"].to_list()) + ) + .sort("transaction", "_idx") + .unique(subset=["order"], keep="last", maintain_order=True) + .select("order", "_idx") + .rename({"_idx": "_a_idx"}) + ) + + merged = cancel_messages.select("order", "transaction", "_idx").join(a_rows, on="order") + + update_map = dict(zip(merged["_a_idx"].to_list(), merged["transaction"].to_list())) + if update_map: + df = df.with_columns( + pl.when(pl.col("_idx").is_in(list(update_map.keys()))) + .then( + pl.col("_idx").replace_strict( + update_map, default=None, return_dtype=pl.Datetime("ms") + ) + ) + .otherwise(pl.col("validity")) + .alias("validity") + ) + + df = df.filter(pl.col("action") != "D").drop("order", "action", "_idx") + + df = df.with_columns( + pl.col("start").dt.strftime("%Y-%m-%dT%H:%M:%SZ"), + pl.col("transaction").dt.strftime("%Y-%m-%dT%H:%M:%S.%3fZ"), + pl.col("validity").dt.strftime("%Y-%m-%dT%H:%M:%S.%3fZ"), + ) + + return df + + class Data: def __init__(self): """Initialize a Data instance.""" @@ -467,6 +617,130 @@ def parse_market_data(self, start_date_str: str, end_date_str: str, marketdatapa print("\nWriting CSV data completed.") + def parse_market_data_fast(self, start_date_str: str, end_date_str: str, marketdatapath: str, + savepath: str, market_type: str, max_workers: int = 4, verbose: bool = True): + """ + Fast version of parse_market_data using Polars + parallel raw file reading. + + Same interface as parse_market_data, but ~2x faster for EPEX data (2021+ format). + Currently supports EPEX only. Falls back to parse_market_data for NordPool. + + Args: + start_date_str (str): Start date in format "YYYY-MM-DD" + end_date_str (str): End date in format "YYYY-MM-DD" + marketdatapath (str): Path to market data folder with yearly/monthly subfolders + savepath (str): Directory where processed CSV files will be saved + market_type (str): "EPEX" or "NordPool" (NordPool falls back to original method) + max_workers (int, optional): Number of parallel threads for reading raw files. Defaults to 4. + verbose (bool, optional): Print progress messages. Defaults to True. + """ + if market_type != "EPEX": + return self.parse_market_data(start_date_str, end_date_str, marketdatapath, savepath, market_type, verbose) + + os.makedirs(savepath, exist_ok=True) + + start = date.fromisoformat(start_date_str) + end = date.fromisoformat(end_date_str) + + if start > end: + raise ValueError("Error: Start date is after end date.") + if start.year < 2020: + raise ValueError("Error: Years before 2020 are not supported.") + + # We need raw files from start through end + 1 day (parsed file of end_date will also be complete, unless end_date + 1 does not exist) + all_raw_dates = [] + d = start + while d <= end + timedelta(days=1): + all_raw_dates.append(d) + d += timedelta(days=1) + + # Parallel read of all raw files + raw_data: dict[date, pl.DataFrame] = {} + t0 = time.time() + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = { + executor.submit(_read_raw_epex_file_fast, dt, marketdatapath): dt + for dt in all_raw_dates + } + for future in as_completed(futures): + dt = futures[future] + try: + raw_data[dt] = future.result() + if verbose: + print(f" Read raw file for {dt} ({raw_data[dt].shape[0]} rows)") + except Exception as e: + print(f" ERROR reading {dt}: {e}") + raw_data[dt] = pl.DataFrame( + schema={ + "initial": pl.Int64, "side": pl.Utf8, "start": pl.Utf8, + "transaction": pl.Utf8, "validity": pl.Utf8, + "price": pl.Float64, "quantity": pl.Float64, + } + ) + + if verbose: + print(f"All raw files read in {time.time() - t0:.1f}s") + + # Sequential: combine adjacent days, group by transaction_date, save + target_dates = [] + d = start + while d <= end: + target_dates.append(d) + d += timedelta(days=1) + + df2 = pl.DataFrame() + for dt1 in target_dates: + dt2 = dt1 + timedelta(days=1) + + df1 = df2 if not df2.is_empty() else raw_data.get(dt1, pl.DataFrame()) + df2 = raw_data.get(dt2, pl.DataFrame()) if dt2 <= end + timedelta(days=1) else pl.DataFrame() + + frames = [f for f in [df1, df2] if not f.is_empty()] + if not frames: + continue + df = pl.concat(frames) + + df = df.with_columns( + pl.col("transaction").str.slice(0, 10).alias("transaction_date"), + pl.col("price").cast(pl.Float64).round(2), + pl.col("quantity").cast(pl.Float64).round(1), + ) + + target_str = dt1.isoformat() + group = df.filter(pl.col("transaction_date") == target_str) + + if group.is_empty(): + if verbose: + print(f" No data for {dt1}, skipping") + continue + + group = ( + group.sort("transaction") + .with_row_index("") + .drop("transaction_date") + ) + + group = group.with_columns(pl.col("validity").fill_null("")) + + group = group.select(["", "initial", "side", "start", "transaction", "validity", "price", "quantity"]) + + daily_filename = f"orderbook_{dt1}.csv" + zip_path = f"{savepath}{daily_filename}.zip" + + buf = StringIO() + group.write_csv(buf) + csv_bytes = buf.getvalue().encode("utf-8") + + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr(daily_filename, csv_bytes) + + if verbose: + print(f" Saved {dt1}: {group.shape[0]} rows") + + if verbose: + print(f"\nTotal time: {time.time() - t0:.1f}s") + def create_bins_from_csv(self, csv_list: list, save_path: str, verbose: bool = True): """ Convert zipped CSV files of pre-processed order book data into binary files. diff --git a/pyproject.toml b/pyproject.toml index c1c3916..37603ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ classifiers = [ dependencies = [ "numpy>=1.16.0", "pandas>=0.24.0", + "polars>=1.0.0", "matplotlib>=3.0.0", "tqdm>=4.0.0", ] From 0bed902c6dfe30ea40fb8afd7199ea2655f07baf Mon Sep 17 00:00:00 2001 From: DimitriBez Date: Thu, 2 Apr 2026 15:40:34 +0200 Subject: [PATCH 2/2] New parser logic --- bitepy/data.py | 234 ++++++++++++++++++++++++++++++------------------- 1 file changed, 144 insertions(+), 90 deletions(-) diff --git a/bitepy/data.py b/bitepy/data.py index 9fc9e44..c2681ba 100644 --- a/bitepy/data.py +++ b/bitepy/data.py @@ -8,6 +8,8 @@ # Licensed under MIT License, see https://opensource.org/license/mit ###################################################################### +import gc +import multiprocessing import pandas as pd import numpy as np import polars as pl @@ -51,8 +53,13 @@ _EPEX_DEDUP_COLS = ["OrderId", "InitialId", "ActionCode", "ValidityTime", "Price", "Quantity"] +_EPEX_PRODUCT_FILTERS = { + "Hourly": ["Intraday_Hour_Power", "XBID_Hour_Power"], + "Quarter-Hourly": ["Intraday_Quarter_Hour_Power", "XBID_Quarter_Hour_Power"], +} + -def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: +def _read_raw_epex_file_fast(timestamp: date, datapath: str, product: str = "Hourly") -> pl.DataFrame: """Read and process a single raw EPEX zip file using Polars.""" year = timestamp.strftime("%Y") month = timestamp.strftime("%m") @@ -71,7 +78,7 @@ def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: df = ( df.unique(subset=_EPEX_DEDUP_COLS, keep="first", maintain_order=True) .filter(pl.col("UserDefinedBlock") == "N") - .filter(pl.col("Product").is_in(["Intraday_Hour_Power", "XBID_Hour_Power"])) + .filter(pl.col("Product").is_in(_EPEX_PRODUCT_FILTERS[product])) .filter(pl.col("ActionCode").is_in(["A", "D", "C", "I"])) .drop(_EPEX_DROP_COLS) .rename(_EPEX_RENAME_MAP) @@ -88,7 +95,7 @@ def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: df = df.with_row_index("_idx") - # Process change messages (shift instead of while loop) + # Process change messages (shift instead of while loop) c_orders = df.filter(pl.col("action") == "C")["order"].unique().to_list() a_orders = df.filter(pl.col("action") == "A")["order"].unique().to_list() orders_to_process = list(set(c_orders) & set(a_orders)) @@ -132,7 +139,7 @@ def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: .alias("action") ) - # Process cancel messages + # Process cancel messages cancel_messages = df.filter(pl.col("action") == "D") a_orders_for_cancel = df.filter(pl.col("action") == "A")["order"].unique().to_list() cancel_messages = cancel_messages.filter(pl.col("order").is_in(a_orders_for_cancel)) @@ -174,6 +181,116 @@ def _read_raw_epex_file_fast(timestamp: date, datapath: str) -> pl.DataFrame: return df +def _read_raw_files_parallel( + dates: list[date], + marketdatapath: str, + product: str = "Hourly", + max_workers: int = 2, + verbose: bool = False, +) -> dict[date, pl.DataFrame]: + """Read a batch of raw EPEX files in parallel. Returns dict of date -> DataFrame.""" + empty_schema = { + "initial": pl.Int64, "side": pl.Utf8, "start": pl.Utf8, + "transaction": pl.Utf8, "validity": pl.Utf8, + "price": pl.Float64, "quantity": pl.Float64, + } + raw_data: dict[date, pl.DataFrame] = {} + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = { + executor.submit(_read_raw_epex_file_fast, dt, marketdatapath, product): dt + for dt in dates + } + for future in as_completed(futures): + dt = futures[future] + try: + raw_data[dt] = future.result() + if verbose: + print(f" Read raw file for {dt} ({raw_data[dt].shape[0]} rows)") + except Exception as e: + print(f" ERROR reading {dt}: {e}") + raw_data[dt] = pl.DataFrame(schema=empty_schema) + + return raw_data + + +def _save_day(dt1: date, raw_data: dict[date, pl.DataFrame], savepath: str, verbose: bool): + """Combine raw files for dt1 and dt1+1, filter to transaction date dt1, and save.""" + dt2 = dt1 + timedelta(days=1) + df1 = raw_data.get(dt1, pl.DataFrame()) + df2 = raw_data.get(dt2, pl.DataFrame()) + + frames = [f for f in [df1, df2] if not f.is_empty()] + if not frames: + if verbose: + print(f" No data for {dt1}, skipping") + return + + df = pl.concat(frames) + + df = df.with_columns( + pl.col("transaction").str.slice(0, 10).alias("transaction_date"), + pl.col("price").cast(pl.Float64).round(2), + pl.col("quantity").cast(pl.Float64).round(1), + ) + + group = df.filter(pl.col("transaction_date") == dt1.isoformat()) + + if group.is_empty(): + if verbose: + print(f" No data for {dt1}, skipping") + return + + group = ( + group.sort("transaction") + .with_row_index("") + .drop("transaction_date") + ) + + group = group.with_columns(pl.col("validity").fill_null("")) + group = group.select(["", "initial", "side", "start", "transaction", "validity", "price", "quantity"]) + + daily_filename = f"orderbook_{dt1}.csv" + zip_path = f"{savepath}{daily_filename}.zip" + + buf = StringIO() + group.write_csv(buf) + csv_bytes = buf.getvalue().encode("utf-8") + + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr(daily_filename, csv_bytes) + + if verbose: + print(f" Saved {dt1}: {group.shape[0]} rows") + + +def _process_chunk( + chunk_start: date, + chunk_end: date, + marketdatapath: str, + savepath: str, + product: str, + max_workers: int, + verbose: bool, +): + """Process a single chunk in a subprocess. All memory is freed when this exits.""" + raw_dates = [] + d = chunk_start + while d <= chunk_end + timedelta(days=1): + raw_dates.append(d) + d += timedelta(days=1) + + if verbose: + print(f"\n Chunk {chunk_start} to {chunk_end} ({len(raw_dates)} raw files)") + + raw_data = _read_raw_files_parallel(raw_dates, marketdatapath, product, max_workers, verbose) + + d = chunk_start + while d <= chunk_end: + _save_day(d, raw_data, savepath, verbose) + d += timedelta(days=1) + + class Data: def __init__(self): """Initialize a Data instance.""" @@ -618,25 +735,34 @@ def parse_market_data(self, start_date_str: str, end_date_str: str, marketdatapa print("\nWriting CSV data completed.") def parse_market_data_fast(self, start_date_str: str, end_date_str: str, marketdatapath: str, - savepath: str, market_type: str, max_workers: int = 4, verbose: bool = True): + savepath: str, market_type: str, product: str = "Hourly", + max_workers: int = 2, chunk_size: int = 3, verbose: bool = True): """ Fast version of parse_market_data using Polars + parallel raw file reading. Same interface as parse_market_data, but ~2x faster for EPEX data (2021+ format). Currently supports EPEX only. Falls back to parse_market_data for NordPool. + Processes the date range in chunks of `chunk_size` days to limit memory usage. + Each chunk runs in a subprocess so memory is truly freed by the OS between chunks. + Args: start_date_str (str): Start date in format "YYYY-MM-DD" end_date_str (str): End date in format "YYYY-MM-DD" marketdatapath (str): Path to market data folder with yearly/monthly subfolders savepath (str): Directory where processed CSV files will be saved market_type (str): "EPEX" or "NordPool" (NordPool falls back to original method) - max_workers (int, optional): Number of parallel threads for reading raw files. Defaults to 4. + product (str, optional): "Hourly" or "Quarter-Hourly". Defaults to "Hourly". + max_workers (int, optional): Number of parallel threads for reading raw files. Defaults to 2. + chunk_size (int, optional): Number of target days per chunk. Defaults to 3. verbose (bool, optional): Print progress messages. Defaults to True. """ if market_type != "EPEX": return self.parse_market_data(start_date_str, end_date_str, marketdatapath, savepath, market_type, verbose) + if product not in _EPEX_PRODUCT_FILTERS: + raise ValueError(f"Unknown product '{product}'. Must be one of: {list(_EPEX_PRODUCT_FILTERS.keys())}") + os.makedirs(savepath, exist_ok=True) start = date.fromisoformat(start_date_str) @@ -647,96 +773,24 @@ def parse_market_data_fast(self, start_date_str: str, end_date_str: str, marketd if start.year < 2020: raise ValueError("Error: Years before 2020 are not supported.") - # We need raw files from start through end + 1 day (parsed file of end_date will also be complete, unless end_date + 1 does not exist) - all_raw_dates = [] - d = start - while d <= end + timedelta(days=1): - all_raw_dates.append(d) - d += timedelta(days=1) - - # Parallel read of all raw files - raw_data: dict[date, pl.DataFrame] = {} t0 = time.time() - with ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = { - executor.submit(_read_raw_epex_file_fast, dt, marketdatapath): dt - for dt in all_raw_dates - } - for future in as_completed(futures): - dt = futures[future] - try: - raw_data[dt] = future.result() - if verbose: - print(f" Read raw file for {dt} ({raw_data[dt].shape[0]} rows)") - except Exception as e: - print(f" ERROR reading {dt}: {e}") - raw_data[dt] = pl.DataFrame( - schema={ - "initial": pl.Int64, "side": pl.Utf8, "start": pl.Utf8, - "transaction": pl.Utf8, "validity": pl.Utf8, - "price": pl.Float64, "quantity": pl.Float64, - } - ) - - if verbose: - print(f"All raw files read in {time.time() - t0:.1f}s") - - # Sequential: combine adjacent days, group by transaction_date, save - target_dates = [] - d = start - while d <= end: - target_dates.append(d) - d += timedelta(days=1) - - df2 = pl.DataFrame() - for dt1 in target_dates: - dt2 = dt1 + timedelta(days=1) + # Process in chunks, each in a subprocess so memory is truly freed + chunk_start = start + while chunk_start <= end: + chunk_end = min(chunk_start + timedelta(days=chunk_size - 1), end) - df1 = df2 if not df2.is_empty() else raw_data.get(dt1, pl.DataFrame()) - df2 = raw_data.get(dt2, pl.DataFrame()) if dt2 <= end + timedelta(days=1) else pl.DataFrame() - - frames = [f for f in [df1, df2] if not f.is_empty()] - if not frames: - continue - df = pl.concat(frames) - - df = df.with_columns( - pl.col("transaction").str.slice(0, 10).alias("transaction_date"), - pl.col("price").cast(pl.Float64).round(2), - pl.col("quantity").cast(pl.Float64).round(1), - ) - - target_str = dt1.isoformat() - group = df.filter(pl.col("transaction_date") == target_str) - - if group.is_empty(): - if verbose: - print(f" No data for {dt1}, skipping") - continue - - group = ( - group.sort("transaction") - .with_row_index("") - .drop("transaction_date") + p = multiprocessing.Process( + target=_process_chunk, + args=(chunk_start, chunk_end, marketdatapath, savepath, product, max_workers, verbose), ) + p.start() + p.join() - group = group.with_columns(pl.col("validity").fill_null("")) - - group = group.select(["", "initial", "side", "start", "transaction", "validity", "price", "quantity"]) - - daily_filename = f"orderbook_{dt1}.csv" - zip_path = f"{savepath}{daily_filename}.zip" - - buf = StringIO() - group.write_csv(buf) - csv_bytes = buf.getvalue().encode("utf-8") - - with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: - zf.writestr(daily_filename, csv_bytes) + if p.exitcode != 0: + raise RuntimeError(f"Chunk {chunk_start} to {chunk_end} failed (exit code {p.exitcode})") - if verbose: - print(f" Saved {dt1}: {group.shape[0]} rows") + chunk_start = chunk_end + timedelta(days=1) if verbose: print(f"\nTotal time: {time.time() - t0:.1f}s")