From 321a54c20cd3fd6a947d59ac312ec682e4c24256 Mon Sep 17 00:00:00 2001
From: Hedgehog-Jr <48445528+Hedgehog-Jr@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:15:04 +0000
Subject: [PATCH] Pending changes exported from your codespace

---
 dict_vectorizer.bin                           | Bin 0 -> 13007 bytes
 docker-compose.yml                            |  10 ++
 mlflow.dockerfile                             |  12 ++
 mlops/metadata.yaml                           |   1 +
 mlops/module_3/.gitignore                     |  14 ++
 mlops/module_3/__init__.py                    |   0
 mlops/module_3/charts/__init__.py             |   0
 mlops/module_3/custom/__init__.py             |   0
 mlops/module_3/data_exporters/__init__.py     |   0
 .../data_exporters/export_titanic_clean.py    |  16 +++
 mlops/module_3/data_exporters/save.py         |  35 +++++
 mlops/module_3/data_loaders/__init__.py       |   0
 mlops/module_3/data_loaders/load.py           |  28 ++++
 mlops/module_3/data_loaders/load_titanic.py   |  27 ++++
 mlops/module_3/data_s/data_loaders/load.py    |  29 ++++
 mlops/module_3/dbt/profiles.yml               |   9 ++
 mlops/module_3/extensions/__init__.py         |   0
 mlops/module_3/interactions/__init__.py       |   0
 mlops/module_3/io_config.yaml                 | 134 ++++++++++++++++++
 mlops/module_3/metadata.yaml                  |  55 +++++++
 mlops/module_3/pipelines/__init__.py          |   0
 .../pipelines/example_pipeline/__init__.py    |   0
 .../pipelines/example_pipeline/metadata.yaml  |  30 ++++
 mlops/module_3/pipelines/module_3/__init__.py |   0
 .../pipelines/module_3/interactions.yaml      |   2 +
 .../module_3/pipelines/module_3/metadata.yaml |  99 +++++++++++++
 mlops/module_3/requirements.txt               |   0
 mlops/module_3/s/transformers/transform.py    |  45 ++++++
 mlops/module_3/scratchpads/__init__.py        |   0
 mlops/module_3/transformers/__init__.py       |   0
 .../transformers/fill_in_missing_values.py    |  45 ++++++
 mlops/module_3/transformers/train.py          |  49 +++++++
 mlops/module_3/transformers/transform.py      |  45 ++++++
 mlops/module_3/utils/__init__.py              |   0
 mlops/settings.yaml                           |   2 +
 .../pipelines/module_3/__init__.py            |   0
 .../pipelines/module_3/metadata.yaml          |   6 +
 37 files changed, 693 insertions(+)
 create mode 100644 dict_vectorizer.bin
 create mode 100644 mlflow.dockerfile
 create mode 100755 mlops/module_3/.gitignore
 create mode 100755 mlops/module_3/__init__.py
 create mode 100755 mlops/module_3/charts/__init__.py
 create mode 100755 mlops/module_3/custom/__init__.py
 create mode 100755 mlops/module_3/data_exporters/__init__.py
 create mode 100755 mlops/module_3/data_exporters/export_titanic_clean.py
 create mode 100644 mlops/module_3/data_exporters/save.py
 create mode 100755 mlops/module_3/data_loaders/__init__.py
 create mode 100644 mlops/module_3/data_loaders/load.py
 create mode 100755 mlops/module_3/data_loaders/load_titanic.py
 create mode 100644 mlops/module_3/data_s/data_loaders/load.py
 create mode 100755 mlops/module_3/dbt/profiles.yml
 create mode 100755 mlops/module_3/extensions/__init__.py
 create mode 100755 mlops/module_3/interactions/__init__.py
 create mode 100755 mlops/module_3/io_config.yaml
 create mode 100755 mlops/module_3/metadata.yaml
 create mode 100755 mlops/module_3/pipelines/__init__.py
 create mode 100755 mlops/module_3/pipelines/example_pipeline/__init__.py
 create mode 100755 mlops/module_3/pipelines/example_pipeline/metadata.yaml
 create mode 100755 mlops/module_3/pipelines/module_3/__init__.py
 create mode 100644 mlops/module_3/pipelines/module_3/interactions.yaml
 create mode 100755 mlops/module_3/pipelines/module_3/metadata.yaml
 create mode 100755 mlops/module_3/requirements.txt
 create mode 100644 mlops/module_3/s/transformers/transform.py
 create mode 100755 mlops/module_3/scratchpads/__init__.py
 create mode 100755 mlops/module_3/transformers/__init__.py
 create mode 100755 mlops/module_3/transformers/fill_in_missing_values.py
 create mode 100644 mlops/module_3/transformers/train.py
 create mode 100644 mlops/module_3/transformers/transform.py
 create mode 100755 mlops/module_3/utils/__init__.py
 create mode 100755 mlops/unit_3_observability/pipelines/module_3/__init__.py
 create mode 100755 mlops/unit_3_observability/pipelines/module_3/metadata.yaml

diff --git a/dict_vectorizer.bin b/dict_vectorizer.bin
new file mode 100644
index 0000000000000000000000000000000000000000..738219ef611d33a2959cc2ede19fb4e270c2cc1e
GIT binary patch
literal 13007
zcmZu&Wtdh~)MXDPqO{!&qT|e&RxvSfgJ6J(;{b!3O$<&TpeUHAWAb8mcXtPNcXxMp
zeRrMr`=0N)XTAPe`|P{Uxo6#d&wZEsVdi&Ryd-pq|8)*+n>wwzsdYyA<mRUKj@IVJ
z=GpD7O%vOvw9F`PoHS)(d*iI;iR~?|Q|30e7CQS3&oblwKe4DVWI<tGVQ^>nN$qoH
zHWxa(&*+#wb55bN=j3TEP3^VSg++zVUTw`Yn_8Q)MGBoVtkBt`EgQBq7Z!GQYint3
z&#=$`-F!yV^yaq4!uV{(KEp?kY?;`Ejf@yRtg_JAH=n2|boNU!B~sbElodEt%c(j}
z)pM$Ww#7HBERo_pR94aImDQZ8;S_Jak~d$;o3G@}^R@y%)4+N{iHWQyu%5tr0_zE^
zC$OHtdUy|2tfz|gRI#3_5^J%ZD%MlQda77Y73-;DJ$%Dz)>F-Ts##Ap>!~iWJnN}u
zJ=LtIn)Ot(9$vkM_0+JQ8rD<8dTLltO^G|Qo*LFu!+L614=+{AdTLouE$gXeJ=xbZ
z{g~CVp4t+dU_G_0r<V2bygJrX$9n2mPaW&2V?A}Or;hd1mDm^Ssbf7%s%Jg*tf!v!
z)U%#?)>F@V>RC@c>!~lXRo0Vbl3%w5*3-ax8dy&Q>uF#;4Xmeu^)#@ahD=ZX7d_iT
z{uiBpYC;962^FL!RFIlbL25z;sR<RNCRC7`P(f-!1yv_hP<8lsGE}l2b_b!7^{_h#
z><&UD>#1Zt><$9EgTU^9f8;}nl9f<hY9iN%-9cb?5ZE0Ab_apoL11?fcn|Cj0=t92
z?jW!`2vz*Pu{#Lt4g$M_!0sThJ1Fr0*&PIS2Z7x|V0RGM9fWFr-`E`lb_apoL11@K
zqWbI(0=t92?jW!`2<#35yMs`}=ZD=vV0RGM9h7)=*&PIS2Z7x|V0RGM9R#?8<QtoP
z%aTm?DN8chmn{FO&JIeRf^(8&vd?mop$|L`Ny5p_NRpu~<Afv}5`puPa7YB*3Wp;<
z31=h8kO-WNgu^S72po%K7Ky;2NH`>d7RHIlPr`vnG9&`WA>oh+9EOBLB5)KE4vD})
z$iGgs430s58JvP7Ln3en5)O&r699Xh%pwsm#|ei-&?GqH{3ICTBts%#ixUosfGJKm
zBm$N=;gARz;(R5D0y~^v25vaXkO+9;ghL|WgcA;lfDcYMyh9R^CGwNtfs+i0fCElA
zBm(|7;gATp--JUV;C&MgM2s1m^tbYfOhmsVQzDfKOIew+G?i^KO=Y`GQ`t7t6rOE<
z6MVzyU=tGWAv)NEl|A3&hS9+$EZ%%{unCJdA02F-#oLMwHX*UZ=wK68_PQjSj|MDZ
zWiL$1Vm;BpCKFjtbg+3A?;$$agv5HHgH2c^*5dj^2b)Y}J<-7?EY=epY@Wq8j1D#-
zv7YE)6Bg@<4mM$NeWHU+SbSKbgH2d`Sa|j5VDs~$gH2NWqDBXsu=sIB2b-|CKGDG@
zEUr&<uz40Q6&-9s;`&4fo3OY((ZMDxeuA;Tm9V%zvA>nDxIR2D_P6r$Vt*@1aeZQc
zD`9bcVt*@P@k5RMt%Sw(iT$lSi%GG+m5{hTvA>nDxIVGJm9V%zvA>nD_&LY^R>I=?
zWSQja8T(uLd9lBhq_{q@zm>4KKC!=*u(&?4zm>4~(Klqf&;Q70yUPEX@~=;^<&|5z
z*z!tJB(T`>N?25#Vt*@PQFV&_t%OC@iJhK2DfYLL6uoW5{#L@Gx2@RUN?5EX_O}ui
z>xuoXgvEOJnU<(e3DxWlLhLQ&Z^-VT*xyQ6^tKiITM3Kawqk!P&*Epw?x5J;O6Ku{
zi2bdEMW-jQJ1F+Il8IcO*xyQ6Tpzw6yMtnXE58)GgJORxVU<{(-#2y##r{^Z7S|{C
zxAH7ro!vpPzm?46`mj4F_P3IWCGN=Y8@q#Ie=Avw>%&X2J1F+I@=LKhDE7A!7S|{K
z-+i8yy<5pAHY-Jv$#$OqE@uZNPl^4l+`?!BC!DMbNru<S_MC9C5+s@Iu_YN25&K*D
z{g4Q{RqSu&CdK|%K0_j6e=E=7l}SYGZ{=r^h}hrCb4Ua&9Q#|jNwL3`&ya}N-^z1H
zMC@<nIaG<*-^z37bz++w+gZ6;v7MFA(4J#EE6<@l^9vu_S@~J?;A1;0&!GoTlVUq7
zHz~HW@);5l+gW)IiHPm2Jck~9Y-i;;^x$JVE5{*9Y-i;ubdKSD^XD(Nv+}b@L~Li}
zIV2*sv+^9?Aypzv<hK*sS-EB4coPnZfZt6xBm!<X;gAS;-GozLXzuJiEBm)u6FR0f
zwa#h8KLGP9q)V4kXNtY7kNBpDhl-u6C;BkN<ziEHUwcI#_5U#bUaUsf_pyk#7vB#!
zTI@}I%HKu&oY-zU+}A+dPwWeQ)2~E)me>P&qmMwmso3^f=er}`PV8bW_#23y6x&iO
z{2|0Qi><25`i+P$5_?CVwOw=@yFgFzm5^In?0fyds}Zj!cB!7`Ya;G1c8;Fl7a%@G
z>~_7vUet%}NBzj|(yOhXie0VeSbr_^Wy&(nKVmQG5*w+*{BO+pM(lpQ!EZr)so37C
zei`D^#Ts-&`(3~ApD_Md>}kE%yJxZf>dPV3Lu@nM$S*{Es@PcF-hR<f{XC3M62Anv
zSZsY=!JkHauh?yRx!s{x`W6^(BK{WeY4JUPJBoGcJo{4L@&=4I6kAz)+e3P@-Jlox
z2gtoBcD|nM+aazMTTXk}y?ULUrN{d?<aQ97p^e@vi}g=CLyxs<^*rATmv$0g0=!l1
z4SmutMqCiz5?CR&iZ0`qBR)fHqV8@_>fLscF7iK-`%3IL{oKyelWcR{*tbHiQfyNl
zYO8Bs+fHk33*E#H)Cs-|u5BYWPIvG}5Z@;Dpx)%aBK}N#YhVy-(p~KmUF^NHvHtDd
z0KeGAy1M<V-}yFRRf%n*Rd%0V?;Byfn%EV3rv0to*cjc~?$oR7HGSNk*9Wa$hxq2;
zY%Dfe_wXKA?l)fwCI3VGVO+XJY-<g+t8Qz9btS(FxwFN-)%Q$wbK6UIvQfH){h?p_
z@wj$~*hadV3zquPdjfv<K7jx1Eq&U@fV8!EKcI*W((b-3;%f1OfF^d8o^7?dfi2df
z?HHZyuOa`q*t`0i&C&y`phwwEZSof6_ZK@z&4R9D&+7elx-PUI^h2AcGi;ca+c)~I
zp9$V^VoUT^-wE+%;(r0Z7QYC%NbE2D+P>1aZMIJF>yf)a>_R=&KGc`&1%1di(KYQd
zJ>7fZ;ruCfnjYh?fcL2QNx=DH$Lk^9C5!b3J4xsJLZs)2?XKI|je3!vjd7>=(xCk1
z1F=%KGP_RCx8Yi2i}Xl4Sr=HFPO>Al!$#<)_Ke<VjXK6FF?DURopduhQ4jM6F}_Ls
z7vQI2x9Fv|p{`=@>GM{ngY7$g-zs%&Ytj9!Lf5iW^l<M4cb?ewdVzP%V*S~U)`RR!
zJ<d+hLv2qT;X^=KS!`Wh&JNWUo38uWAv)de)@$q|ec4~Y^@qe?2R<RTfv)KNA)~C!
zcGfNJN<GV-(+9j6*T;!nu4mW*x~C1)Znl+H+WY!~wQIBeq#yeXa2mzd)`7NA=h&b6
zm3^$Q*w^}w&DR6%Se<L*b!U52Z?|e)&jt7SQEUzE=eJ_+W#SdUwZs-^s~x4Y>{tEF
z2O-y8{88ZT;>%{J|Jo!S>s_(L&tk1Q(e}_CJ%F-~Sh=p@-y{A&d=c<SvA){H>ktnX
zUn)cW!{=b=VjVi!4@Nvq>|wpdj?i{nQG3}VdYifK<I8~lk9Wh}ei8oz)Gx)?1`ZT|
z9C)XAA6)-W>}UPNPeyKm*i(9s_eB2pY~#Rx#gD+XcJWn#%Zi^1JW>2J;OpXN0go3y
z0eGnReZcF*Yk?bx_r*G0%KQwBj}`wA_>%aEz{A8p2EHP8k6vp#YJ-o&c$nDP+UdU|
z{zCjV;N{|{0*??M1>8bxg6`%oBYs4DJaA|6=YbE3ZwA~*Y(E|ChIlXWYk}v9zXV(&
zHbA@jk%&9Qe*}Ic{w(l*@mGP5iH+8+d;;R##18=ODSkfiWbuE2--*8md|s?d*Y%qa
zUo19MSM}2oFBCgm+k8Ia1H~P<kNE1qzTyW0Cx}-8*A*`Sj}qS-sN(wqM~Qa;CyU<&
zyjuJ#;M-!`YPGM2czLmXbfj;Hcop%{z^%kT0=_JMEO4&)VBkvPM*|NM`#@jxGSuiN
z@gI=;P`n;EMEpA7`QpC;KNlOKEBh0O?-Dy$r}>kJ?-suwc!T&pz>(tH1M9?p27V&G
z0&pqu9=OkM;<GWHBK{8WS@Ck<8se7%PZR$d_>TA?!0F=41AB@;1H3Q$dSe|a^Yt-a
zLA(t(Nql4A>f-%@W#Uc1UB%|=R9jD%x5xEPTSo_3vySsdTpJ@^16*HhEgj%5B7Ru>
zIN-tJ*8tBI{}%Y3_-?@A;_Cnhi7y5oEq)L1TCwG|r?(=WD84IjTd~*m3A;%z_6IP&
zQS3kc-k#9A{1}X9i{A{qB>OBw(_i9uV|<PHaA1x26yV<CbAeOE4+pl1neJt;>SH!d
z$J<J}v@7zPi+=}vUwlVkgZP`kr^NRJju1Oe5B9!@yNG?Nulc8lUlacZ_^$Yhz+U1j
z1AB{~0z6#&FyKt_fxvF!?*m^D+d=F76U48It*QNeDB@McdqdCPVsGm+wy%!z4Uk_^
z>>vHsR?|Lqn9j7%^$q_A`ESKe)g!zM=KLUbjh^f0Ab*0`V|s@#K-?;Jt6t{Q5RVt1
z3fx!xZQwKFI{@p&2VsrwWqvQl*NIOA?k+wHc!1a^`l{Wmm)K|ey6vyK_yF*`mU%m_
zH;eBK+*15G-~-}U1J4n^6L^)_9Ce$bd;27e$BN$$yh3~v;F{uh0Iw8Z3phY*1zpPb
zN4$&pWZ)j+rvZ-<p9yRdKNQ#^UY4Q$<ey{shWK>ge&RO(FBI>Nt^O*$8peIZzW}}|
zJ`XrUd;qYk_+!93#E$~b5`PMKkNB6sx5U>4E+@VUa2e_7?AQ3ef2NZCHA-t+_9r3P
QUz${w*OXTlIwq9=59F(oT>t<8

literal 0
HcmV?d00001

diff --git a/docker-compose.yml b/docker-compose.yml
index 9decd7127..a827bc14d 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -38,6 +38,16 @@ services:
     restart: always
     networks:
       - app-network
+  mlflow:
+    build:
+      context: .
+      dockerfile: mlflow.dockerfile
+    ports:
+      - "5000:5000"
+    volumes:
+      - "${PWD}/mlflow_data:/home/mlflow_data/"
+    networks:
+      - app-network
 networks:
   app-network:
     driver: bridge
diff --git a/mlflow.dockerfile b/mlflow.dockerfile
new file mode 100644
index 000000000..70da0d850
--- /dev/null
+++ b/mlflow.dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.10-slim
+# MLflow is pinned to an exact version; --no-cache-dir keeps pip's download cache out of the layer.
+RUN pip install --no-cache-dir mlflow==2.12.1
+# Port the tracking server listens on (must match --port below).
+EXPOSE 5000
+# sqlite:////... (four slashes) is an absolute DB path; the three-slash form is relative to the working directory.
+CMD [ \
+    "mlflow", "server", \
+    "--backend-store-uri", "sqlite:////home/mlflow_data/mlflow.db", \
+    "--host", "0.0.0.0", \
+    "--port", "5000" \
+]
\ No newline at end of file
diff --git a/mlops/metadata.yaml b/mlops/metadata.yaml
index 4eb3b7742..30a37385e 100644
--- a/mlops/metadata.yaml
+++ b/mlops/metadata.yaml
@@ -15,3 +15,4 @@ features:
   operation_history: true
   polars: true
 help_improve_mage: true
+project_uuid: 86daa7e2889a43988632768a1e669081
diff --git a/mlops/module_3/.gitignore b/mlops/module_3/.gitignore
new file mode 100755
index 000000000..8b3e82f61
--- /dev/null
+++ b/mlops/module_3/.gitignore
@@ -0,0 +1,14 @@
+.DS_Store
+.file_versions
+.gitkeep
+.log
+.logs/
+.mage_temp_profiles
+.preferences.yaml
+.variables/
+__pycache__/
+docker-compose.override.yml
+logs/
+mage-ai.db
+mage_data/
+secrets/
diff --git a/mlops/module_3/__init__.py b/mlops/module_3/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/charts/__init__.py b/mlops/module_3/charts/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/custom/__init__.py b/mlops/module_3/custom/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/data_exporters/__init__.py b/mlops/module_3/data_exporters/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/data_exporters/export_titanic_clean.py b/mlops/module_3/data_exporters/export_titanic_clean.py
new file mode 100755
index 000000000..cb7aa63aa
--- /dev/null
+++ b/mlops/module_3/data_exporters/export_titanic_clean.py
@@ -0,0 +1,16 @@
+from mage_ai.io.file import FileIO
+from pandas import DataFrame
+
+if 'data_exporter' not in globals():
+    from mage_ai.data_preparation.decorators import data_exporter
+
+
+@data_exporter
+def export_data_to_file(df: DataFrame, **kwargs) -> None:
+    """
+    Write the incoming data frame to ``titanic_clean.csv`` through Mage's FileIO.
+
+    The target path is relative (presumably resolved against the process
+    working directory). Extra keyword arguments are accepted but not used.
+    """
+    FileIO().export(df, 'titanic_clean.csv')
diff --git a/mlops/module_3/data_exporters/save.py b/mlops/module_3/data_exporters/save.py
new file mode 100644
index 000000000..b13bae340
--- /dev/null
+++ b/mlops/module_3/data_exporters/save.py
@@ -0,0 +1,35 @@
+import mlflow
+import pickle
+
+# Import-time side effects: select the MLflow tracking server (host "mlflow", port 5000) and the experiment.
+mlflow.set_tracking_uri("http://mlflow:5000")
+mlflow.set_experiment("nec-taxi-experiment")  # NOTE(review): "nec" may be a typo for "nyc" - confirm before renaming
+
+if 'data_exporter' not in globals():
+    from mage_ai.data_preparation.decorators import data_exporter
+
+
+@data_exporter
+def export_data(data, *args, **kwargs):
+    """
+    Log the fitted vectorizer and model to MLflow in a single run.
+
+    Args:
+        data: ``(dv, lr)`` pair from the upstream block; ``dv`` is pickled and
+            logged as the artifact ``dict_vectorizer.bin``, ``lr`` is logged with
+            ``mlflow.sklearn.log_model`` under the artifact path ``model``.
+        args: Outputs of any additional upstream blocks (unused).
+
+    The pickle is staged in a temporary directory rather than the working
+    directory, so no stray ``dict_vectorizer.bin`` is left behind in the project.
+    """
+    import os
+    import tempfile
+    dv, lr = data
+    with mlflow.start_run(), tempfile.TemporaryDirectory() as staging_dir:
+        dv_path = os.path.join(staging_dir, 'dict_vectorizer.bin')
+        with open(dv_path, 'wb') as f_out:
+            pickle.dump(dv, f_out)
+        mlflow.log_artifact(dv_path)
+        mlflow.sklearn.log_model(lr, 'model')
+    print('DONE')
diff --git a/mlops/module_3/data_loaders/__init__.py b/mlops/module_3/data_loaders/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/data_loaders/load.py b/mlops/module_3/data_loaders/load.py
new file mode 100644
index 000000000..8c6c9e54f
--- /dev/null
+++ b/mlops/module_3/data_loaders/load.py
@@ -0,0 +1,28 @@
+import pandas as pd
+
+
+if 'data_loader' not in globals():
+    from mage_ai.data_preparation.decorators import data_loader
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@data_loader
+def load_data(*args, **kwargs):
+    """
+    Load the yellow-taxi trip parquet for 2023-03 (per the file name) with pandas.
+
+    Returns:
+        The DataFrame produced by ``pd.read_parquet`` for the hard-coded URL.
+    """
+    # Positional and keyword arguments are accepted but ignored.
+    trips_url = 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-03.parquet'
+    return pd.read_parquet(trips_url)
+
+
+@test
+def test_output(output, *args) -> None:
+    """
+    Sanity-check the loader result: raise AssertionError when ``output`` is None.
+    """
+    assert output is not None, 'The output is undefined'
\ No newline at end of file
diff --git a/mlops/module_3/data_loaders/load_titanic.py b/mlops/module_3/data_loaders/load_titanic.py
new file mode 100755
index 000000000..c664e0f2d
--- /dev/null
+++ b/mlops/module_3/data_loaders/load_titanic.py
@@ -0,0 +1,27 @@
+import io
+import pandas as pd
+import requests
+from pandas import DataFrame
+
+if 'data_loader' not in globals():
+    from mage_ai.data_preparation.decorators import data_loader
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@data_loader
+def load_data_from_api(**kwargs) -> DataFrame:
+    """
+    Read the Titanic CSV from its hard-coded GitHub raw URL into a DataFrame.
+    """
+    # Keyword arguments are accepted but not used.
+    titanic_csv = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv?raw=True'
+    return pd.read_csv(titanic_csv)
+
+
+@test
+def test_output(df) -> None:
+    """
+    Sanity-check the loader result: raise AssertionError when ``df`` is None.
+    """
+    assert df is not None, 'The output is undefined'
diff --git a/mlops/module_3/data_s/data_loaders/load.py b/mlops/module_3/data_s/data_loaders/load.py
new file mode 100644
index 000000000..ca314435e
--- /dev/null
+++ b/mlops/module_3/data_s/data_loaders/load.py
@@ -0,0 +1,29 @@
+import pandas as pd
+
+
+if 'data_loader' not in globals():
+    from mage_ai.data_preparation.decorators import data_loader
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@data_loader
+def load_data(*args, **kwargs):
+    """
+    Fetch the yellow-taxi trip parquet for 2023-03 (per the file name) from its fixed URL.
+
+    Returns:
+        The DataFrame that ``pd.read_parquet`` builds from that file.
+    """
+    # No argument is consulted; the source location is hard-coded below.
+    parquet_url = 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-03.parquet'
+    trips = pd.read_parquet(parquet_url)
+    return trips
+
+
+@test
+def test_output(output, *args) -> None:
+    """
+    Sanity-check the loader result: raise AssertionError when ``output`` is None.
+    """
+    assert output is not None, 'The output is undefined'
\ No newline at end of file
diff --git a/mlops/module_3/dbt/profiles.yml b/mlops/module_3/dbt/profiles.yml
new file mode 100755
index 000000000..90599f894
--- /dev/null
+++ b/mlops/module_3/dbt/profiles.yml
@@ -0,0 +1,9 @@
+# https://docs.getdbt.com/docs/core/connect-data-platform/profiles.yml
+
+base:
+  outputs:
+
+    dev:
+      type: duckdb
+
+  target: dev
diff --git a/mlops/module_3/extensions/__init__.py b/mlops/module_3/extensions/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/interactions/__init__.py b/mlops/module_3/interactions/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/io_config.yaml b/mlops/module_3/io_config.yaml
new file mode 100755
index 000000000..80b4d9cef
--- /dev/null
+++ b/mlops/module_3/io_config.yaml
@@ -0,0 +1,134 @@
+version: 0.1.1
+default:
+  # Default profile created for data IO access.
+  # Add your credentials for the source you use, and delete the rest.
+  # AWS
+  AWS_ACCESS_KEY_ID: "{{ env_var('AWS_ACCESS_KEY_ID') }}"
+  AWS_SECRET_ACCESS_KEY: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}"
+  AWS_SESSION_TOKEN: session_token (Used to generate Redshift credentials)
+  AWS_REGION: region
+  # Algolia
+  ALGOLIA_APP_ID: app_id
+  ALGOLIA_API_KEY: api_key
+  ALGOLIA_INDEX_NAME: index_name
+  # Azure
+  AZURE_CLIENT_ID: "{{ env_var('AZURE_CLIENT_ID') }}"
+  AZURE_CLIENT_SECRET: "{{ env_var('AZURE_CLIENT_SECRET') }}"
+  AZURE_STORAGE_ACCOUNT_NAME: "{{ env_var('AZURE_STORAGE_ACCOUNT_NAME') }}"
+  AZURE_TENANT_ID: "{{ env_var('AZURE_TENANT_ID') }}"
+  # Chroma
+  CHROMA_COLLECTION: collection_name
+  CHROMA_PATH: path
+  # Clickhouse
+  CLICKHOUSE_DATABASE: default
+  CLICKHOUSE_HOST: host.docker.internal
+  CLICKHOUSE_INTERFACE: http
+  CLICKHOUSE_PASSWORD: null
+  CLICKHOUSE_PORT: 8123
+  CLICKHOUSE_USERNAME: null
+  # Druid
+  DRUID_HOST: hostname
+  DRUID_PASSWORD: password
+  DRUID_PATH: /druid/v2/sql/
+  DRUID_PORT: 8082
+  DRUID_SCHEME: http
+  DRUID_USER: user
+  # DuckDB
+  DUCKDB_DATABASE: database
+  DUCKDB_SCHEMA: main
+  # Google
+  GOOGLE_SERVICE_ACC_KEY:
+    type: service_account
+    project_id: project-id
+    private_key_id: key-id
+    private_key: "-----BEGIN PRIVATE KEY-----\nyour_private_key\n-----END_PRIVATE_KEY"
+    client_email: your_service_account_email
+    auth_uri: "https://accounts.google.com/o/oauth2/auth"
+    token_uri: "https://accounts.google.com/o/oauth2/token"
+    auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs"
+    client_x509_cert_url: "https://www.googleapis.com/robot/v1/metadata/x509/your_service_account_email"
+  GOOGLE_SERVICE_ACC_KEY_FILEPATH: "/path/to/your/service/account/key.json"
+  GOOGLE_LOCATION: US # Optional
+  # MongoDB
+  # Specify either the connection string or the (host, password, user, port) to connect to MongoDB.
+  MONGODB_CONNECTION_STRING: "mongodb://{username}:{password}@{host}:{port}/"
+  MONGODB_HOST: host
+  MONGODB_PORT: 27017
+  MONGODB_USER: user
+  MONGODB_PASSWORD: password
+  MONGODB_DATABASE: database
+  MONGODB_COLLECTION: collection
+  # MSSQL
+  MSSQL_DATABASE: database
+  MSSQL_SCHEMA: schema
+  MSSQL_DRIVER: "ODBC Driver 18 for SQL Server"
+  MSSQL_HOST: host
+  MSSQL_PASSWORD: password
+  MSSQL_PORT: 1433
+  MSSQL_USER: SA
+  # MySQL
+  MYSQL_DATABASE: database
+  MYSQL_HOST: host
+  MYSQL_PASSWORD: password
+  MYSQL_PORT: 3306
+  MYSQL_USER: root
+  # Pinot
+  PINOT_HOST: hostname
+  PINOT_PASSWORD: password
+  PINOT_PATH: /query/sql
+  PINOT_PORT: 8000
+  PINOT_SCHEME: http
+  PINOT_USER: user
+  # PostgreSQL
+  POSTGRES_CONNECT_TIMEOUT: 10
+  POSTGRES_DBNAME: postgres
+  POSTGRES_SCHEMA: public # Optional
+  POSTGRES_USER: username
+  POSTGRES_PASSWORD: password
+  POSTGRES_HOST: hostname
+  POSTGRES_PORT: 5432
+  # Qdrant
+  QDRANT_COLLECTION: collection
+  QDRANT_PATH: path
+  # Redshift
+  REDSHIFT_SCHEMA: public # Optional
+  REDSHIFT_DBNAME: redshift_db_name
+  REDSHIFT_HOST: redshift_cluster_id.identifier.region.redshift.amazonaws.com
+  REDSHIFT_PORT: 5439
+  REDSHIFT_TEMP_CRED_USER: temp_username
+  REDSHIFT_TEMP_CRED_PASSWORD: temp_password
+  REDSHIFT_DBUSER: redshift_db_user
+  REDSHIFT_CLUSTER_ID: redshift_cluster_id
+  REDSHIFT_IAM_PROFILE: default
+  # Snowflake
+  SNOWFLAKE_USER: username
+  SNOWFLAKE_PASSWORD: password
+  SNOWFLAKE_ACCOUNT: account_id.region
+  SNOWFLAKE_DEFAULT_WH: null                  # Optional default warehouse
+  SNOWFLAKE_DEFAULT_DB: null                  # Optional default database
+  SNOWFLAKE_DEFAULT_SCHEMA: null              # Optional default schema
+  SNOWFLAKE_PRIVATE_KEY_PASSPHRASE: null      # Optional private key passphrase
+  SNOWFLAKE_PRIVATE_KEY_PATH: null            # Optional private key path
+  SNOWFLAKE_ROLE: null                        # Optional role name
+  SNOWFLAKE_TIMEOUT: null                     # Optional timeout in seconds
+  # Trino
+  trino:
+    catalog: postgresql                       # Change this to the catalog of your choice
+    host: 127.0.0.1
+    http_headers:
+      X-Something: 'mage=power'
+    http_scheme: http
+    password: mage1337                        # Optional
+    port: 8080
+    schema: core_data
+    session_properties:                       # Optional
+      acc01.optimize_locality_enabled: false
+      optimize_hash_generation: true
+    source: trino-cli                         # Optional
+    user: admin
+    verify: /path/to/your/ca.crt              # Optional
+  # Weaviate
+  WEAVIATE_ENDPOINT: https://some-endpoint.weaviate.network
+  WEAVIATE_INSTANCE_API_KEY: YOUR-WEAVIATE-API-KEY
+  WEAVIATE_INFERENCE_API_KEY: YOUR-OPENAI-API-KEY
+  WEAVIATE_COLLECTION: collectionn_name
diff --git a/mlops/module_3/metadata.yaml b/mlops/module_3/metadata.yaml
new file mode 100755
index 000000000..f208a035a
--- /dev/null
+++ b/mlops/module_3/metadata.yaml
@@ -0,0 +1,55 @@
+project_type: standalone
+
+variables_dir: ~/.mage_data
+# remote_variables_dir: s3://bucket/path_prefix
+
+variables_retention_period: '90d'
+
+emr_config:
+  # You can customize the EMR cluster instance size with the two parameters
+  master_instance_type: 'r5.4xlarge'
+  slave_instance_type: 'r5.4xlarge'
+
+  # Configure security groups for EMR cluster instances.
+  # The default managed security groups are ElasticMapReduce-master and ElasticMapReduce-slave
+  # master_security_group: 'sg-xxxxxxxxxxxx'
+  # slave_security_group: 'sg-yyyyyyyyyyyy'
+
+  # If you want to ssh tunnel into EMR cluster, ec2_key_name must be configured.
+  # You can create a key pair in page https://console.aws.amazon.com/ec2#KeyPairs and download the key file.
+  # ec2_key_name: '[ec2_key_pair_name]'
+
+spark_config:
+  # Application name
+  app_name: 'my spark app'
+  # Master URL to connect to
+  # e.g., spark_master: 'spark://host:port', or spark_master: 'yarn'
+  spark_master: 'local'
+  # Executor environment variables
+  # e.g., executor_env: {'PYTHONPATH': '/home/path'}
+  executor_env: {}
+  # Jar files to be uploaded to the cluster and added to the classpath
+  # e.g., spark_jars: ['/home/path/example1.jar']
+  spark_jars: []
+  # Path where Spark is installed on worker nodes
+  # e.g. spark_home: '/usr/lib/spark'
+  spark_home:
+  # List of key-value pairs to be set in SparkConf
+  # e.g., others: {'spark.executor.memory': '4g', 'spark.executor.cores': '2'}
+  others: {}
+  # Whether to create custom SparkSession via code and set in kwargs['context']
+  use_custom_session: false
+  # The variable name to set in kwargs['context'],
+  # e.g. kwargs['context']['spark'] = spark_session
+  custom_session_var_name: 'spark'
+
+help_improve_mage: true
+notification_config:
+  alert_on:
+  - trigger_failure
+  - trigger_passed_sla
+  slack_config:
+    webhook_url: "{{ env_var('MAGE_SLACK_WEBHOOK_URL') }}"
+  teams_config:
+    webhook_url: "{{ env_var('MAGE_TEAMS_WEBHOOK_URL') }}"
+project_uuid: module_3
diff --git a/mlops/module_3/pipelines/__init__.py b/mlops/module_3/pipelines/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/pipelines/example_pipeline/__init__.py b/mlops/module_3/pipelines/example_pipeline/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/pipelines/example_pipeline/metadata.yaml b/mlops/module_3/pipelines/example_pipeline/metadata.yaml
new file mode 100755
index 000000000..c04cfbcf8
--- /dev/null
+++ b/mlops/module_3/pipelines/example_pipeline/metadata.yaml
@@ -0,0 +1,30 @@
+blocks:
+- all_upstream_blocks_executed: true
+  downstream_blocks:
+  - fill_in_missing_values
+  name: load_titanic
+  status: not_executed
+  type: data_loader
+  upstream_blocks: []
+  uuid: load_titanic
+- all_upstream_blocks_executed: true
+  downstream_blocks:
+  - export_titanic_clean
+  name: fill_in_missing_values
+  status: not_executed
+  type: transformer
+  upstream_blocks:
+  - load_titanic
+  uuid: fill_in_missing_values
+- all_upstream_blocks_executed: true
+  downstream_blocks: []
+  name: export_titanic_clean
+  status: not_executed
+  type: data_exporter
+  upstream_blocks:
+  - fill_in_missing_values
+  uuid: export_titanic_clean
+name: example_pipeline
+type: python
+uuid: example_pipeline
+widgets: []
diff --git a/mlops/module_3/pipelines/module_3/__init__.py b/mlops/module_3/pipelines/module_3/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/pipelines/module_3/interactions.yaml b/mlops/module_3/pipelines/module_3/interactions.yaml
new file mode 100644
index 000000000..a1d40f831
--- /dev/null
+++ b/mlops/module_3/pipelines/module_3/interactions.yaml
@@ -0,0 +1,2 @@
+blocks: {}
+layout: []
diff --git a/mlops/module_3/pipelines/module_3/metadata.yaml b/mlops/module_3/pipelines/module_3/metadata.yaml
new file mode 100755
index 000000000..dd955ba30
--- /dev/null
+++ b/mlops/module_3/pipelines/module_3/metadata.yaml
@@ -0,0 +1,99 @@
+blocks:
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: module_3/data_loaders/load.py
+  downstream_blocks:
+  - transform
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: load
+  retry_config: null
+  status: executed
+  timeout: null
+  type: data_loader
+  upstream_blocks: []
+  uuid: load
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: module_3/transformers/transform.py
+  downstream_blocks:
+  - train
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: transform
+  retry_config: null
+  status: executed
+  timeout: null
+  type: transformer
+  upstream_blocks:
+  - load
+  uuid: transform
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: module_3/transformers/train.py
+  downstream_blocks:
+  - save
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: train
+  retry_config: null
+  status: executed
+  timeout: null
+  type: transformer
+  upstream_blocks:
+  - transform
+  uuid: train
+- all_upstream_blocks_executed: true
+  color: null
+  configuration:
+    file_source:
+      path: module_3/data_exporters/save.py
+  downstream_blocks: []
+  executor_config: null
+  executor_type: local_python
+  has_callback: false
+  language: python
+  name: save
+  retry_config: null
+  status: executed
+  timeout: null
+  type: data_exporter
+  upstream_blocks:
+  - train
+  uuid: save
+cache_block_output_in_memory: false
+callbacks: []
+concurrency_config: {}
+conditionals: []
+created_at: '2024-10-01 13:17:54.330994+00:00'
+data_integration: null
+description: null
+executor_config: {}
+executor_count: 1
+executor_type: null
+extensions: {}
+name: module_3
+notification_config: {}
+remote_variables_dir: null
+retry_config: {}
+run_pipeline_in_one_process: false
+settings:
+  triggers: null
+spark_config: {}
+tags: []
+type: python
+uuid: module_3
+variables_dir: /home/src/mage_data/module_3
+widgets: []
diff --git a/mlops/module_3/requirements.txt b/mlops/module_3/requirements.txt
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/s/transformers/transform.py b/mlops/module_3/s/transformers/transform.py
new file mode 100644
index 000000000..42f147036
--- /dev/null
+++ b/mlops/module_3/s/transformers/transform.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@transformer
+def transform(df, *args, **kwargs):
+    """
+    Derive trip duration, keep 1-60 minute trips, stringify location IDs.
+
+    Works on copies, so the frame passed in is left unmodified and the
+    categorical cast no longer writes into a filtered slice of it.
+
+    Args:
+        df: Frame with ``tpep_pickup_datetime``, ``tpep_dropoff_datetime``,
+            ``PULocationID`` and ``DOLocationID`` columns.
+        args: Outputs of any additional upstream blocks (unused).
+
+    Returns:
+        A new DataFrame with a float ``duration`` column in minutes, limited
+        to rows where 1 <= duration <= 60, and both location IDs cast to str.
+    """
+    dropoff = pd.to_datetime(df.tpep_dropoff_datetime)
+    pickup = pd.to_datetime(df.tpep_pickup_datetime)
+    # Vectorised timedelta -> minutes; avoids one Python call per row.
+    minutes = (dropoff - pickup).dt.total_seconds() / 60
+
+    df = df.assign(tpep_dropoff_datetime=dropoff, tpep_pickup_datetime=pickup, duration=minutes)
+    df = df[(df.duration >= 1) & (df.duration <= 60)].copy()
+
+    categorical = ['PULocationID', 'DOLocationID']
+    df[categorical] = df[categorical].astype(str)
+    return df
+
+
+@test
+def test_output(output, *args) -> None:
+    """
+    Sanity-check the transformer result: raise AssertionError when ``output`` is None.
+    """
+    assert output is not None, 'The output is undefined'
\ No newline at end of file
diff --git a/mlops/module_3/scratchpads/__init__.py b/mlops/module_3/scratchpads/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/transformers/__init__.py b/mlops/module_3/transformers/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/module_3/transformers/fill_in_missing_values.py b/mlops/module_3/transformers/fill_in_missing_values.py
new file mode 100755
index 000000000..b9761c3e2
--- /dev/null
+++ b/mlops/module_3/transformers/fill_in_missing_values.py
@@ -0,0 +1,45 @@
+from pandas import DataFrame
+import math
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+def select_number_columns(df: DataFrame) -> DataFrame:
+    return df[['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'Survived']]  # keeps exactly these six columns, in this order
+
+
+def fill_missing_values_with_median(df: DataFrame) -> DataFrame:
+    """Return a copy of ``df`` with NaNs in numeric columns replaced by that column's median."""
+    # DataFrame.median() skips NaN and averages the two middle values for an
+    # even count (the old index pick returned the upper one); an all-NaN column
+    # yields NaN and is left unfilled instead of raising IndexError.
+    return df.fillna(df.median(numeric_only=True))
+
+
+@transformer
+def transform_df(df: DataFrame, *args, **kwargs) -> DataFrame:
+    """
+    Keep the columns chosen by ``select_number_columns`` and pass them
+    through ``fill_missing_values_with_median``.
+
+    Args:
+        df (DataFrame): Data frame from the parent block.
+        args: Accepted but not used.
+        kwargs: Accepted but not used.
+
+    Returns:
+        DataFrame: Whatever ``fill_missing_values_with_median`` returns for
+        the selected columns.
+    """
+    numeric_only = select_number_columns(df)
+    return fill_missing_values_with_median(numeric_only)
+
+
+@test
+def test_output(df) -> None:
+    """
+    Sanity-check the transformer result: raise AssertionError when ``df`` is None.
+    """
+    assert df is not None, 'The output is undefined'
diff --git a/mlops/module_3/transformers/train.py b/mlops/module_3/transformers/train.py
new file mode 100644
index 000000000..0b720d35a
--- /dev/null
+++ b/mlops/module_3/transformers/train.py
@@ -0,0 +1,49 @@
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.linear_model import LinearRegression
+
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@transformer
+def transform(df, *args, **kwargs):
+    """
+    Fit a linear regression of `duration` on the vectorized location IDs.
+
+    Args:
+        df: The output from the upstream parent block; must provide the
+            columns PULocationID, DOLocationID and duration.
+        args: The output from any additional upstream blocks (unused here).
+
+    Returns:
+        tuple: (dv, lr) - the fitted DictVectorizer and the fitted
+            LinearRegression model.
+    """
+    feature_columns = ['PULocationID', 'DOLocationID']
+    label_column = 'duration'
+
+    # One dict per row, so DictVectorizer can build the feature matrix.
+    records = df[feature_columns].to_dict(orient='records')
+    vectorizer = DictVectorizer()
+    features = vectorizer.fit_transform(records)
+
+    labels = df[label_column].values
+
+    model = LinearRegression()
+    model.fit(features, labels)
+
+    # Print the fitted intercept.
+    print(model.intercept_)
+
+    return vectorizer, model
+
+
+@test
+def test_output(output, *args) -> None:
+    """Check that the transformer block returned something other than None."""
+    failure_message = 'The output is undefined'
+    has_output = output is not None
+    assert has_output, failure_message
\ No newline at end of file
diff --git a/mlops/module_3/transformers/transform.py b/mlops/module_3/transformers/transform.py
new file mode 100644
index 000000000..70bc8957f
--- /dev/null
+++ b/mlops/module_3/transformers/transform.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+
+if 'transformer' not in globals():
+    from mage_ai.data_preparation.decorators import transformer
+if 'test' not in globals():
+    from mage_ai.data_preparation.decorators import test
+
+
+@transformer
+def transform(df, *args, **kwargs):
+    """
+    Add a trip `duration` column (minutes) and keep trips of 1 to 60 minutes.
+
+    The pickup/dropoff columns are parsed to datetimes and `duration` is
+    added on the input frame itself; the returned frame is a separate copy.
+
+    Args:
+        df: The output from the upstream parent block; needs the tpep_*
+            datetime columns and the two *LocationID columns used below.
+        args: The output from any additional upstream blocks (unused here).
+
+    Returns:
+        DataFrame: The filtered rows, with both location ID columns as str.
+    """
+    df.tpep_dropoff_datetime = pd.to_datetime(df.tpep_dropoff_datetime)
+    df.tpep_pickup_datetime = pd.to_datetime(df.tpep_pickup_datetime)
+
+    df['duration'] = df.tpep_dropoff_datetime - df.tpep_pickup_datetime
+    df.duration = df.duration.dt.total_seconds() / 60
+
+    # Copy the filtered rows so the assignment below writes to an independent
+    # frame instead of a slice of `df` (avoids SettingWithCopyWarning).
+    df = df[(df.duration >= 1) & (df.duration <= 60)].copy()
+    categorical = ['PULocationID', 'DOLocationID']
+    df[categorical] = df[categorical].astype(str)
+    return df
+
+
+@test
+def test_output(output, *args) -> None:
+    """Check that the transformer block returned something other than None."""
+    failure_message = 'The output is undefined'
+    has_output = output is not None
+    assert has_output, failure_message
\ No newline at end of file
diff --git a/mlops/module_3/utils/__init__.py b/mlops/module_3/utils/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/settings.yaml b/mlops/settings.yaml
index 48b62b147..235012d96 100644
--- a/mlops/settings.yaml
+++ b/mlops/settings.yaml
@@ -1,4 +1,6 @@
 projects:
+  module_3:
+    path: module_3
   unit_0_setup: {}
   unit_1_data_preparation: {}
   unit_2_training: {}
diff --git a/mlops/unit_3_observability/pipelines/module_3/__init__.py b/mlops/unit_3_observability/pipelines/module_3/__init__.py
new file mode 100755
index 000000000..e69de29bb
diff --git a/mlops/unit_3_observability/pipelines/module_3/metadata.yaml b/mlops/unit_3_observability/pipelines/module_3/metadata.yaml
new file mode 100755
index 000000000..52a722f00
--- /dev/null
+++ b/mlops/unit_3_observability/pipelines/module_3/metadata.yaml
@@ -0,0 +1,6 @@
+created_at: '2024-10-01 13:12:07.987453+00:00'
+description: module_3
+name: module_3
+tags: []
+type: python
+uuid: module_3