diff --git a/.gitconfig b/.gitconfig new file mode 100644 index 000000000..e69de29bb diff --git a/docker-compose.yml b/docker-compose.yml index 9decd7127..ed4caff8a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,6 +38,16 @@ services: restart: always networks: - app-network + mlflow: + build: + context: . + dockerfile: mlflow.dockerfile + ports: + - "5000:5000" + volumes: + - "${PWD}/mlflow:/home/mlflow/" + networks: + - app-network networks: app-network: driver: bridge diff --git a/finalized_model.lib b/finalized_model.lib new file mode 100644 index 000000000..1173672fc Binary files /dev/null and b/finalized_model.lib differ diff --git a/mlflow.dockerfile b/mlflow.dockerfile new file mode 100644 index 000000000..627691e95 --- /dev/null +++ b/mlflow.dockerfile @@ -0,0 +1,12 @@ +FROM python:3.10-slim + +RUN pip install mlflow==2.12.1 + +EXPOSE 5000 + +CMD [ \ + "mlflow", "server", \ + "--backend-store-uri", "sqlite:///home/mlflow/mlflow.db", \ + "--host", "0.0.0.0", \ + "--port", "5000" \ +] \ No newline at end of file diff --git a/mlflow_artifacts/dv_artifact.pkl b/mlflow_artifacts/dv_artifact.pkl new file mode 100644 index 000000000..2dacf43da Binary files /dev/null and b/mlflow_artifacts/dv_artifact.pkl differ diff --git a/mlops/homework_03/.gitignore b/mlops/homework_03/.gitignore new file mode 100755 index 000000000..8b3e82f61 --- /dev/null +++ b/mlops/homework_03/.gitignore @@ -0,0 +1,14 @@ +.DS_Store +.file_versions +.gitkeep +.log +.logs/ +.mage_temp_profiles +.preferences.yaml +.variables/ +__pycache__/ +docker-compose.override.yml +logs/ +mage-ai.db +mage_data/ +secrets/ diff --git a/mlops/homework_03/__init__.py b/mlops/homework_03/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/charts/__init__.py b/mlops/homework_03/charts/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/custom/__init__.py b/mlops/homework_03/custom/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/data_exporters/__init__.py b/mlops/homework_03/data_exporters/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/data_exporters/build.py b/mlops/homework_03/data_exporters/build.py new file mode 100644 index 000000000..c5537bf7d --- /dev/null +++ b/mlops/homework_03/data_exporters/build.py @@ -0,0 +1,67 @@ +import os +import mlflow +import mlflow.sklearn +from sklearn.feature_extraction import DictVectorizer +from sklearn.linear_model import LinearRegression +import joblib + +# Set experiment id +mlflow.set_experiment('mage_lr_experiment') + +# Set tracking uri +# mlflow.set_tracking_uri('http://localhost:5000') + +if 'data_exporter' not in globals(): + from mage_ai.data_preparation.decorators import data_exporter + + +@data_exporter +def export_data(data, *args, **kwargs): + """ + Exports data to some source. + + Args: + data: The output from the upstream parent block + args: The output from any additional upstream blocks (if applicable) + + Output (optional): + Optionally return any object and it'll be logged and + displayed when inspecting the block run. + """ + # Specify your transformation logic here + df_train = data + + # turn dictionary into vector + dv = DictVectorizer() + train_dicts = df_train[['PULocationID', 'DOLocationID']].to_dict(orient='records') + + # Feature matrix + X_train = dv.fit_transform(train_dicts) + + # Target matrix + target = 'duration' + y_train = df_train[target].values + + # Build model + model = LinearRegression() + model.fit(X_train, y_train) + + # Specify artifact_path + artifact_directory = 'mlflow_artifacts' + + # Create directory if it doesnt exist + os.makedirs(artifact_directory, exist_ok=True) + + # Save and log the artifact (DictVectorizer) + artifact_path = os.path.join(artifact_directory, "dv_artifact.pkl") + + with open(artifact_path, 'wb') as f: + joblib.dump(dv, f) + + # Log the linear regression model with MLflow + with mlflow.start_run(): + mlflow.sklearn.log_model(model, "linear_regression_model") + mlflow.log_param("intercept", model.intercept_) + mlflow.log_artifact(artifact_path) + + return model, dv \ No newline at end of file diff --git a/mlops/homework_03/data_exporters/export_titanic_clean.py b/mlops/homework_03/data_exporters/export_titanic_clean.py new file mode 100755 index 000000000..cb7aa63aa --- /dev/null +++ b/mlops/homework_03/data_exporters/export_titanic_clean.py @@ -0,0 +1,16 @@ +from mage_ai.io.file import FileIO +from pandas import DataFrame + +if 'data_exporter' not in globals(): + from mage_ai.data_preparation.decorators import data_exporter + + +@data_exporter +def export_data_to_file(df: DataFrame, **kwargs) -> None: + """ + Template for exporting data to filesystem. + + Docs: https://docs.mage.ai/design/data-loading#example-loading-data-from-a-file + """ + filepath = 'titanic_clean.csv' + FileIO().export(df, filepath) diff --git a/mlops/homework_03/data_loaders/__init__.py b/mlops/homework_03/data_loaders/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/data_loaders/ingest.py b/mlops/homework_03/data_loaders/ingest.py new file mode 100644 index 000000000..5787b568a --- /dev/null +++ b/mlops/homework_03/data_loaders/ingest.py @@ -0,0 +1,28 @@ +import requests +from io import BytesIO +from typing import List + +import pandas as pd + + +if 'data_loader' not in globals(): + from mage_ai.data_preparation.decorators import data_loader + + +@data_loader +def load_data(*args, **kwargs): + """ + Template code for loading data from any source. + + Returns: + Anything (e.g. data frame, dictionary, array, int, str, etc.) + """ + # Specify your data loading logic here + response = requests.get("https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2023-03.parquet") + + if response.status_code != 200: + raise Exception(response.text) + + df = pd.read_parquet(BytesIO(response.content)) + + return df \ No newline at end of file diff --git a/mlops/homework_03/data_loaders/load_titanic.py b/mlops/homework_03/data_loaders/load_titanic.py new file mode 100755 index 000000000..c664e0f2d --- /dev/null +++ b/mlops/homework_03/data_loaders/load_titanic.py @@ -0,0 +1,27 @@ +import io +import pandas as pd +import requests +from pandas import DataFrame + +if 'data_loader' not in globals(): + from mage_ai.data_preparation.decorators import data_loader +if 'test' not in globals(): + from mage_ai.data_preparation.decorators import test + + +@data_loader +def load_data_from_api(**kwargs) -> DataFrame: + """ + Template for loading data from API + """ + url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv?raw=True' + + return pd.read_csv(url) + + +@test +def test_output(df) -> None: + """ + Template code for testing the output of the block. + """ + assert df is not None, 'The output is undefined' diff --git a/mlops/homework_03/dbt/profiles.yml b/mlops/homework_03/dbt/profiles.yml new file mode 100755 index 000000000..90599f894 --- /dev/null +++ b/mlops/homework_03/dbt/profiles.yml @@ -0,0 +1,9 @@ +# https://docs.getdbt.com/docs/core/connect-data-platform/profiles.yml + +base: + outputs: + + dev: + type: duckdb + + target: dev diff --git a/mlops/homework_03/extensions/__init__.py b/mlops/homework_03/extensions/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/interactions/__init__.py b/mlops/homework_03/interactions/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/io_config.yaml b/mlops/homework_03/io_config.yaml new file mode 100755 index 000000000..80b4d9cef --- /dev/null +++ b/mlops/homework_03/io_config.yaml @@ -0,0 +1,134 @@ +version: 0.1.1 +default: + # Default profile created for data IO access. + # Add your credentials for the source you use, and delete the rest. + # AWS + AWS_ACCESS_KEY_ID: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + AWS_SECRET_ACCESS_KEY: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + AWS_SESSION_TOKEN: session_token (Used to generate Redshift credentials) + AWS_REGION: region + # Algolia + ALGOLIA_APP_ID: app_id + ALGOLIA_API_KEY: api_key + ALGOLIA_INDEX_NAME: index_name + # Azure + AZURE_CLIENT_ID: "{{ env_var('AZURE_CLIENT_ID') }}" + AZURE_CLIENT_SECRET: "{{ env_var('AZURE_CLIENT_SECRET') }}" + AZURE_STORAGE_ACCOUNT_NAME: "{{ env_var('AZURE_STORAGE_ACCOUNT_NAME') }}" + AZURE_TENANT_ID: "{{ env_var('AZURE_TENANT_ID') }}" + # Chroma + CHROMA_COLLECTION: collection_name + CHROMA_PATH: path + # Clickhouse + CLICKHOUSE_DATABASE: default + CLICKHOUSE_HOST: host.docker.internal + CLICKHOUSE_INTERFACE: http + CLICKHOUSE_PASSWORD: null + CLICKHOUSE_PORT: 8123 + CLICKHOUSE_USERNAME: null + # Druid + DRUID_HOST: hostname + DRUID_PASSWORD: password + DRUID_PATH: /druid/v2/sql/ + DRUID_PORT: 8082 + DRUID_SCHEME: http + DRUID_USER: user + # DuckDB + DUCKDB_DATABASE: database + DUCKDB_SCHEMA: main + # Google + GOOGLE_SERVICE_ACC_KEY: + type: service_account + project_id: project-id + private_key_id: key-id + private_key: "-----BEGIN PRIVATE KEY-----\nyour_private_key\n-----END_PRIVATE_KEY" + client_email: your_service_account_email + auth_uri: "https://accounts.google.com/o/oauth2/auth" + token_uri: "https://accounts.google.com/o/oauth2/token" + auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs" + client_x509_cert_url: "https://www.googleapis.com/robot/v1/metadata/x509/your_service_account_email" + GOOGLE_SERVICE_ACC_KEY_FILEPATH: "/path/to/your/service/account/key.json" + GOOGLE_LOCATION: US # Optional + # MongoDB + # Specify either the connection string or the (host, password, user, port) to connect to MongoDB. + MONGODB_CONNECTION_STRING: "mongodb://{username}:{password}@{host}:{port}/" + MONGODB_HOST: host + MONGODB_PORT: 27017 + MONGODB_USER: user + MONGODB_PASSWORD: password + MONGODB_DATABASE: database + MONGODB_COLLECTION: collection + # MSSQL + MSSQL_DATABASE: database + MSSQL_SCHEMA: schema + MSSQL_DRIVER: "ODBC Driver 18 for SQL Server" + MSSQL_HOST: host + MSSQL_PASSWORD: password + MSSQL_PORT: 1433 + MSSQL_USER: SA + # MySQL + MYSQL_DATABASE: database + MYSQL_HOST: host + MYSQL_PASSWORD: password + MYSQL_PORT: 3306 + MYSQL_USER: root + # Pinot + PINOT_HOST: hostname + PINOT_PASSWORD: password + PINOT_PATH: /query/sql + PINOT_PORT: 8000 + PINOT_SCHEME: http + PINOT_USER: user + # PostgresSQL + POSTGRES_CONNECT_TIMEOUT: 10 + POSTGRES_DBNAME: postgres + POSTGRES_SCHEMA: public # Optional + POSTGRES_USER: username + POSTGRES_PASSWORD: password + POSTGRES_HOST: hostname + POSTGRES_PORT: 5432 + # Qdrant + QDRANT_COLLECTION: collection + QDRANT_PATH: path + # Redshift + REDSHIFT_SCHEMA: public # Optional + REDSHIFT_DBNAME: redshift_db_name + REDSHIFT_HOST: redshift_cluster_id.identifier.region.redshift.amazonaws.com + REDSHIFT_PORT: 5439 + REDSHIFT_TEMP_CRED_USER: temp_username + REDSHIFT_TEMP_CRED_PASSWORD: temp_password + REDSHIFT_DBUSER: redshift_db_user + REDSHIFT_CLUSTER_ID: redshift_cluster_id + REDSHIFT_IAM_PROFILE: default + # Snowflake + SNOWFLAKE_USER: username + SNOWFLAKE_PASSWORD: password + SNOWFLAKE_ACCOUNT: account_id.region + SNOWFLAKE_DEFAULT_WH: null # Optional default warehouse + SNOWFLAKE_DEFAULT_DB: null # Optional default database + SNOWFLAKE_DEFAULT_SCHEMA: null # Optional default schema + SNOWFLAKE_PRIVATE_KEY_PASSPHRASE: null # Optional private key passphrase + SNOWFLAKE_PRIVATE_KEY_PATH: null # Optional private key path + SNOWFLAKE_ROLE: null # Optional role name + SNOWFLAKE_TIMEOUT: null # Optional timeout in seconds + # Trino + trino: + catalog: postgresql # Change this to the catalog of your choice + host: 127.0.0.1 + http_headers: + X-Something: 'mage=power' + http_scheme: http + password: mage1337 # Optional + port: 8080 + schema: core_data + session_properties: # Optional + acc01.optimize_locality_enabled: false + optimize_hash_generation: true + source: trino-cli # Optional + user: admin + verify: /path/to/your/ca.crt # Optional + # Weaviate + WEAVIATE_ENDPOINT: https://some-endpoint.weaviate.network + WEAVIATE_INSTANCE_API_KEY: YOUR-WEAVIATE-API-KEY + WEAVIATE_INFERENCE_API_KEY: YOUR-OPENAI-API-KEY + WEAVIATE_COLLECTION: collectionn_name diff --git a/mlops/homework_03/metadata.yaml b/mlops/homework_03/metadata.yaml new file mode 100755 index 000000000..bf30d6f5d --- /dev/null +++ b/mlops/homework_03/metadata.yaml @@ -0,0 +1,55 @@ +project_type: standalone + +variables_dir: ~/.mage_data +# remote_variables_dir: s3://bucket/path_prefix + +variables_retention_period: '90d' + +emr_config: + # You can customize the EMR cluster instance size with the two parameters + master_instance_type: 'r5.4xlarge' + slave_instance_type: 'r5.4xlarge' + + # Configure security groups for EMR cluster instances. + # The default managed security groups are ElasticMapReduce-master and ElasticMapReduce-slave + # master_security_group: 'sg-xxxxxxxxxxxx' + # slave_security_group: 'sg-yyyyyyyyyyyy' + + # If you want to ssh tunnel into EMR cluster, ec2_key_name must be configured. + # You can create a key pair in page https://console.aws.amazon.com/ec2#KeyPairs and download the key file. + # ec2_key_name: '[ec2_key_pair_name]' + +spark_config: + # Application name + app_name: 'my spark app' + # Master URL to connect to + # e.g., spark_master: 'spark://host:port', or spark_master: 'yarn' + spark_master: 'local' + # Executor environment variables + # e.g., executor_env: {'PYTHONPATH': '/home/path'} + executor_env: {} + # Jar files to be uploaded to the cluster and added to the classpath + # e.g., spark_jars: ['/home/path/example1.jar'] + spark_jars: [] + # Path where Spark is installed on worker nodes + # e.g. spark_home: '/usr/lib/spark' + spark_home: + # List of key-value pairs to be set in SparkConf + # e.g., others: {'spark.executor.memory': '4g', 'spark.executor.cores': '2'} + others: {} + # Whether to create custom SparkSession via code and set in kwargs['context'] + use_custom_session: false + # The variable name to set in kwargs['context'], + # e.g. kwargs['context']['spark'] = spark_session + custom_session_var_name: 'spark' + +help_improve_mage: true +notification_config: + alert_on: + - trigger_failure + - trigger_passed_sla + slack_config: + webhook_url: "{{ env_var('MAGE_SLACK_WEBHOOK_URL') }}" + teams_config: + webhook_url: "{{ env_var('MAGE_TEAMS_WEBHOOK_URL') }}" +project_uuid: homework_03 diff --git a/mlops/homework_03/pipelines/__init__.py b/mlops/homework_03/pipelines/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/pipelines/data__preparation/__init__.py b/mlops/homework_03/pipelines/data__preparation/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/pipelines/data__preparation/interactions.yaml b/mlops/homework_03/pipelines/data__preparation/interactions.yaml new file mode 100644 index 000000000..a1d40f831 --- /dev/null +++ b/mlops/homework_03/pipelines/data__preparation/interactions.yaml @@ -0,0 +1,2 @@ +blocks: {} +layout: [] diff --git a/mlops/homework_03/pipelines/data__preparation/metadata.yaml b/mlops/homework_03/pipelines/data__preparation/metadata.yaml new file mode 100755 index 000000000..6f012911e --- /dev/null +++ b/mlops/homework_03/pipelines/data__preparation/metadata.yaml @@ -0,0 +1,80 @@ +blocks: +- all_upstream_blocks_executed: true + color: null + configuration: + file_source: + path: homework_03/data_loaders/ingest.py + downstream_blocks: + - prepare + executor_config: null + executor_type: local_python + has_callback: false + language: python + name: Ingest + retry_config: null + status: executed + timeout: null + type: data_loader + upstream_blocks: [] + uuid: ingest +- all_upstream_blocks_executed: true + color: null + configuration: + file_source: + path: homework_03/transformers/prepare.py + downstream_blocks: + - build + executor_config: null + executor_type: local_python + has_callback: false + language: python + name: Prepare + retry_config: null + status: executed + timeout: null + type: transformer + upstream_blocks: + - ingest + uuid: prepare +- all_upstream_blocks_executed: true + color: null + configuration: + file_source: + path: homework_03/data_exporters/build.py + downstream_blocks: [] + executor_config: null + executor_type: local_python + has_callback: false + language: python + name: Build + retry_config: null + status: updated + timeout: null + type: data_exporter + upstream_blocks: + - prepare + uuid: build +cache_block_output_in_memory: false +callbacks: [] +concurrency_config: {} +conditionals: [] +created_at: '2024-06-08 08:12:33.660390+00:00' +data_integration: null +description: Load data and perform feature engineering +executor_config: {} +executor_count: 1 +executor_type: null +extensions: {} +name: Data preparation +notification_config: {} +remote_variables_dir: null +retry_config: {} +run_pipeline_in_one_process: false +settings: + triggers: null +spark_config: {} +tags: [] +type: python +uuid: data__preparation +variables_dir: /home/src/mage_data/homework_03 +widgets: [] diff --git a/mlops/homework_03/pipelines/example_pipeline/__init__.py b/mlops/homework_03/pipelines/example_pipeline/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/pipelines/example_pipeline/metadata.yaml b/mlops/homework_03/pipelines/example_pipeline/metadata.yaml new file mode 100755 index 000000000..c04cfbcf8 --- /dev/null +++ b/mlops/homework_03/pipelines/example_pipeline/metadata.yaml @@ -0,0 +1,30 @@ +blocks: +- all_upstream_blocks_executed: true + downstream_blocks: + - fill_in_missing_values + name: load_titanic + status: not_executed + type: data_loader + upstream_blocks: [] + uuid: load_titanic +- all_upstream_blocks_executed: true + downstream_blocks: + - export_titanic_clean + name: fill_in_missing_values + status: not_executed + type: transformer + upstream_blocks: + - load_titanic + uuid: fill_in_missing_values +- all_upstream_blocks_executed: true + downstream_blocks: [] + name: export_titanic_clean + status: not_executed + type: data_exporter + upstream_blocks: + - fill_in_missing_values + uuid: export_titanic_clean +name: example_pipeline +type: python +uuid: example_pipeline +widgets: [] diff --git a/mlops/homework_03/pipelines/sklearn_training/__init__.py b/mlops/homework_03/pipelines/sklearn_training/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/pipelines/sklearn_training/metadata.yaml b/mlops/homework_03/pipelines/sklearn_training/metadata.yaml new file mode 100755 index 000000000..a05fec5ca --- /dev/null +++ b/mlops/homework_03/pipelines/sklearn_training/metadata.yaml @@ -0,0 +1,6 @@ +created_at: '2024-06-08 20:49:13.470524+00:00' +description: Pipepline for training a LinearRegression model +name: sklearn training +tags: [] +type: python +uuid: sklearn_training diff --git a/mlops/homework_03/requirements.txt b/mlops/homework_03/requirements.txt new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/scratchpads/__init__.py b/mlops/homework_03/scratchpads/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/transformers/__init__.py b/mlops/homework_03/transformers/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/homework_03/transformers/fill_in_missing_values.py b/mlops/homework_03/transformers/fill_in_missing_values.py new file mode 100755 index 000000000..b9761c3e2 --- /dev/null +++ b/mlops/homework_03/transformers/fill_in_missing_values.py @@ -0,0 +1,45 @@ +from pandas import DataFrame +import math + +if 'transformer' not in globals(): + from mage_ai.data_preparation.decorators import transformer +if 'test' not in globals(): + from mage_ai.data_preparation.decorators import test + +def select_number_columns(df: DataFrame) -> DataFrame: + return df[['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'Survived']] + + +def fill_missing_values_with_median(df: DataFrame) -> DataFrame: + for col in df.columns: + values = sorted(df[col].dropna().tolist()) + median_value = values[math.floor(len(values) / 2)] + df[[col]] = df[[col]].fillna(median_value) + return df + + +@transformer +def transform_df(df: DataFrame, *args, **kwargs) -> DataFrame: + """ + Template code for a transformer block. + + Add more parameters to this function if this block has multiple parent blocks. + There should be one parameter for each output variable from each parent block. + + Args: + df (DataFrame): Data frame from parent block. + + Returns: + DataFrame: Transformed data frame + """ + # Specify your transformation logic here + + return fill_missing_values_with_median(select_number_columns(df)) + + +@test +def test_output(df) -> None: + """ + Template code for testing the output of the block. + """ + assert df is not None, 'The output is undefined' diff --git a/mlops/homework_03/transformers/prepare.py b/mlops/homework_03/transformers/prepare.py new file mode 100644 index 000000000..847578579 --- /dev/null +++ b/mlops/homework_03/transformers/prepare.py @@ -0,0 +1,38 @@ +import pandas as pd +# from mlops.homework_03.data_loaders import ingest + +if 'transformer' not in globals(): + from mage_ai.data_preparation.decorators import transformer + + +@transformer +def transform(data, *args, **kwargs): + """ + Template code for a transformer block. + + Add more parameters to this function if this block has multiple parent blocks. + There should be one parameter for each output variable from each parent block. + + Args: + data: The output from the upstream parent block + args: The output from any additional upstream blocks (if applicable) + + Returns: + Anything (e.g. data frame, dictionary, array, int, str, etc.) + """ + # Specify your transformation logic here + # def read_dataframe(filename): + df = data + + df.tpep_dropoff_datetime = pd.to_datetime(df.tpep_dropoff_datetime) + df.tpep_pickup_datetime = pd.to_datetime(df.tpep_pickup_datetime) + + df['duration'] = df.tpep_dropoff_datetime - df.tpep_pickup_datetime + df.duration = df.duration.dt.total_seconds() / 60 + + df = df[(df.duration >= 1) & (df.duration <= 60)] + + categorical = ['PULocationID', 'DOLocationID'] + df[categorical] = df[categorical].astype(str) + + return df diff --git a/mlops/homework_03/utils/__init__.py b/mlops/homework_03/utils/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/mlops/metadata.yaml b/mlops/metadata.yaml index 4eb3b7742..cc0dcfee9 100644 --- a/mlops/metadata.yaml +++ b/mlops/metadata.yaml @@ -15,3 +15,4 @@ features: operation_history: true polars: true help_improve_mage: true +project_uuid: 8a1d9ffcca6e42fa98d38f4b43c70ece diff --git a/mlops/requirements.txt b/mlops/requirements.txt index 5c611efcf..b9de29417 100755 --- a/mlops/requirements.txt +++ b/mlops/requirements.txt @@ -9,3 +9,4 @@ scikit-learn seaborn shap xgboost +joblib diff --git a/mlops/settings.yaml b/mlops/settings.yaml index 48b62b147..32fb040be 100644 --- a/mlops/settings.yaml +++ b/mlops/settings.yaml @@ -1,4 +1,6 @@ projects: + homework_03: + path: homework_03 unit_0_setup: {} unit_1_data_preparation: {} unit_2_training: {}