diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 4fc7f84..0000000
--- a/.coveragerc
+++ /dev/null
@@ -1,17 +0,0 @@
-[run]
-# Exclude test files and specific init files from the coverage report
-omit =
- */tests/*
- */test_*.py
- */__init__.py # Good idea to exclude __init__.py files from the coverage report
-
-# Include source files only from certain directories
-source =
- bin
- src
-
-# Set parallel to true if you run tests in parallel
-parallel = True
-
-# Enable branch coverage if set to True
-branch = False
diff --git a/.editorconfig b/.editorconfig
index 1c82eed..3535d5b 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -12,27 +12,27 @@ indent_size = 4 # Default indentation size
insert_final_newline = true # Make sure files end with a newline
trim_trailing_whitespace = true # Remove trailing whitespace
-# Python specific settings, complying with PEP 8 style guide, except for the line length
+# Python files
[*.py]
max_line_length = 100
# Markdown files
[*.md]
-trim_trailing_whitespace = false # Don't remove trailing whitespace in Markdown files
max_line_length = 120
+trim_trailing_whitespace = false
# Bash scripts
[*.sh]
-indent_size = 4
+indent_size = 2
# SQL files
[*.sql]
-indent_size = 4
+indent_size = 2
# YAML files
-[*.yml]
-indent_size = 4
+[*.{yml,yaml}]
+indent_size = 2
# JSON files
[*.json]
-indent_size = 4
+indent_size = 2
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..9b97295
--- /dev/null
+++ b/.env.example
@@ -0,0 +1 @@
+DEBUG_MODE=True
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..2953a91
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,49 @@
+name: Build and Deploy Docs
+
+on:
+  workflow_dispatch:  # manual trigger only
+
+permissions:
+  contents: read
+  pages: write  # required by actions/deploy-pages
+  id-token: write  # required by actions/deploy-pages
+
+# Serialize Pages deployments; an in-progress run is never cancelled
+concurrency:
+  group: pages
+  cancel-in-progress: false
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}  # set by the step with id `deployment`
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so it stays a string
+
+      - name: Install Dependencies
+        run: |
+          make setup
+          make install
+
+      - name: Build Documentation
+        run: make docs
+
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+
+      - name: Upload Documentation as Artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: ./site
+
+      - name: Deploy to GitHub Pages
+        uses: actions/deploy-pages@v4
+        id: deployment
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
new file mode 100644
index 0000000..ddd57bd
--- /dev/null
+++ b/.github/workflows/lints.yml
@@ -0,0 +1,37 @@
+name: Run Linter Checks
+
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - 'v*'
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        # Python versions to run the linter checks against
+        python-version: [ "3.10", "3.11", "3.12", "3.13" ]
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set Up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5  # same major version as docs.yml
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Dependencies
+        run: |
+          make setup
+          make install
+
+      - name: Run Linter Checks
+        run: |
+          make lint
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ee5d432..d1f1ec8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,46 +1,46 @@
-name: Tests
+name: Run Tests
on:
- workflow_dispatch: # Only enable manual runs for now
+ workflow_dispatch:
+ workflow_call:
+ pull_request:
+ branches:
+ - main
+ - develop
+
+permissions:
+ contents: read
jobs:
- build:
- runs-on: ubuntu-latest
-
- strategy:
- matrix:
- # Define the Python versions to test against
- python-version: [ "3.10", "3.11", "3.12", "3.13" ]
-
- steps:
- - name: Checkout Repository
- uses: actions/checkout@v4
-
- - name: Set Up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v5
- with:
- python-version: ${{ matrix.python-version }}
-
- # Install dependencies
- - name: Install Poetry and Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install make
- make setup
- make install
-
- # Run unit tests with coverage
- - name: Run Tests with Coverage
- run: |
- make test
- make coverage
- continue-on-error: true
-
- - name: Upload Test Results and Coverage Reports
- uses: actions/upload-artifact@v4
- with:
- name: test-results-and-coverage
- overwrite: true # Overwrite the existing artifact(s) with the same name
- path: |
- junit/test-results-${{ matrix.python-version }}.xml
- htmlcov-${{ matrix.python-version }}/
+  build:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        # Python versions to test against
+        python-version: [ "3.10", "3.11", "3.12", "3.13" ]
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Set Up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Dependencies
+        run: |
+          make setup
+          make install
+
+      - name: Run Tests with Coverage  # a failing test run now fails the job
+        run: |
+          make test
+
+      - name: Upload coverage reports to Codecov
+        if: ${{ !cancelled() }}  # still upload coverage when the test step failed
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+        continue-on-error: true  # an upload problem must not fail the job
diff --git a/.gitignore b/.gitignore
index f00c685..d860d0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,9 +41,7 @@ coverage.xml
*.cover
.hypothesis/
.pytest_cache/
-test-reports/
-test-report.xml
-test-results.xml
+.benchmarks/
# IDE specific files and directories
.idea/
@@ -60,7 +58,9 @@ test-results.xml
*.tmp
temp/
tmp/
+tmp2/
out/
+out2/
# Database files (SQLite, DuckDB, etc.)
*.duckdb
@@ -71,5 +71,9 @@ out/
# Dependency lock files (uncomment to ignore)
poetry.lock
+# Documentation files
+site/
+
# Miscellaneous files and directories to ignore
# Add any additional file patterns a directory names that should be ignored down here
+.env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..ad9e202
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,10 @@
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-yaml
+ - id: check-toml
+ - id: check-added-large-files
+ - id: check-merge-conflict
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..8e67f5c
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,3 @@
+# Code of Conduct
+
+We adhere to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) version 2.1.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..8e7d2b8
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,62 @@
+# Contribution Guidelines
+
+Thank you for considering contributing to this project!
+Contributions are always welcome and appreciated.
+
+## How to Contribute
+
+Please check the [issue tracker](https://github.com/habedi/template-python-project/issues) to see whether there is an
+open issue you would like to work on, or whether the problem you have in mind has already been reported or
+resolved.
+
+### Reporting Bugs
+
+1. Open an issue on the [issue tracker](https://github.com/habedi/template-python-project/issues).
+2. Include information such as steps to reproduce the observed behavior and relevant logs or screenshots.
+
+### Suggesting Features
+
+1. Open an issue on the [issue tracker](https://github.com/habedi/template-python-project/issues).
+2. Provide details about the feature, its purpose, and potential implementation ideas.
+
+## Submitting Pull Requests
+
+- Make sure all tests pass before submitting a pull request.
+- Write a clear description of the changes you made and the reasons behind them.
+
+> [!IMPORTANT]
+> It's assumed that by submitting a pull request, you agree to license your contributions under the project's license.
+
+## Development Workflow
+
+### Prerequisites
+
+Install GNU Make if it's not already installed on your system.
+
+```shell
+# For Debian-based systems like Debian, Ubuntu, etc.
+sudo apt-get install make
+```
+
+- Use the `make setup` command to install the development dependencies.
+- Use the `make install` command to install the Python dependencies.
+
+### Code Style
+
+- Use the `make format` command to format the code.
+
+### Running Tests
+
+- Use the `make test` command to run the tests.
+
+### Running Linter Checks
+
+- Use the `make lint` command to run the linter checks.
+
+### See Available Commands
+
+- Run `make help` to see all available commands for managing different tasks.
+
+## Code of Conduct
+
+We adhere to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) version 2.1.
diff --git a/Makefile b/Makefile
index 8b3e6bd..0116ab9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,72 +1,75 @@
+# Load environment variables from .env file
+ifneq (,$(wildcard ./.env))
+ include .env
+ export $(shell sed 's/=.*//' .env)
+else
+ $(warning .env file not found. Environment variables not loaded.)
+endif
+
# Variables
-PYTHON = python
-PIP = pip
-POETRY = poetry
+PYTHON ?= python3
+PIP ?= pip3
+DEP_MNGR ?= poetry
+DOCS_DIR ?= docs
+
+# Directories and files to clean
+CACHE_DIRS = .mypy_cache .pytest_cache .ruff_cache
+COVERAGE = .coverage htmlcov coverage.xml
+DIST_DIRS = dist junit
+TMP_DIRS = site
-# Default target
.DEFAULT_GOAL := help
.PHONY: help
-help: ## Show this help message
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+help: ## Show help for all targets
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \
+ awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+# Setup & Installation
.PHONY: setup
-setup: ## Install dependencies for development (need sudo and make installed already)
+setup: ## Install system dependencies and dependency manager (e.g., Poetry)
sudo apt-get update
sudo apt-get install -y python3-pip
- $(PIP) install poetry
+ $(PIP) install --upgrade pip
+ $(PIP) install $(DEP_MNGR)
.PHONY: install
install: ## Install Python dependencies
- $(POETRY) install
-
-.PHONY: update
-update: ## Update Python dependencies
- $(POETRY) update
+ $(DEP_MNGR) install --all-extras --no-interaction --no-root
+# Quality & Testing
.PHONY: test
-test: ## Run unit tests
- $(POETRY) run pytest
+test: ## Run tests
+ $(DEP_MNGR) run pytest
.PHONY: lint
-lint: ## Perform linting with ruff
- $(POETRY) run ruff check .
+lint: ## Run linter checks (auto-fixes what it can; fails if anything was fixed or remains)
+	$(DEP_MNGR) run ruff check --fix --exit-non-zero-on-fix
.PHONY: format
-format: ## Format code with ruff (not inplace by default)
- $(POETRY) run ruff format .
+format: ## Format code
+ $(DEP_MNGR) run ruff format
.PHONY: typecheck
-typecheck: ## Perform typechecking with mypy
- $(POETRY) run mypy .
-
-.PHONY: clean
-clean: ## Remove temporary files and directories
- find . -type f -name '*.pyc' -delete
- find . -type d -name '__pycache__' -exec rm -r {} +
- rm -rf .mypy_cache
- rm -rf .pytest_cache
- rm -rf .ruff_cache
- rm -rf .coverage
- rm -rf htmlcov
- rm -rf coverage.xml
- rm -rf junit
+typecheck: ## Typecheck code
+ $(DEP_MNGR) run mypy .
-.PHONY: coverage
-coverage: ## Run tests with code coverage
- $(POETRY) run pytest --cov=src --cov-report=term-missing
-
-.PHONY: build
-build: ## Build the project
- $(POETRY) build
+.PHONY: precommit
+precommit: ## Run pre-commit hooks
+ $(DEP_MNGR) run pre-commit run --all-files
-.PHONY: check
-check: lint typecheck test ## Perform linting, typechecking, and run tests
+.PHONY: precommit-install
+precommit-install: ## Install pre-commit hooks
+ $(DEP_MNGR) run pre-commit install
-.PHONY: precommit
-precommit: ## Install and run pre-commit hooks
- $(POETRY) run pre-commit install
- $(POETRY) run pre-commit run --all-files
+# Documentation
+.PHONY: docs
+docs: ## Build documentation
+ $(DEP_MNGR) run mkdocs build
-.PHONY: all
-all: install check build ## Install Python dependencies, run checks, and build the project
+# Maintenance
+.PHONY: clean
+clean: ## Remove caches and build artifacts
+ find . -type f -name '*.pyc' -delete
+ find . -type d -name '__pycache__' -exec rm -rf {} +
+ rm -rf $(CACHE_DIRS) $(COVERAGE) $(DIST_DIRS) $(TMP_DIRS)
diff --git a/README.md b/README.md
index ec883b8..e9f7b64 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,70 @@
-# A Template Repository for Data Science Projects
+## Python ML/DS Project Template
-[](https://github.com/habedi/template-python-project/actions/workflows/tests.yml)
-[](https://github.com/habedi/template-python-project)
-[](https://github.com/habedi/template-python-project/blob/main/LICENSE)
+
+
+
+
+
-This is a template repository for starting new data science and machine learning projects in Python.
-
-I created this template to help me speed up the setup process for my projects and to have a consistent structure across
-all my personal and professional projects.
-I'm sharing it here in case it might be useful to others as well.
+[](https://github.com/habedi/template-python-project/actions/workflows/tests.yml)
+[](https://codecov.io/gh/habedi/template-python-project)
+[](https://www.codefactor.io/repository/github/habedi/template-python-project)
+[](https://github.com/habedi/template-python-project)
+[](https://github.com/habedi/template-python-project/blob/main/docs)
+[](https://github.com/habedi/template-python-project/blob/main/LICENSE)
+[](https://python-poetry.org/)
+[](https://astral.sh/uv)
+[](https://www.gnu.org/software/make/)
---
-## Features
+This is a template for Python projects, specifically designed for machine learning (ML) and data science (DS) projects.
+I made it to help me have a consistent structure across all my personal and professional projects.
+I am sharing it here in case it can be useful to others.
-- A predefined file and folder layout that should be suitable for most data science workflows.
-- Easy dependency and environment management with [Poetry](https://python-poetry.org/)
- or [uv](https://github.com/astral-sh/uv).
-- Extra configuration files for various tasks like linting, formatting, and testing.
+### Features
----
+- A predefined file and folder layout that should be suitable for most ML and DS projects.
+- Easy dependency and environment management with [Poetry](https://python-poetry.org/), [uv](https://astral.sh/uv), or
+ any other modern Python dependency manager.
+- Extra configuration files for various tasks like linting, formatting, testing, etc.
+- Comes with a lot of quality-of-life features to help you get started quickly, like a `Makefile` and GitHub Actions
+ workflows for common tasks.
+
+### Prerequisites
+
+- `Python` >= 3.10
+- `Poetry 2.0+` (or any other modern Python dependency manager like `uv`)
+- `GNU Make`
-## Structure
+### Usage
+
+Use the `Use this template` button on GitHub to create a new repository based on this template.
+Modify the `README.md` file and other files as needed.
+Run `make help` to see all available commands for managing different tasks.
+
+### Structure
```plaintext
template-python-project/
-├── bin/ # Scripts and command-line tools
-├── data/ # Raw and processed datasets
-├── notebooks/ # Jupyter notebooks for exploration, analysis, and prototyping
+├── scripts/ # Helper scripts for various tasks
+├── data/ # Raw and processed data files
+├── notebooks/ # Jupyter notebooks
├── src/ # Source code for the project
-├── models/ # ML models and related files
-├── tests/ # Unit tests and test files
-├── pyproject.toml # Poetry configuration file
-├── LICENSE # License information
-├── README.md # Project documentation
-└── Makefile # Makefile for managing common tasks like linting and testing
+├── models/ # ML models and model artifacts
+├── tests/ # Project tests
+├── pyproject.toml # Python project configuration
+├── LICENSE # Project license file
+├── docs/ # Project documentation
+└── Makefile # Makefile for managing development tasks
```
---
-## License
+### Contributing
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to make a contribution.
+
+### License
-The files in this repository are licensed under the [MIT License](LICENSE).
+This template is licensed under the MIT License ([LICENSE](LICENSE) or <https://opensource.org/licenses/MIT>).
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..cc4d59e
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,3 @@
+ignore:
+ - "examples/*"
+ - "benches/*"
diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..26c7daa
--- /dev/null
+++ b/data/README.md
@@ -0,0 +1,3 @@
+## Data Directory
+
+This directory contains raw and processed data files for the project.
diff --git a/bin/.gitkeep b/data/output/.gitkeep
similarity index 100%
rename from bin/.gitkeep
rename to data/output/.gitkeep
diff --git a/data/.gitkeep b/data/processed/.gitkeep
similarity index 100%
rename from data/.gitkeep
rename to data/processed/.gitkeep
diff --git a/models/.gitkeep b/data/raw/.gitkeep
similarity index 100%
rename from models/.gitkeep
rename to data/raw/.gitkeep
diff --git a/docs/assets/images/dummy_figure.dot b/docs/assets/images/dummy_figure.dot
new file mode 100644
index 0000000..9815a5e
--- /dev/null
+++ b/docs/assets/images/dummy_figure.dot
@@ -0,0 +1,75 @@
+digraph G {
+ node [fontname = "Arial", fontsize = 12];
+
+ // Title in the top left corner with margin
+ label = " Typical Structure of a Python Library";
+ labelloc = "t";
+ labeljust = "l";
+ fontsize = 16;
+ fontcolor = "black";
+ margin = 0.2
+
+ // Define node colors for different types
+ "Library" [shape = folder, style = filled, fillcolor = lightblue, label = "Library"];
+
+ // Package 1 structure
+ subgraph cluster_package1 {
+ label = "Package1";
+ "Package1" [shape = folder, style = filled, fillcolor = lightgreen, label = "Package1"];
+ "Module1_1" [shape = box, style = filled, fillcolor = lightyellow, label = "module1_1.py"];
+ "Module1_2" [shape = box, style = filled, fillcolor = lightyellow, label = "module1_2.py"];
+
+ "ClassA" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassA"];
+ "ClassA_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"];
+ "ClassA_method2" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method2()"];
+
+ "ClassC" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassC"];
+ "ClassC_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"];
+ "ClassC_method2" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method2()"];
+
+ "function1_1" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function1_1()"];
+ "function1_2" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function1_2()"];
+
+ "Package1" -> "Module1_1";
+ "Package1" -> "Module1_2";
+
+ "Module1_1" -> "ClassA";
+ "Module1_1" -> "function1_1";
+ "Module1_1" -> "function1_2";
+
+ "ClassA" -> "ClassA_method1";
+ "ClassA" -> "ClassA_method2";
+
+ "Module1_2" -> "ClassC";
+ "ClassC" -> "ClassC_method1";
+ "ClassC" -> "ClassC_method2";
+ }
+
+ // Package 2 structure
+ subgraph cluster_package2 {
+ label = "Package2";
+ labelloc = "t";
+ labeljust = "r";
+
+ "Package2" [shape = folder, style = filled, fillcolor = lightgreen, label = "Package2"];
+ "Module2_1" [shape = box, style = filled, fillcolor = lightyellow, label = "module2_1.py"];
+
+ "ClassB" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassB"];
+ "ClassB_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"];
+
+ "function2_1" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function2_1()"];
+ "function2_2" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function2_2()"];
+
+ "Package2" -> "Module2_1";
+
+ "Module2_1" -> "ClassB";
+ "Module2_1" -> "function2_1";
+ "Module2_1" -> "function2_2";
+
+ "ClassB" -> "ClassB_method1";
+ }
+
+ // Relationships from Library to Packages
+ "Library" -> "Package1";
+ "Library" -> "Package2";
+}
diff --git a/docs/assets/images/dummy_figure.svg b/docs/assets/images/dummy_figure.svg
new file mode 100644
index 0000000..9a381a0
--- /dev/null
+++ b/docs/assets/images/dummy_figure.svg
@@ -0,0 +1,239 @@
+
+
+
+
+
diff --git a/docs/assets/images/logo.svg b/docs/assets/images/logo.svg
new file mode 100644
index 0000000..3c95a2c
--- /dev/null
+++ b/docs/assets/images/logo.svg
@@ -0,0 +1,65 @@
+
diff --git a/docs/assets/images/make_figures.sh b/docs/assets/images/make_figures.sh
new file mode 100644
index 0000000..c688dcc
--- /dev/null
+++ b/docs/assets/images/make_figures.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Render every Graphviz .dot file in a directory to an .svg file next to it.
+# Usage: make_figures.sh [ASSET_DIR]   (ASSET_DIR defaults to the current directory)
+# Requires Graphviz; on Debian-based OSes, install it using: sudo apt-get install graphviz
+
+ASSET_DIR=${1:-"."}
+
+for f in "${ASSET_DIR}"/*.dot; do
+  [ -e "$f" ] || continue  # an unmatched glob stays literal; skip it instead of calling dot on it
+  dot -Tsvg "$f" -o "${f%.dot}.svg"
+done
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..81cb75d
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,3 @@
+# Project Documentation
+
+To be added.
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..6a70ff6
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,47 @@
+site_name: Project Documentation
+site_description: Documentation for the Template Python Project
+repo_url: https://github.com/habedi/template-python-project
+repo_name: habedi/template-python-project
+
+theme:
+ name: material
+ palette:
+ - media: "(prefers-color-scheme: light)"
+ scheme: default
+ toggle:
+ icon: material/brightness-7
+ name: Switch to dark mode
+ - media: "(prefers-color-scheme: dark)"
+ scheme: slate
+ toggle:
+ icon: material/brightness-4
+ name: Switch to light mode
+ features:
+ - content.code.copy
+ - navigation.tabs
+ - navigation.top
+ - navigation.indexes
+ - navigation.expand
+ - content.code.select
+ - content.code.annotate
+
+plugins:
+ - search
+ - mkdocstrings:
+ handlers:
+ python:
+ options:
+ show_root_heading: true
+ show_source: true
+nav:
+ - Home: index.md
+
+markdown_extensions:
+ - pymdownx.highlight:
+ anchor_linenums: true
+ - pymdownx.inlinehilite
+ - pymdownx.snippets
+ - pymdownx.superfences
+ - admonition
+ - toc:
+ permalink: true
diff --git a/models/README.md b/models/README.md
new file mode 100644
index 0000000..4b87862
--- /dev/null
+++ b/models/README.md
@@ -0,0 +1,4 @@
+## Models Directory
+
+This directory contains machine learning models and other model-related files like model weights, configuration files,
+etc.
diff --git a/notebooks/template_notebook.ipynb b/notebooks/template_notebook.ipynb
new file mode 100644
index 0000000..99fd945
--- /dev/null
+++ b/notebooks/template_notebook.ipynb
@@ -0,0 +1,205 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "6781b6d71f7e750",
+ "metadata": {},
+ "source": "## Import Libraries"
+ },
+ {
+ "cell_type": "code",
+ "id": "initial_id",
+ "metadata": {
+ "collapsed": true,
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:28.599842Z",
+ "start_time": "2025-05-16T10:00:27.829889Z"
+ }
+ },
+ "source": [
+ "import sys\n",
+ "from pathlib import Path\n",
+ "\n",
+ "import datasets as ds\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import polars as pl\n",
+ "import seaborn as sns\n",
+ "from icecream import ic"
+ ],
+ "outputs": [],
+ "execution_count": 1
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d7ac8b9f6c3a17f",
+ "metadata": {},
+ "source": "## Settings"
+ },
+ {
+ "cell_type": "code",
+ "id": "49187922af8dc248",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:28.686426Z",
+ "start_time": "2025-05-16T10:00:28.683789Z"
+ }
+ },
+ "source": [
+ "pd.set_option(\"display.max_columns\", None)\n",
+ "pd.set_option(\"display.max_rows\", None)\n",
+ "\n",
+ "rng = np.random.default_rng(42)\n",
+ "\n",
+ "pl.enable_string_cache()\n",
+ "pl.Config.set_decimal_separator(\",\")\n",
+ "\n",
+ "plt.style.use(\"bmh\")\n",
+ "plt.rcParams[\"figure.figsize\"] = (12, 6)\n",
+ "plt.rcParams[\"axes.titlesize\"] = 20\n",
+ "\n",
+ "sns.set_context(\"notebook\", font_scale=1.5)"
+ ],
+ "outputs": [],
+ "execution_count": 2
+ },
+ {
+ "cell_type": "markdown",
+ "id": "10bd4ebb7af0097d",
+ "metadata": {},
+ "source": "## Constants"
+ },
+ {
+ "cell_type": "code",
+ "id": "4d9d9f7052e11adc",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:28.693737Z",
+ "start_time": "2025-05-16T10:00:28.692221Z"
+ }
+ },
+ "source": [
+ "DATA_DIR = Path(\"../data\")\n",
+ "RAW_DATA_DIR = DATA_DIR / \"raw\"\n",
+ "PROCESSED_DATA_DIR = DATA_DIR / \"processed\"\n",
+ "OUTPUT_DATA_DIR = DATA_DIR / \"output\"\n",
+ "\n",
+ "MODEL_DIR = Path(\"../models\")"
+ ],
+ "outputs": [],
+ "execution_count": 3
+ },
+ {
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:28.739296Z",
+ "start_time": "2025-05-16T10:00:28.737652Z"
+ }
+ },
+ "cell_type": "code",
+ "source": [
+ "SRC_DIR = Path(\"../src\")\n",
+ "if str(SRC_DIR) not in sys.path:\n",
+ " sys.path.append(str(SRC_DIR))"
+ ],
+ "id": "cd198fd5d0ffae0e",
+ "outputs": [],
+ "execution_count": 4
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e84dff469dd8e0a4",
+ "metadata": {},
+ "source": "## Load Data"
+ },
+ {
+ "cell_type": "code",
+ "id": "c4c3949840ca3096",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:31.274753Z",
+ "start_time": "2025-05-16T10:00:28.782567Z"
+ }
+ },
+ "source": [
+ "stack_exchange_dataset = ds.load_dataset(\"habedi/stack-exchange-dataset\", split=\"train\")"
+ ],
+ "outputs": [],
+ "execution_count": 5
+ },
+ {
+ "cell_type": "code",
+ "id": "8ed6c55b563de78c",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:31.378243Z",
+ "start_time": "2025-05-16T10:00:31.280646Z"
+ }
+ },
+ "source": [
+ "ic(stack_exchange_dataset)"
+ ],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "ic| stack_exchange_dataset: Dataset({\n",
+ " features: ['id', 'title', 'body', 'tags', 'label'],\n",
+ " num_rows: 82158\n",
+ " })\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "Dataset({\n",
+ " features: ['id', 'title', 'body', 'tags', 'label'],\n",
+ " num_rows: 82158\n",
+ "})"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "execution_count": 6
+ },
+ {
+ "cell_type": "code",
+ "id": "3104efd43e7ffcf8",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2025-05-16T10:00:31.453820Z",
+ "start_time": "2025-05-16T10:00:31.452767Z"
+ }
+ },
+ "source": [],
+ "outputs": [],
+ "execution_count": null
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/poetry.toml b/poetry.toml
new file mode 100644
index 0000000..ab1033b
--- /dev/null
+++ b/poetry.toml
@@ -0,0 +1,2 @@
+[virtualenvs]
+in-project = true
diff --git a/pyproject.toml b/pyproject.toml
index dcf0cf6..474a51e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,83 +1,103 @@
-[tool.poetry]
+[project]
name = "template-python-project"
version = "0.1.0"
-description = "A template repository for data science and machine learning projects in Python."
-authors = ["Hassan Abedi "]
-maintainers = ["Hassan Abedi "]
+description = "A template for data science and machine learning projects in Python"
readme = "README.md"
-repository = "https://github.com/habedi/template-python-project"
-license = "MIT"
-packages = [{ include = "src", from = "." }]
-
-[tool.poetry.dependencies]
-python = "^3.10"
+license = { text = "MIT" }
+authors = [
+ { name = "Hassan Abedi", email = "hassan.abedi.t@gmail.com" }
+]
-[tool.poetry.group.dev.dependencies]
-poetry-dynamic-versioning = "^1.4.0"
-pytest = "^8.0.1"
-pytest-cov = "^6.0.0"
-pytest-mock = "^3.14.0"
-mypy = "^1.11.1"
-ruff = "^0.9.3"
+requires-python = ">=3.10,<4.0"
+dependencies = [
+ "pandas (>=2.2.3,<3.0.0)",
+ "numpy (>=2.2.5,<3.0.0)",
+ "seaborn (>=0.13.2,<0.14.0)",
+ "jupyter (>=1.1.1,<2.0.0)",
+ "kaggle (>=1.7.4.5,<2.0.0.0)",
+ "huggingface-hub (>=0.31.2,<0.32.0)",
+ "polars (>=1.29.0,<2.0.0)",
+ "scikit-learn (>=1.6.1,<2.0.0)",
+ "icecream (>=2.1.4,<3.0.0)",
+ "datasets (>=3.6.0,<4.0.0)",
+ "python-dotenv (>=1.1.0,<2.0.0)",
+]
-[tool.poetry.scripts]
-cli_script = "src.cli:main"
+[project.optional-dependencies]
+dev = [
+ "pytest>=8.0.1",
+ "pytest-cov>=6.0.0",
+ "pytest-mock>=3.14.0",
+ "pytest-asyncio (>=0.26.0,<0.27.0)",
+ "mypy>=1.11.1",
+ "ruff>=0.9.3",
+ "pre-commit (>=4.2.0,<5.0.0)",
+ "griffe (>=1.7.3,<2.0.0)",
+ "mkdocs (>=1.6.1,<2.0.0)",
+ "mkdocstrings-python (>=1.16.10,<2.0.0)",
+ "mkdocs-material (>=9.6.14,<10.0.0)",
+ "types-requests (>=2.32.0.20250515,<3.0.0.0)"
+]
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+[project.urls]
+Repository = "https://github.com/habedi/template-python-project"
+Documentation = "https://github.com/habedi/template-python-project/blob/main/docs/index.md"
[tool.pytest.ini_options]
-pythonpath = [".", "src", 'bin', 'notebooks']
+pythonpath = ["src"]
+testpaths = ["tests"]
+addopts = [
+ "--tb=short",
+ #"--disable-warnings",
+ "--cov=src",
+ "--cov-branch",
+ "--cov-report=term",
+ "--cov-report=xml",
+ "-rs"
+]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+asyncio_default_test_loop_scope = "function"
+
+[tool.coverage.run]
+branch = true
+parallel = true
+source = ["src"]
+omit = ["tests/*"]
+
+[tool.coverage.report]
+show_missing = false
+skip_empty = true
+precision = 2
[tool.mypy]
python_version = "3.10"
ignore_missing_imports = true
+disallow_untyped_defs = true
disallow_untyped_calls = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+warn_return_any = true
strict_optional = true
warn_redundant_casts = true
+exclude = "^(scripts/|tests/)"
-[tool.poetry-dynamic-versioning]
-enable = true
-vcs = "git"
-versioning = "semver" # Semantic Versioning
-
-# Ruff configuration
[tool.ruff]
exclude = [
- ".bzr",
- ".direnv",
- ".eggs",
- ".git",
- ".git-rewrite",
- ".hg",
- ".mypy_cache",
- ".nox",
- ".pants.d",
- ".pytype",
- ".ruff_cache",
- ".svn",
- ".tox",
- ".venv",
- "__pypackages__",
- "_build",
- "buck-out",
- "build",
- "dist",
- "node_modules",
- "venv"
+ ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", ".mypy_cache",
+ ".nox", ".pants.d", ".pytype", ".ruff_cache", ".svn", ".tox", ".venv",
+ "__pypackages__", "_build", "buck-out", "build", "dist", "node_modules",
+ "venv", "tests"
]
line-length = 100
indent-width = 4
-src = ["src", "tests", "bin", "notebooks"]
+src = ["src"]
target-version = "py310"
+unsafe-fixes = false
[tool.ruff.lint]
-select = ["ANN", "D", "E", "F", "I"]
-ignore = [
- # Ignore missing docstrings
- "D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107",
-]
+select = ["ANN", "E", "F", "I", "W", "B", "RUF", "SIM", "C90"]
+ignore = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107"]
fixable = ["ALL"]
unfixable = []
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
@@ -92,4 +112,4 @@ line-ending = "auto"
convention = "google"
[tool.ruff.lint.per-file-ignores]
-"tests/**/*.py" = []
+"tests/**/*.py" = ["ANN001", "ANN201"]
diff --git a/notebooks/.gitkeep b/scripts/.gitkeep
similarity index 100%
rename from notebooks/.gitkeep
rename to scripts/.gitkeep
diff --git a/src/__init__.py b/src/pipeline_1/__init__.py
similarity index 100%
rename from src/__init__.py
rename to src/pipeline_1/__init__.py
diff --git a/tests/.gitkeep b/src/pipeline_1/infer.py
similarity index 100%
rename from tests/.gitkeep
rename to src/pipeline_1/infer.py
diff --git a/src/pipeline_1/model/__init__.py b/src/pipeline_1/model/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pipeline_1/model/config.py b/src/pipeline_1/model/config.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pipeline_1/model/metrics.py b/src/pipeline_1/model/metrics.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pipeline_1/model/model.py b/src/pipeline_1/model/model.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pipeline_1/model/utils.py b/src/pipeline_1/model/utils.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pipeline_1/train.py b/src/pipeline_1/train.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..e69de29