diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 4fc7f84..0000000 --- a/.coveragerc +++ /dev/null @@ -1,17 +0,0 @@ -[run] -# Exclude test files and specific init files from the coverage report -omit = - */tests/* - */test_*.py - */__init__.py # Good idea to exclude __init__.py files from the coverage report - -# Include source files only from certain directories -source = - bin - src - -# Set parallel to true if you run tests in parallel -parallel = True - -# Enable branch coverage if set to True -branch = False diff --git a/.editorconfig b/.editorconfig index 1c82eed..3535d5b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -12,27 +12,27 @@ indent_size = 4 # Default indentation size insert_final_newline = true # Make sure files end with a newline trim_trailing_whitespace = true # Remove trailing whitespace -# Python specific settings, complying with PEP 8 style guide, except for the line length +# Python files [*.py] max_line_length = 100 # Markdown files [*.md] -trim_trailing_whitespace = false # Don't remove trailing whitespace in Markdown files max_line_length = 120 +trim_trailing_whitespace = false # Bash scripts [*.sh] -indent_size = 4 +indent_size = 2 # SQL files [*.sql] -indent_size = 4 +indent_size = 2 # YAML files -[*.yml] -indent_size = 4 +[*.{yml,yaml}] +indent_size = 2 # JSON files [*.json] -indent_size = 4 +indent_size = 2 diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9b97295 --- /dev/null +++ b/.env.example @@ -0,0 +1 @@ +DEBUG_MODE=True diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..2953a91 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,49 @@ +name: Build and Deploy Docs + +on: + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +# Allow only one deployment to run at a time +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + deploy: + environment: + name: github-pages + url: 
${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Dependencies + run: | + make setup + make install + + - name: Build Documentation + run: make docs + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Upload Documentation as Artifact + uses: actions/upload-pages-artifact@v3 + with: + path: './site' + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml new file mode 100644 index 0000000..ddd57bd --- /dev/null +++ b/.github/workflows/lints.yml @@ -0,0 +1,37 @@ +name: Run Linter Checks + +on: + workflow_dispatch: + push: + tags: + - 'v*' + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + # Define the Python versions to check against + python-version: [ "3.10", "3.11", "3.12", "3.13" ] + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set Up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Dependencies + run: | + make setup + make install + + - name: Run Linter Checks + run: | + make lint diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ee5d432..d1f1ec8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,46 +1,46 @@ -name: Tests +name: Run Tests on: - workflow_dispatch: # Only enable manual runs for now + workflow_dispatch: + workflow_call: + pull_request: + branches: + - main + - develop + +permissions: + contents: read jobs: - build: - runs-on: ubuntu-latest - - strategy: - matrix: - # Define the Python versions to test against - python-version: [ "3.10", "3.11", "3.12", "3.13" ] - - steps: - - name: Checkout Repository - uses: 
actions/checkout@v4 - - - name: Set Up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - # Install dependencies - - name: Install Poetry and Dependencies - run: | - sudo apt-get update - sudo apt-get install make - make setup - make install - - # Run unit tests with coverage - - name: Run Tests with Coverage - run: | - make test - make coverage - continue-on-error: true - - - name: Upload Test Results and Coverage Reports - uses: actions/upload-artifact@v4 - with: - name: test-results-and-coverage - overwrite: true # Overwrite the existing artifact(s) with the same name - path: | - junit/test-results-${{ matrix.python-version }}.xml - htmlcov-${{ matrix.python-version }}/ + build: + runs-on: ubuntu-latest + + strategy: + matrix: + # Define the Python versions to test against + python-version: [ "3.10", "3.11", "3.12", "3.13" ] + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Set Up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Dependencies + run: | + make setup + make install + + - name: Run Tests with Coverage + run: | + make test + continue-on-error: true + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + continue-on-error: true diff --git a/.gitignore b/.gitignore index f00c685..d860d0b 100644 --- a/.gitignore +++ b/.gitignore @@ -41,9 +41,7 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ -test-reports/ -test-report.xml -test-results.xml +.benchmarks/ # IDE specific files and directories .idea/ @@ -60,7 +58,9 @@ test-results.xml *.tmp temp/ tmp/ +tmp2/ out/ +out2/ # Database files (SQLite, DuckDB, etc.) 
*.duckdb @@ -71,5 +71,9 @@ out/ # Dependency lock files (uncomment to ignore) poetry.lock +# Documentation files +site/ + # Miscellaneous files and directories to ignore # Add any additional file patterns a directory names that should be ignored down here +.env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ad9e202 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + - id: check-merge-conflict diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..8e67f5c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Code of Conduct + +We adhere to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) version 2.1. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..8e7d2b8 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,62 @@ +# Contribution Guidelines + +Thank you for considering contributing to this project! +Contributions are always welcome and appreciated. + +## How to Contribute + +Please check the [issue tracker](https://github.com/habedi/template-python-project/issues) to see if there is an issue +you +would like to work on or if it has already been resolved. + +### Reporting Bugs + +1. Open an issue on the [issue tracker](https://github.com/habedi/template-python-project/issues). +2. Include information such as steps to reproduce the observed behavior and relevant logs or screenshots. + +### Suggesting Features + +1. Open an issue on the [issue tracker](https://github.com/habedi/template-python-project/issues). +2. Provide details about the feature, its purpose, and potential implementation ideas. + +## Submitting Pull Requests + +- Make sure all tests pass before submitting a pull request. 
+- Write a clear description of the changes you made and the reasons behind them. + +> [!IMPORTANT] +> It's assumed that by submitting a pull request, you agree to license your contributions under the project's license. + +## Development Workflow + +### Prerequisites + +Install GNU Make if it's not already installed on your system. + +```shell +# For Debian-based systems like Debian, Ubuntu, etc. +sudo apt-get install make +``` + +- Use the `make setup` command to install the development dependencies. +- Use the `make install` command to install the Python dependencies. + +### Code Style + +- Use the `make format` command to format the code. + +### Running Tests + +- Use the `make test` command to run the tests. + +### Running Linter Checks + +- Use the `make lint` command to run the linter checks. + +### See Available Commands + +- Run `make help` to see all available commands for managing different tasks. + +## Code of Conduct + +We adhere to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) version 2.1. diff --git a/Makefile b/Makefile index 8b3e6bd..0116ab9 100644 --- a/Makefile +++ b/Makefile @@ -1,72 +1,75 @@ +# Load environment variables from .env file +ifneq (,$(wildcard ./.env)) + include .env + export $(shell sed 's/=.*//' .env) +else + $(warning .env file not found. Environment variables not loaded.) 
+endif + # Variables -PYTHON = python -PIP = pip -POETRY = poetry +PYTHON ?= python3 +PIP ?= pip3 +DEP_MNGR ?= poetry +DOCS_DIR ?= docs + +# Directories and files to clean +CACHE_DIRS = .mypy_cache .pytest_cache .ruff_cache +COVERAGE = .coverage htmlcov coverage.xml +DIST_DIRS = dist junit +TMP_DIRS = site -# Default target .DEFAULT_GOAL := help .PHONY: help -help: ## Show this help message - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' +help: ## Show help for all targets + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | \ + awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' +# Setup & Installation .PHONY: setup -setup: ## Install dependencies for development (need sudo and make installed already) +setup: ## Install system dependencies and dependency manager (e.g., Poetry) sudo apt-get update sudo apt-get install -y python3-pip - $(PIP) install poetry + $(PIP) install --upgrade pip + $(PIP) install $(DEP_MNGR) .PHONY: install install: ## Install Python dependencies - $(POETRY) install - -.PHONY: update -update: ## Update Python dependencies - $(POETRY) update + $(DEP_MNGR) install --all-extras --no-interaction --no-root +# Quality & Testing .PHONY: test -test: ## Run unit tests - $(POETRY) run pytest +test: ## Run tests + $(DEP_MNGR) run pytest .PHONY: lint -lint: ## Perform linting with ruff - $(POETRY) run ruff check . +lint: ## Run linter checks + $(DEP_MNGR) run ruff check --fix .PHONY: format -format: ## Format code with ruff (not inplace by default) - $(POETRY) run ruff format . +format: ## Format code + $(DEP_MNGR) run ruff format .PHONY: typecheck -typecheck: ## Perform typechecking with mypy - $(POETRY) run mypy . - -.PHONY: clean -clean: ## Remove temporary files and directories - find . -type f -name '*.pyc' -delete - find . 
-type d -name '__pycache__' -exec rm -r {} + - rm -rf .mypy_cache - rm -rf .pytest_cache - rm -rf .ruff_cache - rm -rf .coverage - rm -rf htmlcov - rm -rf coverage.xml - rm -rf junit +typecheck: ## Typecheck code + $(DEP_MNGR) run mypy . -.PHONY: coverage -coverage: ## Run tests with code coverage - $(POETRY) run pytest --cov=src --cov-report=term-missing - -.PHONY: build -build: ## Build the project - $(POETRY) build +.PHONY: precommit +precommit: ## Run pre-commit hooks + $(DEP_MNGR) run pre-commit run --all-files -.PHONY: check -check: lint typecheck test ## Perform linting, typechecking, and run tests +.PHONY: precommit-install +precommit-install: ## Install pre-commit hooks + $(DEP_MNGR) run pre-commit install -.PHONY: precommit -precommit: ## Install and run pre-commit hooks - $(POETRY) run pre-commit install - $(POETRY) run pre-commit run --all-files +# Documentation +.PHONY: docs +docs: ## Build documentation + $(DEP_MNGR) run mkdocs build -.PHONY: all -all: install check build ## Install Python dependencies, run checks, and build the project +# Maintenance +.PHONY: clean +clean: ## Remove caches and build artifacts + find . -type f -name '*.pyc' -delete + find . -type d -name '__pycache__' -exec rm -rf {} + + rm -rf $(CACHE_DIRS) $(COVERAGE) $(DIST_DIRS) $(TMP_DIRS) diff --git a/README.md b/README.md index ec883b8..e9f7b64 100644 --- a/README.md +++ b/README.md @@ -1,44 +1,70 @@ -# A Template Repository for Data Science Projects +## Python ML/DS Project Template -[![Tests](https://github.com/habedi/template-python-project/actions/workflows/tests.yml/badge.svg)](https://github.com/habedi/template-python-project/actions/workflows/tests.yml) -[![Python Version](https://img.shields.io/badge/Python-%3E=3.10-blue)](https://github.com/habedi/template-python-project) -[![License](https://img.shields.io/badge/License-MIT-blue)](https://github.com/habedi/template-python-project/blob/main/LICENSE) +
+ + Python Project Template + +
-This is a template repository for starting new data science and machine learning projects in Python. - -I created this template to help me speed up the setup process for my projects and to have a consistent structure across -all my personal and professional projects. -I'm sharing it here in case it might be useful to others as well. +[![Tests](https://img.shields.io/github/actions/workflow/status/habedi/template-python-project/tests.yml?label=tests&style=flat&labelColor=333333&logo=github&logoColor=white)](https://github.com/habedi/template-python-project/actions/workflows/tests.yml) +[![Code Coverage](https://img.shields.io/codecov/c/github/habedi/template-python-project?style=flat&label=coverage&labelColor=333333&logo=codecov&logoColor=white)](https://codecov.io/gh/habedi/template-python-project) +[![Code Quality](https://img.shields.io/codefactor/grade/github/habedi/template-python-project?style=flat&label=code%20quality&labelColor=333333&logo=codefactor&logoColor=white)](https://www.codefactor.io/repository/github/habedi/template-python-project) +[![Python Version](https://img.shields.io/badge/python-%3E=3.10-3776ab?style=flat&labelColor=333333&logo=python&logoColor=white)](https://github.com/habedi/template-python-project) +[![Documentation](https://img.shields.io/badge/docs-latest-8ca0d7?style=flat&labelColor=333333&logo=read-the-docs&logoColor=white)](https://github.com/habedi/template-python-project/blob/main/docs) +[![License](https://img.shields.io/badge/license-MIT-00acc1?style=flat&labelColor=333333&logo=open-source-initiative&logoColor=white)](https://github.com/habedi/template-python-project/blob/main/LICENSE) +[![Managed with Poetry](https://img.shields.io/badge/managed%20with-Poetry-60A5FA?style=flat&logo=poetry&labelColor=333333&logoColor=white)](https://python-poetry.org/) +[![Managed with uv](https://img.shields.io/badge/managed%20with-uv-000000?style=flat&logo=uv&labelColor=333333&logoColor=white)](https://astral.sh/uv) 
+[![Makefile](https://img.shields.io/badge/managed%20with-Makefile-000000?style=flat&logo=gnu&labelColor=333333&logoColor=white)](https://www.gnu.org/software/make/) --- -## Features +This is a template for Python projects, specifically designed for machine learning (ML) and data science (DS) projects. +I made it to help me have a consistent structure across all my personal and professional projects. +I am sharing it here in case it can be useful to others. -- A predefined file and folder layout that should be suitable for most data science workflows. -- Easy dependency and environment management with [Poetry](https://python-poetry.org/) - or [uv](https://github.com/astral-sh/uv). -- Extra configuration files for various tasks like linting, formatting, and testing. +### Features ---- +- A predefined file and folder layout that should be suitable for most ML and DS projects. +- Easy dependency and environment management with [Poetry](https://python-poetry.org/), [uv](https://astral.sh/uv), or + any other modern Python dependency manager. +- Extra configuration files for various tasks like linting, formatting, testing, etc. +- Comes with a lot of quality-of-life features to help you get started quickly, like a `Makefile` and GitHub Actions + workflows for common tasks. + +### Prerequisites + +- `Python` >= 3.10 +- `Poetry 2.0+` (or any other modern Python dependency manager like `uv`) +- `GNU Make` -## Structure +### Usage + +Use the `Use this template` button on GitHub to create a new repository based on this template. +Modify the `README.md` file and other files as needed. +Run `make help` to see all available commands for managing different tasks. 
+ +### Structure ```plaintext template-python-project/ -├── bin/ # Scripts and command-line tools -├── data/ # Raw and processed datasets -├── notebooks/ # Jupyter notebooks for exploration, analysis, and prototyping +├── scripts/ # Helper scripts for various tasks +├── data/ # Raw and processed data files +├── notebooks/ # Jupyter notebooks ├── src/ # Source code for the project -├── models/ # ML models and related files -├── tests/ # Unit tests and test files -├── pyproject.toml # Poetry configuration file -├── LICENSE # License information -├── README.md # Project documentation -└── Makefile # Makefile for managing common tasks like linting and testing +├── models/ # ML models and model artifacts +├── tests/ # Project tests +├── pyproject.toml # Python project configuration +├── LICENSE # Project license file +├── docs/ # Project documentation +└── Makefile # Makefile for managing development tasks ``` --- -## License +### Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to make a contribution. + +### License -The files in this repository are licensed under the [MIT License](LICENSE). +This template is licensed under the MIT License ([LICENSE](LICENSE) or https://opensource.org/licenses/MIT) diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..cc4d59e --- /dev/null +++ b/codecov.yml @@ -0,0 +1,3 @@ +ignore: + - "examples/*" + - "benches/*" diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..26c7daa --- /dev/null +++ b/data/README.md @@ -0,0 +1,3 @@ +## Data Directory + +This directory contains raw and processed data files for the project. 
diff --git a/bin/.gitkeep b/data/output/.gitkeep similarity index 100% rename from bin/.gitkeep rename to data/output/.gitkeep diff --git a/data/.gitkeep b/data/processed/.gitkeep similarity index 100% rename from data/.gitkeep rename to data/processed/.gitkeep diff --git a/models/.gitkeep b/data/raw/.gitkeep similarity index 100% rename from models/.gitkeep rename to data/raw/.gitkeep diff --git a/docs/assets/images/dummy_figure.dot b/docs/assets/images/dummy_figure.dot new file mode 100644 index 0000000..9815a5e --- /dev/null +++ b/docs/assets/images/dummy_figure.dot @@ -0,0 +1,75 @@ +digraph G { + node [fontname = "Arial", fontsize = 12]; + + // Title in the top left corner with margin + label = " Typical Structure of a Python Library"; + labelloc = "t"; + labeljust = "l"; + fontsize = 16; + fontcolor = "black"; + margin = 0.2 + + // Define node colors for different types + "Library" [shape = folder, style = filled, fillcolor = lightblue, label = "Library"]; + + // Package 1 structure + subgraph cluster_package1 { + label = "Package1"; + "Package1" [shape = folder, style = filled, fillcolor = lightgreen, label = "Package1"]; + "Module1_1" [shape = box, style = filled, fillcolor = lightyellow, label = "module1_1.py"]; + "Module1_2" [shape = box, style = filled, fillcolor = lightyellow, label = "module1_2.py"]; + + "ClassA" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassA"]; + "ClassA_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"]; + "ClassA_method2" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method2()"]; + + "ClassC" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassC"]; + "ClassC_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"]; + "ClassC_method2" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method2()"]; + + "function1_1" [shape = ellipse, style = filled, fillcolor = lightgrey, label = 
"function1_1()"]; + "function1_2" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function1_2()"]; + + "Package1" -> "Module1_1"; + "Package1" -> "Module1_2"; + + "Module1_1" -> "ClassA"; + "Module1_1" -> "function1_1"; + "Module1_1" -> "function1_2"; + + "ClassA" -> "ClassA_method1"; + "ClassA" -> "ClassA_method2"; + + "Module1_2" -> "ClassC"; + "ClassC" -> "ClassC_method1"; + "ClassC" -> "ClassC_method2"; + } + + // Package 2 structure + subgraph cluster_package2 { + label = "Package2"; + labelloc = "t"; + labeljust = "r"; + + "Package2" [shape = folder, style = filled, fillcolor = lightgreen, label = "Package2"]; + "Module2_1" [shape = box, style = filled, fillcolor = lightyellow, label = "module2_1.py"]; + + "ClassB" [shape = ellipse, style = filled, fillcolor = lightcoral, label = "ClassB"]; + "ClassB_method1" [shape = ellipse, style = filled, fillcolor = lightpink, label = "method1()"]; + + "function2_1" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function2_1()"]; + "function2_2" [shape = ellipse, style = filled, fillcolor = lightgrey, label = "function2_2()"]; + + "Package2" -> "Module2_1"; + + "Module2_1" -> "ClassB"; + "Module2_1" -> "function2_1"; + "Module2_1" -> "function2_2"; + + "ClassB" -> "ClassB_method1"; + } + + // Relationships from Library to Packages + "Library" -> "Package1"; + "Library" -> "Package2"; +} diff --git a/docs/assets/images/dummy_figure.svg b/docs/assets/images/dummy_figure.svg new file mode 100644 index 0000000..9a381a0 --- /dev/null +++ b/docs/assets/images/dummy_figure.svg @@ -0,0 +1,239 @@ + + + + + + + G + +      Typical + Structure of a Python Library + + + cluster_package1 + + Package1 + + + cluster_package2 + + Package2 + + + + Library + + Library + + + + Package1 + + Package1 + + + + Library->Package1 + + + + + + Package2 + + Package2 + + + + Library->Package2 + + + + + + Module1_1 + + module1_1.py + + + + Package1->Module1_1 + + + + + + Module1_2 + + module1_2.py + + + + 
Package1->Module1_2 + + + + + + ClassA + + ClassA + + + + Module1_1->ClassA + + + + + + function1_1 + + function1_1() + + + + Module1_1->function1_1 + + + + + + function1_2 + + function1_2() + + + + Module1_1->function1_2 + + + + + + ClassC + + ClassC + + + + Module1_2->ClassC + + + + + + ClassA_method1 + + method1() + + + + ClassA->ClassA_method1 + + + + + + ClassA_method2 + + method2() + + + + ClassA->ClassA_method2 + + + + + + ClassC_method1 + + method1() + + + + ClassC->ClassC_method1 + + + + + + ClassC_method2 + + method2() + + + + ClassC->ClassC_method2 + + + + + + Module2_1 + + module2_1.py + + + + Package2->Module2_1 + + + + + + ClassB + + ClassB + + + + Module2_1->ClassB + + + + + + function2_1 + + function2_1() + + + + Module2_1->function2_1 + + + + + + function2_2 + + function2_2() + + + + Module2_1->function2_2 + + + + + + ClassB_method1 + + method1() + + + + ClassB->ClassB_method1 + + + + + diff --git a/docs/assets/images/logo.svg b/docs/assets/images/logo.svg new file mode 100644 index 0000000..3c95a2c --- /dev/null +++ b/docs/assets/images/logo.svg @@ -0,0 +1,65 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/assets/images/make_figures.sh b/docs/assets/images/make_figures.sh new file mode 100644 index 0000000..c688dcc --- /dev/null +++ b/docs/assets/images/make_figures.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# You need to have Graphviz installed to run this script +# On Debian-based OSes, you can install it using: sudo apt-get install graphviz + +# Directory containing .dot files (with default value) +ASSET_DIR=${1:-"."} + +# Make figures from .dot files +for f in "${ASSET_DIR}"/*.dot; do + dot -Tsvg "$f" -o "${f%.dot}.svg" +done diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..81cb75d --- /dev/null +++ b/docs/index.md @@ -0,0 +1,3 @@ +# Project Documentation + +To be added. 
diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..6a70ff6 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,47 @@ +site_name: Project Documentation +site_description: Documentation for the Template Python Project +repo_url: https://github.com/habedi/template-python-project +repo_name: habedi/template-python-project + +theme: + name: material + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - content.code.copy + - navigation.tabs + - navigation.top + - navigation.indexes + - navigation.expand + - content.code.select + - content.code.annotate + +plugins: + - search + - mkdocstrings: + handlers: + python: + options: + show_root_heading: true + show_source: true +nav: + - Home: index.md + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - admonition + - toc: + permalink: true diff --git a/models/README.md b/models/README.md new file mode 100644 index 0000000..4b87862 --- /dev/null +++ b/models/README.md @@ -0,0 +1,4 @@ +## Models Directory + +This directory contains machine learning models and other model-related files like model weights, configuration files, +etc. 
diff --git a/notebooks/template_notebook.ipynb b/notebooks/template_notebook.ipynb new file mode 100644 index 0000000..99fd945 --- /dev/null +++ b/notebooks/template_notebook.ipynb @@ -0,0 +1,205 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6781b6d71f7e750", + "metadata": {}, + "source": "## Import Libraries" + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2025-05-16T10:00:28.599842Z", + "start_time": "2025-05-16T10:00:27.829889Z" + } + }, + "source": [ + "import sys\n", + "from pathlib import Path\n", + "\n", + "import datasets as ds\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import polars as pl\n", + "import seaborn as sns\n", + "from icecream import ic" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "id": "1d7ac8b9f6c3a17f", + "metadata": {}, + "source": "## Settings" + }, + { + "cell_type": "code", + "id": "49187922af8dc248", + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:28.686426Z", + "start_time": "2025-05-16T10:00:28.683789Z" + } + }, + "source": [ + "pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.max_rows\", None)\n", + "\n", + "rng = np.random.default_rng(42)\n", + "\n", + "pl.enable_string_cache()\n", + "pl.Config.set_decimal_separator(\",\")\n", + "\n", + "plt.style.use(\"bmh\")\n", + "plt.rcParams[\"figure.figsize\"] = (12, 6)\n", + "plt.rcParams[\"axes.titlesize\"] = 20\n", + "\n", + "sns.set_context(\"notebook\", font_scale=1.5)" + ], + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "10bd4ebb7af0097d", + "metadata": {}, + "source": "## Constants" + }, + { + "cell_type": "code", + "id": "4d9d9f7052e11adc", + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:28.693737Z", + "start_time": "2025-05-16T10:00:28.692221Z" + } + }, + "source": [ + "DATA_DIR = Path(\"../data\")\n", + 
"RAW_DATA_DIR = DATA_DIR / \"raw\"\n", + "PROCESSED_DATA_DIR = DATA_DIR / \"processed\"\n", + "OUTPUT_DATA_DIR = DATA_DIR / \"output\"\n", + "\n", + "MODEL_DIR = Path(\"../models\")" + ], + "outputs": [], + "execution_count": 3 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:28.739296Z", + "start_time": "2025-05-16T10:00:28.737652Z" + } + }, + "cell_type": "code", + "source": [ + "SRC_DIR = Path(\"../src\")\n", + "if str(SRC_DIR) not in sys.path:\n", + " sys.path.append(str(SRC_DIR))" + ], + "id": "cd198fd5d0ffae0e", + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "id": "e84dff469dd8e0a4", + "metadata": {}, + "source": "## Load Data" + }, + { + "cell_type": "code", + "id": "c4c3949840ca3096", + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:31.274753Z", + "start_time": "2025-05-16T10:00:28.782567Z" + } + }, + "source": [ + "stack_exchange_dataset = ds.load_dataset(\"habedi/stack-exchange-dataset\", split=\"train\")" + ], + "outputs": [], + "execution_count": 5 + }, + { + "cell_type": "code", + "id": "8ed6c55b563de78c", + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:31.378243Z", + "start_time": "2025-05-16T10:00:31.280646Z" + } + }, + "source": [ + "ic(stack_exchange_dataset)" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ic| stack_exchange_dataset: Dataset({\n", + " features: ['id', 'title', 'body', 'tags', 'label'],\n", + " num_rows: 82158\n", + " })\n" + ] + }, + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['id', 'title', 'body', 'tags', 'label'],\n", + " num_rows: 82158\n", + "})" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "cell_type": "code", + "id": "3104efd43e7ffcf8", + "metadata": { + "ExecuteTime": { + "end_time": "2025-05-16T10:00:31.453820Z", + "start_time": "2025-05-16T10:00:31.452767Z" + } + }, + "source": [], + 
"outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/poetry.toml b/poetry.toml new file mode 100644 index 0000000..ab1033b --- /dev/null +++ b/poetry.toml @@ -0,0 +1,2 @@ +[virtualenvs] +in-project = true diff --git a/pyproject.toml b/pyproject.toml index dcf0cf6..474a51e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,83 +1,103 @@ -[tool.poetry] +[project] name = "template-python-project" version = "0.1.0" -description = "A template repository for data science and machine learning projects in Python." -authors = ["Hassan Abedi "] -maintainers = ["Hassan Abedi "] +description = "A template for data science and machine learning projects in Python" readme = "README.md" -repository = "https://github.com/habedi/template-python-project" -license = "MIT" -packages = [{ include = "src", from = "." 
}] - -[tool.poetry.dependencies] -python = "^3.10" +license = { text = "MIT" } +authors = [ + { name = "Hassan Abedi", email = "hassan.abedi.t@gmail.com" } +] -[tool.poetry.group.dev.dependencies] -poetry-dynamic-versioning = "^1.4.0" -pytest = "^8.0.1" -pytest-cov = "^6.0.0" -pytest-mock = "^3.14.0" -mypy = "^1.11.1" -ruff = "^0.9.3" +requires-python = ">=3.10,<4.0" +dependencies = [ + "pandas (>=2.2.3,<3.0.0)", + "numpy (>=2.2.5,<3.0.0)", + "seaborn (>=0.13.2,<0.14.0)", + "jupyter (>=1.1.1,<2.0.0)", + "kaggle (>=1.7.4.5,<2.0.0.0)", + "huggingface-hub (>=0.31.2,<0.32.0)", + "polars (>=1.29.0,<2.0.0)", + "scikit-learn (>=1.6.1,<2.0.0)", + "icecream (>=2.1.4,<3.0.0)", + "datasets (>=3.6.0,<4.0.0)", + "python-dotenv (>=1.1.0,<2.0.0)", +] -[tool.poetry.scripts] -cli_script = "src.cli:main" +[project.optional-dependencies] +dev = [ + "pytest>=8.0.1", + "pytest-cov>=6.0.0", + "pytest-mock>=3.14.0", + "pytest-asyncio (>=0.26.0,<0.27.0)", + "mypy>=1.11.1", + "ruff>=0.9.3", + "pre-commit (>=4.2.0,<5.0.0)", + "griffe (>=1.7.3,<2.0.0)", + "mkdocs (>=1.6.1,<2.0.0)", + "mkdocstrings-python (>=1.16.10,<2.0.0)", + "mkdocs-material (>=9.6.14,<10.0.0)", + "types-requests (>=2.32.0.20250515,<3.0.0.0)" +] -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +[project.urls] +Repository = "https://github.com/habedi/template-python-project" +Documentation = "https://github.com/habedi/template-python-project/blob/main/docs/index.md" [tool.pytest.ini_options] -pythonpath = [".", "src", 'bin', 'notebooks'] +pythonpath = ["src"] +testpaths = ["tests"] +addopts = [ + "--tb=short", + #"--disable-warnings", + "--cov=src", + "--cov-branch", + "--cov-report=term", + "--cov-report=xml", + "-rs" +] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +asyncio_default_test_loop_scope = "function" + +[tool.coverage.run] +branch = true +parallel = true +source = ["src"] +omit = ["tests/*"] + +[tool.coverage.report] +show_missing = false 
+skip_empty = true +precision = 2 [tool.mypy] python_version = "3.10" ignore_missing_imports = true +disallow_untyped_defs = true disallow_untyped_calls = true +disallow_incomplete_defs = true +check_untyped_defs = true +warn_return_any = true strict_optional = true warn_redundant_casts = true +exclude = "^(scripts/|tests/)" -[tool.poetry-dynamic-versioning] -enable = true -vcs = "git" -versioning = "semver" # Semantic Versioning - -# Ruff configuration [tool.ruff] exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv" + ".bzr", ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", ".mypy_cache", + ".nox", ".pants.d", ".pytype", ".ruff_cache", ".svn", ".tox", ".venv", + "__pypackages__", "_build", "buck-out", "build", "dist", "node_modules", + "venv", "tests" ] line-length = 100 indent-width = 4 -src = ["src", "tests", "bin", "notebooks"] +src = ["src"] target-version = "py310" +unsafe-fixes = false [tool.ruff.lint] -select = ["ANN", "D", "E", "F", "I"] -ignore = [ - # Ignore missing docstrings - "D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107", -] +select = ["ANN", "E", "F", "I", "W", "B", "RUF", "SIM", "C90"] +ignore = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107"] fixable = ["ALL"] unfixable = [] dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" @@ -92,4 +112,4 @@ line-ending = "auto" convention = "google" [tool.ruff.lint.per-file-ignores] -"tests/**/*.py" = [] +"tests/**/*.py" = ["ANN001", "ANN201"] diff --git a/notebooks/.gitkeep b/scripts/.gitkeep similarity index 100% rename from notebooks/.gitkeep rename to scripts/.gitkeep diff --git a/src/__init__.py b/src/pipeline_1/__init__.py similarity index 100% rename from src/__init__.py rename to src/pipeline_1/__init__.py diff --git a/tests/.gitkeep 
b/src/pipeline_1/infer.py similarity index 100% rename from tests/.gitkeep rename to src/pipeline_1/infer.py diff --git a/src/pipeline_1/model/__init__.py b/src/pipeline_1/model/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pipeline_1/model/config.py b/src/pipeline_1/model/config.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pipeline_1/model/metrics.py b/src/pipeline_1/model/metrics.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pipeline_1/model/model.py b/src/pipeline_1/model/model.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pipeline_1/model/utils.py b/src/pipeline_1/model/utils.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pipeline_1/train.py b/src/pipeline_1/train.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e69de29