From 64a47ce6195c5daa61d34f10db15dbd3ae52b368 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 25 Jun 2026 14:54:18 +0000 Subject: [PATCH] docs: overhaul documentation and introduce issue/PR templates This commit adds strict intent-revealing Rustdoc comments across all core modules (main, object, tree, commit, refs, config) to reflect the current codebase state. It also introduces structured GitHub issue templates (bug report, feature request) and refines the PR template to guarantee documentation integrity. Furthermore, it creates a new architectural overview markdown file in the docs/ folder and updates the CHANGELOG accordingly. Co-authored-by: fadyphil <227748259+fadyphil@users.noreply.github.com> --- .github/ISSUE_TEMPLATE/bug_report.md | 31 ++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ .github/PULL_REQUEST_TEMPLATE.md | 20 +++++++++--- CHANGELOG.md | 6 ++++ README.md | 1 + docs/architecture/overview.md | 34 ++++++++++++++++++++ src/commit.rs | 21 ++++++++++++ src/config.rs | 15 +++++++++ src/main.rs | 39 +++++++++++++++++++++-- src/object.rs | 23 +++++++++++++ src/refs.rs | 18 +++++++++++ src/tree.rs | 18 +++++++++++ 12 files changed, 238 insertions(+), 8 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 docs/architecture/overview.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..8ed364f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,31 @@ +--- +name: Bug Report +about: Create a report to help us improve git-rs +title: "[BUG] " +labels: bug +assignees: '' + +--- + +## Describe the Bug +A clear and concise description of what the bug is. + +## To Reproduce +Steps to reproduce the behavior: +1. Run command '...' +2. Input '...' +3. 
See error + +## Expected Behavior +A clear and concise description of what you expected to happen. + +## Actual Behavior +A clear and concise description of what actually happened. + +## Environment (please complete the following information): + - OS: [e.g. Ubuntu, macOS, Windows] + - Rust Version: [e.g. 1.70.0] + - `git-rs` Version/Commit: [e.g. 0.3.0 or commit hash] + +## Additional Context +Add any other context about the problem here (e.g. stack traces, logs). diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..c09a7ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[FEATURE] " +labels: enhancement +assignees: '' + +--- + +## Is your feature request related to a problem? Please describe. +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +## Describe the solution you'd like +A clear and concise description of what you want to happen. Consider referencing official Git behavior. + +## Describe alternatives you've considered +A clear and concise description of any alternative solutions or features you've considered. + +## Additional context +Add any other context or screenshots about the feature request here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index aa0ddba..547943f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,8 @@ # Description -Please include a summary of the change and which issue is fixed. +Please include a summary of the change and which issue is fixed. If this PR introduces a new architecture or modifies a critical storage mechanism, explain the *why* behind your approach. + +Fixes # (issue) ## Type of Change @@ -8,10 +10,18 @@ Please include a summary of the change and which issue is fixed. 
- [ ] New feature (non-breaking change which adds functionality) - [ ] Refactor (memory/safety optimization) - [ ] Documentation update +- [ ] Performance improvement + +## Testing and Verification + +Please describe the tests that you ran to verify your changes. +- [ ] I have verified this change operates correctly alongside the official `git` binary (e.g., `git cat-file -p <hash>`). +- [ ] I have added unit/integration tests that prove my fix is effective or my feature works. +- [ ] `cargo test` passes locally. ## Checklist -- [ ] My code follows the style guidelines of this project -- [ ] I have run `cargo fmt` -- [ ] I have run `cargo clippy -- -D warnings` and fixed all warnings -- [ ] I have added/updated documentation if necessary +- [ ] My code follows the style guidelines of this project (`cargo fmt`). +- [ ] I have run `cargo clippy -- -D warnings` and fixed all warnings. +- [ ] I have added/updated strictly formatted Rustdoc comments (`///` or `//!`) detailing the *why* for any new/modified logic. +- [ ] I have updated the `CHANGELOG.md` with my changes (if applicable). diff --git a/CHANGELOG.md b/CHANGELOG.md index 81aac42..93d0a82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2026-06-25 + +### Added +- **Deep Strict Documentation:** Added comprehensive, intent-revealing Rustdoc comments (`///` and `//!`) across all modules (`main.rs`, `object.rs`, `tree.rs`, `commit.rs`, `refs.rs`, `config.rs`). The documentation directly reflects the current codebase state, clearly defining architectural domain logic and providing context drawn from the project's history. 
+- **GitHub Issue & PR Templates:** Created strict template structures (`.github/ISSUE_TEMPLATE/bug_report.md`, `.github/ISSUE_TEMPLATE/feature_request.md`) and refined `.github/PULL_REQUEST_TEMPLATE.md` to ensure better integrity and structured code review processes. + ## [0.3.0] - 2026-06-25 ### Added diff --git a/README.md b/README.md index e0b2a67..4009b5f 100644 --- a/README.md +++ b/README.md @@ -200,6 +200,7 @@ If official Git can read the database, the binary format is mathematically corre | [**Commit Objects & the DAG**](docs/04_commit_object_and_commit_tree.md) | **4** | **Commit object format, DAG structure, parent references, and serialization** | | [**DAG & Commit Serialization**](docs/05_dag_and_commit_serialization.md) | **4** | **Deep dive: DAG mathematics, commit serialization pipeline, content deduplication** | | [**Porcelain Commit & Refs**](docs/06_porcelain_commit_and_refs.md) | **5** | **`commit` workflow, `HEAD` resolution, branch pointer mutation, plumbing vs porcelain** | +| [**Architectural Overview**](docs/architecture/overview.md) | **All** | **High-level summary of domain logic and core modules within the Rust codebase.** | --- diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md new file mode 100644 index 0000000..152d3ae --- /dev/null +++ b/docs/architecture/overview.md @@ -0,0 +1,34 @@ +# Git-rs Architectural Overview + +## Introduction +`git-rs` is a from-scratch implementation of Git's core object storage engine in Rust. This document outlines the high-level architecture and the specific domain logic that makes up the repository. + +## Core Modules + +1. **CLI Dispatcher (`src/main.rs`)** + The entry point of the application. It utilizes `clap` to parse command-line arguments and route them to the appropriate core logic functions. It handles the bridge between the user and the system's underlying capabilities, functioning as both a plumbing and porcelain interface. + +2. 
**Object Database Engine (`src/object.rs`)** + This module handles the core content-addressable storage mechanics. Git relies on a strict, continuous stream of bytes. This engine ensures the bytes are formatted correctly: `<type> <size>\0<content>`, and handles hashing (SHA-1) and compression (Zlib) before writing objects to `.git/objects/XX/YYY...`. + +3. **Tree Serialization (`src/tree.rs`)** + This module implements the recursive post-order depth-first search required to capture a directory's state. It reads file paths, determines if an entry is a file (blob) or a directory (tree), and recursively bubbles up the computed hashes to build the full tree structure, writing each part to the object database. + +4. **Commit Creation (`src/commit.rs`)** + Commits bind a tree hash to metadata (author, committer, timestamp, message, and parent commit). This module handles formatting this ASCII data correctly and storing it as a commit object, acting as the nodes in the Git Directed Acyclic Graph (DAG). + +5. **References Management (`src/refs.rs`)** + Branches and tags in Git are simply pointers to commit hashes. This module provides the tools to read the `HEAD` pointer, resolve branches, and safely mutate references when new commits are created. + +6. **Configuration Parsing (`src/config.rs`)** + To support local development identity, this module reads the repository-local `.git/config` file. It relies on the `serde` and `toml` crates to deserialize author names and emails used in the commit generation process. + +## Data Flow Pipeline +1. **User Action:** A user initiates a command (e.g., `git-rs commit -m "Msg"`). +2. **Snapshot:** `tree.rs` walks the directory, compressing and hashing each file into `object.rs`, and finally returns a root tree hash. +3. **Reference Lookup:** `refs.rs` resolves `HEAD` to find the parent commit hash. +4. **Metadata Assembly:** `commit.rs` pulls author info via `config.rs`, creates the commit ASCII payload, and writes it using `object.rs`. +5. 
**Reference Update:** `refs.rs` overwrites the active branch file with the new commit hash. + +## Integrity Guarantee +If the official Git binary can parse the `.git/objects` and `.git/refs` generated by this implementation, the architecture is structurally sound and mathematically valid. \ No newline at end of file diff --git a/src/commit.rs b/src/commit.rs index 215146a..4cf7a5c 100644 --- a/src/commit.rs +++ b/src/commit.rs @@ -1,3 +1,9 @@ +//! # Git Commit Creation +//! +//! This module handles the construction and serialization of Git commit objects. +//! A commit object links a tree (the snapshot) with metadata such as the author, +//! committer, timestamp, and an optional parent commit to form the commit history (DAG). + use crate::{config::get_author, object::write_object}; use std::{ io::Write, @@ -6,6 +12,7 @@ use std::{ }; use thiserror::Error; +/// Errors that can occur during commit creation and serialization. #[derive(Debug, Error)] pub enum CommitError { #[error("I/O error: {0}")] @@ -17,6 +24,8 @@ pub enum CommitError { #[error("Object storage error: {0}")] Object(#[from] crate::object::ObjectError), } + +/// Represents a Git author or committer signature. pub struct Signature { name: String, email: String, @@ -24,6 +33,7 @@ pub struct Signature { timezone: String, } +/// Represents a Git commit object and its metadata. pub struct Commit { tree: String, author: Signature, @@ -32,11 +42,16 @@ pub struct Commit { parent: Option, } +/// Retrieves the current system time as a UNIX timestamp (seconds since epoch). fn get_timestamp() -> Result { let timestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); Ok(timestamp) } +/// Constructs a `Commit` struct with the provided tree, message, and parent. +/// +/// Author and committer information are read from the repository's `.git/config` +/// file, and the current system time is used for the timestamps. 
fn create_commit( tree_hash: &str, commit_message: &str, @@ -68,6 +83,8 @@ fn create_commit( Ok(commit) } +/// Serializes a `Commit` struct into the official Git ASCII format and writes +/// it to the object database. Returns the SHA-1 hash of the commit object. fn write_commit(commit: &Commit, dir: &Path) -> Result { let mut serialized = Vec::new(); writeln!(&mut serialized, "tree {}", commit.tree)?; @@ -92,6 +109,10 @@ fn write_commit(commit: &Commit, dir: &Path) -> Result { Ok(oid) } +/// High-level function to create and write a commit object in one step. +/// +/// This serves as the primary entry point for commit creation from the CLI dispatcher. +/// Returns the 40-character hex SHA-1 hash of the new commit object. pub fn write_commit_object( tree_hash: &str, commit_message: &str, diff --git a/src/config.rs b/src/config.rs index 4234311..0d6b32d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,15 +1,30 @@ +//! # Git Configuration Parsing +//! +//! This module handles the parsing of `.git/config` files to extract repository-local +//! settings, primarily focusing on user identity (name and email) for commits. + use std::path::Path; +/// Represents the top-level structure of a Git configuration file. #[derive(serde::Deserialize)] pub struct GitConfig { + /// The `[user]` section of the config. pub user: UserConfig, } + +/// Represents the `[user]` section containing identity information. #[derive(serde::Deserialize)] pub struct UserConfig { + /// The user's name (e.g., "John Doe"). pub name: String, + /// The user's email address (e.g., "john@example.com"). pub email: String, } +/// Reads the repository's `.git/config` file and extracts the author's name and email. +/// +/// If the config file is missing or malformed, it falls back to a default +/// "unknown_user" and "unknown@localhost" to ensure commit creation does not fail. 
pub fn get_author(dir: &Path) -> (String, String) { let unknown_user = UserConfig { name: "unknown_user".to_string(), diff --git a/src/main.rs b/src/main.rs index e0e0333..ab2a43b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,12 @@ +//! # Git-rs CLI Dispatcher +//! +//! This module contains the main entry point for the `git-rs` command-line interface. +//! It uses the `clap` crate to define, parse, and route CLI arguments to the appropriate +//! plumbing or porcelain commands (e.g., `init`, `hash-object`, `commit`). +//! +//! The `git-rs` project is a from-scratch implementation of Git's core object +//! storage engine, intended for educational purposes and systems programming practice. + mod commit; mod config; mod object; @@ -19,16 +28,19 @@ use crate::tree::write_tree; use clap::{Parser, Subcommand}; +/// The root command-line interface structure parsed by `clap`. #[derive(Parser)] #[command( name = "git-rs", about = "A from-scratch implementation of Git's core object storage engine in Rust." )] struct Cli { + /// The specific subcommand to execute. #[command(subcommand)] command: Commands, } +/// The available Git commands supported by `git-rs`. #[derive(Subcommand)] enum Commands { /// Initialize a new git-rs repository @@ -72,6 +84,10 @@ enum Commands { }, } +/// The main entry point of the `git-rs` application. +/// +/// It initializes the CLI parser, determines the current working directory, +/// and delegates execution to the corresponding command handler function. fn main() -> anyhow::Result<()> { // The magic happens here! clap reads env::args(), validates everything, // and populates the Cli struct. @@ -114,8 +130,10 @@ fn main() -> anyhow::Result<()> { } } -// DELETED: expect_args and run functions are no longer needed! - +/// Initializes a new, empty Git repository in the current directory. 
+/// +/// Creates the `.git` directory structure, including `objects/`, `refs/`, +/// a default `HEAD` pointer, and a default `.git/config` if one does not exist. fn cmd_init(repo_dir: &Path) -> anyhow::Result<()> { let git_dir = repo_dir.join(".git"); fs::create_dir_all(git_dir.join("objects/info"))?; @@ -134,6 +152,9 @@ fn cmd_init(repo_dir: &Path) -> anyhow::Result<()> { Ok(()) } +/// Reads an object from the Git database and outputs its content, type, or size. +/// +/// Exactly one of `pretty`, `show_type`, or `show_size` must be true. fn cmd_cat_file( pretty: bool, show_type: bool, @@ -155,6 +176,7 @@ fn cmd_cat_file( Ok(()) } +/// Computes the SHA-1 hash of a file's content and optionally writes it to the database as a blob. fn cmd_hash_object(file: &str, write: bool, dir: &Path) -> anyhow::Result<()> { let content = fs::read(file)?; if write { @@ -166,12 +188,18 @@ fn cmd_hash_object(file: &str, write: bool, dir: &Path) -> anyhow::Result<()> { Ok(()) } +/// Recursively snapshots the working directory into a tree object. +/// +/// Returns the SHA-1 hash of the resulting root tree object. fn cmd_write_tree(path: &Path, dir: &Path) -> anyhow::Result { let tree_hash = write_tree(path, dir).context("Failed to write tree")?; Ok(tree_hash) } -// FIXED: Removed the `flag` parameter and the `match flag` block +/// Low-level plumbing command to create a commit object. +/// +/// Requires an existing tree hash, a commit message, and an optional parent commit hash. +/// Returns the SHA-1 hash of the newly created commit object. fn cmd_write_commit( tree_hash: &str, commit_message: &str, @@ -183,6 +211,11 @@ fn cmd_write_commit( Ok(commit_hash) } +/// High-level porcelain command to record changes to the repository. +/// +/// Snapshots the current working directory, creates a commit object referencing +/// the snapshot, sets the parent to the current HEAD, and updates HEAD to point +/// to the new commit. Returns the SHA-1 hash of the newly created commit. 
fn cmd_commit(commit_message: &str, dir: &Path) -> anyhow::Result { let current_path = Path::new("."); let tree_hash = diff --git a/src/object.rs b/src/object.rs index 8e8d2c1..22ad7d2 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,3 +1,9 @@ +//! # Git Object Database Engine +//! +//! This module handles the core content-addressable storage mechanics of Git. +//! It provides functionality for hashing, compressing (Zlib), serializing, +//! reading, and writing raw Git objects (blobs, trees, commits). + use flate2::{read::ZlibDecoder, write::ZlibEncoder, Compression}; use sha1::{Digest, Sha1}; use std::{ @@ -7,6 +13,7 @@ use std::{ }; use thiserror::Error; +/// Errors that can occur during object serialization, deserialization, or storage. #[derive(Debug, Error)] pub enum ObjectError { // --- STANDARD LIBRARY ERRORS (Using #[from]) --- @@ -40,6 +47,9 @@ pub enum ObjectError { InvalidObjectPath, } +/// Formats a raw payload into a valid Git object by prepending the header. +/// +/// The standard Git object format is: `<type> <size>\0<content>` fn create_object(kind: &str, content: &[u8]) -> Result, ObjectError> { //here we create the vector that will hold the object which we will return let mut obj = Vec::new(); @@ -49,6 +59,8 @@ fn create_object(kind: &str, content: &[u8]) -> Result, ObjectError> { Ok(obj) } +/// Computes the cryptographic SHA-1 hash of an uncompressed Git object byte slice. +/// Returns the hash as a 40-character hex string. fn hash_object(object: &[u8]) -> String { let mut hasher = Sha1::new(); hasher.update(object); @@ -57,6 +69,7 @@ fn hash_object(object: &[u8]) -> String { hash_hex } +/// Compresses an uncompressed Git object byte slice using the Zlib algorithm. fn compress_object(object: &[u8]) -> Result, ObjectError> { let mut compressor = ZlibEncoder::new(Vec::new(), Compression::default()); compressor.write_all(object)?; @@ -64,6 +77,10 @@ fn compress_object(object: &[u8]) -> Result, ObjectError> { Ok(compressed?) 
} +/// Locates, decompresses, and parses a Git object from the `.git/objects` directory. +/// +/// Returns a tuple containing the object type as a `String` (e.g., "blob", "tree") +/// and its raw binary content payload. pub fn read_object(hash: &str, dir: &Path) -> Result<(String, Vec), ObjectError> { // 1. Resolve the filesystem path for this hash let path = object_path(hash, dir)?; @@ -105,6 +122,9 @@ pub fn read_object(hash: &str, dir: &Path) -> Result<(String, Vec), ObjectEr Ok((kind.to_string(), content)) } +/// Resolves the filesystem path for an object given its 40-character hex SHA-1 hash. +/// +/// Git stores objects in `.git/objects/XX/YYY...` where `XX` is the first 2 hex chars. fn object_path(hash: &str, repo_dir: &Path) -> Result { if hash.len() != 40 { return Err(ObjectError::InvalidHashLength); @@ -116,6 +136,9 @@ fn object_path(hash: &str, repo_dir: &Path) -> Result { Ok(path) } +/// Creates, hashes, compresses, and writes a new object to the `.git/objects` directory. +/// +/// Returns the calculated 40-character SHA-1 hex hash of the newly written object. pub fn write_object(kind: &str, content: &[u8], dir: &Path) -> Result { let object = create_object(kind, content)?; let hashed_object = hash_object(&object); diff --git a/src/refs.rs b/src/refs.rs index 2abbe22..9514518 100644 --- a/src/refs.rs +++ b/src/refs.rs @@ -1,6 +1,13 @@ +//! # Git References Management +//! +//! This module provides functions to read and mutate Git references (branches, tags) +//! and the `HEAD` pointer. References in Git are simply text files containing a +//! 40-character commit hash or a symbolic reference to another ref. + use std::{fs, path::Path}; use thiserror::Error; +/// Errors that can occur when reading or updating Git references. #[derive(Debug, Error)] pub enum RefsError { #[error("I/O error {0}")] @@ -12,6 +19,11 @@ pub enum RefsError { DetachedHead, } +/// Reads the `.git/HEAD` file and returns the symbolic reference it points to. 
+/// +/// For example, if HEAD contains `ref: refs/heads/main`, this function +/// returns `"refs/heads/main"`. Detached HEAD states (where HEAD contains +/// a raw commit hash) are currently not supported and will return an error. pub fn read_head(dir: &Path) -> Result { let path = dir.join(".git").join("HEAD"); let contents = fs::read_to_string(path)?; @@ -22,6 +34,8 @@ pub fn read_head(dir: &Path) -> Result { Ok(clean_path.to_string()) } +/// Reads a specific reference file (e.g., `.git/refs/heads/main`) and returns +/// the 40-character commit hash it contains, if the file exists. pub fn read_ref(path: &str, dir: &Path) -> Result, RefsError> { let path = dir.join(".git").join(path); if !path.exists() { @@ -32,6 +46,10 @@ pub fn read_ref(path: &str, dir: &Path) -> Result, RefsError> { Ok(Some(cleaned.to_string())) } +/// Updates the current branch reference to point to a new commit hash. +/// +/// This function first resolves `HEAD` to find the active branch reference, +/// then overwrites that reference file with the provided `new_head_commit_hash`. pub fn update_current_ref(new_head_commit_hash: &str, dir: &Path) -> Result<(), RefsError> { let ref_path = read_head(dir)?; let head_file_path = dir.join(".git").join(ref_path); diff --git a/src/tree.rs b/src/tree.rs index 9bf4d24..a2dfab6 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,8 +1,16 @@ +//! # Git Tree Serialization +//! +//! This module handles the recursive traversal of directories and the creation +//! of Git tree objects. Tree objects represent the state of a directory at a +//! specific point in time, storing the names, modes, and SHA-1 hashes of its +//! contents (files and subdirectories). + use std::{fs, io::Write, path::Path}; use crate::object::write_object; use thiserror::Error; +/// Errors that can occur during tree serialization. 
#[derive(Debug, Error)] pub enum TreeError { #[error("I/O error: {0}")] @@ -14,12 +22,22 @@ pub enum TreeError { #[error("Hex decoding error: {0}")] Hex(#[from] hex::FromHexError), // Wraps the hex crate error } + +/// Represents a single entry (file or directory) within a Git tree. pub struct TreeEntry { + /// The file mode (e.g., "100644" for files, "040000" for directories). pub mode: String, + /// The name of the file or directory. pub name: String, + /// The 40-character hex SHA-1 hash of the object. pub hash: String, } +/// Recursively traverses a directory, hashes its contents, and writes tree objects to the database. +/// +/// This function uses a post-order depth-first search approach, ensuring that child +/// objects (blobs and sub-trees) are written and hashed before their parent tree. +/// It returns the 40-character hex SHA-1 hash of the root tree object. pub fn write_tree(path: &Path, dir: &Path) -> Result { let entries = fs::read_dir(path)?; let mut array_of_entries: Vec = Vec::new();