diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcfd3a1..2ae4f16 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,20 +33,32 @@ jobs: run: cargo fmt --check - name: Clippy (WASM target) - run: cargo clippy --target wasm32-wasip2 -- -D warnings + run: cargo clippy --target wasm32-wasip2 --workspace -- -D warnings - name: Unit tests (host target) - run: cargo test --target x86_64-unknown-linux-gnu + run: cargo test --target x86_64-unknown-linux-gnu --workspace - - name: Build WASM component - run: cargo build --release --target wasm32-wasip2 + - name: Build WASM plugins + run: cargo build --release --target wasm32-wasip2 --workspace - - name: Upload plugin artifact + - name: Upload Unity plugin uses: actions/upload-artifact@v4 with: - name: plugin-wasm + name: unity-format-plugin path: target/wasm32-wasip2/release/unity_format_plugin.wasm + - name: Upload RPM plugin + uses: actions/upload-artifact@v4 + with: + name: rpm-format-plugin + path: target/wasm32-wasip2/release/rpm_format_plugin.wasm + + - name: Upload PyPI plugin + uses: actions/upload-artifact@v4 + with: + name: pypi-format-plugin + path: target/wasm32-wasip2/release/pypi_format_plugin.wasm + - name: SonarCloud Scan uses: SonarSource/sonarqube-scan-action@v5 env: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 950cb38..eb1b4c5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,39 +19,66 @@ jobs: with: targets: wasm32-wasip2 - - name: Build WASM component - run: cargo build --release --target wasm32-wasip2 + - name: Build all WASM plugins + run: cargo build --release --target wasm32-wasip2 --workspace - - name: Prepare release archive + - name: Package Unity plugin run: | - mkdir -p release-staging - cp target/wasm32-wasip2/release/unity_format_plugin.wasm release-staging/plugin.wasm - cp plugin.toml release-staging/ - cp README.md release-staging/ - cp LICENSE release-staging/ - cp -r wit release-staging/ - cd release-staging && zip -r ../unity-format-plugin-${{ github.ref_name }}.zip . + mkdir -p staging/unity-format + cp target/wasm32-wasip2/release/unity_format_plugin.wasm staging/unity-format/plugin.wasm + cp plugins/unity-format/plugin.toml staging/unity-format/ + cp README.md LICENSE staging/unity-format/ + cp -r wit staging/unity-format/ + cd staging/unity-format && zip -r ../../unity-format-plugin-${{ github.ref_name }}.zip . + + - name: Package RPM plugin + run: | + mkdir -p staging/rpm-format + cp target/wasm32-wasip2/release/rpm_format_plugin.wasm staging/rpm-format/plugin.wasm + cp plugins/rpm-format/plugin.toml staging/rpm-format/ + cp README.md LICENSE staging/rpm-format/ + cp -r wit staging/rpm-format/ + cd staging/rpm-format && zip -r ../../rpm-format-plugin-${{ github.ref_name }}.zip . + + - name: Package PyPI plugin + run: | + mkdir -p staging/pypi-format + cp target/wasm32-wasip2/release/pypi_format_plugin.wasm staging/pypi-format/plugin.wasm + cp plugins/pypi-format/plugin.toml staging/pypi-format/ + cp README.md LICENSE staging/pypi-format/ + cp -r wit staging/pypi-format/ + cd staging/pypi-format && zip -r ../../pypi-format-plugin-${{ github.ref_name }}.zip . - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: - files: unity-format-plugin-${{ github.ref_name }}.zip + files: | + unity-format-plugin-${{ github.ref_name }}.zip + rpm-format-plugin-${{ github.ref_name }}.zip + pypi-format-plugin-${{ github.ref_name }}.zip generate_release_notes: true body: | ## Install into Artifact Keeper - **Option A - ZIP upload (easiest):** - Download the `.zip` below and upload it: + Download a plugin ZIP below and upload it: ```bash curl -X POST https://your-registry/api/v1/plugins/install/zip \ -H "Authorization: Bearer $TOKEN" \ - -F "file=@unity-format-plugin-${{ github.ref_name }}.zip" + -F "file=@-${{ github.ref_name }}.zip" ``` - **Option B - Git install:** + Or install directly from this Git repo: ```bash curl -X POST https://your-registry/api/v1/plugins/install/git \ -H "Authorization: Bearer $TOKEN" \ -H "Content-Type: application/json" \ -d '{"url": "https://github.com/artifact-keeper/artifact-keeper-example-plugin.git", "ref": "${{ github.ref_name }}"}' ``` + + ## Included Plugins + + | Plugin | Format Key | Description | + |--------|-----------|-------------| + | unity-format | `unity` | Unity .unitypackage handler | + | rpm-format | `rpm` | RPM package handler | + | pypi-format | `pypi` | Python wheel/sdist handler | diff --git a/Cargo.toml b/Cargo.toml index 6e510df..d197159 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,24 +1,3 @@ -[package] -name = "unity-format-plugin" -version = "0.1.0" -edition = "2021" -description = "Example Artifact Keeper plugin - Unity .unitypackage format handler" -license = "MIT" -authors = ["Artifact Keeper Team"] -repository = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" - -[lib] -crate-type = ["cdylib"] - -[dependencies] -wit-bindgen = "0.36" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" - -[package.metadata.component] -package = "artifact-keeper:format" - -[package.metadata.component.target] -path = "wit" - -[package.metadata.component.dependencies] +[workspace] +members = ["plugins/*"] +resolver = "2" diff --git a/README.md b/README.md index 9eef460..42782d2 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,16 @@ -# Artifact Keeper Example Plugin +# Artifact Keeper Example Plugins -A fully working example of a custom format handler plugin for [Artifact Keeper](https://github.com/artifact-keeper/artifact-keeper). This plugin handles **Unity `.unitypackage`** files (gzipped tarballs), demonstrating real-world format validation, metadata extraction, and index generation. +A collection of working example plugins for [Artifact Keeper](https://github.com/artifact-keeper/artifact-keeper). Each plugin implements a custom format handler using the WASM Component Model and the `artifact-keeper:format@1.0.0` WIT contract. -Use this repo as a starting point for building your own plugins. Fork it, change the format key, and implement your logic. +Use these as starting points for building your own plugins. Fork, change the format key, and implement your logic. -## What this plugin does +## Included plugins -| Capability | Description | -|------------|-------------| -| **Validate** | Checks gzip magic bytes and correct file extension | -| **Parse metadata** | Extracts version from path/filename, detects content type | -| **Generate index** | Creates a `unity-index.json` listing all packages in a repository | +| Plugin | Format Key | What it demonstrates | +|--------|-----------|---------------------| +| [Unity](plugins/unity-format/) | `unity` | Gzip magic byte validation, path-based version extraction, JSON index | +| [RPM](plugins/rpm-format/) | `rpm` | Binary format validation (RPM lead magic), right-to-left filename parsing, structured metadata | +| [PyPI](plugins/pypi-format/) | `pypi` | PEP 427 wheel parsing, PEP 503 name normalization, HTML + JSON index generation | ## Prerequisites @@ -20,14 +20,16 @@ Use this repo as a starting point for building your own plugins. Fork it, change ## Build ```bash -# Clone this repo git clone https://github.com/artifact-keeper/artifact-keeper-example-plugin.git cd artifact-keeper-example-plugin -# Build the WASM component +# Build all plugins cargo build --release -# Output: target/wasm32-wasip2/release/unity_format_plugin.wasm +# Build a specific plugin +cargo build --release -p rpm-format-plugin + +# Output: target/wasm32-wasip2/release/.wasm ``` ## Test @@ -35,7 +37,11 @@ cargo build --release Unit tests run on the host target (not WASM): ```bash -cargo test --target $(rustc -vV | grep host | awk '{print $2}') +# All plugins +cargo test --target $(rustc -vV | grep host | awk '{print $2}') --workspace + +# Single plugin +cargo test --target $(rustc -vV | grep host | awk '{print $2}') -p pypi-format-plugin ``` ## Install into Artifact Keeper @@ -54,12 +60,12 @@ curl -X POST https://your-registry/api/v1/plugins/install/git \ ### From ZIP (release artifact) -Download the ZIP from the [Releases](https://github.com/artifact-keeper/artifact-keeper-example-plugin/releases) page, then: +Download a plugin ZIP from the [Releases](https://github.com/artifact-keeper/artifact-keeper-example-plugin/releases) page, then: ```bash curl -X POST https://your-registry/api/v1/plugins/install/zip \ -H "Authorization: Bearer $TOKEN" \ - -F "file=@unity-format-plugin-v0.1.0.zip" + -F "file=@rpm-format-plugin-v0.1.0.zip" ``` ### From local path @@ -73,7 +79,7 @@ curl -X POST https://your-registry/api/v1/plugins/install/local \ ## Create your own plugin -1. **Fork this repo** or use it as a template +1. **Copy one of the example plugins** as a starting point (the Unity plugin is the simplest) 2. Update `plugin.toml` with your format key, extensions, and description 3. Implement the four functions in `src/lib.rs`: - `format_key()` -- return your unique format identifier @@ -87,18 +93,29 @@ curl -X POST https://your-registry/api/v1/plugins/install/local \ ``` . -├── .cargo/config.toml # Default WASM target -├── .github/workflows/ -│ ├── ci.yml # Lint + test + build on push/PR -│ └── release.yml # Build + package + GitHub Release on tag -├── src/lib.rs # Plugin implementation -├── wit/format-plugin.wit # WIT contract (from Artifact Keeper) -├── plugin.toml # Plugin manifest -├── Cargo.toml # Rust project config -└── rust-toolchain.toml # Rust toolchain + WASM target +├── Cargo.toml # Workspace root +├── .cargo/config.toml # Default WASM target (wasm32-wasip2) +├── rust-toolchain.toml # Rust stable + WASM target +├── wit/format-plugin.wit # Shared WIT contract +├── plugins/ +│ ├── unity-format/ # Unity .unitypackage handler +│ │ ├── Cargo.toml +│ │ ├── plugin.toml +│ │ └── src/lib.rs +│ ├── rpm-format/ # RPM package handler +│ │ ├── Cargo.toml +│ │ ├── plugin.toml +│ │ └── src/lib.rs +│ └── pypi-format/ # Python wheel/sdist handler +│ ├── Cargo.toml +│ ├── plugin.toml +│ └── src/lib.rs +└── .github/workflows/ + ├── ci.yml # Lint + test + build on push/PR + └── release.yml # Build + package + GitHub Release on tag ``` -## WIT Interface +## WIT interface Plugins implement the `artifact-keeper:format@1.0.0` interface: diff --git a/docs/TEST_PLAN.md b/docs/TEST_PLAN.md index 3d30c63..74e9c47 100644 --- a/docs/TEST_PLAN.md +++ b/docs/TEST_PLAN.md @@ -2,42 +2,70 @@ ## Overview -The artifact-keeper example plugin is a Rust WASM plugin template using wit-bindgen. It compiles to wasm32-wasip2 and implements the FormatHandler WIT contract. +The artifact-keeper example plugins are Rust WASM plugin templates using wit-bindgen. They compile to wasm32-wasip2 and implement the FormatHandler WIT contract. The workspace contains three plugins: Unity, RPM, and PyPI. ## Test Inventory | Test Type | Framework | Count | CI Job | Status | |-----------|-----------|-------|--------|--------| -| Check | cargo check | Full | `check` | Active | | Format | cargo fmt | Full | CI | Active | | Lint | cargo clippy | Full | CI | Active | -| Unit | cargo test | Minimal | `test` | Active | -| WASM build | cargo build --release | Full | `build` | Active | +| Unit - Unity | cargo test | 12 | CI | Active | +| Unit - RPM | cargo test | 18 | CI | Active | +| Unit - PyPI | cargo test | 27 | CI | Active | +| WASM build | cargo build --release | 3 plugins | CI | Active | | Integration | (none) | 0 | - | Missing | ## How to Run -### Check and Lint +### Lint ```bash -cargo check --workspace cargo fmt --check -cargo clippy --workspace -- -D warnings +cargo clippy --target wasm32-wasip2 --workspace -- -D warnings ``` ### Unit Tests (must run on host, not WASM target) ```bash -cargo test --target $(rustc -vV | grep host | awk '{print $2}') +cargo test --target $(rustc -vV | grep host | awk '{print $2}') --workspace ``` ### Build WASM ```bash -cargo build --release -# Output: target/wasm32-wasip2/release/unity_format_plugin.wasm +cargo build --release --workspace +# Output: target/wasm32-wasip2/release/{unity,rpm,pypi}_format_plugin.wasm ``` +## Plugin Test Coverage + +### Unity Format (12 tests) +- Format key identity +- Gzip magic byte detection +- Non-gzip content type fallback +- Validation: accepts valid gzip, rejects empty, rejects wrong extension, rejects bad magic +- Version extraction from path component and filename +- Index generation: empty returns None, produces valid JSON + +### RPM Format (18 tests) +- Format key identity +- Filename parsing: simple, hyphens in name, noarch, no extension +- Version extraction from paths +- Metadata: RPM magic detection, non-RPM fallback, empty error +- Validation: accepts valid RPM, rejects empty, wrong extension, too small, bad magic, empty path +- Index generation: empty returns None, produces JSON with name/arch/release fields + +### PyPI Format (27 tests) +- Format key identity +- PEP 503 name normalization: simple, underscores, dots, consecutive separators, mixed, leading/trailing +- Wheel filename parsing: name extraction, version extraction, build tag handling +- Source distribution parsing: name from sdist, name with hyphens, version from tar.gz/zip +- Metadata: wheel content type, sdist content type, empty error +- Validation: accepts wheel, accepts sdist, rejects empty, wrong extension, bad wheel filename, sdist without version, empty path +- Index generation: empty returns None, produces HTML + JSON, normalizes package names + ## Gaps and Roadmap | Gap | Recommendation | Priority | |-----|---------------|----------| | No integration test | Add test that loads WASM in wasmtime and calls FormatHandler methods | P2 | | No plugin lifecycle test | Test register, upload, download, list cycle | P3 | +| No cross-plugin test | Verify all three plugins can coexist in the same Artifact Keeper instance | P3 | diff --git a/plugins/pypi-format/Cargo.toml b/plugins/pypi-format/Cargo.toml new file mode 100644 index 0000000..777124d --- /dev/null +++ b/plugins/pypi-format/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "pypi-format-plugin" +version = "0.1.0" +edition = "2021" +description = "Example Artifact Keeper plugin - Python package (PyPI) format handler" +license = "MIT" +authors = ["Artifact Keeper Team"] +repository = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +wit-bindgen = "0.36" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +[package.metadata.component] +package = "artifact-keeper:format" + +[package.metadata.component.target] +path = "../../wit" + +[package.metadata.component.dependencies] diff --git a/plugins/pypi-format/plugin.toml b/plugins/pypi-format/plugin.toml new file mode 100644 index 0000000..4fbf8ce --- /dev/null +++ b/plugins/pypi-format/plugin.toml @@ -0,0 +1,25 @@ +[plugin] +name = "pypi-custom-format" +version = "0.1.0" +description = "Example custom format handler for Python packages (wheels and source distributions)" +author = "Artifact Keeper Team" +license = "MIT" +homepage = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" +min_keeper_version = "1.0.0" + +[format] +key = "pypi-custom" +display_name = "Python Package (Custom)" +extensions = [".whl", ".tar.gz", ".zip"] +content_types = ["application/zip", "application/gzip"] + +[capabilities] +parse_metadata = true +validate_artifact = true +generate_index = true +handle_request = true + +[resources] +max_memory_bytes = 16777216 # 16 MB +max_fuel = 100000000 # 100M cycles +max_execution_ms = 10000 # 10 seconds diff --git a/plugins/pypi-format/src/lib.rs b/plugins/pypi-format/src/lib.rs new file mode 100644 index 0000000..d1971b4 --- /dev/null +++ b/plugins/pypi-format/src/lib.rs @@ -0,0 +1,807 @@ +//! Python Package (PyPI) Format Plugin for Artifact Keeper +//! +//! Handles Python wheels (`.whl`) and source distributions (`.tar.gz`, `.zip`). +//! This plugin demonstrates filename convention parsing following PEP 427 (wheels) +//! and PEP 503 (Simple Repository API) standards. +//! +//! ## Wheel filename convention (PEP 427) +//! +//! ```text +//! {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl +//! ``` +//! +//! Examples: +//! - `requests-2.28.0-py3-none-any.whl` +//! - `numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.whl` +//! +//! ## Source distribution convention +//! +//! ```text +//! {name}-{version}.tar.gz +//! {name}-{version}.zip +//! ``` + +wit_bindgen::generate!({ + world: "format-plugin-v2", + path: "../../wit/format-plugin.wit", +}); + +use exports::artifact_keeper::format::handler::{Guest as HandlerGuest, Metadata}; +use exports::artifact_keeper::format::request_handler::{ + Guest as RequestHandlerGuest, HttpRequest, HttpResponse, RepoContext, +}; + +struct PypiFormatHandler; + +impl HandlerGuest for PypiFormatHandler { + fn format_key() -> String { + "pypi-custom".to_string() + } + + fn parse_metadata(path: String, data: Vec) -> Result { + if data.is_empty() { + return Err("Empty file".to_string()); + } + + let filename = path.rsplit('/').next().unwrap_or(&path); + let version = extract_version(filename); + + let content_type = if filename.ends_with(".whl") || filename.ends_with(".zip") { + "application/zip" + } else if filename.ends_with(".tar.gz") { + "application/gzip" + } else { + "application/octet-stream" + }; + + Ok(Metadata { + path, + version, + content_type: content_type.to_string(), + size_bytes: data.len() as u64, + checksum_sha256: None, + }) + } + + fn validate(path: String, data: Vec) -> Result<(), String> { + if data.is_empty() { + return Err("Python package cannot be empty".to_string()); + } + + if path.is_empty() { + return Err("Artifact path cannot be empty".to_string()); + } + + let filename = path.rsplit('/').next().unwrap_or(&path); + let lower = filename.to_lowercase(); + + if !lower.ends_with(".whl") && !lower.ends_with(".tar.gz") && !lower.ends_with(".zip") { + return Err(format!( + "Expected .whl, .tar.gz, or .zip extension, got: {filename}" + )); + } + + // Validate wheel filename structure (PEP 427) + if let Some(stem) = lower.strip_suffix(".whl") { + let parts: Vec<&str> = stem.split('-').collect(); + if parts.len() < 5 { + return Err(format!( + "Invalid wheel filename: expected at least 5 dash-separated parts \ + (name-version-python-abi-platform), got {} in '{filename}'", + parts.len() + )); + } + } + + // Validate source distribution has a version separator + let sdist_stem = lower + .strip_suffix(".tar.gz") + .or_else(|| lower.strip_suffix(".zip")); + if let Some(stem) = sdist_stem { + if !stem.contains('-') { + return Err(format!( + "Invalid source distribution filename: expected 'name-version' format, \ + got '{stem}'" + )); + } + } + + Ok(()) + } + + fn generate_index(artifacts: Vec) -> Result)>>, String> { + if artifacts.is_empty() { + return Ok(None); + } + + // Collect unique normalized package names + let mut packages: Vec = artifacts + .iter() + .filter_map(|a| { + let filename = a.path.rsplit('/').next()?; + extract_package_name(filename).map(|n| normalize_package_name(&n)) + }) + .collect(); + packages.sort(); + packages.dedup(); + + // Generate PEP 503 Simple Repository root index + let mut html = String::from( + "\n\nSimple Index\n\n", + ); + for pkg in &packages { + html.push_str(&format!(" {pkg}\n")); + } + html.push_str("\n\n"); + + // Also generate a JSON index for API consumers + let entries: Vec = artifacts + .iter() + .map(|a| { + let filename = a.path.rsplit('/').next().unwrap_or(&a.path); + let name = extract_package_name(filename) + .map(|n| normalize_package_name(&n)) + .unwrap_or_default(); + + let mut entry = serde_json::Map::new(); + entry.insert("path".into(), serde_json::Value::String(a.path.clone())); + entry.insert("name".into(), serde_json::Value::String(name)); + if let Some(ref v) = a.version { + entry.insert("version".into(), serde_json::Value::String(v.clone())); + } + entry.insert( + "content_type".into(), + serde_json::Value::String(a.content_type.clone()), + ); + entry.insert( + "size_bytes".into(), + serde_json::Value::Number(a.size_bytes.into()), + ); + serde_json::Value::Object(entry) + }) + .collect(); + + let json_index = serde_json::json!({ + "format": "pypi-custom", + "total_count": artifacts.len(), + "total_size_bytes": artifacts.iter().map(|a| a.size_bytes).sum::(), + "packages": entries, + }); + + let json_bytes = serde_json::to_vec_pretty(&json_index) + .map_err(|e| format!("Failed to serialize index: {e}"))?; + + Ok(Some(vec![ + ("simple/index.html".to_string(), html.into_bytes()), + ("pypi-index.json".to_string(), json_bytes), + ])) + } +} + +impl RequestHandlerGuest for PypiFormatHandler { + fn handle_request( + request: HttpRequest, + context: RepoContext, + artifacts: Vec, + ) -> Result { + let path = request.path.as_str(); + + // Only handle GET and HEAD + if request.method != "GET" && request.method != "HEAD" { + return Ok(HttpResponse { + status: 405, + headers: vec![("allow".to_string(), "GET, HEAD".to_string())], + body: b"Method Not Allowed".to_vec(), + }); + } + + // Route: /simple/ - PEP 503 root index + if path == "/simple/" || path == "/simple" || path == "/" { + return handle_simple_root(&context, &artifacts); + } + + // Route: /simple/{project}/ - PEP 503 project page + let trimmed = path.trim_end_matches('/'); + if let Some(project) = trimmed.strip_prefix("/simple/") { + if !project.contains('/') && !project.is_empty() { + return handle_simple_project(project, &context, &artifacts); + } + } + + // Route: /packages/{filename} - redirect to download + if let Some(filename) = trimmed.strip_prefix("/packages/") { + if !filename.contains('/') && !filename.is_empty() { + return handle_package_download(filename, &context, &artifacts); + } + } + + // 404 for everything else + Ok(HttpResponse { + status: 404, + headers: vec![("content-type".to_string(), "text/plain".to_string())], + body: b"Not Found".to_vec(), + }) + } +} + +export!(PypiFormatHandler); + +// --------------------------------------------------------------------------- +// Request handler helpers +// --------------------------------------------------------------------------- + +/// PEP 503 root index: list all normalized package names as links. +fn handle_simple_root( + context: &RepoContext, + artifacts: &[Metadata], +) -> Result { + let mut packages: Vec = artifacts + .iter() + .filter_map(|a| { + let filename = a.path.rsplit('/').next()?; + extract_package_name(filename).map(|n| normalize_package_name(&n)) + }) + .collect(); + packages.sort(); + packages.dedup(); + + let mut html = + String::from("\n\nSimple Index\n\n"); + for pkg in &packages { + html.push_str(&format!( + " {}\n", + context.base_url, pkg, pkg + )); + } + html.push_str("\n\n"); + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "text/html".to_string())], + body: html.into_bytes(), + }) +} + +/// PEP 503 project page: list files for a specific package with `#sha256=` fragments. +fn handle_simple_project( + project: &str, + context: &RepoContext, + artifacts: &[Metadata], +) -> Result { + let normalized_project = normalize_package_name(project); + + // Filter artifacts matching this project + let matching: Vec<&Metadata> = artifacts + .iter() + .filter(|a| { + let filename = a.path.rsplit('/').next().unwrap_or(&a.path); + extract_package_name(filename) + .map(|n| normalize_package_name(&n) == normalized_project) + .unwrap_or(false) + }) + .collect(); + + if matching.is_empty() { + return Ok(HttpResponse { + status: 404, + headers: vec![("content-type".to_string(), "text/plain".to_string())], + body: format!("Project '{}' not found", project).into_bytes(), + }); + } + + let mut html = format!( + "\n\nLinks for {}\n\n\ +

Links for {}

\n", + normalized_project, normalized_project + ); + + for artifact in &matching { + let filename = artifact.path.rsplit('/').next().unwrap_or(&artifact.path); + let hash_fragment = match &artifact.checksum_sha256 { + Some(sha) if !sha.is_empty() => format!("#sha256={}", sha), + _ => String::new(), + }; + html.push_str(&format!( + " {}\n", + context.base_url, filename, hash_fragment, filename + )); + } + + html.push_str("\n\n"); + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "text/html".to_string())], + body: html.into_bytes(), + }) +} + +/// Redirect package download to the artifact storage download endpoint. +fn handle_package_download( + filename: &str, + context: &RepoContext, + artifacts: &[Metadata], +) -> Result { + // Find the artifact matching this filename + let artifact = artifacts + .iter() + .find(|a| a.path.rsplit('/').next().unwrap_or(&a.path) == filename); + + match artifact { + Some(a) => { + let download_url = format!("{}/{}", context.download_base_url, a.path); + Ok(HttpResponse { + status: 302, + headers: vec![("location".to_string(), download_url)], + body: Vec::new(), + }) + } + None => Ok(HttpResponse { + status: 404, + headers: vec![("content-type".to_string(), "text/plain".to_string())], + body: format!("Package '{}' not found", filename).into_bytes(), + }), + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Normalize a Python package name per PEP 503. +/// +/// Converts to lowercase and replaces any run of non-alphanumeric characters +/// with a single hyphen. +fn normalize_package_name(name: &str) -> String { + let lower = name.to_lowercase(); + let mut result = String::with_capacity(lower.len()); + let mut prev_was_separator = false; + + for ch in lower.chars() { + if ch.is_ascii_alphanumeric() { + prev_was_separator = false; + result.push(ch); + } else if !prev_was_separator { + prev_was_separator = true; + result.push('-'); + } + } + + // Strip leading/trailing hyphens + result.trim_matches('-').to_string() +} + +/// Extract the package name from a filename. +fn extract_package_name(filename: &str) -> Option { + if let Some(stem) = filename.strip_suffix(".whl") { + // Wheel: first dash-separated part is the distribution name + stem.split('-').next().map(|s| s.to_string()) + } else if let Some(stem) = filename.strip_suffix(".tar.gz") { + // Split on last hyphen: everything before is the name + stem.rsplit_once('-').map(|(name, _)| name.to_string()) + } else if let Some(stem) = filename.strip_suffix(".zip") { + stem.rsplit_once('-').map(|(name, _)| name.to_string()) + } else { + None + } +} + +/// Extract version from a Python package filename. +fn extract_version(filename: &str) -> Option { + if let Some(stem) = filename.strip_suffix(".whl") { + // Wheel: second dash-separated part is the version + let parts: Vec<&str> = stem.split('-').collect(); + if parts.len() >= 2 { + Some(parts[1].to_string()) + } else { + None + } + } else if let Some(stem) = filename.strip_suffix(".tar.gz") { + stem.rsplit_once('-').map(|(_, ver)| ver.to_string()) + } else if let Some(stem) = filename.strip_suffix(".zip") { + stem.rsplit_once('-').map(|(_, ver)| ver.to_string()) + } else { + None + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + // -- format_key -- + + #[test] + fn format_key_is_pypi() { + assert_eq!(PypiFormatHandler::format_key(), "pypi-custom"); + } + + // -- package name normalization (PEP 503) -- + + #[test] + fn normalize_simple_name() { + assert_eq!(normalize_package_name("requests"), "requests"); + } + + #[test] + fn normalize_underscores() { + assert_eq!(normalize_package_name("My_Package"), "my-package"); + } + + #[test] + fn normalize_dots() { + assert_eq!(normalize_package_name("some.package"), "some-package"); + } + + #[test] + fn normalize_consecutive_separators() { + assert_eq!(normalize_package_name("Package__Name"), "package-name"); + } + + #[test] + fn normalize_mixed_separators() { + assert_eq!(normalize_package_name("My.Cool_Package"), "my-cool-package"); + } + + #[test] + fn normalize_leading_trailing() { + assert_eq!(normalize_package_name("_leading_"), "leading"); + } + + // -- wheel filename parsing -- + + #[test] + fn extract_name_from_wheel() { + assert_eq!( + extract_package_name("requests-2.28.0-py3-none-any.whl"), + Some("requests".to_string()) + ); + } + + #[test] + fn extract_version_from_wheel() { + assert_eq!( + extract_version("requests-2.28.0-py3-none-any.whl"), + Some("2.28.0".to_string()) + ); + } + + #[test] + fn extract_version_from_wheel_with_build_tag() { + assert_eq!( + extract_version("package-1.0.0-1-cp39-cp39-manylinux1_x86_64.whl"), + Some("1.0.0".to_string()) + ); + } + + // -- source distribution parsing -- + + #[test] + fn extract_name_from_sdist() { + assert_eq!( + extract_package_name("requests-2.28.0.tar.gz"), + Some("requests".to_string()) + ); + } + + #[test] + fn extract_name_from_sdist_with_hyphens() { + assert_eq!( + extract_package_name("my-cool-package-1.0.0.tar.gz"), + Some("my-cool-package".to_string()) + ); + } + + #[test] + fn extract_version_from_sdist() { + assert_eq!( + extract_version("requests-2.28.0.tar.gz"), + Some("2.28.0".to_string()) + ); + } + + #[test] + fn extract_version_from_zip() { + assert_eq!( + extract_version("my-package-1.0.0.zip"), + Some("1.0.0".to_string()) + ); + } + + // -- parse_metadata -- + + #[test] + fn parse_metadata_wheel() { + let data = vec![0x50, 0x4b, 0x03, 0x04]; // ZIP magic + let result = PypiFormatHandler::parse_metadata( + "packages/requests/2.28.0/requests-2.28.0-py3-none-any.whl".into(), + data, + ); + let meta = result.unwrap(); + assert_eq!(meta.content_type, "application/zip"); + assert_eq!(meta.version, Some("2.28.0".to_string())); + } + + #[test] + fn parse_metadata_sdist() { + let data = vec![0x1f, 0x8b, 0x08]; // gzip magic + let result = PypiFormatHandler::parse_metadata( + "packages/requests/2.28.0/requests-2.28.0.tar.gz".into(), + data, + ); + let meta = result.unwrap(); + assert_eq!(meta.content_type, "application/gzip"); + assert_eq!(meta.version, Some("2.28.0".to_string())); + } + + #[test] + fn parse_metadata_empty_error() { + let result = PypiFormatHandler::parse_metadata("test.whl".into(), vec![]); + assert!(result.is_err()); + } + + // -- validate -- + + #[test] + fn validate_accepts_wheel() { + let data = vec![0x50, 0x4b, 0x03, 0x04]; + let result = PypiFormatHandler::validate("requests-2.28.0-py3-none-any.whl".into(), data); + assert!(result.is_ok()); + } + + #[test] + fn validate_accepts_sdist() { + let data = vec![0x1f, 0x8b, 0x08]; + let result = PypiFormatHandler::validate("requests-2.28.0.tar.gz".into(), data); + assert!(result.is_ok()); + } + + #[test] + fn validate_rejects_empty() { + let result = PypiFormatHandler::validate("test.whl".into(), vec![]); + assert!(result.unwrap_err().contains("empty")); + } + + #[test] + fn validate_rejects_wrong_extension() { + let result = PypiFormatHandler::validate("test.rpm".into(), vec![0x00]); + assert!(result.unwrap_err().contains(".whl")); + } + + #[test] + fn validate_rejects_bad_wheel_filename() { + let data = vec![0x50, 0x4b]; + let result = PypiFormatHandler::validate("bad-name.whl".into(), data); + assert!(result.unwrap_err().contains("5 dash-separated")); + } + + #[test] + fn validate_rejects_sdist_without_version() { + let data = vec![0x1f, 0x8b]; + let result = PypiFormatHandler::validate("noversion.tar.gz".into(), data); + assert!(result.unwrap_err().contains("name-version")); + } + + #[test] + fn validate_rejects_empty_path() { + let result = PypiFormatHandler::validate("".into(), vec![0x00]); + assert!(result.unwrap_err().contains("path")); + } + + // -- generate_index -- + + #[test] + fn generate_index_empty() { + let result = PypiFormatHandler::generate_index(vec![]); + assert!(result.unwrap().is_none()); + } + + #[test] + fn generate_index_produces_html_and_json() { + let artifacts = vec![ + Metadata { + path: "packages/requests/2.28.0/requests-2.28.0-py3-none-any.whl".into(), + version: Some("2.28.0".into()), + content_type: "application/zip".into(), + size_bytes: 2048, + checksum_sha256: None, + }, + Metadata { + path: "packages/numpy/1.24.2/numpy-1.24.2.tar.gz".into(), + version: Some("1.24.2".into()), + content_type: "application/gzip".into(), + size_bytes: 4096, + checksum_sha256: None, + }, + ]; + let result = PypiFormatHandler::generate_index(artifacts) + .unwrap() + .unwrap(); + assert_eq!(result.len(), 2); + + // HTML index + assert_eq!(result[0].0, "simple/index.html"); + let html = String::from_utf8(result[0].1.clone()).unwrap(); + assert!(html.contains("numpy")); + assert!(html.contains("requests")); + assert!(html.contains("/simple/")); + + // JSON index + assert_eq!(result[1].0, "pypi-index.json"); + let json: serde_json::Value = serde_json::from_slice(&result[1].1).unwrap(); + assert_eq!(json["format"], "pypi-custom"); + assert_eq!(json["total_count"], 2); + } + + #[test] + fn generate_index_normalizes_names() { + let artifacts = vec![Metadata { + path: "packages/My_Package-1.0.0-py3-none-any.whl".into(), + version: Some("1.0.0".into()), + content_type: "application/zip".into(), + size_bytes: 1024, + checksum_sha256: None, + }]; + let result = PypiFormatHandler::generate_index(artifacts) + .unwrap() + .unwrap(); + let html = String::from_utf8(result[0].1.clone()).unwrap(); + assert!(html.contains("my-package")); + } + + // -- handle_request (PEP 503) -- + + fn test_context() -> RepoContext { + RepoContext { + repo_key: "pypi-test".to_string(), + base_url: "http://localhost:8080/ext/pypi-custom/pypi-test".to_string(), + download_base_url: "http://localhost:8080/api/v1/repositories/pypi-test/download" + .to_string(), + } + } + + fn test_artifacts() -> Vec { + vec![ + Metadata { + path: "requests-2.28.0-py3-none-any.whl".into(), + version: Some("2.28.0".into()), + content_type: "application/zip".into(), + size_bytes: 2048, + checksum_sha256: Some("abc123".into()), + }, + Metadata { + path: "requests-2.28.0.tar.gz".into(), + version: Some("2.28.0".into()), + content_type: "application/gzip".into(), + size_bytes: 4096, + checksum_sha256: Some("def456".into()), + }, + Metadata { + path: "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.whl".into(), + version: Some("1.24.2".into()), + content_type: "application/zip".into(), + size_bytes: 8192, + checksum_sha256: None, + }, + ] + } + + fn get_request(path: &str) -> HttpRequest { + HttpRequest { + method: "GET".to_string(), + path: path.to_string(), + query: String::new(), + headers: Vec::new(), + body: Vec::new(), + } + } + + #[test] + fn handle_request_simple_root() { + let resp = PypiFormatHandler::handle_request( + get_request("/simple/"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 200); + let body = String::from_utf8(resp.body).unwrap(); + assert!(body.contains("numpy")); + assert!(body.contains("requests")); + assert!(body.contains("/ext/pypi-custom/pypi-test/simple/")); + } + + #[test] + fn handle_request_root_redirects_to_simple() { + let resp = + PypiFormatHandler::handle_request(get_request("/"), test_context(), test_artifacts()) + .unwrap(); + assert_eq!(resp.status, 200); + let body = String::from_utf8(resp.body).unwrap(); + assert!(body.contains("Simple Index")); + } + + #[test] + fn handle_request_project_page() { + let resp = PypiFormatHandler::handle_request( + get_request("/simple/requests/"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 200); + let body = String::from_utf8(resp.body).unwrap(); + assert!(body.contains("requests-2.28.0-py3-none-any.whl")); + assert!(body.contains("requests-2.28.0.tar.gz")); + assert!(body.contains("#sha256=abc123")); + assert!(body.contains("#sha256=def456")); + // Should NOT contain numpy + assert!(!body.contains("numpy")); + } + + #[test] + fn handle_request_project_not_found() { + let resp = PypiFormatHandler::handle_request( + get_request("/simple/nonexistent/"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 404); + } + + #[test] + fn handle_request_package_download_redirect() { + let resp = PypiFormatHandler::handle_request( + get_request("/packages/requests-2.28.0-py3-none-any.whl"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 302); + let location = resp.headers.iter().find(|(k, _)| k == "location").unwrap(); + assert!(location + .1 + .contains("/download/requests-2.28.0-py3-none-any.whl")); + } + + #[test] + fn handle_request_package_not_found() { + let resp = PypiFormatHandler::handle_request( + get_request("/packages/nonexistent-1.0.0.whl"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 404); + } + + #[test] + fn handle_request_unknown_path() { + let resp = PypiFormatHandler::handle_request( + get_request("/unknown/path"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 404); + } + + #[test] + fn handle_request_post_rejected() { + let req = HttpRequest { + method: "POST".to_string(), + path: "/simple/".to_string(), + query: String::new(), + headers: Vec::new(), + body: Vec::new(), + }; + let resp = + PypiFormatHandler::handle_request(req, test_context(), test_artifacts()).unwrap(); + assert_eq!(resp.status, 405); + } +} diff --git a/plugins/rpm-format/Cargo.toml b/plugins/rpm-format/Cargo.toml new file mode 100644 index 0000000..f4f9d24 --- /dev/null +++ b/plugins/rpm-format/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "rpm-format-plugin" +version = "0.1.0" +edition = "2021" +description = "Example Artifact Keeper plugin - RPM package format handler" +license = "MIT" +authors = ["Artifact Keeper Team"] +repository = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +wit-bindgen = "0.36" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +[package.metadata.component] +package = "artifact-keeper:format" + +[package.metadata.component.target] +path = "../../wit" + +[package.metadata.component.dependencies] diff --git a/plugins/rpm-format/plugin.toml b/plugins/rpm-format/plugin.toml new file mode 100644 index 0000000..b8085eb --- /dev/null +++ b/plugins/rpm-format/plugin.toml @@ -0,0 +1,25 @@ +[plugin] +name = "rpm-custom-format" +version = "0.1.0" +description = "Example custom format handler for RPM packages (.rpm files)" +author = "Artifact Keeper Team" +license = "MIT" +homepage = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" +min_keeper_version = "1.0.0" + +[format] +key = "rpm-custom" +display_name = "RPM Package (Custom)" +extensions = [".rpm"] +content_types = ["application/x-rpm"] + +[capabilities] +parse_metadata = true +validate_artifact = true +generate_index = true +handle_request = true + +[resources] +max_memory_bytes = 16777216 # 16 MB +max_fuel = 100000000 # 100M cycles +max_execution_ms = 10000 # 10 seconds diff --git a/plugins/rpm-format/src/lib.rs b/plugins/rpm-format/src/lib.rs new file mode 100644 index 0000000..ee25623 --- /dev/null +++ b/plugins/rpm-format/src/lib.rs @@ -0,0 +1,874 @@ +//! RPM Package Format Plugin for Artifact Keeper +//! +//! Handles `.rpm` files used by Red Hat, Fedora, SUSE, and other RPM-based Linux distributions. +//! This plugin demonstrates binary format validation (RPM lead magic bytes) and right-to-left +//! filename parsing to extract structured metadata from RPM naming conventions. +//! +//! ## RPM filename convention +//! +//! ```text +//! name-version-release.arch.rpm +//! ``` +//! +//! Examples: +//! - `nginx-1.24.0-1.el9.x86_64.rpm` +//! - `python3-numpy-1.24.2-4.el9.x86_64.rpm` (name contains hyphens) +//! - `bash-completion-2.11-5.el9.noarch.rpm` + +wit_bindgen::generate!({ + world: "format-plugin-v2", + path: "../../wit/format-plugin.wit", +}); + +use exports::artifact_keeper::format::handler::{Guest as HandlerGuest, Metadata}; +use exports::artifact_keeper::format::request_handler::{ + Guest as RequestHandlerGuest, HttpRequest, HttpResponse, RepoContext, +}; + +/// RPM lead magic bytes: 0xed 0xab 0xee 0xdb +const RPM_MAGIC: [u8; 4] = [0xed, 0xab, 0xee, 0xdb]; + +/// RPM lead is exactly 96 bytes. +const RPM_LEAD_SIZE: usize = 96; + +struct RpmFormatHandler; + +impl HandlerGuest for RpmFormatHandler { + fn format_key() -> String { + "rpm-custom".to_string() + } + + fn parse_metadata(path: String, data: Vec) -> Result { + if data.is_empty() { + return Err("Empty file".to_string()); + } + + let has_rpm_magic = data.len() >= 4 && data[..4] == RPM_MAGIC; + + let content_type = if has_rpm_magic { + "application/x-rpm" + } else { + "application/octet-stream" + }; + + let version = extract_version_from_rpm_filename(&path); + + Ok(Metadata { + path, + version, + content_type: content_type.to_string(), + size_bytes: data.len() as u64, + checksum_sha256: None, + }) + } + + fn validate(path: String, data: Vec) -> Result<(), String> { + if data.is_empty() { + return Err("RPM package cannot be empty".to_string()); + } + + if path.is_empty() { + return Err("Artifact path cannot be empty".to_string()); + } + + // Verify .rpm extension + if !path.to_lowercase().ends_with(".rpm") { + return Err(format!( + "Expected .rpm extension, got: {}", + path.rsplit('/').next().unwrap_or(&path) + )); + } + + // RPM lead is 96 bytes minimum + if data.len() < RPM_LEAD_SIZE { + return Err(format!( + "File too small for RPM lead: {} bytes (minimum {})", + data.len(), + RPM_LEAD_SIZE + )); + } + + // Verify RPM magic bytes + if data[..4] != RPM_MAGIC { + return Err(format!( + "Invalid RPM magic: expected [ed, ab, ee, db], got [{:02x}, {:02x}, {:02x}, {:02x}]", + data[0], data[1], data[2], data[3] + )); + } + + Ok(()) + } + + fn generate_index(artifacts: Vec) -> Result)>>, String> { + if artifacts.is_empty() { + return Ok(None); + } + + let entries: Vec = artifacts + .iter() + .map(|a| { + let filename = a.path.rsplit('/').next().unwrap_or(&a.path); + let info = parse_rpm_filename(filename); + + let mut entry = serde_json::Map::new(); + entry.insert("path".into(), serde_json::Value::String(a.path.clone())); + if let Some(ref v) = a.version { + entry.insert("version".into(), serde_json::Value::String(v.clone())); + } + if let Some(name) = info.name { + entry.insert("name".into(), serde_json::Value::String(name)); + } + if let Some(arch) = info.arch { + entry.insert("arch".into(), serde_json::Value::String(arch)); + } + if let Some(release) = info.release { + entry.insert("release".into(), serde_json::Value::String(release)); + } + entry.insert( + "size_bytes".into(), + serde_json::Value::Number(a.size_bytes.into()), + ); + serde_json::Value::Object(entry) + }) + .collect(); + + let index = serde_json::json!({ + "format": "rpm-custom", + "total_count": artifacts.len(), + "total_size_bytes": artifacts.iter().map(|a| a.size_bytes).sum::(), + "packages": entries, + }); + + let json_bytes = serde_json::to_vec_pretty(&index) + .map_err(|e| format!("Failed to serialize index: {e}"))?; + + Ok(Some(vec![("rpm-index.json".to_string(), json_bytes)])) + } +} + +impl RequestHandlerGuest for RpmFormatHandler { + fn handle_request( + request: HttpRequest, + context: RepoContext, + artifacts: Vec, + ) -> Result { + let path = request.path.as_str(); + + // Only handle GET and HEAD + if request.method != "GET" && request.method != "HEAD" { + return Ok(HttpResponse { + status: 405, + headers: vec![("allow".to_string(), "GET, HEAD".to_string())], + body: b"Method Not Allowed".to_vec(), + }); + } + + let trimmed = path.trim_end_matches('/'); + + // Route: /repodata/repomd.xml + if trimmed == "/repodata/repomd.xml" { + return handle_repomd_xml(&context, &artifacts); + } + + // Route: /repodata/primary.xml.gz + if trimmed == "/repodata/primary.xml.gz" { + return handle_primary_xml_gz(&context, &artifacts); + } + + // Route: /repodata/filelists.xml.gz + if trimmed == "/repodata/filelists.xml.gz" { + return handle_filelists_xml_gz(); + } + + // Route: /repodata/other.xml.gz + if trimmed == "/repodata/other.xml.gz" { + return handle_other_xml_gz(); + } + + // Route: /packages/{filename} or /Packages/{filename} - redirect to download + if let Some(filename) = trimmed + .strip_prefix("/packages/") + .or_else(|| trimmed.strip_prefix("/Packages/")) + { + if !filename.contains('/') && !filename.is_empty() { + return handle_package_download(filename, &context, &artifacts); + } + } + + // 404 for everything else + Ok(HttpResponse { + status: 404, + headers: vec![("content-type".to_string(), "text/plain".to_string())], + body: b"Not Found".to_vec(), + }) + } +} + +export!(RpmFormatHandler); + +// --------------------------------------------------------------------------- +// Request handler helpers +// --------------------------------------------------------------------------- + +/// Generate repomd.xml pointing to the primary, filelists, and other metadata files. +fn handle_repomd_xml( + _context: &RepoContext, + _artifacts: &[Metadata], +) -> Result { + // Simple repomd.xml - in production you'd compute checksums of each data file, + // but for serving purposes we use a static structure with timestamps. + let xml = r#" + + 1 + + + + + + + + + + +"#; + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "application/xml".to_string())], + body: xml.as_bytes().to_vec(), + }) +} + +/// Generate primary.xml.gz with package entries. +fn handle_primary_xml_gz( + _context: &RepoContext, + artifacts: &[Metadata], +) -> Result { + let mut xml = String::from( + "\n\ + \n"); + + for artifact in artifacts { + let filename = artifact.path.rsplit('/').next().unwrap_or(&artifact.path); + let info = parse_rpm_filename(filename); + + let name = info.name.as_deref().unwrap_or("unknown"); + let version = info.version.as_deref().unwrap_or("0"); + let release = info.release.as_deref().unwrap_or("0"); + let arch = info.arch.as_deref().unwrap_or("x86_64"); + + xml.push_str(" \n"); + xml.push_str(&format!(" {}\n", xml_escape(name))); + xml.push_str(&format!(" {}\n", xml_escape(arch))); + xml.push_str(&format!( + " \n", + xml_escape(version), + xml_escape(release) + )); + xml.push_str(&format!( + " {}\n", + artifact.checksum_sha256.as_deref().unwrap_or("") + )); + xml.push_str(" \n"); + xml.push_str(" \n"); + xml.push_str(" \n"); + xml.push_str(" \n"); + xml.push_str(&format!( + " \n", + artifact.size_bytes + )); + xml.push_str(&format!( + " \n", + xml_escape(filename) + )); + xml.push_str(" \n"); + xml.push_str(&format!( + " \n \n \n", + xml_escape(name), + xml_escape(version), + xml_escape(release) + )); + xml.push_str(" \n"); + xml.push_str(" \n"); + } + + xml.push_str("\n"); + + // gzip the XML + let compressed = gzip_compress(xml.as_bytes())?; + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "application/gzip".to_string())], + body: compressed, + }) +} + +/// Generate empty filelists.xml.gz. +fn handle_filelists_xml_gz() -> Result { + let xml = "\n\ + \n\ + \n"; + + let compressed = gzip_compress(xml.as_bytes())?; + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "application/gzip".to_string())], + body: compressed, + }) +} + +/// Generate empty other.xml.gz. +fn handle_other_xml_gz() -> Result { + let xml = "\n\ + \n\ + \n"; + + let compressed = gzip_compress(xml.as_bytes())?; + + Ok(HttpResponse { + status: 200, + headers: vec![("content-type".to_string(), "application/gzip".to_string())], + body: compressed, + }) +} + +/// Redirect package download to the artifact storage download endpoint. +fn handle_package_download( + filename: &str, + context: &RepoContext, + artifacts: &[Metadata], +) -> Result { + let artifact = artifacts + .iter() + .find(|a| a.path.rsplit('/').next().unwrap_or(&a.path) == filename); + + match artifact { + Some(a) => { + let download_url = format!("{}/{}", context.download_base_url, a.path); + Ok(HttpResponse { + status: 302, + headers: vec![("location".to_string(), download_url)], + body: Vec::new(), + }) + } + None => Ok(HttpResponse { + status: 404, + headers: vec![("content-type".to_string(), "text/plain".to_string())], + body: format!("Package '{}' not found", filename).into_bytes(), + }), + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/// Minimal gzip compression using the DEFLATE algorithm. +/// +/// WASM plugins can't use libflate or flate2 easily, so we produce a valid +/// gzip stream with STORED blocks (no actual compression, just framing). +/// This is perfectly valid per RFC 1952 and all tools accept it. +fn gzip_compress(data: &[u8]) -> Result, String> { + let mut output = Vec::with_capacity(data.len() + 64); + + // Gzip header (10 bytes) + output.extend_from_slice(&[ + 0x1f, 0x8b, // magic + 0x08, // method: deflate + 0x00, // flags: none + 0x00, 0x00, 0x00, 0x00, // mtime + 0x00, // extra flags + 0xff, // OS: unknown + ]); + + // DEFLATE stored blocks + // Each stored block can hold up to 65535 bytes + let chunks: Vec<&[u8]> = if data.is_empty() { + vec![&[]] + } else { + data.chunks(65535).collect() + }; + + for (i, chunk) in chunks.iter().enumerate() { + let is_last = i == chunks.len() - 1; + // Block header: 1 byte (BFINAL=1 for last, BTYPE=00 for stored) + output.push(if is_last { 0x01 } else { 0x00 }); + let len = chunk.len() as u16; + let nlen = !len; + output.extend_from_slice(&len.to_le_bytes()); + output.extend_from_slice(&nlen.to_le_bytes()); + output.extend_from_slice(chunk); + } + + // CRC32 and original size (ISIZE) + let crc = crc32(data); + let size = data.len() as u32; + output.extend_from_slice(&crc.to_le_bytes()); + output.extend_from_slice(&size.to_le_bytes()); + + Ok(output) +} + +/// CRC32 (ISO 3309 / ITU-T V.42) used by gzip. +fn crc32(data: &[u8]) -> u32 { + let mut crc: u32 = 0xFFFF_FFFF; + for &byte in data { + crc ^= byte as u32; + for _ in 0..8 { + if crc & 1 != 0 { + crc = (crc >> 1) ^ 0xEDB8_8320; + } else { + crc >>= 1; + } + } + } + !crc +} + +/// Escape XML special characters. +fn xml_escape(s: &str) -> String { + s.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +struct RpmFileInfo { + name: Option, + version: Option, + release: Option, + arch: Option, +} + +/// Parse an RPM filename into its components. +/// +/// RPM filenames follow the convention: `name-version-release.arch.rpm` +/// The name can contain hyphens, so we parse right-to-left: +/// 1. Strip `.rpm` extension +/// 2. Split on last `.` to get arch +/// 3. Split remainder on last `-` to get release +/// 4. Split remainder on last `-` to get version (rest is name) +fn parse_rpm_filename(filename: &str) -> RpmFileInfo { + let stem = match filename.strip_suffix(".rpm") { + Some(s) => s, + None => { + return RpmFileInfo { + name: None, + version: None, + release: None, + arch: None, + } + } + }; + + // Split on last dot for arch: "nginx-1.24.0-1.el9.x86_64" -> ("nginx-1.24.0-1.el9", "x86_64") + let (before_arch, arch) = match stem.rsplit_once('.') { + Some((b, a)) => (b, Some(a.to_string())), + None => (stem, None), + }; + + // Split on last hyphen for release: "nginx-1.24.0-1.el9" -> ("nginx-1.24.0", "1.el9") + let (before_release, release) = match before_arch.rsplit_once('-') { + Some((b, r)) => (b, Some(r.to_string())), + None => (before_arch, None), + }; + + // Split on last hyphen for version: "nginx-1.24.0" -> ("nginx", "1.24.0") + let (name, version) = match before_release.rsplit_once('-') { + Some((n, v)) => (Some(n.to_string()), Some(v.to_string())), + None => (Some(before_release.to_string()), None), + }; + + RpmFileInfo { + name, + version, + release, + arch, + } +} + +/// Extract the version string from an RPM filename in a path. +fn extract_version_from_rpm_filename(path: &str) -> Option { + let filename = path.rsplit('/').next()?; + let info = parse_rpm_filename(filename); + + match (info.version, info.release) { + (Some(ver), Some(rel)) => Some(format!("{ver}-{rel}")), + (Some(ver), None) => Some(ver), + _ => None, + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + // -- format_key -- + + #[test] + fn format_key_is_rpm() { + assert_eq!(RpmFormatHandler::format_key(), "rpm-custom"); + } + + // -- RPM filename parsing -- + + #[test] + fn parse_simple_rpm() { + let info = parse_rpm_filename("nginx-1.24.0-1.el9.x86_64.rpm"); + assert_eq!(info.name.as_deref(), Some("nginx")); + assert_eq!(info.version.as_deref(), Some("1.24.0")); + assert_eq!(info.release.as_deref(), Some("1.el9")); + assert_eq!(info.arch.as_deref(), Some("x86_64")); + } + + #[test] + fn parse_rpm_with_hyphens_in_name() { + let info = parse_rpm_filename("python3-numpy-1.24.2-4.el9.x86_64.rpm"); + assert_eq!(info.name.as_deref(), Some("python3-numpy")); + assert_eq!(info.version.as_deref(), Some("1.24.2")); + assert_eq!(info.release.as_deref(), Some("4.el9")); + assert_eq!(info.arch.as_deref(), Some("x86_64")); + } + + #[test] + fn parse_rpm_noarch() { + let info = parse_rpm_filename("bash-completion-2.11-5.el9.noarch.rpm"); + assert_eq!(info.name.as_deref(), Some("bash-completion")); + assert_eq!(info.version.as_deref(), Some("2.11")); + assert_eq!(info.release.as_deref(), Some("5.el9")); + assert_eq!(info.arch.as_deref(), Some("noarch")); + } + + #[test] + fn parse_rpm_no_extension() { + let info = parse_rpm_filename("not-an-rpm.txt"); + assert!(info.name.is_none()); + } + + // -- version extraction from path -- + + #[test] + fn version_from_simple_filename() { + assert_eq!( + extract_version_from_rpm_filename("Packages/nginx-1.24.0-1.el9.x86_64.rpm"), + Some("1.24.0-1.el9".to_string()) + ); + } + + #[test] + fn version_from_hyphenated_name() { + assert_eq!( + extract_version_from_rpm_filename("python3-numpy-1.24.2-4.el9.x86_64.rpm"), + Some("1.24.2-4.el9".to_string()) + ); + } + + // -- parse_metadata -- + + #[test] + fn parse_metadata_detects_rpm_magic() { + let mut data = vec![0; RPM_LEAD_SIZE]; + data[..4].copy_from_slice(&RPM_MAGIC); + let result = + RpmFormatHandler::parse_metadata("Packages/nginx-1.24.0-1.el9.x86_64.rpm".into(), data); + let meta = result.unwrap(); + assert_eq!(meta.content_type, "application/x-rpm"); + assert_eq!(meta.version, Some("1.24.0-1.el9".to_string())); + } + + #[test] + fn parse_metadata_non_rpm_content() { + let data = vec![0x50, 0x4b, 0x03, 0x04]; // ZIP magic + let result = RpmFormatHandler::parse_metadata("test.rpm".into(), data); + let meta = result.unwrap(); + assert_eq!(meta.content_type, "application/octet-stream"); + } + + #[test] + fn parse_metadata_empty_error() { + let result = RpmFormatHandler::parse_metadata("test.rpm".into(), vec![]); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Empty")); + } + + // -- validate -- + + #[test] + fn validate_accepts_valid_rpm() { + let mut data = vec![0; RPM_LEAD_SIZE]; + data[..4].copy_from_slice(&RPM_MAGIC); + let result = RpmFormatHandler::validate("test.rpm".into(), data); + assert!(result.is_ok()); + } + + #[test] + fn validate_rejects_empty() { + let result = RpmFormatHandler::validate("test.rpm".into(), vec![]); + assert!(result.unwrap_err().contains("empty")); + } + + #[test] + fn validate_rejects_wrong_extension() { + let mut data = vec![0; RPM_LEAD_SIZE]; + data[..4].copy_from_slice(&RPM_MAGIC); + let result = RpmFormatHandler::validate("test.deb".into(), data); + assert!(result.unwrap_err().contains(".rpm")); + } + + #[test] + fn validate_rejects_too_small() { + let data = RPM_MAGIC.to_vec(); // Only 4 bytes, need 96 + let result = RpmFormatHandler::validate("test.rpm".into(), data); + assert!(result.unwrap_err().contains("too small")); + } + + #[test] + fn validate_rejects_bad_magic() { + let data = vec![0; RPM_LEAD_SIZE]; + let result = RpmFormatHandler::validate("test.rpm".into(), data); + assert!(result.unwrap_err().contains("Invalid RPM magic")); + } + + #[test] + fn validate_rejects_empty_path() { + let mut data = vec![0; RPM_LEAD_SIZE]; + data[..4].copy_from_slice(&RPM_MAGIC); + let result = RpmFormatHandler::validate("".into(), data); + assert!(result.unwrap_err().contains("path")); + } + + // -- generate_index -- + + #[test] + fn generate_index_empty() { + let result = RpmFormatHandler::generate_index(vec![]); + assert!(result.unwrap().is_none()); + } + + #[test] + fn generate_index_produces_json() { + let artifacts = vec![ + Metadata { + path: "Packages/nginx-1.24.0-1.el9.x86_64.rpm".into(), + version: Some("1.24.0-1.el9".into()), + content_type: "application/x-rpm".into(), + size_bytes: 8192, + checksum_sha256: None, + }, + Metadata { + path: "Packages/bash-5.2.26-1.el9.x86_64.rpm".into(), + version: Some("5.2.26-1.el9".into()), + content_type: "application/x-rpm".into(), + size_bytes: 4096, + checksum_sha256: None, + }, + ]; + let result = RpmFormatHandler::generate_index(artifacts) + .unwrap() + .unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result[0].0, "rpm-index.json"); + + let json: serde_json::Value = serde_json::from_slice(&result[0].1).unwrap(); + assert_eq!(json["format"], "rpm-custom"); + assert_eq!(json["total_count"], 2); + assert_eq!(json["total_size_bytes"], 12288); + + let packages = json["packages"].as_array().unwrap(); + assert_eq!(packages[0]["name"], "nginx"); + assert_eq!(packages[0]["arch"], "x86_64"); + } + + // -- handle_request (repodata) -- + + fn test_context() -> RepoContext { + RepoContext { + repo_key: "rpm-test".to_string(), + base_url: "http://localhost:8080/ext/rpm-custom/rpm-test".to_string(), + download_base_url: "http://localhost:8080/api/v1/repositories/rpm-test/download" + .to_string(), + } + } + + fn test_artifacts() -> Vec { + vec![ + Metadata { + path: "nginx-1.24.0-1.el9.x86_64.rpm".into(), + version: Some("1.24.0-1.el9".into()), + content_type: "application/x-rpm".into(), + size_bytes: 8192, + checksum_sha256: Some("abc123def456".into()), + }, + Metadata { + path: "bash-5.2.26-1.el9.x86_64.rpm".into(), + version: Some("5.2.26-1.el9".into()), + content_type: "application/x-rpm".into(), + size_bytes: 4096, + checksum_sha256: None, + }, + ] + } + + fn get_request(path: &str) -> HttpRequest { + HttpRequest { + method: "GET".to_string(), + path: path.to_string(), + query: String::new(), + headers: Vec::new(), + body: Vec::new(), + } + } + + #[test] + fn handle_request_repomd_xml() { + let resp = RpmFormatHandler::handle_request( + get_request("/repodata/repomd.xml"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 200); + let body = String::from_utf8(resp.body).unwrap(); + assert!(body.contains(" 10); + assert_eq!(resp.body[0], 0x1f); + assert_eq!(resp.body[1], 0x8b); + } + + #[test] + fn handle_request_filelists_xml_gz() { + let resp = RpmFormatHandler::handle_request( + get_request("/repodata/filelists.xml.gz"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 200); + assert_eq!(resp.body[0], 0x1f); + assert_eq!(resp.body[1], 0x8b); + } + + #[test] + fn handle_request_other_xml_gz() { + let resp = RpmFormatHandler::handle_request( + get_request("/repodata/other.xml.gz"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 200); + assert_eq!(resp.body[0], 0x1f); + assert_eq!(resp.body[1], 0x8b); + } + + #[test] + fn handle_request_package_download_redirect() { + let resp = RpmFormatHandler::handle_request( + get_request("/packages/nginx-1.24.0-1.el9.x86_64.rpm"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 302); + let location = resp.headers.iter().find(|(k, _)| k == "location").unwrap(); + assert!(location + .1 + .contains("/download/nginx-1.24.0-1.el9.x86_64.rpm")); + } + + #[test] + fn handle_request_package_not_found() { + let resp = RpmFormatHandler::handle_request( + get_request("/packages/nonexistent-1.0.0-1.el9.x86_64.rpm"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 404); + } + + #[test] + fn handle_request_unknown_path() { + let resp = RpmFormatHandler::handle_request( + get_request("/unknown/path"), + test_context(), + test_artifacts(), + ) + .unwrap(); + assert_eq!(resp.status, 404); + } + + #[test] + fn handle_request_post_rejected() { + let req = HttpRequest { + method: "POST".to_string(), + path: "/repodata/repomd.xml".to_string(), + query: String::new(), + headers: Vec::new(), + body: Vec::new(), + }; + let resp = RpmFormatHandler::handle_request(req, test_context(), test_artifacts()).unwrap(); + assert_eq!(resp.status, 405); + } + + // -- gzip helpers -- + + #[test] + fn gzip_compress_produces_valid_header() { + let result = gzip_compress(b"hello").unwrap(); + assert_eq!(result[0], 0x1f); + assert_eq!(result[1], 0x8b); + assert_eq!(result[2], 0x08); // deflate + } + + #[test] + fn gzip_compress_empty_input() { + let result = gzip_compress(b"").unwrap(); + assert!(result.len() > 10); // header + trailer at minimum + assert_eq!(result[0], 0x1f); + assert_eq!(result[1], 0x8b); + } + + #[test] + fn crc32_known_value() { + // CRC32 of empty string is 0x00000000 + assert_eq!(crc32(b""), 0x0000_0000); + // CRC32 of "123456789" is 0xCBF43926 + assert_eq!(crc32(b"123456789"), 0xCBF4_3926); + } + + #[test] + fn xml_escape_special_chars() { + assert_eq!( + xml_escape("ac&d\"e'f"), + "a<b>c&d"e'f" + ); + } +} diff --git a/plugins/unity-format/Cargo.toml b/plugins/unity-format/Cargo.toml new file mode 100644 index 0000000..a347512 --- /dev/null +++ b/plugins/unity-format/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "unity-format-plugin" +version = "0.1.0" +edition = "2021" +description = "Example Artifact Keeper plugin - Unity .unitypackage format handler" +license = "MIT" +authors = ["Artifact Keeper Team"] +repository = "https://github.com/artifact-keeper/artifact-keeper-example-plugin" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +wit-bindgen = "0.36" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +[package.metadata.component] +package = "artifact-keeper:format" + +[package.metadata.component.target] +path = "../../wit" + +[package.metadata.component.dependencies] diff --git a/plugin.toml b/plugins/unity-format/plugin.toml similarity index 100% rename from plugin.toml rename to plugins/unity-format/plugin.toml diff --git a/src/lib.rs b/plugins/unity-format/src/lib.rs similarity index 99% rename from src/lib.rs rename to plugins/unity-format/src/lib.rs index 34156d4..6db7b99 100644 --- a/src/lib.rs +++ b/plugins/unity-format/src/lib.rs @@ -15,7 +15,7 @@ wit_bindgen::generate!({ world: "format-plugin", - path: "wit/format-plugin.wit", + path: "../../wit/format-plugin.wit", }); use exports::artifact_keeper::format::handler::{Guest, Metadata}; diff --git a/sonar-project.properties b/sonar-project.properties index 8d37024..115256c 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -1,5 +1,12 @@ sonar.projectKey=artifact-keeper_artifact-keeper-example-plugin sonar.organization=artifact-keeper -sonar.sources=src -sonar.exclusions=target/** sonar.sourceEncoding=UTF-8 +sonar.sources=plugins +sonar.exclusions=target/** + +# Each plugin is an independent crate that implements the same WIT interface. +# Cross-plugin structural similarity (Metadata construction, validate() pattern, +# handle_request scaffolding) is intentional: each plugin is a self-contained, +# copy-pasteable template. Exclude plugin sources from copy-paste detection +# to avoid false positives from this expected interface boilerplate. +sonar.cpd.exclusions=plugins/*/src/** diff --git a/wit/format-plugin.wit b/wit/format-plugin.wit index 0973b35..cd9289f 100644 --- a/wit/format-plugin.wit +++ b/wit/format-plugin.wit @@ -74,3 +74,58 @@ world format-plugin { /// Export the format handler interface. export handler; } + +/// HTTP request/response interface for native protocol serving. +/// +/// Plugins that implement this interface can serve native client protocols +/// (e.g., PEP 503 for pip, repodata for dnf) directly from WASM. +interface request-handler { + use handler.{metadata}; + + /// Incoming HTTP request from a native client. + record http-request { + /// HTTP method (GET, POST, PUT, DELETE, HEAD) + method: string, + /// Request path relative to the plugin mount point + path: string, + /// Query string (without leading "?"), empty if none + query: string, + /// Request headers as key-value pairs + headers: list>, + /// Request body bytes (empty for GET/HEAD) + body: list, + } + + /// Repository context so the plugin can generate correct URLs. + record repo-context { + /// Repository key (e.g., "my-rpm-repo") + repo-key: string, + /// Base URL for this plugin's mount point + base-url: string, + /// Base URL for downloading artifacts from storage + download-base-url: string, + } + + /// HTTP response returned by the plugin. + record http-response { + /// HTTP status code + status: u16, + /// Response headers + headers: list>, + /// Response body bytes + body: list, + } + + /// Handle an HTTP request for this format's native protocol. + handle-request: func( + request: http-request, + context: repo-context, + artifacts: list, + ) -> result; +} + +/// Extended world for plugins that serve native client protocols. +world format-plugin-v2 { + export handler; + export request-handler; +}