diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..0f0d0eb
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,7 @@
+^\.github$
+^data-raw$
+^environment\.yml$
+^\.git$
+^.*\.Rproj$
+^\.Rproj\.user$
+^LICENSE\.md$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
new file mode 100644
index 0000000..00d906f
--- /dev/null
+++ b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,49 @@
+on:
+  push:
+    branches: [main, master, restructure]
+  pull_request:
+    branches: [main, master, restructure]
+
+name: R-CMD-check
+
+permissions: read-all
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (R ${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: ubuntu-latest,  r: 'release'}
+          - {os: ubuntu-latest,  r: 'devel'}
+          - {os: macos-latest,   r: 'release'}
+          - {os: windows-latest, r: 'release'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: ${{ matrix.config.r }}
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+
+      - uses: r-lib/actions/check-r-package@v2
+        with:
+          args: 'c("--no-manual", "--as-cran")'
+          error-on: '"warning"'
+          upload-snapshots: true
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 0000000..1b5f116
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,63 @@
+on:
+  push:
+    tags:
+      - 'v*'
+
+name: Release
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: 'release'
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check
+
+      - name: Check package
+        shell: Rscript {0}
+        run: |
+          rcmdcheck::rcmdcheck(
+            args     = c("--no-manual", "--as-cran"),
+            error_on = "warning"
+          )
+
+      - name: Build source tarball
+        run: R CMD build .
+
+      - name: Get package version and tarball name
+        id: pkg
+        run: |
+          VERSION=$(grep "^Version:" DESCRIPTION | sed 's/Version:[[:space:]]*//')
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "tarball=DeOPUS_${VERSION}.tar.gz" >> "$GITHUB_OUTPUT"
+
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          name: "DeOPUS ${{ steps.pkg.outputs.version }}"
+          body: |
+            ## DeOPUS ${{ steps.pkg.outputs.version }}
+
+            See [NEWS.md](https://github.com/tinnlab/DeOPUS/blob/main/NEWS.md) for a full list of changes.
+
+            ### Installation
+
+            ```r
+            devtools::install_github("tinnlab/DeOPUS@${{ github.ref_name }}")
+            ```
+          files: ${{ steps.pkg.outputs.tarball }}
+          fail_on_unmatched_files: true
diff --git a/.gitignore b/.gitignore
index e43b0f9..3094af6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,8 @@
 .DS_Store
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+*.Rproj
+/doc/
+/Meta/
diff --git a/DeOPUS/DESCRIPTION b/DESCRIPTION
similarity index 81%
rename from DeOPUS/DESCRIPTION
rename to DESCRIPTION
index 84ddc1f..3534a23 100644
--- a/DeOPUS/DESCRIPTION
+++ b/DESCRIPTION
@@ -5,27 +5,28 @@ Version: 1.0.0
 Authors@R: c(
     person("Ha", "Nguyen", email = "hvn0006@wayne.edu", role = c("aut", "cre"))
     )
-Description: A reference-based cellular deconvolution method that employs 
-    hierarchical shrinkage transformation to robustly estimate cell-type 
-    proportions from bulk RNA sequencing data. DeOPUS applies a multi-level 
-    adaptive transformation combining local and global shrinkage priors, 
-    variance-stabilizing power transformation, and quantile normalization 
+Description: A reference-based cellular deconvolution method that employs
+    hierarchical shrinkage transformation to robustly estimate cell-type
+    proportions from bulk RNA sequencing data. DeOPUS applies a multi-level
+    adaptive transformation combining local and global shrinkage priors,
+    variance-stabilizing power transformation, and quantile normalization
     to minimize the influence of outliers and high-variance genes.
 License: MIT + file LICENSE
 Encoding: UTF-8
+Depends: R (>= 3.5.0)
 LazyData: true
-RoxygenNote: 7.2.3
+LazyDataCompression: xz
+RoxygenNote: 7.3.3
 Imports:
     Matrix,
-    dplyr,
     parallel,
     stats
 Suggests:
     testthat (>= 3.0.0),
     knitr,
     rmarkdown,
-    ggplot2,
-    tidyverse
+    ggplot2
 VignetteBuilder: knitr
-URL: https://github.com/tinnlab/DeOPUS/DeOPUS
+Config/testthat/edition: 3
+URL: https://github.com/tinnlab/DeOPUS
 BugReports: https://github.com/tinnlab/DeOPUS/issues
diff --git a/DeOPUS/.gitignore b/DeOPUS/.gitignore
deleted file mode 100644
index ae972be..0000000
--- a/DeOPUS/.gitignore
+++ /dev/null
@@ -1,52 +0,0 @@
-# R specific
-.Rproj.user
-.Rhistory
-.RData
-.Ruserdata
-*.Rproj
-
-# Package build files
-*.tar.gz
-*.tgz
-DeOPUS.Rcheck/
-DeOPUS_*.tar.gz
-
-# Generated documentation
-man/*.Rd
-NAMESPACE
-
-# OS specific
-.DS_Store
-Thumbs.db
-
-# IDE specific
-.idea/
-.vscode/
-*.swp
-*.swo
-*~
-
-# Data files (large)
-data/real_datasets/*.rds
-data/simulated_datasets/*.rds
-results/
-
-# Temporary files
-*.tmp
-*.bak
-*.log
-
-# Output files
-*.pdf
-*.png
-*.jpg
-!vignettes/*.png
-
-# Compiled code
-*.o
-*.so
-*.dll
-
-# Sensitive information
-*.lic
-license.lic
diff --git a/DeOPUS/R/deconvolve.R b/DeOPUS/R/deconvolve.R
deleted file mode 100644
index a2eb369..0000000
--- a/DeOPUS/R/deconvolve.R
+++ /dev/null
@@ -1,220 +0,0 @@
-#' Deconvolve bulk RNA-seq data using DeOPUS
-#'
-#' @description
-#' Estimates cell-type proportions from bulk RNA-seq data using a reference-based
-#' approach with hierarchical shrinkage transformation.
-#'
-#' @param bulk A numeric matrix of bulk expression data (genes x samples).
-#' @param reference A numeric matrix of reference expression profiles (genes x cell types).
-#' @param alpha Numeric. Regularization parameter for transformation. Default is 0.01.
-#' @param power Numeric. Power for loss function (1 = MAE, 2 = MSE). Default is 2.
-#' @param n_cores Integer. Number of cores for parallel processing. Default is 1.
-#' @param maxit Integer. Maximum number of optimization iterations. Default is 100.
-#' @param verbose Logical. Whether to print progress messages. Default is FALSE.
-#'
-#' @return A list containing:
-#' \itemize{
-#'   \item \code{proportions}: Matrix of estimated cell-type proportions (samples x cell types)
-#'   \item \code{convergence}: List of convergence information for each sample
-#' }
-#'
-#' @details
-#' DeOPUS applies a multi-level adaptive transformation combining:
-#' \itemize{
-#'   \item Local shrinkage: Attenuates influence of high-variance genes
-#'   \item Global shrinkage: Provides overall regularization
-#'   \item Power transformation: Stabilizes variance
-#'   \item Quantile normalization: Ensures robust profile comparison
-#' }
-#'
-#' @examples
-#' \dontrun{
-#' # Load example data
-#' data(example_bulk)
-#' data(example_reference)
-#'
-#' # Run deconvolution
-#' results <- deconvolve(
-#'   bulk = example_bulk,
-#'   reference = example_reference,
-#'   n_cores = 4
-#' )
-#'
-#' # View proportions
-#' head(results$proportions)
-#' }
-#'
-#' @export
-deconvolve <- function(bulk,
-                       reference,
-                       alpha = 0.01,
-                       power = 2,
-                       n_cores = 1,
-                       maxit = 100,
-                       verbose = FALSE) {
-
-
-  # Input validation
-  if (!is.matrix(bulk)) {
-    bulk <- as.matrix(bulk)
-  }
-  if (!is.matrix(reference)) {
-    reference <- as.matrix(reference)
-  }
-
-  # Check for common genes
-
-common_genes <- intersect(rownames(bulk), rownames(reference))
-  if (length(common_genes) == 0) {
-    stop("No common genes found between bulk and reference matrices.")
-  }
-
-  if (verbose) {
-    message(sprintf("Using %d common genes for deconvolution", length(common_genes)))
-  }
-
-  # Subset to common genes
-  bulk <- bulk[common_genes, , drop = FALSE]
-  reference <- reference[common_genes, , drop = FALSE]
-
-  # Get dimensions
-  n_samples <- ncol(bulk)
-  n_cell_types <- ncol(reference)
-  cell_type_names <- colnames(reference)
-  sample_names <- colnames(bulk)
-
-  # Define transformation function
-  transform_func <- function(q, alpha = 0.01) {
-    q[q < 0] <- 0
-
-    # Normalize by total counts
-    scale_factor <- mean(q) * 100
-    if (scale_factor == 0) scale_factor <- 1
-
-    # Shrinkage parameters
-    tau <- 0.05     # Local shrinkage parameter
-    phi <- 0.8      # Global shrinkage intensity
-    omega <- 1.5    # Shape modulation factor
-    delta <- 0.3    # Adaptive regularization strength
-
-    # Dispersion and shape parameters
-    kappa <- 0.1
-    mu <- 2.0
-
-    # Hierarchical shrinkage adjustment
-    local_shrink <- tau / (tau + q / scale_factor)
-    global_shrink <- (1 - phi) * exp(-delta * q / scale_factor) + phi
-
-    # Modified power transformation with adaptive hierarchical shrinkage
-    z <- ((q / scale_factor + kappa)^(mu * omega * global_shrink) - kappa^mu) / (mu * global_shrink)
-
-    # Apply spline-based normalization
-    ranks <- rank(z) / length(z)
-    quantile_norm <- qnorm(pmax(0.001, pmin(0.999, ranks)))
-
-    # Enhanced blending transformation with multi-level shrinkage
-    signal_weight <- pmin(1, q / (10 + alpha * q)) * (1 - local_shrink)
-
-    # Incorporate prior information shrinkage
-    transformed <- signal_weight * z + (1 - signal_weight) *
-      global_shrink *
-      quantile_norm
-
-    # Apply edge-preserving smoothing via shrinkage
-    transformed <- transformed / (1 + delta * abs(transformed))
-
-    # Scale to approximate log2 scale with adaptive shrinkage
-    transformed <- transformed / max(abs(transformed), na.rm = TRUE) * log2(max(q) + 1)
-
-    return(transformed)
-  }
-
-  # Define loss function
-  loss_function <- function(p, b, reference, alpha, power, transform_func) {
-    # Apply transformation to bulk
-    b_transformed <- transform_func(b, alpha)
-
-    # Compute fitted values
-    fitted <- as.vector(reference %*% p)
-    fitted_transformed <- transform_func(fitted, alpha)
-
-    # Calculate loss
-    return(sum(abs(b_transformed - fitted_transformed)^power))
-  }
-
-  # Run deconvolution for each sample
-  if (verbose) {
-    message(sprintf("Deconvolving %d samples using %d cores...", n_samples, n_cores))
-  }
-
-  results <- parallel::mclapply(seq_len(n_samples), mc.cores = n_cores, mc.preschedule = FALSE, function(i) {
-    result <- tryCatch({
-      optim_result <- optim(
-        par = rep(1 / n_cell_types, n_cell_types),
-        fn = loss_function,
-        b = bulk[, i],
-        reference = reference,
-        alpha = alpha,
-        power = power,
-        transform_func = transform_func,
-        method = "L-BFGS-B",
-        lower = 0,
-        upper = 100,
-        control = list(maxit = maxit)
-      )
-
-      list(
-        proportions = optim_result$par,
-        convergence = optim_result$convergence,
-        value = optim_result$value
-      )
-    }, error = function(e) {
-      list(
-        proportions = rep(0, n_cell_types),
-        convergence = -1,
-        value = NA,
-        error = conditionMessage(e)
-      )
-    })
-
-    return(result)
-  })
-
-  # Extract proportions matrix
-  proportions <- do.call(rbind, lapply(results, function(x) x$proportions))
-
-  # Normalize to sum to 1
-  row_sums <- rowSums(proportions)
-  row_sums[row_sums == 0] <- 1  # Avoid division by zero
-  proportions <- proportions / row_sums
-
-  # Handle NAs
-  proportions[is.na(proportions)] <- 0
-
-  # Set names
-  colnames(proportions) <- cell_type_names
-  rownames(proportions) <- sample_names
-
-  # Extract convergence info
-  convergence <- lapply(results, function(x) {
-    list(
-      convergence = x$convergence,
-      value = x$value,
-      error = x$error %||% NULL
-    )
-  })
-  names(convergence) <- sample_names
-
-  if (verbose) {
-    n_converged <- sum(sapply(results, function(x) x$convergence == 0))
-    message(sprintf("Deconvolution complete. %d/%d samples converged.", n_converged, n_samples))
-  }
-
-  return(list(
-    proportions = proportions,
-    convergence = convergence
-  ))
-}
-
-#' @keywords internal
-`%||%` <- function(a, b) if (is.null(a)) b else a
diff --git a/DeOPUS/README.md b/DeOPUS/README.md
deleted file mode 100644
index f3b4709..0000000
--- a/DeOPUS/README.md
+++ /dev/null
@@ -1,168 +0,0 @@
-# DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage
-
-[![R](https://img.shields.io/badge/R-%3E%3D4.0-blue.svg)](https://www.r-project.org/)
-[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-
-DeOPUS is a reference-based cellular deconvolution method that employs hierarchical shrinkage transformation to robustly estimate cell-type proportions from bulk RNA sequencing data.
-
-## Overview
-
-Single-cell RNA sequencing enables comprehensive transcriptomic profiling at single-cell resolution, but high costs limit its widespread application. DeOPUS offers a cost-effective alternative by computationally estimating cell-type proportions from bulk RNA-seq data.
-
-**Key features:**
-- Hierarchical shrinkage transformation with local and global priors
-- Variance-stabilizing power transformation
-- Quantile normalization to minimize outlier influence
-- Robust performance across diverse tissues and cell-type complexities
-
-## Installation
-
-### From GitHub
-
-```r
-# Install devtools if not already installed
-install.packages("devtools")
-
-# Install DeOPUS
-devtools::install_github("tinnlab/DeOPUS")
-```
-
-### Dependencies
-
-```r
-install.packages(c("Matrix", "dplyr", "parallel"))
-```
-
-## Quick Start
-
-```r
-library(DeOPUS)
-
-# Load your data
-# cellTypeExpr: reference expression matrix (genes x cell types)
-# bulk: bulk expression matrix (genes x samples)
-
-# Run deconvolution
-results <- deconvolve(
-  bulk = bulk_matrix,
-  reference = reference_matrix,
-  alpha = 0.01,
-  n_cores = 4
-)
-
-# View estimated proportions
-head(results$proportions)
-```
-
-## Input Data Format
-
-DeOPUS requires two main inputs:
-
-1. **Reference expression matrix** (`reference`): A genes × cell types matrix containing average expression profiles for each cell type, typically derived from scRNA-seq data.
-
-2. **Bulk expression matrix** (`bulk`): A genes × samples matrix containing bulk RNA-seq expression data to be deconvolved.
-
-Both matrices should:
-- Have matching gene identifiers (rownames)
-- Be in linear scale (not log-transformed)
-- Contain non-negative values
-
-## Method Details
-
-DeOPUS applies a multi-level adaptive transformation:
-
-1. **Local shrinkage**: Attenuates the influence of individual high-variance genes
-2. **Global shrinkage**: Provides overall regularization across the expression profile
-3. **Power transformation**: Stabilizes variance across the dynamic range
-4. **Quantile normalization**: Ensures robust comparison between predicted and observed profiles
-
-The optimization minimizes the weighted loss between transformed bulk and reconstructed expression profiles using L-BFGS-B with box constraints.
-
-## Benchmarking
-
-We benchmarked DeOPUS against six state-of-the-art methods:
-- MuSiC
-- AutoGeneS
-- CIBERSORT
-- FARDEEP
-- scaden
-- AdRoit
-
-### Running Benchmarks
-
-```r
-# Run benchmark on simulated data
-source("scripts/benchmark/run_benchmark_simulated.R")
-
-# Run benchmark on real data
-source("scripts/benchmark/run_benchmark_real.R")
-
-# Generate visualization dashboard
-source("scripts/analysis/visualize_results.R")
-results <- create_summary_barplot_dashboard(benchmark_data)
-```
-
-## Reproducing Paper Results
-
-To reproduce the results from our paper:
-
-```bash
-# Clone the repository
-git clone https://github.com/tinnlab/DeOPUS.git
-cd DeOPUS
-
-# Run the complete benchmark pipeline
-Rscript scripts/benchmark/run_all_benchmarks.R
-
-# Generate figures
-Rscript scripts/analysis/generate_figures.R
-```
-
-### Data Availability
-
-Benchmark datasets are available at https://doi.org/10.5281/zenodo.19050845 or can be generated using:
-
-```r
-source("scripts/data/prepare_benchmark_data.R")
-```
-
-## Output
-
-DeOPUS returns a list containing:
-
-- `proportions`: Matrix of estimated cell-type proportions (samples × cell types)
-- `convergence`: Optimization convergence information for each sample
-
-## Parameters
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| `alpha` | 0.01 | Regularization parameter for transformation |
-| `n_cores` | 1 | Number of parallel cores |
-| `maxit` | 100 | Maximum optimization iterations |
-| `power` | 2 | Loss function power (1=MAE, 2=MSE) |
-
-## Citation
-
-If you use DeOPUS in your research, please cite:
-
-```bibtex
-@article{DeOPUS2025,
-  title={DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage},
-  author={},
-  journal={},
-  year={2025},
-  doi={}
-}
-```
-
-[//]: # (## License)
-
-[//]: # ()
-[//]: # (This project is licensed under the MIT License - see the [LICENSE]&#40;LICENSE&#41; file for details.)
-
-[//]: # ()
-[//]: # (## Contact)
-
-[//]: # ()
-[//]: # (For questions or issues, please open an issue on GitHub or contact [email].)
diff --git a/DeOPUS/tests/testthat/test_deconvolve.R b/DeOPUS/tests/testthat/test_deconvolve.R
deleted file mode 100644
index 4a09b03..0000000
--- a/DeOPUS/tests/testthat/test_deconvolve.R
+++ /dev/null
@@ -1,98 +0,0 @@
-library(testthat)
-library(DeOPUS)
-
-test_that("deconvolve works with simple input", {
-  
-  # Create simple test data
-  set.seed(42)
-  n_genes <- 100
-  n_cell_types <- 3
-  n_samples <- 5
-  
-  # Reference matrix
-  reference <- matrix(
-    rpois(n_genes * n_cell_types, lambda = 100),
-    nrow = n_genes,
-    ncol = n_cell_types
-  )
-  rownames(reference) <- paste0("Gene", 1:n_genes)
-  colnames(reference) <- paste0("CellType", 1:n_cell_types)
-  
-  # True proportions
-  true_props <- matrix(
-    c(0.5, 0.3, 0.2,
-      0.2, 0.5, 0.3,
-      0.3, 0.3, 0.4,
-      0.6, 0.2, 0.2,
-      0.1, 0.6, 0.3),
-    nrow = n_samples,
-    ncol = n_cell_types,
-    byrow = TRUE
-  )
-  
-  # Generate bulk
-  bulk <- reference %*% t(true_props) + matrix(rnorm(n_genes * n_samples, sd = 5), nrow = n_genes)
-  bulk[bulk < 0] <- 0
-  colnames(bulk) <- paste0("Sample", 1:n_samples)
-  
-  # Run deconvolution
-  results <- deconvolve(
-    bulk = bulk,
-    reference = reference,
-    n_cores = 1,
-    maxit = 50
-  )
-  
-  # Check output structure
-  expect_true(is.list(results))
-  expect_true("proportions" %in% names(results))
-  expect_true("convergence" %in% names(results))
-  
-  # Check dimensions
-  expect_equal(nrow(results$proportions), n_samples)
-  expect_equal(ncol(results$proportions), n_cell_types)
-  
-  # Check proportions sum to 1
-  row_sums <- rowSums(results$proportions)
-  expect_true(all(abs(row_sums - 1) < 1e-6))
-  
-  # Check proportions are non-negative
-  expect_true(all(results$proportions >= 0))
-})
-
-test_that("deconvolve handles mismatched genes", {
-  
-  set.seed(42)
-  
-  # Reference with genes 1-100
-  reference <- matrix(rpois(100 * 3, lambda = 100), nrow = 100, ncol = 3)
-  rownames(reference) <- paste0("Gene", 1:100)
-  colnames(reference) <- paste0("CellType", 1:3)
-  
-  # Bulk with genes 50-150 (only 50 overlap)
-  bulk <- matrix(rpois(100 * 2, lambda = 100), nrow = 100, ncol = 2)
-  rownames(bulk) <- paste0("Gene", 50:149)
-  colnames(bulk) <- paste0("Sample", 1:2)
-  
-  # Should work with overlapping genes
-  results <- deconvolve(bulk = bulk, reference = reference, n_cores = 1)
-  
-  expect_equal(nrow(results$proportions), 2)
-  expect_equal(ncol(results$proportions), 3)
-})
-
-test_that("deconvolve errors with no common genes", {
-  
-  reference <- matrix(1:9, nrow = 3, ncol = 3)
-  rownames(reference) <- c("GeneA", "GeneB", "GeneC")
-  colnames(reference) <- paste0("CellType", 1:3)
-  
-  bulk <- matrix(1:6, nrow = 3, ncol = 2)
-  rownames(bulk) <- c("GeneX", "GeneY", "GeneZ")
-  colnames(bulk) <- paste0("Sample", 1:2)
-  
-  expect_error(
-    deconvolve(bulk = bulk, reference = reference),
-    "No common genes"
-  )
-})
diff --git a/DeOPUS/vignettes/getting_started.Rmd b/DeOPUS/vignettes/getting_started.Rmd
deleted file mode 100644
index efb3c60..0000000
--- a/DeOPUS/vignettes/getting_started.Rmd
+++ /dev/null
@@ -1,171 +0,0 @@
----
-title: "Getting Started with DeOPUS"
-author: "Author Name"
-date: "`r Sys.Date()`"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{Getting Started with DeOPUS}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-```{r setup, include = FALSE}
-knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>",
-  fig.width = 7,
-  fig.height = 5
-)
-```
-
-## Introduction
-DeOPUS (Deconvolution via Optimized Power-transformed Unmixing with Shrinkage) is a reference-based cellular deconvolution method that estimates cell-type proportions from bulk RNA sequencing data.
-
-## Installation
-
-Install DeOPUS from GitHub:
-
-```{r install, eval=FALSE}
-devtools::install_github("yourusername/DeOPUS")
-```
-
-Load the package:
-
-```{r load}
-library(DeOPUS)
-```
-
-## Quick Example
-
-### Prepare Input Data
-
-DeOPUS requires two inputs:
-
-1. **Reference matrix**: Average expression profiles for each cell type (genes × cell types)
-2. **Bulk matrix**: Bulk RNA-seq expression data (genes × samples)
-
-```{r data, eval=FALSE}
-# Example: Load your data
-# reference <- readRDS("path/to/reference.rds")
-# bulk <- readRDS("path/to/bulk.rds")
-
-# Create example data
-set.seed(42)
-n_genes <- 1000
-n_cell_types <- 5
-n_samples <- 10
-
-# Simulated reference (genes x cell types)
-reference <- matrix(
-  rpois(n_genes * n_cell_types, lambda = 100),
-  nrow = n_genes,
-  ncol = n_cell_types
-)
-rownames(reference) <- paste0("Gene", 1:n_genes)
-colnames(reference) <- paste0("CellType", 1:n_cell_types)
-
-# True proportions
-true_props <- matrix(
-  runif(n_samples * n_cell_types),
-  nrow = n_samples,
-  ncol = n_cell_types
-)
-true_props <- true_props / rowSums(true_props)
-
-# Simulated bulk (genes x samples) = reference %*% t(proportions) + noise
-bulk <- reference %*% t(true_props) + matrix(rnorm(n_genes * n_samples, sd = 10), nrow = n_genes)
-bulk[bulk < 0] <- 0
-colnames(bulk) <- paste0("Sample", 1:n_samples)
-```
-
-### Run Deconvolution
-
-```{r deconvolve, eval=FALSE}
-# Run DeOPUS
-results <- deconvolve(
-  bulk = bulk,
-  reference = reference,
-  alpha = 0.01,
-  n_cores = 4,
-  verbose = TRUE
-)
-
-# View estimated proportions
-print(results$proportions)
-```
-
-### Evaluate Results
-
-```{r evaluate, eval=FALSE}
-# Compare with true proportions
-estimated <- results$proportions
-
-# Calculate correlation
-cor_values <- sapply(1:n_samples, function(i) {
-  cor(estimated[i, ], true_props[i, ], method = "pearson")
-})
-
-cat("Mean Pearson correlation:", mean(cor_values), "\n")
-```
-
-## Method Details
-
-DeOPUS applies a multi-level adaptive transformation:
-
-### 1. Hierarchical Shrinkage
-
-The transformation incorporates both local and global shrinkage:
-
-- **Local shrinkage** ($\tau$): Attenuates the influence of individual high-variance genes
-- **Global shrinkage** ($\phi$): Provides overall regularization across the expression profile
-
-### 2. Power Transformation
-
-A variance-stabilizing power transformation handles the high dynamic range:
-
-$$z = \frac{(q/s + \kappa)^{\mu \omega g} - \kappa^\mu}{\mu g}$$
-
-where $q$ is the expression value, $s$ is a scale factor, and $g$ is the global shrinkage weight.
-
-### 3. Quantile Normalization
-
-Spline-based quantile normalization ensures robust comparison between profiles:
-
-```{r qnorm, eval=FALSE}
-ranks <- rank(z) / length(z)
-quantile_norm <- qnorm(pmax(0.001, pmin(0.999, ranks)))
-```
-
-### 4. Optimization
-
-DeOPUS uses L-BFGS-B optimization with box constraints (proportions ≥ 0) to minimize the loss between transformed bulk and reconstructed expression.
-
-## Parameters
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| `alpha` | 0.01 | Regularization parameter |
-| `power` | 2 | Loss function power (1=MAE, 2=MSE) |
-| `n_cores` | 1 | Parallel processing cores |
-| `maxit` | 100 | Maximum iterations |
-
-## Benchmarking
-
-To reproduce the benchmark results from our paper:
-
-```{r benchmark, eval=FALSE}
-# Run benchmark on real datasets
-source("scripts/benchmark/run_benchmark_real.R")
-run_benchmark()
-
-# Generate visualization
-source("scripts/analysis/visualize_results.R")
-results <- aggregate_results()
-dashboard <- create_summary_barplot_dashboard(results)
-```
-
-## Session Info
-
-```{r sessioninfo}
-sessionInfo()
-```
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..4745632
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2025
+COPYRIGHT HOLDER: Ha Nguyen
diff --git a/DeOPUS/LICENSE b/LICENSE.md
similarity index 95%
rename from DeOPUS/LICENSE
rename to LICENSE.md
index bb2eac1..288c3bb 100644
--- a/DeOPUS/LICENSE
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
-MIT License
+# MIT License
 
-Copyright (c) 2025 [Author Name]
+Copyright (c) 2025 Ha Nguyen
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..563b10b
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,7 @@
+# Generated by roxygen2: do not edit by hand
+
+export(deconvolve)
+importFrom(Matrix,sparseMatrix)
+importFrom(parallel,mclapply)
+importFrom(stats,optim)
+importFrom(stats,qnorm)
diff --git a/DeOPUS/NEWS.md b/NEWS.md
similarity index 81%
rename from DeOPUS/NEWS.md
rename to NEWS.md
index 0a6030e..4932a7c 100644
--- a/DeOPUS/NEWS.md
+++ b/NEWS.md
@@ -5,9 +5,11 @@
 * First public release of DeOPUS (Deconvolution via Optimized Power-transformed Unmixing with Shrinkage)
 * Core deconvolution algorithm with hierarchical shrinkage transformation
 * Parallel processing support via `parallel::mclapply`
-* Comprehensive benchmarking scripts for real and simulated datasets
+* Built-in sample benchmark dataset (`sampleData`) for demonstration and testing
+* Comprehensive benchmarking scripts in `inst/scripts/`
 * Visualization dashboard for benchmark results
 * Full documentation and vignettes
+* GitHub Actions for CRAN-style checks on Linux/macOS/Windows and automated releases on version tags
 
 ## Key Features
 
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..daa1011
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,66 @@
+#' Sample bulk RNA-seq deconvolution benchmark dataset
+#'
+#' A simulated benchmark dataset with known ground-truth cell-type proportions
+#' for evaluating and demonstrating the DeOPUS deconvolution method.
+#'
+#' @format A list with the following components:
+#' \describe{
+#'   \item{bulk}{Numeric matrix (11852 genes x 512 samples) of simulated bulk
+#'     RNA-seq expression values in linear scale. Row names are Ensembl gene IDs
+#'     (e.g. ENSG00000000419); column names are Mixture1 through Mixture512.}
+#'   \item{bulkRatio}{Numeric matrix (2 cell types x 512 samples) containing
+#'     the ground-truth cell-type proportions used to generate \code{bulk}.
+#'     Row names are \code{"mature NK T cell"} and \code{"mesenchymal stem cell"}.
+#'     Note: row order may differ from the column order of \code{cellTypeExpr},
+#'     so align by name before comparing predictions to ground truth.}
+#'   \item{cellTypeExpr}{Numeric matrix (11852 genes x 2 cell types) of reference
+#'     expression profiles derived from single-cell RNA-seq data.
+#'     Column names match (but may be in a different order from) the row names of
+#'     \code{bulkRatio}.}
+#'   \item{signature}{Numeric matrix (974 signature genes x 2 cell types) of
+#'     the most discriminative marker genes for each cell type.}
+#'   \item{nCellTypes}{Integer. Number of cell types (2).}
+#'   \item{markers}{Named list of marker gene vectors, one entry per cell type.}
+#'   \item{sigGenes}{Character vector of signature gene names.}
+#'   \item{singleCellExpr}{Sparse matrix (dgCMatrix) of the single-cell RNA-seq
+#'     expression data used to derive the reference profiles.}
+#'   \item{singleCellLabels}{Character vector of cell-type labels for each cell
+#'     in \code{singleCellExpr}.}
+#'   \item{singleCellSubjects}{Character vector of donor/subject IDs for each
+#'     cell in \code{singleCellExpr}.}
+#' }
+#'
+#' @usage data(sampleData)
+#'
+#' @examples
+#' data(sampleData)
+#'
+#' # Inspect the dataset
+#' dim(sampleData$bulk)          # 11852 x 512
+#' dim(sampleData$cellTypeExpr)  # 11852 x 2
+#'
+#' # Run deconvolution on a small subset for speed
+#' set.seed(42)
+#' idx <- sample(ncol(sampleData$bulk), 10)
+#' results <- deconvolve(
+#'   bulk      = sampleData$bulk[, idx],
+#'   reference = sampleData$cellTypeExpr,
+#'   n_cores   = 1,
+#'   maxit     = 50
+#' )
+#'
+#' # Align bulkRatio cell-type ORDER to match results$proportions columns
+#' # (bulkRatio rows and cellTypeExpr columns may be in a different order).
+#' ct <- colnames(results$proportions)
+#' true_props <- t(sampleData$bulkRatio[ct, idx, drop = FALSE])
+#'
+#' # Per-sample Pearson correlation with ground truth
+#' cor_values <- sapply(seq_len(nrow(results$proportions)), function(i) {
+#'   cor(results$proportions[i, ], true_props[i, ], method = "pearson")
+#' })
+#' mean(cor_values, na.rm = TRUE)
+#'
+#' @source
+#' Simulated using negative-binomial expression models with known mixing
+#' proportions. See \code{data-raw/prepare_data.R} for the generation script.
+"sampleData"
diff --git a/deconvolve.R b/R/deconvolve.R
similarity index 89%
rename from deconvolve.R
rename to R/deconvolve.R
index a2eb369..aa69a34 100644
--- a/deconvolve.R
+++ b/R/deconvolve.R
@@ -28,22 +28,24 @@
 #' }
 #'
 #' @examples
-#' \dontrun{
-#' # Load example data
-#' data(example_bulk)
-#' data(example_reference)
+#' # Load the built-in sample benchmark dataset
+#' data(sampleData)
 #'
-#' # Run deconvolution
+#' # Run deconvolution on a small subset for speed
+#' set.seed(42)
+#' idx <- sample(ncol(sampleData$bulk), 10)
 #' results <- deconvolve(
-#'   bulk = example_bulk,
-#'   reference = example_reference,
-#'   n_cores = 4
+#'   bulk      = sampleData$bulk[, idx],
+#'   reference = sampleData$cellTypeExpr,
+#'   n_cores   = 1,
+#'   maxit     = 50
 #' )
 #'
-#' # View proportions
+#' # View estimated proportions (samples x cell types)
 #' head(results$proportions)
-#' }
 #'
+#' @importFrom parallel mclapply
+#' @importFrom stats optim qnorm
 #' @export
 deconvolve <- function(bulk,
                        reference,
@@ -53,7 +55,6 @@ deconvolve <- function(bulk,
                        maxit = 100,
                        verbose = FALSE) {
 
-
   # Input validation
   if (!is.matrix(bulk)) {
     bulk <- as.matrix(bulk)
@@ -63,8 +64,7 @@ deconvolve <- function(bulk,
   }
 
   # Check for common genes
-
-common_genes <- intersect(rownames(bulk), rownames(reference))
+  common_genes <- intersect(rownames(bulk), rownames(reference))
   if (length(common_genes) == 0) {
     stop("No common genes found between bulk and reference matrices.")
   }
@@ -110,7 +110,7 @@ common_genes <- intersect(rownames(bulk), rownames(reference))
 
     # Apply spline-based normalization
     ranks <- rank(z) / length(z)
-    quantile_norm <- qnorm(pmax(0.001, pmin(0.999, ranks)))
+    quantile_norm <- stats::qnorm(pmax(0.001, pmin(0.999, ranks)))
 
     # Enhanced blending transformation with multi-level shrinkage
     signal_weight <- pmin(1, q / (10 + alpha * q)) * (1 - local_shrink)
@@ -131,14 +131,9 @@ common_genes <- intersect(rownames(bulk), rownames(reference))
 
   # Define loss function
   loss_function <- function(p, b, reference, alpha, power, transform_func) {
-    # Apply transformation to bulk
     b_transformed <- transform_func(b, alpha)
-
-    # Compute fitted values
     fitted <- as.vector(reference %*% p)
     fitted_transformed <- transform_func(fitted, alpha)
-
-    # Calculate loss
     return(sum(abs(b_transformed - fitted_transformed)^power))
   }
 
@@ -148,8 +143,8 @@ common_genes <- intersect(rownames(bulk), rownames(reference))
   }
 
   results <- parallel::mclapply(seq_len(n_samples), mc.cores = n_cores, mc.preschedule = FALSE, function(i) {
-    result <- tryCatch({
-      optim_result <- optim(
+    tryCatch({
+      optim_result <- stats::optim(
         par = rep(1 / n_cell_types, n_cell_types),
         fn = loss_function,
         b = bulk[, i],
@@ -162,7 +157,6 @@ common_genes <- intersect(rownames(bulk), rownames(reference))
         upper = 100,
         control = list(maxit = maxit)
       )
-
       list(
         proportions = optim_result$par,
         convergence = optim_result$convergence,
@@ -176,16 +170,17 @@ common_genes <- intersect(rownames(bulk), rownames(reference))
         error = conditionMessage(e)
       )
     })
-
-    return(result)
   })
 
   # Extract proportions matrix
   proportions <- do.call(rbind, lapply(results, function(x) x$proportions))
 
+  # Clamp: L-BFGS-B can return tiny negatives (~-1e-16) on some platforms
+  proportions <- pmax(proportions, 0)
+
   # Normalize to sum to 1
   row_sums <- rowSums(proportions)
-  row_sums[row_sums == 0] <- 1  # Avoid division by zero
+  row_sums[row_sums == 0] <- 1
   proportions <- proportions / row_sums
 
   # Handle NAs
diff --git a/R/package.R b/R/package.R
new file mode 100644
index 0000000..30b4319
--- /dev/null
+++ b/R/package.R
@@ -0,0 +1,8 @@
+#' DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage
+#'
+#' A reference-based cellular deconvolution method that employs hierarchical
+#' shrinkage transformation to robustly estimate cell-type proportions from
+#' bulk RNA sequencing data.
+#'
+#' @importFrom Matrix sparseMatrix
+"_PACKAGE"
diff --git a/README.md b/README.md
index f3b4709..6104e30 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,16 @@
 # DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage
 
-[![R](https://img.shields.io/badge/R-%3E%3D4.0-blue.svg)](https://www.r-project.org/)
+[![R-CMD-check](https://github.com/tinnlab/DeOPUS/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tinnlab/DeOPUS/actions/workflows/R-CMD-check.yaml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
-DeOPUS is a reference-based cellular deconvolution method that employs hierarchical shrinkage transformation to robustly estimate cell-type proportions from bulk RNA sequencing data.
+DeOPUS is a reference-based cellular deconvolution method that employs hierarchical
+shrinkage transformation to robustly estimate cell-type proportions from bulk RNA-seq data.
 
 ## Overview
 
-Single-cell RNA sequencing enables comprehensive transcriptomic profiling at single-cell resolution, but high costs limit its widespread application. DeOPUS offers a cost-effective alternative by computationally estimating cell-type proportions from bulk RNA-seq data.
+Single-cell RNA sequencing enables comprehensive transcriptomic profiling at single-cell
+resolution, but high costs limit its widespread application. DeOPUS offers a cost-effective
+alternative by computationally estimating cell-type proportions from bulk RNA-seq data.
 
 **Key features:**
 - Hierarchical shrinkage transformation with local and global priors
@@ -27,40 +30,74 @@ install.packages("devtools")
 devtools::install_github("tinnlab/DeOPUS")
 ```
 
-### Dependencies
+### Optional dependencies (only for the benchmark / figure scripts)
+
+The core `deconvolve()` function needs nothing beyond what `install_github`
+installs. The reproducibility scripts in `inst/scripts/` have additional needs:
 
 ```r
-install.packages(c("Matrix", "dplyr", "parallel"))
+# Benchmark runner
+install.packages(c("RhpcBLASctl", "dplyr"))
+
+# Figure generation
+install.packages(c("tidyverse", "scales", "cowplot", "gridExtra"))
+
+# Bioconductor (used by visualize_results.R)
+if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
+BiocManager::install(c("ComplexHeatmap", "circlize"))
+
+# OPTIONAL — only for competing methods (MuSiC / AutoGeneS / CIBERSORT /
+# FARDEEP / scaden / AdRoit). Requires Docker installed and running.
+# install.packages("DeconBenchmark")
 ```
 
 ## Quick Start
 
+The package ships with a built-in simulated benchmark dataset (`sampleData`):
+11,852 genes × 512 samples with known ground-truth proportions for 2 cell types.
+
 ```r
 library(DeOPUS)
 
-# Load your data
-# cellTypeExpr: reference expression matrix (genes x cell types)
-# bulk: bulk expression matrix (genes x samples)
+# Load the built-in sample benchmark dataset
+data(sampleData)
+
+# Run deconvolution on a small subset for speed
+set.seed(42)
+idx <- sample(ncol(sampleData$bulk), 10)
 
-# Run deconvolution
 results <- deconvolve(
-  bulk = bulk_matrix,
-  reference = reference_matrix,
-  alpha = 0.01,
-  n_cores = 4
+  bulk      = sampleData$bulk[, idx],
+  reference = sampleData$cellTypeExpr,
+  alpha     = 0.01,
+  n_cores   = 1,
+  verbose   = TRUE
 )
 
-# View estimated proportions
+# View estimated proportions (samples x cell types)
 head(results$proportions)
+
+# Evaluate against ground truth
+# IMPORTANT: bulkRatio rows and cellTypeExpr columns may be in different orders,
+# so align by name before computing correlations.
+ct <- colnames(results$proportions)
+true_props <- t(sampleData$bulkRatio[ct, idx, drop = FALSE])  # 10 x 2 aligned
+
+cor_values <- sapply(seq_len(nrow(results$proportions)), function(i) {
+  cor(results$proportions[i, ], true_props[i, ], method = "pearson")
+})
+cat("Mean Pearson correlation:", round(mean(cor_values, na.rm = TRUE), 3), "\n")
 ```
 
 ## Input Data Format
 
 DeOPUS requires two main inputs:
 
-1. **Reference expression matrix** (`reference`): A genes × cell types matrix containing average expression profiles for each cell type, typically derived from scRNA-seq data.
-
-2. **Bulk expression matrix** (`bulk`): A genes × samples matrix containing bulk RNA-seq expression data to be deconvolved.
+1. **Reference expression matrix** (`reference`): A genes × cell types matrix
+   containing average expression profiles for each cell type, typically derived
+   from scRNA-seq data.
+2. **Bulk expression matrix** (`bulk`): A genes × samples matrix containing bulk
+   RNA-seq expression data to be deconvolved.
 
 Both matrices should:
 - Have matching gene identifiers (rownames)
@@ -76,7 +113,8 @@ DeOPUS applies a multi-level adaptive transformation:
 3. **Power transformation**: Stabilizes variance across the dynamic range
 4. **Quantile normalization**: Ensures robust comparison between predicted and observed profiles
 
-The optimization minimizes the weighted loss between transformed bulk and reconstructed expression profiles using L-BFGS-B with box constraints.
+The optimization minimizes the weighted loss between transformed bulk and reconstructed
+expression profiles using L-BFGS-B with box constraints.
 
 ## Benchmarking
 
@@ -90,57 +128,94 @@ We benchmarked DeOPUS against six state-of-the-art methods:
 
 ### Running Benchmarks
 
-```r
-# Run benchmark on simulated data
-source("scripts/benchmark/run_benchmark_simulated.R")
+Benchmark and visualization scripts ship in `inst/scripts/` and are accessible
+after install via `system.file()`. The benchmark expects a directory of `.rds`
+input files where each file is a list matching the schema of `data(sampleData)`
+(`$bulk`, `$cellTypeExpr`, `$bulkRatio`, etc.).
+
+To benchmark **DeOPUS** on the bundled `sampleData` end-to-end:
 
-# Run benchmark on real data
-source("scripts/benchmark/run_benchmark_real.R")
+```r
+library(DeOPUS)
+source(system.file("scripts/benchmark/run_benchmark_real.R", package = "DeOPUS"))
+
+# Prepare an input dir with at least one dataset .rds file
+input_dir  <- file.path(tempdir(), "decopus_inputs")
+output_dir <- file.path(tempdir(), "decopus_results")
+dir.create(input_dir,  recursive = TRUE, showWarnings = FALSE)
+dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+
+# Use the bundled sampleData as the benchmark input
+data(sampleData)
+saveRDS(sampleData, file.path(input_dir, "sampleData.rds"))
+
+# Run benchmark — restrict to DeOPUS unless you have DeconBenchmark + Docker
+run_benchmark(
+  data_dir   = input_dir,
+  output_dir = output_dir,
+  methods    = "DeOPUS"
+)
 
-# Generate visualization dashboard
-source("scripts/analysis/visualize_results.R")
-results <- create_summary_barplot_dashboard(benchmark_data)
+# Aggregate per-sample, per-cell-type results into a long-format dataframe
+benchmark_data <- aggregate_results(output_dir = output_dir, methods = "DeOPUS")
+head(benchmark_data)
 ```
 
+To include MuSiC, AutoGeneS, CIBERSORT, FARDEEP, scaden, AdRoit, install the
+optional `DeconBenchmark` package (requires Docker) and pass them in
+`methods = c("DeOPUS", "MuSiC", ...)`.
+
 ## Reproducing Paper Results
 
-To reproduce the results from our paper:
+The benchmark scripts expect a directory of `.rds` datasets (one per real
+benchmark dataset, matching the schema of `data(sampleData)`) at
+`data/real_datasets/`, and write results to `results/real_benchmark/` and
+figures to `figures/`.
 
 ```bash
-# Clone the repository
 git clone https://github.com/tinnlab/DeOPUS.git
 cd DeOPUS
 
-# Run the complete benchmark pipeline
-Rscript scripts/benchmark/run_all_benchmarks.R
+# Place benchmark datasets here (one .rds per dataset)
+mkdir -p data/real_datasets
+# cp /path/to/your/*.rds data/real_datasets/
+
+# Run benchmark pipeline (defaults: data/real_datasets/ → results/real_benchmark/)
+Rscript inst/scripts/benchmark/run_benchmark_real.R
 
-# Generate figures
-Rscript scripts/analysis/generate_figures.R
+# Generate figures (written to figures/)
+Rscript inst/scripts/analysis/generate_figures.R
 ```
 
 ### Data Availability
 
-Benchmark datasets are available at https://doi.org/10.5281/zenodo.19050845 or can be generated using:
+Benchmark datasets are available at
+https://doi.org/10.5281/zenodo.19050845. The simulation generator and a
+template for preparing real GEO datasets are in
+`inst/scripts/data/prepare_data.R` — source the file, then call
+`create_simulated_dataset()` or adapt `prepare_geo_dataset()` to your data:
 
 ```r
-source("scripts/data/prepare_benchmark_data.R")
+source(system.file("scripts/data/prepare_data.R", package = "DeOPUS"))
+# sampleData_like <- create_simulated_dataset(n_samples = 100, n_cell_types = 5)
 ```
 
 ## Output
 
-DeOPUS returns a list containing:
+`deconvolve()` returns a list containing:
 
-- `proportions`: Matrix of estimated cell-type proportions (samples × cell types)
-- `convergence`: Optimization convergence information for each sample
+- `proportions`: Matrix of estimated cell-type proportions (samples × cell types). Each row sums to 1.
+- `convergence`: Named list of per-sample optimization convergence information (one entry per sample, holding `convergence`, `value`, and optionally an `error` message).
 
 ## Parameters
 
 | Parameter | Default | Description |
 |-----------|---------|-------------|
-| `alpha` | 0.01 | Regularization parameter for transformation |
-| `n_cores` | 1 | Number of parallel cores |
-| `maxit` | 100 | Maximum optimization iterations |
-| `power` | 2 | Loss function power (1=MAE, 2=MSE) |
+| `alpha`   | 0.01    | Regularization parameter for transformation |
+| `power`   | 2       | Loss function power (1 = MAE, 2 = MSE) |
+| `n_cores` | 1       | Number of parallel cores |
+| `maxit`   | 100     | Maximum optimization iterations |
+| `verbose` | FALSE   | Print progress messages |
 
 ## Citation
 
@@ -148,21 +223,12 @@ If you use DeOPUS in your research, please cite:
 
 ```bibtex
 @article{DeOPUS2025,
-  title={DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage},
-  author={},
-  journal={},
-  year={2025},
-  doi={}
+  title  = {DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage},
+  author = {Ha Nguyen},
+  year   = {2025}
 }
 ```
 
-[//]: # (## License)
-
-[//]: # ()
-[//]: # (This project is licensed under the MIT License - see the [LICENSE]&#40;LICENSE&#41; file for details.)
-
-[//]: # ()
-[//]: # (## Contact)
+## License
 
-[//]: # ()
-[//]: # (For questions or issues, please open an issue on GitHub or contact [email].)
+MIT © Ha Nguyen — see [LICENSE.md](LICENSE.md) for full text.
diff --git a/data-raw/prepare_data.R b/data-raw/prepare_data.R
new file mode 100644
index 0000000..514f30a
--- /dev/null
+++ b/data-raw/prepare_data.R
@@ -0,0 +1,17 @@
+## Script that generated data/sampleData.rda
+##
+## Run this script from the package root to regenerate the package dataset
+## from the original source file.
+
+# ── sampleData ────────────────────────────────────────────────────────────────
+#
+# The source file was a pre-generated simulation containing:
+#   11852 genes x 512 simulated bulk RNA-seq samples
+#   2 cell types (mature NK T cell, mesenchymal stem cell)
+#   Matching ground-truth proportions (bulkRatio)
+
+# Load original source (adjust path as needed)
+sampleData <- readRDS("path/to/source_simulation.rds")
+
+# Save as compressed .rda for use as package data
+usethis::use_data(sampleData, overwrite = TRUE, compress = "xz")
diff --git a/data/sampleData.rda b/data/sampleData.rda
new file mode 100644
index 0000000..52350bb
Binary files /dev/null and b/data/sampleData.rda differ
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..bcee21d
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,13 @@
+name: decopus
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - r-base>=4.3
+  - r-matrix
+  - r-devtools
+  - r-testthat
+  - r-roxygen2
+  - r-knitr
+  - r-rmarkdown
+  - r-ggplot2
diff --git a/DeOPUS/scripts/analysis/generate_figures.R b/inst/scripts/analysis/generate_figures.R
similarity index 95%
rename from DeOPUS/scripts/analysis/generate_figures.R
rename to inst/scripts/analysis/generate_figures.R
index 932fecf..d9fc469 100644
--- a/DeOPUS/scripts/analysis/generate_figures.R
+++ b/inst/scripts/analysis/generate_figures.R
@@ -7,8 +7,13 @@ library(tidyverse)
 library(scales)
 library(cowplot)
 
-# Source visualization functions
-source("scripts/analysis/visualize_results.R")
+# Source visualization functions. Prefer the installed-package location
+# (system.file resolves after install_github / R CMD INSTALL); fall back to
+# the in-repo path when running from a clone.
+.viz_path <- system.file("scripts/analysis/visualize_results.R",
+                         package = "DeOPUS")
+if (!nzchar(.viz_path)) .viz_path <- "inst/scripts/analysis/visualize_results.R"
+source(.viz_path)
 
 ################################################################################
 # Configuration
diff --git a/DeOPUS/scripts/analysis/visualize_results.R b/inst/scripts/analysis/visualize_results.R
similarity index 100%
rename from DeOPUS/scripts/analysis/visualize_results.R
rename to inst/scripts/analysis/visualize_results.R
diff --git a/DeOPUS/scripts/benchmark/run_benchmark_real.R b/inst/scripts/benchmark/run_benchmark_real.R
similarity index 100%
rename from DeOPUS/scripts/benchmark/run_benchmark_real.R
rename to inst/scripts/benchmark/run_benchmark_real.R
diff --git a/DeOPUS/scripts/data/prepare_data.R b/inst/scripts/data/prepare_data.R
similarity index 100%
rename from DeOPUS/scripts/data/prepare_data.R
rename to inst/scripts/data/prepare_data.R
diff --git a/man/DeOPUS-package.Rd b/man/DeOPUS-package.Rd
new file mode 100644
index 0000000..7a5de89
--- /dev/null
+++ b/man/DeOPUS-package.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/package.R
+\docType{package}
+\name{DeOPUS-package}
+\alias{DeOPUS}
+\alias{DeOPUS-package}
+\title{DeOPUS: Deconvolution via Optimized Power-transformed Unmixing with Shrinkage}
+\description{
+A reference-based cellular deconvolution method that employs hierarchical
+shrinkage transformation to robustly estimate cell-type proportions from
+bulk RNA sequencing data.
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/tinnlab/DeOPUS}
+  \item Report bugs at \url{https://github.com/tinnlab/DeOPUS/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Ha Nguyen \email{hvn0006@wayne.edu}
+
+}
diff --git a/man/deconvolve.Rd b/man/deconvolve.Rd
new file mode 100644
index 0000000..261005e
--- /dev/null
+++ b/man/deconvolve.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/deconvolve.R
+\name{deconvolve}
+\alias{deconvolve}
+\title{Deconvolve bulk RNA-seq data using DeOPUS}
+\usage{
+deconvolve(
+  bulk,
+  reference,
+  alpha = 0.01,
+  power = 2,
+  n_cores = 1,
+  maxit = 100,
+  verbose = FALSE
+)
+}
+\arguments{
+\item{bulk}{A numeric matrix of bulk expression data (genes x samples).}
+
+\item{reference}{A numeric matrix of reference expression profiles (genes x cell types).}
+
+\item{alpha}{Numeric. Regularization parameter for transformation. Default is 0.01.}
+
+\item{power}{Numeric. Power for loss function (1 = MAE, 2 = MSE). Default is 2.}
+
+\item{n_cores}{Integer. Number of cores for parallel processing. Default is 1.}
+
+\item{maxit}{Integer. Maximum number of optimization iterations. Default is 100.}
+
+\item{verbose}{Logical. Whether to print progress messages. Default is FALSE.}
+}
+\value{
+A list containing:
+\itemize{
+  \item \code{proportions}: Matrix of estimated cell-type proportions (samples x cell types)
+  \item \code{convergence}: List of convergence information for each sample
+}
+}
+\description{
+Estimates cell-type proportions from bulk RNA-seq data using a reference-based
+approach with hierarchical shrinkage transformation.
+}
+\details{
+DeOPUS applies a multi-level adaptive transformation combining:
+\itemize{
+  \item Local shrinkage: Attenuates influence of high-variance genes
+  \item Global shrinkage: Provides overall regularization
+  \item Power transformation: Stabilizes variance
+  \item Quantile normalization: Ensures robust profile comparison
+}
+}
+\examples{
+# Load the built-in sample benchmark dataset
+data(sampleData)
+
+# Run deconvolution on a small subset for speed
+set.seed(42)
+idx <- sample(ncol(sampleData$bulk), 10)
+results <- deconvolve(
+  bulk      = sampleData$bulk[, idx],
+  reference = sampleData$cellTypeExpr,
+  n_cores   = 1,
+  maxit     = 50
+)
+
+# View estimated proportions (samples x cell types)
+head(results$proportions)
+
+}
diff --git a/man/sampleData.Rd b/man/sampleData.Rd
new file mode 100644
index 0000000..81ed08f
--- /dev/null
+++ b/man/sampleData.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{sampleData}
+\alias{sampleData}
+\title{Sample bulk RNA-seq deconvolution benchmark dataset}
+\format{
+A list with the following components:
+\describe{
+  \item{bulk}{Numeric matrix (11852 genes x 512 samples) of simulated bulk
+    RNA-seq expression values in linear scale. Row names are Ensembl gene IDs
+    (e.g. ENSG00000000419); column names are Mixture1 through Mixture512.}
+  \item{bulkRatio}{Numeric matrix (2 cell types x 512 samples) containing
+    the ground-truth cell-type proportions used to generate \code{bulk}.
+    Row names are \code{"mature NK T cell"} and \code{"mesenchymal stem cell"}.
+    Note: row order may differ from the column order of \code{cellTypeExpr},
+    so align by name before comparing predictions to ground truth.}
+  \item{cellTypeExpr}{Numeric matrix (11852 genes x 2 cell types) of reference
+    expression profiles derived from single-cell RNA-seq data.
+    Column names match (but may be in a different order from) the row names of
+    \code{bulkRatio}.}
+  \item{signature}{Numeric matrix (974 signature genes x 2 cell types) of
+    the most discriminative marker genes for each cell type.}
+  \item{nCellTypes}{Integer. Number of cell types (2).}
+  \item{markers}{Named list of marker gene vectors, one entry per cell type.}
+  \item{sigGenes}{Character vector of signature gene names.}
+  \item{singleCellExpr}{Sparse matrix (dgCMatrix) of the single-cell RNA-seq
+    expression data used to derive the reference profiles.}
+  \item{singleCellLabels}{Character vector of cell-type labels for each cell
+    in \code{singleCellExpr}.}
+  \item{singleCellSubjects}{Character vector of donor/subject IDs for each
+    cell in \code{singleCellExpr}.}
+}
+}
+\source{
+Simulated using negative-binomial expression models with known mixing
+proportions. See \code{data-raw/prepare_data.R} for the generation script.
+}
+\usage{
+data(sampleData)
+}
+\description{
+A simulated benchmark dataset with known ground-truth cell-type proportions
+for evaluating and demonstrating the DeOPUS deconvolution method.
+}
+\examples{
+data(sampleData)
+
+# Inspect the dataset
+dim(sampleData$bulk)          # 11852 x 512
+dim(sampleData$cellTypeExpr)  # 11852 x 2
+
+# Run deconvolution on a small subset for speed
+set.seed(42)
+idx <- sample(ncol(sampleData$bulk), 10)
+results <- deconvolve(
+  bulk      = sampleData$bulk[, idx],
+  reference = sampleData$cellTypeExpr,
+  n_cores   = 1,
+  maxit     = 50
+)
+
+# Align bulkRatio cell-type ORDER to match results$proportions columns
+# (bulkRatio rows and cellTypeExpr columns may be in a different order).
+ct <- colnames(results$proportions)
+true_props <- t(sampleData$bulkRatio[ct, idx, drop = FALSE])
+
+# Per-sample Pearson correlation with ground truth
+cor_values <- sapply(seq_len(nrow(results$proportions)), function(i) {
+  cor(results$proportions[i, ], true_props[i, ], method = "pearson")
+})
+mean(cor_values, na.rm = TRUE)
+
+}
+\keyword{datasets}
diff --git a/DeOPUS/tests/testthat.R b/tests/testthat.R
similarity index 100%
rename from DeOPUS/tests/testthat.R
rename to tests/testthat.R
diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R
new file mode 100644
index 0000000..75f419b
--- /dev/null
+++ b/tests/testthat/test-data.R
@@ -0,0 +1,110 @@
+library(DeOPUS)
+
+# ── sampleData structure ──────────────────────────────────────────────────────
+
+test_that("sampleData loads and is a list", {
+  data(sampleData)
+  expect_true(is.list(sampleData))
+})
+
+test_that("sampleData has required components", {
+  data(sampleData)
+  expect_true(all(c("bulk", "bulkRatio", "cellTypeExpr", "signature",
+                     "singleCellLabels", "singleCellSubjects") %in% names(sampleData)))
+})
+
+test_that("sampleData$bulk is a numeric matrix with named dimensions", {
+  data(sampleData)
+  expect_true(is.matrix(sampleData$bulk))
+  expect_true(is.numeric(sampleData$bulk))
+  expect_false(is.null(rownames(sampleData$bulk)))
+  expect_false(is.null(colnames(sampleData$bulk)))
+})
+
+test_that("sampleData$bulk has expected dimensions (11852 genes x 512 samples)", {
+  data(sampleData)
+  expect_equal(nrow(sampleData$bulk), 11852)
+  expect_equal(ncol(sampleData$bulk), 512)
+})
+
+test_that("sampleData$cellTypeExpr has expected dimensions (11852 genes x 2 cell types)", {
+  data(sampleData)
+  expect_equal(nrow(sampleData$cellTypeExpr), 11852)
+  expect_equal(ncol(sampleData$cellTypeExpr), 2)
+})
+
+test_that("sampleData$bulk and $cellTypeExpr share gene names", {
+  data(sampleData)
+  common <- intersect(rownames(sampleData$bulk), rownames(sampleData$cellTypeExpr))
+  expect_gt(length(common), 0)
+})
+
+test_that("sampleData$bulkRatio proportions are between 0 and 1", {
+  data(sampleData)
+  expect_true(all(sampleData$bulkRatio >= 0))
+  expect_true(all(sampleData$bulkRatio <= 1))
+})
+
+test_that("sampleData$bulkRatio column sums are approximately 1", {
+  data(sampleData)
+  col_sums <- colSums(sampleData$bulkRatio)
+  expect_true(all(abs(col_sums - 1) < 1e-6))
+})
+
+test_that("sampleData bulk values are non-negative", {
+  data(sampleData)
+  expect_true(all(sampleData$bulk >= 0))
+})
+
+test_that("sampleData bulkRatio rows and cellTypeExpr columns refer to the same cell types", {
+  data(sampleData)
+  # Same set of names, possibly in different order. Aligning by name is therefore
+  # always safe (and necessary, since the order does differ in this dataset).
+  expect_setequal(rownames(sampleData$bulkRatio), colnames(sampleData$cellTypeExpr))
+})
+
+# ── Integration test: deconvolve on a subset of sampleData ───────────────────
+#
+# This test verifies the *correlation code logic* — alignment by cell-type name,
+# finite outputs, positive sign. It does NOT assert a specific accuracy
+# threshold, because the deconvolution algorithm may legitimately change in the
+# future. The > 0 assertion exists to catch the misalignment bug where unaligned
+# columns produced cor = -1 instead of +1.
+
+test_that("deconvolve runs on sampleData and produces correctly-aligned correlations", {
+  data(sampleData)
+
+  set.seed(42)
+  idx <- sample(ncol(sampleData$bulk), 10)
+
+  results <- deconvolve(
+    bulk      = sampleData$bulk[, idx],
+    reference = sampleData$cellTypeExpr,
+    n_cores   = 1,
+    maxit     = 50
+  )
+
+  # Output shape
+  expect_equal(nrow(results$proportions), 10)
+  expect_equal(ncol(results$proportions), 2)
+  expect_true(all(results$proportions >= 0))
+  expect_true(all(abs(rowSums(results$proportions) - 1) < 1e-6))
+
+  # Align bulkRatio cell-type ORDER to match results$proportions columns
+  ct <- colnames(results$proportions)
+  true_props <- t(sampleData$bulkRatio[ct, idx, drop = FALSE])
+
+  cor_values <- sapply(seq_len(nrow(results$proportions)), function(i) {
+    cor(results$proportions[i, ], true_props[i, ], method = "pearson")
+  })
+
+  # One correlation per sample
+  expect_length(cor_values, 10)
+
+  # All finite (catches NA/NaN propagation in the correlation code)
+  expect_true(all(is.finite(cor_values)))
+
+  # Sign check: catches the misalignment bug. Naive cor() on reversed cell-type
+  # order would give mean = -1 instead of a positive number.
+  expect_gt(mean(cor_values), 0)
+})
diff --git a/tests/testthat/test-deconvolve.R b/tests/testthat/test-deconvolve.R
new file mode 100644
index 0000000..4c9fad6
--- /dev/null
+++ b/tests/testthat/test-deconvolve.R
@@ -0,0 +1,217 @@
+library(DeOPUS)
+
+# ── Helpers ────────────────────────────────────────────────────────────────────
+
+make_data <- function(n_genes = 100, n_cell_types = 3, n_samples = 5, seed = 42) {
+  set.seed(seed)
+  reference <- matrix(
+    rpois(n_genes * n_cell_types, lambda = 100),
+    nrow = n_genes, ncol = n_cell_types
+  )
+  rownames(reference) <- paste0("Gene", seq_len(n_genes))
+  colnames(reference) <- paste0("CellType", seq_len(n_cell_types))
+
+  true_props <- matrix(
+    runif(n_samples * n_cell_types),
+    nrow = n_samples, ncol = n_cell_types
+  )
+  true_props <- true_props / rowSums(true_props)
+
+  bulk <- reference %*% t(true_props) +
+    matrix(rnorm(n_genes * n_samples, sd = 5), nrow = n_genes)
+  bulk[bulk < 0] <- 0
+  colnames(bulk) <- paste0("Sample", seq_len(n_samples))
+
+  list(bulk = bulk, reference = reference, true_props = true_props)
+}
+
+# ── Core functionality ─────────────────────────────────────────────────────────
+
+test_that("deconvolve returns list with proportions and convergence", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_true(is.list(res))
+  expect_named(res, c("proportions", "convergence"))
+})
+
+test_that("proportions matrix has correct dimensions", {
+  d <- make_data(n_genes = 100, n_cell_types = 3, n_samples = 5)
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_equal(nrow(res$proportions), 5)
+  expect_equal(ncol(res$proportions), 3)
+})
+
+test_that("proportions sum to 1 for every sample", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  row_sums <- rowSums(res$proportions)
+  expect_true(all(abs(row_sums - 1) < 1e-6))
+})
+
+test_that("proportions are non-negative", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_true(all(res$proportions >= 0))
+})
+
+test_that("output row names match bulk column names", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_equal(rownames(res$proportions), colnames(d$bulk))
+})
+
+test_that("output column names match reference column names", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_equal(colnames(res$proportions), colnames(d$reference))
+})
+
+test_that("convergence list has one entry per sample", {
+  d <- make_data(n_samples = 4)
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_length(res$convergence, 4)
+  expect_equal(names(res$convergence), colnames(d$bulk))
+})
+
+# ── Parameter variants ─────────────────────────────────────────────────────────
+
+test_that("deconvolve works with power = 1 (MAE loss)", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, power = 1, n_cores = 1, maxit = 50)
+
+  expect_true(all(res$proportions >= 0))
+  expect_true(all(abs(rowSums(res$proportions) - 1) < 1e-6))
+})
+
+test_that("deconvolve works with maxit = 1 (minimal iterations)", {
+  d <- make_data()
+  expect_no_error(deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 1))
+})
+
+test_that("deconvolve works with custom alpha", {
+  d <- make_data()
+  res <- deconvolve(d$bulk, d$reference, alpha = 0.1, n_cores = 1, maxit = 50)
+
+  expect_true(all(res$proportions >= 0))
+})
+
+test_that("verbose = TRUE emits messages without error", {
+  d <- make_data()
+  expect_no_error(
+    expect_message(
+      deconvolve(d$bulk, d$reference, verbose = TRUE, n_cores = 1, maxit = 5)
+    )
+  )
+})
+
+# ── Input coercion ─────────────────────────────────────────────────────────────
+
+test_that("deconvolve accepts data.frame inputs and coerces to matrix", {
+  d <- make_data()
+  bulk_df <- as.data.frame(d$bulk)
+  ref_df  <- as.data.frame(d$reference)
+
+  res <- deconvolve(bulk_df, ref_df, n_cores = 1, maxit = 50)
+
+  expect_true(is.matrix(res$proportions))
+  expect_equal(nrow(res$proportions), ncol(d$bulk))
+})
+
+# ── Gene overlap handling ──────────────────────────────────────────────────────
+
+test_that("deconvolve handles partial gene overlap", {
+  set.seed(1)
+  reference <- matrix(rpois(100 * 3, lambda = 100), nrow = 100, ncol = 3)
+  rownames(reference) <- paste0("Gene", 1:100)
+  colnames(reference) <- paste0("CellType", 1:3)
+
+  bulk <- matrix(rpois(100 * 2, lambda = 100), nrow = 100, ncol = 2)
+  rownames(bulk) <- paste0("Gene", 50:149)
+  colnames(bulk) <- paste0("Sample", 1:2)
+
+  res <- deconvolve(bulk, reference, n_cores = 1, maxit = 50)
+
+  expect_equal(nrow(res$proportions), 2)
+  expect_equal(ncol(res$proportions), 3)
+})
+
+test_that("deconvolve errors when no genes overlap", {
+  reference <- matrix(1:9, nrow = 3, ncol = 3)
+  rownames(reference) <- c("GeneA", "GeneB", "GeneC")
+  colnames(reference) <- paste0("CellType", 1:3)
+
+  bulk <- matrix(1:6, nrow = 3, ncol = 2)
+  rownames(bulk) <- c("GeneX", "GeneY", "GeneZ")
+  colnames(bulk) <- paste0("Sample", 1:2)
+
+  expect_error(deconvolve(bulk, reference), "No common genes")
+})
+
+# ── Edge cases ─────────────────────────────────────────────────────────────────
+
+test_that("deconvolve works with a single sample", {
+  d <- make_data(n_samples = 1)
+  res <- deconvolve(d$bulk, d$reference, n_cores = 1, maxit = 50)
+
+  expect_equal(nrow(res$proportions), 1)
+  expect_true(abs(sum(res$proportions) - 1) < 1e-6)
+})
+
+test_that("deconvolve works with a single cell type", {
+  set.seed(7)
+  ref <- matrix(rpois(50, lambda = 100), nrow = 50, ncol = 1)
+  rownames(ref) <- paste0("Gene", 1:50)
+  colnames(ref) <- "CellType1"
+
+  bulk <- matrix(rpois(50 * 3, lambda = 100), nrow = 50, ncol = 3)
+  rownames(bulk) <- paste0("Gene", 1:50)
+  colnames(bulk) <- paste0("Sample", 1:3)
+
+  res <- deconvolve(bulk, ref, n_cores = 1, maxit = 50)
+
+  expect_equal(ncol(res$proportions), 1)
+  expect_true(all(res$proportions >= 0))
+  # With one cell type, proportions are either 1 (converged) or 0 (failed)
+  expect_true(all(res$proportions %in% c(0, 1) | abs(res$proportions - 1) < 1e-6))
+})
+
+test_that("proportions remain non-negative under heavy noise", {
+  set.seed(99)
+  n_genes <- 200
+  ref <- matrix(rpois(n_genes * 4, lambda = 200), nrow = n_genes, ncol = 4)
+  rownames(ref) <- paste0("Gene", seq_len(n_genes))
+  colnames(ref) <- paste0("CT", 1:4)
+
+  bulk <- matrix(abs(rnorm(n_genes * 10, mean = 100, sd = 500)), nrow = n_genes)
+  rownames(bulk) <- paste0("Gene", seq_len(n_genes))
+  colnames(bulk) <- paste0("S", 1:10)
+
+  res <- deconvolve(bulk, ref, n_cores = 1, maxit = 30)
+
+  # Non-negativity must always hold
+  expect_true(all(res$proportions >= 0))
+  # Row sums are either 1 (converged) or 0 (optimization failed under extreme noise)
+  row_sums <- rowSums(res$proportions)
+  expect_true(all(abs(row_sums - 1) < 1e-6 | row_sums == 0))
+})
+
+# ── Internal utility ───────────────────────────────────────────────────────────
+
+test_that("%||% returns left value when non-NULL", {
+  f <- DeOPUS:::`%||%`
+  expect_equal(f("a", "b"), "a")
+  expect_equal(f(42L, 0L), 42L)
+})
+
+test_that("%||% returns right value when left is NULL", {
+  f <- DeOPUS:::`%||%`
+  expect_equal(f(NULL, "default"), "default")
+  expect_null(f(NULL, NULL))
+})
diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd
new file mode 100644
index 0000000..07590e8
--- /dev/null
+++ b/vignettes/getting_started.Rmd
@@ -0,0 +1,159 @@
+---
+title: "Getting Started with DeOPUS"
+author: "Ha Nguyen"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Getting Started with DeOPUS}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.width = 7,
+  fig.height = 5
+)
+```
+
+## Introduction
+
+DeOPUS (Deconvolution via Optimized Power-transformed Unmixing with Shrinkage)
+is a reference-based cellular deconvolution method that estimates cell-type
+proportions from bulk RNA sequencing data.
+
+## Installation
+
+Install DeOPUS from GitHub:
+
+```{r install, eval=FALSE}
+# Install devtools if not already installed
+install.packages("devtools")
+
+# Install DeOPUS
+devtools::install_github("tinnlab/DeOPUS")
+```
+
+Load the package:
+
+```{r load}
+library(DeOPUS)
+```
+
+## Input Data Format
+
+DeOPUS requires two inputs:
+
+1. **Bulk matrix** (`bulk`): A genes × samples matrix of bulk RNA-seq expression
+   values in linear scale (non-negative, not log-transformed).
+2. **Reference matrix** (`reference`): A genes × cell types matrix of average
+   expression profiles per cell type, typically derived from scRNA-seq.
+
+Both matrices must have matching gene identifiers as row names.
+
+## Quick Example
+
+The package includes a built-in simulated benchmark dataset (`sampleData`) with
+known ground-truth proportions for 2 cell types across 512 samples.
+
+```{r sample-data, eval=FALSE}
+# Load the built-in sample dataset
+data(sampleData)
+
+# Inspect dimensions
+dim(sampleData$bulk)          # 11852 genes x 512 samples
+dim(sampleData$cellTypeExpr)  # 11852 genes x 2 cell types
+
+# Run deconvolution on a small subset (10 samples for speed)
+set.seed(42)
+idx <- sample(ncol(sampleData$bulk), 10)
+
+results <- deconvolve(
+  bulk      = sampleData$bulk[, idx],
+  reference = sampleData$cellTypeExpr,
+  alpha     = 0.01,
+  n_cores   = 1,
+  verbose   = TRUE
+)
+
+# View estimated proportions (samples x cell types)
+head(results$proportions)
+```
+
+### Evaluate Against Ground Truth
+
+`bulkRatio` row names and `cellTypeExpr` column names may be in different
+orders, so align by cell-type name before computing the correlation:
+
+```{r evaluate, eval=FALSE}
+# Align bulkRatio cell-type ORDER to match results$proportions columns
+ct <- colnames(results$proportions)
+true_props <- t(sampleData$bulkRatio[ct, idx, drop = FALSE])  # 10 x 2
+
+# Pearson correlation per sample
+cor_values <- sapply(seq_len(nrow(results$proportions)), function(i) {
+  cor(results$proportions[i, ], true_props[i, ], method = "pearson")
+})
+
+cat("Mean Pearson correlation:", round(mean(cor_values, na.rm = TRUE), 3), "\n")
+```
+
+## Method Details
+
+DeOPUS applies a multi-level adaptive transformation:
+
+### 1. Hierarchical Shrinkage
+
+- **Local shrinkage** ($\tau$): Attenuates the influence of individual
+  high-variance genes.
+- **Global shrinkage** ($\phi$): Provides overall regularization across the
+  expression profile.
+
+### 2. Power Transformation
+
+A variance-stabilizing power transformation handles the high dynamic range:
+
+$$z = \frac{(q/s + \kappa)^{\mu \omega g} - \kappa^\mu}{\mu g}$$
+
+where $q$ is the expression value, $s$ is a scale factor, $g$ is the global
+shrinkage weight, and $\kappa$, $\mu$, $\omega$ are shape parameters.
+
+### 3. Quantile Normalization
+
+Spline-based quantile normalization ensures robust comparison between profiles.
+
+### 4. Optimization
+
+DeOPUS uses L-BFGS-B optimization with box constraints (proportions ≥ 0) to
+minimize the loss between transformed bulk and reconstructed expression.
+
+## Parameters
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `alpha`   | 0.01    | Regularization parameter |
+| `power`   | 2       | Loss function power (1 = MAE, 2 = MSE) |
+| `n_cores` | 1       | Parallel processing cores |
+| `maxit`   | 100     | Maximum optimization iterations |
+| `verbose` | FALSE   | Print progress messages |
+
+## Benchmarking
+
+The package ships benchmark and analysis scripts in `inst/scripts/`. After
+installation, source them via `system.file()`:
+
+```{r benchmark, eval=FALSE}
+# Run benchmark on real datasets
+source(system.file("scripts/benchmark/run_benchmark_real.R", package = "DeOPUS"))
+
+# Generate visualization dashboard
+source(system.file("scripts/analysis/visualize_results.R", package = "DeOPUS"))
+```
+
+## Session Info
+
+```{r sessioninfo}
+sessionInfo()
+```