From c3c72e83a093be85e40638dfad967e9d950195c4 Mon Sep 17 00:00:00 2001
From: shijiashuai <jiashuai.shi@qq.com>
Date: Fri, 15 May 2026 09:43:17 +0800
Subject: [PATCH 1/2] docs(site): fix pages baseline and repo identity

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/pages.yml  |  6 ++++-
 README.md                    | 32 +++++++++++++-------------
 docs/.vitepress/config.ts    |  8 +++----
 docs/index.md                | 18 ++-------------
 docs/package.json            |  3 ++-
 docs/scripts/verify-site.mjs | 44 ++++++++++++++++++++++++++++++++++++
 6 files changed, 73 insertions(+), 38 deletions(-)
 create mode 100644 docs/scripts/verify-site.mjs
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index e274dff..bd725a0 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -21,7 +21,7 @@ concurrency:
 jobs:
   deploy:
     # Only run on the original repository, not on forks
-    if: github.repository == 'LessUp/gpu-spmv'
+    if: github.repository == 'AICL-Lab/gpu-spmv'
     runs-on: ubuntu-latest
     environment:
       name: github-pages
@@ -68,6 +68,10 @@ jobs:
         working-directory: docs
         run: npm run sync
 
+      - name: Verify docs site
+        working-directory: docs
+        run: npm run verify:site
+
       - name: Build docs
         working-directory: docs
         env:
diff --git a/README.md b/README.md
index 735abfe..cf7ff4b 100644
--- a/README.md
+++ b/README.md
@@ -16,16 +16,16 @@
 </p>
 
 <p align="center">
-  <a href="https://github.com/LessUp/gpu-spmv/actions/workflows/ci.yml">
-    <img src="https://github.com/LessUp/gpu-spmv/actions/workflows/ci.yml/badge.svg" alt="CI">
+  <a href="https://github.com/AICL-Lab/gpu-spmv/actions/workflows/ci.yml">
+    <img src="https://github.com/AICL-Lab/gpu-spmv/actions/workflows/ci.yml/badge.svg" alt="CI">
   </a>
-  <a href="https://lessup.github.io/gpu-spmv/">
+  <a href="https://aicl-lab.github.io/gpu-spmv/">
     <img src="https://img.shields.io/badge/Docs-GitHub%20Pages-2EA44F?logo=github" alt="Documentation">
   </a>
-  <a href="https://github.com/LessUp/gpu-spmv/releases">
-    <img src="https://img.shields.io/github/v/release/LessUp/gpu-spmv?color=blue" alt="Release">
+  <a href="https://github.com/AICL-Lab/gpu-spmv/releases">
+    <img src="https://img.shields.io/github/v/release/AICL-Lab/gpu-spmv?color=blue" alt="Release">
   </a>
-  <a href="https://github.com/LessUp/gpu-spmv/blob/main/LICENSE">
+  <a href="https://github.com/AICL-Lab/gpu-spmv/blob/main/LICENSE">
     <img src="https://img.shields.io/badge/License-MIT-green" alt="License">
   </a>
 </p>
@@ -105,7 +105,7 @@ if (result.error != SpMVError::SUCCESS) {
 
 ```bash
 # 1. Clone
-git clone https://github.com/LessUp/gpu-spmv.git && cd gpu-spmv
+git clone https://github.com/AICL-Lab/gpu-spmv.git && cd gpu-spmv
 
 # 2. Build
 cmake --preset release && cmake --build --preset release
@@ -143,7 +143,7 @@ int main() {
 }
 ```
 
-📚 **More examples**: [Documentation Site](https://lessup.github.io/gpu-spmv/examples)
+📚 **More examples**: [Documentation Site](https://aicl-lab.github.io/gpu-spmv/en/examples/basic-spmv)
 
 ---
 
@@ -167,7 +167,7 @@ Benchmark on **NVIDIA RTX 3090** (Ampere, 936 GB/s peak):
 # Avg time: 23.5 ms | Bandwidth: 69.8 GB/s (71.5% of peak)
 ```
 
-📈 **Full performance guide**: [Performance Optimization](https://lessup.github.io/gpu-spmv/performance)
+📈 **Full performance guide**: [Performance Optimization](https://aicl-lab.github.io/gpu-spmv/en/performance/optimization-guide)
 
 ---
 
@@ -194,16 +194,16 @@ gpu-spmv/
 
 ## 📚 Documentation
 
-Complete documentation is available at **[https://lessup.github.io/gpu-spmv/](https://lessup.github.io/gpu-spmv/)**:
+Complete documentation is available at **[https://aicl-lab.github.io/gpu-spmv/](https://aicl-lab.github.io/gpu-spmv/)**:
 
 | Document | Description |
 |:---------|:------------|
-| [📦 Installation Guide](https://lessup.github.io/gpu-spmv/installation) | System requirements, detailed installation |
-| [📚 API Reference](https://lessup.github.io/gpu-spmv/api) | Complete API documentation, data structures |
-| [📝 Examples](https://lessup.github.io/gpu-spmv/examples) | 7 complete code examples (basic → advanced) |
-| [🚀 Performance Guide](https://lessup.github.io/gpu-spmv/performance) | Tuning strategies, benchmark data |
-| [🏗️ Architecture](https://lessup.github.io/gpu-spmv/architecture) | System design, kernel selection |
-| [📋 Changelog](https://lessup.github.io/gpu-spmv/changelog) | Version history, migration guide |
+| [📦 Installation Guide](https://aicl-lab.github.io/gpu-spmv/en/quickstart) | System requirements, detailed installation |
+| [📚 API Reference](https://aicl-lab.github.io/gpu-spmv/en/api/spmv) | Complete API documentation, data structures |
+| [📝 Examples](https://aicl-lab.github.io/gpu-spmv/en/examples/basic-spmv) | End-to-end code example and walkthrough |
+| [🚀 Performance Guide](https://aicl-lab.github.io/gpu-spmv/en/performance/optimization-guide) | Tuning strategies, benchmark data |
+| [🏗️ Architecture](https://aicl-lab.github.io/gpu-spmv/en/architecture/overview) | System design, kernel selection |
+| [📋 Changelog](https://aicl-lab.github.io/gpu-spmv/en/changelog) | Version history, migration guide |
 
 ---
 
diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts
index 3cf4ba1..5da08f9 100644
--- a/docs/.vitepress/config.ts
+++ b/docs/.vitepress/config.ts
@@ -124,7 +124,7 @@ export default withMermaid(
             ]
           },
           editLink: {
-            pattern: 'https://github.com/LessUp/gpu-spmv/edit/main/docs/:path',
+            pattern: 'https://github.com/AICL-Lab/gpu-spmv/edit/main/docs/:path',
             text: '在 GitHub 上编辑此页'
           },
           docFooter: { prev: '上一页', next: '下一页' },
@@ -210,7 +210,7 @@ export default withMermaid(
             ]
           },
           editLink: {
-            pattern: 'https://github.com/LessUp/gpu-spmv/edit/main/docs/:path',
+            pattern: 'https://github.com/AICL-Lab/gpu-spmv/edit/main/docs/:path',
             text: 'Edit this page on GitHub'
           },
           outline: { label: 'On This Page', level: [2, 3] }
@@ -222,12 +222,12 @@ export default withMermaid(
       logo: '/images/logo.svg',
       siteTitle: 'GPU SpMV',
       socialLinks: [
-        { icon: 'github', link: 'https://github.com/LessUp/gpu-spmv' }
+        { icon: 'github', link: 'https://github.com/AICL-Lab/gpu-spmv' }
       ],
       search: { provider: 'local' },
       footer: {
         message: 'MIT License',
-        copyright: '© 2024-2026 LessUp'
+        copyright: '© 2024-2026 AICL-Lab'
       },
       outline: [2, 3]
     },
diff --git a/docs/index.md b/docs/index.md
index 0177402..71e521c 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -2,7 +2,8 @@
 layout: home
 hero:
   name: GPU SpMV
-  text: ' '
+  text: Bilingual Technical Whitepaper and Architecture Showcase
+  tagline: Read the project as a serious engineering artifact, not only as source code.
   actions:
     - theme: brand
       text: 简体中文
@@ -11,18 +12,3 @@ hero:
       text: English
       link: /en/
 ---
-
-<script setup>
-import { onMounted } from 'vue'
-import { useRouter } from 'vitepress'
-
-onMounted(() => {
-  const router = useRouter()
-  const lang = navigator.language || navigator.userLanguage
-  if (lang.startsWith('zh')) {
-    router.go('/zh/')
-  } else {
-    router.go('/en/')
-  }
-})
-</script>
diff --git a/docs/package.json b/docs/package.json
index b51c899..6dd19ef 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -5,8 +5,9 @@
   "type": "module",
   "scripts": {
     "sync": "node scripts/sync-changelog.mjs",
+    "verify:site": "node scripts/verify-site.mjs",
     "dev": "npm run sync && vitepress dev",
-    "build": "npm run sync && vitepress build",
+    "build": "npm run sync && npm run verify:site && vitepress build",
     "preview": "vitepress preview"
   },
   "devDependencies": {
diff --git a/docs/scripts/verify-site.mjs b/docs/scripts/verify-site.mjs
new file mode 100644
index 0000000..7106669
--- /dev/null
+++ b/docs/scripts/verify-site.mjs
@@ -0,0 +1,44 @@
+import { readFileSync } from 'node:fs'
+import { join } from 'node:path'
+
+const root = process.cwd()
+const canonicalRepo = 'AICL-Lab/gpu-spmv'
+
+const files = {
+  readme: join(root, '..', 'README.md'),
+  config: join(root, '.vitepress', 'config.ts'),
+  pages: join(root, '..', '.github', 'workflows', 'pages.yml'),
+  index: join(root, 'index.md')
+}
+
+const contents = Object.fromEntries(
+  Object.entries(files).map(([key, filePath]) => [key, readFileSync(filePath, 'utf8')])
+)
+
+const failures = []
+
+if (!contents.config.includes(canonicalRepo)) {
+  failures.push('config missing canonical repo')
+}
+
+if (!contents.pages.includes("github.repository == 'AICL-Lab/gpu-spmv'")) {
+  failures.push('pages workflow missing canonical repo guard')
+}
+
+if (/LessUp\/gpu-spmv|github\.com\/LessUp/.test(Object.values(contents).join('\n'))) {
+  failures.push('legacy LessUp repo references still present')
+}
+
+if (/useRouter\(|router\.go\('\/(zh|en)\//.test(contents.index)) {
+  failures.push('root docs index still auto-redirects by locale')
+}
+
+if (failures.length > 0) {
+  console.error('verify-site failed:')
+  for (const failure of failures) {
+    console.error(`- ${failure}`)
+  }
+  process.exit(1)
+}
+
+console.log('verify-site: ok')

From e9b8ca774795818bde4dcabcc7010a7543e2b889 Mon Sep 17 00:00:00 2001
From: shijiashuai <jiashuai.shi@qq.com>
Date: Fri, 15 May 2026 10:02:43 +0800
Subject: [PATCH 2/2] feat(docs): rebuild whitepaper site and content

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 CHANGELOG.md                                  |   6 +-
 docs/.vitepress/config.ts                     |  23 +-
 docs/.vitepress/data/benchmarks.ts            |  20 +
 docs/.vitepress/data/references.ts            |  48 +
 docs/.vitepress/data/site.ts                  |  29 +
 docs/.vitepress/theme/Layout.vue              |  13 +
 .../theme/components/ArchitectureCanvas.vue   |  73 ++
 .../theme/components/CalloutPanel.vue         |  20 +
 .../theme/components/CitationGrid.vue         |  20 +
 .../theme/components/HeroEvidence.vue         |  47 +
 .../theme/components/MetricStrip.vue          |  19 +
 .../theme/components/ThemeAwareArt.vue        |  17 +
 .../theme/components/WhitepaperSection.vue    |  24 +
 docs/.vitepress/theme/index.ts                |  20 +-
 docs/.vitepress/theme/style.css               | 933 +-----------------
 docs/.vitepress/theme/styles/base.css         |  67 ++
 docs/.vitepress/theme/styles/citation.css     |  25 +
 docs/.vitepress/theme/styles/diagram.css      |  54 +
 docs/.vitepress/theme/styles/home.css         |  89 ++
 docs/.vitepress/theme/styles/paper.css        |  44 +
 docs/.vitepress/theme/styles/tokens.css       |  40 +
 docs/en/architecture/execution-pipeline.md    |  28 +
 docs/en/architecture/overview.md              |  10 +-
 docs/en/architecture/reliability.md           |  25 +
 docs/en/architecture/spec-driven.md           |   2 +-
 docs/en/changelog.md                          |   6 +-
 docs/en/citation.md                           |  63 +-
 docs/en/contributing.md                       |   4 +-
 docs/en/faq.md                                |   2 +-
 docs/en/index.md                              | 179 +---
 docs/en/performance/benchmarks.md             |  36 +-
 docs/en/performance/methodology.md            |  19 +
 docs/en/quickstart.md                         |   2 +-
 docs/en/references.md                         |  83 +-
 docs/en/whitepaper/index.md                   | 114 +--
 docs/en/whitepaper/performance.md             |   2 +-
 docs/public/images/brand/logo-mark-dark.svg   |  11 +
 docs/public/images/brand/logo-mark-light.svg  |  11 +
 docs/public/images/favicon.svg                |   2 +-
 docs/public/images/logo.svg                   |   2 +-
 docs/public/images/og-image.svg               |   4 +-
 docs/public/images/social/og-dark.svg         |  28 +
 docs/public/images/social/og-light.svg        |  28 +
 docs/scripts/verify-site.mjs                  | 147 ++-
 docs/zh/architecture/execution-pipeline.md    |  28 +
 docs/zh/architecture/overview.md              |  38 +-
 docs/zh/architecture/reliability.md           |  25 +
 docs/zh/architecture/spec-driven.md           |   2 +-
 docs/zh/changelog.md                          |   6 +-
 docs/zh/citation.md                           |  63 +-
 docs/zh/contributing.md                       |   4 +-
 docs/zh/faq.md                                |   2 +-
 docs/zh/index.md                              | 179 +---
 docs/zh/performance/benchmarks.md             |  36 +-
 docs/zh/performance/methodology.md            |  19 +
 docs/zh/quickstart.md                         |   2 +-
 docs/zh/references.md                         |  81 +-
 docs/zh/whitepaper/index.md                   | 112 +--
 docs/zh/whitepaper/performance.md             |   2 +-
 59 files changed, 1306 insertions(+), 1732 deletions(-)
 create mode 100644 docs/.vitepress/data/benchmarks.ts
 create mode 100644 docs/.vitepress/data/references.ts
 create mode 100644 docs/.vitepress/data/site.ts
 create mode 100644 docs/.vitepress/theme/Layout.vue
 create mode 100644 docs/.vitepress/theme/components/ArchitectureCanvas.vue
 create mode 100644 docs/.vitepress/theme/components/CalloutPanel.vue
 create mode 100644 docs/.vitepress/theme/components/CitationGrid.vue
 create mode 100644 docs/.vitepress/theme/components/HeroEvidence.vue
 create mode 100644 docs/.vitepress/theme/components/MetricStrip.vue
 create mode 100644 docs/.vitepress/theme/components/ThemeAwareArt.vue
 create mode 100644 docs/.vitepress/theme/components/WhitepaperSection.vue
 create mode 100644 docs/.vitepress/theme/styles/base.css
 create mode 100644 docs/.vitepress/theme/styles/citation.css
 create mode 100644 docs/.vitepress/theme/styles/diagram.css
 create mode 100644 docs/.vitepress/theme/styles/home.css
 create mode 100644 docs/.vitepress/theme/styles/paper.css
 create mode 100644 docs/.vitepress/theme/styles/tokens.css
 create mode 100644 docs/en/architecture/execution-pipeline.md
 create mode 100644 docs/en/architecture/reliability.md
 create mode 100644 docs/en/performance/methodology.md
 create mode 100644 docs/public/images/brand/logo-mark-dark.svg
 create mode 100644 docs/public/images/brand/logo-mark-light.svg
 create mode 100644 docs/public/images/social/og-dark.svg
 create mode 100644 docs/public/images/social/og-light.svg
 create mode 100644 docs/zh/architecture/execution-pipeline.md
 create mode 100644 docs/zh/architecture/reliability.md
 create mode 100644 docs/zh/performance/methodology.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0df6d91..2392e06 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,7 +42,7 @@ This is the first stable release of GPU SpMV, featuring complete CSR and ELL for
 - Doxygen-compatible documentation
 
 #### Documentation
-- Full documentation site at https://lessup.github.io/gpu-spmv/
+- Full documentation site at https://aicl-lab.github.io/gpu-spmv/
 - Bilingual README (English and Chinese)
 - API reference, performance guide, and code examples
 - Architecture documentation and design decision records
@@ -140,5 +140,5 @@ No breaking changes from pre-release versions. The API is now stable.
 
 ---
 
-[1.0.0]: https://github.com/LessUp/gpu-spmv/releases/tag/v1.0.0
-[0.1.0]: https://github.com/LessUp/gpu-spmv/tree/7d6dd0c
+[1.0.0]: https://github.com/AICL-Lab/gpu-spmv/releases/tag/v1.0.0
+[0.1.0]: https://github.com/AICL-Lab/gpu-spmv/tree/7d6dd0c
diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts
index 5da08f9..051eefd 100644
--- a/docs/.vitepress/config.ts
+++ b/docs/.vitepress/config.ts
@@ -26,11 +26,11 @@ export default withMermaid(
           content: 'High-Performance CUDA Sparse Matrix-Vector Multiplication Library'
         }
       ],
-      ['meta', { property: 'og:image', content: `${base}images/og-image.svg` }],
+      ['meta', { property: 'og:image', content: `${base}images/social/og-dark.svg` }],
       ['meta', { name: 'twitter:card', content: 'summary_large_image' }],
       ['meta', { name: 'twitter:title', content: 'GPU SpMV' }],
       ['meta', { name: 'twitter:description', content: 'High-Performance CUDA Sparse Matrix-Vector Multiplication Library' }],
-      ['meta', { name: 'twitter:image', content: `${base}images/og-image.svg` }],
+      ['meta', { name: 'twitter:image', content: `${base}images/social/og-dark.svg` }],
       ['link', { rel: 'icon', href: `${base}images/favicon.svg`, type: 'image/svg+xml' }],
       ['link', { rel: 'preconnect', href: 'https://fonts.googleapis.com' }],
       ['link', { rel: 'preconnect', href: 'https://fonts.gstatic.com', crossorigin: '' }],
@@ -59,10 +59,10 @@ export default withMermaid(
         themeConfig: {
           nav: [
             { text: '技术白皮书', link: '/zh/whitepaper/', activeMatch: '/zh/whitepaper/' },
-            { text: '快速开始', link: '/zh/quickstart', activeMatch: '/zh/(quickstart|examples)/' },
             { text: '架构设计', link: '/zh/architecture/overview', activeMatch: '/zh/architecture/' },
+            { text: '性能测试', link: '/zh/performance/benchmarks', activeMatch: '/zh/performance/' },
             { text: 'API 参考', link: '/zh/api/spmv', activeMatch: '/zh/api/' },
-            { text: '性能测试', link: '/zh/performance/benchmarks', activeMatch: '/zh/performance/' }
+            { text: '学术引用', link: '/zh/references', activeMatch: '/zh/(references|citation)' }
           ],
           sidebar: {
             '/zh/': [
@@ -88,8 +88,10 @@ export default withMermaid(
                 collapsed: false,
                 items: [
                   { text: '系统概览', link: '/zh/architecture/overview' },
+                  { text: '执行流水线', link: '/zh/architecture/execution-pipeline' },
                   { text: 'Kernel 选择策略', link: '/zh/architecture/kernel-selection' },
                   { text: '内存布局', link: '/zh/architecture/memory-layout' },
+                  { text: '可靠性约束', link: '/zh/architecture/reliability' },
                   { text: 'Spec-Driven 开发', link: '/zh/architecture/spec-driven' }
                 ]
               },
@@ -97,6 +99,7 @@ export default withMermaid(
                 text: '性能优化',
                 collapsed: false,
                 items: [
+                  { text: '性能方法学', link: '/zh/performance/methodology' },
                   { text: '基准测试', link: '/zh/performance/benchmarks' },
                   { text: '优化指南', link: '/zh/performance/optimization-guide' }
                 ]
@@ -145,10 +148,10 @@ export default withMermaid(
         themeConfig: {
           nav: [
             { text: 'Whitepaper', link: '/en/whitepaper/', activeMatch: '/en/whitepaper/' },
-            { text: 'Getting Started', link: '/en/quickstart', activeMatch: '/en/(quickstart|examples)/' },
             { text: 'Architecture', link: '/en/architecture/overview', activeMatch: '/en/architecture/' },
+            { text: 'Benchmarks', link: '/en/performance/benchmarks', activeMatch: '/en/performance/' },
             { text: 'API Reference', link: '/en/api/spmv', activeMatch: '/en/api/' },
-            { text: 'Benchmarks', link: '/en/performance/benchmarks', activeMatch: '/en/performance/' }
+            { text: 'References', link: '/en/references', activeMatch: '/en/(references|citation)' }
           ],
           sidebar: {
             '/en/': [
@@ -174,8 +177,10 @@ export default withMermaid(
                 collapsed: false,
                 items: [
                   { text: 'System Overview', link: '/en/architecture/overview' },
+                  { text: 'Execution Pipeline', link: '/en/architecture/execution-pipeline' },
                   { text: 'Kernel Selection', link: '/en/architecture/kernel-selection' },
                   { text: 'Memory Layout', link: '/en/architecture/memory-layout' },
+                  { text: 'Reliability Constraints', link: '/en/architecture/reliability' },
                   { text: 'Spec-Driven Dev', link: '/en/architecture/spec-driven' }
                 ]
               },
@@ -183,6 +188,7 @@ export default withMermaid(
                 text: 'Performance',
                 collapsed: false,
                 items: [
+                  { text: 'Methodology', link: '/en/performance/methodology' },
                   { text: 'Benchmarks', link: '/en/performance/benchmarks' },
                   { text: 'Optimization Guide', link: '/en/performance/optimization-guide' }
                 ]
@@ -219,7 +225,10 @@ export default withMermaid(
     },
 
     themeConfig: {
-      logo: '/images/logo.svg',
+      logo: {
+        light: '/images/brand/logo-mark-light.svg',
+        dark: '/images/brand/logo-mark-dark.svg'
+      },
       siteTitle: 'GPU SpMV',
       socialLinks: [
         { icon: 'github', link: 'https://github.com/AICL-Lab/gpu-spmv' }
diff --git a/docs/.vitepress/data/benchmarks.ts b/docs/.vitepress/data/benchmarks.ts
new file mode 100644
index 0000000..6cb02eb
--- /dev/null
+++ b/docs/.vitepress/data/benchmarks.ts
@@ -0,0 +1,20 @@
+export const benchmarkData = {
+  environment: [
+    { label: 'GPU', value: 'RTX 3090' },
+    { label: 'Peak BW', value: '936 GB/s' },
+    { label: 'CUDA', value: '12.0' },
+    { label: 'CPU', value: 'Ryzen 9 5950X' }
+  ],
+  summary: [
+    { label: 'Typical Utilization', value: '70%+' },
+    { label: 'Best Kernel Family', value: 'Merge Path' },
+    { label: 'Best Regular Pattern', value: 'ELL' },
+    { label: 'Selector Accuracy', value: '100%' }
+  ],
+  scenarios: [
+    { label: 'Very sparse', value: 'Scalar CSR', description: 'avg_nnz_per_row < 4' },
+    { label: 'Uniform rows', value: 'Vector CSR', description: 'Low skewness, good warp utilization' },
+    { label: 'High skew', value: 'Merge Path', description: 'Irregular row lengths with better balancing' },
+    { label: 'ELL-friendly', value: 'ELL Kernel', description: 'Uniform row width, coalesced memory access' }
+  ]
+}
diff --git a/docs/.vitepress/data/references.ts b/docs/.vitepress/data/references.ts
new file mode 100644
index 0000000..b39962c
--- /dev/null
+++ b/docs/.vitepress/data/references.ts
@@ -0,0 +1,48 @@
+export const references = {
+  papers: [
+    {
+      key: 'bell-garland-2009',
+      title: 'Implementing Sparse Matrix-Vector Multiplication on Throughput-Oriented Processors',
+      meta: 'Nathan Bell, Michael Garland · SC 2009',
+      url: 'https://doi.org/10.1145/1654059.1654078'
+    },
+    {
+      key: 'merrill-garland-2016',
+      title: 'Merge-based Parallel Sparse Matrix-Vector Multiplication',
+      meta: 'Duane Merrill, Michael Garland · SC 2016',
+      url: 'https://doi.org/10.1109/SC.2016.57'
+    },
+    {
+      key: 'vazquez-ellrt-2011',
+      title: 'Automatic Tuning of the Sparse Matrix Vector Product on GPUs Based on the ELL-R-T Format',
+      meta: 'Fernando Vázquez et al. · Concurrency and Computation 2011',
+      url: 'https://doi.org/10.1002/cpe.1761'
+    }
+  ],
+  projects: [
+    {
+      key: 'cusparse',
+      title: 'NVIDIA cuSPARSE',
+      meta: 'Vendor baseline and API reference for sparse GPU primitives',
+      url: 'https://developer.nvidia.com/cusparse'
+    },
+    {
+      key: 'ginkgo',
+      title: 'Ginkgo',
+      meta: 'Production-quality sparse linear algebra library with strong docs',
+      url: 'https://github.com/ginkgo-project/ginkgo'
+    },
+    {
+      key: 'moderngpu',
+      title: 'ModernGPU',
+      meta: 'Useful for understanding scan / merge / partitioning techniques on GPU',
+      url: 'https://github.com/moderngpu/moderngpu'
+    },
+    {
+      key: 'suitesparse',
+      title: 'SuiteSparse Matrix Collection',
+      meta: 'Representative real-world sparse matrices for benchmark reasoning',
+      url: 'https://sparse.tamu.edu/' // the matrix collection site, not the SuiteSparse solver repository
+    }
+  ]
+}
diff --git a/docs/.vitepress/data/site.ts b/docs/.vitepress/data/site.ts
new file mode 100644
index 0000000..bdfb3fd
--- /dev/null
+++ b/docs/.vitepress/data/site.ts
@@ -0,0 +1,29 @@
+export type SiteMetric = {
+  label: string
+  value: string
+  description?: string
+}
+
+export const siteData = {
+  repo: 'https://github.com/AICL-Lab/gpu-spmv',
+  zh: {
+    heroTitle: 'GPU SpMV：技术白皮书与架构展示站',
+    heroLead: '把 CUDA 稀疏矩阵向量乘法项目打造成可读、可证、可展示的工程作品。',
+    metrics: [
+      { label: 'Bandwidth Utilization', value: '70%+' },
+      { label: 'Adaptive Kernels', value: '4' },
+      { label: 'Sparse Formats', value: 'CSR + ELL' },
+      { label: 'Property Tests', value: '100+' }
+    ] satisfies SiteMetric[]
+  },
+  en: {
+    heroTitle: 'GPU SpMV: Technical Whitepaper and Architecture Showcase',
+    heroLead: 'Present the CUDA sparse matrix-vector multiplication project as a serious engineering artifact.',
+    metrics: [
+      { label: 'Bandwidth Utilization', value: '70%+' },
+      { label: 'Adaptive Kernels', value: '4' },
+      { label: 'Sparse Formats', value: 'CSR + ELL' },
+      { label: 'Property Tests', value: '100+' }
+    ] satisfies SiteMetric[]
+  }
+}
diff --git a/docs/.vitepress/theme/Layout.vue b/docs/.vitepress/theme/Layout.vue
new file mode 100644
index 0000000..68d3147
--- /dev/null
+++ b/docs/.vitepress/theme/Layout.vue
@@ -0,0 +1,13 @@
+<script setup lang="ts">
+import DefaultTheme from 'vitepress/theme'
+
+const { Layout } = DefaultTheme
+</script>
+
+<template>
+  <Layout>
+    <template #layout-top>
+      <div class="spmv-layout-top" aria-hidden="true"></div>
+    </template>
+  </Layout>
+</template>
diff --git a/docs/.vitepress/theme/components/ArchitectureCanvas.vue b/docs/.vitepress/theme/components/ArchitectureCanvas.vue
new file mode 100644
index 0000000..1043e38
--- /dev/null
+++ b/docs/.vitepress/theme/components/ArchitectureCanvas.vue
@@ -0,0 +1,73 @@
+<script setup lang="ts">
+import { computed } from 'vue'
+
+const props = withDefaults(
+  defineProps<{
+    variant?: 'overview-zh' | 'overview-en'
+  }>(),
+  {
+    variant: 'overview-en'
+  }
+)
+
+const labels = computed(() =>
+  props.variant === 'overview-zh'
+    ? {
+        input: '稀疏矩阵',
+        analysis: '矩阵分析',
+        decision: '内核选择',
+        execution: 'GPU 执行',
+        result: '结果验证'
+      }
+    : {
+        input: 'Sparse Matrix',
+        analysis: 'Matrix Analysis',
+        decision: 'Kernel Choice',
+        execution: 'GPU Execution',
+        result: 'Result Validation'
+      }
+)
+</script>
+
+<template>
+  <div class="spmv-architecture-shell spmv-surface-card">
+    <svg viewBox="0 0 820 260" class="spmv-architecture-canvas" role="img" aria-label="SpMV architecture">
+      <defs>
+        <marker id="arrow" markerWidth="10" markerHeight="10" refX="8" refY="5" orient="auto">
+          <path d="M0,0 L10,5 L0,10 z" fill="currentColor" />
+        </marker>
+      </defs>
+
+      <path class="spmv-link" d="M130 90 H240" marker-end="url(#arrow)" />
+      <path class="spmv-link" d="M370 90 H480" marker-end="url(#arrow)" />
+      <path class="spmv-link" d="M610 90 H680" marker-end="url(#arrow)" /> <!-- stop 10px before the node at x=690 so the arrowhead stays visible -->
+      <path class="spmv-link" d="M540 150 Q540 220 300 220 Q120 220 120 150" marker-end="url(#arrow)" />
+
+      <g transform="translate(20 50)">
+        <rect class="spmv-node" width="110" height="80" rx="20" />
+        <text class="spmv-node-text" x="55" y="34" text-anchor="middle">{{ labels.input }}</text>
+        <text class="spmv-node-caption" x="55" y="56" text-anchor="middle">CSR / ELL</text>
+      </g>
+      <g transform="translate(250 50)">
+        <rect class="spmv-node" width="120" height="80" rx="20" />
+        <text class="spmv-node-text" x="60" y="34" text-anchor="middle">{{ labels.analysis }}</text>
+        <text class="spmv-node-caption" x="60" y="56" text-anchor="middle">avg_nnz / skewness</text>
+      </g>
+      <g transform="translate(490 50)">
+        <rect class="spmv-node" width="120" height="80" rx="20" />
+        <text class="spmv-node-text" x="60" y="34" text-anchor="middle">{{ labels.decision }}</text>
+        <text class="spmv-node-caption" x="60" y="56" text-anchor="middle">Scalar / Vector / Merge</text>
+      </g>
+      <g transform="translate(690 50)">
+        <rect class="spmv-node" width="110" height="80" rx="20" />
+        <text class="spmv-node-text" x="55" y="34" text-anchor="middle">{{ labels.execution }}</text>
+        <text class="spmv-node-caption" x="55" y="56" text-anchor="middle">CUDA kernel</text>
+      </g>
+      <g transform="translate(350 170)">
+        <rect class="spmv-node" width="140" height="70" rx="20" />
+        <text class="spmv-node-text" x="70" y="30" text-anchor="middle">{{ labels.result }}</text>
+        <text class="spmv-node-caption" x="70" y="51" text-anchor="middle">Accuracy + bandwidth</text>
+      </g>
+    </svg>
+  </div>
+</template>
diff --git a/docs/.vitepress/theme/components/CalloutPanel.vue b/docs/.vitepress/theme/components/CalloutPanel.vue
new file mode 100644
index 0000000..a44aaf3
--- /dev/null
+++ b/docs/.vitepress/theme/components/CalloutPanel.vue
@@ -0,0 +1,20 @@
+<script setup lang="ts">
+withDefaults(
+  defineProps<{
+    title: string
+    tone?: 'info' | 'success' | 'warning'
+  }>(),
+  {
+    tone: 'info'
+  }
+)
+</script>
+
+<template>
+  <aside class="spmv-callout" :data-tone="tone">
+    <div class="spmv-callout-title">{{ title }}</div>
+    <div class="spmv-callout-body">
+      <slot />
+    </div>
+  </aside>
+</template>
diff --git a/docs/.vitepress/theme/components/CitationGrid.vue b/docs/.vitepress/theme/components/CitationGrid.vue
new file mode 100644
index 0000000..7855fc0
--- /dev/null
+++ b/docs/.vitepress/theme/components/CitationGrid.vue
@@ -0,0 +1,20 @@
+<script setup lang="ts">
+defineProps<{
+  items: Array<{
+    key: string
+    title: string
+    meta?: string
+    url: string
+  }>
+}>()
+</script>
+
+<template>
+  <div class="spmv-citation-grid">
+    <article v-for="item in items" :key="item.key" class="spmv-citation-card spmv-surface-card">
+      <h3>{{ item.title }}</h3>
+      <p v-if="item.meta">{{ item.meta }}</p>
+      <a :href="item.url" target="_blank" rel="noreferrer">Open reference</a>
+    </article>
+  </div>
+</template>
diff --git a/docs/.vitepress/theme/components/HeroEvidence.vue b/docs/.vitepress/theme/components/HeroEvidence.vue
new file mode 100644
index 0000000..26bc538
--- /dev/null
+++ b/docs/.vitepress/theme/components/HeroEvidence.vue
@@ -0,0 +1,47 @@
+<script setup lang="ts">
+type MetricItem = {
+  label: string
+  value: string
+}
+
+withDefaults(
+  defineProps<{
+    title: string
+    lead: string
+    metrics: MetricItem[]
+    eyebrow?: string
+    primaryLabel: string
+    primaryLink: string
+    secondaryLabel?: string
+    secondaryLink?: string
+  }>(),
+  {
+    eyebrow: 'Technical Whitepaper'
+  }
+)
+</script>
+
+<template>
+  <section class="spmv-hero">
+    <div class="spmv-hero-copy spmv-surface-card">
+      <div class="spmv-eyebrow">{{ eyebrow }}</div>
+      <h1>{{ title }}</h1>
+      <p>{{ lead }}</p>
+      <div class="spmv-hero-actions">
+        <a class="primary" :href="primaryLink">{{ primaryLabel }}</a>
+        <a v-if="secondaryLabel && secondaryLink" class="secondary" :href="secondaryLink">
+          {{ secondaryLabel }}
+        </a>
+      </div>
+      <div class="spmv-metric-strip">
+        <article v-for="metric in metrics" :key="metric.label" class="spmv-metric-card spmv-surface-card">
+          <div class="spmv-metric-value">{{ metric.value }}</div>
+          <div class="spmv-metric-label">{{ metric.label }}</div>
+        </article>
+      </div>
+    </div>
+    <div class="spmv-hero-art spmv-surface-card">
+      <slot />
+    </div>
+  </section>
+</template>
diff --git a/docs/.vitepress/theme/components/MetricStrip.vue b/docs/.vitepress/theme/components/MetricStrip.vue
new file mode 100644
index 0000000..d210b10
--- /dev/null
+++ b/docs/.vitepress/theme/components/MetricStrip.vue
@@ -0,0 +1,19 @@
+<script setup lang="ts">
+defineProps<{ // Standalone metric strip: renders one card per item.
+  items: Array<{
+    label: string // caption under the value; also used as the v-for key
+    value: string // figure displayed in the card
+    description?: string // optional paragraph, rendered only when set
+  }>
+}>()
+</script>
+
+<template>
+  <div class="spmv-metric-strip">
+    <article v-for="item in items" :key="item.label" class="spmv-metric-card spmv-surface-card">
+      <div class="spmv-metric-value">{{ item.value }}</div>
+      <div class="spmv-metric-label">{{ item.label }}</div>
+      <p v-if="item.description">{{ item.description }}</p>
+    </article>
+  </div>
+</template>
diff --git a/docs/.vitepress/theme/components/ThemeAwareArt.vue b/docs/.vitepress/theme/components/ThemeAwareArt.vue
new file mode 100644
index 0000000..1ec9f6c
--- /dev/null
+++ b/docs/.vitepress/theme/components/ThemeAwareArt.vue
@@ -0,0 +1,17 @@
+<script setup lang="ts">
+defineProps<{ // Inline SVG illustration; shape and text colors come from the .theme-aware-art CSS classes.
+  title: string // first text line; also bound as the SVG's aria-label
+  caption: string // second text line
+}>()
+</script>
+
+<template>
+  <svg viewBox="0 0 320 180" class="theme-aware-art" role="img" :aria-label="title">
+    <rect x="0" y="0" width="320" height="180" rx="24" class="art-bg" />
+    <rect x="24" y="24" width="48" height="48" rx="8" class="art-accent-1" />
+    <rect x="88" y="40" width="24" height="24" rx="6" class="art-accent-2" />
+    <rect x="128" y="24" width="72" height="72" rx="16" class="art-accent-1" opacity="0.25" />
+    <text x="24" y="130" class="art-title">{{ title }}</text>
+    <text x="24" y="154" class="art-caption">{{ caption }}</text>
+  </svg>
+</template>
diff --git a/docs/.vitepress/theme/components/WhitepaperSection.vue b/docs/.vitepress/theme/components/WhitepaperSection.vue
new file mode 100644
index 0000000..5bafc9a
--- /dev/null
+++ b/docs/.vitepress/theme/components/WhitepaperSection.vue
@@ -0,0 +1,24 @@
+<script setup lang="ts">
+withDefaults( // Titled section card; the body content comes from the default slot.
+  defineProps<{
+    title: string // rendered as the <h2>
+    eyebrow?: string // pill label above the heading
+    lead?: string // optional intro paragraph; not rendered when empty
+  }>(),
+  {
+    eyebrow: 'Section',
+    lead: ''
+  }
+)
+</script>
+
+<template>
+  <section class="spmv-section spmv-surface-card">
+    <div class="spmv-eyebrow">{{ eyebrow }}</div>
+    <h2>{{ title }}</h2>
+    <p v-if="lead">{{ lead }}</p>
+    <div class="spmv-section-body">
+      <slot />
+    </div>
+  </section>
+</template>
diff --git a/docs/.vitepress/theme/index.ts b/docs/.vitepress/theme/index.ts
index 347ed6e..4b6a940 100644
--- a/docs/.vitepress/theme/index.ts
+++ b/docs/.vitepress/theme/index.ts
@@ -1,6 +1,28 @@
 import DefaultTheme from 'vitepress/theme'
+import type { EnhanceAppContext } from 'vitepress'
+import Layout from './Layout.vue'
+import HeroEvidence from './components/HeroEvidence.vue'
+import MetricStrip from './components/MetricStrip.vue'
+import WhitepaperSection from './components/WhitepaperSection.vue'
+import ArchitectureCanvas from './components/ArchitectureCanvas.vue'
+import CitationGrid from './components/CitationGrid.vue'
+import ThemeAwareArt from './components/ThemeAwareArt.vue'
+import CalloutPanel from './components/CalloutPanel.vue'
 import './style.css'
 
+// Custom theme: extends the VitePress default theme, uses the project Layout
+// component, and registers the whitepaper components globally by name.
 export default {
-  extends: DefaultTheme
+  extends: DefaultTheme,
+  Layout,
+  // Annotating the context keeps `app` from being an implicit `any`.
+  enhanceApp({ app }: EnhanceAppContext) {
+    app.component('HeroEvidence', HeroEvidence)
+    app.component('MetricStrip', MetricStrip)
+    app.component('WhitepaperSection', WhitepaperSection)
+    app.component('ArchitectureCanvas', ArchitectureCanvas)
+    app.component('CitationGrid', CitationGrid)
+    app.component('ThemeAwareArt', ThemeAwareArt)
+    app.component('CalloutPanel', CalloutPanel)
+  }
 }
diff --git a/docs/.vitepress/theme/style.css b/docs/.vitepress/theme/style.css
index 35caddf..b7a0e06 100644
--- a/docs/.vitepress/theme/style.css
+++ b/docs/.vitepress/theme/style.css
@@ -1,927 +1,6 @@
-/**
- * GPU SpMV Documentation Theme
- * Technical Whitepaper / Architecture Showcase
- *
- * Design system:
- * - Brand: NVIDIA Green (#76B900)
- * - Accent: CUDA Teal (#00D4AA)
- * - Light mode: clean white
- * - Dark mode: GitHub Dark (#0d1117)
- */
-
-/* === CSS Variables: Light Mode (Default) === */
-:root {
-  /* Brand: NVIDIA Green */
-  --vp-c-brand-1: #5A8F00;
-  --vp-c-brand-2: #76B900;
-  --vp-c-brand-3: #8ED100;
-  --vp-c-brand-soft: rgba(118, 185, 0, 0.14);
-
-  /* Accent: CUDA Teal */
-  --spmv-accent: #00B894;
-  --spmv-accent-soft: rgba(0, 184, 148, 0.14);
-
-  /* Background */
-  --vp-c-bg: #ffffff;
-  --vp-c-bg-alt: #f6f8fa;
-  --vp-c-bg-soft: #f6f8fa;
-  --vp-c-bg-elv: #ffffff;
-  --vp-c-bg-mute: #f6f8fa;
-
-  /* Text */
-  --vp-c-text-1: #24292f;
-  --vp-c-text-2: #57606a;
-  --vp-c-text-3: #8b949e;
-
-  /* Border */
-  --vp-c-border: #d0d7de;
-  --vp-c-divider: #d0d7de;
-  --vp-c-gutter: #d0d7de;
-
-  /* Hero */
-  --vp-home-hero-name-color: #5A8F00;
-  --vp-home-hero-name-background: transparent;
-  --vp-home-hero-image-background-image: none;
-  --vp-home-hero-image-filter: none;
-
-  /* Typography */
-  --vp-font-family-base: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
-  --vp-font-family-mono: 'JetBrains Mono', 'Fira Code', SFMono-Regular, Menlo, monospace;
-
-  /* Layout */
-  --vp-layout-max-width: 1440px;
-  --vp-sidebar-width: 280px;
-
-  /* Code */
-  --vp-code-block-bg: #f6f8fa;
-  --vp-code-block-border: #d0d7de;
-
-  /* Homepage */
-  --tag-bg: rgba(118, 185, 0, 0.12);
-  --tag-text: #4A7600;
-
-  /* Spacing */
-  --spacing-xs: 8px;
-  --spacing-sm: 12px;
-  --spacing-md: 16px;
-  --spacing-lg: 24px;
-  --spacing-xl: 32px;
-  --spacing-2xl: 40px;
-
-  /* Radius */
-  --radius-sm: 6px;
-  --radius-md: 8px;
-  --radius-lg: 12px;
-  --radius-xl: 16px;
-
-  /* Transitions */
-  --transition-fast: 0.15s ease;
-  --transition-normal: 0.2s ease;
-
-  /* Whitepaper-specific */
-  --spmv-card-shadow: 0 1px 3px rgba(0, 0, 0, 0.06);
-}
-
-/* === Dark Mode Override === */
-.dark {
-  --vp-c-brand-1: #76B900;
-  --vp-c-brand-2: #5A8F00;
-  --vp-c-brand-3: #4A7600;
-  --vp-c-brand-soft: rgba(118, 185, 0, 0.16);
-
-  --spmv-accent: #00D4AA;
-  --spmv-accent-soft: rgba(0, 212, 170, 0.16);
-
-  --vp-c-bg: #0d1117;
-  --vp-c-bg-alt: #161b22;
-  --vp-c-bg-soft: #21262d;
-  --vp-c-bg-elv: #21262d;
-  --vp-c-bg-mute: #21262d;
-
-  --vp-c-text-1: #c9d1d9;
-  --vp-c-text-2: #8b949e;
-  --vp-c-text-3: #6e7681;
-
-  --vp-c-border: #30363d;
-  --vp-c-divider: #30363d;
-  --vp-c-gutter: #30363d;
-
-  --vp-home-hero-name-color: #76B900;
-
-  --vp-code-block-bg: #0d1117;
-  --vp-code-block-border: #30363d;
-
-  --tag-bg: rgba(118, 185, 0, 0.18);
-  --tag-text: #76B900;
-
-  --spmv-card-shadow: 0 1px 3px rgba(0, 0, 0, 0.3);
-}
-
-/* === Hero Section === */
-.VPHero {
-  padding: calc(var(--vp-nav-height) + 48px) var(--spacing-lg) 48px;
-}
-
-.VPHero .name {
-  font-size: 32px;
-  font-weight: 700;
-  line-height: 1.2;
-  letter-spacing: -0.5px;
-  color: var(--vp-c-brand-1);
-}
-
-.VPHero .text {
-  font-size: 16px;
-  font-weight: 400;
-  line-height: 1.5;
-  color: var(--vp-c-text-2);
-  margin-top: var(--spacing-sm);
-}
-
-.VPHero .tagline {
-  font-size: 14px;
-  line-height: 1.6;
-  color: var(--vp-c-text-3);
-  margin-top: var(--spacing-md);
-  max-width: 600px;
-}
-
-.VPHero .image {
-  display: none;
-}
-
-/* === Buttons === */
-.VPHero .actions {
-  margin-top: var(--spacing-xl);
-  gap: var(--spacing-sm);
-}
-
-.VPHero .VPButton {
-  border-radius: var(--radius-sm);
-  padding: 8px 16px;
-  font-size: 14px;
-  font-weight: 500;
-  transition: all var(--transition-fast);
-}
-
-.VPHero .VPButton.medium.brand {
-  background: var(--vp-c-brand-1);
-  border: none;
-  color: #fff;
-}
-
-.VPHero .VPButton.medium.brand:hover {
-  background: var(--vp-c-brand-2);
-}
-
-.VPHero .VPButton.medium.alt {
-  border: 1px solid var(--vp-c-border);
-  background: transparent;
-  color: var(--vp-c-text-1);
-}
-
-.VPHero .VPButton.medium.alt:hover {
-  border-color: var(--vp-c-brand-1);
-  color: var(--vp-c-brand-1);
-}
-
-/* === Features === */
-.VPFeatures {
-  padding: 48px var(--spacing-lg);
-}
-
-.VPFeatures .container {
-  max-width: 1200px;
-}
-
-.VPFeatures .VPFeature {
-  padding: var(--spacing-lg);
-  border-radius: var(--radius-md);
-  background: var(--vp-c-bg);
-  border: 1px solid var(--vp-c-border);
-  transition: border-color var(--transition-fast);
-}
-
-.VPFeatures .VPFeature:hover {
-  border-color: var(--vp-c-brand-1);
-}
-
-.VPFeature .icon {
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  width: 40px;
-  height: 40px;
-  font-size: 20px;
-  margin-bottom: var(--spacing-sm);
-}
-
-.VPFeature .title {
-  font-size: 16px;
-  font-weight: 600;
-  margin-bottom: var(--spacing-xs);
-  color: var(--vp-c-text-1);
-}
-
-.VPFeature .details {
-  font-size: 14px;
-  line-height: 1.6;
-  color: var(--vp-c-text-2);
-}
-
-/* === Navigation === */
-.VPNav {
-  background: var(--vp-c-bg);
-  border-bottom: 1px solid var(--vp-c-border);
-}
-
-.VPNavBarMenuLink,
-.VPNavBarMenuGroup .button {
-  font-weight: 500;
-  font-size: 14px;
-}
-
-.VPNavBarMenuLink.active,
-.VPNavBarMenuGroup.open .button {
-  color: var(--vp-c-brand-1);
-}
-
-/* === Search === */
-.VPNavBarSearch {
-  display: flex;
-  justify-content: flex-end;
-}
-
-.VPLocalSearchBox {
-  backdrop-filter: blur(20px);
-}
-
-.VPLocalSearchBox .backdrop {
-  background: rgba(0, 0, 0, 0.6);
-  backdrop-filter: blur(4px);
-}
-
-.VPLocalSearchBox .shell {
-  border-radius: var(--radius-xl);
-  box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
-}
-
-/* === Sidebar === */
-.VPSidebar {
-  padding: var(--spacing-lg) var(--spacing-md) 96px;
-}
-
-.VPSidebarGroup {
-  margin-bottom: var(--spacing-md);
-}
-
-.VPSidebarGroup .title {
-  font-size: 13px;
-  font-weight: 600;
-  text-transform: uppercase;
-  letter-spacing: 0.5px;
-  color: var(--vp-c-text-3);
-  padding: var(--spacing-sm) 14px;
-}
-
-.VPSidebarItem .text {
-  font-size: 14px;
-  padding: var(--spacing-xs) 14px;
-  border-radius: var(--radius-md);
-  transition: all var(--transition-fast);
-}
-
-.VPSidebarItem .text:hover {
-  background: var(--vp-c-bg-soft);
-  color: var(--vp-c-brand-1);
-}
-
-.VPSidebarItem.is-active .text {
-  background: var(--vp-c-brand-soft);
-  color: var(--vp-c-brand-1);
-  font-weight: 500;
-}
-
-/* === Content === */
-.VPDoc {
-  padding: var(--spacing-xl) var(--spacing-lg) 96px;
-}
-
-.VPDoc .content {
-  max-width: 720px;
-}
-
-.VPDoc h1 {
-  font-size: 40px;
-  font-weight: 700;
-  letter-spacing: -1px;
-  margin-bottom: var(--spacing-lg);
-}
-
-.VPDoc h2 {
-  font-size: 28px;
-  font-weight: 600;
-  margin-top: 48px;
-  margin-bottom: 20px;
-  padding-bottom: var(--spacing-sm);
-  border-bottom: 1px solid var(--vp-c-divider);
-}
-
-.VPDoc h3 {
-  font-size: 20px;
-  font-weight: 600;
-  margin-top: var(--spacing-xl);
-  margin-bottom: var(--spacing-md);
-}
-
-.VPDoc p {
-  font-size: 16px;
-  line-height: 1.8;
-  margin-bottom: var(--spacing-md);
-}
-
-.VPDoc a {
-  color: var(--vp-c-brand-1);
-  text-decoration: none;
-  border-bottom: 1px solid transparent;
-  transition: border-color var(--transition-fast);
-}
-
-.VPDoc a:hover {
-  border-bottom-color: var(--vp-c-brand-1);
-}
-
-.VPDoc code {
-  font-size: 14px;
-  padding: 2px var(--spacing-xs);
-  border-radius: var(--radius-sm);
-  background: var(--vp-c-bg-soft);
-  color: var(--vp-c-text-1);
-}
-
-.VPDoc pre code {
-  font-size: 14px;
-  padding: 0;
-  background: transparent;
-}
-
-/* Custom Blocks */
-.VPDoc .custom-block {
-  border-radius: var(--radius-lg);
-  padding: 20px var(--spacing-lg);
-  margin: var(--spacing-lg) 0;
-}
-
-.VPDoc .custom-block.info {
-  background: var(--vp-c-bg-soft);
-  border-left: 4px solid var(--vp-c-brand-1);
-}
-
-.VPDoc .custom-block.tip {
-  background: var(--spmv-accent-soft);
-  border-left: 4px solid var(--spmv-accent);
-}
-
-.VPDoc .custom-block.warning {
-  background: rgba(234, 179, 8, 0.14);
-  border-left: 4px solid #EAB308;
-}
-
-.VPDoc .custom-block.danger {
-  background: rgba(239, 68, 68, 0.14);
-  border-left: 4px solid #EF4444;
-}
-
-/* === Tables === */
-.VPDoc table {
-  width: 100%;
-  border-collapse: separate;
-  border-spacing: 0;
-  margin: var(--spacing-lg) 0;
-  border-radius: var(--radius-lg);
-  overflow: hidden;
-  border: 1px solid var(--vp-c-border);
-}
-
-.VPDoc th {
-  background: var(--vp-c-bg-soft);
-  font-weight: 600;
-  text-align: left;
-  padding: 14px 18px;
-  font-size: 14px;
-  border-bottom: 1px solid var(--vp-c-border);
-}
-
-.VPDoc td {
-  padding: var(--spacing-sm) 18px;
-  font-size: 14px;
-  border-bottom: 1px solid var(--vp-c-border);
-}
-
-.VPDoc tr:last-child td {
-  border-bottom: none;
-}
-
-.VPDoc tr:hover td {
-  background: var(--vp-c-bg-soft);
-}
-
-/* === Footer === */
-.VPFooter {
-  padding: var(--spacing-2xl) var(--spacing-lg);
-  background: var(--vp-c-bg-alt);
-}
-
-.VPFooter .message,
-.VPFooter .copyright {
-  font-size: 14px;
-  color: var(--vp-c-text-3);
-}
-
-/* === Mermaid === */
-.mermaid {
-  background: var(--vp-c-bg-soft);
-  border-radius: var(--radius-lg);
-  padding: var(--spacing-lg);
-  margin: var(--spacing-lg) 0;
-  text-align: center;
-}
-
-/* === Homepage: Header === */
-.home-header {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  padding: var(--spacing-md) 0;
-  margin-bottom: var(--spacing-xl);
-  border-bottom: 1px solid var(--vp-c-border);
-}
-
-.home-header-left {
-  display: flex;
-  align-items: center;
-  gap: var(--spacing-sm);
-}
-
-.home-logo {
-  width: 36px;
-  height: 36px;
-  background: var(--vp-c-brand-1);
-  border-radius: 6px;
-  display: flex;
-  align-items: center;
-  justify-content: center;
-  color: #fff;
-  font-weight: 700;
-  font-size: 11px;
-  letter-spacing: -0.5px;
-}
-
-.home-title {
-  font-weight: 600;
-  font-size: 16px;
-  color: var(--vp-c-text-1);
-}
-
-.home-subtitle {
-  color: var(--vp-c-text-2);
-  font-size: 13px;
-  margin-left: var(--spacing-xs);
-}
-
-.home-nav {
-  display: flex;
-  gap: var(--spacing-md);
-  font-size: 13px;
-}
-
-.home-nav a {
-  color: var(--vp-c-text-2);
-  text-decoration: none;
-  transition: color var(--transition-fast);
-}
-
-.home-nav a:hover {
-  color: var(--vp-c-brand-1);
-}
-
-/* === Homepage: Hero Tech === */
-.home-hero-tech {
-  padding: var(--spacing-2xl) 0;
-  margin-bottom: var(--spacing-xl);
-}
-
-.home-hero-tech h1 {
-  font-size: 36px;
-  font-weight: 800;
-  letter-spacing: -1px;
-  color: var(--vp-c-text-1);
-  margin: 0 0 var(--spacing-md) 0;
-  line-height: 1.2;
-}
-
-.home-hero-tech .hero-tagline {
-  font-size: 18px;
-  color: var(--vp-c-text-2);
-  line-height: 1.6;
-  margin: 0 0 var(--spacing-lg) 0;
-  max-width: 680px;
-}
-
-.home-hero-tech .hero-actions {
-  display: flex;
-  gap: var(--spacing-sm);
-  flex-wrap: wrap;
-}
-
-.home-hero-tech .hero-actions a {
-  display: inline-block;
-  padding: 10px 20px;
-  border-radius: var(--radius-sm);
-  font-size: 14px;
-  font-weight: 500;
-  text-decoration: none;
-  transition: all var(--transition-fast);
-}
-
-.home-hero-tech .hero-actions a.primary {
-  background: var(--vp-c-brand-1);
-  color: #fff;
-}
-
-.home-hero-tech .hero-actions a.primary:hover {
-  background: var(--vp-c-brand-2);
-}
-
-.home-hero-tech .hero-actions a.secondary {
-  border: 1px solid var(--vp-c-border);
-  color: var(--vp-c-text-1);
-  background: transparent;
-}
-
-.home-hero-tech .hero-actions a.secondary:hover {
-  border-color: var(--vp-c-brand-1);
-  color: var(--vp-c-brand-1);
-}
-
-/* === Homepage: Metrics Bar === */
-.home-metrics {
-  display: flex;
-  gap: var(--spacing-lg);
-  padding: var(--spacing-lg);
-  background: var(--vp-c-bg-soft);
-  border: 1px solid var(--vp-c-border);
-  border-radius: var(--radius-lg);
-  margin-bottom: var(--spacing-xl);
-}
-
-.home-metric {
-  flex: 1;
-  text-align: center;
-}
-
-.home-metric-value {
-  font-family: 'JetBrains Mono', monospace;
-  font-size: 28px;
-  font-weight: 700;
-  color: var(--vp-c-brand-1);
-  line-height: 1.2;
-}
-
-.home-metric-label {
-  font-size: 13px;
-  color: var(--vp-c-text-3);
-  margin-top: var(--spacing-xs);
-}
-
-/* === Homepage: Architecture === */
-.home-architecture {
-  margin-bottom: var(--spacing-xl);
-}
-
-.home-architecture .mermaid {
-  margin: 0;
-}
-
-/* === Homepage: Feature Map === */
-.feature-map {
-  display: grid;
-  grid-template-columns: repeat(3, 1fr);
-  gap: var(--spacing-md);
-  margin-bottom: var(--spacing-xl);
-}
-
-.feature-card {
-  background: var(--vp-c-bg);
-  border: 1px solid var(--vp-c-border);
-  border-radius: var(--radius-md);
-  padding: var(--spacing-md);
-  transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
-}
-
-.feature-card:hover {
-  border-color: var(--vp-c-brand-1);
-  box-shadow: var(--spmv-card-shadow), 0 4px 12px rgba(118, 185, 0, 0.08);
-}
-
-.feature-card-title {
-  font-weight: 600;
-  font-size: 15px;
-  color: var(--vp-c-text-1);
-  margin-bottom: var(--spacing-sm);
-}
-
-.feature-card-desc {
-  font-size: 13px;
-  color: var(--vp-c-text-2);
-  line-height: 1.6;
-  margin-bottom: var(--spacing-sm);
-}
-
-.feature-tags {
-  display: flex;
-  flex-wrap: wrap;
-  gap: 6px;
-}
-
-.feature-tag {
-  font-size: 12px;
-  color: var(--tag-text);
-  text-decoration: none;
-  padding: 3px 8px;
-  background: var(--tag-bg);
-  border-radius: 4px;
-  transition: background var(--transition-fast);
-}
-
-.feature-tag:hover {
-  background: var(--vp-c-brand-soft);
-}
-
-/* === Performance Bars (CSS-based charts) === */
-.perf-bars {
-  margin: var(--spacing-xl) 0;
-  display: flex;
-  flex-direction: column;
-  gap: var(--spacing-xl);
-}
-
-.perf-bar-group {
-  padding: var(--spacing-lg);
-  background: var(--vp-c-bg-soft);
-  border: 1px solid var(--vp-c-border);
-  border-radius: var(--radius-lg);
-}
-
-.perf-bar-title {
-  font-size: 14px;
-  font-weight: 600;
-  color: var(--vp-c-text-1);
-  margin-bottom: var(--spacing-md);
-}
-
-.perf-row {
-  display: flex;
-  align-items: center;
-  gap: var(--spacing-md);
-  margin-bottom: var(--spacing-sm);
-}
-
-.perf-label {
-  width: 120px;
-  font-size: 13px;
-  color: var(--vp-c-text-2);
-  flex-shrink: 0;
-}
-
-.perf-bar {
-  flex: 1;
-  height: 20px;
-  background: var(--vp-c-bg);
-  border-radius: 4px;
-  position: relative;
-  border: 1px solid var(--vp-c-border);
-}
-
-.perf-bar::after {
-  content: '';
-  position: absolute;
-  left: 0;
-  top: 0;
-  height: 100%;
-  width: var(--width);
-  background: var(--vp-c-brand-1);
-  border-radius: 4px;
-  transition: width var(--transition-normal);
-}
-
-.perf-value {
-  width: 50px;
-  text-align: right;
-  font-size: 13px;
-  font-family: 'JetBrains Mono', monospace;
-  color: var(--vp-c-text-1);
-  flex-shrink: 0;
-}
-
-/* === Language Selector (root page fallback) === */
-.language-selector {
-  display: flex;
-  flex-direction: column;
-  align-items: center;
-  justify-content: center;
-  min-height: calc(100vh - var(--vp-nav-height));
-  padding: var(--spacing-2xl);
-  text-align: center;
-}
-
-.language-selector h1 {
-  font-size: 48px;
-  font-weight: 800;
-  margin-bottom: var(--spacing-md);
-  background: linear-gradient(135deg, var(--vp-c-brand-1) 0%, var(--spmv-accent) 100%);
-  -webkit-background-clip: text;
-  -webkit-text-fill-color: transparent;
-}
-
-.language-selector p {
-  font-size: 18px;
-  color: var(--vp-c-text-2);
-  margin-bottom: var(--spacing-2xl);
-}
-
-.language-cards {
-  display: grid;
-  grid-template-columns: repeat(2, 1fr);
-  gap: var(--spacing-lg);
-  max-width: 600px;
-}
-
-.language-card {
-  display: block;
-  padding: var(--spacing-xl);
-  background: var(--vp-c-bg-soft);
-  border: 1px solid var(--vp-c-border);
-  border-radius: var(--radius-xl);
-  text-decoration: none;
-  transition: all var(--transition-normal);
-}
-
-.language-card:hover {
-  border-color: var(--vp-c-brand-1);
-  transform: translateY(-4px);
-  box-shadow: 0 12px 24px -8px rgba(118, 185, 0, 0.2);
-}
-
-.language-card h2 {
-  font-size: 24px;
-  font-weight: 700;
-  color: var(--vp-c-text-1);
-  margin: 0 0 var(--spacing-sm) 0;
-  padding: 0;
-  border: none;
-}
-
-.language-card p {
-  font-size: 14px;
-  color: var(--vp-c-text-3);
-  margin: 0;
-}
-
-/* === Error Page === */
-.error-page h1 {
-  color: var(--vp-c-text-3);
-}
-
-.error-page .btn {
-  display: inline-block;
-  padding: 8px 20px;
-  background: var(--vp-c-brand-1);
-  color: #fff;
-  border-radius: var(--radius-sm);
-  text-decoration: none;
-  font-size: 14px;
-  font-weight: 500;
-  transition: background var(--transition-fast);
-}
-
-.error-page .btn:hover {
-  background: var(--vp-c-brand-2);
-}
-
-/* === Responsive === */
-@media (max-width: 960px) {
-  .VPHero {
-    padding: calc(var(--vp-nav-height) + 60px) var(--spacing-lg) 60px;
-  }
-
-  .VPHero .name {
-    font-size: 48px;
-  }
-
-  .VPHero .text {
-    font-size: 18px;
-  }
-
-  .VPDoc h1 {
-    font-size: 32px;
-  }
-
-  .VPDoc h2 {
-    font-size: 24px;
-  }
-
-  .home-metrics {
-    flex-wrap: wrap;
-  }
-
-  .home-metric {
-    min-width: 120px;
-  }
-}
-
-@media (max-width: 959px) {
-  .feature-map {
-    grid-template-columns: repeat(2, 1fr);
-  }
-
-  .language-cards {
-    grid-template-columns: 1fr;
-  }
-}
-
-@media (max-width: 640px) {
-  .VPHero .name {
-    font-size: 36px;
-    letter-spacing: -1px;
-  }
-
-  .VPFeatures .VPFeature {
-    padding: var(--spacing-lg);
-  }
-
-  .feature-map {
-    grid-template-columns: 1fr;
-  }
-
-  .home-header {
-    flex-direction: column;
-    align-items: flex-start;
-    gap: var(--spacing-sm);
-  }
-
-  .home-nav {
-    width: 100%;
-    justify-content: flex-start;
-  }
-
-  .home-hero-tech h1 {
-    font-size: 28px;
-  }
-
-  .home-metrics {
-    flex-direction: column;
-    gap: var(--spacing-md);
-  }
-
-  .perf-row {
-    flex-wrap: wrap;
-  }
-
-  .perf-label {
-    width: 100%;
-    margin-bottom: var(--spacing-xs);
-  }
-
-  .perf-bar {
-    flex: 1;
-  }
-}
-
-/* === Print === */
-@media print {
-  .VPNav,
-  .VPSidebar,
-  .VPFooter,
-  .home-header,
-  .home-nav {
-    display: none;
-  }
-
-  .VPDoc {
-    padding: 0;
-  }
-
-  .VPDoc .content {
-    max-width: 100%;
-  }
-
-  .VPDoc a {
-    border-bottom: none;
-  }
-
-  .VPDoc code {
-    background: transparent;
-    border: 1px solid #ddd;
-  }
-}
+@import './styles/tokens.css'; /* brand colors and --spmv-* design tokens (light defaults, .dark overrides) */
+@import './styles/base.css'; /* layout backdrop, card grid, surface card, eyebrow label */
+@import './styles/home.css'; /* hero, action links, metric strip */
+@import './styles/paper.css'; /* whitepaper sections and callouts */
+@import './styles/citation.css'; /* citation grid and cards */
+@import './styles/diagram.css'; /* SVG diagram and illustration styles */
diff --git a/docs/.vitepress/theme/styles/base.css b/docs/.vitepress/theme/styles/base.css
new file mode 100644
index 0000000..0fb1757
--- /dev/null
+++ b/docs/.vitepress/theme/styles/base.css
@@ -0,0 +1,67 @@
+.spmv-layout-top { /* Decorative gradient band pinned to the top of the viewport. */
+  position: fixed;
+  inset: 0 0 auto 0;
+  height: 420px;
+  pointer-events: none; /* purely visual: never intercepts clicks */
+  background:
+    radial-gradient(circle at top left, rgba(118, 185, 0, 0.14), transparent 30%),
+    radial-gradient(circle at top right, rgba(0, 212, 170, 0.12), transparent 28%);
+  z-index: 0; /* below the z-index: 1 given to the page containers in the next rule */
+}
+
+.Layout, /* Raise the page containers above the decorative band. */
+.VPContent,
+.VPDoc {
+  position: relative;
+  z-index: 1;
+}
+
+.VPContent.is-home { /* Home page only: fade from the secondary surface color to transparent. */
+  background:
+    linear-gradient(180deg, color-mix(in srgb, var(--spmv-surface-2) 88%, transparent) 0%, transparent 100%);
+}
+
+.vp-doc h1,
+.vp-doc h2,
+.vp-doc h3 {
+  letter-spacing: -0.02em;
+}
+
+.vp-doc p,
+.vp-doc li {
+  color: var(--spmv-ink-2);
+}
+
+.spmv-card-grid {
+  display: grid;
+  gap: var(--spmv-grid-gap);
+}
+
+.spmv-card-grid.cols-2 { /* auto-fit with a 260px minimum: the column count follows the available width */
+  grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
+}
+
+.spmv-card-grid.cols-3 { /* same auto-fit approach with a 220px minimum column width */
+  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+}
+
+.spmv-surface-card { /* Shared card chrome: slightly translucent surface, border, rounded corners, shadow. */
+  background: color-mix(in srgb, var(--spmv-surface-1) 92%, transparent);
+  border: 1px solid var(--spmv-border);
+  border-radius: var(--spmv-radius-lg);
+  box-shadow: var(--spmv-shadow-md);
+}
+
+.spmv-eyebrow { /* Uppercase pill label shown above headings. */
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  padding: 8px 12px;
+  border-radius: 999px;
+  background: rgba(118, 185, 0, 0.12);
+  color: var(--vp-c-brand-1);
+  font-size: 12px;
+  font-weight: 700;
+  letter-spacing: 0.08em;
+  text-transform: uppercase;
+}
diff --git a/docs/.vitepress/theme/styles/citation.css b/docs/.vitepress/theme/styles/citation.css
new file mode 100644
index 0000000..9a84fea
--- /dev/null
+++ b/docs/.vitepress/theme/styles/citation.css
@@ -0,0 +1,25 @@
+.spmv-citation-grid { /* Responsive grid of citation cards; each column is at least 260px wide. */
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
+  gap: 18px;
+  margin: 24px 0;
+}
+
+.spmv-citation-card { /* Padding only; the card chrome comes from .spmv-surface-card on the same element. */
+  padding: 22px;
+}
+
+.spmv-citation-card h3 {
+  margin: 0 0 10px;
+  font-size: 1.05rem;
+  color: var(--spmv-ink-1);
+}
+
+.spmv-citation-card p {
+  margin: 0 0 10px;
+  font-size: 0.95rem;
+}
+
+.spmv-citation-card a {
+  font-weight: 700;
+}
diff --git a/docs/.vitepress/theme/styles/diagram.css b/docs/.vitepress/theme/styles/diagram.css
new file mode 100644
index 0000000..ed3a894
--- /dev/null
+++ b/docs/.vitepress/theme/styles/diagram.css
@@ -0,0 +1,54 @@
+.spmv-architecture-canvas, /* Diagram graphics fill the width of their container. */
+.theme-aware-art {
+  width: 100%;
+  display: block;
+}
+
+.spmv-architecture-shell {
+  padding: 22px;
+}
+
+.spmv-node { /* Uses SVG fill/stroke properties; colors come from the --spmv-* tokens. */
+  fill: color-mix(in srgb, var(--spmv-surface-1) 94%, transparent);
+  stroke: var(--spmv-border);
+  stroke-width: 1.5;
+}
+
+.spmv-node-text {
+  fill: var(--spmv-ink-1);
+  font: 700 14px Inter, sans-serif;
+}
+
+.spmv-node-caption {
+  fill: var(--spmv-ink-3);
+  font: 500 12px Inter, sans-serif;
+}
+
+.spmv-link { /* Connector stroke: brand green mixed with the border color; no fill. */
+  stroke: color-mix(in srgb, var(--spmv-brand-1) 55%, var(--spmv-border));
+  stroke-width: 2;
+  fill: none;
+}
+
+.theme-aware-art .art-bg { /* The .art-* classes below style the shapes and text in ThemeAwareArt.vue. */
+  fill: color-mix(in srgb, var(--spmv-surface-2) 92%, transparent);
+  stroke: var(--spmv-border);
+}
+
+.theme-aware-art .art-accent-1 {
+  fill: var(--spmv-brand-1);
+}
+
+.theme-aware-art .art-accent-2 {
+  fill: var(--spmv-brand-2);
+}
+
+.theme-aware-art .art-title {
+  fill: var(--spmv-ink-1);
+  font: 700 24px Inter, sans-serif;
+}
+
+.theme-aware-art .art-caption {
+  fill: var(--spmv-ink-2);
+  font: 500 14px Inter, sans-serif;
+}
diff --git a/docs/.vitepress/theme/styles/home.css b/docs/.vitepress/theme/styles/home.css
new file mode 100644
index 0000000..6810239
--- /dev/null
+++ b/docs/.vitepress/theme/styles/home.css
@@ -0,0 +1,89 @@
+.spmv-hero { /* Two-column hero grid: copy column plus an art column at least 320px wide. */
+  display: grid;
+  grid-template-columns: minmax(0, 1.2fr) minmax(320px, 0.8fr);
+  gap: 32px;
+  align-items: stretch;
+  margin: 24px 0 48px;
+}
+
+.spmv-hero-copy,
+.spmv-hero-art {
+  padding: 32px;
+}
+
+.spmv-hero-copy h1 {
+  margin: 18px 0 16px;
+  font-size: clamp(2.4rem, 5vw, 4.3rem); /* scales with viewport width between the two bounds */
+  line-height: 1.02;
+  color: var(--spmv-ink-1);
+}
+
+.spmv-hero-copy p {
+  margin: 0;
+  font-size: 1.08rem;
+  line-height: 1.75;
+}
+
+.spmv-hero-actions {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 14px;
+  margin-top: 28px;
+}
+
+.spmv-hero-actions a { /* Base pill style shared by the primary and secondary action links. */
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  min-width: 160px;
+  padding: 13px 18px;
+  border-radius: 999px;
+  border: 1px solid var(--spmv-border);
+  font-weight: 700;
+  text-decoration: none;
+  transition: transform 0.2s ease, border-color 0.2s ease;
+}
+
+.spmv-hero-actions a.primary { /* Filled variant: brand gradient with white text. */
+  background: linear-gradient(135deg, var(--spmv-brand-1), var(--spmv-brand-2));
+  color: #ffffff;
+  border-color: transparent;
+}
+
+.spmv-hero-actions a.secondary { /* Outlined variant: keeps the base border, only sets the text color. */
+  color: var(--spmv-ink-1);
+}
+
+.spmv-hero-actions a:hover {
+  transform: translateY(-1px);
+  border-color: color-mix(in srgb, var(--spmv-brand-1) 50%, var(--spmv-border));
+}
+
+.spmv-metric-strip { /* Metric card grid used by both HeroEvidence.vue and MetricStrip.vue. */
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(170px, 1fr));
+  gap: 16px;
+  margin-top: 24px;
+}
+
+.spmv-metric-card {
+  padding: 20px;
+}
+
+.spmv-metric-value {
+  font-size: 1.8rem;
+  font-weight: 800;
+  color: var(--spmv-ink-1);
+}
+
+.spmv-metric-label {
+  margin-top: 8px;
+  font-size: 0.94rem;
+  color: var(--spmv-ink-2);
+}
+
+@media (max-width: 960px) { /* Collapse the hero to a single column on narrower viewports. */
+  .spmv-hero {
+    grid-template-columns: 1fr;
+  }
+}
diff --git a/docs/.vitepress/theme/styles/paper.css b/docs/.vitepress/theme/styles/paper.css
new file mode 100644
index 0000000..76b313e
--- /dev/null
+++ b/docs/.vitepress/theme/styles/paper.css
@@ -0,0 +1,44 @@
+.spmv-section { /* Spacing for WhitepaperSection.vue; the card chrome comes from .spmv-surface-card. */
+  margin: 36px 0;
+  padding: 28px;
+}
+
+.spmv-section h2 {
+  margin: 14px 0 12px;
+  font-size: 1.85rem;
+  color: var(--spmv-ink-1);
+}
+
+.spmv-section p {
+  margin: 0;
+}
+
+.spmv-callout { /* Base callout; the tone variants below are selected by the data-tone attribute. */
+  margin: 24px 0;
+  padding: 20px 22px;
+  border-radius: var(--spmv-radius-md);
+  border: 1px solid var(--spmv-border);
+  background: color-mix(in srgb, var(--spmv-surface-1) 92%, transparent);
+}
+
+.spmv-callout[data-tone='success'] { /* green tint */
+  border-color: rgba(118, 185, 0, 0.35);
+  background: rgba(118, 185, 0, 0.08);
+}
+
+.spmv-callout[data-tone='info'] { /* teal tint */
+  border-color: rgba(0, 212, 170, 0.3);
+  background: rgba(0, 212, 170, 0.08);
+}
+
+.spmv-callout[data-tone='warning'] { /* amber tint */
+  border-color: rgba(251, 191, 36, 0.35);
+  background: rgba(251, 191, 36, 0.08);
+}
+
+.spmv-callout-title {
+  margin: 0 0 8px;
+  font-size: 1rem;
+  font-weight: 800;
+  color: var(--spmv-ink-1);
+}
diff --git a/docs/.vitepress/theme/styles/tokens.css b/docs/.vitepress/theme/styles/tokens.css
new file mode 100644
index 0000000..276554f
--- /dev/null
+++ b/docs/.vitepress/theme/styles/tokens.css
@@ -0,0 +1,40 @@
+:root { /* Light-mode defaults for the VitePress brand colors and the project's --spmv-* tokens. */
+  --vp-c-brand-1: #5a8f00; /* text/link green: the darker shade the previous theme used, for contrast on light surfaces */
+  --vp-c-brand-2: #76b900; /* NVIDIA green */
+  --vp-c-brand-3: #93d522;
+  --vp-c-brand-soft: rgba(118, 185, 0, 0.14);
+
+  --spmv-brand-1: #76b900; /* decorative brand colors: gradients, diagram fills and strokes */
+  --spmv-brand-2: #00d4aa;
+  --spmv-ink-1: #0f172a; /* ink-1..3: text colors, strongest to most muted */
+  --spmv-ink-2: #334155;
+  --spmv-ink-3: #64748b;
+  --spmv-surface-1: #ffffff; /* surface-1..3: background layers */
+  --spmv-surface-2: #f8fafc;
+  --spmv-surface-3: #eef2f7;
+  --spmv-border: rgba(15, 23, 42, 0.12);
+  --spmv-shadow-lg: 0 24px 64px rgba(15, 23, 42, 0.12);
+  --spmv-shadow-md: 0 12px 30px rgba(15, 23, 42, 0.08);
+  --spmv-radius-xl: 24px;
+  --spmv-radius-lg: 20px;
+  --spmv-radius-md: 16px;
+  --spmv-radius-sm: 12px;
+  --spmv-max-width: 1240px;
+  --spmv-grid-gap: 24px;
+}
+
+.dark { /* Dark-mode overrides; tokens not listed here keep their :root values. */
+  --vp-c-brand-1: #93d522; /* lighter green for text on dark surfaces */
+  --vp-c-brand-2: #76b900;
+  --vp-c-brand-3: #5a8f00;
+
+  --spmv-ink-1: #f8fafc;
+  --spmv-ink-2: #cbd5e1;
+  --spmv-ink-3: #94a3b8;
+  --spmv-surface-1: #020617;
+  --spmv-surface-2: #0f172a;
+  --spmv-surface-3: #162033;
+  --spmv-border: rgba(148, 163, 184, 0.2);
+  --spmv-shadow-lg: 0 24px 64px rgba(2, 6, 23, 0.5);
+  --spmv-shadow-md: 0 12px 30px rgba(2, 6, 23, 0.35);
+}
diff --git a/docs/en/architecture/execution-pipeline.md b/docs/en/architecture/execution-pipeline.md
new file mode 100644
index 0000000..892f585
--- /dev/null
+++ b/docs/en/architecture/execution-pipeline.md
@@ -0,0 +1,28 @@
+# Execution Pipeline
+
+## Why this deserves its own page
+
+GPU SpMV is not just “launch a kernel.” The real engineering story is **how the matrix is analyzed, how kernel choice is made, how execution context is reused, and how the result is interpreted with confidence**.
+
+## Pipeline Breakdown
+
+1. **Input stage**: load CSR / ELL data structures and prepare the input vector.
+2. **Analysis stage**: compute `avg_nnz_per_row`, skewness, and row distribution characteristics.
+3. **Decision stage**: choose Scalar CSR, Vector CSR, Merge Path, or ELL.
+4. **Execution stage**: launch the GPU kernel and record timing / bandwidth metrics.
+5. **Validation stage**: compare against CPU reference behavior or established baselines.
+
+## Key Decisions
+
+| Observation | Decision |
+|:------------|:---------|
+| `avg_nnz_per_row < 4` | Scalar CSR to avoid wasting warp-scale resources |
+| Rows are uniform and low-skew | Vector CSR for stronger warp collaboration |
+| Row lengths are highly skewed | Merge Path to prioritize load balance |
+| Row width is nearly fixed | ELL kernel to prioritize coalesced access |
+
+## Read this together with
+
+- [Kernel Selection](/en/architecture/kernel-selection)
+- [Memory Layout](/en/architecture/memory-layout)
+- [Performance Methodology](/en/performance/methodology)
diff --git a/docs/en/architecture/overview.md b/docs/en/architecture/overview.md
index d357561..9c1a962 100644
--- a/docs/en/architecture/overview.md
+++ b/docs/en/architecture/overview.md
@@ -1,6 +1,6 @@
 # Architecture Overview
 
-GPU SpMV uses a layered architecture design with clear separation of storage, computation, and application layers.
+The architectural story of GPU SpMV is not just “what modules exist,” but **how matrix statistics, kernel choice, execution context, and validation fit together into an explainable engineering system**.
 
 ## System Architecture
 
@@ -84,8 +84,16 @@ Applications built on SpMV:
 - **Graph Neural Networks** — Sparse graph convolution
 - **Scientific Computing** — FEM, CFD
 
+## The three most important ideas on this page
+
+1. **How data flows** from sparse input to validated output.
+2. **Why automatic selection is justified** by `avg_nnz_per_row` and skewness rather than opaque tuning.
+3. **Why the system is trustworthy** thanks to resource management, explicit error semantics, CPU reference paths, and property tests.
+
 ## Related Documentation
 
 - [Kernel Selection](/en/architecture/kernel-selection)
+- [Execution Pipeline](/en/architecture/execution-pipeline)
 - [Memory Layout](/en/architecture/memory-layout)
+- [Reliability Constraints](/en/architecture/reliability)
 - [Spec-Driven Development](/en/architecture/spec-driven)
diff --git a/docs/en/architecture/reliability.md b/docs/en/architecture/reliability.md
new file mode 100644
index 0000000..4020d05
--- /dev/null
+++ b/docs/en/architecture/reliability.md
@@ -0,0 +1,25 @@
+# Reliability and Engineering Constraints
+
+## Where reliability comes from
+
+Reliability in this project is not “it seems to run.” It comes from three lines of evidence working together:
+
+1. **Explicit resource lifetime** through `CudaBuffer<T>` and execution-context abstractions instead of raw `cudaMalloc` / `cudaFree`.
+2. **Explicit error semantics** through `SpMVError` and CUDA checking macros.
+3. **Spec-to-test traceability** through OpenSpec requirements and property-test coverage.
+
+## Why this matters for a showcase project
+
+Interviewers and open-source readers trust a project more when it can answer:
+
+- How are resources released?
+- How are failures surfaced?
+- How should benchmark evidence be interpreted?
+- How are design changes traced?
+
+## Reliability Checklist
+
+- RAII wrappers for GPU resources
+- CPU reference paths for cross-checking
+- Property tests with at least 100 iterations
+- GitHub Pages used to publish architecture and references, not only marketing copy
diff --git a/docs/en/architecture/spec-driven.md b/docs/en/architecture/spec-driven.md
index 8593ee9..0c14fc5 100644
--- a/docs/en/architecture/spec-driven.md
+++ b/docs/en/architecture/spec-driven.md
@@ -89,5 +89,5 @@ Demonstrating Spec-Driven Development in interviews:
 
 ## References
 
-- [OpenSpec Specs](https://github.com/LessUp/gpu-spmv/tree/main/openspec)
+- [OpenSpec Specs](https://github.com/AICL-Lab/gpu-spmv/tree/main/openspec)
 - [Architecture Overview](/en/architecture/overview)
\ No newline at end of file
diff --git a/docs/en/changelog.md b/docs/en/changelog.md
index c8c2e74..35a0798 100644
--- a/docs/en/changelog.md
+++ b/docs/en/changelog.md
@@ -46,7 +46,7 @@ This is the first stable release of GPU SpMV, featuring complete CSR and ELL for
 - Doxygen-compatible documentation
 
 #### Documentation
-- Full documentation site at https://lessup.github.io/gpu-spmv/
+- Full documentation site at https://aicl-lab.github.io/gpu-spmv/
 - Bilingual README (English and Chinese)
 - API reference, performance guide, and code examples
 - Architecture documentation and design decision records
@@ -144,5 +144,5 @@ No breaking changes from pre-release versions. The API is now stable.
 
 ---
 
-[1.0.0]: https://github.com/LessUp/gpu-spmv/releases/tag/v1.0.0
-[0.1.0]: https://github.com/LessUp/gpu-spmv/tree/7d6dd0c
+[1.0.0]: https://github.com/AICL-Lab/gpu-spmv/releases/tag/v1.0.0
+[0.1.0]: https://github.com/AICL-Lab/gpu-spmv/tree/7d6dd0c
diff --git a/docs/en/citation.md b/docs/en/citation.md
index b818426..f87ce71 100644
--- a/docs/en/citation.md
+++ b/docs/en/citation.md
@@ -6,11 +6,11 @@ If you use GPU SpMV in your research, please cite:
 
 ```bibtex
 @software{gpu_spmv_2026,
-  author = {LessUp},
+  author = {AICL-Lab},
   title = {GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication},
   year = {2026},
   publisher = {GitHub},
-  url = {https://github.com/LessUp/gpu-spmv},
+  url = {https://github.com/AICL-Lab/gpu-spmv},
   version = {1.0.0}
 }
 ```
@@ -18,61 +18,14 @@ If you use GPU SpMV in your research, please cite:
 ## Text Format
 
 ```
-LessUp. GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication.
-GitHub repository, 2026. https://github.com/LessUp/gpu-spmv
+AICL-Lab. GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication.
+GitHub repository, 2026. https://github.com/AICL-Lab/gpu-spmv
 ```
 
 ---
 
-## Related Publications
+## Usage Guidance
 
-The algorithms implemented in this library are based on the following research:
-
-### Merge Path Algorithm
-
-1. **Merrill, D., & Garland, M. (2016)**. Merge-based parallel sparse matrix-vector multiplication. *Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC '16)*. IEEE.
-
-   ::: tip Key Contribution
-   The Merge Path algorithm enables perfect load balancing for irregular sparse matrices by partitioning work based on the merge operation between row pointers and work indices.
-   :::
-
-### Vectorized CSR
-
-2. **Bell, N., & Garland, M. (2009)**. Implementing sparse matrix-vector multiplication on throughput-oriented processors. *Proceedings of SC '09*. IEEE.
-
-3. **Bell, N., Dalton, S., & Olson, L. N. (2012)**. Exposing fine-grained parallelism in algebraic multigrid methods. *SIAM Journal on Scientific Computing*, 34(4), C170-C194.
-
-### ELL Format
-
-4. **Vázquez, F., Fernández, J. J., & Garzón, E. M. (2011)**. Automatic tuning of the sparse matrix vector product on GPUs based on the ELL-R-T format. *Concurrency and Computation: Practice and Experience*, 24(1), 1-20.
-
----
-
-## Algorithm References
-
-| Algorithm | Reference | Key Idea |
-|:----------|:----------|:---------|
-| Scalar CSR | Bell & Garland (2009) | One thread per row |
-| Vector CSR | Bell & Garland (2009) | One warp per row |
-| Merge Path | Merrill & Garland (2016) | Merge-based partitioning |
-| ELL Kernel | Vázquez et al. (2011) | Column-major coalesced access |
-
----
-
-## Benchmark Methodology
-
-Our benchmark methodology follows best practices from:
-
-- **SPAPT Benchmark Suite**: Standardized performance assessment for sparse computations
-- **SuiteSparse Matrix Collection**: Real-world test matrices
-- **GPU Performance Metrics**: Memory bandwidth utilization as primary metric
-
----
-
-## Acknowledgments
-
-This library builds upon the excellent work of the CUDA ecosystem:
-
-- NVIDIA cuSPARSE for reference implementations
-- Thrust library for parallel primitives
-- Google Test for testing infrastructure
\ No newline at end of file
+- Use the software citation above when citing the **repository itself**.
+- Also cite the relevant papers from [References](/en/references) when discussing the **algorithms behind the implementation**.
+- If you cite performance charts or benchmark claims, mention the hardware and link back to [Performance Methodology](/en/performance/methodology).
diff --git a/docs/en/contributing.md b/docs/en/contributing.md
index 3f7fb89..eec5015 100644
--- a/docs/en/contributing.md
+++ b/docs/en/contributing.md
@@ -14,7 +14,7 @@ Thank you for your interest in contributing to GPU SpMV!
 ### Clone and Build
 
 ```bash
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 cmake --preset default
 cmake --build --preset default
@@ -79,7 +79,7 @@ npm run dev
 
 ## Getting Help
 
-- Open an [Issue](https://github.com/LessUp/gpu-spmv/issues)
+- Open an [Issue](https://github.com/AICL-Lab/gpu-spmv/issues)
 - Check existing documentation
 - Review OpenSpec specs
 
diff --git a/docs/en/faq.md b/docs/en/faq.md
index f4f199d..b53b81b 100644
--- a/docs/en/faq.md
+++ b/docs/en/faq.md
@@ -198,4 +198,4 @@ If the above doesn't answer your question:
 
 1. Check [API Reference](/en/api/spmv) for detailed usage
 2. Check [Optimization Guide](/en/performance/optimization-guide) for performance tips
-3. Ask on [GitHub Issues](https://github.com/LessUp/gpu-spmv/issues)
+3. Ask on [GitHub Issues](https://github.com/AICL-Lab/gpu-spmv/issues)
diff --git a/docs/en/index.md b/docs/en/index.md
index 0b68138..7e53728 100644
--- a/docs/en/index.md
+++ b/docs/en/index.md
@@ -1,134 +1,53 @@
 ---
 layout: home
+title: GPU SpMV Technical Whitepaper
 ---
 
-<div class="home-header">
-  <div class="home-header-left">
-    <div class="home-logo">GPU</div>
-    <div>
-      <span class="home-title">GPU SpMV</span>
-      <span class="home-subtitle">Technical Whitepaper</span>
-    </div>
-  </div>
-  <div class="home-nav">
-    <a href="./whitepaper/">Whitepaper</a>
-    <a href="https://github.com/LessUp/gpu-spmv">GitHub</a>
-    <a href="../zh/">中文</a>
-  </div>
-</div>
-
-<div class="home-hero-tech">
-  <h1>Production-Grade CUDA Sparse Matrix-Vector Multiplication</h1>
-  <p class="hero-tagline">
-    High-performance SpMV achieving <strong>70%+ theoretical memory bandwidth</strong> on modern NVIDIA GPUs.
-    4 adaptive kernels, intelligent selection algorithm, comprehensive API.
-  </p>
-  <div class="hero-actions">
-    <a href="./whitepaper/" class="primary">Read the Whitepaper</a>
-    <a href="./quickstart" class="secondary">Quick Start</a>
-  </div>
-</div>
-
-<div class="home-metrics">
-  <div class="home-metric">
-    <div class="home-metric-value">70%+</div>
-    <div class="home-metric-label">Bandwidth Utilization</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">4</div>
-    <div class="home-metric-label">Adaptive Kernels</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">CSR+ELL</div>
-    <div class="home-metric-label">Sparse Formats</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">100+</div>
-    <div class="home-metric-label">Test Cases</div>
-  </div>
-</div>
-
-## Architecture Overview
-
-<div class="home-architecture">
-
-```mermaid
-flowchart LR
-    Input[Sparse Matrix] --> Analysis[Matrix Analysis]
-    Analysis --> Decision{Auto Select}
-    Decision -->|avg_nnz < 4| Scalar[Scalar CSR]
-    Decision -->|uniform rows| Vector[Vector CSR]
-    Decision -->|high skew| Merge[Merge Path]
-    Decision -->|column-major| ELL[ELL Kernel]
-    Scalar --> GPU[GPU Execution]
-    Vector --> GPU
-    Merge --> GPU
-    ELL --> GPU
-    GPU --> Result[Result Vector]
-```
-
-</div>
-
-## Technical Features
-
-<div class="feature-map">
-  <div class="feature-card">
-    <div class="feature-card-title">Kernel Selection Strategy</div>
-    <div class="feature-card-desc">
-      Automatic kernel selection based on matrix characteristics: avg_nnz, row length skewness.
-    </div>
-    <div class="feature-tags">
-      <a href="./architecture/kernel-selection" class="feature-tag">Details</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Merge Path Algorithm</div>
-    <div class="feature-card-desc">
-      Perfect load balancing for irregular sparsity patterns. O(nnz + m) work decomposition.
-    </div>
-    <div class="feature-tags">
-      <a href="./whitepaper/philosophy" class="feature-tag">Philosophy</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Production Quality</div>
-    <div class="feature-card-desc">
-      RAII resource management, semantic error codes, CudaBuffer abstraction, cross-platform.
-    </div>
-    <div class="feature-tags">
-      <a href="./api/spmv" class="feature-tag">API</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Spec-Driven Development</div>
-    <div class="feature-card-desc">
-      OpenSpec specification-driven workflow. Design decisions traceable, documentation as code.
-    </div>
-    <div class="feature-tags">
-      <a href="./architecture/spec-driven" class="feature-tag">Workflow</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Academic Rigor</div>
-    <div class="feature-card-desc">
-      Complete academic citation support, BibTeX format, related paper references.
-    </div>
-    <div class="feature-tags">
-      <a href="./citation" class="feature-tag">Citation</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Quick Start</div>
-    <div class="feature-card-desc">
-      <code>git clone https://github.com/LessUp/gpu-spmv.git</code>
-    </div>
-    <div class="feature-tags">
-      <a href="./quickstart" class="feature-tag">Guide</a>
-    </div>
-  </div>
-</div>
+<script setup lang="ts">
+import { siteData } from '../.vitepress/data/site'
+</script>
+
+<HeroEvidence
+  eyebrow="Technical Whitepaper"
+  :title="siteData.en.heroTitle"
+  :lead="siteData.en.heroLead"
+  :metrics="siteData.en.metrics"
+  primary-label="Read the Whitepaper"
+  primary-link="/en/whitepaper/"
+  secondary-label="View Architecture"
+  secondary-link="/en/architecture/overview"
+>
+  <ThemeAwareArt
+    title="Readable, Verifiable, Presentable"
+    caption="The site explains not only what the project does, but why its design and validation deserve attention."
+  />
+</HeroEvidence>
+
+<WhitepaperSection
+  eyebrow="Architecture"
+  title="Lead with conclusions, then evidence, then implementation"
+  lead="The landing page should help a reader decide quickly whether this project is worth deeper reading."
+>
+  <ArchitectureCanvas variant="overview-en" />
+</WhitepaperSection>
+
+<WhitepaperSection
+  eyebrow="Highlights"
+  title="Why this project is strong as a showcase"
+  lead="Because it combines CUDA performance work with engineering discipline, explainability, and documentation quality."
+>
+  <div class="spmv-card-grid cols-3">
+    <article class="spmv-surface-card spmv-section">
+      <h3>Performance-first</h3>
+      <p>Kernel choice, irregular sparsity behavior, and bandwidth utilization are presented as explicit decisions.</p>
+    </article>
+    <article class="spmv-surface-card spmv-section">
+      <h3>Engineering clarity</h3>
+      <p>The execution pipeline, memory layout, reliability story, and spec-driven workflow are all visible.</p>
+    </article>
+    <article class="spmv-surface-card spmv-section">
+      <h3>Interview-ready narrative</h3>
+      <p>A reviewer can understand the value proposition, evidence chain, and reading path directly from the site.</p>
+    </article>
+  </div>
+</WhitepaperSection>
diff --git a/docs/en/performance/benchmarks.md b/docs/en/performance/benchmarks.md
index d22e756..8a5a022 100644
--- a/docs/en/performance/benchmarks.md
+++ b/docs/en/performance/benchmarks.md
@@ -1,6 +1,12 @@
 # Benchmarks
 
-GPU SpMV performance test results on NVIDIA RTX 3090.
+<script setup lang="ts">
+import { benchmarkData } from '../../.vitepress/data/benchmarks'
+</script>
+
+This benchmark page is not only a table of numbers. Its purpose is to explain **what these results actually mean and how they should be interpreted**.
+
+<MetricStrip :items="benchmarkData.summary" />
 
 ## Test Environment
 
@@ -88,29 +94,15 @@ SpMV is memory bandwidth bound. Our implementation achieves 70%+ of theoretical
 - **Ampere (SM 8.6)**: Best performance
 - **Hopper (SM 9.0)**: Full support
 
-## Benchmark Method
-
-```cpp
-#include <spmv/benchmark.h>
-
-int main() {
-    CSRMatrix* csr = /* ... */;
-    csr_to_gpu(csr);
-
-    // Multiple runs for average
-    BenchmarkResult result = benchmark_spmv(csr, 100);
-
-    printf("Avg time: %.3f ms\n", result.avg_ms);
-    printf("Min time: %.3f ms\n", result.min_ms);
-    printf("Max time: %.3f ms\n", result.max_ms);
-    printf("Stddev: %.3f ms\n", result.stddev_ms);
-    printf("Bandwidth: %.1f GB/s\n", result.bandwidth_gb_s);
+## How to read these results
 
-    return 0;
-}
-```
+- **70%+ utilization** means the implementation is approaching a sensible memory-bound ceiling.
+- **ELL winning on regular patterns** does not mean it should be used universally; applicability and conversion cost still matter.
+- **Merge Path staying ahead on skewed matrices** is evidence that load balancing is the dominant concern there.
+- **The selector matters** because it turns those judgments into default behavior instead of a manual tuning burden.
 
 ## References
 
+- [Performance Methodology](/en/performance/methodology)
 - [Optimization Guide](/en/performance/optimization-guide)
-- [Kernel Selection](/en/architecture/kernel-selection)
\ No newline at end of file
+- [Kernel Selection](/en/architecture/kernel-selection)
diff --git a/docs/en/performance/methodology.md b/docs/en/performance/methodology.md
new file mode 100644
index 0000000..2ad695c
--- /dev/null
+++ b/docs/en/performance/methodology.md
@@ -0,0 +1,19 @@
+# Performance Methodology
+
+## Read the method before the numbers
+
+Benchmark numbers are not persuasive on their own. This page explains **under what conditions the measurements make sense, how they should be read, and which conclusions are safe to draw**.
+
+## Measurement Assumptions
+
+- GPU: NVIDIA RTX 3090 (Ampere)
+- Peak bandwidth: 936 GB/s
+- Primary metrics: time, bandwidth, utilization, variance
+- Main comparison: kernel choice across different sparsity patterns
+
+## Recommended Reading Order
+
+1. **Look for trends, not only peaks**: does the implementation consistently stay at or above 70% utilization?
+2. **Read matrix pattern together with kernel choice**: regular and highly skewed matrices should not be judged the same way.
+3. **Check whether the selector is explainable**: does the chosen kernel match the matrix statistics?
+4. **Look at variance**: a high average with a wide spread is weaker evidence.
diff --git a/docs/en/quickstart.md b/docs/en/quickstart.md
index 1a409da..7f9512d 100644
--- a/docs/en/quickstart.md
+++ b/docs/en/quickstart.md
@@ -24,7 +24,7 @@ nvidia-smi
 ### 1. Clone Repository
 
 ```bash
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 ```
 
diff --git a/docs/en/references.md b/docs/en/references.md
index 8b18a4e..1ed799a 100644
--- a/docs/en/references.md
+++ b/docs/en/references.md
@@ -1,78 +1,21 @@
-# Academic References
+# References
 
-GPU SpMV is built upon the following academic research.
+<script setup lang="ts">
+import { references } from '../.vitepress/data/references'
+</script>
 
-## Core Algorithms
+This page separates papers, comparable projects, and follow-up reading so readers can understand **what this project learned from and what ecosystem it belongs to**.
 
-### Merge-based Parallel SpMV
+## Core Papers
 
-> Merrill, D., & Garland, M. (2016). **Merge-based parallel sparse matrix-vector multiplication**. *ACM SIGPLAN Notices*, 51(8), 12-21.
+<CitationGrid :items="references.papers" />
 
-- **Contribution**: Proposed the Merge Path algorithm for perfect load balancing
-- **Applied to**: `MERGE_PATH` kernel
-- [DOI: 10.1145/3016078.285114](https://doi.org/10.1145/3016078.285114)
+## Representative Projects
 
-### Bell & Garland Survey
+<CitationGrid :items="references.projects" />
 
-> Bell, N., & Garland, M. (2009). **Implementing sparse matrix-vector multiplication on throughput-oriented processors**. *SC'09: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis*.
+## How to read these references
 
-- **Contribution**: CSR vs ELL format performance analysis, foundational GPU SpMV theory
-- **Applied to**: `VECTOR_CSR`, `ELL_KERNEL` design
-- [DOI: 10.1145/1654059.1654121](https://doi.org/10.1145/1654059.1654121)
-
-### CSR5 Format
-
-> Liu, Y., & Vuduc, R. (2018). **An adaptive algorithm for sparse matrix-vector multiplication on GPUs**. *IEEE Transactions on Parallel and Distributed Systems*.
-
-- **Contribution**: CSR5 format with adaptive load balancing
-- **Reference**: Understanding load distribution in irregular sparse matrices
-
-## GPU Computing
-
-### CUDA Best Practices
-
-> NVIDIA. (2024). **CUDA C++ Best Practices Guide**.
-
-- **Reference**: Memory coalescing, texture cache, warp synchronization
-- [Link](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/)
-
-### CUDA Programming Guide
-
-> NVIDIA. (2024). **CUDA C++ Programming Guide**.
-
-- **Reference**: CUDA execution model, memory hierarchy
-- [Link](https://docs.nvidia.com/cuda/cuda-c-programming-guide/)
-
-## PageRank
-
-> Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). **The PageRank citation ranking: Bringing order to the web**. *Stanford InfoLab*.
-
-- **Contribution**: Original PageRank algorithm
-- **Applied to**: `pagerank()` implementation
-
-## Related Projects
-
-| Project | Stars | Description | Key Takeaway |
-|:--------|:-----:|:------------|:-------------|
-| [Ginkgo](https://github.com/ginkgo-project/ginkgo) | 597 | High-performance linear algebra | Performance visualization |
-| [cuSPARSE](https://docs.nvidia.com/cuda/cusparse/) | N/A | NVIDIA official library | Performance baseline |
-| [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse) | 947 | Sparse matrix collection | Standard test data |
-| [Kokkos Kernels](https://github.com/kokkos/kokkos-kernels) | 300+ | Multi-backend sparse BLAS | Performance portability |
-
-## Cite This Project
-
-```bibtex
-@software{gpuspmv2024,
-  author = {LessUp},
-  title = {GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication},
-  year = {2024},
-  url = {https://github.com/LessUp/gpu-spmv}
-}
-```
-
-## Further Reading
-
-1. **GPU Architecture**: Understanding GPU memory hierarchy and execution model
-2. **Sparse Matrix Formats**: Trade-offs between different formats
-3. **Load Balancing**: Techniques for parallel load balancing
-4. **Memory Coalescing**: GPU memory access optimization
+1. Start with **Bell & Garland** for the classic GPU SpMV framing.
+2. Read **Merrill & Garland** to understand why Merge Path matters for irregular work distribution.
+3. Compare against **cuSPARSE / Ginkgo / SuiteSparse** to place this project inside the real sparse-computing ecosystem.
diff --git a/docs/en/whitepaper/index.md b/docs/en/whitepaper/index.md
index 705135d..9906b3d 100644
--- a/docs/en/whitepaper/index.md
+++ b/docs/en/whitepaper/index.md
@@ -1,100 +1,28 @@
-# Technical Whitepaper
+# GPU SpMV: Read the project as an engineering artifact
 
-## Executive Summary
+<CalloutPanel title="Project Positioning" tone="success">
+This site is written for interviewers, open-source readers, and performance engineers. The whitepaper landing page leads with conclusions, then points to the design decisions and evidence chain behind them.
+</CalloutPanel>
 
-GPU SpMV is a **production-grade CUDA library** implementing high-performance sparse matrix-vector multiplication (SpMV), achieving **70%+ of theoretical memory bandwidth** on modern NVIDIA GPUs.
+## Why this project deserves a whitepaper
 
-### Key Contributions
+- SpMV is a classic **memory-bandwidth-bound** workload, so performance depends more on access patterns than raw arithmetic throughput.
+- The interesting part is not only which kernel exists, but **why it is chosen, when it is chosen, and how that choice is justified**.
+- This project combines CUDA performance work with RAII resource management, explicit error handling, spec-driven development, and readable documentation.
 
-| Contribution | Impact |
-|:-------------|:-------|
-| **4 Optimized Kernels** | Adaptive kernel selection based on matrix characteristics |
-| **Merge Path Algorithm** | Perfect load balancing for irregular sparsity patterns |
-| **ELL Column-Major Layout** | Fully coalesced memory access for uniform matrices |
-| **Spec-Driven Development** | Complete design decision traceability |
+## What this whitepaper is meant to answer
 
-### Performance Highlights
+1. Why the problem matters and where the real bottlenecks are.
+2. What each optimized kernel and the selector are responsible for.
+3. How performance, engineering discipline, and explainability are tied together.
+4. Where to continue reading for architecture, API usage, performance interpretation, and references.
 
-| Matrix Size | Non-zeros | Kernel | Bandwidth Utilization |
-|:-----------:|:---------:|:-------|:---------------------:|
-| 10K × 10K | 500K | Vector CSR | **70.2%** |
-| 100K × 100K | 5M | Merge Path | **71.5%** |
-| 1M × 1M | 50M | Merge Path | **70.8%** |
+## Reading Path
 
-::: info Benchmark Environment
-NVIDIA RTX 3090 (Ampere architecture, theoretical bandwidth: 936 GB/s)
-:::
-
-### Target Audience
-
-- **Systems Architects**: Designing GPU-accelerated sparse computations
-- **HPC Engineers**: Optimizing memory-bound workloads
-- **Researchers**: Requiring reproducible, well-documented baselines
-- **Application Developers**: Building graph algorithms, iterative solvers
-
-### Document Structure
-
-| Section | Purpose |
-|:--------|:--------|
-| [Design Philosophy](/en/whitepaper/philosophy) | Architectural principles and trade-offs |
-| [Performance Analysis](/en/whitepaper/performance) | Detailed benchmark methodology and results |
-| [Architecture Overview](/en/architecture/overview) | System design documentation |
-| [API Reference](/en/api/spmv) | Complete API documentation |
-
----
-
-## Why SpMV Matters
-
-Sparse matrix-vector multiplication (SpMV) is a fundamental operation in:
-
-- **Graph Analytics**: PageRank, community detection, shortest path
-- **Scientific Computing**: Finite element analysis, CFD, iterative solvers
-- **Machine Learning**: Sparse neural networks, recommendation systems
-
-SpMV is inherently **memory-bound** — each non-zero element requires reading matrix data, column indices, and vector values, with minimal computation. Achieving high bandwidth utilization is the primary optimization challenge.
-
----
-
-## Design Overview
-
-```mermaid
-flowchart TB
-    subgraph Input["Input"]
-        Matrix[Sparse Matrix]
-        Vector[Dense Vector]
-    end
-    
-    subgraph Analysis["Matrix Analysis"]
-        NNZ[avg_nnz per row]
-        Skew[Skewness]
-        Pattern[Distribution Pattern]
-    end
-    
-    subgraph Selection["Kernel Selection"]
-        Decision{Auto Select}
-        Scalar[Scalar CSR<br/>avg_nnz < 4]
-        Vector[Vector CSR<br/>uniform rows]
-        Merge[Merge Path<br/>high skewness]
-        ELL[ELL Kernel<br/>column-major]
-    end
-    
-    subgraph Execution["GPU Execution"]
-        Compute[SpMV Computation]
-        Result[Result Vector]
-    end
-    
-    Matrix --> Analysis
-    Vector --> Execution
-    Analysis --> Decision
-    Decision --> Scalar
-    Decision --> Vector
-    Decision --> Merge
-    Decision --> ELL
-    Scalar --> Compute
-    Vector --> Compute
-    Merge --> Compute
-    ELL --> Compute
-    Compute --> Result
-```
-
-The library automatically selects the optimal kernel based on matrix characteristics, ensuring near-peak performance across diverse sparsity patterns.
\ No newline at end of file
+| Page | Role |
+|:-----|:-----|
+| [Design Philosophy](/en/whitepaper/philosophy) | See the architectural priorities and trade-offs |
+| [Performance Analysis](/en/whitepaper/performance) | Learn how to interpret the benchmark evidence |
+| [Architecture Overview](/en/architecture/overview) | Understand the execution pipeline and module boundaries |
+| [API Reference](/en/api/spmv) | Inspect the external interface |
+| [References](/en/references) | Review papers, projects, and further reading |
diff --git a/docs/en/whitepaper/performance.md b/docs/en/whitepaper/performance.md
index 30087b6..3a4c267 100644
--- a/docs/en/whitepaper/performance.md
+++ b/docs/en/whitepaper/performance.md
@@ -205,7 +205,7 @@ To reproduce these benchmarks:
 
 ```bash
 # Clone and build
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
 cmake --build build
diff --git a/docs/public/images/brand/logo-mark-dark.svg b/docs/public/images/brand/logo-mark-dark.svg
new file mode 100644
index 0000000..e81f001
--- /dev/null
+++ b/docs/public/images/brand/logo-mark-dark.svg
@@ -0,0 +1,11 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128" width="128" height="128">
+  <rect width="128" height="128" rx="24" fill="#020617"/>
+  <rect x="16" y="16" width="96" height="96" rx="24" fill="url(#brandDark)" opacity="0.18"/>
+  <path d="M24 84V36h16c10 0 18 6 18 16c0 6-3 11-8 14c6 2 10 7 10 14c0 11-9 20-22 20H24zm14-29h3c6 0 10-4 10-9s-4-8-10-8h-3v17zm0 18h5c7 0 11-4 11-10c0-5-4-9-11-9h-5v19z" fill="#f8fafc"/>
+  <defs>
+    <linearGradient id="brandDark" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#76b900"/>
+      <stop offset="100%" stop-color="#00d4aa"/>
+    </linearGradient>
+  </defs>
+</svg>
diff --git a/docs/public/images/brand/logo-mark-light.svg b/docs/public/images/brand/logo-mark-light.svg
new file mode 100644
index 0000000..6c2ce6f
--- /dev/null
+++ b/docs/public/images/brand/logo-mark-light.svg
@@ -0,0 +1,11 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128" width="128" height="128">
+  <rect width="128" height="128" rx="24" fill="#f8fafc"/>
+  <rect x="16" y="16" width="96" height="96" rx="24" fill="url(#brandLight)" opacity="0.12"/>
+  <path d="M24 84V36h16c10 0 18 6 18 16c0 6-3 11-8 14c6 2 10 7 10 14c0 11-9 20-22 20H24zm14-29h3c6 0 10-4 10-9s-4-8-10-8h-3v17zm0 18h5c7 0 11-4 11-10c0-5-4-9-11-9h-5v19z" fill="#0f172a"/>
+  <defs>
+    <linearGradient id="brandLight" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#76b900"/>
+      <stop offset="100%" stop-color="#00d4aa"/>
+    </linearGradient>
+  </defs>
+</svg>
diff --git a/docs/public/images/favicon.svg b/docs/public/images/favicon.svg
index c07e335..c5890bc 100644
--- a/docs/public/images/favicon.svg
+++ b/docs/public/images/favicon.svg
@@ -6,5 +6,5 @@
     </linearGradient>
   </defs>
   <rect width="32" height="32" rx="6" fill="url(#favGrad)"/>
-  <text x="16" y="22" text-anchor="middle" font-family="Inter, system-ui, sans-serif" font-size="14" font-weight="800" fill="#000">Sp</text>
+  <text x="16" y="22" text-anchor="middle" font-family="Inter, system-ui, sans-serif" font-size="14" font-weight="800" fill="#0F172A">Sp</text>
 </svg>
diff --git a/docs/public/images/logo.svg b/docs/public/images/logo.svg
index ce34797..38dcc5a 100644
--- a/docs/public/images/logo.svg
+++ b/docs/public/images/logo.svg
@@ -8,5 +8,5 @@
   <!-- Background -->
   <rect width="128" height="128" rx="24" fill="url(#logoGrad)"/>
   <!-- Text -->
-  <text x="64" y="82" text-anchor="middle" font-family="Inter, system-ui, sans-serif" font-size="48" font-weight="800" fill="#000">Sp</text>
+  <text x="64" y="82" text-anchor="middle" font-family="Inter, system-ui, sans-serif" font-size="48" font-weight="800" fill="#0F172A">Sp</text>
 </svg>
diff --git a/docs/public/images/og-image.svg b/docs/public/images/og-image.svg
index 0fd61b2..0f324bf 100644
--- a/docs/public/images/og-image.svg
+++ b/docs/public/images/og-image.svg
@@ -81,6 +81,6 @@
 
   <!-- Footer -->
   <text x="600" y="600" font-family="Inter, sans-serif" font-size="16" fill="#64748B" text-anchor="middle">
-    github.com/LessUp/gpu-spmv
+    github.com/AICL-Lab/gpu-spmv
   </text>
-</svg>
\ No newline at end of file
+</svg>
diff --git a/docs/public/images/social/og-dark.svg b/docs/public/images/social/og-dark.svg
new file mode 100644
index 0000000..4c5662f
--- /dev/null
+++ b/docs/public/images/social/og-dark.svg
@@ -0,0 +1,28 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 630" width="1200" height="630">
+  <defs>
+    <linearGradient id="darkBg" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#020617"/>
+      <stop offset="100%" stop-color="#0f172a"/>
+    </linearGradient>
+    <linearGradient id="darkAccent" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#76b900"/>
+      <stop offset="100%" stop-color="#00d4aa"/>
+    </linearGradient>
+  </defs>
+  <rect width="1200" height="630" fill="url(#darkBg)"/>
+  <rect x="72" y="80" width="180" height="180" rx="32" fill="#0f172a" stroke="#334155"/>
+  <rect x="96" y="104" width="132" height="132" rx="28" fill="url(#darkAccent)" opacity="0.18"/>
+  <text x="162" y="196" text-anchor="middle" font-family="Inter, sans-serif" font-size="84" font-weight="800" fill="#f8fafc">Sp</text>
+  <text x="320" y="182" font-family="Inter, sans-serif" font-size="72" font-weight="800" fill="#f8fafc">GPU SpMV</text>
+  <text x="320" y="246" font-family="Inter, sans-serif" font-size="26" font-weight="500" fill="#cbd5e1">Technical whitepaper, architecture, and evidence-driven documentation.</text>
+  <rect x="320" y="320" width="210" height="110" rx="24" fill="#111c30" stroke="#334155"/>
+  <rect x="550" y="320" width="210" height="110" rx="24" fill="#111c30" stroke="#334155"/>
+  <rect x="780" y="320" width="280" height="110" rx="24" fill="#111c30" stroke="#334155"/> <!-- 280 wide so the 11-glyph 34px monospace label starting at x=808 (about 224px) stays inside the card -->
+  <text x="348" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#93d522">70%+</text>
+  <text x="348" y="398" font-family="Inter, sans-serif" font-size="18" fill="#cbd5e1">Bandwidth utilization</text>
+  <text x="578" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#00d4aa">4</text>
+  <text x="578" y="398" font-family="Inter, sans-serif" font-size="18" fill="#cbd5e1">Adaptive kernels</text>
+  <text x="808" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#f8fafc">Docs + Code</text>
+  <text x="808" y="398" font-family="Inter, sans-serif" font-size="18" fill="#cbd5e1">Interview-ready showcase</text>
+  <text x="72" y="584" font-family="Inter, sans-serif" font-size="20" fill="#94a3b8">github.com/AICL-Lab/gpu-spmv</text>
+</svg>
diff --git a/docs/public/images/social/og-light.svg b/docs/public/images/social/og-light.svg
new file mode 100644
index 0000000..8e54e94
--- /dev/null
+++ b/docs/public/images/social/og-light.svg
@@ -0,0 +1,28 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1200 630" width="1200" height="630">
+  <defs>
+    <linearGradient id="lightBg" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#f8fafc"/>
+      <stop offset="100%" stop-color="#eef2f7"/>
+    </linearGradient>
+    <linearGradient id="lightAccent" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#76b900"/>
+      <stop offset="100%" stop-color="#00d4aa"/>
+    </linearGradient>
+  </defs>
+  <rect width="1200" height="630" fill="url(#lightBg)"/>
+  <rect x="72" y="80" width="180" height="180" rx="32" fill="#ffffff" stroke="#cbd5e1"/>
+  <rect x="96" y="104" width="132" height="132" rx="28" fill="url(#lightAccent)" opacity="0.12"/>
+  <text x="162" y="196" text-anchor="middle" font-family="Inter, sans-serif" font-size="84" font-weight="800" fill="#0f172a">Sp</text>
+  <text x="320" y="182" font-family="Inter, sans-serif" font-size="72" font-weight="800" fill="#0f172a">GPU SpMV</text>
+  <text x="320" y="246" font-family="Inter, sans-serif" font-size="26" font-weight="500" fill="#475569">Technical whitepaper, architecture, and evidence-driven documentation.</text>
+  <rect x="320" y="320" width="210" height="110" rx="24" fill="#ffffff" stroke="#cbd5e1"/>
+  <rect x="550" y="320" width="210" height="110" rx="24" fill="#ffffff" stroke="#cbd5e1"/>
+  <rect x="780" y="320" width="280" height="110" rx="24" fill="#ffffff" stroke="#cbd5e1"/> <!-- 280 wide so the 11-glyph 34px monospace label starting at x=808 (about 224px) stays inside the card -->
+  <text x="348" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#76b900">70%+</text>
+  <text x="348" y="398" font-family="Inter, sans-serif" font-size="18" fill="#475569">Bandwidth utilization</text>
+  <text x="578" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#00d4aa">4</text>
+  <text x="578" y="398" font-family="Inter, sans-serif" font-size="18" fill="#475569">Adaptive kernels</text>
+  <text x="808" y="365" font-family="JetBrains Mono, monospace" font-size="34" font-weight="700" fill="#0f172a">Docs + Code</text>
+  <text x="808" y="398" font-family="Inter, sans-serif" font-size="18" fill="#475569">Interview-ready showcase</text>
+  <text x="72" y="584" font-family="Inter, sans-serif" font-size="20" fill="#64748b">github.com/AICL-Lab/gpu-spmv</text>
+</svg>
diff --git a/docs/scripts/verify-site.mjs b/docs/scripts/verify-site.mjs
index 7106669..8134b16 100644
--- a/docs/scripts/verify-site.mjs
+++ b/docs/scripts/verify-site.mjs
@@ -1,14 +1,37 @@
-import { readFileSync } from 'node:fs'
+import { existsSync, readdirSync, readFileSync } from 'node:fs'
 import { join } from 'node:path'
 
 const root = process.cwd()
 const canonicalRepo = 'AICL-Lab/gpu-spmv'
 
+function collectTextFiles(dirPath) {
+  const entries = readdirSync(dirPath, { withFileTypes: true })
+  const files = []
+
+  for (const entry of entries) {
+    const fullPath = join(dirPath, entry.name)
+    if (entry.isDirectory()) {
+      files.push(...collectTextFiles(fullPath))
+      continue
+    }
+    if (/\.(md|ts|yml|svg)$/.test(entry.name)) {
+      files.push(fullPath)
+    }
+  }
+
+  return files
+}
+
 const files = {
   readme: join(root, '..', 'README.md'),
   config: join(root, '.vitepress', 'config.ts'),
   pages: join(root, '..', '.github', 'workflows', 'pages.yml'),
-  index: join(root, 'index.md')
+  index: join(root, 'index.md'),
+  themeIndex: join(root, '.vitepress', 'theme', 'index.ts'),
+  zhHome: join(root, 'zh', 'index.md'),
+  enHome: join(root, 'en', 'index.md'),
+  zhWhitepaper: join(root, 'zh', 'whitepaper', 'index.md'),
+  enWhitepaper: join(root, 'en', 'whitepaper', 'index.md')
 }
 
 const contents = Object.fromEntries(
@@ -33,6 +56,126 @@ if (/useRouter\(|router\.go\('\/(zh|en)\//.test(contents.index)) {
   failures.push('root docs index still auto-redirects by locale')
 }
 
+const requiredThemeFiles = [
+  join(root, '.vitepress', 'theme', 'Layout.vue'),
+  join(root, '.vitepress', 'theme', 'styles', 'tokens.css'),
+  join(root, '.vitepress', 'theme', 'styles', 'base.css'),
+  join(root, '.vitepress', 'theme', 'styles', 'home.css'),
+  join(root, '.vitepress', 'theme', 'styles', 'paper.css'),
+  join(root, '.vitepress', 'theme', 'styles', 'citation.css'),
+  join(root, '.vitepress', 'theme', 'styles', 'diagram.css'),
+  join(root, '.vitepress', 'theme', 'components', 'HeroEvidence.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'MetricStrip.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'WhitepaperSection.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'ArchitectureCanvas.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'CitationGrid.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'ThemeAwareArt.vue'),
+  join(root, '.vitepress', 'theme', 'components', 'CalloutPanel.vue'),
+  join(root, '.vitepress', 'data', 'site.ts')
+]
+
+const requiredAssetFiles = [
+  join(root, 'public', 'images', 'brand', 'logo-mark-light.svg'),
+  join(root, 'public', 'images', 'brand', 'logo-mark-dark.svg'),
+  join(root, 'public', 'images', 'social', 'og-light.svg'),
+  join(root, 'public', 'images', 'social', 'og-dark.svg')
+]
+
+const requiredContentFiles = [
+  join(root, '.vitepress', 'data', 'references.ts'),
+  join(root, '.vitepress', 'data', 'benchmarks.ts'),
+  join(root, 'zh', 'architecture', 'execution-pipeline.md'),
+  join(root, 'en', 'architecture', 'execution-pipeline.md'),
+  join(root, 'zh', 'architecture', 'reliability.md'),
+  join(root, 'en', 'architecture', 'reliability.md'),
+  join(root, 'zh', 'performance', 'methodology.md'),
+  join(root, 'en', 'performance', 'methodology.md')
+]
+
+for (const filePath of [...requiredThemeFiles, ...requiredAssetFiles, ...requiredContentFiles]) {
+  if (!existsSync(filePath)) {
+    failures.push(`missing theme file: ${filePath.replace(`${root}/`, '')}`)
+  }
+}
+
+const themeIndexChecks = [
+  'HeroEvidence',
+  'MetricStrip',
+  'WhitepaperSection',
+  'ArchitectureCanvas',
+  'CitationGrid',
+  'ThemeAwareArt',
+  'CalloutPanel'
+]
+
+for (const token of themeIndexChecks) {
+  if (!contents.themeIndex.includes(token)) {
+    failures.push(`theme index missing component registration: ${token}`)
+  }
+}
+
+if (!contents.zhHome.includes('<HeroEvidence') || !contents.zhHome.includes('<ArchitectureCanvas')) {
+  failures.push('zh homepage has not been rebuilt with theme components')
+}
+
+if (!contents.enHome.includes('<HeroEvidence') || !contents.enHome.includes('<ArchitectureCanvas')) {
+  failures.push('en homepage has not been rebuilt with theme components')
+}
+
+if (!contents.zhWhitepaper.includes('<CalloutPanel')) {
+  failures.push('zh whitepaper landing page missing positioning callout')
+}
+
+if (!contents.enWhitepaper.includes('<CalloutPanel')) {
+  failures.push('en whitepaper landing page missing positioning callout')
+}
+
+if (!contents.config.includes("link: '/zh/references'")) {
+  failures.push('zh nav missing references entry')
+}
+
+if (!contents.config.includes("link: '/en/references'")) {
+  failures.push('en nav missing references entry')
+}
+
+if (!contents.config.includes("light: '/images/brand/logo-mark-light.svg'")) {
+  failures.push('config missing light-mode logo asset')
+}
+
+if (!contents.config.includes("dark: '/images/brand/logo-mark-dark.svg'")) {
+  failures.push('config missing dark-mode logo asset')
+}
+
+if (!contents.config.includes("`${base}images/social/og-dark.svg`")) {
+  failures.push('config missing social og image upgrade')
+}
+
+if (!contents.config.includes("link: '/zh/architecture/execution-pipeline'")) {
+  failures.push('zh sidebar missing execution pipeline entry')
+}
+
+if (!contents.config.includes("link: '/en/architecture/execution-pipeline'")) {
+  failures.push('en sidebar missing execution pipeline entry')
+}
+
+if (!contents.config.includes("link: '/zh/performance/methodology'")) {
+  failures.push('zh sidebar missing methodology entry')
+}
+
+if (!contents.config.includes("link: '/en/performance/methodology'")) {
+  failures.push('en sidebar missing methodology entry')
+}
+
+const docsCorpus = collectTextFiles(join(root, 'zh'))
+  .concat(collectTextFiles(join(root, 'en')))
+  .concat([join(root, '..', 'README.md')])
+  .map((filePath) => readFileSync(filePath, 'utf8'))
+  .join('\n')
+
+if (/LessUp\/gpu-spmv|github\.com\/LessUp|lessup\.github\.io\/gpu-spmv/.test(docsCorpus)) {
+  failures.push('legacy LessUp references still exist in docs corpus')
+}
+
 if (failures.length > 0) {
   console.error('verify-site failed:')
   for (const failure of failures) {
diff --git a/docs/zh/architecture/execution-pipeline.md b/docs/zh/architecture/execution-pipeline.md
new file mode 100644
index 0000000..61dc1c3
--- /dev/null
+++ b/docs/zh/architecture/execution-pipeline.md
@@ -0,0 +1,28 @@
+# 执行流水线
+
+## 为什么要单独讲执行流水线
+
+GPU SpMV 的难点不在于“调用一个 kernel”，而在于 **输入矩阵如何被分析、如何做 kernel 选择、如何复用执行上下文，以及如何解释结果是否可信**。
+
+## Pipeline 分解
+
+1. **输入阶段**：加载 CSR / ELL 数据结构，准备输入向量。
+2. **分析阶段**：统计 `avg_nnz_per_row`、偏斜度和行分布模式。
+3. **决策阶段**：基于统计结果选择 Scalar CSR、Vector CSR、Merge Path 或 ELL。
+4. **执行阶段**：调度 GPU kernel，记录时间和带宽指标。
+5. **验证阶段**：与 CPU 参考结果或既有基线做一致性检查。
+
+## 关键判断
+
+| 现象 | 决策 |
+|:-----|:-----|
+| `avg_nnz_per_row < 4` | Scalar CSR，避免 warp 级资源浪费 |
+| 行长度均匀、偏斜度低 | Vector CSR，提升 warp 内协作效率 |
+| 行长度高度不均 | Merge Path，优先负载均衡 |
+| 行宽近似固定 | ELL kernel，优先合并访存 |
+
+## 这个页面应该和什么一起看
+
+- [Kernel 选择策略](/zh/architecture/kernel-selection)
+- [内存布局](/zh/architecture/memory-layout)
+- [性能方法学](/zh/performance/methodology)
diff --git a/docs/zh/architecture/overview.md b/docs/zh/architecture/overview.md
index 55137c5..02cb39c 100644
--- a/docs/zh/architecture/overview.md
+++ b/docs/zh/architecture/overview.md
@@ -1,6 +1,6 @@
 # 架构概览
 
-GPU SpMV 采用分层架构设计，清晰分离存储、计算和应用层。
+GPU SpMV 的架构重点不是“模块图长什么样”，而是 **如何把矩阵统计、kernel 选择、执行上下文和验证链路串成可解释的工程系统**。
 
 ## 系统架构
 
@@ -84,40 +84,16 @@ graph TB
 - **图神经网络** — 稀疏图卷积
 - **科学计算** — 有限元、CFD
 
-## 设计亮点
+## 这份架构总览最重要的三件事
 
-### 1. RAII 资源管理
-
-```cpp
-// 自动生命周期管理，防止内存泄漏
-class CudaBuffer {
-public:
-    explicit CudaBuffer(size_t n) { cudaMalloc(&ptr_, n * sizeof(T)); }
-    ~CudaBuffer() { cudaFree(ptr_); }
-    // 禁用拷贝，允许移动
-};
-```
-
-### 2. 执行上下文
-
-```cpp
-// 缓存纹理对象，避免重复创建
-SpMVExecutionContext ctx;
-for (int i = 0; i < n_iter; i++) {
-    spmv_csr(csr, d_x, d_y, &config, n, &ctx);
-    // 纹理对象被复用
-}
-```
-
-### 3. 自动 Kernel 选择
-
-```cpp
-// 基于矩阵特征自动选择最优 Kernel
-SpMVConfig config = spmv_auto_config(csr);
-```
+1. **数据怎么流动**：输入矩阵先被分析，再决定走哪条执行路径。
+2. **为什么自动选择成立**：不是玄学 heuristics，而是围绕 `avg_nnz_per_row` 与偏斜度展开。
+3. **为什么它可信**：资源管理、错误语义、CPU 参考路径和 property tests 共同形成约束。
 
 ## 相关文档
 
 - [Kernel 选择策略](/zh/architecture/kernel-selection)
+- [执行流水线](/zh/architecture/execution-pipeline)
 - [内存布局](/zh/architecture/memory-layout)
+- [可靠性约束](/zh/architecture/reliability)
 - [Spec-Driven 开发](/zh/architecture/spec-driven)
diff --git a/docs/zh/architecture/reliability.md b/docs/zh/architecture/reliability.md
new file mode 100644
index 0000000..890b668
--- /dev/null
+++ b/docs/zh/architecture/reliability.md
@@ -0,0 +1,25 @@
+# 可靠性与工程约束
+
+## 可靠性来自哪里
+
+这个项目的可靠性不是靠“看起来能跑”，而是来自三条线同时成立：
+
+1. **资源生命周期明确**：使用 `CudaBuffer<T>` 和执行上下文抽象，避免裸 `cudaMalloc` / `cudaFree`。
+2. **错误语义明确**：通过 `SpMVError` 和 CUDA 检查宏把失败显式暴露出来。
+3. **规范与测试闭环**：OpenSpec 提供需求来源，property tests 提供回归保护。
+
+## 为什么这对展示项目很重要
+
+在面试或开源展示中，如果一个项目能回答下面这些问题，读者会更容易信任它：
+
+- 资源怎么释放？
+- 失败怎么暴露？
+- benchmark 数据怎么解释？
+- 设计变化如何追溯？
+
+## 可靠性清单
+
+- RAII 封装 GPU 资源
+- CPU 参考实现用于交叉验证
+- property tests ≥ 100 次迭代
+- GitHub Pages 把设计与引用一起公开
diff --git a/docs/zh/architecture/spec-driven.md b/docs/zh/architecture/spec-driven.md
index 100ef55..e8c87b6 100644
--- a/docs/zh/architecture/spec-driven.md
+++ b/docs/zh/architecture/spec-driven.md
@@ -157,5 +157,5 @@ flowchart LR
 
 ## 参考
 
-- [OpenSpec 规范](https://github.com/LessUp/gpu-spmv/tree/main/openspec)
+- [OpenSpec 规范](https://github.com/AICL-Lab/gpu-spmv/tree/main/openspec)
 - [架构概览](/zh/architecture/overview)
\ No newline at end of file
diff --git a/docs/zh/changelog.md b/docs/zh/changelog.md
index 9e2c329..398571b 100644
--- a/docs/zh/changelog.md
+++ b/docs/zh/changelog.md
@@ -46,7 +46,7 @@ This is the first stable release of GPU SpMV, featuring complete CSR and ELL for
 - Doxygen-compatible documentation
 
 #### Documentation
-- Full documentation site at https://lessup.github.io/gpu-spmv/
+- Full documentation site at https://aicl-lab.github.io/gpu-spmv/
 - Bilingual README (English and Chinese)
 - API reference, performance guide, and code examples
 - Architecture documentation and design decision records
@@ -144,5 +144,5 @@ No breaking changes from pre-release versions. The API is now stable.
 
 ---
 
-[1.0.0]: https://github.com/LessUp/gpu-spmv/releases/tag/v1.0.0
-[0.1.0]: https://github.com/LessUp/gpu-spmv/tree/7d6dd0c
+[1.0.0]: https://github.com/AICL-Lab/gpu-spmv/releases/tag/v1.0.0
+[0.1.0]: https://github.com/AICL-Lab/gpu-spmv/tree/7d6dd0c
diff --git a/docs/zh/citation.md b/docs/zh/citation.md
index 6f84cff..4011448 100644
--- a/docs/zh/citation.md
+++ b/docs/zh/citation.md
@@ -6,11 +6,11 @@
 
 ```bibtex
 @software{gpu_spmv_2026,
-  author = {LessUp},
+  author = {AICL-Lab},
   title = {GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication},
   year = {2026},
   publisher = {GitHub},
-  url = {https://github.com/LessUp/gpu-spmv},
+  url = {https://github.com/AICL-Lab/gpu-spmv},
   version = {1.0.0}
 }
 ```
@@ -18,61 +18,14 @@
 ## 文本格式
 
 ```
-LessUp. GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication.
-GitHub repository, 2026. https://github.com/LessUp/gpu-spmv
+AICL-Lab. GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication.
+GitHub repository, 2026. https://github.com/AICL-Lab/gpu-spmv
 ```
 
 ---
 
-## 相关论文
+## 使用建议
 
-本库实现的算法基于以下研究：
-
-### Merge Path 算法
-
-1. **Merrill, D., & Garland, M. (2016)**. Merge-based parallel sparse matrix-vector multiplication. *Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis (SC '16)*. IEEE.
-
-   ::: tip 核心贡献
-   Merge Path 算法通过基于行指针和工作索引的合并操作进行工作分区，为不规则稀疏矩阵实现完美负载均衡。
-   :::
-
-### 向量化 CSR
-
-2. **Bell, N., & Garland, M. (2009)**. Implementing sparse matrix-vector multiplication on throughput-oriented processors. *Proceedings of SC '09*. IEEE.
-
-3. **Bell, N., Dalton, S., & Olson, L. N. (2012)**. Exposing fine-grained parallelism in algebraic multigrid methods. *SIAM Journal on Scientific Computing*, 34(4), C170-C194.
-
-### ELL 格式
-
-4. **Vázquez, F., Fernández, J. J., & Garzón, E. M. (2011)**. Automatic tuning of the sparse matrix vector product on GPUs based on the ELL-R-T format. *Concurrency and Computation: Practice and Experience*, 24(1), 1-20.
-
----
-
-## 算法参考
-
-| 算法 | 参考文献 | 核心思想 |
-|:-----|:---------|:---------|
-| Scalar CSR | Bell & Garland (2009) | 每行一线程 |
-| Vector CSR | Bell & Garland (2009) | 每行一 warp |
-| Merge Path | Merrill & Garland (2016) | 基于合并的分区 |
-| ELL Kernel | Vázquez et al. (2011) | 列主序合并访存 |
-
----
-
-## 基准测试方法
-
-我们的基准测试方法遵循以下最佳实践：
-
-- **SPAPT 基准测试套件**：稀疏计算标准化性能评估
-- **SuiteSparse 矩阵集**：真实世界测试矩阵
-- **GPU 性能指标**：内存带宽利用率作为主要指标
-
----
-
-## 致谢
-
-本库基于 CUDA 生态系统的优秀工作：
-
-- NVIDIA cuSPARSE 提供参考实现
-- Thrust 库提供并行原语
-- Google Test 提供测试基础设施
\ No newline at end of file
+- 如果引用的是**代码仓库**，请使用上面的软件引用格式。
+- 如果引用的是**算法来源**，请同时引用 [学术参考](/zh/references) 中对应论文。
+- 如果引用的是**性能数据或图示**，请在正文中说明硬件与方法学条件，并链接到 [性能方法学](/zh/performance/methodology)。
diff --git a/docs/zh/contributing.md b/docs/zh/contributing.md
index e945638..f11b037 100644
--- a/docs/zh/contributing.md
+++ b/docs/zh/contributing.md
@@ -14,7 +14,7 @@
 ### 克隆和构建
 
 ```bash
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 cmake --preset default
 cmake --build --preset default
@@ -79,7 +79,7 @@ npm run dev
 
 ## 获取帮助
 
-- 提交 [Issue](https://github.com/LessUp/gpu-spmv/issues)
+- 提交 [Issue](https://github.com/AICL-Lab/gpu-spmv/issues)
 - 查看现有文档
 - 阅读 OpenSpec 规范
 
diff --git a/docs/zh/faq.md b/docs/zh/faq.md
index 8baaf3c..85a36f3 100644
--- a/docs/zh/faq.md
+++ b/docs/zh/faq.md
@@ -198,4 +198,4 @@ auto end = std::chrono::high_resolution_clock::now();
 
 1. 查看 [API 参考](/zh/api/spmv) 了解详细用法
 2. 查看 [性能指南](/zh/performance/optimization-guide) 了解优化技巧
-3. 在 [GitHub Issues](https://github.com/LessUp/gpu-spmv/issues) 提问
+3. 在 [GitHub Issues](https://github.com/AICL-Lab/gpu-spmv/issues) 提问
diff --git a/docs/zh/index.md b/docs/zh/index.md
index 2ea3faf..f99e284 100644
--- a/docs/zh/index.md
+++ b/docs/zh/index.md
@@ -1,134 +1,53 @@
 ---
 layout: home
+title: GPU SpMV 技术白皮书
 ---
 
-<div class="home-header">
-  <div class="home-header-left">
-    <div class="home-logo">GPU</div>
-    <div>
-      <span class="home-title">GPU SpMV</span>
-      <span class="home-subtitle">技术白皮书</span>
-    </div>
-  </div>
-  <div class="home-nav">
-    <a href="./whitepaper/">白皮书</a>
-    <a href="https://github.com/LessUp/gpu-spmv">GitHub</a>
-    <a href="../en/">English</a>
-  </div>
-</div>
-
-<div class="home-hero-tech">
-  <h1>生产级 CUDA 稀疏矩阵向量乘法</h1>
-  <p class="hero-tagline">
-    高性能稀疏矩阵向量乘法（SpMV），在现代 NVIDIA GPU 上实现 <strong>70%+ 理论内存带宽利用率</strong>。
-    4 个自适应内核、智能选择算法、完整 API。
-  </p>
-  <div class="hero-actions">
-    <a href="./whitepaper/" class="primary">阅读白皮书</a>
-    <a href="./quickstart" class="secondary">快速开始</a>
-  </div>
-</div>
-
-<div class="home-metrics">
-  <div class="home-metric">
-    <div class="home-metric-value">70%+</div>
-    <div class="home-metric-label">带宽利用率</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">4</div>
-    <div class="home-metric-label">自适应内核</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">CSR+ELL</div>
-    <div class="home-metric-label">稀疏格式</div>
-  </div>
-  <div class="home-metric">
-    <div class="home-metric-value">100+</div>
-    <div class="home-metric-label">测试用例</div>
-  </div>
-</div>
-
-## 架构概览
-
-<div class="home-architecture">
-
-```mermaid
-flowchart LR
-    Input[稀疏矩阵] --> Analysis[矩阵分析]
-    Analysis --> Decision{自动选择}
-    Decision -->|avg_nnz < 4| Scalar[Scalar CSR]
-    Decision -->|均匀行| Vector[Vector CSR]
-    Decision -->|高偏斜| Merge[Merge Path]
-    Decision -->|列主序| ELL[ELL 内核]
-    Scalar --> GPU[GPU 执行]
-    Vector --> GPU
-    Merge --> GPU
-    ELL --> GPU
-    GPU --> Result[结果向量]
-```
-
-</div>
-
-## 技术特性
-
-<div class="feature-map">
-  <div class="feature-card">
-    <div class="feature-card-title">内核选择策略</div>
-    <div class="feature-card-desc">
-      基于矩阵特征自动选择最优内核：平均非零元数、行长度偏斜度。
-    </div>
-    <div class="feature-tags">
-      <a href="./architecture/kernel-selection" class="feature-tag">详细说明</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Merge Path 算法</div>
-    <div class="feature-card-desc">
-      针对不规则稀疏模式的完美负载均衡，O(nnz + m) 工作分解。
-    </div>
-    <div class="feature-tags">
-      <a href="./whitepaper/philosophy" class="feature-tag">设计哲学</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">生产级质量</div>
-    <div class="feature-card-desc">
-      RAII 资源管理、语义化错误码、CudaBuffer 抽象、跨平台支持。
-    </div>
-    <div class="feature-tags">
-      <a href="./api/spmv" class="feature-tag">API</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">Spec-Driven 开发</div>
-    <div class="feature-card-desc">
-      OpenSpec 规范驱动工作流，设计决策可追溯，文档即代码。
-    </div>
-    <div class="feature-tags">
-      <a href="./architecture/spec-driven" class="feature-tag">工作流</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">学术严谨</div>
-    <div class="feature-card-desc">
-      完整的学术引用支持、BibTeX 格式、相关论文参考。
-    </div>
-    <div class="feature-tags">
-      <a href="./citation" class="feature-tag">引用格式</a>
-    </div>
-  </div>
-
-  <div class="feature-card">
-    <div class="feature-card-title">快速开始</div>
-    <div class="feature-card-desc">
-      <code>git clone https://github.com/LessUp/gpu-spmv.git</code>
-    </div>
-    <div class="feature-tags">
-      <a href="./quickstart" class="feature-tag">详细指南</a>
-    </div>
-  </div>
-</div>
+<script setup lang="ts">
+import { siteData } from '../.vitepress/data/site'
+</script>
+
+<HeroEvidence
+  eyebrow="Technical Whitepaper"
+  :title="siteData.zh.heroTitle"
+  :lead="siteData.zh.heroLead"
+  :metrics="siteData.zh.metrics"
+  primary-label="阅读白皮书"
+  primary-link="/zh/whitepaper/"
+  secondary-label="查看架构"
+  secondary-link="/zh/architecture/overview"
+>
+  <ThemeAwareArt
+    title="SpMV as an Engineering Artifact"
+    caption="性能、架构、验证链路与引用体系被一起展示，而不是只给源码。"
+  />
+</HeroEvidence>
+
+<WhitepaperSection
+  eyebrow="Architecture"
+  title="先给结论，再给证据，再给设计"
+  lead="首页的任务不是罗列细节，而是帮助读者快速判断：这个项目值不值得深入读。"
+>
+  <ArchitectureCanvas variant="overview-zh" />
+</WhitepaperSection>
+
+<WhitepaperSection
+  eyebrow="Highlights"
+  title="为什么这个项目适合拿来展示"
+  lead="因为它不仅讲 CUDA kernel，还把工程规范、性能推理和文档表达放在了一起。"
+>
+  <div class="spmv-card-grid cols-3">
+    <article class="spmv-surface-card spmv-section">
+      <h3>性能导向</h3>
+      <p>围绕内存带宽利用率、矩阵分布与 kernel 选择给出明确论证。</p>
+    </article>
+    <article class="spmv-surface-card spmv-section">
+      <h3>工程可解释</h3>
+      <p>把执行流水线、数据布局、错误处理与 spec-driven workflow 全部显式化。</p>
+    </article>
+    <article class="spmv-surface-card spmv-section">
+      <h3>适合面试与开源展示</h3>
+      <p>首页就能看到项目定位、亮点、证据链与延伸阅读路径。</p>
+    </article>
+  </div>
+</WhitepaperSection>
diff --git a/docs/zh/performance/benchmarks.md b/docs/zh/performance/benchmarks.md
index 3ac3f10..49c63be 100644
--- a/docs/zh/performance/benchmarks.md
+++ b/docs/zh/performance/benchmarks.md
@@ -1,6 +1,12 @@
 # 基准测试
 
-GPU SpMV 在 NVIDIA RTX 3090 上的性能测试结果。
+<script setup lang="ts">
+import { benchmarkData } from '../../.vitepress/data/benchmarks'
+</script>
+
+GPU SpMV 的 benchmark 页面不只罗列数字，而是帮助读者理解 **这些数字说明了什么，不说明什么**。
+
+<MetricStrip :items="benchmarkData.summary" />
 
 ## 测试环境
 
@@ -88,29 +94,15 @@ SpMV 是内存带宽受限的计算，我们的实现达到 70%+ 的理论带宽
 - **Ampere (SM 8.6)**: 最佳性能
 - **Hopper (SM 9.0)**: 完全支持
 
-## 基准测试方法
-
-```cpp
-#include <spmv/benchmark.h>
-
-int main() {
-    CSRMatrix* csr = /* ... */;
-    csr_to_gpu(csr);
-
-    // 多次运行取平均
-    BenchmarkResult result = benchmark_spmv(csr, 100);
-
-    printf("Avg time: %.3f ms\n", result.avg_ms);
-    printf("Min time: %.3f ms\n", result.min_ms);
-    printf("Max time: %.3f ms\n", result.max_ms);
-    printf("Stddev: %.3f ms\n", result.stddev_ms);
-    printf("Bandwidth: %.1f GB/s\n", result.bandwidth_gb_s);
+## 如何阅读这些结果
 
-    return 0;
-}
-```
+- **70%+ 带宽利用率** 说明实现已经接近“受限于访存”的合理上界。
+- **ELL 在规则模式下更高**，并不意味着它适合所有矩阵；格式转换和适用范围必须一起考虑。
+- **Merge Path 在高偏斜分布下稳定领先**，说明负载均衡确实是这类矩阵的第一问题。
+- **自动选择器的价值** 在于把这些判断变成默认能力，而不是要求用户手工猜测。
 
 ## 参考
 
+- [性能方法学](/zh/performance/methodology)
 - [优化指南](/zh/performance/optimization-guide)
-- [Kernel 选择策略](/zh/architecture/kernel-selection)
\ No newline at end of file
+- [Kernel 选择策略](/zh/architecture/kernel-selection)
diff --git a/docs/zh/performance/methodology.md b/docs/zh/performance/methodology.md
new file mode 100644
index 0000000..2edb0a4
--- /dev/null
+++ b/docs/zh/performance/methodology.md
@@ -0,0 +1,19 @@
+# 性能方法学
+
+## 先看方法，再看数字
+
+如果不先交代测量方法，性能数字本身没有说服力。这个页面的目标是帮助读者理解：**这些 benchmark 结果在什么条件下成立，应该怎样阅读，哪些结论可以安全地得出。**
+
+## 测量前提
+
+- GPU：NVIDIA RTX 3090（Ampere）
+- 理论带宽：936 GB/s
+- 关注指标：时间、带宽、利用率、方差
+- 对比对象：不同稀疏模式下的 kernel 选择结果
+
+## 推荐阅读方式
+
+1. **先看趋势，不只看峰值**：是否稳定接近 70%+ 带宽利用率。
+2. **把矩阵模式和 kernel 一起看**：均匀矩阵与高偏斜矩阵的最优策略不同。
+3. **看选择器是否解释得通**：自动选择结果是否与统计特征一致。
+4. **看方差**：平均值高但波动大，未必适合当成强结论。
diff --git a/docs/zh/quickstart.md b/docs/zh/quickstart.md
index f1aa636..f5940f1 100644
--- a/docs/zh/quickstart.md
+++ b/docs/zh/quickstart.md
@@ -24,7 +24,7 @@ nvidia-smi
 ### 1. 克隆仓库
 
 ```bash
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 ```
 
diff --git a/docs/zh/references.md b/docs/zh/references.md
index 557bb2a..dbac42e 100644
--- a/docs/zh/references.md
+++ b/docs/zh/references.md
@@ -1,78 +1,21 @@
 # 学术参考
 
-GPU SpMV 的实现基于以下学术研究成果。
+<script setup lang="ts">
+import { references } from '../.vitepress/data/references'
+</script>
 
-## 核心算法
+本页把论文、项目和延伸阅读分开整理，方便读者快速建立“**这个项目参考了谁、站在什么技术谱系上**”的理解。
 
-### Merge-based Parallel SpMV
+## 核心论文
 
-> Merrill, D., & Garland, M. (2016). **Merge-based parallel sparse matrix-vector multiplication**. *ACM SIGPLAN Notices*, 51(8), 12-21.
+<CitationGrid :items="references.papers" />
 
-- **贡献**: 提出 Merge Path 算法，实现完美负载均衡
-- **应用于**: `MERGE_PATH` kernel
-- [DOI: 10.1145/3016078.285114](https://doi.org/10.1145/3016078.285114)
+## 代表性项目
 
-### Bell & Garland Survey
+<CitationGrid :items="references.projects" />
 
-> Bell, N., & Garland, M. (2009). **Implementing sparse matrix-vector multiplication on throughput-oriented processors**. *SC'09: Proceedings of the Conference on High Performance Computing Networking, Storage and Analysis*.
+## 如何使用这些参考
 
-- **贡献**: CSR vs ELL 格式性能分析，GPU SpMV 基础理论
-- **应用于**: `VECTOR_CSR`、`ELL_KERNEL` 设计
-- [DOI: 10.1145/1654059.1654121](https://doi.org/10.1145/1654059.1654121)
-
-### CSR5 Format
-
-> Liu, Y., & Vuduc, R. (2018). **An adaptive algorithm for sparse matrix-vector multiplication on GPUs**. *IEEE Transactions on Parallel and Distributed Systems*.
-
-- **贡献**: CSR5 格式，自适应负载均衡
-- **参考**: 理解不规则稀疏矩阵的负载分布
-
-## GPU 计算基础
-
-### CUDA Best Practices
-
-> NVIDIA. (2024). **CUDA C++ Best Practices Guide**.
-
-- **参考**: 内存合并、纹理缓存、Warp 同步
-- [Link](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/)
-
-### CUDA Programming Guide
-
-> NVIDIA. (2024). **CUDA C++ Programming Guide**.
-
-- **参考**: CUDA 执行模型、存储层次
-- [Link](https://docs.nvidia.com/cuda/cuda-c-programming-guide/)
-
-## PageRank
-
-> Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). **The PageRank citation ranking: Bringing order to the web**. *Stanford InfoLab*.
-
-- **贡献**: PageRank 算法原始论文
-- **应用于**: `pagerank()` 实现
-
-## 相关项目
-
-| Project | Stars | Description | Key Takeaway |
-|:--------|:-----:|:------------|:-------------|
-| [Ginkgo](https://github.com/ginkgo-project/ginkgo) | 597 | High-performance linear algebra | Performance visualization |
-| [cuSPARSE](https://docs.nvidia.com/cuda/cusparse/) | N/A | NVIDIA official library | Performance baseline |
-| [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse) | 947 | Sparse matrix collection | Standard test data |
-| [Kokkos Kernels](https://github.com/kokkos/kokkos-kernels) | 300+ | Multi-backend sparse BLAS | Performance portability |
-
-## 引用本项目
-
-```bibtex
-@software{gpuspmv2024,
-  author = {LessUp},
-  title = {GPU SpMV: High-Performance CUDA Sparse Matrix-Vector Multiplication},
-  year = {2024},
-  url = {https://github.com/LessUp/gpu-spmv}
-}
-```
-
-## 推荐阅读
-
-1. **GPU Architecture**: 了解 GPU 内存层次和执行模型
-2. **Sparse Matrix Formats**: 不同格式的优缺点
-3. **Load Balancing**: 并行计算中的负载均衡技术
-4. **Memory Coalescing**: GPU 内存访问优化
+1. **先读 Bell & Garland**，理解 GPU SpMV 的经典问题定义。
+2. **再看 Merrill & Garland**，理解 Merge Path 在不规则负载中的价值。
+3. **对照 cuSPARSE / Ginkgo / SuiteSparse**，把本项目放回真实工程生态里看。
diff --git a/docs/zh/whitepaper/index.md b/docs/zh/whitepaper/index.md
index e85c512..f93e23c 100644
--- a/docs/zh/whitepaper/index.md
+++ b/docs/zh/whitepaper/index.md
@@ -1,100 +1,28 @@
-# 设计哲学
+# GPU SpMV：把项目当成工程作品来阅读
 
-## 执行摘要
+<CalloutPanel title="项目定位" tone="success">
+这个站点服务于三类读者：面试官、开源读者、性能工程师。白皮书首页先给结论，随后给设计判断、证据链和深入阅读路径。
+</CalloutPanel>
 
-GPU SpMV 是一个 **生产级 CUDA 库**，实现了高性能稀疏矩阵向量乘法（SpMV），在现代 NVIDIA GPU 上达到 **70%+ 理论内存带宽利用率**。
+## 为什么这个项目值得单独写成白皮书
 
-### 核心贡献
+- SpMV 是典型的 **内存带宽受限** 问题，性能上限主要由访存效率决定。
+- 真正有展示价值的不只是 kernel 本身，而是 **为什么选它、什么时候选它、如何证明它值得选**。
+- 这个项目同时强调 CUDA 性能、RAII 资源管理、错误处理、Spec-Driven 开发和可读文档，这让它更像工程作品，而不只是 demo。
 
-| 贡献 | 影响 |
-|:-----|:-----|
-| **4 种优化内核** | 基于矩阵特征的自动内核选择 |
-| **Merge Path 算法** | 不规则稀疏模式的完美负载均衡 |
-| **ELL 列主序布局** | 均匀矩阵的完全合并访存 |
-| **Spec-Driven 开发** | 完整的设计决策可追溯性 |
-
-### 性能亮点
-
-| 矩阵规模 | 非零元素 | 内核 | 带宽利用率 |
-|:--------:|:--------:|:-----|:----------:|
-| 10K × 10K | 500K | Vector CSR | **70.2%** |
-| 100K × 100K | 5M | Merge Path | **71.5%** |
-| 1M × 1M | 50M | Merge Path | **70.8%** |
-
-::: info 测试环境
-NVIDIA RTX 3090（Ampere 架构，理论带宽：936 GB/s）
-:::
+## 这份白皮书会回答什么
 
-### 目标读者
+1. 这个问题为什么重要，以及 GPU SpMV 的瓶颈是什么。
+2. 四类 kernel 与自动选择策略分别解决了什么。
+3. 项目如何把性能、工程规范和可解释性结合起来。
+4. 哪些页面提供架构、性能、API 与引用材料，方便继续深入。
 
-- **系统架构师**：设计 GPU 加速的稀疏计算
-- **HPC 工程师**：优化内存受限的工作负载
-- **研究人员**：需要可复现、文档完善的基准
-- **应用开发者**：构建图算法、迭代求解器
+## 阅读路径
 
-### 文档结构
-
-| 章节 | 目的 |
+| 页面 | 作用 |
 |:-----|:-----|
-| [设计哲学](/zh/whitepaper/philosophy) | 架构原则和权衡 |
-| [性能分析](/zh/whitepaper/performance) | 详细基准测试方法和结果 |
-| [架构概览](/zh/architecture/overview) | 系统设计文档 |
-| [API 参考](/zh/api/spmv) | 完整 API 文档 |
-
----
-
-## SpMV 的重要性
-
-稀疏矩阵向量乘法（SpMV）是以下领域的基础操作：
-
-- **图分析**：PageRank、社区发现、最短路径
-- **科学计算**：有限元分析、CFD、迭代求解器
-- **机器学习**：稀疏神经网络、推荐系统
-
-SpMV 本质上是 **内存受限** 的——每个非零元素需要读取矩阵数据、列索引和向量值，计算量极小。实现高带宽利用率是主要的优化挑战。
-
----
-
-## 设计概览
-
-```mermaid
-flowchart TB
-    subgraph Input["输入"]
-        Matrix[稀疏矩阵]
-        Vector[稠密向量]
-    end
-    
-    subgraph Analysis["矩阵分析"]
-        NNZ[每行平均 nnz]
-        Skew[偏度]
-        Pattern[分布模式]
-    end
-    
-    subgraph Selection["内核选择"]
-        Decision{自动选择}
-        Scalar[Scalar CSR<br/>avg_nnz < 4]
-        Vector[Vector CSR<br/>均匀行]
-        Merge[Merge Path<br/>高偏度]
-        ELL[ELL Kernel<br/>列主序]
-    end
-    
-    subgraph Execution["GPU 执行"]
-        Compute[SpMV 计算]
-        Result[结果向量]
-    end
-    
-    Matrix --> Analysis
-    Vector --> Execution
-    Analysis --> Decision
-    Decision --> Scalar
-    Decision --> Vector
-    Decision --> Merge
-    Decision --> ELL
-    Scalar --> Compute
-    Vector --> Compute
-    Merge --> Compute
-    ELL --> Compute
-    Compute --> Result
-```
-
-库会根据矩阵特征自动选择最优内核，确保在各种稀疏模式下都获得接近峰值性能。
\ No newline at end of file
+| [设计哲学](/zh/whitepaper/philosophy) | 看这个项目优先优化什么、舍弃什么 |
+| [性能分析](/zh/whitepaper/performance) | 看 benchmark 应该怎么读，数据说明什么 |
+| [架构概览](/zh/architecture/overview) | 看执行流水线、数据流和模块边界 |
+| [API 参考](/zh/api/spmv) | 看对外接口与使用方式 |
+| [学术参考](/zh/references) | 看论文、项目与延伸材料 |
diff --git a/docs/zh/whitepaper/performance.md b/docs/zh/whitepaper/performance.md
index 9c01be5..91e31ce 100644
--- a/docs/zh/whitepaper/performance.md
+++ b/docs/zh/whitepaper/performance.md
@@ -205,7 +205,7 @@ for (auto& x : inputs) {
 
 ```bash
 # 克隆并构建
-git clone https://github.com/LessUp/gpu-spmv.git
+git clone https://github.com/AICL-Lab/gpu-spmv.git
 cd gpu-spmv
 cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
 cmake --build build