From 88a8470fd6c77442ee50625286d0c767014b23bc Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Wed, 8 Apr 2026 16:29:25 +0530 Subject: [PATCH 1/3] Add notebook for installing Genie Code skills from a Databricks workspace Non-technical Genie Code users can now import this notebook into their workspace and run it to install skills, without needing a local terminal or the Databricks CLI. The notebook uses the Databricks Python SDK (pre-installed in all runtimes) to download skills from GitHub and upload them to /Users/<username>/.assistant/skills/. - New: databricks-skills/install_genie_code_skills.py (Databricks notebook) - Updated: README.md with notebook install option in Genie Code section Co-authored-by: Isaac --- README.md | 4 + .../install_genie_code_skills.py | 220 ++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 databricks-skills/install_genie_code_skills.py diff --git a/README.md b/README.md index 8b32690e..e34b9881 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,10 @@ curl -sSL https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/main Combine `--profile`, `--local`, specific skill names, `--mlflow-version`, etc. as needed; see `./databricks-skills/install_skills.sh --help` or [databricks-skills/README.md](databricks-skills/README.md). +**From a Databricks notebook** (no local terminal needed): + +Import [`databricks-skills/install_genie_code_skills.py`](databricks-skills/install_genie_code_skills.py) into your workspace as a notebook and run it. It downloads skills from GitHub and uploads them to your workspace using the Databricks SDK. This works on any compute, including serverless. + **Skill modification or Custom Skill** After the script successfully installs the skills to your workspace, you may find the skills under `/Workspace/Users/<username>/.assistant/skills`. 
diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py new file mode 100644 index 00000000..4a0023ee --- /dev/null +++ b/databricks-skills/install_genie_code_skills.py @@ -0,0 +1,220 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Install Genie Code Skills +# MAGIC +# MAGIC This notebook downloads AI Dev Kit skills from GitHub and uploads them to your workspace so Genie Code can use them. +# MAGIC +# MAGIC Skills are installed to `/Workspace/Users//.assistant/skills/`. +# MAGIC +# MAGIC **How to use:** Run all cells top to bottom. Edit the configuration cell below if you want to install a subset of skills. + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Configuration +# MAGIC +# MAGIC By default, all skills are installed. To install only specific skills, replace `INSTALL_SKILLS` with a list of skill names. + +# COMMAND ---------- + +# -- Configuration ---------------------------------------------------------- +# Set to "all" to install everything, or provide a list of specific skill names. 
+INSTALL_SKILLS = "all" + +# Examples: +# INSTALL_SKILLS = "all" +# INSTALL_SKILLS = ["databricks-dbsql", "databricks-jobs", "databricks-unity-catalog"] +# INSTALL_SKILLS = ["databricks-agent-bricks", "databricks-vector-search"] + +# Source branch or tag (change to pin a specific release) +GITHUB_REF = "main" + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Install Skills + +# COMMAND ---------- + +import urllib.request +import json +import posixpath +from databricks.sdk import WorkspaceClient + +# ── Skill registry (synced with install_skills.sh) ────────────────────────── + +REPO_RAW = f"https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/{GITHUB_REF}" +MLFLOW_RAW = f"https://raw.githubusercontent.com/mlflow/skills/{GITHUB_REF}" +APX_RAW = f"https://raw.githubusercontent.com/databricks-solutions/apx/{GITHUB_REF}/skills/apx" + +DATABRICKS_SKILLS = [ + "databricks-agent-bricks", "databricks-ai-functions", "databricks-aibi-dashboards", + "databricks-bundles", "databricks-app-python", "databricks-config", "databricks-dbsql", + "databricks-docs", "databricks-genie", "databricks-iceberg", "databricks-jobs", + "databricks-lakebase-autoscale", "databricks-lakebase-provisioned", "databricks-metric-views", + "databricks-mlflow-evaluation", "databricks-model-serving", "databricks-python-sdk", + "databricks-execution-compute", "databricks-spark-declarative-pipelines", + "databricks-spark-structured-streaming", "databricks-synthetic-data-gen", + "databricks-unity-catalog", "databricks-unstructured-pdf-generation", + "databricks-vector-search", "databricks-zerobus-ingest", "spark-python-data-source", +] + +MLFLOW_SKILLS = [ + "agent-evaluation", "analyze-mlflow-chat-session", "analyze-mlflow-trace", + "instrumenting-with-mlflow-tracing", "mlflow-onboarding", "querying-mlflow-metrics", + "retrieving-mlflow-traces", "searching-mlflow-docs", +] + +APX_SKILLS = ["databricks-app-apx"] + +DATABRICKS_EXTRA_FILES = { + "databricks-agent-bricks": 
["1-knowledge-assistants.md", "2-supervisor-agents.md"], + "databricks-ai-functions": ["1-task-functions.md", "2-ai-query.md", "3-ai-forecast.md", "4-document-processing-pipeline.md"], + "databricks-aibi-dashboards": ["widget-reference.md", "sql-patterns.md"], + "databricks-genie": ["spaces.md", "conversation.md"], + "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], + "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], + "databricks-app-apx": ["backend-patterns.md", "best-practices.md", "frontend-patterns.md"], + "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", "examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], + "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], + "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], + "databricks-unity-catalog": ["5-system-tables.md"], + "databricks-lakebase-autoscale": ["projects.md", "branches.md", "computes.md", "connection-patterns.md", "reverse-etl.md"], + "databricks-lakebase-provisioned": ["connection-patterns.md", "reverse-etl.md"], + "databricks-metric-views": ["yaml-reference.md", "patterns.md"], + "databricks-model-serving": ["1-classical-ml.md", "2-custom-pyfunc.md", "3-genai-agents.md", "4-tools-integration.md", "5-development-testing.md", "6-logging-registration.md", "7-deployment.md", "8-querying-endpoints.md", "9-package-requirements.md"], + "databricks-mlflow-evaluation": ["references/CRITICAL-interfaces.md", "references/GOTCHAS.md", "references/patterns-context-optimization.md", 
"references/patterns-datasets.md", "references/patterns-evaluation.md", "references/patterns-scorers.md", "references/patterns-trace-analysis.md", "references/user-journeys.md"], + "databricks-spark-declarative-pipelines": ["1-ingestion-patterns.md", "2-streaming-patterns.md", "3-scd-patterns.md", "4-performance-tuning.md", "5-python-api.md", "6-dlt-migration.md", "7-advanced-configuration.md", "8-project-initialization.md"], + "databricks-spark-structured-streaming": ["checkpoint-best-practices.md", "kafka-streaming.md", "merge-operations.md", "multi-sink-writes.md", "stateful-operations.md", "stream-static-joins.md", "stream-stream-joins.md", "streaming-best-practices.md", "trigger-and-cost-optimization.md"], + "databricks-vector-search": ["index-types.md", "end-to-end-rag.md"], + "databricks-zerobus-ingest": ["1-setup-and-authentication.md", "2-python-client.md", "3-multilanguage-clients.md", "4-protobuf-schema.md", "5-operations-and-limits.md"], +} + +MLFLOW_EXTRA_FILES = { + "agent-evaluation": ["references/dataset-preparation.md", "references/scorers-constraints.md", "references/scorers.md", "references/setup-guide.md", "references/tracing-integration.md", "references/troubleshooting.md", "scripts/analyze_results.py", "scripts/create_dataset_template.py", "scripts/list_datasets.py", "scripts/run_evaluation_template.py", "scripts/setup_mlflow.py", "scripts/validate_agent_tracing.py", "scripts/validate_auth.py", "scripts/validate_environment.py", "scripts/validate_tracing_runtime.py"], + "analyze-mlflow-chat-session": ["scripts/discover_schema.sh", "scripts/inspect_turn.sh"], + "analyze-mlflow-trace": ["references/trace-structure.md"], + "instrumenting-with-mlflow-tracing": ["references/advanced-patterns.md", "references/distributed-tracing.md", "references/feedback-collection.md", "references/production.md", "references/python.md", "references/typescript.md"], + "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], +} + +# ── 
Helpers ────────────────────────────────────────────────────────────────── + +def _download(url: str) -> bytes | None: + """Download a file from a URL. Returns bytes on success, None on failure.""" + try: + with urllib.request.urlopen(url, timeout=30) as resp: + return resp.read() + except Exception: + return None + + +def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): + """Upload a file to the Databricks workspace.""" + import base64 + from databricks.sdk.service.workspace import ImportFormat + parent = posixpath.dirname(workspace_path) + w.workspace.mkdirs(parent) + w.workspace.import_( + path=workspace_path, + content=base64.b64encode(content).decode(), + format=ImportFormat.AUTO, + overwrite=True, + ) + + +def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str) -> bool: + """Download and upload one skill (SKILL.md + extra files).""" + skill_url = f"{base_url}/{skill_name}" + skill_md = _download(f"{skill_url}/SKILL.md") + if skill_md is None: + print(f" SKIP {skill_name} (could not download SKILL.md)") + return False + + dest = f"{skills_path}/{skill_name}" + _upload(w, f"{dest}/SKILL.md", skill_md) + uploaded = 1 + + for extra in extra_files: + data = _download(f"{skill_url}/{extra}") + if data is not None: + _upload(w, f"{dest}/{extra}", data) + uploaded += 1 + + print(f" OK {skill_name} ({uploaded} file{'s' if uploaded != 1 else ''})") + return True + + +# ── Main ───────────────────────────────────────────────────────────────────── + +w = WorkspaceClient() +username = w.current_user.me().user_name +skills_path = f"/Users/{username}/.assistant/skills" + +print(f"Username: {username}") +print(f"Target: {skills_path}") +print() + +# Determine which skills to install +if INSTALL_SKILLS == "all": + selected = DATABRICKS_SKILLS + MLFLOW_SKILLS + APX_SKILLS +else: + selected = INSTALL_SKILLS + +w.workspace.mkdirs(skills_path) + +installed = 0 +failed = 0 + +# Databricks skills +db_base = 
f"{REPO_RAW}/databricks-skills" +for skill in selected: + if skill in DATABRICKS_SKILLS: + extras = DATABRICKS_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, db_base, extras, skills_path) + installed += ok + failed += (not ok) + +# MLflow skills +for skill in selected: + if skill in MLFLOW_SKILLS: + extras = MLFLOW_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, MLFLOW_RAW, extras, skills_path) + installed += ok + failed += (not ok) + +# APX skills +for skill in selected: + if skill in APX_SKILLS: + extras = DATABRICKS_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, APX_RAW, extras, skills_path) + installed += ok + failed += (not ok) + +print() +print(f"Done. {installed} skills installed, {failed} failed.") +print(f"Skills are at: /Workspace{skills_path}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Verify Installation +# MAGIC +# MAGIC Run this cell to list the skills installed in your workspace. + +# COMMAND ---------- + +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() +username = w.current_user.me().user_name +skills_path = f"/Users/{username}/.assistant/skills" + +try: + entries = w.workspace.list(skills_path) + skill_dirs = sorted([e.path.split("/")[-1] for e in entries if e.is_directory]) + print(f"Found {len(skill_dirs)} skills in {skills_path}:\n") + for name in skill_dirs: + print(f" {name}") +except Exception as e: + print(f"Could not list skills: {e}") From 3337c3b97a7b2b7850f1d11c16dda0de3c7191d8 Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Wed, 8 Apr 2026 16:47:16 +0530 Subject: [PATCH 2/3] Fix APX skill download URL and separate its extra files mapping The APX repo stores files at skills/apx/SKILL.md (flat), not in a skill-name subdirectory like the other repos. Added source_path parameter to install_skill() to handle this, and moved APX extra files to its own mapping matching get_apx_skill_extra_files() in install_skills.sh. 
Co-authored-by: Isaac --- .../install_genie_code_skills.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py index 4a0023ee..ab0593c7 100644 --- a/databricks-skills/install_genie_code_skills.py +++ b/databricks-skills/install_genie_code_skills.py @@ -74,7 +74,6 @@ "databricks-genie": ["spaces.md", "conversation.md"], "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], - "databricks-app-apx": ["backend-patterns.md", "best-practices.md", "frontend-patterns.md"], "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", "examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], @@ -98,6 +97,10 @@ "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], } +APX_EXTRA_FILES = { + "databricks-app-apx": ["backend-patterns.md", "frontend-patterns.md"], +} + # ── Helpers ────────────────────────────────────────────────────────────────── def _download(url: str) -> bytes | None: @@ -123,9 +126,16 @@ def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): ) -def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str) -> bool: - """Download and upload one skill (SKILL.md + extra files).""" - 
skill_url = f"{base_url}/{skill_name}" +def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str, source_path: str | None = "") -> bool: + """Download and upload one skill (SKILL.md + extra files). + source_path: "" = use skill_name as subdirectory (default), None = files at base_url root, str = custom subdirectory. + """ + if source_path is None: + skill_url = base_url + elif source_path: + skill_url = f"{base_url}/{source_path}" + else: + skill_url = f"{base_url}/{skill_name}" skill_md = _download(f"{skill_url}/SKILL.md") if skill_md is None: print(f" SKIP {skill_name} (could not download SKILL.md)") @@ -183,11 +193,11 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file installed += ok failed += (not ok) -# APX skills +# APX skills (files are at the repo root, not in a skill-name subdirectory) for skill in selected: if skill in APX_SKILLS: - extras = DATABRICKS_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, APX_RAW, extras, skills_path) + extras = APX_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, APX_RAW, extras, skills_path, source_path=None) installed += ok failed += (not ok) From 900d9656e429d5f5e9c0f0f53d231043d0c5695b Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Mon, 13 Apr 2026 23:48:31 +0530 Subject: [PATCH 3/3] Auto-discover skills from GitHub instead of hardcoded lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the hardcoded skill registries (DATABRICKS_SKILLS, MLFLOW_SKILLS, APX_SKILLS and their extra-file mappings) with dynamic discovery using the GitHub Git Trees API. This eliminates the need to maintain duplicate skill lists across install_skills.sh and this notebook. Also fixes the verify cell which used the non-existent `is_directory` attribute on ObjectInfo — now uses `object_type` comparison instead. 
Co-authored-by: Isaac --- .../install_genie_code_skills.py | 230 +++++++++--------- 1 file changed, 117 insertions(+), 113 deletions(-) diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py index ab0593c7..cb7e6c44 100644 --- a/databricks-skills/install_genie_code_skills.py +++ b/databricks-skills/install_genie_code_skills.py @@ -7,6 +7,8 @@ # MAGIC Skills are installed to `/Workspace/Users//.assistant/skills/`. # MAGIC # MAGIC **How to use:** Run all cells top to bottom. Edit the configuration cell below if you want to install a subset of skills. +# MAGIC +# MAGIC Skills are auto-discovered from GitHub — no hardcoded lists to maintain. # COMMAND ---------- @@ -24,7 +26,7 @@ # Examples: # INSTALL_SKILLS = "all" # INSTALL_SKILLS = ["databricks-dbsql", "databricks-jobs", "databricks-unity-catalog"] -# INSTALL_SKILLS = ["databricks-agent-bricks", "databricks-vector-search"] +# INSTALL_SKILLS = ["databricks-agent-bricks", "agent-evaluation"] # Source branch or tag (change to pin a specific release) GITHUB_REF = "main" @@ -39,72 +41,37 @@ import urllib.request import json import posixpath +import base64 from databricks.sdk import WorkspaceClient +from databricks.sdk.service.workspace import ImportFormat -# ── Skill registry (synced with install_skills.sh) ────────────────────────── - -REPO_RAW = f"https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/{GITHUB_REF}" -MLFLOW_RAW = f"https://raw.githubusercontent.com/mlflow/skills/{GITHUB_REF}" -APX_RAW = f"https://raw.githubusercontent.com/databricks-solutions/apx/{GITHUB_REF}/skills/apx" - -DATABRICKS_SKILLS = [ - "databricks-agent-bricks", "databricks-ai-functions", "databricks-aibi-dashboards", - "databricks-bundles", "databricks-app-python", "databricks-config", "databricks-dbsql", - "databricks-docs", "databricks-genie", "databricks-iceberg", "databricks-jobs", - "databricks-lakebase-autoscale", "databricks-lakebase-provisioned", 
"databricks-metric-views", - "databricks-mlflow-evaluation", "databricks-model-serving", "databricks-python-sdk", - "databricks-execution-compute", "databricks-spark-declarative-pipelines", - "databricks-spark-structured-streaming", "databricks-synthetic-data-gen", - "databricks-unity-catalog", "databricks-unstructured-pdf-generation", - "databricks-vector-search", "databricks-zerobus-ingest", "spark-python-data-source", -] +# ── Skill sources ────────────────────────────────────────────────────────── +# Skills are auto-discovered: any subdirectory containing SKILL.md is a skill. -MLFLOW_SKILLS = [ - "agent-evaluation", "analyze-mlflow-chat-session", "analyze-mlflow-trace", - "instrumenting-with-mlflow-tracing", "mlflow-onboarding", "querying-mlflow-metrics", - "retrieving-mlflow-traces", "searching-mlflow-docs", +SKILL_SOURCES = [ + {"owner": "databricks-solutions", "repo": "ai-dev-kit", "path": "databricks-skills", + "skip": {"TEMPLATE"}}, + {"owner": "mlflow", "repo": "skills", "path": ""}, + {"owner": "databricks-solutions", "repo": "apx", "path": "skills", + "name_overrides": {"apx": "databricks-app-apx"}}, ] -APX_SKILLS = ["databricks-app-apx"] - -DATABRICKS_EXTRA_FILES = { - "databricks-agent-bricks": ["1-knowledge-assistants.md", "2-supervisor-agents.md"], - "databricks-ai-functions": ["1-task-functions.md", "2-ai-query.md", "3-ai-forecast.md", "4-document-processing-pipeline.md"], - "databricks-aibi-dashboards": ["widget-reference.md", "sql-patterns.md"], - "databricks-genie": ["spaces.md", "conversation.md"], - "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], - "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], - "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", 
"examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], - "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], - "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], - "databricks-unity-catalog": ["5-system-tables.md"], - "databricks-lakebase-autoscale": ["projects.md", "branches.md", "computes.md", "connection-patterns.md", "reverse-etl.md"], - "databricks-lakebase-provisioned": ["connection-patterns.md", "reverse-etl.md"], - "databricks-metric-views": ["yaml-reference.md", "patterns.md"], - "databricks-model-serving": ["1-classical-ml.md", "2-custom-pyfunc.md", "3-genai-agents.md", "4-tools-integration.md", "5-development-testing.md", "6-logging-registration.md", "7-deployment.md", "8-querying-endpoints.md", "9-package-requirements.md"], - "databricks-mlflow-evaluation": ["references/CRITICAL-interfaces.md", "references/GOTCHAS.md", "references/patterns-context-optimization.md", "references/patterns-datasets.md", "references/patterns-evaluation.md", "references/patterns-scorers.md", "references/patterns-trace-analysis.md", "references/user-journeys.md"], - "databricks-spark-declarative-pipelines": ["1-ingestion-patterns.md", "2-streaming-patterns.md", "3-scd-patterns.md", "4-performance-tuning.md", "5-python-api.md", "6-dlt-migration.md", "7-advanced-configuration.md", "8-project-initialization.md"], - "databricks-spark-structured-streaming": ["checkpoint-best-practices.md", "kafka-streaming.md", "merge-operations.md", "multi-sink-writes.md", "stateful-operations.md", "stream-static-joins.md", "stream-stream-joins.md", "streaming-best-practices.md", "trigger-and-cost-optimization.md"], - "databricks-vector-search": ["index-types.md", "end-to-end-rag.md"], - "databricks-zerobus-ingest": ["1-setup-and-authentication.md", 
"2-python-client.md", "3-multilanguage-clients.md", "4-protobuf-schema.md", "5-operations-and-limits.md"], -} - -MLFLOW_EXTRA_FILES = { - "agent-evaluation": ["references/dataset-preparation.md", "references/scorers-constraints.md", "references/scorers.md", "references/setup-guide.md", "references/tracing-integration.md", "references/troubleshooting.md", "scripts/analyze_results.py", "scripts/create_dataset_template.py", "scripts/list_datasets.py", "scripts/run_evaluation_template.py", "scripts/setup_mlflow.py", "scripts/validate_agent_tracing.py", "scripts/validate_auth.py", "scripts/validate_environment.py", "scripts/validate_tracing_runtime.py"], - "analyze-mlflow-chat-session": ["scripts/discover_schema.sh", "scripts/inspect_turn.sh"], - "analyze-mlflow-trace": ["references/trace-structure.md"], - "instrumenting-with-mlflow-tracing": ["references/advanced-patterns.md", "references/distributed-tracing.md", "references/feedback-collection.md", "references/production.md", "references/python.md", "references/typescript.md"], - "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], -} - -APX_EXTRA_FILES = { - "databricks-app-apx": ["backend-patterns.md", "frontend-patterns.md"], -} # ── Helpers ────────────────────────────────────────────────────────────────── -def _download(url: str) -> bytes | None: - """Download a file from a URL. Returns bytes on success, None on failure.""" +def _github_api(url): + """Fetch JSON from the GitHub API.""" + req = urllib.request.Request(url, headers={"Accept": "application/vnd.github.v3+json"}) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + except Exception as e: + print(f" WARN GitHub API error: {e}") + return None + + +def _download(url): + """Download raw file bytes. 
Returns bytes on success, None on failure.""" try: with urllib.request.urlopen(url, timeout=30) as resp: return resp.read() @@ -112,12 +79,9 @@ def _download(url: str) -> bytes | None: return None -def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): +def _upload(w, workspace_path, content): """Upload a file to the Databricks workspace.""" - import base64 - from databricks.sdk.service.workspace import ImportFormat - parent = posixpath.dirname(workspace_path) - w.workspace.mkdirs(parent) + w.workspace.mkdirs(posixpath.dirname(workspace_path)) w.workspace.import_( path=workspace_path, content=base64.b64encode(content).decode(), @@ -126,32 +90,70 @@ def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): ) -def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str, source_path: str | None = "") -> bool: - """Download and upload one skill (SKILL.md + extra files). - source_path: "" = use skill_name as subdirectory (default), None = files at base_url root, str = custom subdirectory. +def _discover_from_source(source, ref): + """Discover skills in a GitHub repo using the Git Trees API. + + Returns list of (install_name, raw_url_prefix, [extra_file_paths]). 
""" - if source_path is None: - skill_url = base_url - elif source_path: - skill_url = f"{base_url}/{source_path}" - else: - skill_url = f"{base_url}/{skill_name}" - skill_md = _download(f"{skill_url}/SKILL.md") + owner, repo = source["owner"], source["repo"] + base_path = source["path"] + overrides = source.get("name_overrides", {}) + skip = source.get("skip", set()) + + data = _github_api( + f"https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1" + ) + if data is None: + return [] + + all_files = {item["path"] for item in data.get("tree", []) if item["type"] == "blob"} + prefix = f"{base_path}/" if base_path else "" + + # Find directories that directly contain SKILL.md + skill_dirs = set() + for f in all_files: + if not f.startswith(prefix): + continue + rel = f[len(prefix):] + parts = rel.split("/") + if len(parts) == 2 and parts[1] == "SKILL.md" and parts[0] not in skip: + skill_dirs.add(parts[0]) + + # Build result with extra files for each skill + raw_base = f"https://raw.githubusercontent.com/{owner}/{repo}/{ref}" + results = [] + for dir_name in sorted(skill_dirs): + skill_prefix = f"{prefix}{dir_name}/" + extras = sorted( + f[len(skill_prefix):] + for f in all_files + if f.startswith(skill_prefix) and not f.endswith("/SKILL.md") + ) + install_name = overrides.get(dir_name, dir_name) + source_url = f"{raw_base}/{prefix}{dir_name}" + results.append((install_name, source_url, extras)) + + return results + + +def _install_skill(w, name, source_url, extras, skills_path): + """Download and upload one skill (SKILL.md + extra files).""" + skill_md = _download(f"{source_url}/SKILL.md") if skill_md is None: - print(f" SKIP {skill_name} (could not download SKILL.md)") + print(f" SKIP {name} (could not download SKILL.md)") return False - dest = f"{skills_path}/{skill_name}" + dest = f"{skills_path}/{name}" _upload(w, f"{dest}/SKILL.md", skill_md) uploaded = 1 - for extra in extra_files: - data = _download(f"{skill_url}/{extra}") + for extra in 
extras: + data = _download(f"{source_url}/{extra}") if data is not None: _upload(w, f"{dest}/{extra}", data) uploaded += 1 - print(f" OK {skill_name} ({uploaded} file{'s' if uploaded != 1 else ''})") + print(f" OK {name} ({uploaded} file{'s' if uploaded != 1 else ''})") return True @@ -165,44 +167,39 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file print(f"Target: {skills_path}") print() -# Determine which skills to install +# Discover skills from all sources +print("Discovering skills from GitHub...") +all_skills = [] +for source in SKILL_SOURCES: + discovered = _discover_from_source(source, GITHUB_REF) + label = f"{source['owner']}/{source['repo']}" + print(f" {label}: {len(discovered)} skills") + all_skills.extend(discovered) + +print(f"\nTotal: {len(all_skills)} skills available\n") + +# Filter to requested skills if INSTALL_SKILLS == "all": - selected = DATABRICKS_SKILLS + MLFLOW_SKILLS + APX_SKILLS + selected = all_skills else: - selected = INSTALL_SKILLS + wanted = set(INSTALL_SKILLS) + selected = [s for s in all_skills if s[0] in wanted] + missing = wanted - {s[0] for s in selected} + if missing: + print(f" WARN: skills not found in any source: {', '.join(sorted(missing))}\n") +# Install w.workspace.mkdirs(skills_path) installed = 0 failed = 0 - -# Databricks skills -db_base = f"{REPO_RAW}/databricks-skills" -for skill in selected: - if skill in DATABRICKS_SKILLS: - extras = DATABRICKS_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, db_base, extras, skills_path) - installed += ok - failed += (not ok) - -# MLflow skills -for skill in selected: - if skill in MLFLOW_SKILLS: - extras = MLFLOW_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, MLFLOW_RAW, extras, skills_path) - installed += ok - failed += (not ok) - -# APX skills (files are at the repo root, not in a skill-name subdirectory) -for skill in selected: - if skill in APX_SKILLS: - extras = APX_EXTRA_FILES.get(skill, []) - ok = install_skill(w, 
skill, APX_RAW, extras, skills_path, source_path=None) - installed += ok - failed += (not ok) +for name, source_url, extras in selected: + ok = _install_skill(w, name, source_url, extras, skills_path) + installed += ok + failed += not ok print() -print(f"Done. {installed} skills installed, {failed} failed.") +print(f"Done. {installed} installed, {failed} failed.") print(f"Skills are at: /Workspace{skills_path}") # COMMAND ---------- @@ -221,10 +218,17 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file skills_path = f"/Users/{username}/.assistant/skills" try: - entries = w.workspace.list(skills_path) - skill_dirs = sorted([e.path.split("/")[-1] for e in entries if e.is_directory]) - print(f"Found {len(skill_dirs)} skills in {skills_path}:\n") - for name in skill_dirs: - print(f" {name}") + entries = list(w.workspace.list(skills_path)) + subdirs = sorted([ + e.path.split("/")[-1] + for e in entries + if str(e.object_type) == "ObjectType.DIRECTORY" + ]) + if subdirs: + print(f"Found {len(subdirs)} skills in {skills_path}:\n") + for name in subdirs: + print(f" {name}") + else: + print(f"No skills found in {skills_path}.") except Exception as e: print(f"Could not list skills: {e}")