From 88a8470fd6c77442ee50625286d0c767014b23bc Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Wed, 8 Apr 2026 16:29:25 +0530 Subject: [PATCH 1/3] Add notebook for installing Genie Code skills from a Databricks workspace Non-technical Genie Code users can now import this notebook into their workspace and run it to install skills, without needing a local terminal or the Databricks CLI. The notebook uses the Databricks Python SDK (pre-installed in all runtimes) to download skills from GitHub and upload them to /Users/<username>/.assistant/skills/. - New: databricks-skills/install_genie_code_skills.py (Databricks notebook) - Updated: README.md with notebook install option in Genie Code section Co-authored-by: Isaac --- README.md | 4 + .../install_genie_code_skills.py | 220 ++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 databricks-skills/install_genie_code_skills.py diff --git a/README.md b/README.md index 8b32690e..e34b9881 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,10 @@ curl -sSL https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/main Combine `--profile`, `--local`, specific skill names, `--mlflow-version`, etc. as needed; see `./databricks-skills/install_skills.sh --help` or [databricks-skills/README.md](databricks-skills/README.md). +**From a Databricks notebook** (no local terminal needed): + +Import [`databricks-skills/install_genie_code_skills.py`](databricks-skills/install_genie_code_skills.py) into your workspace as a notebook and run it. It downloads skills from GitHub and uploads them to your workspace using the Databricks SDK. This works on any compute, including serverless. + **Skill modification or Custom Skill** After the script successfully installs the skills to your workspace, you may find the skills under `/Workspace/Users/<username>/.assistant/skills`. 
diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py new file mode 100644 index 00000000..4a0023ee --- /dev/null +++ b/databricks-skills/install_genie_code_skills.py @@ -0,0 +1,220 @@ +# Databricks notebook source +# MAGIC %md +# MAGIC # Install Genie Code Skills +# MAGIC +# MAGIC This notebook downloads AI Dev Kit skills from GitHub and uploads them to your workspace so Genie Code can use them. +# MAGIC +# MAGIC Skills are installed to `/Workspace/Users//.assistant/skills/`. +# MAGIC +# MAGIC **How to use:** Run all cells top to bottom. Edit the configuration cell below if you want to install a subset of skills. + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Configuration +# MAGIC +# MAGIC By default, all skills are installed. To install only specific skills, replace `INSTALL_SKILLS` with a list of skill names. + +# COMMAND ---------- + +# -- Configuration ---------------------------------------------------------- +# Set to "all" to install everything, or provide a list of specific skill names. 
+INSTALL_SKILLS = "all" + +# Examples: +# INSTALL_SKILLS = "all" +# INSTALL_SKILLS = ["databricks-dbsql", "databricks-jobs", "databricks-unity-catalog"] +# INSTALL_SKILLS = ["databricks-agent-bricks", "databricks-vector-search"] + +# Source branch or tag (change to pin a specific release) +GITHUB_REF = "main" + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Install Skills + +# COMMAND ---------- + +import urllib.request +import json +import posixpath +from databricks.sdk import WorkspaceClient + +# ── Skill registry (synced with install_skills.sh) ────────────────────────── + +REPO_RAW = f"https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/{GITHUB_REF}" +MLFLOW_RAW = f"https://raw.githubusercontent.com/mlflow/skills/{GITHUB_REF}" +APX_RAW = f"https://raw.githubusercontent.com/databricks-solutions/apx/{GITHUB_REF}/skills/apx" + +DATABRICKS_SKILLS = [ + "databricks-agent-bricks", "databricks-ai-functions", "databricks-aibi-dashboards", + "databricks-bundles", "databricks-app-python", "databricks-config", "databricks-dbsql", + "databricks-docs", "databricks-genie", "databricks-iceberg", "databricks-jobs", + "databricks-lakebase-autoscale", "databricks-lakebase-provisioned", "databricks-metric-views", + "databricks-mlflow-evaluation", "databricks-model-serving", "databricks-python-sdk", + "databricks-execution-compute", "databricks-spark-declarative-pipelines", + "databricks-spark-structured-streaming", "databricks-synthetic-data-gen", + "databricks-unity-catalog", "databricks-unstructured-pdf-generation", + "databricks-vector-search", "databricks-zerobus-ingest", "spark-python-data-source", +] + +MLFLOW_SKILLS = [ + "agent-evaluation", "analyze-mlflow-chat-session", "analyze-mlflow-trace", + "instrumenting-with-mlflow-tracing", "mlflow-onboarding", "querying-mlflow-metrics", + "retrieving-mlflow-traces", "searching-mlflow-docs", +] + +APX_SKILLS = ["databricks-app-apx"] + +DATABRICKS_EXTRA_FILES = { + "databricks-agent-bricks": 
["1-knowledge-assistants.md", "2-supervisor-agents.md"], + "databricks-ai-functions": ["1-task-functions.md", "2-ai-query.md", "3-ai-forecast.md", "4-document-processing-pipeline.md"], + "databricks-aibi-dashboards": ["widget-reference.md", "sql-patterns.md"], + "databricks-genie": ["spaces.md", "conversation.md"], + "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], + "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], + "databricks-app-apx": ["backend-patterns.md", "best-practices.md", "frontend-patterns.md"], + "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", "examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], + "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], + "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], + "databricks-unity-catalog": ["5-system-tables.md"], + "databricks-lakebase-autoscale": ["projects.md", "branches.md", "computes.md", "connection-patterns.md", "reverse-etl.md"], + "databricks-lakebase-provisioned": ["connection-patterns.md", "reverse-etl.md"], + "databricks-metric-views": ["yaml-reference.md", "patterns.md"], + "databricks-model-serving": ["1-classical-ml.md", "2-custom-pyfunc.md", "3-genai-agents.md", "4-tools-integration.md", "5-development-testing.md", "6-logging-registration.md", "7-deployment.md", "8-querying-endpoints.md", "9-package-requirements.md"], + "databricks-mlflow-evaluation": ["references/CRITICAL-interfaces.md", "references/GOTCHAS.md", "references/patterns-context-optimization.md", 
"references/patterns-datasets.md", "references/patterns-evaluation.md", "references/patterns-scorers.md", "references/patterns-trace-analysis.md", "references/user-journeys.md"], + "databricks-spark-declarative-pipelines": ["1-ingestion-patterns.md", "2-streaming-patterns.md", "3-scd-patterns.md", "4-performance-tuning.md", "5-python-api.md", "6-dlt-migration.md", "7-advanced-configuration.md", "8-project-initialization.md"], + "databricks-spark-structured-streaming": ["checkpoint-best-practices.md", "kafka-streaming.md", "merge-operations.md", "multi-sink-writes.md", "stateful-operations.md", "stream-static-joins.md", "stream-stream-joins.md", "streaming-best-practices.md", "trigger-and-cost-optimization.md"], + "databricks-vector-search": ["index-types.md", "end-to-end-rag.md"], + "databricks-zerobus-ingest": ["1-setup-and-authentication.md", "2-python-client.md", "3-multilanguage-clients.md", "4-protobuf-schema.md", "5-operations-and-limits.md"], +} + +MLFLOW_EXTRA_FILES = { + "agent-evaluation": ["references/dataset-preparation.md", "references/scorers-constraints.md", "references/scorers.md", "references/setup-guide.md", "references/tracing-integration.md", "references/troubleshooting.md", "scripts/analyze_results.py", "scripts/create_dataset_template.py", "scripts/list_datasets.py", "scripts/run_evaluation_template.py", "scripts/setup_mlflow.py", "scripts/validate_agent_tracing.py", "scripts/validate_auth.py", "scripts/validate_environment.py", "scripts/validate_tracing_runtime.py"], + "analyze-mlflow-chat-session": ["scripts/discover_schema.sh", "scripts/inspect_turn.sh"], + "analyze-mlflow-trace": ["references/trace-structure.md"], + "instrumenting-with-mlflow-tracing": ["references/advanced-patterns.md", "references/distributed-tracing.md", "references/feedback-collection.md", "references/production.md", "references/python.md", "references/typescript.md"], + "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], +} + +# ── 
Helpers ────────────────────────────────────────────────────────────────── + +def _download(url: str) -> bytes | None: + """Download a file from a URL. Returns bytes on success, None on failure.""" + try: + with urllib.request.urlopen(url, timeout=30) as resp: + return resp.read() + except Exception: + return None + + +def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): + """Upload a file to the Databricks workspace.""" + import base64 + from databricks.sdk.service.workspace import ImportFormat + parent = posixpath.dirname(workspace_path) + w.workspace.mkdirs(parent) + w.workspace.import_( + path=workspace_path, + content=base64.b64encode(content).decode(), + format=ImportFormat.AUTO, + overwrite=True, + ) + + +def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str) -> bool: + """Download and upload one skill (SKILL.md + extra files).""" + skill_url = f"{base_url}/{skill_name}" + skill_md = _download(f"{skill_url}/SKILL.md") + if skill_md is None: + print(f" SKIP {skill_name} (could not download SKILL.md)") + return False + + dest = f"{skills_path}/{skill_name}" + _upload(w, f"{dest}/SKILL.md", skill_md) + uploaded = 1 + + for extra in extra_files: + data = _download(f"{skill_url}/{extra}") + if data is not None: + _upload(w, f"{dest}/{extra}", data) + uploaded += 1 + + print(f" OK {skill_name} ({uploaded} file{'s' if uploaded != 1 else ''})") + return True + + +# ── Main ───────────────────────────────────────────────────────────────────── + +w = WorkspaceClient() +username = w.current_user.me().user_name +skills_path = f"/Users/{username}/.assistant/skills" + +print(f"Username: {username}") +print(f"Target: {skills_path}") +print() + +# Determine which skills to install +if INSTALL_SKILLS == "all": + selected = DATABRICKS_SKILLS + MLFLOW_SKILLS + APX_SKILLS +else: + selected = INSTALL_SKILLS + +w.workspace.mkdirs(skills_path) + +installed = 0 +failed = 0 + +# Databricks skills +db_base = 
f"{REPO_RAW}/databricks-skills" +for skill in selected: + if skill in DATABRICKS_SKILLS: + extras = DATABRICKS_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, db_base, extras, skills_path) + installed += ok + failed += (not ok) + +# MLflow skills +for skill in selected: + if skill in MLFLOW_SKILLS: + extras = MLFLOW_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, MLFLOW_RAW, extras, skills_path) + installed += ok + failed += (not ok) + +# APX skills +for skill in selected: + if skill in APX_SKILLS: + extras = DATABRICKS_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, APX_RAW, extras, skills_path) + installed += ok + failed += (not ok) + +print() +print(f"Done. {installed} skills installed, {failed} failed.") +print(f"Skills are at: /Workspace{skills_path}") + +# COMMAND ---------- + +# MAGIC %md +# MAGIC ## Verify Installation +# MAGIC +# MAGIC Run this cell to list the skills installed in your workspace. + +# COMMAND ---------- + +from databricks.sdk import WorkspaceClient + +w = WorkspaceClient() +username = w.current_user.me().user_name +skills_path = f"/Users/{username}/.assistant/skills" + +try: + entries = w.workspace.list(skills_path) + skill_dirs = sorted([e.path.split("/")[-1] for e in entries if e.is_directory]) + print(f"Found {len(skill_dirs)} skills in {skills_path}:\n") + for name in skill_dirs: + print(f" {name}") +except Exception as e: + print(f"Could not list skills: {e}") From 3337c3b97a7b2b7850f1d11c16dda0de3c7191d8 Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Wed, 8 Apr 2026 16:47:16 +0530 Subject: [PATCH 2/3] Fix APX skill download URL and separate its extra files mapping The APX repo stores files at skills/apx/SKILL.md (flat), not in a skill-name subdirectory like the other repos. Added source_path parameter to install_skill() to handle this, and moved APX extra files to its own mapping matching get_apx_skill_extra_files() in install_skills.sh. 
Co-authored-by: Isaac --- .../install_genie_code_skills.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py index 4a0023ee..ab0593c7 100644 --- a/databricks-skills/install_genie_code_skills.py +++ b/databricks-skills/install_genie_code_skills.py @@ -74,7 +74,6 @@ "databricks-genie": ["spaces.md", "conversation.md"], "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], - "databricks-app-apx": ["backend-patterns.md", "best-practices.md", "frontend-patterns.md"], "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", "examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], @@ -98,6 +97,10 @@ "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], } +APX_EXTRA_FILES = { + "databricks-app-apx": ["backend-patterns.md", "frontend-patterns.md"], +} + # ── Helpers ────────────────────────────────────────────────────────────────── def _download(url: str) -> bytes | None: @@ -123,9 +126,16 @@ def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): ) -def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str) -> bool: - """Download and upload one skill (SKILL.md + extra files).""" - 
skill_url = f"{base_url}/{skill_name}" +def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str, source_path: str | None = "") -> bool: + """Download and upload one skill (SKILL.md + extra files). + source_path: "" = use skill_name as subdirectory (default), None = files at base_url root, str = custom subdirectory. + """ + if source_path is None: + skill_url = base_url + elif source_path: + skill_url = f"{base_url}/{source_path}" + else: + skill_url = f"{base_url}/{skill_name}" skill_md = _download(f"{skill_url}/SKILL.md") if skill_md is None: print(f" SKIP {skill_name} (could not download SKILL.md)") @@ -183,11 +193,11 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file installed += ok failed += (not ok) -# APX skills +# APX skills (files are at the repo root, not in a skill-name subdirectory) for skill in selected: if skill in APX_SKILLS: - extras = DATABRICKS_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, APX_RAW, extras, skills_path) + extras = APX_EXTRA_FILES.get(skill, []) + ok = install_skill(w, skill, APX_RAW, extras, skills_path, source_path=None) installed += ok failed += (not ok) From 900d9656e429d5f5e9c0f0f53d231043d0c5695b Mon Sep 17 00:00:00 2001 From: praneeth_paikray-data Date: Mon, 13 Apr 2026 23:48:31 +0530 Subject: [PATCH 3/3] Auto-discover skills from GitHub instead of hardcoded lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the hardcoded skill registries (DATABRICKS_SKILLS, MLFLOW_SKILLS, APX_SKILLS and their extra-file mappings) with dynamic discovery using the GitHub Git Trees API. This eliminates the need to maintain duplicate skill lists across install_skills.sh and this notebook. Also fixes the verify cell which used the non-existent `is_directory` attribute on ObjectInfo — now uses `object_type` comparison instead. 
Co-authored-by: Isaac --- .../install_genie_code_skills.py | 230 +++++++++--------- 1 file changed, 117 insertions(+), 113 deletions(-) diff --git a/databricks-skills/install_genie_code_skills.py b/databricks-skills/install_genie_code_skills.py index ab0593c7..cb7e6c44 100644 --- a/databricks-skills/install_genie_code_skills.py +++ b/databricks-skills/install_genie_code_skills.py @@ -7,6 +7,8 @@ # MAGIC Skills are installed to `/Workspace/Users//.assistant/skills/`. # MAGIC # MAGIC **How to use:** Run all cells top to bottom. Edit the configuration cell below if you want to install a subset of skills. +# MAGIC +# MAGIC Skills are auto-discovered from GitHub — no hardcoded lists to maintain. # COMMAND ---------- @@ -24,7 +26,7 @@ # Examples: # INSTALL_SKILLS = "all" # INSTALL_SKILLS = ["databricks-dbsql", "databricks-jobs", "databricks-unity-catalog"] -# INSTALL_SKILLS = ["databricks-agent-bricks", "databricks-vector-search"] +# INSTALL_SKILLS = ["databricks-agent-bricks", "agent-evaluation"] # Source branch or tag (change to pin a specific release) GITHUB_REF = "main" @@ -39,72 +41,37 @@ import urllib.request import json import posixpath +import base64 from databricks.sdk import WorkspaceClient +from databricks.sdk.service.workspace import ImportFormat -# ── Skill registry (synced with install_skills.sh) ────────────────────────── - -REPO_RAW = f"https://raw.githubusercontent.com/databricks-solutions/ai-dev-kit/{GITHUB_REF}" -MLFLOW_RAW = f"https://raw.githubusercontent.com/mlflow/skills/{GITHUB_REF}" -APX_RAW = f"https://raw.githubusercontent.com/databricks-solutions/apx/{GITHUB_REF}/skills/apx" - -DATABRICKS_SKILLS = [ - "databricks-agent-bricks", "databricks-ai-functions", "databricks-aibi-dashboards", - "databricks-bundles", "databricks-app-python", "databricks-config", "databricks-dbsql", - "databricks-docs", "databricks-genie", "databricks-iceberg", "databricks-jobs", - "databricks-lakebase-autoscale", "databricks-lakebase-provisioned", 
"databricks-metric-views", - "databricks-mlflow-evaluation", "databricks-model-serving", "databricks-python-sdk", - "databricks-execution-compute", "databricks-spark-declarative-pipelines", - "databricks-spark-structured-streaming", "databricks-synthetic-data-gen", - "databricks-unity-catalog", "databricks-unstructured-pdf-generation", - "databricks-vector-search", "databricks-zerobus-ingest", "spark-python-data-source", -] +# ── Skill sources ────────────────────────────────────────────────────────── +# Skills are auto-discovered: any subdirectory containing SKILL.md is a skill. -MLFLOW_SKILLS = [ - "agent-evaluation", "analyze-mlflow-chat-session", "analyze-mlflow-trace", - "instrumenting-with-mlflow-tracing", "mlflow-onboarding", "querying-mlflow-metrics", - "retrieving-mlflow-traces", "searching-mlflow-docs", +SKILL_SOURCES = [ + {"owner": "databricks-solutions", "repo": "ai-dev-kit", "path": "databricks-skills", + "skip": {"TEMPLATE"}}, + {"owner": "mlflow", "repo": "skills", "path": ""}, + {"owner": "databricks-solutions", "repo": "apx", "path": "skills", + "name_overrides": {"apx": "databricks-app-apx"}}, ] -APX_SKILLS = ["databricks-app-apx"] - -DATABRICKS_EXTRA_FILES = { - "databricks-agent-bricks": ["1-knowledge-assistants.md", "2-supervisor-agents.md"], - "databricks-ai-functions": ["1-task-functions.md", "2-ai-query.md", "3-ai-forecast.md", "4-document-processing-pipeline.md"], - "databricks-aibi-dashboards": ["widget-reference.md", "sql-patterns.md"], - "databricks-genie": ["spaces.md", "conversation.md"], - "databricks-bundles": ["alerts_guidance.md", "SDP_guidance.md"], - "databricks-iceberg": ["1-managed-iceberg-tables.md", "2-uniform-and-compatibility.md", "3-iceberg-rest-catalog.md", "4-snowflake-interop.md", "5-external-engine-interop.md"], - "databricks-app-python": ["1-authorization.md", "2-app-resources.md", "3-frameworks.md", "4-deployment.md", "5-lakebase.md", "6-mcp-approach.md", "examples/llm_config.py", "examples/fm-minimal-chat.py", 
"examples/fm-parallel-calls.py", "examples/fm-structured-outputs.py"], - "databricks-jobs": ["task-types.md", "triggers-schedules.md", "notifications-monitoring.md", "examples.md"], - "databricks-python-sdk": ["doc-index.md", "examples/1-authentication.py", "examples/2-clusters-and-jobs.py", "examples/3-sql-and-warehouses.py", "examples/4-unity-catalog.py", "examples/5-serving-and-vector-search.py"], - "databricks-unity-catalog": ["5-system-tables.md"], - "databricks-lakebase-autoscale": ["projects.md", "branches.md", "computes.md", "connection-patterns.md", "reverse-etl.md"], - "databricks-lakebase-provisioned": ["connection-patterns.md", "reverse-etl.md"], - "databricks-metric-views": ["yaml-reference.md", "patterns.md"], - "databricks-model-serving": ["1-classical-ml.md", "2-custom-pyfunc.md", "3-genai-agents.md", "4-tools-integration.md", "5-development-testing.md", "6-logging-registration.md", "7-deployment.md", "8-querying-endpoints.md", "9-package-requirements.md"], - "databricks-mlflow-evaluation": ["references/CRITICAL-interfaces.md", "references/GOTCHAS.md", "references/patterns-context-optimization.md", "references/patterns-datasets.md", "references/patterns-evaluation.md", "references/patterns-scorers.md", "references/patterns-trace-analysis.md", "references/user-journeys.md"], - "databricks-spark-declarative-pipelines": ["1-ingestion-patterns.md", "2-streaming-patterns.md", "3-scd-patterns.md", "4-performance-tuning.md", "5-python-api.md", "6-dlt-migration.md", "7-advanced-configuration.md", "8-project-initialization.md"], - "databricks-spark-structured-streaming": ["checkpoint-best-practices.md", "kafka-streaming.md", "merge-operations.md", "multi-sink-writes.md", "stateful-operations.md", "stream-static-joins.md", "stream-stream-joins.md", "streaming-best-practices.md", "trigger-and-cost-optimization.md"], - "databricks-vector-search": ["index-types.md", "end-to-end-rag.md"], - "databricks-zerobus-ingest": ["1-setup-and-authentication.md", 
"2-python-client.md", "3-multilanguage-clients.md", "4-protobuf-schema.md", "5-operations-and-limits.md"], -} - -MLFLOW_EXTRA_FILES = { - "agent-evaluation": ["references/dataset-preparation.md", "references/scorers-constraints.md", "references/scorers.md", "references/setup-guide.md", "references/tracing-integration.md", "references/troubleshooting.md", "scripts/analyze_results.py", "scripts/create_dataset_template.py", "scripts/list_datasets.py", "scripts/run_evaluation_template.py", "scripts/setup_mlflow.py", "scripts/validate_agent_tracing.py", "scripts/validate_auth.py", "scripts/validate_environment.py", "scripts/validate_tracing_runtime.py"], - "analyze-mlflow-chat-session": ["scripts/discover_schema.sh", "scripts/inspect_turn.sh"], - "analyze-mlflow-trace": ["references/trace-structure.md"], - "instrumenting-with-mlflow-tracing": ["references/advanced-patterns.md", "references/distributed-tracing.md", "references/feedback-collection.md", "references/production.md", "references/python.md", "references/typescript.md"], - "querying-mlflow-metrics": ["references/api_reference.md", "scripts/fetch_metrics.py"], -} - -APX_EXTRA_FILES = { - "databricks-app-apx": ["backend-patterns.md", "frontend-patterns.md"], -} # ── Helpers ────────────────────────────────────────────────────────────────── -def _download(url: str) -> bytes | None: - """Download a file from a URL. Returns bytes on success, None on failure.""" +def _github_api(url): + """Fetch JSON from the GitHub API.""" + req = urllib.request.Request(url, headers={"Accept": "application/vnd.github.v3+json"}) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read()) + except Exception as e: + print(f" WARN GitHub API error: {e}") + return None + + +def _download(url): + """Download raw file bytes. 
Returns bytes on success, None on failure.""" try: with urllib.request.urlopen(url, timeout=30) as resp: return resp.read() @@ -112,12 +79,9 @@ def _download(url: str) -> bytes | None: return None -def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): +def _upload(w, workspace_path, content): """Upload a file to the Databricks workspace.""" - import base64 - from databricks.sdk.service.workspace import ImportFormat - parent = posixpath.dirname(workspace_path) - w.workspace.mkdirs(parent) + w.workspace.mkdirs(posixpath.dirname(workspace_path)) w.workspace.import_( path=workspace_path, content=base64.b64encode(content).decode(), @@ -126,32 +90,70 @@ def _upload(w: WorkspaceClient, workspace_path: str, content: bytes): ) -def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_files: list[str], skills_path: str, source_path: str | None = "") -> bool: - """Download and upload one skill (SKILL.md + extra files). - source_path: "" = use skill_name as subdirectory (default), None = files at base_url root, str = custom subdirectory. +def _discover_from_source(source, ref): + """Discover skills in a GitHub repo using the Git Trees API. + + Returns list of (install_name, raw_url_prefix, [extra_file_paths]). 
""" - if source_path is None: - skill_url = base_url - elif source_path: - skill_url = f"{base_url}/{source_path}" - else: - skill_url = f"{base_url}/{skill_name}" - skill_md = _download(f"{skill_url}/SKILL.md") + owner, repo = source["owner"], source["repo"] + base_path = source["path"] + overrides = source.get("name_overrides", {}) + skip = source.get("skip", set()) + + data = _github_api( + f"https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1" + ) + if data is None: + return [] + + all_files = {item["path"] for item in data.get("tree", []) if item["type"] == "blob"} + prefix = f"{base_path}/" if base_path else "" + + # Find directories that directly contain SKILL.md + skill_dirs = set() + for f in all_files: + if not f.startswith(prefix): + continue + rel = f[len(prefix):] + parts = rel.split("/") + if len(parts) == 2 and parts[1] == "SKILL.md" and parts[0] not in skip: + skill_dirs.add(parts[0]) + + # Build result with extra files for each skill + raw_base = f"https://raw.githubusercontent.com/{owner}/{repo}/{ref}" + results = [] + for dir_name in sorted(skill_dirs): + skill_prefix = f"{prefix}{dir_name}/" + extras = sorted( + f[len(skill_prefix):] + for f in all_files + if f.startswith(skill_prefix) and not f.endswith("/SKILL.md") + ) + install_name = overrides.get(dir_name, dir_name) + source_url = f"{raw_base}/{prefix}{dir_name}" + results.append((install_name, source_url, extras)) + + return results + + +def _install_skill(w, name, source_url, extras, skills_path): + """Download and upload one skill (SKILL.md + extra files).""" + skill_md = _download(f"{source_url}/SKILL.md") if skill_md is None: - print(f" SKIP {skill_name} (could not download SKILL.md)") + print(f" SKIP {name} (could not download SKILL.md)") return False - dest = f"{skills_path}/{skill_name}" + dest = f"{skills_path}/{name}" _upload(w, f"{dest}/SKILL.md", skill_md) uploaded = 1 - for extra in extra_files: - data = _download(f"{skill_url}/{extra}") + for extra in 
extras: + data = _download(f"{source_url}/{extra}") if data is not None: _upload(w, f"{dest}/{extra}", data) uploaded += 1 - print(f" OK {skill_name} ({uploaded} file{'s' if uploaded != 1 else ''})") + print(f" OK {name} ({uploaded} file{'s' if uploaded != 1 else ''})") return True @@ -165,44 +167,39 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file print(f"Target: {skills_path}") print() -# Determine which skills to install +# Discover skills from all sources +print("Discovering skills from GitHub...") +all_skills = [] +for source in SKILL_SOURCES: + discovered = _discover_from_source(source, GITHUB_REF) + label = f"{source['owner']}/{source['repo']}" + print(f" {label}: {len(discovered)} skills") + all_skills.extend(discovered) + +print(f"\nTotal: {len(all_skills)} skills available\n") + +# Filter to requested skills if INSTALL_SKILLS == "all": - selected = DATABRICKS_SKILLS + MLFLOW_SKILLS + APX_SKILLS + selected = all_skills else: - selected = INSTALL_SKILLS + wanted = set(INSTALL_SKILLS) + selected = [s for s in all_skills if s[0] in wanted] + missing = wanted - {s[0] for s in selected} + if missing: + print(f" WARN: skills not found in any source: {', '.join(sorted(missing))}\n") +# Install w.workspace.mkdirs(skills_path) installed = 0 failed = 0 - -# Databricks skills -db_base = f"{REPO_RAW}/databricks-skills" -for skill in selected: - if skill in DATABRICKS_SKILLS: - extras = DATABRICKS_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, db_base, extras, skills_path) - installed += ok - failed += (not ok) - -# MLflow skills -for skill in selected: - if skill in MLFLOW_SKILLS: - extras = MLFLOW_EXTRA_FILES.get(skill, []) - ok = install_skill(w, skill, MLFLOW_RAW, extras, skills_path) - installed += ok - failed += (not ok) - -# APX skills (files are at the repo root, not in a skill-name subdirectory) -for skill in selected: - if skill in APX_SKILLS: - extras = APX_EXTRA_FILES.get(skill, []) - ok = install_skill(w, 
skill, APX_RAW, extras, skills_path, source_path=None) - installed += ok - failed += (not ok) +for name, source_url, extras in selected: + ok = _install_skill(w, name, source_url, extras, skills_path) + installed += ok + failed += not ok print() -print(f"Done. {installed} skills installed, {failed} failed.") +print(f"Done. {installed} installed, {failed} failed.") print(f"Skills are at: /Workspace{skills_path}") # COMMAND ---------- @@ -221,10 +218,17 @@ def install_skill(w: WorkspaceClient, skill_name: str, base_url: str, extra_file skills_path = f"/Users/{username}/.assistant/skills" try: - entries = w.workspace.list(skills_path) - skill_dirs = sorted([e.path.split("/")[-1] for e in entries if e.is_directory]) - print(f"Found {len(skill_dirs)} skills in {skills_path}:\n") - for name in skill_dirs: - print(f" {name}") + entries = list(w.workspace.list(skills_path)) + subdirs = sorted([ + e.path.split("/")[-1] + for e in entries + if str(e.object_type) == "ObjectType.DIRECTORY" + ]) + if subdirs: + print(f"Found {len(subdirs)} skills in {skills_path}:\n") + for name in subdirs: + print(f" {name}") + else: + print(f"No skills found in {skills_path}.") except Exception as e: print(f"Could not list skills: {e}")