From 65bbb3604e60537f86ebd68f436de343ce42c239 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Fri, 12 Jun 2026 17:20:25 +0200 Subject: [PATCH 1/2] Add experimental managed Python environment provisioning with uv Behind the "python.managedEnvironment" experimental setting (databricks.experiments.optInto), the environment setup flow provisions a working environment with uv instead of walking the user through manual steps: locate uv (PATH first, then a pinned SHA256-verified download into global storage), ensure a Python interpreter matching the selected compute, create the project .venv (seeded with pip), install databricks-connect matched to the compute plus the project's own requirements, and select the interpreter in the MS Python extension so run and debug agree. Environments the user created are respected: a satisfied environment is only selected, never modified, and a foreign .venv is only repaired or recreated after an explicit prompt. Venvs created by the extension are tagged with a marker file and can be repaired or recreated silently. Failures are classified (network blocked, interpreter unavailable, disk, cancelled) into actionable messages with a Retry button. Custom package indexes are honored via UV_INDEX_URL, PIP_INDEX_URL and pip.conf. Each funnel step emits a managedEnvironmentSetup telemetry event with duration and failure class. With the setting off (default) the setup flow is unchanged. 
Co-authored-by: Isaac --- packages/databricks-vscode/package.json | 6 +- packages/databricks-vscode/src/extension.ts | 13 +- .../src/language/EnvironmentCommands.test.ts | 121 +++ .../src/language/EnvironmentCommands.ts | 47 +- .../language/EnvironmentProvisioner.test.ts | 358 +++++++++ .../src/language/EnvironmentProvisioner.ts | 729 ++++++++++++++++++ .../src/language/UvBinaryProvider.test.ts | 101 +++ .../src/language/UvBinaryProvider.ts | 173 +++++ .../src/language/computeTargetSpec.test.ts | 55 +- .../src/language/computeTargetSpec.ts | 47 ++ .../src/telemetry/constants.ts | 29 + .../src/vscode-objs/WorkspaceConfigs.ts | 6 + 12 files changed, 1679 insertions(+), 6 deletions(-) create mode 100644 packages/databricks-vscode/src/language/EnvironmentCommands.test.ts create mode 100644 packages/databricks-vscode/src/language/EnvironmentProvisioner.test.ts create mode 100644 packages/databricks-vscode/src/language/EnvironmentProvisioner.ts create mode 100644 packages/databricks-vscode/src/language/UvBinaryProvider.test.ts create mode 100644 packages/databricks-vscode/src/language/UvBinaryProvider.ts diff --git a/packages/databricks-vscode/package.json b/packages/databricks-vscode/package.json index 3389e5edd..387f9086e 100644 --- a/packages/databricks-vscode/package.json +++ b/packages/databricks-vscode/package.json @@ -1407,11 +1407,13 @@ "items": { "enum": [ "views.cluster", - "views.workspace" + "views.workspace", + "python.managedEnvironment" ], "enumDescriptions": [ "Show cluster view in the explorer.", - "Show workspace browser in the explorer." + "Show workspace browser in the explorer.", + "Let the extension automatically provision a Python environment matching the selected compute when setting up Databricks Connect." 
], "type": "string" }, diff --git a/packages/databricks-vscode/src/extension.ts b/packages/databricks-vscode/src/extension.ts index c50503da5..4a126e0c8 100644 --- a/packages/databricks-vscode/src/extension.ts +++ b/packages/databricks-vscode/src/extension.ts @@ -75,6 +75,8 @@ import {BundleVariableTreeDataProvider} from "./ui/bundle-variables/BundleVariab import {ConfigurationTreeViewManager} from "./ui/configuration-view/ConfigurationTreeViewManager"; import {getCLIDependenciesEnvVars} from "./utils/envVarGenerators"; import {EnvironmentCommands} from "./language/EnvironmentCommands"; +import {EnvironmentProvisioner} from "./language/EnvironmentProvisioner"; +import {UvBinaryProvider} from "./language/UvBinaryProvider"; import {WorkspaceFolderManager} from "./vscode-objs/WorkspaceFolderManager"; import {SyncCommands} from "./sync/SyncCommands"; import {CodeSynchronizer} from "./sync"; @@ -601,12 +603,21 @@ export async function activate( configureAutocomplete ) ); + const environmentProvisioner = new EnvironmentProvisioner( + connectionManager, + pythonExtensionWrapper, + workspaceFolderManager, + new UvBinaryProvider(context), + telemetry + ); const environmentCommands = new EnvironmentCommands( featureManager, pythonExtensionWrapper, - environmentDependenciesInstaller + environmentDependenciesInstaller, + environmentProvisioner ); context.subscriptions.push( + environmentProvisioner, telemetry.registerCommand( "databricks.environment.setup", environmentCommands.setup, diff --git a/packages/databricks-vscode/src/language/EnvironmentCommands.test.ts b/packages/databricks-vscode/src/language/EnvironmentCommands.test.ts new file mode 100644 index 000000000..22a832e63 --- /dev/null +++ b/packages/databricks-vscode/src/language/EnvironmentCommands.test.ts @@ -0,0 +1,121 @@ +import * as assert from "assert"; +import {anything, instance, mock, verify, when} from "ts-mockito"; +import {EnvironmentCommands} from "./EnvironmentCommands"; +import { + FeatureManager, + 
FeatureState, + FeatureStepState, +} from "../feature-manager/FeatureManager"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller"; +import {EnvironmentProvisioner} from "./EnvironmentProvisioner"; + +function makeState(steps: Array<Partial<FeatureStepState> & {id: string}>) { + const state: FeatureState = { + available: steps.every((s) => s.available), + steps: new Map( + steps.map((s) => [ + s.id, + {available: false, ...s} as FeatureStepState, + ]) + ), + }; + return state; +} + +describe(__filename, () => { + let featureManagerMock: FeatureManager; + let provisionerMock: EnvironmentProvisioner; + let commands: EnvironmentCommands; + + beforeEach(() => { + featureManagerMock = mock(FeatureManager); + provisionerMock = mock(EnvironmentProvisioner); + commands = new EnvironmentCommands( + instance(featureManagerMock), + instance(mock(MsPythonExtensionWrapper)), + instance(mock(EnvironmentDependenciesInstaller)), + instance(provisionerMock) + ); + }); + + describe("shouldProvision", () => { + it("should provision when only the python environment is failing", () => { + const state = makeState([ + {id: "checkCluster", available: true}, + {id: "checkPythonEnvironment", available: false}, + {id: "checkEnvironmentDependencies", available: false}, + ]); + assert.strictEqual(commands.shouldProvision(state), true); + }); + + it("should not provision when the cluster step is failing", () => { + const state = makeState([ + {id: "checkCluster", available: false}, + {id: "checkPythonEnvironment", available: false}, + ]); + assert.strictEqual(commands.shouldProvision(state), false); + }); + + it("should not provision when everything is available", () => { + const state = makeState([ + {id: "checkCluster", available: true}, + {id: "checkPythonEnvironment", available: true}, + ]); + assert.strictEqual(commands.shouldProvision(state), false); + }); + + it("should ignore failing optional steps", () => { + 
const state = makeState([ + {id: "checkPythonEnvironment", available: false}, + {id: "checkBuiltins", available: false, optional: true}, + ]); + assert.strictEqual(commands.shouldProvision(state), true); + }); + + it("should respect the requested step", () => { + const state = makeState([ + {id: "checkPythonEnvironment", available: false}, + ]); + assert.strictEqual( + commands.shouldProvision(state, "checkPythonEnvironment"), + true + ); + assert.strictEqual( + commands.shouldProvision(state, "checkCluster"), + false + ); + }); + }); + + describe("_setup", () => { + it("should provision when the experiment is enabled", async () => { + when(featureManagerMock.isEnabled(anything())).thenResolve( + makeState([{id: "checkPythonEnvironment", available: false}]) + ); + when(featureManagerMock.isEnabled(anything(), true)).thenResolve( + makeState([{id: "checkPythonEnvironment", available: true}]) + ); + when(provisionerMock.enabled).thenReturn(true); + when(provisionerMock.ensureEnvironment()).thenResolve({ + success: true, + }); + + await commands["_setup"](); + + verify(provisionerMock.ensureEnvironment()).once(); + verify(featureManagerMock.isEnabled(anything(), true)).once(); + }); + + it("should not provision when the experiment is disabled", async () => { + when(featureManagerMock.isEnabled(anything())).thenResolve( + makeState([{id: "checkPythonEnvironment", available: true}]) + ); + when(provisionerMock.enabled).thenReturn(false); + + await commands["_setup"](); + + verify(provisionerMock.ensureEnvironment()).never(); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/EnvironmentCommands.ts b/packages/databricks-vscode/src/language/EnvironmentCommands.ts index 4d6ba30c8..a6b4ea20d 100644 --- a/packages/databricks-vscode/src/language/EnvironmentCommands.ts +++ b/packages/databricks-vscode/src/language/EnvironmentCommands.ts @@ -1,6 +1,7 @@ import {window, commands, QuickPickItem, ProgressLocation} from "vscode"; import { FeatureManager, + 
FeatureState, FeatureStepState, } from "../feature-manager/FeatureManager"; import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; @@ -8,12 +9,19 @@ import {Cluster} from "../sdk-extensions"; import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller"; import {Environment} from "./MsPythonExtensionApi"; import {environmentName} from "../utils/environmentUtils"; +import {EnvironmentProvisioner} from "./EnvironmentProvisioner"; + +const provisionableSteps = [ + "checkPythonEnvironment", + "checkEnvironmentDependencies", +]; export class EnvironmentCommands { constructor( private featureManager: FeatureManager, private pythonExtension: MsPythonExtensionWrapper, - private installer: EnvironmentDependenciesInstaller + private installer: EnvironmentDependenciesInstaller, + private provisioner?: EnvironmentProvisioner ) {} async setup(stepId?: string) { @@ -40,6 +48,20 @@ export class EnvironmentCommands { let state = await this.featureManager.isEnabled( "environment.dependencies" ); + if (this.provisioner?.enabled && this.shouldProvision(state, stepId)) { + const result = await this.provisioner.ensureEnvironment(); + if (!result.noOp) { + state = await this.checkEnvironmentDependencies(); + if (state.available || !result.success) { + // On provisioning failures the provisioner already showed + // an actionable error with a retry option. + this.reportSetupOutcome(state, !result.success); + return; + } + } + // noOp (or an inconsistent result): fall through to the manual + // per-step setup flow. + } for (const [, s] of state.steps) { if (!s.available && (!stepId || s.id === stepId) && s.action) { // Take an action of a failed step and re-check all steps state afterwards. @@ -53,11 +75,32 @@ export class EnvironmentCommands { break; } } + this.reportSetupOutcome(state); + } + + /** + * The managed flow can only fix the python environment and its + * dependencies: cluster and workspace problems keep the manual flow. 
+ */ + shouldProvision(state: FeatureState, stepId?: string): boolean { + if (stepId && !provisionableSteps.includes(stepId)) { + return false; + } + const failingSteps = Array.from(state.steps.values()).filter( + (s) => !s.available && !s.optional + ); + return ( + failingSteps.length > 0 && + failingSteps.every((s) => provisionableSteps.includes(s.id)) + ); + } + + private reportSetupOutcome(state: FeatureState, quiet = false) { if (state.available) { window.showInformationMessage( "Python environment and Databricks Connect are set up." ); - } else { + } else if (!quiet) { const detail = Array.from(state.steps.values()) .filter( (s) => !s.available && !s.optional && (s.message || s.title) diff --git a/packages/databricks-vscode/src/language/EnvironmentProvisioner.test.ts b/packages/databricks-vscode/src/language/EnvironmentProvisioner.test.ts new file mode 100644 index 000000000..2b41b65fc --- /dev/null +++ b/packages/databricks-vscode/src/language/EnvironmentProvisioner.test.ts @@ -0,0 +1,358 @@ +import * as assert from "assert"; +import path from "node:path"; +import fs from "node:fs"; +import os from "node:os"; +import {Uri} from "vscode"; +import {anything, instance, mock, when} from "ts-mockito"; +import { + EnvironmentProvisioner, + ProvisionError, + buildProvisionEnv, + classifyProvisionFailure, + readPipIndexUrl, + venvPythonExecutable, +} from "./EnvironmentProvisioner"; +import {ConnectionManager} from "../configuration/ConnectionManager"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; +import {UvBinaryProvider} from "./UvBinaryProvider"; +import {Telemetry} from "../telemetry"; +import {cancellableExecFile} from "../cli/CliWrapper"; + +describe(__filename, () => { + describe("venvPythonExecutable", () => { + it("should use bin/python on posix", () => { + assert.strictEqual( + venvPythonExecutable("/p/.venv", "darwin"), + path.join("/p/.venv", "bin", 
"python") + ); + }); + it("should use Scripts/python.exe on windows", () => { + assert.strictEqual( + venvPythonExecutable("C:\\p\\.venv", "win32"), + path.join("C:\\p\\.venv", "Scripts", "python.exe") + ); + }); + }); + + /* eslint-disable @typescript-eslint/naming-convention */ + describe("buildProvisionEnv", () => { + it("should map PIP_INDEX_URL to UV_INDEX_URL", () => { + const env = buildProvisionEnv({ + PIP_INDEX_URL: "https://mirror.corp/simple", + }); + assert.strictEqual(env.UV_INDEX_URL, "https://mirror.corp/simple"); + }); + it("should not override an explicit UV_INDEX_URL", () => { + const env = buildProvisionEnv({ + PIP_INDEX_URL: "https://pip.corp/simple", + UV_INDEX_URL: "https://uv.corp/simple", + }); + assert.strictEqual(env.UV_INDEX_URL, "https://uv.corp/simple"); + }); + it("should fall back to the pip.conf index url", () => { + const env = buildProvisionEnv({}, "https://conf.corp/simple"); + assert.strictEqual(env.UV_INDEX_URL, "https://conf.corp/simple"); + }); + it("should leave UV_INDEX_URL unset without custom indexes", () => { + const env = buildProvisionEnv({}); + assert.strictEqual(env.UV_INDEX_URL, undefined); + }); + }); + + describe("readPipIndexUrl", () => { + it("should read index-url from PIP_CONFIG_FILE", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "pipconf-")); + const confPath = path.join(dir, "pip.conf"); + fs.writeFileSync( + confPath, + "[global]\ntimeout = 60\nindex-url = https://mirror.corp/simple\n" + ); + try { + assert.strictEqual( + readPipIndexUrl("darwin", {PIP_CONFIG_FILE: confPath}, dir), + "https://mirror.corp/simple" + ); + } finally { + fs.rmSync(dir, {recursive: true, force: true}); + } + }); + it("should return undefined when no pip.conf exists", () => { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "pipconf-")); + try { + assert.strictEqual( + readPipIndexUrl("darwin", {}, dir), + undefined + ); + } finally { + fs.rmSync(dir, {recursive: true, force: true}); + } + }); + }); + /* 
eslint-enable @typescript-eslint/naming-convention */ + + describe("classifyProvisionFailure", () => { + const cases: Array<{message: string; expected: string}> = [ + { + message: + "error sending request for url (https://pypi.org/simple/databricks-connect/)", + expected: "networkBlocked", + }, + { + message: "connect ECONNREFUSED 104.16.0.1:443", + expected: "networkBlocked", + }, + { + message: "No download found for request: cpython-3.12", + expected: "pythonUnavailable", + }, + {message: "ENOSPC: no space left on device", expected: "disk"}, + {message: "something exploded", expected: "unknown"}, + ]; + for (const {message, expected} of cases) { + it(`should classify "${message.slice( + 0, + 40 + )}..." as ${expected}`, () => { + assert.strictEqual( + classifyProvisionFailure(new Error(message)), + expected + ); + }); + } + it("should keep the class of ProvisionError", () => { + assert.strictEqual( + classifyProvisionFailure( + new ProvisionError("uv missing", "uvUnavailable") + ), + "uvUnavailable" + ); + }); + }); + + describe("ensureEnvironment", () => { + let projectDir: string; + let venvDir: string; + let execCalls: string[][] = []; + let selectedInterpreter: string | undefined; + let foreignVenvChoice: "repair" | "recreate" | "manual" = "manual"; + /** keyed responses for the python version/dbconnect probes */ + let venvPythonVersion: string | undefined; + let installedDbconnect: string | undefined; + + class TestProvisioner extends EnvironmentProvisioner { + protected override async promptForeignVenv(): Promise< + "repair" | "recreate" | "manual" + > { + return foreignVenvChoice; + } + } + + const fakeExec: typeof cancellableExecFile = async (file, args) => { + execCalls.push([file, ...args]); + const code = args.includes("-c") ? 
args[args.length - 1] : ""; + if (code.includes("sys.version_info")) { + if (!venvPythonVersion) { + throw new Error("broken interpreter"); + } + return {stdout: `${venvPythonVersion}\n`, stderr: ""}; + } + if (code.includes("databricks-connect")) { + if (!installedDbconnect) { + throw new Error( + "importlib.metadata.PackageNotFoundError: databricks-connect" + ); + } + return {stdout: `${installedDbconnect}\n`, stderr: ""}; + } + if (args[0] === "venv") { + fs.mkdirSync(path.join(venvDir, "bin"), {recursive: true}); + fs.writeFileSync(path.join(venvDir, "bin", "python"), ""); + } + return {stdout: "", stderr: ""}; + }; + + function createProvisioner( + execFn: typeof cancellableExecFile = fakeExec + ) { + const connectionManagerMock = mock(ConnectionManager); + when(connectionManagerMock.serverless).thenReturn(true); + when(connectionManagerMock.cluster).thenReturn(undefined); + + const uvProviderMock = mock(UvBinaryProvider); + when(uvProviderMock.getUvPath(anything())).thenResolve("uv"); + + const pythonExtensionFake = { + api: { + environments: { + refreshEnvironments: async () => {}, + updateActiveEnvironmentPath: async (p: string) => { + selectedInterpreter = p; + }, + }, + }, + } as unknown as MsPythonExtensionWrapper; + + return new TestProvisioner( + instance(connectionManagerMock), + pythonExtensionFake, + { + activeProjectUri: Uri.file(projectDir), + } as unknown as WorkspaceFolderManager, + instance(uvProviderMock), + instance(mock(Telemetry)), + execFn + ); + } + + function uvCommands() { + return execCalls + .filter(([file]) => file === "uv") + .map((call) => call.slice(1, 3).join(" ")); + } + + beforeEach(() => { + projectDir = fs.mkdtempSync(path.join(os.tmpdir(), "dbx-proj-")); + venvDir = path.join(projectDir, ".venv"); + execCalls = []; + selectedInterpreter = undefined; + venvPythonVersion = undefined; + installedDbconnect = undefined; + foreignVenvChoice = "manual"; + }); + + afterEach(() => { + fs.rmSync(projectDir, {recursive: true, force: 
true}); + }); + + it("should create the venv and install dependencies in a fresh project", async () => { + fs.writeFileSync( + path.join(projectDir, "requirements.txt"), + "pandas\n" + ); + const provisioner = createProvisioner(); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, true); + assert.deepStrictEqual(uvCommands(), [ + "python find", + "venv " + venvDir, + "pip install", + "pip install", + ]); + const dbconnectInstall = execCalls.find((c) => + c.some((a) => a.startsWith("databricks-connect==")) + ); + assert.ok(dbconnectInstall); + assert.ok(dbconnectInstall.includes("databricks-connect==17.3.*")); + assert.ok(dbconnectInstall.includes("nbformat")); + const requirementsInstall = execCalls.find((c) => c.includes("-r")); + assert.ok(requirementsInstall); + assert.ok( + requirementsInstall.includes( + path.join(projectDir, "requirements.txt") + ) + ); + assert.strictEqual( + selectedInterpreter, + venvPythonExecutable(venvDir) + ); + const marker = JSON.parse( + fs.readFileSync(path.join(venvDir, "databricks.json"), "utf-8") + ); + assert.strictEqual(marker.createdBy, "databricks-vscode"); + assert.strictEqual(marker.pythonVersion, "3.12"); + }); + + it("should recreate a managed venv with the wrong python version", async () => { + fs.mkdirSync(path.join(venvDir, "bin"), {recursive: true}); + fs.writeFileSync(path.join(venvDir, "bin", "python"), ""); + fs.writeFileSync( + path.join(venvDir, "databricks.json"), + JSON.stringify({createdBy: "databricks-vscode"}) + ); + venvPythonVersion = "3.10"; + const provisioner = createProvisioner(); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, true); + assert.ok(uvCommands().includes("venv " + venvDir)); + }); + + it("should only install dependencies into a managed venv with a matching python", async () => { + fs.mkdirSync(path.join(venvDir, "bin"), {recursive: true}); + fs.writeFileSync(path.join(venvDir, "bin", "python"), 
""); + fs.writeFileSync( + path.join(venvDir, "databricks.json"), + JSON.stringify({createdBy: "databricks-vscode"}) + ); + venvPythonVersion = "3.12"; + const provisioner = createProvisioner(); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, true); + assert.ok(!uvCommands().includes("venv " + venvDir)); + assert.ok(uvCommands().includes("pip install")); + }); + + it("should not touch a foreign venv when the user chooses manual setup", async () => { + fs.mkdirSync(path.join(venvDir, "bin"), {recursive: true}); + fs.writeFileSync(path.join(venvDir, "bin", "python"), ""); + venvPythonVersion = "3.10"; + foreignVenvChoice = "manual"; + const provisioner = createProvisioner(); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, false); + assert.strictEqual(result.noOp, true); + assert.deepStrictEqual(uvCommands(), []); + assert.ok(!fs.existsSync(path.join(venvDir, "databricks.json"))); + }); + + it("should select a satisfied foreign venv without modifying it", async () => { + fs.mkdirSync(path.join(venvDir, "bin"), {recursive: true}); + fs.writeFileSync(path.join(venvDir, "bin", "python"), ""); + venvPythonVersion = "3.12"; + installedDbconnect = "17.3.2"; + const provisioner = createProvisioner(); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, true); + assert.deepStrictEqual(uvCommands(), []); + assert.strictEqual( + selectedInterpreter, + venvPythonExecutable(venvDir) + ); + assert.ok(!fs.existsSync(path.join(venvDir, "databricks.json"))); + }); + + it("should clean up a venv it created when installation fails", async () => { + const failingExec: typeof cancellableExecFile = async ( + file, + args, + options, + token + ) => { + if (args[0] === "pip") { + throw new Error( + "error sending request for url (https://pypi.org/simple/)" + ); + } + return fakeExec(file, args, options, token); + }; + const provisioner = 
createProvisioner(failingExec); + + const result = await provisioner.ensureEnvironment(); + + assert.strictEqual(result.success, false); + assert.strictEqual(result.failureClass, "networkBlocked"); + assert.strictEqual(result.failedStep, "depsInstall"); + assert.ok(!fs.existsSync(venvDir)); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts b/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts new file mode 100644 index 000000000..f425c5ab0 --- /dev/null +++ b/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts @@ -0,0 +1,729 @@ +import { + CancellationToken, + Disposable, + OutputChannel, + Progress, + ProgressLocation, + commands, + window, +} from "vscode"; +import path from "node:path"; +import fs from "node:fs"; +import os from "node:os"; +import {ConnectionManager} from "../configuration/ConnectionManager"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; +import {UvBinaryProvider} from "./UvBinaryProvider"; +import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; +import {ComputeTargetSpec, resolveComputeTargetSpec} from "./computeTargetSpec"; +import {cancellableExecFile} from "../cli/CliWrapper"; +import {Mutex} from "../locking"; +import {Events, Telemetry} from "../telemetry"; +import {NamedLogger} from "@databricks/sdk-experimental/dist/logging"; +import {Loggers} from "../logger"; + +export type ProvisionFailureClass = + | "networkBlocked" + | "uvUnavailable" + | "pythonUnavailable" + | "disk" + | "cancelled" + | "unknown"; + +export type ProvisionStep = + | "uvAcquire" + | "pythonInstall" + | "venvCreate" + | "depsInstall" + | "interpreterSet"; + +export type VenvDisposition = "satisfied" | "repair" | "recreate" | "absent"; + +export interface ProvisionResult { + success: boolean; + /** + * True when the provisioner decided not to act (unsupported compute, + * user chose 
manual setup): callers should fall back to the manual + * setup flow. + */ + noOp?: boolean; + failureClass?: ProvisionFailureClass; + failedStep?: ProvisionStep; + message?: string; +} + +export class ProvisionError extends Error { + constructor( + message: string, + public readonly failureClass: ProvisionFailureClass + ) { + super(message); + } +} + +class ProvisionStepError extends Error { + constructor( + public readonly step: ProvisionStep, + public readonly cause: unknown + ) { + super(cause instanceof Error ? cause.message : String(cause)); + } +} + +export function venvPythonExecutable( + venvDir: string, + platform: NodeJS.Platform = process.platform +): string { + return platform === "win32" + ? path.join(venvDir, "Scripts", "python.exe") + : path.join(venvDir, "bin", "python"); +} + +/** + * uv doesn't read pip configuration files, so we look up a custom index-url + * configured for pip and pass it to uv explicitly. + */ +export function readPipIndexUrl( + platform: NodeJS.Platform = process.platform, + env: NodeJS.ProcessEnv = process.env, + homeDir: string = os.homedir() +): string | undefined { + const candidates: string[] = []; + if (env.PIP_CONFIG_FILE) { + candidates.push(env.PIP_CONFIG_FILE); + } + if (platform === "win32") { + if (env.APPDATA) { + candidates.push(path.join(env.APPDATA, "pip", "pip.ini")); + } + candidates.push(path.join(homeDir, "pip", "pip.ini")); + } else { + candidates.push( + path.join( + env.XDG_CONFIG_HOME ?? 
path.join(homeDir, ".config"), + "pip", + "pip.conf" + ), + path.join(homeDir, ".pip", "pip.conf"), + "/etc/pip.conf" + ); + } + for (const candidate of candidates) { + let content: string; + try { + content = fs.readFileSync(candidate, "utf-8"); + } catch { + continue; + } + const match = content.match(/^\s*index-url\s*=\s*(\S+)/m); + if (match) { + return match[1]; + } + } + return undefined; +} + +/** + * Environment for uv child processes: forwards the user's proxy and index + * settings and maps pip's index configuration to uv's, which doesn't read + * PIP_* variables or pip.conf. + */ +export function buildProvisionEnv( + baseEnv: NodeJS.ProcessEnv = process.env, + pipIndexUrl: string | undefined = undefined +): NodeJS.ProcessEnv { + const env = {...baseEnv}; + if (!env.UV_INDEX_URL) { + const indexUrl = env.PIP_INDEX_URL ?? pipIndexUrl; + if (indexUrl) { + env.UV_INDEX_URL = indexUrl; + } + } + return env; +} + +export function classifyProvisionFailure(e: unknown): ProvisionFailureClass { + if (e instanceof ProvisionError) { + return e.failureClass; + } + const message = e instanceof Error ? e.message : String(e); + if ( + (e instanceof Error && e.name === "AbortError") || + message.includes("ABORT_ERR") + ) { + return "cancelled"; + } + if (/ENOSPC|[Nn]o space left/.test(message)) { + return "disk"; + } + if ( + /[Nn]o download found for|[Nn]o interpreter found for|managed Python download/.test( + message + ) + ) { + return "pythonUnavailable"; + } + if ( + /error sending request|[Cc]onnection (refused|reset)|certificate|tls|timed out|ENOTFOUND|ECONNREFUSED|ETIMEDOUT|EAI_AGAIN|proxy|403|407|503/.test( + message + ) + ) { + return "networkBlocked"; + } + return "unknown"; +} + +const failureMessages: Record<ProvisionFailureClass, string> = { + networkBlocked: + "Your network seems to block the package index or interpreter downloads. 
" + + "If you use a proxy or a package mirror, set HTTPS_PROXY, UV_INDEX_URL " + + "(or pip.conf index-url) and UV_PYTHON_INSTALL_MIRROR, then retry.", + uvUnavailable: + "The extension could not find or download the uv tool used to set up " + + "Python environments. Install uv (https://docs.astral.sh/uv/) and retry, " + + "or set up the environment manually.", + pythonUnavailable: + "A matching Python interpreter is not available and could not be downloaded. " + + "Install the required Python version and retry.", + disk: "Not enough disk space to set up the Python environment.", + cancelled: "", + unknown: "Failed to set up the Python environment.", +}; + +interface VenvAssessment { + disposition: VenvDisposition | "manual"; + pythonMatches: boolean; + /** Whether .venv was created by this extension (has our marker file) */ + managed: boolean; +} + +/** + * Provisions a Python environment matching the selected compute using uv: + * downloads a suitable interpreter, creates the project .venv, installs + * databricks-connect and the project's own dependencies, and selects the + * interpreter in the MS Python extension. Gated behind the + * "python.managedEnvironment" experimental setting. 
+ */ +export class EnvironmentProvisioner implements Disposable { + private readonly logger = NamedLogger.getOrCreate(Loggers.Extension); + private readonly mutex = new Mutex(); + private readonly disposables: Disposable[] = []; + private _outputChannel?: OutputChannel; + + constructor( + private readonly connectionManager: ConnectionManager, + private readonly pythonExtension: MsPythonExtensionWrapper, + private readonly workspaceFolderManager: WorkspaceFolderManager, + private readonly uvProvider: UvBinaryProvider, + private readonly telemetry: Telemetry, + private readonly execFn: typeof cancellableExecFile = cancellableExecFile + ) {} + + get enabled(): boolean { + return workspaceConfigs.managedPythonEnvironmentEnabled; + } + + private get outputChannel() { + if (!this._outputChannel) { + this._outputChannel = window.createOutputChannel( + "Databricks Python Environment" + ); + this.disposables.push(this._outputChannel); + } + return this._outputChannel; + } + + private get projectRoot() { + return this.workspaceFolderManager.activeProjectUri.fsPath; + } + + private get venvDir() { + return path.join(this.projectRoot, ".venv"); + } + + private get markerPath() { + return path.join(this.venvDir, "databricks.json"); + } + + async ensureEnvironment(): Promise<ProvisionResult> { + await this.mutex.wait(); + try { + return await this.ensureEnvironmentImpl(); + } finally { + this.mutex.signal(); + } + } + + private resolveSpec(): ComputeTargetSpec | undefined { + return resolveComputeTargetSpec({ + serverless: this.connectionManager.serverless, + serverlessDbconnectVersion: + workspaceConfigs.serverlessDbconnectVersion, + dbrVersion: this.connectionManager.cluster?.dbrVersion, + }); + } + + private async ensureEnvironmentImpl(): Promise<ProvisionResult> { + const spec = this.resolveSpec(); + if (!spec) { + return {success: false, noOp: true}; + } + const venvPython = venvPythonExecutable(this.venvDir); + const assessment = await this.assessVenv(spec, venvPython); + if (assessment.disposition === 
"manual") { + return {success: false, noOp: true}; + } + + const computeTitle = + spec.computeType === "serverless" + ? "Serverless" + : "the selected cluster"; + const result = await window.withProgress( + { + location: ProgressLocation.Notification, + title: `Databricks: Setting up Python environment for ${computeTitle} (Python ${spec.pythonVersion.display})`, + cancellable: true, + }, + (progress, token) => + this.provision(spec, assessment, venvPython, progress, token) + ); + if (!result.success && !result.noOp) { + this.reportFailure(result); + } + return result; + } + + private async assessVenv( + spec: ComputeTargetSpec, + venvPython: string + ): Promise<VenvAssessment> { + const managed = fs.existsSync(this.markerPath); + if (!fs.existsSync(this.venvDir)) { + return {disposition: "absent", pythonMatches: false, managed}; + } + const pythonMatches = await this.venvPythonMatches(spec, venvPython); + const depsMatch = + pythonMatches && (await this.venvDepsMatch(spec, venvPython)); + if (pythonMatches && depsMatch) { + return {disposition: "satisfied", pythonMatches, managed}; + } + if (managed) { + return { + disposition: pythonMatches ? 
"repair" : "recreate", + pythonMatches, + managed, + }; + } + const disposition = await this.promptForeignVenv(spec, pythonMatches); + return {disposition, pythonMatches, managed}; + } + + private async venvPythonMatches( + spec: ComputeTargetSpec, + venvPython: string + ): Promise { + if (!fs.existsSync(venvPython)) { + return false; + } + try { + const {stdout} = await this.execFn( + venvPython, + ["-c", "import sys; print('%d.%d' % sys.version_info[:2])"], + {shell: false} + ); + return stdout.trim() === spec.pythonVersion.display; + } catch { + return false; + } + } + + private async venvDepsMatch( + spec: ComputeTargetSpec, + venvPython: string + ): Promise { + try { + const {stdout} = await this.execFn( + venvPython, + [ + "-c", + "import importlib.metadata as m; print(m.version('databricks-connect'))", + ], + {shell: false} + ); + // "17.3.*" should match an installed 17.3.x + const expectedPrefix = spec.dbconnectVersion.replace(/\.?\*$/, ""); + const installed = stdout.trim(); + return ( + installed === expectedPrefix || + installed.startsWith(`${expectedPrefix}.`) + ); + } catch { + return false; + } + } + + /** + * The .venv exists but wasn't created by this extension: never modify it + * without an explicit user decision. + */ + protected async promptForeignVenv( + spec: ComputeTargetSpec, + pythonMatches: boolean + ): Promise { + const repairChoice = "Install into .venv"; + const recreateChoice = "Recreate .venv"; + const manualChoice = "Set up manually"; + const choices = pythonMatches + ? [repairChoice, recreateChoice, manualChoice] + : [recreateChoice, manualChoice]; + const detail = pythonMatches + ? `The project .venv is missing the dependencies for Databricks Connect (databricks-connect ${spec.dbconnectVersion}).` + : `The project .venv doesn't match the selected compute: Python ${spec.pythonVersion.display} is required. 
Recreating will delete the existing .venv.`; + const choice = await window.showWarningMessage( + `Update the existing Python environment for Databricks Connect?`, + {modal: true, detail}, + ...choices + ); + switch (choice) { + case repairChoice: + return "repair"; + case recreateChoice: + return "recreate"; + default: + return "manual"; + } + } + + private async provision( + spec: ComputeTargetSpec, + assessment: VenvAssessment, + venvPython: string, + progress: Progress<{message?: string; increment?: number}>, + token: CancellationToken + ): Promise { + const disposition = assessment.disposition as VenvDisposition; + const childEnv = buildProvisionEnv(process.env, readPipIndexUrl()); + let createdVenv = false; + const start = Date.now(); + try { + if (disposition !== "satisfied") { + const uv = await this.runStep(spec, "uvAcquire", async () => { + progress.report({message: "locating uv", increment: 5}); + const uvPath = await this.uvProvider.getUvPath(token); + if (!uvPath) { + throw new ProvisionError( + "uv is not available", + "uvUnavailable" + ); + } + return uvPath; + }); + this.throwIfCancelled(token); + + await this.runStep(spec, "pythonInstall", async () => { + progress.report({ + message: `ensuring Python ${spec.pythonVersion.display} is installed`, + increment: 15, + }); + await this.ensurePythonInterpreter( + uv, + spec, + childEnv, + token + ); + }); + this.throwIfCancelled(token); + + if (disposition !== "repair") { + await this.runStep(spec, "venvCreate", async () => { + progress.report({ + message: "creating .venv", + increment: 15, + }); + if (fs.existsSync(this.venvDir)) { + // Deleting is safe here: either we created this + // venv (marker file) or the user explicitly chose + // "Recreate" in promptForeignVenv. 
+ await fs.promises.rm(this.venvDir, { + recursive: true, + force: true, + }); + } + await this.uvExec( + uv, + [ + "venv", + this.venvDir, + "--python", + spec.pythonVersion.display, + "--seed", + ], + childEnv, + token + ); + createdVenv = true; + }); + this.throwIfCancelled(token); + } + + await this.runStep(spec, "depsInstall", async () => { + progress.report({ + message: `installing databricks-connect ${spec.dbconnectVersion}`, + increment: 40, + }); + await this.installDependencies( + uv, + spec, + venvPython, + childEnv, + token + ); + }); + this.throwIfCancelled(token); + } + + await this.runStep(spec, "interpreterSet", async () => { + progress.report({ + message: "selecting the interpreter", + increment: 15, + }); + if (createdVenv || assessment.managed) { + // Only tag environments we own: the marker is what allows + // silent recreation later, so a user-created venv must + // never get one. + this.writeMarker(spec); + } + await this.pythonExtension.api.environments.refreshEnvironments(); + await this.pythonExtension.api.environments.updateActiveEnvironmentPath( + venvPython + ); + }); + + this.recordTotal(spec, disposition, start, true); + return {success: true}; + } catch (e) { + this.logger.error("Failed to provision python environment", e); + const step = e instanceof ProvisionStepError ? e.step : undefined; + const cause = e instanceof ProvisionStepError ? e.cause : e; + const failureClass = classifyProvisionFailure(cause); + if (createdVenv) { + // Don't leave behind a half-initialized venv we created. + await fs.promises.rm(this.venvDir, { + recursive: true, + force: true, + }); + } + this.recordTotal(spec, disposition, start, false, failureClass); + return { + success: false, + failureClass, + failedStep: step, + message: cause instanceof Error ? 
cause.message : String(cause), + }; + } + } + + private async ensurePythonInterpreter( + uv: string, + spec: ComputeTargetSpec, + childEnv: NodeJS.ProcessEnv, + token: CancellationToken + ) { + try { + await this.uvExec( + uv, + ["python", "find", spec.pythonVersion.display], + childEnv, + token, + false + ); + return; + } catch { + // not found: fall through to install + } + await this.uvExec( + uv, + ["python", "install", spec.pythonVersion.display], + childEnv, + token + ); + } + + private async installDependencies( + uv: string, + spec: ComputeTargetSpec, + venvPython: string, + childEnv: NodeJS.ProcessEnv, + token: CancellationToken + ) { + const pipInstall = ["pip", "install", "--python", venvPython]; + await this.uvExec( + uv, + [ + ...pipInstall, + `databricks-connect==${spec.dbconnectVersion}`, + // Required for executing notebooks with %run magic + "nbformat", + ], + childEnv, + token + ); + for (const requirements of [ + "requirements.txt", + "requirements-dev.txt", + ]) { + const requirementsPath = path.join(this.projectRoot, requirements); + if (fs.existsSync(requirementsPath)) { + await this.uvExec( + uv, + [...pipInstall, "-r", requirementsPath], + childEnv, + token + ); + } + } + const pyprojectPath = path.join(this.projectRoot, "pyproject.toml"); + if ( + fs.existsSync(pyprojectPath) && + /^\[project\]/m.test(fs.readFileSync(pyprojectPath, "utf-8")) + ) { + await this.uvExec( + uv, + [...pipInstall, "-r", pyprojectPath], + childEnv, + token + ); + } + } + + private async uvExec( + uv: string, + args: string[], + childEnv: NodeJS.ProcessEnv, + token: CancellationToken, + log = true + ) { + if (log) { + this.outputChannel.appendLine(`Running: ${uv} ${args.join(" ")}`); + } + const {stdout, stderr} = await this.execFn( + uv, + args, + {cwd: this.projectRoot, env: childEnv, shell: false}, + token + ); + if (log) { + if (stdout.trim()) { + this.outputChannel.appendLine(stdout.trim()); + } + if (stderr.trim()) { + 
this.outputChannel.appendLine(stderr.trim()); + } + } + return {stdout, stderr}; + } + + private writeMarker(spec: ComputeTargetSpec) { + fs.writeFileSync( + this.markerPath, + JSON.stringify( + { + createdBy: "databricks-vscode", + pythonVersion: spec.pythonVersion.display, + dbconnectVersion: spec.dbconnectVersion, + createdAt: new Date().toISOString(), + }, + null, + 2 + ) + ); + } + + private throwIfCancelled(token: CancellationToken) { + if (token.isCancellationRequested) { + throw new ProvisionError("Setup was cancelled", "cancelled"); + } + } + + private async runStep( + spec: ComputeTargetSpec, + step: ProvisionStep, + fn: () => Promise + ): Promise { + const start = Date.now(); + try { + const result = await fn(); + this.telemetry.recordEvent(Events.MANAGED_ENV_SETUP, { + step, + success: true, + computeType: spec.computeType, + duration: Date.now() - start, + }); + return result; + } catch (e) { + this.telemetry.recordEvent(Events.MANAGED_ENV_SETUP, { + step, + success: false, + computeType: spec.computeType, + failureClass: classifyProvisionFailure(e), + duration: Date.now() - start, + }); + throw new ProvisionStepError(step, e); + } + } + + private recordTotal( + spec: ComputeTargetSpec, + disposition: VenvDisposition, + start: number, + success: boolean, + failureClass?: ProvisionFailureClass + ) { + this.telemetry.recordEvent(Events.MANAGED_ENV_SETUP, { + step: "total", + success, + computeType: spec.computeType, + failureClass, + venvDisposition: disposition, + duration: Date.now() - start, + }); + } + + private reportFailure(result: ProvisionResult) { + if (result.failureClass === "cancelled") { + return; + } + const message = + failureMessages[result.failureClass ?? "unknown"] + + (result.message ? ` (${result.message})` : ""); + // Not awaited: the toast stays up until the user reacts. + window + .showErrorMessage( + `Databricks: Failed to set up the Python environment. 
${message}`, + "Retry", + "Show Logs", + "Set up manually" + ) + .then(async (choice) => { + switch (choice) { + case "Retry": + await commands.executeCommand( + "databricks.environment.setup" + ); + break; + case "Show Logs": + this.outputChannel.show(); + break; + case "Set up manually": + await commands.executeCommand( + "databricks.environment.selectPythonInterpreter" + ); + break; + } + }); + } + + dispose() { + this.disposables.forEach((i) => i.dispose()); + } +} diff --git a/packages/databricks-vscode/src/language/UvBinaryProvider.test.ts b/packages/databricks-vscode/src/language/UvBinaryProvider.test.ts new file mode 100644 index 000000000..43491eff6 --- /dev/null +++ b/packages/databricks-vscode/src/language/UvBinaryProvider.test.ts @@ -0,0 +1,101 @@ +import * as assert from "assert"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import {ExtensionContext, Uri} from "vscode"; +import {UvBinaryProvider, uvArtifactName} from "./UvBinaryProvider"; + +// Buffer.from(string).buffer can point at a shared pool, so encode exactly. 
+function toArrayBuffer(s: string): ArrayBuffer { + return new TextEncoder().encode(s).buffer as ArrayBuffer; +} + +class TestableUvProvider extends UvBinaryProvider { + /** executables for which the version probe succeeds */ + probeSucceedsFor: string[] = []; + fetchedUrls: string[] = []; + artifacts: Record = {}; + + protected override async probe(executable: string): Promise { + return this.probeSucceedsFor.includes(executable); + } + + protected override async fetchArtifact(url: string): Promise { + this.fetchedUrls.push(url); + const artifact = this.artifacts[path.basename(url)]; + if (!artifact) { + throw new Error(`fetch failed for ${url}`); + } + return artifact; + } +} + +describe(__filename, () => { + describe("uvArtifactName", () => { + it("should map the supported platforms", () => { + assert.strictEqual( + uvArtifactName("darwin", "arm64"), + "uv-aarch64-apple-darwin.tar.gz" + ); + assert.strictEqual( + uvArtifactName("linux", "x64"), + "uv-x86_64-unknown-linux-gnu.tar.gz" + ); + assert.strictEqual( + uvArtifactName("win32", "x64"), + "uv-x86_64-pc-windows-msvc.zip" + ); + assert.strictEqual(uvArtifactName("aix", "x64"), undefined); + assert.strictEqual(uvArtifactName("linux", "ia32"), undefined); + }); + }); + + describe("getUvPath", () => { + let storageDir: string; + let provider: TestableUvProvider; + + beforeEach(() => { + storageDir = fs.mkdtempSync(path.join(os.tmpdir(), "uv-test-")); + provider = new TestableUvProvider({ + globalStorageUri: Uri.file(storageDir), + } as unknown as ExtensionContext); + }); + + afterEach(() => { + fs.rmSync(storageDir, {recursive: true, force: true}); + }); + + it("should prefer uv from PATH", async () => { + provider.probeSucceedsFor = ["uv"]; + assert.strictEqual(await provider.getUvPath(), "uv"); + assert.deepStrictEqual(provider.fetchedUrls, []); + }); + + it("should use a previously downloaded binary", async () => { + provider.probeSucceedsFor = [provider.uvBinaryPath]; + assert.strictEqual( + await 
provider.getUvPath(), + provider.uvBinaryPath + ); + assert.deepStrictEqual(provider.fetchedUrls, []); + }); + + it("should reject downloads with a checksum mismatch", async () => { + const artifact = uvArtifactName()!; + provider.artifacts = { + [artifact]: toArrayBuffer("not really uv"), + [`${artifact}.sha256`]: toArrayBuffer( + `${"0".repeat(64)} ${artifact}\n` + ), + }; + assert.strictEqual(await provider.getUvPath(), undefined); + assert.strictEqual(provider.fetchedUrls.length, 2); + assert.ok(!fs.existsSync(provider.uvBinaryPath)); + }); + + it("should return undefined when the download fails", async () => { + assert.strictEqual(await provider.getUvPath(), undefined); + assert.ok(provider.fetchedUrls.length > 0); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/UvBinaryProvider.ts b/packages/databricks-vscode/src/language/UvBinaryProvider.ts new file mode 100644 index 000000000..d19d75311 --- /dev/null +++ b/packages/databricks-vscode/src/language/UvBinaryProvider.ts @@ -0,0 +1,173 @@ +import {CancellationToken, ExtensionContext} from "vscode"; +import path from "node:path"; +import fs from "node:fs/promises"; +import os from "node:os"; +import {createHash} from "node:crypto"; +import {cancellableExecFile} from "../cli/CliWrapper"; +import {NamedLogger} from "@databricks/sdk-experimental/dist/logging"; +import {Loggers} from "../logger"; + +/** + * Pinned uv release we download when uv is not already installed. + * Bump deliberately: the download URL, archive layout and checksum format + * are validated for this version. 
+ */ +export const UV_VERSION = "0.7.13"; + +export function uvArtifactName( + platform: NodeJS.Platform = process.platform, + arch: string = process.arch +): string | undefined { + const archPart = {x64: "x86_64", arm64: "aarch64"}[arch]; + if (!archPart) { + return undefined; + } + switch (platform) { + case "darwin": + return `uv-${archPart}-apple-darwin.tar.gz`; + case "linux": + return `uv-${archPart}-unknown-linux-gnu.tar.gz`; + case "win32": + return `uv-${archPart}-pc-windows-msvc.zip`; + default: + return undefined; + } +} + +/** + * Locates a uv binary: prefers one already on PATH (so corporate installs + * with preconfigured proxies and mirrors keep working), then a previously + * downloaded one, and finally downloads a pinned release into the + * extension's global storage. + */ +export class UvBinaryProvider { + private readonly logger = NamedLogger.getOrCreate(Loggers.Extension); + + constructor(private readonly context: ExtensionContext) {} + + get uvBinaryPath(): string { + return path.join( + this.context.globalStorageUri.fsPath, + "uv", + UV_VERSION, + process.platform === "win32" ? "uv.exe" : "uv" + ); + } + + protected async probe( + executable: string, + token?: CancellationToken + ): Promise { + try { + await cancellableExecFile( + executable, + ["--version"], + {shell: false}, + token + ); + return true; + } catch { + return false; + } + } + + async getUvPath(token?: CancellationToken): Promise { + if (await this.probe("uv", token)) { + return "uv"; + } + if (await this.probe(this.uvBinaryPath, token)) { + return this.uvBinaryPath; + } + try { + await this.download(token); + } catch (e) { + this.logger.error("Failed to download uv", e); + return undefined; + } + return (await this.probe(this.uvBinaryPath, token)) + ? 
this.uvBinaryPath + : undefined; + } + + protected async fetchArtifact(url: string): Promise { + const response = await fetch(url, {redirect: "follow"}); + if (!response.ok) { + throw new Error(`Failed to download ${url}: ${response.status}`); + } + return await response.arrayBuffer(); + } + + private async download(token?: CancellationToken): Promise { + const artifact = uvArtifactName(); + if (!artifact) { + throw new Error( + `Unsupported platform for uv: ${process.platform}/${process.arch}` + ); + } + const baseUrl = `https://github.com/astral-sh/uv/releases/download/${UV_VERSION}`; + const [archive, checksumFile] = await Promise.all([ + this.fetchArtifact(`${baseUrl}/${artifact}`), + this.fetchArtifact(`${baseUrl}/${artifact}.sha256`), + ]); + + const expectedSha = Buffer.from(checksumFile) + .toString("utf-8") + .trim() + .split(/\s+/)[0]; + const actualSha = createHash("sha256") + .update(Buffer.from(archive)) + .digest("hex"); + if (expectedSha !== actualSha) { + throw new Error( + `Checksum mismatch for ${artifact}: expected ${expectedSha}, got ${actualSha}` + ); + } + + const targetDir = path.dirname(this.uvBinaryPath); + await fs.mkdir(targetDir, {recursive: true}); + const archivePath = path.join( + await fs.mkdtemp(path.join(os.tmpdir(), "databricks-uv-")), + artifact + ); + try { + await fs.writeFile(archivePath, Buffer.from(archive)); + await this.extract(archivePath, targetDir, token); + if (process.platform !== "win32") { + await fs.chmod(this.uvBinaryPath, 0o755); + } + } finally { + await fs.rm(path.dirname(archivePath), { + recursive: true, + force: true, + }); + } + } + + private async extract( + archivePath: string, + targetDir: string, + token?: CancellationToken + ): Promise { + if (archivePath.endsWith(".zip")) { + // Windows zips contain uv.exe at the archive root + await cancellableExecFile( + "powershell.exe", + [ + "-NoProfile", + "-Command", + `Expand-Archive -Path '${archivePath}' -DestinationPath '${targetDir}' -Force`, + ], + 
{shell: false}, + token + ); + } else { + // tarballs contain a uv-/ directory with the binary + await cancellableExecFile( + "tar", + ["-xzf", archivePath, "-C", targetDir, "--strip-components=1"], + {shell: false}, + token + ); + } + } +} diff --git a/packages/databricks-vscode/src/language/computeTargetSpec.test.ts b/packages/databricks-vscode/src/language/computeTargetSpec.test.ts index 6038c77d5..a74742c28 100644 --- a/packages/databricks-vscode/src/language/computeTargetSpec.test.ts +++ b/packages/databricks-vscode/src/language/computeTargetSpec.test.ts @@ -1,5 +1,8 @@ import * as assert from "assert"; -import {getRequiredPythonVersion} from "./computeTargetSpec"; +import { + getRequiredPythonVersion, + resolveComputeTargetSpec, +} from "./computeTargetSpec"; describe(__filename, () => { describe("serverless", () => { @@ -80,4 +83,54 @@ describe(__filename, () => { assert.strictEqual(required, undefined); }); }); + + describe("resolveComputeTargetSpec", () => { + it("should resolve the full spec for serverless", () => { + const spec = resolveComputeTargetSpec({ + serverless: true, + serverlessDbconnectVersion: "17.3", + }); + assert.strictEqual(spec?.computeType, "serverless"); + assert.strictEqual(spec?.pythonVersion.display, "3.12"); + assert.strictEqual(spec?.dbconnectVersion, "17.3.*"); + }); + + it("should resolve the full spec for a cluster", () => { + const spec = resolveComputeTargetSpec({ + serverless: false, + serverlessDbconnectVersion: "17.3", + dbrVersion: [15, 4], + }); + assert.strictEqual(spec?.computeType, "cluster"); + assert.strictEqual(spec?.pythonVersion.display, "3.11"); + assert.strictEqual(spec?.dbconnectVersion, "15.4.*"); + }); + + it("should use a wildcard for unknown DBR minor versions", () => { + const spec = resolveComputeTargetSpec({ + serverless: false, + serverlessDbconnectVersion: "17.3", + dbrVersion: [15, "x"], + }); + assert.strictEqual(spec?.dbconnectVersion, "15.*"); + }); + + it("should return undefined for unsupported 
compute", () => { + assert.strictEqual( + resolveComputeTargetSpec({ + serverless: false, + serverlessDbconnectVersion: "17.3", + dbrVersion: ["x", "x"], + }), + undefined + ); + assert.strictEqual( + resolveComputeTargetSpec({ + serverless: false, + serverlessDbconnectVersion: "17.3", + }), + undefined + ); + }); + }); }); diff --git a/packages/databricks-vscode/src/language/computeTargetSpec.ts b/packages/databricks-vscode/src/language/computeTargetSpec.ts index 1dce6e984..8f59ee849 100644 --- a/packages/databricks-vscode/src/language/computeTargetSpec.ts +++ b/packages/databricks-vscode/src/language/computeTargetSpec.ts @@ -76,3 +76,50 @@ export function getRequiredPythonVersion(input: { source, }; } + +export interface ComputeTargetSpec { + computeType: "serverless" | "cluster"; + pythonVersion: RequiredPythonVersion; + /** databricks-connect pip version specifier, e.g. "17.3.*" */ + dbconnectVersion: string; +} + +/** + * Resolves everything the extension needs to provision a working environment + * for the selected compute: the local Python version and the matching + * databricks-connect version. Returns undefined when the compute doesn't + * support Databricks Connect or its version can't be determined; callers + * should fall back to the manual setup flow in that case. + */ +export function resolveComputeTargetSpec(input: { + serverless: boolean; + serverlessDbconnectVersion: string; + dbrVersion?: (number | "x")[]; +}): ComputeTargetSpec | undefined { + const pythonVersion = getRequiredPythonVersion(input); + if (!pythonVersion) { + return undefined; + } + if (input.serverless) { + const parts = input.serverlessDbconnectVersion.split("."); + const major = parts[0]; + const minor = parts[1] ?? "3"; + return { + computeType: "serverless", + pythonVersion, + dbconnectVersion: `${major}.${minor}.*`, + }; + } + const [major, minor] = input.dbrVersion ?? 
[]; + if (major === undefined || major === "x") { + return undefined; + } + return { + computeType: "cluster", + pythonVersion, + dbconnectVersion: + minor === "x" || minor === undefined + ? `${major}.*` + : `${major}.${minor}.*`, + }; +} diff --git a/packages/databricks-vscode/src/telemetry/constants.ts b/packages/databricks-vscode/src/telemetry/constants.ts index 8fbc124cf..529d8b07f 100644 --- a/packages/databricks-vscode/src/telemetry/constants.ts +++ b/packages/databricks-vscode/src/telemetry/constants.ts @@ -23,6 +23,7 @@ export enum Events { COMPUTE_SELECTED = "computeSelected", WORKFLOW_RUN = "workflowRun", DBCONNECT_RUN = "dbconnectRun", + MANAGED_ENV_SETUP = "managedEnvironmentSetup", OPEN_RESOURCE_EXTERNALLY = "openResourceExternally", } /* eslint-enable @typescript-eslint/naming-convention */ @@ -201,6 +202,34 @@ export class EventTypes { comment: "The type of the compute", }, }; + [Events.MANAGED_ENV_SETUP]: EventType< + { + step: string; + success: boolean; + computeType: ComputeType; + failureClass?: string; + venvDisposition?: string; + } & DurationMeasurement + > = { + comment: "A step of the managed python environment setup", + step: { + comment: "The setup funnel step", + }, + success: { + comment: "true if the step succeeded, false otherwise", + }, + computeType: { + comment: "The type of the compute", + }, + failureClass: { + comment: "Classification of the failure", + }, + venvDisposition: { + comment: + "What was done with the virtual environment (satisfied, repair, recreate, absent)", + }, + ...getDurationProperty(), + }; [Events.OPEN_RESOURCE_EXTERNALLY]: EventType<{ type: string; }> = { diff --git a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts index a6b18bc47..8a04ce498 100644 --- a/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts +++ b/packages/databricks-vscode/src/vscode-objs/WorkspaceConfigs.ts @@ -51,6 +51,12 @@ export const 
workspaceConfigs = { .get>("experiments.optInto", []); }, + get managedPythonEnvironmentEnabled() { + return this.experimetalFeatureOverides.includes( + "python.managedEnvironment" + ); + }, + /** * set the python.envFile configuration in the ms-python extension */ From 49437c3f31dec224acb9fcd07e6d9bb5bfc103d6 Mon Sep 17 00:00:00 2001 From: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Date: Fri, 12 Jun 2026 17:42:09 +0200 Subject: [PATCH 2/2] Address review findings in the managed environment provisioning - Extract archives with tar on all platforms (bsdtar ships with Windows 10+), removing the PowerShell command-string interpolation that broke on paths containing quotes. - Force-refresh the Python extension's environment discovery after creating a venv: the default refresh is a no-op once per-session discovery ran. - Raise the exec buffer for uv commands: verbose installs exceeded execFile's 1MiB default and killed otherwise successful installs. - Anchor the bare status-code and tls/proxy patterns in the failure classifier so paths or package names containing them aren't reported as network problems. - Fall back to the manual setup flow when the provisioner fails outside its steps (e.g. no active project folder) instead of leaking the error. - Derive the suggested databricks-connect version in the installer from resolveComputeTargetSpec, removing the divergent duplicate mapping. - Build the uv child environment (incl. the sync pip.conf scan) only when provisioning will actually run, and drop the derivable pythonMatches field from the venv assessment. 
Co-authored-by: Isaac --- .../EnvironmentDependenciesInstaller.ts | 25 ++++------ .../src/language/EnvironmentProvisioner.ts | 46 +++++++++++++------ .../src/language/UvBinaryProvider.ts | 28 +++-------- 3 files changed, 47 insertions(+), 52 deletions(-) diff --git a/packages/databricks-vscode/src/language/EnvironmentDependenciesInstaller.ts b/packages/databricks-vscode/src/language/EnvironmentDependenciesInstaller.ts index 6a1ddbd1b..5869d02e7 100644 --- a/packages/databricks-vscode/src/language/EnvironmentDependenciesInstaller.ts +++ b/packages/databricks-vscode/src/language/EnvironmentDependenciesInstaller.ts @@ -5,6 +5,7 @@ import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; import {ConnectionManager} from "../configuration/ConnectionManager"; import {DATABRICKS_CONNECT_VERSION as DATABRICKS_CONNECT_MINIMAL_VERSION} from "../utils/constants"; import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; +import {resolveComputeTargetSpec} from "./computeTargetSpec"; export class EnvironmentDependenciesInstaller implements Disposable { private disposables: Disposable[] = []; @@ -67,23 +68,13 @@ export class EnvironmentDependenciesInstaller implements Disposable { } async getSuggestedVersion() { - if (this.connectionManager.serverless) { - const serverlessVersion = - workspaceConfigs.serverlessDbconnectVersion; - const parts = serverlessVersion.split("."); - const major = parts[0]; - const minor = parts[1] ?? "3"; - return `${major}.${minor}.*`; - } - const dbrVersionParts = - this.connectionManager.cluster?.dbrVersion || []; - if (dbrVersionParts.length < 2 || dbrVersionParts[0] === "x") { - return DATABRICKS_CONNECT_MINIMAL_VERSION; - } - const major = dbrVersionParts[0]; - const minor = dbrVersionParts[1] === "x" ? "*" : dbrVersionParts[1]; - const rest = minor === "*" ? 
"" : ".*"; - return `${major}.${minor + rest}`; + const spec = resolveComputeTargetSpec({ + serverless: this.connectionManager.serverless, + serverlessDbconnectVersion: + workspaceConfigs.serverlessDbconnectVersion, + dbrVersion: this.connectionManager.cluster?.dbrVersion, + }); + return spec?.dbconnectVersion ?? DATABRICKS_CONNECT_MINIMAL_VERSION; } async installWithVersionPrompt(suggestedVersion?: string) { diff --git a/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts b/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts index f425c5ab0..0fd328fb1 100644 --- a/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts +++ b/packages/databricks-vscode/src/language/EnvironmentProvisioner.ts @@ -164,7 +164,7 @@ export function classifyProvisionFailure(e: unknown): ProvisionFailureClass { return "pythonUnavailable"; } if ( - /error sending request|[Cc]onnection (refused|reset)|certificate|tls|timed out|ENOTFOUND|ECONNREFUSED|ETIMEDOUT|EAI_AGAIN|proxy|403|407|503/.test( + /error sending request|[Cc]onnection (refused|reset)|certificate|\btls\b|timed out|ENOTFOUND|ECONNREFUSED|ETIMEDOUT|EAI_AGAIN|\bproxy\b|\b(403|407|503)\b/.test( message ) ) { @@ -173,7 +173,11 @@ export function classifyProvisionFailure(e: unknown): ProvisionFailureClass { return "unknown"; } -const failureMessages: Record = { +// "cancelled" never reaches the user: reportFailure returns early for it. +const failureMessages: Record< + Exclude, + string +> = { networkBlocked: "Your network seems to block the package index or interpreter downloads. " + "If you use a proxy or a package mirror, set HTTPS_PROXY, UV_INDEX_URL " + @@ -186,13 +190,11 @@ const failureMessages: Record = { "A matching Python interpreter is not available and could not be downloaded. 
" + "Install the required Python version and retry.", disk: "Not enough disk space to set up the Python environment.", - cancelled: "", unknown: "Failed to set up the Python environment.", }; interface VenvAssessment { disposition: VenvDisposition | "manual"; - pythonMatches: boolean; /** Whether .venv was created by this extension (has our marker file) */ managed: boolean; } @@ -249,6 +251,11 @@ export class EnvironmentProvisioner implements Disposable { await this.mutex.wait(); try { return await this.ensureEnvironmentImpl(); + } catch (e) { + // Errors outside the provisioning steps (e.g. no active project + // folder) fall back to the manual setup flow. + this.logger.error("Failed to run managed environment setup", e); + return {success: false, noOp: true}; } finally { this.mutex.signal(); } @@ -299,23 +306,20 @@ export class EnvironmentProvisioner implements Disposable { ): Promise { const managed = fs.existsSync(this.markerPath); if (!fs.existsSync(this.venvDir)) { - return {disposition: "absent", pythonMatches: false, managed}; + return {disposition: "absent", managed}; } const pythonMatches = await this.venvPythonMatches(spec, venvPython); - const depsMatch = - pythonMatches && (await this.venvDepsMatch(spec, venvPython)); - if (pythonMatches && depsMatch) { - return {disposition: "satisfied", pythonMatches, managed}; + if (pythonMatches && (await this.venvDepsMatch(spec, venvPython))) { + return {disposition: "satisfied", managed}; } if (managed) { return { disposition: pythonMatches ? 
"repair" : "recreate", - pythonMatches, managed, }; } const disposition = await this.promptForeignVenv(spec, pythonMatches); - return {disposition, pythonMatches, managed}; + return {disposition, managed}; } private async venvPythonMatches( @@ -402,11 +406,14 @@ export class EnvironmentProvisioner implements Disposable { token: CancellationToken ): Promise { const disposition = assessment.disposition as VenvDisposition; - const childEnv = buildProvisionEnv(process.env, readPipIndexUrl()); let createdVenv = false; const start = Date.now(); try { if (disposition !== "satisfied") { + const childEnv = buildProvisionEnv( + process.env, + readPipIndexUrl() + ); const uv = await this.runStep(spec, "uvAcquire", async () => { progress.report({message: "locating uv", increment: 5}); const uvPath = await this.uvProvider.getUvPath(token); @@ -493,7 +500,12 @@ export class EnvironmentProvisioner implements Disposable { // never get one. this.writeMarker(spec); } - await this.pythonExtension.api.environments.refreshEnvironments(); + // forceRefresh: the default refresh is a no-op once the + // session's discovery already ran, leaving a freshly created + // .venv unknown to the Python extension. + await this.pythonExtension.api.environments.refreshEnvironments( + {forceRefresh: true} + ); await this.pythonExtension.api.environments.updateActiveEnvironmentPath( venvPython ); @@ -609,7 +621,13 @@ export class EnvironmentProvisioner implements Disposable { const {stdout, stderr} = await this.execFn( uv, args, - {cwd: this.projectRoot, env: childEnv, shell: false}, + { + cwd: this.projectRoot, + env: childEnv, + shell: false, + // Large installs exceed execFile's 1MiB default buffer. 
+ maxBuffer: 128 * 1024 * 1024, + } as Parameters[2], token ); if (log) { diff --git a/packages/databricks-vscode/src/language/UvBinaryProvider.ts b/packages/databricks-vscode/src/language/UvBinaryProvider.ts index d19d75311..7551094ee 100644 --- a/packages/databricks-vscode/src/language/UvBinaryProvider.ts +++ b/packages/databricks-vscode/src/language/UvBinaryProvider.ts @@ -148,26 +148,12 @@ export class UvBinaryProvider { targetDir: string, token?: CancellationToken ): Promise { - if (archivePath.endsWith(".zip")) { - // Windows zips contain uv.exe at the archive root - await cancellableExecFile( - "powershell.exe", - [ - "-NoProfile", - "-Command", - `Expand-Archive -Path '${archivePath}' -DestinationPath '${targetDir}' -Force`, - ], - {shell: false}, - token - ); - } else { - // tarballs contain a uv-/ directory with the binary - await cancellableExecFile( - "tar", - ["-xzf", archivePath, "-C", targetDir, "--strip-components=1"], - {shell: false}, - token - ); - } + // bsdtar (shipped with macOS, most Linux distros and Windows 10+) + // extracts both formats. Windows zips contain uv.exe at the archive + // root, tarballs contain a uv-/ directory with the binary. + const args = archivePath.endsWith(".zip") + ? ["-xf", archivePath, "-C", targetDir] + : ["-xzf", archivePath, "-C", targetDir, "--strip-components=1"]; + await cancellableExecFile("tar", args, {shell: false}, token); } }