diff --git a/packages/databricks-vscode/src/language/EnvironmentCommands.ts b/packages/databricks-vscode/src/language/EnvironmentCommands.ts index 5fc8e9cf5..1a3ae0e6e 100644 --- a/packages/databricks-vscode/src/language/EnvironmentCommands.ts +++ b/packages/databricks-vscode/src/language/EnvironmentCommands.ts @@ -1,5 +1,9 @@ import {window, commands, QuickPickItem, ProgressLocation} from "vscode"; -import {FeatureManager} from "../feature-manager/FeatureManager"; +import { + FeatureManager, + FeatureState, + FeatureStepState, +} from "../feature-manager/FeatureManager"; import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; import {Cluster} from "../sdk-extensions"; import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller"; @@ -13,9 +17,9 @@ export class EnvironmentCommands { private installer: EnvironmentDependenciesInstaller ) {} - async setup(stepId?: string) { + async setup(stepId?: string): Promise { commands.executeCommand("configurationView.focus"); - await window.withProgress( + return await window.withProgress( {location: {viewId: "configurationView"}}, () => this._setup(stepId) ); @@ -32,7 +36,7 @@ export class EnvironmentCommands { ); } - private async _setup(stepId?: string) { + private async _setup(stepId?: string): Promise { // Get the state from the cache, we will re-check the state after taking an action (e.g. asking a user to select a venv or install dbconnect). let state = await this.featureManager.isEnabled( "environment.dependencies" @@ -50,6 +54,11 @@ export class EnvironmentCommands { break; } } + this.reportSetupOutcome(state); + return state.available; + } + + private reportSetupOutcome(state: FeatureState) { if (state.available) { window.showInformationMessage( "Python environment and Databricks Connect are set up." @@ -77,14 +86,36 @@ export class EnvironmentCommands { async selectPythonInterpreter() { const environments = await this.pythonExtension.getAvailableEnvironments(); + // The requirement hint is best effort: a fresh state check can block + // on the workspace connection, and the picker must always show up. + const state = await Promise.race([ + this.featureManager.isEnabled("environment.dependencies"), + new Promise((resolve) => + setTimeout(() => resolve(undefined), 2000) + ), + ]); + const pythonStep = state?.steps.get("checkPythonEnvironment"); + const requirement = !pythonStep?.available ? pythonStep : undefined; if (environments.length > 0) { - await this.showEnvironmentsQuickPick(environments); + await this.showEnvironmentsQuickPick(environments, requirement); } else { - await this.pythonExtension.createPythonEnvironment(); + await this.createPythonEnvironment(requirement); } } - async showEnvironmentsQuickPick(environments: Environment[]) { + private async createPythonEnvironment(requirement?: FeatureStepState) { + if (requirement?.message) { + // The environment creation flow of the MS Python extension knows + // nothing about our version requirements, so we surface them here. + window.showInformationMessage(requirement.message); + } + await this.pythonExtension.createPythonEnvironment(); + } + + async showEnvironmentsQuickPick( + environments: Environment[], + requirement?: FeatureStepState + ) { const envPicks: (QuickPickItem & {path?: string})[] = environments.map( (env) => ({ label: environmentName(env), @@ -101,11 +132,14 @@ export class EnvironmentCommands { ]; const selectedPick = await window.showQuickPick( envPicks.concat(staticPicks), - {title: "Select Python Environment"} + { + title: requirement?.title ?? "Select Python Environment", + placeHolder: requirement?.message, + } ); if (selectedPick) { if (selectedPick.label === createNewLabel) { - await this.pythonExtension.createPythonEnvironment(); + await this.createPythonEnvironment(requirement); } else if (selectedPick.label === usePythonExtensionLabel) { await this.pythonExtension.selectPythonInterpreter(); } else if (selectedPick.path) { diff --git a/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.test.ts b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.test.ts new file mode 100644 index 000000000..05803f9e5 --- /dev/null +++ b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.test.ts @@ -0,0 +1,175 @@ +import * as assert from "assert"; +import {Disposable, Uri} from "vscode"; +import {mock, instance, when} from "ts-mockito"; +import {EnvironmentDependenciesVerifier} from "./EnvironmentDependenciesVerifier"; +import {ConnectionManager} from "../configuration/ConnectionManager"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller"; +import {ConfigureAutocomplete} from "./ConfigureAutocomplete"; +import {ResolvedEnvironment} from "./MsPythonExtensionApi"; +import {Cluster} from "../sdk-extensions"; + +function fakeEnvironment( + version?: {major: number; minor: number; micro: number}, + name = ".venv", + executablePath = "/project/.venv/bin/python" +): ResolvedEnvironment { + return { + id: executablePath, + path: executablePath, + executable: {uri: Uri.file(executablePath)}, + environment: { + type: "VirtualEnvironment", + name, + folderUri: Uri.file("/project/.venv"), + }, + version, + } as unknown as ResolvedEnvironment; +} + +describe(__filename, () => { + let connectionManagerMock: ConnectionManager; + let pythonExtensionMock: MsPythonExtensionWrapper; + let verifier: EnvironmentDependenciesVerifier; + + const noopEvent = () => new Disposable(() => {}); + + beforeEach(() => { + connectionManagerMock = mock(ConnectionManager); + when(connectionManagerMock.onDidChangeCluster).thenReturn(noopEvent); + when(connectionManagerMock.onDidChangeState).thenReturn(noopEvent); + + pythonExtensionMock = mock(MsPythonExtensionWrapper); + when(pythonExtensionMock.onDidChangePythonExecutable).thenReturn( + noopEvent + ); + + const installerMock = mock(EnvironmentDependenciesInstaller); + when(installerMock.onDidTryInstallation).thenReturn(noopEvent); + + const autocompleteMock = mock(ConfigureAutocomplete); + when(autocompleteMock.onDidUpdate).thenReturn(noopEvent); + + verifier = new EnvironmentDependenciesVerifier( + instance(connectionManagerMock), + instance(pythonExtensionMock), + instance(installerMock), + instance(autocompleteMock) + ); + }); + + function setupEnvironment(env?: ResolvedEnvironment) { + when(pythonExtensionMock.pythonEnvironment).thenReturn( + Promise.resolve(env) + ); + when(pythonExtensionMock.getPythonExecutable()).thenResolve( + env?.executable.uri?.fsPath + ); + } + + describe("serverless (default dbconnect version 17.3)", () => { + beforeEach(() => { + when(connectionManagerMock.serverless).thenReturn(true); + when(connectionManagerMock.cluster).thenReturn(undefined); + }); + + it("should accept python 3.12 without warnings", async () => { + setupEnvironment(fakeEnvironment({major: 3, minor: 12, micro: 4})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, true); + assert.strictEqual(step.warning, undefined); + assert.strictEqual(step.title, "Active Environment: .venv"); + assert.strictEqual(step.message, "/project/.venv/bin/python"); + }); + + it("should reject python 3.13 (higher than the remote version)", async () => { + setupEnvironment(fakeEnvironment({major: 3, minor: 13, micro: 1})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, false); + assert.strictEqual( + step.title, + "Activate an environment with Python 3.12" + ); + assert.ok(step.message?.includes("must match")); + assert.ok( + step.message?.includes("Current Python version is 3.13.1") + ); + }); + + it("should reject python 3.9", async () => { + setupEnvironment(fakeEnvironment({major: 3, minor: 9, micro: 6})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, false); + assert.strictEqual( + step.title, + "Activate an environment with Python 3.12" + ); + }); + + it("should reject when no environment is active", async () => { + setupEnvironment(undefined); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, false); + assert.strictEqual( + step.title, + "Activate an environment with Python 3.12" + ); + assert.ok(step.message?.includes("No active environments found")); + }); + + it("should accept an environment with an unresolvable version", async () => { + setupEnvironment(fakeEnvironment(undefined)); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, true); + assert.strictEqual(step.warning, undefined); + }); + }); + + describe("clusters", () => { + function setupCluster(dbrVersion: (number | "x")[]) { + when(connectionManagerMock.serverless).thenReturn(false); + when(connectionManagerMock.cluster).thenReturn({ + dbrVersion, + } as unknown as Cluster); + } + + it("should accept a matching python version", async () => { + setupCluster([15, 4]); + setupEnvironment(fakeEnvironment({major: 3, minor: 11, micro: 9})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, true); + assert.strictEqual(step.warning, undefined); + }); + + it("should accept a non-matching python >= 3.10 with a warning", async () => { + setupCluster([15, 4]); + setupEnvironment(fakeEnvironment({major: 3, minor: 10, micro: 2})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, true); + assert.ok(step.warning?.includes("Use Python 3.11")); + assert.ok(step.warning?.includes("DBR 15")); + }); + + it("should reject python below 3.10", async () => { + setupCluster([15, 4]); + setupEnvironment(fakeEnvironment({major: 3, minor: 9, micro: 6})); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, false); + assert.strictEqual( + step.title, + "Activate an environment with Python 3.11" + ); + }); + + it("should fall back to '3.10 or greater' for unknown DBR versions", async () => { + setupCluster(["x", "x"]); + setupEnvironment(undefined); + const step = await verifier.checkPythonEnvironment(); + assert.strictEqual(step.available, false); + assert.strictEqual( + step.title, + "Activate an environment with Python 3.10 or greater" + ); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts index 7ce5c80d7..423694e7a 100644 --- a/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts +++ b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts @@ -10,6 +10,7 @@ import {ResolvedEnvironment} from "./MsPythonExtensionApi"; import {NamedLogger} from "@databricks/sdk-experimental/dist/logging"; import {ConfigureAutocomplete} from "./ConfigureAutocomplete"; import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; +import {getRequiredPythonVersion} from "./computeTargetSpec"; export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { private readonly logger = NamedLogger.getOrCreate(Loggers.Extension); @@ -174,74 +175,49 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { : "No active environments found."; } - private getExpectedPythonVersionMessage(dbrVersionParts: (number | "x")[]) { - if (dbrVersionParts[0] === 13 || dbrVersionParts[0] === 14) { - return "3.10"; - } - if (dbrVersionParts[0] === 15) { - return "3.11"; - } - if (dbrVersionParts[0] === 16) { - return "3.12"; - } - if (dbrVersionParts[0] !== "x" && dbrVersionParts[0] > 16) { - return "3.12 or greater"; - } - return "3.10 or greater"; - } - - private getVersionMismatchWarning( - dbrMajor: "x" | number, - env: ResolvedEnvironment, - currentPythonVersionMessage: string - ): string | undefined { - if ( - (dbrMajor === 13 || dbrMajor === 14) && - !this.matchEnvironmentVersion(env, 3, 10) - ) { - return `Use python 3.10 to match DBR ${dbrMajor} requirements. ${currentPythonVersionMessage}`; - } - if (dbrMajor === 15 && !this.matchEnvironmentVersion(env, 3, 11)) { - return `Use python 3.11 to match DBR ${dbrMajor} requirements. ${currentPythonVersionMessage}`; - } - if (dbrMajor === 16 && !this.matchEnvironmentVersion(env, 3, 12)) { - return `Use python 3.12 to match DBR ${dbrMajor} requirements. ${currentPythonVersionMessage}`; - } - return undefined; - } - async checkPythonEnvironment(): Promise { try { const env = await this.pythonExtension.pythonEnvironment; - let envVersionTooLow = - env?.version && - (env.version.major !== 3 || env.version.minor < 10); - let dbrVersion = this.connectionManager.cluster?.dbrVersion || []; - if (this.connectionManager.serverless) { - const serverlessVersion = - workspaceConfigs.serverlessDbconnectVersion; - const serverlessVersionParts = serverlessVersion.split("."); - const serverlessMajor = parseInt(serverlessVersionParts[0], 10); - const serverlessMinor = parseInt( - serverlessVersionParts[1] ?? "0", - 10 + const required = getRequiredPythonVersion({ + serverless: this.connectionManager.serverless, + serverlessDbconnectVersion: + workspaceConfigs.serverlessDbconnectVersion, + dbrVersion: this.connectionManager.cluster?.dbrVersion, + }); + const expectedPythonVersion = + required?.display ?? "3.10 or greater"; + const currentVersionMessage = + this.getCurrentPythonVersionMessage(env); + if (!env?.environment) { + return this.rejectStep( + "checkPythonEnvironment", + `Activate an environment with Python ${expectedPythonVersion}`, + `Databricks Connect requires Python ${expectedPythonVersion}. ${currentVersionMessage}`, + this.selectPythonInterpreter.bind(this) ); - dbrVersion = [serverlessMajor, serverlessMinor]; - const minPythonMinor = serverlessMajor >= 16 ? 12 : 11; - envVersionTooLow = - env?.version && - (env.version.major !== 3 || - env.version.minor < minPythonMinor); } - const expectedPythonVersion = - this.getExpectedPythonVersionMessage(dbrVersion); - if (!env?.environment || envVersionTooLow) { + // Environments whose version the Python extension can't resolve + // are let through, matching the historic behavior. + let rejectionMessage: string | undefined; + if (env.version) { + if ( + required?.exact && + !this.matchEnvironmentVersion( + env, + required.major, + required.minor + ) + ) { + rejectionMessage = `Databricks Connect requires Python ${required.display}: the local minor version must match the Python version of ${required.source}. ${currentVersionMessage}`; + } else if (env.version.major !== 3 || env.version.minor < 10) { + rejectionMessage = `Databricks Connect requires Python ${expectedPythonVersion}. ${currentVersionMessage}`; + } + } + if (rejectionMessage) { return this.rejectStep( "checkPythonEnvironment", `Activate an environment with Python ${expectedPythonVersion}`, - `Databricks Connect requires Python ${expectedPythonVersion}. ${this.getCurrentPythonVersionMessage( - env - )}`, + rejectionMessage, this.selectPythonInterpreter.bind(this) ); } @@ -254,11 +230,16 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { this.selectPythonInterpreter.bind(this) ); } - const warning = this.getVersionMismatchWarning( - dbrVersion[0], - env, - this.getCurrentPythonVersionMessage(env) - ); + const warning = + required && + env.version && + !this.matchEnvironmentVersion( + env, + required.major, + required.minor + ) + ? `Use Python ${required.display} to match the Python version of ${required.source}: local and remote minor versions must match to run UDFs. ${currentVersionMessage}` + : undefined; return this.acceptStep( "checkPythonEnvironment", `Active Environment: ${env.environment.name}`, diff --git a/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.test.ts b/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.test.ts new file mode 100644 index 000000000..3e9e4bc0c --- /dev/null +++ b/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.test.ts @@ -0,0 +1,170 @@ +import * as assert from "assert"; +import {Disposable, Extension, OutputChannel, Uri} from "vscode"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {IExtensionApi as MsPythonExtensionApi} from "./MsPythonExtensionApi"; +import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; +import {StateStorage} from "../vscode-objs/StateStorage"; + +const PIP_MISSING_ERROR = new Error( + "Command failed: python -m pip list --format json\n" + + "/project/.venv/bin/python: No module named pip" +); + +const PIP_LIST_OUTPUT = JSON.stringify([ + {name: "databricks-connect", version: "17.3.1"}, +]); + +class TestableWrapper extends MsPythonExtensionWrapper { + execCalls: Array<{command: string; args: string[]}> = []; + runCalls: Array<{command: string; args: string[]}> = []; + /** Simulates an environment without pip until ensurepip is executed */ + pipIsMissing = false; + execError?: Error; + usesUv = false; + + protected override async execCommand(command: string, args: string[]) { + this.execCalls.push({command, args}); + if (this.execError) { + throw this.execError; + } + if (this.pipIsMissing && args.includes("pip")) { + throw PIP_MISSING_ERROR; + } + return {stdout: PIP_LIST_OUTPUT}; + } + + override async runWithOutput( + command: string, + args: string[], + // eslint-disable-next-line @typescript-eslint/no-unused-vars + outputChannel?: OutputChannel + ) { + this.runCalls.push({command, args}); + if (args.includes("ensurepip")) { + this.pipIsMissing = false; + return; + } + if (this.pipIsMissing && args.includes("pip")) { + throw PIP_MISSING_ERROR; + } + } + + override async isUsingUv() { + return this.usesUv; + } + + get ensurepipCalls() { + return this.runCalls.filter((c) => c.args.includes("ensurepip")); + } +} + +describe(__filename, () => { + let wrapper: TestableWrapper; + // Use the node executable as a stand-in for python: pythonEnvironment + // requires the interpreter path to exist on disk. + const executablePath = process.execPath; + + beforeEach(() => { + const api = { + settings: { + onDidChangeExecutionDetails: () => new Disposable(() => {}), + }, + environments: { + getActiveEnvironmentPath: () => ({ + id: executablePath, + path: executablePath, + }), + resolveEnvironment: async () => ({ + id: executablePath, + path: executablePath, + executable: {uri: Uri.file(executablePath)}, + environment: {name: ".venv"}, + version: {major: 3, minor: 12, micro: 0}, + }), + }, + } as unknown as MsPythonExtensionApi; + const extension = { + exports: api, + } as unknown as Extension; + const workspaceFolderManager = { + activeProjectUri: Uri.file("/project"), + } as unknown as WorkspaceFolderManager; + wrapper = new TestableWrapper( + extension, + workspaceFolderManager, + {} as unknown as StateStorage + ); + }); + + describe("getPackageDetailsFromEnvironment", () => { + it("should seed pip with ensurepip when pip is missing", async () => { + wrapper.pipIsMissing = true; + const details = + await wrapper.getPackageDetailsFromEnvironment( + "databricks-connect" + ); + assert.deepStrictEqual(details, { + name: "databricks-connect", + version: "17.3.1", + }); + assert.strictEqual(wrapper.ensurepipCalls.length, 1); + assert.deepStrictEqual(wrapper.ensurepipCalls[0], { + command: executablePath, + args: ["-m", "ensurepip", "--upgrade"], + }); + assert.strictEqual(wrapper.execCalls.length, 2); + }); + + it("should not seed pip when pip works", async () => { + const details = + await wrapper.getPackageDetailsFromEnvironment( + "databricks-connect" + ); + assert.ok(details); + assert.strictEqual(wrapper.ensurepipCalls.length, 0); + assert.strictEqual(wrapper.execCalls.length, 1); + }); + + it("should rethrow errors not caused by a missing pip", async () => { + wrapper.execError = new Error("Command failed: disk full"); + await assert.rejects( + wrapper.getPackageDetailsFromEnvironment("databricks-connect"), + /disk full/ + ); + assert.strictEqual(wrapper.ensurepipCalls.length, 0); + }); + + it("should not attempt ensurepip for uv environments", async () => { + wrapper.usesUv = true; + wrapper.execError = PIP_MISSING_ERROR; + await assert.rejects( + wrapper.getPackageDetailsFromEnvironment("databricks-connect") + ); + assert.strictEqual(wrapper.ensurepipCalls.length, 0); + }); + }); + + describe("installPackageInEnvironment", () => { + it("should seed pip with ensurepip when pip is missing", async () => { + wrapper.pipIsMissing = true; + await wrapper.installPackageInEnvironment( + "databricks-connect", + "17.3.*" + ); + const installCalls = wrapper.runCalls.filter((c) => + c.args.includes("install") + ); + assert.strictEqual(wrapper.ensurepipCalls.length, 1); + assert.strictEqual(installCalls.length, 2); + }); + + it("should install without seeding when pip works", async () => { + await wrapper.installPackageInEnvironment( + "databricks-connect", + "17.3.*" + ); + assert.strictEqual(wrapper.ensurepipCalls.length, 0); + assert.strictEqual(wrapper.runCalls.length, 1); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.ts b/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.ts index b7fb0ff69..cbac6a936 100644 --- a/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.ts +++ b/packages/databricks-vscode/src/language/MsPythonExtensionWrapper.ts @@ -106,14 +106,25 @@ export class MsPythonExtensionWrapper implements Disposable { outputChannel?: OutputChannel ) { const cp = childProcess.execFile(command, args); + let stderr = ""; cp.stdout?.on("data", (data) => outputChannel?.append(data)); - cp.stderr?.on("data", (data) => outputChannel?.append(data)); + cp.stderr?.on("data", (data) => { + stderr += data; + outputChannel?.append(data); + }); return new Promise((resolve, reject) => { cp.on("exit", (code) => { if (code === 0) { resolve(); } else { - reject(new Error(`Command exited with code ${code}`)); + const stderrTail = stderr.trim().slice(-2000); + reject( + new Error( + `Command exited with code ${code}${ + stderrTail ? `: ${stderrTail}` : "" + }` + ) + ); } }); cp.on("error", reject); @@ -133,12 +144,13 @@ export class MsPythonExtensionWrapper implements Disposable { executable: string, baseArgs: string[], nativePipArgs: string[] = [] - ): Promise<{command: string; args: string[]}> { + ): Promise<{command: string; args: string[]; usesNativePip: boolean}> { const isUv = await this.isUsingUv(); if (isUv) { return { command: "uv", args: ["pip", ...baseArgs, "--python", executable], + usesNativePip: false, }; } return { @@ -151,24 +163,74 @@ export class MsPythonExtensionWrapper implements Disposable { "--disable-pip-version-check", "--no-python-version-warning", ], + usesNativePip: true, }; } + protected async execCommand( + command: string, + args: string[] + ): Promise<{stdout: string}> { + return await execFile(command, args, { + shell: false, + }); + } + + private isMissingPipError(e: unknown): boolean { + return e instanceof Error && e.message.includes("No module named pip"); + } + + /** + * Runs a pip-based command, seeding pip with ensurepip and retrying once + * if the environment has no pip (e.g. a venv created with `--without-pip` + * or by uv without `--seed`). + */ + private async runSeedingPipIfMissing( + executable: string, + usesNativePip: boolean, + fn: () => Promise, + outputChannel?: OutputChannel + ): Promise { + try { + return await fn(); + } catch (e) { + if (!usesNativePip || !this.isMissingPipError(e)) { + throw e; + } + outputChannel?.appendLine( + `pip is missing in the environment. Running: ${executable} -m ensurepip --upgrade` + ); + try { + await this.runWithOutput( + executable, + ["-m", "ensurepip", "--upgrade"], + outputChannel + ); + } catch { + // ensurepip can be unavailable too (e.g. Debian system + // pythons): the original error is more actionable. + throw e; + } + return await fn(); + } + } + async getPackageDetailsFromEnvironment(name: string) { const executable = await this.getPythonExecutable(); if (!executable) { return undefined; } - const {command, args} = await this.getPipCommandAndArgs(executable, [ - "list", - "--format", - "json", - ]); + const {command, args, usesNativePip} = await this.getPipCommandAndArgs( + executable, + ["list", "--format", "json"] + ); - const {stdout} = await execFile(command, args, { - shell: false, - }); + const {stdout} = await this.runSeedingPipIfMissing( + executable, + usesNativePip, + () => this.execCommand(command, args) + ); const data: Array<{name: string; version: string}> = JSON.parse(stdout); return data.find((item) => item.name === name); } @@ -189,13 +251,18 @@ export class MsPythonExtensionWrapper implements Disposable { throw Error("No python executable found"); } - const {command, args} = await this.getPipCommandAndArgs(executable, [ - "install", - `${name}${version ? `==${version}` : ""}`, - ]); + const {command, args, usesNativePip} = await this.getPipCommandAndArgs( + executable, + ["install", `${name}${version ? `==${version}` : ""}`] + ); outputChannel?.appendLine(`Running: ${command} ${args.join(" ")}`); - await this.runWithOutput(command, args, outputChannel); + await this.runSeedingPipIfMissing( + executable, + usesNativePip, + () => this.runWithOutput(command, args, outputChannel), + outputChannel + ); } async uninstallPackageFromEnvironment( @@ -208,14 +275,19 @@ export class MsPythonExtensionWrapper implements Disposable { return; } - const {command, args} = await this.getPipCommandAndArgs( + const {command, args, usesNativePip} = await this.getPipCommandAndArgs( executable, ["uninstall", name], ["-y"] ); outputChannel?.appendLine(`Running: ${command} ${args.join(" ")}`); - await this.runWithOutput(command, args, outputChannel); + await this.runSeedingPipIfMissing( + executable, + usesNativePip, + () => this.runWithOutput(command, args, outputChannel), + outputChannel + ); } async selectPythonInterpreter() { diff --git a/packages/databricks-vscode/src/language/computeTargetSpec.test.ts b/packages/databricks-vscode/src/language/computeTargetSpec.test.ts new file mode 100644 index 000000000..6038c77d5 --- /dev/null +++ b/packages/databricks-vscode/src/language/computeTargetSpec.test.ts @@ -0,0 +1,83 @@ +import * as assert from "assert"; +import {getRequiredPythonVersion} from "./computeTargetSpec"; + +describe(__filename, () => { + describe("serverless", () => { + it("should require python 3.12 exactly for serverless 17.3", () => { + const required = getRequiredPythonVersion({ + serverless: true, + serverlessDbconnectVersion: "17.3", + }); + assert.deepStrictEqual(required, { + major: 3, + minor: 12, + exact: true, + display: "3.12", + source: "the serverless environment", + }); + }); + + it("should require python 3.12 exactly for serverless 16.4", () => { + const required = getRequiredPythonVersion({ + serverless: true, + serverlessDbconnectVersion: "16.4", + }); + assert.strictEqual(required?.display, "3.12"); + assert.strictEqual(required?.exact, true); + }); + + it("should require python 3.11 exactly for serverless 15.4", () => { + const required = getRequiredPythonVersion({ + serverless: true, + serverlessDbconnectVersion: "15.4", + }); + assert.strictEqual(required?.display, "3.11"); + assert.strictEqual(required?.exact, true); + }); + + it("should return undefined for an unparsable serverless version", () => { + const required = getRequiredPythonVersion({ + serverless: true, + serverlessDbconnectVersion: "latest", + }); + assert.strictEqual(required, undefined); + }); + }); + + describe("clusters", () => { + const cases: Array<{dbr: (number | "x")[]; display?: string}> = [ + {dbr: [13, 3], display: "3.10"}, + {dbr: [14, 3], display: "3.10"}, + {dbr: [15, 4], display: "3.11"}, + {dbr: [16, 4], display: "3.12"}, + {dbr: [17, 0], display: "3.12"}, + {dbr: [15, "x"], display: "3.11"}, + {dbr: ["x", "x"], display: undefined}, + {dbr: [12, 2], display: undefined}, + ]; + for (const {dbr, display} of cases) { + it(`should require python ${display} for DBR ${dbr.join( + "." + )}`, () => { + const required = getRequiredPythonVersion({ + serverless: false, + serverlessDbconnectVersion: "17.3", + dbrVersion: dbr, + }); + assert.strictEqual(required?.display, display); + if (required) { + assert.strictEqual(required.exact, false); + assert.strictEqual(required.source, `DBR ${dbr[0]}`); + } + }); + } + + it("should return undefined without a cluster", () => { + const required = getRequiredPythonVersion({ + serverless: false, + serverlessDbconnectVersion: "17.3", + }); + assert.strictEqual(required, undefined); + }); + }); +}); diff --git a/packages/databricks-vscode/src/language/computeTargetSpec.ts b/packages/databricks-vscode/src/language/computeTargetSpec.ts new file mode 100644 index 000000000..1dce6e984 --- /dev/null +++ b/packages/databricks-vscode/src/language/computeTargetSpec.ts @@ -0,0 +1,78 @@ +/** + * Mapping between the selected compute (cluster DBR or serverless) and the + * local Python version required by Databricks Connect. Databricks Connect + * serializes UDFs with pickle, so the local Python minor version has to match + * the Python on the remote compute. + */ + +export interface RequiredPythonVersion { + major: number; + minor: number; + /** + * Whether the local minor version must match exactly for the feature to + * work. True for serverless (remote Python is fixed per environment + * version). For clusters a mismatch is reported as a warning to keep + * non-UDF workloads usable. + */ + exact: boolean; + /** Human readable version, e.g. "3.12" */ + display: string; + /** What the version requirement comes from, e.g. "the serverless environment" */ + source: string; +} + +/** + * Python version shipped with each DBR (and the matching databricks-connect) + * major version. Versions not listed here (newer than the latest entry) fall + * back to the latest entry. + */ +const dbrToPythonMinorVersion: Array<{ + minDbrMajor: number; + pythonMinor: number; +}> = [ + {minDbrMajor: 16, pythonMinor: 12}, + {minDbrMajor: 15, pythonMinor: 11}, + {minDbrMajor: 13, pythonMinor: 10}, +]; + +function pythonMinorForDbrMajor(dbrMajor: number): number | undefined { + return dbrToPythonMinorVersion.find((m) => dbrMajor >= m.minDbrMajor) + ?.pythonMinor; +} + +export function getRequiredPythonVersion(input: { + serverless: boolean; + /** databricks.connect.serverlessDbconnectVersion setting, e.g. "17.3" */ + serverlessDbconnectVersion: string; + /** cluster DBR version parts, e.g. [15, 4] */ + dbrVersion?: (number | "x")[]; +}): RequiredPythonVersion | undefined { + let dbrMajor: number | "x" | undefined; + let exact = false; + let source: string; + if (input.serverless) { + dbrMajor = parseInt(input.serverlessDbconnectVersion.split(".")[0], 10); + if (isNaN(dbrMajor)) { + return undefined; + } + exact = true; + source = "the serverless environment"; + } else { + dbrMajor = input.dbrVersion?.[0]; + source = `DBR ${dbrMajor}`; + } + if (dbrMajor === undefined || dbrMajor === "x") { + return undefined; + } + const minor = pythonMinorForDbrMajor(dbrMajor); + if (minor === undefined) { + return undefined; + } + return { + major: 3, + minor, + exact, + display: `3.${minor}`, + source, + }; +} diff --git a/packages/databricks-vscode/src/run/DatabricksDebugConfigurationProvider.ts b/packages/databricks-vscode/src/run/DatabricksDebugConfigurationProvider.ts index 76b2adc5f..ff08684fb 100644 --- a/packages/databricks-vscode/src/run/DatabricksDebugConfigurationProvider.ts +++ b/packages/databricks-vscode/src/run/DatabricksDebugConfigurationProvider.ts @@ -13,6 +13,8 @@ export interface DatabricksPythonDebugConfiguration extends DebugConfiguration { env?: Record; isInternalDatabricksRun?: boolean; console?: "integratedTerminal" | "externalTerminal" | "internalConsole"; + /** Path to the python interpreter debugpy should use */ + python?: string; } function isTest(debugConfiguration: DebugConfiguration) { diff --git a/packages/databricks-vscode/src/run/RunCommands.test.ts b/packages/databricks-vscode/src/run/RunCommands.test.ts new file mode 100644 index 000000000..dd23c765d --- /dev/null +++ b/packages/databricks-vscode/src/run/RunCommands.test.ts @@ -0,0 +1,146 @@ +import * as assert from "assert"; +import {commands, ExtensionContext, Uri} from "vscode"; +import {anything, instance, mock, verify, when} from "ts-mockito"; +import {RunCommands} from "./RunCommands"; +import {ConnectionManager} from "../configuration/ConnectionManager"; +import {MsPythonExtensionWrapper} from "../language/MsPythonExtensionWrapper"; +import {FeatureManager, FeatureState} from "../feature-manager/FeatureManager"; +import {CustomWhenContext} from "../vscode-objs/CustomWhenContext"; +import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; +import {Telemetry} from "../telemetry"; + +function featureState(available: boolean, executable?: string): FeatureState { + return { + available, + steps: new Map([ + [ + "checkPythonEnvironment", + { + id: "checkPythonEnvironment", + available, + title: "Active Environment: .venv", + message: executable, + }, + ], + ]), + }; +} + +describe(__filename, () => { + let featureManagerMock: FeatureManager; + let pythonExtensionMock: MsPythonExtensionWrapper; + let connectionManagerMock: ConnectionManager; + let runCommands: RunCommands; + + beforeEach(() => { + featureManagerMock = mock(FeatureManager); + pythonExtensionMock = mock(MsPythonExtensionWrapper); + connectionManagerMock = mock(ConnectionManager); + when(connectionManagerMock.state).thenReturn("CONNECTED"); + runCommands = new RunCommands( + instance(connectionManagerMock), + instance(mock(WorkspaceFolderManager)), + instance(pythonExtensionMock), + instance(featureManagerMock), + {subscriptions: []} as unknown as ExtensionContext, + instance(mock(CustomWhenContext)), + instance(mock(Telemetry)) + ); + }); + + describe("createDbconnectDebugConfig", () => { + it("should pin debugpy to the verified python executable", () => { + const config = runCommands.createDbconnectDebugConfig( + Uri.file("/project/src/main.py"), + "/project/.venv/bin/python" + ); + assert.strictEqual(config.type, "debugpy"); + assert.strictEqual(config.databricks, true); + assert.strictEqual( + config.program, + Uri.file("/project/src/main.py").fsPath + ); + assert.strictEqual(config.python, "/project/.venv/bin/python"); + }); + }); + + describe("checkDbconnectEnabled", () => { + it("should pass when the verified interpreter is still selected", async () => { + when( + featureManagerMock.isEnabled("environment.dependencies") + ).thenResolve(featureState(true, "/project/.venv/bin/python")); + when(pythonExtensionMock.getPythonExecutable()).thenResolve( + "/project/.venv/bin/python" + ); + + const result = await runCommands["checkDbconnectEnabled"](); + + assert.strictEqual(result, true); + verify( + featureManagerMock.isEnabled("environment.dependencies", true) + ).never(); + }); + + it("should re-verify when the selected interpreter changed", async () => { + when( + featureManagerMock.isEnabled("environment.dependencies") + ).thenResolve(featureState(true, "/old/python")); + when( + featureManagerMock.isEnabled("environment.dependencies", true) + ).thenResolve(featureState(true, "/project/.venv/bin/python")); + when(pythonExtensionMock.getPythonExecutable()).thenResolve( + "/project/.venv/bin/python" + ); + + const result = await runCommands["checkDbconnectEnabled"](); + + assert.strictEqual(result, true); + verify( + featureManagerMock.isEnabled("environment.dependencies", true) + ).once(); + }); + + it("should not re-verify while disconnected", async () => { + when(connectionManagerMock.state).thenReturn("DISCONNECTED"); + when( + featureManagerMock.isEnabled("environment.dependencies") + ).thenResolve(featureState(true, "/old/python")); + when(pythonExtensionMock.getPythonExecutable()).thenResolve( + "/project/.venv/bin/python" + ); + + const result = await runCommands["checkDbconnectEnabled"](); + + assert.strictEqual(result, true); + verify( + featureManagerMock.isEnabled("environment.dependencies", true) + ).never(); + }); + + it("should not re-verify when the feature is unavailable", async () => { + when(featureManagerMock.isEnabled(anything())).thenResolve( + featureState(false, undefined) + ); + when(pythonExtensionMock.getPythonExecutable()).thenResolve( + undefined + ); + // Stub the setup command flow that runs for unavailable features. + const registration = commands.registerCommand( + "databricks.environment.setup", + () => false + ); + try { + const result = await runCommands["checkDbconnectEnabled"](); + assert.strictEqual(result, false); + verify( + featureManagerMock.isEnabled( + "environment.dependencies", + true + ) + ).never(); + } finally { + registration.dispose(); + } + }); + }); +}); diff --git a/packages/databricks-vscode/src/run/RunCommands.ts b/packages/databricks-vscode/src/run/RunCommands.ts index a4f2e869e..1130b90bf 100644 --- a/packages/databricks-vscode/src/run/RunCommands.ts +++ b/packages/databricks-vscode/src/run/RunCommands.ts @@ -5,7 +5,7 @@ import {LocalUri} from "../sync/SyncDestination"; import {DatabricksPythonDebugConfiguration} from "./DatabricksDebugConfigurationProvider"; import {MsPythonExtensionWrapper} from "../language/MsPythonExtensionWrapper"; import path from "path"; -import {FeatureManager} from "../feature-manager/FeatureManager"; +import {FeatureManager, FeatureState} from "../feature-manager/FeatureManager"; import { escapeExecutableForTerminal, escapePathArgument, @@ -113,15 +113,44 @@ export class RunCommands { } private async checkDbconnectEnabled() { - const featureState = await this.featureManager.isEnabled( + let featureState = await this.featureManager.isEnabled( "environment.dependencies" ); + if ( + featureState.available && + // Re-checks block on an established connection, so don't force + // one while disconnected. + this.connection.state === "CONNECTED" && + (await this.isPythonEnvironmentStale(featureState)) + ) { + // The Python extension doesn't always notify us about interpreter + // changes, so the cached state can refer to a previously selected + // interpreter. Re-check before launching anything with it. + featureState = await this.featureManager.isEnabled( + "environment.dependencies", + true + ); + } if (featureState.available) { return true; } return await commands.executeCommand("databricks.environment.setup"); } + private async isPythonEnvironmentStale(featureState: FeatureState) { + // For an accepted checkPythonEnvironment step the message holds + // the path of the verified python executable. + const verifiedExecutable = featureState.steps.get( + "checkPythonEnvironment" + )?.message; + const currentExecutable = + await this.pythonExtension.getPythonExecutable(); + return ( + verifiedExecutable !== undefined && + verifiedExecutable !== currentExecutable + ); + } + private getTargetResource(resource?: Uri): Uri | undefined { if (!resource && window.activeTextEditor) { return window.activeTextEditor.document.uri; @@ -129,6 +158,26 @@ export class RunCommands { return resource; } + createDbconnectDebugConfig( + targetResource: Uri, + executable: string + ): DatabricksPythonDebugConfiguration { + return { + program: targetResource.fsPath, + type: "debugpy", + name: "Databricks Connect", + request: "launch", + databricks: true, + console: "integratedTerminal", + env: {...process.env}, + // Pin debugpy to the interpreter we have verified. Without this + // debugpy falls back to the Python extension's selected + // interpreter for the workspace folder, which can differ from the + // environment used by "run" and by the verification checks. + python: executable, + }; + } + async debugFileUsingDbconnect(resource?: Uri) { if (!(await this.checkDbconnectEnabled())) { return; @@ -139,15 +188,16 @@ export class RunCommands { window.showErrorMessage("Open a file to run"); return; } - const config: DatabricksPythonDebugConfiguration = { - program: targetResource.fsPath, - type: "debugpy", - name: "Databricks Connect", - request: "launch", - databricks: true, - console: "integratedTerminal", - env: {...process.env}, - }; + + const executable = await this.pythonExtension.getPythonExecutable(); + if (!executable) { + window.showErrorMessage("No python executable found"); + return; + } + const config = this.createDbconnectDebugConfig( + targetResource, + executable + ); debug.startDebugging( this.workspaceFolderManager.activeWorkspaceFolder,