Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions packages/databricks-vscode/src/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ import {BundleVariableTreeDataProvider} from "./ui/bundle-variables/BundleVariab
import {ConfigurationTreeViewManager} from "./ui/configuration-view/ConfigurationTreeViewManager";
import {getCLIDependenciesEnvVars} from "./utils/envVarGenerators";
import {EnvironmentCommands} from "./language/EnvironmentCommands";
import {PackageManagerTelemetry} from "./language/PackageManagerTelemetry";
import {WorkspaceFolderManager} from "./vscode-objs/WorkspaceFolderManager";
import {SyncCommands} from "./sync/SyncCommands";
import {CodeSynchronizer} from "./sync";
Expand Down Expand Up @@ -335,6 +336,23 @@ export async function activate(
customWhenContext,
telemetry
);
const packageManagerTelemetry = new PackageManagerTelemetry(
telemetry,
pythonExtensionWrapper,
() => {
try {
return workspaceFolderManager.activeProjectUri.fsPath;
} catch (e) {
return undefined;
}
},
() => {
if (connectionManager.serverless) {
return "serverless";
}
return connectionManager.cluster ? "cluster" : "none";
}
);
context.subscriptions.push(
bundleFileWatcher,
bundleValidateModel,
Expand Down Expand Up @@ -609,13 +627,15 @@ export async function activate(
connectionManager,
pythonExtensionWrapper,
environmentDependenciesInstaller,
configureAutocomplete
configureAutocomplete,
packageManagerTelemetry
)
);
const environmentCommands = new EnvironmentCommands(
featureManager,
pythonExtensionWrapper,
environmentDependenciesInstaller
environmentDependenciesInstaller,
packageManagerTelemetry
);
context.subscriptions.push(
telemetry.registerCommand(
Expand Down Expand Up @@ -993,7 +1013,8 @@ export async function activate(
featureManager,
context,
customWhenContext,
telemetry
telemetry,
packageManagerTelemetry
);
const debugFactory = new DatabricksDebugAdapterFactory(
connectionManager,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ import {Cluster} from "../sdk-extensions";
import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller";
import {Environment} from "./MsPythonExtensionApi";
import {environmentName} from "../utils/environmentUtils";
import {PackageManagerTelemetry} from "./PackageManagerTelemetry";

export class EnvironmentCommands {
constructor(
private featureManager: FeatureManager,
private pythonExtension: MsPythonExtensionWrapper,
private installer: EnvironmentDependenciesInstaller
private installer: EnvironmentDependenciesInstaller,
private packageManagerTelemetry: PackageManagerTelemetry
) {}

async setup(stepId?: string) {
commands.executeCommand("configurationView.focus");
void this.packageManagerTelemetry.emitDetection("explicit_command");
await window.withProgress(
{location: {viewId: "configurationView"}},
() => this._setup(stepId)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {ResolvedEnvironment} from "./MsPythonExtensionApi";
import {NamedLogger} from "@databricks/sdk-experimental/dist/logging";
import {ConfigureAutocomplete} from "./ConfigureAutocomplete";
import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs";
import {PackageManagerTelemetry} from "./PackageManagerTelemetry";

export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier {
private readonly logger = NamedLogger.getOrCreate(Loggers.Extension);
Expand All @@ -18,7 +19,8 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier {
private readonly connectionManager: ConnectionManager,
private readonly pythonExtension: MsPythonExtensionWrapper,
private readonly installer: EnvironmentDependenciesInstaller,
private readonly configureAutocomplete: ConfigureAutocomplete
private readonly configureAutocomplete: ConfigureAutocomplete,
private readonly packageManagerTelemetry: PackageManagerTelemetry
) {
super([
"checkCluster",
Expand Down Expand Up @@ -403,6 +405,9 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier {
}

override async check() {
// First environment check on project open: emit package-manager
// detection telemetry (deduplicated per session, never throws).
void this.packageManagerTelemetry.emitDetection("auto_open");
await this.connectionManager.waitForConnect();
await Promise.all([
this.checkCluster(this.connectionManager.cluster),
Expand Down
251 changes: 251 additions & 0 deletions packages/databricks-vscode/src/language/PackageManagerTelemetry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
import fs from "node:fs";
import path from "node:path";
import {NamedLogger} from "@databricks/sdk-experimental/dist/logging";
import {Loggers} from "../logger";
import {Telemetry} from "../telemetry";
import {ComputeType, SetupTrigger} from "../telemetry/constants";
import "../telemetry/packageManagerExtensions";
import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper";
import {ResolvedEnvironment} from "./MsPythonExtensionApi";
import {
detectPackageManagers,
interpreterUnderCondaPrefix,
InterpreterSource,
PackageManagerSignals,
pyprojectHasToolSection,
pyvenvCfgMarksUv,
} from "./packageManagerDetection";

export type {SetupTrigger};

/**
* Collects package-manager signals at project-setup touchpoints and emits the
* {@link Events.PYTHON_ENV_SETUP_DETECTED} telemetry event.
*
* All probing is best-effort and non-blocking: any failure degrades to
* `unknown` and is swallowed, never thrown into the user's setup/run flow.
* Only categorical/enum data is emitted — no paths, package names, or other
* free-form content (see {@link detectPackageManagers}). Telemetry opt-out is
* honoured by the underlying {@link Telemetry} client.
*/
export class PackageManagerTelemetry {
private readonly logger = NamedLogger.getOrCreate(Loggers.Extension);

/**
* Triggers already emitted for the current `(project, trigger)` pair, to
* deduplicate within a session so one project open doesn't inflate counts.
*/
private readonly emitted = new Set<string>();

constructor(
private readonly telemetry: Telemetry,
private readonly pythonExtension: MsPythonExtensionWrapper,
private readonly getProjectRoot: () => string | undefined,
private readonly getComputeType: () => ComputeType | "none"
) {}

/**
* Detect the package manager(s) for the active project and emit telemetry.
* Deduplicated per `(project root, trigger)` within the session. Never
* throws.
*/
async emitDetection(trigger: SetupTrigger): Promise<void> {
try {
const projectRoot = this.getProjectRoot();
if (projectRoot === undefined) {
return;
}
const dedupeKey = `${trigger}:${projectRoot}`;
if (this.emitted.has(dedupeKey)) {
return;
}
this.emitted.add(dedupeKey);

const env = await this.resolveEnvironment();
const signals = this.collectSignals(projectRoot, env);
const detection = detectPackageManagers(signals);

this.telemetry.recordPackageManagerDetection(detection, {
pythonVersion: this.getPythonMinorVersion(env),
targetCompute: this.getComputeType(),
trigger,
});
} catch (e) {
// Detection is measurement-only and must never disrupt setup.
this.logger.debug("Package manager detection failed", e);
}
}

private async resolveEnvironment(): Promise<
ResolvedEnvironment | undefined
> {
try {
return await this.pythonExtension.pythonEnvironment;
} catch (e) {
this.logger.debug("Failed to resolve python environment", e);
return undefined;
}
}

/** Detected interpreter minor version (e.g. "3.11"), if available. */
private getPythonMinorVersion(
env: ResolvedEnvironment | undefined
): string | undefined {
const version = env?.version;
if (version?.major === undefined || version.minor === undefined) {
return undefined;
}
return `${version.major}.${version.minor}`;
}

/**
* Classify the *active interpreter's* provenance from the resolved
* environment alone. This is deliberately independent of project files: a
* project carrying `uv.lock` but running a conda/venv/system interpreter
* must report that interpreter's real source, so the setup-flow gap ("uv
* project, interpreter not uv-managed yet") stays visible. `uv.lock` is
* still captured as a strong *project* signal via `hasUvLock`.
*/
private getInterpreterSource(
env: ResolvedEnvironment | undefined
): InterpreterSource {
if (env?.environment === undefined) {
// No managed environment: a global/system interpreter.
return env ? "system" : "unknown";
}

const tools = env.tools ?? [];
if (env.environment.type === "Conda" || tools.includes("Conda")) {
return "conda";
}
if (
tools.includes("Venv") ||
tools.includes("VirtualEnv") ||
tools.includes("Poetry") ||
tools.includes("Pipenv") ||
env.environment.type === "VirtualEnvironment"
) {
// The MS Python extension reports uv-created venvs as plain virtual
// environments. Distinguish a genuinely uv-provisioned interpreter
// by the `uv = <version>` line uv writes into pyvenv.cfg -- this is
// interpreter provenance, not the mere presence of uv.lock.
return this.isUvCreatedVenv(env) ? "uv" : "venv";
}
return "unknown";
}

/**
* True if the active venv's pyvenv.cfg marks it as uv-created. Thin fs
* wrapper around the pure {@link pyvenvCfgMarksUv}.
*/
private isUvCreatedVenv(env: ResolvedEnvironment): boolean {
try {
const sysPrefix = env.executable.sysPrefix;
if (!sysPrefix) {
return false;
}
const cfg = path.join(sysPrefix, "pyvenv.cfg");
if (!fs.existsSync(cfg)) {
return false;
}
return pyvenvCfgMarksUv(fs.readFileSync(cfg, "utf-8"));
} catch (e) {
this.logger.debug("Failed to read pyvenv.cfg", e);
return false;
}
}

/**
* Gather raw signals from disk and the environment. Each probe is guarded
* so a single failure degrades that signal to absent rather than aborting.
*/
private collectSignals(
projectRoot: string,
env: ResolvedEnvironment | undefined
): PackageManagerSignals {
const exists = (file: string) => this.fileExists(projectRoot, file);
const interpreterSource = this.getInterpreterSource(env);
const pyproject = this.readPyproject(projectRoot);

const hasPyprojectToolUv = pyprojectHasToolSection(pyproject, "uv");
const hasPyprojectToolPoetry = pyprojectHasToolSection(
pyproject,
"poetry"
);
const hasPyprojectPipOnly =
pyproject !== undefined &&
!hasPyprojectToolUv &&
!hasPyprojectToolPoetry;

return {
hasUvLock: exists("uv.lock"),
hasPyprojectToolUv,
// uvOnPath is intentionally left unset: it is a weak signal that
// never attributes a project to uv, and probing it would mean
// executing a PATH-resolved `uv` binary purely for telemetry.
hasPoetryLock: exists("poetry.lock"),
hasPyprojectToolPoetry,
poetryOnPath: undefined,
hasRequirementsTxt: this.hasRequirementsTxt(projectRoot),
hasConstraintsTxt: exists("constraints.txt"),
hasPyprojectPipOnly,
hasCondaEnvFile:
exists("environment.yml") || exists("environment.yaml"),
hasCondaPrefix: this.hasActiveCondaInterpreter(env),
interpreterSource,
};
}

/**
* Whether the *active interpreter* lives under `CONDA_PREFIX`.
*
* We deliberately do NOT fire on the bare presence of `CONDA_PREFIX` /
* `CONDA_DEFAULT_ENV`: those are session-global in the extension host (set
* for every project when VS Code is launched from an activated conda
* shell), so using them directly would over-count conda for uv/poetry/pip
* projects. Requiring the active interpreter to reside under the prefix
* keeps this a project-scoped signal.
*/
private hasActiveCondaInterpreter(
env: ResolvedEnvironment | undefined
): boolean {
return interpreterUnderCondaPrefix(
env?.executable.sysPrefix,
process.env["CONDA_PREFIX"]
);
}

private fileExists(projectRoot: string, file: string): boolean {
try {
return fs.existsSync(path.join(projectRoot, file));
} catch (e) {
this.logger.debug(`Failed to stat ${file}`, e);
return false;
}
}

/** True if any `requirements*.txt` file exists in the project root. */
private hasRequirementsTxt(projectRoot: string): boolean {
try {
return fs
.readdirSync(projectRoot)
.some((name) => /^requirements.*\.txt$/.test(name));
} catch (e) {
this.logger.debug("Failed to list project root", e);
return false;
}
}

private readPyproject(projectRoot: string): string | undefined {
try {
const file = path.join(projectRoot, "pyproject.toml");
if (!fs.existsSync(file)) {
return undefined;
}
return fs.readFileSync(file, "utf-8");
} catch (e) {
this.logger.debug("Failed to read pyproject.toml", e);
return undefined;
}
}
}
Loading
Loading