diff --git a/packages/databricks-vscode/src/extension.ts b/packages/databricks-vscode/src/extension.ts index 69ca1fc94..83e0bcdaf 100644 --- a/packages/databricks-vscode/src/extension.ts +++ b/packages/databricks-vscode/src/extension.ts @@ -75,6 +75,7 @@ import {BundleVariableTreeDataProvider} from "./ui/bundle-variables/BundleVariab import {ConfigurationTreeViewManager} from "./ui/configuration-view/ConfigurationTreeViewManager"; import {getCLIDependenciesEnvVars} from "./utils/envVarGenerators"; import {EnvironmentCommands} from "./language/EnvironmentCommands"; +import {PackageManagerTelemetry} from "./language/PackageManagerTelemetry"; import {WorkspaceFolderManager} from "./vscode-objs/WorkspaceFolderManager"; import {SyncCommands} from "./sync/SyncCommands"; import {CodeSynchronizer} from "./sync"; @@ -335,6 +336,23 @@ export async function activate( customWhenContext, telemetry ); + const packageManagerTelemetry = new PackageManagerTelemetry( + telemetry, + pythonExtensionWrapper, + () => { + try { + return workspaceFolderManager.activeProjectUri.fsPath; + } catch (e) { + return undefined; + } + }, + () => { + if (connectionManager.serverless) { + return "serverless"; + } + return connectionManager.cluster ? 
"cluster" : "none"; + } + ); context.subscriptions.push( bundleFileWatcher, bundleValidateModel, @@ -609,13 +627,15 @@ export async function activate( connectionManager, pythonExtensionWrapper, environmentDependenciesInstaller, - configureAutocomplete + configureAutocomplete, + packageManagerTelemetry ) ); const environmentCommands = new EnvironmentCommands( featureManager, pythonExtensionWrapper, - environmentDependenciesInstaller + environmentDependenciesInstaller, + packageManagerTelemetry ); context.subscriptions.push( telemetry.registerCommand( @@ -993,7 +1013,8 @@ export async function activate( featureManager, context, customWhenContext, - telemetry + telemetry, + packageManagerTelemetry ); const debugFactory = new DatabricksDebugAdapterFactory( connectionManager, diff --git a/packages/databricks-vscode/src/language/EnvironmentCommands.ts b/packages/databricks-vscode/src/language/EnvironmentCommands.ts index 5fc8e9cf5..4e7bc29de 100644 --- a/packages/databricks-vscode/src/language/EnvironmentCommands.ts +++ b/packages/databricks-vscode/src/language/EnvironmentCommands.ts @@ -5,16 +5,19 @@ import {Cluster} from "../sdk-extensions"; import {EnvironmentDependenciesInstaller} from "./EnvironmentDependenciesInstaller"; import {Environment} from "./MsPythonExtensionApi"; import {environmentName} from "../utils/environmentUtils"; +import {PackageManagerTelemetry} from "./PackageManagerTelemetry"; export class EnvironmentCommands { constructor( private featureManager: FeatureManager, private pythonExtension: MsPythonExtensionWrapper, - private installer: EnvironmentDependenciesInstaller + private installer: EnvironmentDependenciesInstaller, + private packageManagerTelemetry: PackageManagerTelemetry ) {} async setup(stepId?: string) { commands.executeCommand("configurationView.focus"); + void this.packageManagerTelemetry.emitDetection("explicit_command"); await window.withProgress( {location: {viewId: "configurationView"}}, () => this._setup(stepId) diff --git 
a/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts index 08810e009..6e6102b3b 100644 --- a/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts +++ b/packages/databricks-vscode/src/language/EnvironmentDependenciesVerifier.ts @@ -10,6 +10,7 @@ import {ResolvedEnvironment} from "./MsPythonExtensionApi"; import {NamedLogger} from "@databricks/sdk-experimental/dist/logging"; import {ConfigureAutocomplete} from "./ConfigureAutocomplete"; import {workspaceConfigs} from "../vscode-objs/WorkspaceConfigs"; +import {PackageManagerTelemetry} from "./PackageManagerTelemetry"; export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { private readonly logger = NamedLogger.getOrCreate(Loggers.Extension); @@ -18,7 +19,8 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { private readonly connectionManager: ConnectionManager, private readonly pythonExtension: MsPythonExtensionWrapper, private readonly installer: EnvironmentDependenciesInstaller, - private readonly configureAutocomplete: ConfigureAutocomplete + private readonly configureAutocomplete: ConfigureAutocomplete, + private readonly packageManagerTelemetry: PackageManagerTelemetry ) { super([ "checkCluster", @@ -403,6 +405,9 @@ export class EnvironmentDependenciesVerifier extends MultiStepAccessVerifier { } override async check() { + // First environment check on project open: emit package-manager + // detection telemetry (deduplicated per session, never throws). 
+ void this.packageManagerTelemetry.emitDetection("auto_open"); await this.connectionManager.waitForConnect(); await Promise.all([ this.checkCluster(this.connectionManager.cluster), diff --git a/packages/databricks-vscode/src/language/PackageManagerTelemetry.ts b/packages/databricks-vscode/src/language/PackageManagerTelemetry.ts new file mode 100644 index 000000000..c558462a4 --- /dev/null +++ b/packages/databricks-vscode/src/language/PackageManagerTelemetry.ts @@ -0,0 +1,251 @@ +import fs from "node:fs"; +import path from "node:path"; +import {NamedLogger} from "@databricks/sdk-experimental/dist/logging"; +import {Loggers} from "../logger"; +import {Telemetry} from "../telemetry"; +import {ComputeType, SetupTrigger} from "../telemetry/constants"; +import "../telemetry/packageManagerExtensions"; +import {MsPythonExtensionWrapper} from "./MsPythonExtensionWrapper"; +import {ResolvedEnvironment} from "./MsPythonExtensionApi"; +import { + detectPackageManagers, + interpreterUnderCondaPrefix, + InterpreterSource, + PackageManagerSignals, + pyprojectHasToolSection, + pyvenvCfgMarksUv, +} from "./packageManagerDetection"; + +export type {SetupTrigger}; + +/** + * Collects package-manager signals at project-setup touchpoints and emits the + * {@link Events.PYTHON_ENV_SETUP_DETECTED} telemetry event. + * + * All probing is best-effort and non-blocking: any failure degrades to + * `unknown` and is swallowed, never thrown into the user's setup/run flow. + * Only categorical/enum data is emitted — no paths, package names, or other + * free-form content (see {@link detectPackageManagers}). Telemetry opt-out is + * honoured by the underlying {@link Telemetry} client. + */ +export class PackageManagerTelemetry { + private readonly logger = NamedLogger.getOrCreate(Loggers.Extension); + + /** + * Triggers already emitted for the current `(project, trigger)` pair, to + * deduplicate within a session so one project open doesn't inflate counts. 
+ */ + private readonly emitted = new Set(); + + constructor( + private readonly telemetry: Telemetry, + private readonly pythonExtension: MsPythonExtensionWrapper, + private readonly getProjectRoot: () => string | undefined, + private readonly getComputeType: () => ComputeType | "none" + ) {} + + /** + * Detect the package manager(s) for the active project and emit telemetry. + * Deduplicated per `(project root, trigger)` within the session. Never + * throws. + */ + async emitDetection(trigger: SetupTrigger): Promise { + try { + const projectRoot = this.getProjectRoot(); + if (projectRoot === undefined) { + return; + } + const dedupeKey = `${trigger}:${projectRoot}`; + if (this.emitted.has(dedupeKey)) { + return; + } + this.emitted.add(dedupeKey); + + const env = await this.resolveEnvironment(); + const signals = this.collectSignals(projectRoot, env); + const detection = detectPackageManagers(signals); + + this.telemetry.recordPackageManagerDetection(detection, { + pythonVersion: this.getPythonMinorVersion(env), + targetCompute: this.getComputeType(), + trigger, + }); + } catch (e) { + // Detection is measurement-only and must never disrupt setup. + this.logger.debug("Package manager detection failed", e); + } + } + + private async resolveEnvironment(): Promise< + ResolvedEnvironment | undefined + > { + try { + return await this.pythonExtension.pythonEnvironment; + } catch (e) { + this.logger.debug("Failed to resolve python environment", e); + return undefined; + } + } + + /** Detected interpreter minor version (e.g. "3.11"), if available. */ + private getPythonMinorVersion( + env: ResolvedEnvironment | undefined + ): string | undefined { + const version = env?.version; + if (version?.major === undefined || version.minor === undefined) { + return undefined; + } + return `${version.major}.${version.minor}`; + } + + /** + * Classify the *active interpreter's* provenance from the resolved + * environment alone. 
This is deliberately independent of project files: a + * project carrying `uv.lock` but running a conda/venv/system interpreter + * must report that interpreter's real source, so the setup-flow gap ("uv + * project, interpreter not uv-managed yet") stays visible. `uv.lock` is + * still captured as a strong *project* signal via `hasUvLock`. + */ + private getInterpreterSource( + env: ResolvedEnvironment | undefined + ): InterpreterSource { + if (env?.environment === undefined) { + // No managed environment: a global/system interpreter. + return env ? "system" : "unknown"; + } + + const tools = env.tools ?? []; + if (env.environment.type === "Conda" || tools.includes("Conda")) { + return "conda"; + } + if ( + tools.includes("Venv") || + tools.includes("VirtualEnv") || + tools.includes("Poetry") || + tools.includes("Pipenv") || + env.environment.type === "VirtualEnvironment" + ) { + // The MS Python extension reports uv-created venvs as plain virtual + // environments. Distinguish a genuinely uv-provisioned interpreter + // by the `uv = ` line uv writes into pyvenv.cfg -- this is + // interpreter provenance, not the mere presence of uv.lock. + return this.isUvCreatedVenv(env) ? "uv" : "venv"; + } + return "unknown"; + } + + /** + * True if the active venv's pyvenv.cfg marks it as uv-created. Thin fs + * wrapper around the pure {@link pyvenvCfgMarksUv}. + */ + private isUvCreatedVenv(env: ResolvedEnvironment): boolean { + try { + const sysPrefix = env.executable.sysPrefix; + if (!sysPrefix) { + return false; + } + const cfg = path.join(sysPrefix, "pyvenv.cfg"); + if (!fs.existsSync(cfg)) { + return false; + } + return pyvenvCfgMarksUv(fs.readFileSync(cfg, "utf-8")); + } catch (e) { + this.logger.debug("Failed to read pyvenv.cfg", e); + return false; + } + } + + /** + * Gather raw signals from disk and the environment. Each probe is guarded + * so a single failure degrades that signal to absent rather than aborting. 
+ */ + private collectSignals( + projectRoot: string, + env: ResolvedEnvironment | undefined + ): PackageManagerSignals { + const exists = (file: string) => this.fileExists(projectRoot, file); + const interpreterSource = this.getInterpreterSource(env); + const pyproject = this.readPyproject(projectRoot); + + const hasPyprojectToolUv = pyprojectHasToolSection(pyproject, "uv"); + const hasPyprojectToolPoetry = pyprojectHasToolSection( + pyproject, + "poetry" + ); + const hasPyprojectPipOnly = + pyproject !== undefined && + !hasPyprojectToolUv && + !hasPyprojectToolPoetry; + + return { + hasUvLock: exists("uv.lock"), + hasPyprojectToolUv, + // uvOnPath is intentionally left unset: it is a weak signal that + // never attributes a project to uv, and probing it would mean + // executing a PATH-resolved `uv` binary purely for telemetry. + hasPoetryLock: exists("poetry.lock"), + hasPyprojectToolPoetry, + poetryOnPath: undefined, + hasRequirementsTxt: this.hasRequirementsTxt(projectRoot), + hasConstraintsTxt: exists("constraints.txt"), + hasPyprojectPipOnly, + hasCondaEnvFile: + exists("environment.yml") || exists("environment.yaml"), + hasCondaPrefix: this.hasActiveCondaInterpreter(env), + interpreterSource, + }; + } + + /** + * Whether the *active interpreter* lives under `CONDA_PREFIX`. + * + * We deliberately do NOT fire on the bare presence of `CONDA_PREFIX` / + * `CONDA_DEFAULT_ENV`: those are session-global in the extension host (set + * for every project when VS Code is launched from an activated conda + * shell), so using them directly would over-count conda for uv/poetry/pip + * projects. Requiring the active interpreter to reside under the prefix + * keeps this a project-scoped signal. 
+ */ + private hasActiveCondaInterpreter( + env: ResolvedEnvironment | undefined + ): boolean { + return interpreterUnderCondaPrefix( + env?.executable.sysPrefix, + process.env["CONDA_PREFIX"] + ); + } + + private fileExists(projectRoot: string, file: string): boolean { + try { + return fs.existsSync(path.join(projectRoot, file)); + } catch (e) { + this.logger.debug(`Failed to stat ${file}`, e); + return false; + } + } + + /** True if any `requirements*.txt` file exists in the project root. */ + private hasRequirementsTxt(projectRoot: string): boolean { + try { + return fs + .readdirSync(projectRoot) + .some((name) => /^requirements.*\.txt$/.test(name)); + } catch (e) { + this.logger.debug("Failed to list project root", e); + return false; + } + } + + private readPyproject(projectRoot: string): string | undefined { + try { + const file = path.join(projectRoot, "pyproject.toml"); + if (!fs.existsSync(file)) { + return undefined; + } + return fs.readFileSync(file, "utf-8"); + } catch (e) { + this.logger.debug("Failed to read pyproject.toml", e); + return undefined; + } + } +} diff --git a/packages/databricks-vscode/src/language/packageManagerDetection.test.ts b/packages/databricks-vscode/src/language/packageManagerDetection.test.ts new file mode 100644 index 000000000..02e7427b5 --- /dev/null +++ b/packages/databricks-vscode/src/language/packageManagerDetection.test.ts @@ -0,0 +1,398 @@ +import {expect} from "chai"; +import { + detectPackageManagers, + interpreterUnderCondaPrefix, + PackageManagerSignals, + pyprojectHasToolSection, + pyvenvCfgMarksUv, +} from "./packageManagerDetection"; + +describe("detectPackageManagers", () => { + describe("single manager", () => { + it("detects uv from uv.lock", () => { + const result = detectPackageManagers({hasUvLock: true}); + expect(result.managers).to.deep.equal(["uv"]); + expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal(["uv.lock"]); + expect(result.hasLockfile).to.equal(true); + }); + + 
it("detects uv from [tool.uv] in pyproject", () => { + const result = detectPackageManagers({hasPyprojectToolUv: true}); + expect(result.managers).to.deep.equal(["uv"]); + expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal(["pyproject.tool.uv"]); + expect(result.hasLockfile).to.equal(false); + }); + + it("detects poetry from poetry.lock", () => { + const result = detectPackageManagers({hasPoetryLock: true}); + expect(result.managers).to.deep.equal(["poetry"]); + expect(result.primary).to.equal("poetry"); + expect(result.signals).to.deep.equal(["poetry.lock"]); + expect(result.hasLockfile).to.equal(true); + }); + + it("detects pip from requirements.txt", () => { + const result = detectPackageManagers({hasRequirementsTxt: true}); + expect(result.managers).to.deep.equal(["pip"]); + expect(result.primary).to.equal("pip"); + expect(result.signals).to.deep.equal(["requirements.txt"]); + expect(result.hasLockfile).to.equal(false); + }); + + it("detects pip from constraints.txt", () => { + const result = detectPackageManagers({hasConstraintsTxt: true}); + expect(result.managers).to.deep.equal(["pip"]); + expect(result.primary).to.equal("pip"); + expect(result.signals).to.deep.equal(["constraints.txt"]); + }); + + it("detects pip from a pip-only pyproject", () => { + const result = detectPackageManagers({hasPyprojectPipOnly: true}); + expect(result.managers).to.deep.equal(["pip"]); + expect(result.primary).to.equal("pip"); + expect(result.signals).to.deep.equal(["pyproject.pipOnly"]); + }); + + it("detects conda from environment.yml", () => { + const result = detectPackageManagers({hasCondaEnvFile: true}); + expect(result.managers).to.deep.equal(["conda"]); + expect(result.primary).to.equal("conda"); + expect(result.signals).to.deep.equal(["environment.yml"]); + }); + + it("detects conda from an active CONDA_PREFIX", () => { + const result = detectPackageManagers({hasCondaPrefix: true}); + expect(result.managers).to.deep.equal(["conda"]); + 
expect(result.primary).to.equal("conda"); + expect(result.signals).to.deep.equal(["conda.prefix"]); + }); + }); + + describe("interpreter source", () => { + it("attributes uv from a uv-created interpreter", () => { + const result = detectPackageManagers({interpreterSource: "uv"}); + expect(result.managers).to.deep.equal(["uv"]); + expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal(["interpreter.uv"]); + expect(result.interpreterSource).to.equal("uv"); + }); + + it("attributes conda from a conda interpreter", () => { + const result = detectPackageManagers({interpreterSource: "conda"}); + expect(result.managers).to.deep.equal(["conda"]); + expect(result.primary).to.equal("conda"); + expect(result.signals).to.deep.equal(["interpreter.conda"]); + expect(result.interpreterSource).to.equal("conda"); + }); + + it("attributes pip from a plain venv interpreter", () => { + const result = detectPackageManagers({interpreterSource: "venv"}); + expect(result.managers).to.deep.equal(["pip"]); + expect(result.primary).to.equal("pip"); + expect(result.signals).to.deep.equal(["interpreter.venv"]); + expect(result.interpreterSource).to.equal("venv"); + }); + + it("defaults interpreterSource to unknown when absent", () => { + const result = detectPackageManagers({hasUvLock: true}); + expect(result.interpreterSource).to.equal("unknown"); + }); + }); + + describe("overlaps", () => { + it("reports both uv and pip (uv.lock + requirements.txt)", () => { + const result = detectPackageManagers({ + hasUvLock: true, + hasRequirementsTxt: true, + }); + expect(result.managers).to.deep.equal(["uv", "pip"]); + // uv outranks pip as primary. 
+ expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal([ + "uv.lock", + "requirements.txt", + ]); + expect(result.hasLockfile).to.equal(true); + }); + + it("reports both conda and pip (environment.yml + requirements.txt)", () => { + const result = detectPackageManagers({ + hasCondaEnvFile: true, + hasRequirementsTxt: true, + }); + expect(result.managers).to.deep.equal(["conda", "pip"]); + // conda outranks pip as primary. + expect(result.primary).to.equal("conda"); + expect(result.signals).to.deep.equal([ + "requirements.txt", + "environment.yml", + ]); + }); + + it("reports both poetry and uv (both pyproject sections)", () => { + const result = detectPackageManagers({ + hasPyprojectToolUv: true, + hasPyprojectToolPoetry: true, + }); + expect(result.managers).to.deep.equal(["uv", "poetry"]); + // uv outranks poetry as primary. + expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal([ + "pyproject.tool.uv", + "pyproject.tool.poetry", + ]); + }); + + it("orders primary uv > poetry > conda > pip when all apply", () => { + const result = detectPackageManagers({ + hasUvLock: true, + hasPoetryLock: true, + hasCondaEnvFile: true, + hasRequirementsTxt: true, + }); + expect(result.managers).to.deep.equal([ + "uv", + "poetry", + "conda", + "pip", + ]); + expect(result.primary).to.equal("uv"); + expect(result.hasLockfile).to.equal(true); + }); + }); + + describe("weak signals", () => { + it("records uv/poetry on PATH without attributing the project", () => { + const result = detectPackageManagers({ + uvOnPath: true, + poetryOnPath: true, + }); + // A tool merely installed on PATH is not project usage. 
+ expect(result.managers).to.deep.equal([]); + expect(result.primary).to.equal("unknown"); + expect(result.signals).to.deep.equal([ + "uv.onPath", + "poetry.onPath", + ]); + }); + + it("promotes uv to a manager when PATH is joined by a lockfile", () => { + const result = detectPackageManagers({ + uvOnPath: true, + hasUvLock: true, + }); + expect(result.managers).to.deep.equal(["uv"]); + expect(result.primary).to.equal("uv"); + expect(result.signals).to.deep.equal(["uv.lock", "uv.onPath"]); + }); + + it("keeps the real interpreter source for a uv project on a conda interpreter", () => { + // A uv.lock project where the user has not yet selected a + // uv-managed interpreter: uv is the project manager, but the + // interpreter source must reflect the actually-active conda env so + // the setup-flow gap stays visible (not masked as interpreter.uv). + const result = detectPackageManagers({ + hasUvLock: true, + interpreterSource: "conda", + }); + expect(result.managers).to.deep.equal(["uv", "conda"]); + expect(result.primary).to.equal("uv"); + expect(result.interpreterSource).to.equal("conda"); + expect(result.signals).to.deep.equal([ + "uv.lock", + "interpreter.conda", + ]); + }); + }); + + describe("none", () => { + it("returns unknown for empty signals", () => { + const result = detectPackageManagers({}); + expect(result.managers).to.deep.equal([]); + expect(result.primary).to.equal("unknown"); + expect(result.signals).to.deep.equal([]); + expect(result.hasLockfile).to.equal(false); + expect(result.interpreterSource).to.equal("unknown"); + }); + + it("returns unknown when only an unknown interpreter is present", () => { + const signals: PackageManagerSignals = { + interpreterSource: "unknown", + }; + const result = detectPackageManagers(signals); + expect(result.managers).to.deep.equal([]); + expect(result.primary).to.equal("unknown"); + }); + }); +}); + +describe("pyprojectHasToolSection", () => { + it("returns false for undefined contents", () => { + 
expect(pyprojectHasToolSection(undefined, "uv")).to.equal(false); + }); + + it("matches a bare [tool.uv] header", () => { + expect(pyprojectHasToolSection("[tool.uv]\n", "uv")).to.equal(true); + }); + + it("matches a [tool.poetry] header", () => { + const toml = '[tool.poetry]\nname = "x"\n'; + expect(pyprojectHasToolSection(toml, "poetry")).to.equal(true); + }); + + it("matches subtable-only headers (no bare [tool.uv])", () => { + // Real uv projects often have only subtables. + expect(pyprojectHasToolSection("[tool.uv.sources]\n", "uv")).to.equal( + true + ); + expect( + pyprojectHasToolSection( + "[tool.poetry.group.dev.dependencies]\n", + "poetry" + ) + ).to.equal(true); + }); + + it("tolerates whitespace inside the header brackets", () => { + expect(pyprojectHasToolSection("[ tool.uv ]\n", "uv")).to.equal(true); + }); + + it("ignores a commented-out header", () => { + expect(pyprojectHasToolSection("# [tool.uv]\n", "uv")).to.equal(false); + expect( + pyprojectHasToolSection(" #[tool.poetry]\n", "poetry") + ).to.equal(false); + }); + + it("ignores trailing comments after an unrelated header", () => { + const toml = "[project] # not [tool.uv]\n"; + expect(pyprojectHasToolSection(toml, "uv")).to.equal(false); + }); + + it("does not match tool.uv mentioned inside a value or key", () => { + expect( + pyprojectHasToolSection('description = "use [tool.uv]"\n', "uv") + ).to.equal(false); + expect( + pyprojectHasToolSection('urls."tool.uv" = "x"\n', "uv") + ).to.equal(false); + }); + + it("does not match a different tool's section", () => { + expect(pyprojectHasToolSection("[tool.ruff]\n", "uv")).to.equal(false); + // Prefix collision: [tool.uvicorn] must not count as [tool.uv]. 
+ expect(pyprojectHasToolSection("[tool.uvicorn]\n", "uv")).to.equal( + false + ); + }); + + it("matches array-of-table headers ([[...]])", () => { + expect(pyprojectHasToolSection("[[tool.uv.index]]\n", "uv")).to.equal( + true + ); + expect( + pyprojectHasToolSection("[[tool.poetry.source]]\n", "poetry") + ).to.equal(true); + // Bare array-of-table form as well. + expect(pyprojectHasToolSection("[[tool.uv]]\n", "uv")).to.equal(true); + }); + + it("does not match array-of-table prefix collisions", () => { + expect(pyprojectHasToolSection("[[tool.uvicorn.x]]\n", "uv")).to.equal( + false + ); + }); +}); + +describe("pyvenvCfgMarksUv", () => { + it("returns false for undefined contents", () => { + expect(pyvenvCfgMarksUv(undefined)).to.equal(false); + }); + + it("detects a uv = line", () => { + const cfg = "home = /usr/bin\nversion = 3.11.4\nuv = 0.4.18\n"; + expect(pyvenvCfgMarksUv(cfg)).to.equal(true); + }); + + it("tolerates whitespace around the uv key", () => { + expect(pyvenvCfgMarksUv(" uv = 0.5.0\n")).to.equal(true); + }); + + it("returns false for a plain (non-uv) venv config", () => { + const cfg = + "home = /usr/bin\ninclude-system-site-packages = false\nversion = 3.11.4\n"; + expect(pyvenvCfgMarksUv(cfg)).to.equal(false); + }); + + it("does not match uv appearing in another key or value", () => { + expect(pyvenvCfgMarksUv("command = /x/uv venv\n")).to.equal(false); + expect(pyvenvCfgMarksUv("uv_seed = true\n")).to.equal(false); + }); +}); + +describe("interpreterUnderCondaPrefix", () => { + it("returns false when either path is missing", () => { + expect(interpreterUnderCondaPrefix(undefined, "/opt/conda")).to.equal( + false + ); + expect(interpreterUnderCondaPrefix("/opt/conda", undefined)).to.equal( + false + ); + }); + + it("matches when sysPrefix equals the conda prefix", () => { + expect( + interpreterUnderCondaPrefix( + "/opt/conda/envs/ml", + "/opt/conda/envs/ml" + ) + ).to.equal(true); + }); + + it("matches when the interpreter is nested under the 
prefix", () => { + expect( + interpreterUnderCondaPrefix("/opt/conda/envs/ml/bin", "/opt/conda") + ).to.equal(true); + }); + + it("tolerates a trailing separator on either path", () => { + expect( + interpreterUnderCondaPrefix( + "/opt/conda/envs/ml/", + "/opt/conda/envs/ml" + ) + ).to.equal(true); + }); + + it("does not match on a shared path prefix that is not a boundary", () => { + // /x/envs/ab must not be treated as inside /x/envs/a. + expect(interpreterUnderCondaPrefix("/x/envs/ab", "/x/envs/a")).to.equal( + false + ); + }); + + it("does not match an unrelated interpreter (shell-global CONDA_PREFIX)", () => { + // The conda env is active in the shell, but the selected interpreter + // is a uv/venv elsewhere -- must not be attributed to conda. + expect( + interpreterUnderCondaPrefix( + "/home/u/project/.venv", + "/opt/conda/envs/base" + ) + ).to.equal(false); + }); + + it("handles Windows-style separators", () => { + expect( + interpreterUnderCondaPrefix( + "C:\\conda\\envs\\ml", + "C:\\conda\\envs\\ml" + ) + ).to.equal(true); + expect( + interpreterUnderCondaPrefix("C:\\conda\\envs\\ml\\x", "C:\\conda") + ).to.equal(true); + }); +}); diff --git a/packages/databricks-vscode/src/language/packageManagerDetection.ts b/packages/databricks-vscode/src/language/packageManagerDetection.ts new file mode 100644 index 000000000..d8975926c --- /dev/null +++ b/packages/databricks-vscode/src/language/packageManagerDetection.ts @@ -0,0 +1,287 @@ +/** + * Pure, signal-based detection of the Python package/environment manager(s) a + * project uses. + * + * This module is intentionally side-effect free: callers gather raw signals + * from disk and the environment (see {@link PackageManagerSignals}) and pass + * them to {@link detectPackageManagers}, which classifies them. Keeping the + * classification pure makes it deterministic and trivially unit-testable across + * the overlap cases (uv+pip, conda+pip, poetry+uv, none). 
+ * + * The detection feeds telemetry only (see Events.PYTHON_ENV_SETUP_DETECTED). It + * never changes setup behaviour, and only categorical/enum data leaves this + * module — no paths, package names, or other free-form content. + */ + +/** A package/environment manager we can attribute a project to. */ +export type PackageManager = "uv" | "poetry" | "pip" | "conda"; + +/** The best-guess primary manager, or "unknown" when no signal fires. */ +export type PrimaryManager = PackageManager | "unknown"; + +/** + * How the active interpreter was provisioned, independent of which managers the + * project declares on disk. + */ +export type InterpreterSource = "uv" | "conda" | "system" | "venv" | "unknown"; + +/** + * Individual signals that fired during detection. These are the only free-form + * strings emitted, and they come from this closed, enumerated set — never from + * user content. + */ +export type DetectionSignal = + | "uv.lock" + | "pyproject.tool.uv" + | "uv.onPath" + | "interpreter.uv" + | "poetry.lock" + | "pyproject.tool.poetry" + | "poetry.onPath" + | "requirements.txt" + | "constraints.txt" + | "pyproject.pipOnly" + | "interpreter.venv" + | "environment.yml" + | "conda.prefix" + | "interpreter.conda"; + +/** + * Raw, already-collected signals about a project. Every field is optional so + * callers can supply only what they could cheaply determine; missing fields are + * treated as "signal absent". Collecting these must never throw into the user + * flow — a failed probe should be reported as `false`/`undefined`. + */ +export interface PackageManagerSignals { + /** A `uv.lock` file exists in the project root. */ + hasUvLock?: boolean; + /** `pyproject.toml` contains a `[tool.uv]` section. */ + hasPyprojectToolUv?: boolean; + /** A `uv` executable is resolvable on PATH. */ + uvOnPath?: boolean; + + /** A `poetry.lock` file exists in the project root. */ + hasPoetryLock?: boolean; + /** `pyproject.toml` contains a `[tool.poetry]` section. 
*/ + hasPyprojectToolPoetry?: boolean; + /** A `poetry` executable is resolvable on PATH. */ + poetryOnPath?: boolean; + + /** One or more `requirements*.txt` files exist. */ + hasRequirementsTxt?: boolean; + /** A `constraints.txt` file exists. */ + hasConstraintsTxt?: boolean; + /** + * A `pyproject.toml` exists but declares neither `[tool.uv]` nor + * `[tool.poetry]` (i.e. a plain PEP 621 / pip-installable project). + */ + hasPyprojectPipOnly?: boolean; + + /** An `environment.yml` / `environment.yaml` file exists. */ + hasCondaEnvFile?: boolean; + /** + * The active interpreter resides under `CONDA_PREFIX`. Collectors must NOT + * set this from the bare presence of `CONDA_PREFIX` / `CONDA_DEFAULT_ENV`: + * those are session-global (set for every project when VS Code is launched + * from an activated conda shell) and would over-count conda. + */ + hasCondaPrefix?: boolean; + + /** + * How the active interpreter was provisioned, if known. Drives both the + * `interpreter_source` field and a corroborating manager signal. + */ + interpreterSource?: InterpreterSource; +} + +/** The full classification result. All fields are categorical or boolean. */ +export interface PackageManagerDetection { + /** Every manager with at least one firing signal, in priority order. */ + managers: PackageManager[]; + /** Best-guess primary manager, or "unknown" when nothing matched. */ + primary: PrimaryManager; + /** The exact signals that fired, in a stable order. */ + signals: DetectionSignal[]; + /** True when a lockfile (uv.lock or poetry.lock) was found. */ + hasLockfile: boolean; + /** How the active interpreter was provisioned. */ + interpreterSource: InterpreterSource; +} + +/** + * Priority order used to pick the primary manager when several apply. uv and + * poetry are the most specific (they own the whole workflow), conda is next + * (it provisions the interpreter), and pip is the fallback that almost any + * project can also satisfy. 
/**
 * Package managers ordered from most to least specific. uv and poetry own the
 * whole workflow, conda provisions the interpreter, and pip is the fallback
 * that almost any project can also satisfy.
 *
 * This list is the single source of truth for ordering: it decides both the
 * order of `managers` in a detection result and which manager is `primary`.
 */
const PRIMARY_PRIORITY: readonly PackageManager[] = [
    "uv",
    "poetry",
    "conda",
    "pip",
];

/**
 * Classify a project's package manager(s) from a set of pre-collected signals.
 *
 * Pure and total: any input (including an empty object) yields a well-formed
 * result, defaulting to `primary: "unknown"` and `managers: []`. Several
 * managers can be reported at once because they legitimately co-exist (e.g. a
 * conda env that also uses pip).
 *
 * @param signals Booleans (and the interpreter source) gathered by a collector.
 *     Missing fields are treated as "did not fire".
 * @returns The attributed managers in `PRIMARY_PRIORITY` order, the first of
 *     them as `primary`, every signal that fired in a fixed order, whether a
 *     uv/poetry lockfile was seen, and the interpreter source (or "unknown").
 */
export function detectPackageManagers(
    signals: PackageManagerSignals
): PackageManagerDetection {
    const interpreterSource = signals.interpreterSource ?? "unknown";

    // Every signal the classifier knows about, paired with the id reported in
    // telemetry. The table order is the (stable) order of the emitted list.
    const candidates: ReadonlyArray<
        readonly [boolean | undefined, DetectionSignal]
    > = [
        [signals.hasUvLock, "uv.lock"],
        [signals.hasPyprojectToolUv, "pyproject.tool.uv"],
        [signals.uvOnPath, "uv.onPath"],
        [interpreterSource === "uv", "interpreter.uv"],

        [signals.hasPoetryLock, "poetry.lock"],
        [signals.hasPyprojectToolPoetry, "pyproject.tool.poetry"],
        [signals.poetryOnPath, "poetry.onPath"],

        [signals.hasRequirementsTxt, "requirements.txt"],
        [signals.hasConstraintsTxt, "constraints.txt"],
        [signals.hasPyprojectPipOnly, "pyproject.pipOnly"],
        [interpreterSource === "venv", "interpreter.venv"],

        [signals.hasCondaEnvFile, "environment.yml"],
        [signals.hasCondaPrefix, "conda.prefix"],
        [interpreterSource === "conda", "interpreter.conda"],
    ];
    const firedSignals: DetectionSignal[] = candidates
        .filter(([fired]) => Boolean(fired))
        .map(([, id]) => id);

    // A bare `uv`/`poetry` on PATH is a weak signal: it says the tool is
    // installed, not that this project uses it. It is still recorded above,
    // but attribution below requires a project-local marker (lockfile,
    // pyproject section, or interpreter source).
    const attributed = new Set<PackageManager>();
    if (
        signals.hasUvLock ||
        signals.hasPyprojectToolUv ||
        interpreterSource === "uv"
    ) {
        attributed.add("uv");
    }
    if (signals.hasPoetryLock || signals.hasPyprojectToolPoetry) {
        attributed.add("poetry");
    }
    if (
        signals.hasCondaEnvFile ||
        signals.hasCondaPrefix ||
        interpreterSource === "conda"
    ) {
        attributed.add("conda");
    }
    if (
        signals.hasRequirementsTxt ||
        signals.hasConstraintsTxt ||
        signals.hasPyprojectPipOnly ||
        interpreterSource === "venv"
    ) {
        attributed.add("pip");
    }

    // Filtering the priority list keeps `managers` in priority order by
    // construction, so the primary manager is simply its first entry.
    const managers: PackageManager[] = PRIMARY_PRIORITY.filter((manager) =>
        attributed.has(manager)
    );
    const [topManager] = managers;
    const primary: PrimaryManager = topManager ?? "unknown";

    const hasLockfile = Boolean(signals.hasUvLock || signals.hasPoetryLock);

    return {
        managers,
        primary,
        signals: firedSignals,
        hasLockfile,
        interpreterSource,
    };
}
/**
 * Whether a `pyproject.toml` declares a `[tool.<name>]` table: the `<name>`
 * table itself or any subtable such as `[tool.uv.sources]`.
 *
 * A line-based scan of table headers -- deliberately not a full TOML parse (no
 * dependency needed for this) and more robust than a substring match. It:
 *  - ignores everything after the first `#` on a line, so a commented-out
 *    header does not count,
 *  - only matches at the start of a (trimmed) line, so `tool.<name>` mentions
 *    inside a single-line string value or another key do not count,
 *  - matches subtables, e.g. `[tool.uv.workspace]` or
 *    `[tool.poetry.group.dev.dependencies]`,
 *  - matches array-of-table headers, e.g. `[[tool.uv.index]]` or
 *    `[[tool.poetry.source]]`,
 *  - tolerates whitespace inside the brackets and around the dots, which TOML
 *    permits (`[ tool . uv ]`).
 *
 * Known limits of the scan: quoted key forms (`[tool."uv"]`) are not
 * recognized, and a line inside a multi-line string that itself looks like a
 * header would still match.
 *
 * @param contents Text of the `pyproject.toml`, or undefined if unavailable.
 * @param name The tool table to look for.
 * @returns true if a matching header line exists; false otherwise, including
 *     for undefined input.
 */
export function pyprojectHasToolSection(
    contents: string | undefined,
    name: "uv" | "poetry"
): boolean {
    if (contents === undefined) {
        return false;
    }
    // `[tool.<name>]`, `[tool.<name>.<sub>]`, and the array-of-table forms
    // `[[tool.<name>]]` / `[[tool.<name>.<sub>]]` (the optional second `[`).
    // After `<name>` we require `.` or `]` so that e.g. `[tool.uvx]` is not
    // mistaken for `[tool.uv]`.
    const header = new RegExp(
        `^\\[\\[?\\s*tool\\s*\\.\\s*${name}\\s*(?:\\.|\\])`
    );
    return contents.split(/\r?\n/).some((rawLine) => {
        // Drop the comment part first so `# [tool.uv]` never reaches the regex.
        const hashAt = rawLine.indexOf("#");
        const code = hashAt === -1 ? rawLine : rawLine.slice(0, hashAt);
        return header.test(code.trim());
    });
}

/**
 * Whether the contents of a venv's `pyvenv.cfg` mark it as created by uv. uv
 * writes a `uv = <version>` line into the file it generates; the MS Python
 * extension otherwise reports such venvs as plain virtual environments, so this
 * marker is what distinguishes a genuinely uv-provisioned interpreter.
 *
 * Only the key is checked (a line that starts with `uv`, optionally indented,
 * followed by `=`); the value is not inspected.
 *
 * @param contents Text of `pyvenv.cfg`, or undefined if it could not be read.
 * @returns true if a `uv =` line is present; false otherwise, including for
 *     undefined input.
 */
export function pyvenvCfgMarksUv(contents: string | undefined): boolean {
    if (contents === undefined) {
        return false;
    }
    return /^\s*uv\s*=/m.test(contents);
}

/**
 * Whether an interpreter's `sysPrefix` lies inside a conda prefix -- i.e. the
 * active interpreter is that conda environment, not merely a shell that has
 * `CONDA_PREFIX` exported globally. Both inputs are expected to be absolute
 * paths; comparison is done with a trailing-separator boundary so that
 * `/x/envs/ab` is not treated as inside `/x/envs/a`.
 *
 * Pure over its inputs; returns false if either is missing. Accepts both `/`
 * and `\\` separators so it is platform-agnostic and deterministic in tests.
 */
+ */ +export function interpreterUnderCondaPrefix( + sysPrefix: string | undefined, + condaPrefix: string | undefined +): boolean { + if (!sysPrefix || !condaPrefix) { + return false; + } + const stripTrailingSep = (p: string) => p.replace(/[\\/]+$/, ""); + const prefix = stripTrailingSep(sysPrefix); + const base = stripTrailingSep(condaPrefix); + return ( + prefix === base || + prefix.startsWith(base + "/") || + prefix.startsWith(base + "\\") + ); +} diff --git a/packages/databricks-vscode/src/run/RunCommands.test.ts b/packages/databricks-vscode/src/run/RunCommands.test.ts index db3d2a3dd..77cc20aa8 100644 --- a/packages/databricks-vscode/src/run/RunCommands.test.ts +++ b/packages/databricks-vscode/src/run/RunCommands.test.ts @@ -8,6 +8,7 @@ import {FeatureManager, FeatureState} from "../feature-manager/FeatureManager"; import {CustomWhenContext} from "../vscode-objs/CustomWhenContext"; import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; import {Telemetry} from "../telemetry"; +import {PackageManagerTelemetry} from "../language/PackageManagerTelemetry"; function featureState(available: boolean, executable?: string): FeatureState { return { @@ -44,7 +45,8 @@ describe(__filename, () => { instance(featureManagerMock), {subscriptions: []} as unknown as ExtensionContext, instance(mock(CustomWhenContext)), - instance(mock(Telemetry)) + instance(mock(Telemetry)), + instance(mock(PackageManagerTelemetry)) ); }); diff --git a/packages/databricks-vscode/src/run/RunCommands.ts b/packages/databricks-vscode/src/run/RunCommands.ts index 194e58132..ca07f8be9 100644 --- a/packages/databricks-vscode/src/run/RunCommands.ts +++ b/packages/databricks-vscode/src/run/RunCommands.ts @@ -13,6 +13,7 @@ import { import {CustomWhenContext} from "../vscode-objs/CustomWhenContext"; import {WorkspaceFolderManager} from "../vscode-objs/WorkspaceFolderManager"; import {Events, Telemetry} from "../telemetry"; +import {PackageManagerTelemetry} from 
"../language/PackageManagerTelemetry"; /** * Run related commands @@ -25,7 +26,8 @@ export class RunCommands { private readonly featureManager: FeatureManager, private readonly context: ExtensionContext, private readonly customWhenContext: CustomWhenContext, - private readonly telemetry: Telemetry + private readonly telemetry: Telemetry, + private readonly packageManagerTelemetry: PackageManagerTelemetry ) { this.context.subscriptions.push( window.onDidChangeActiveTextEditor(async () => @@ -227,6 +229,7 @@ export class RunCommands { launchType: "debug", computeType: this.connection.serverless ? "serverless" : "cluster", }); + void this.packageManagerTelemetry.emitDetection("debug"); } async runFileUsingDbconnect(resource?: Uri) { @@ -251,5 +254,6 @@ export class RunCommands { launchType: "run", computeType: this.connection.serverless ? "serverless" : "cluster", }); + void this.packageManagerTelemetry.emitDetection("run"); } } diff --git a/packages/databricks-vscode/src/telemetry/PACKAGE_MANAGER_DETECTION.md b/packages/databricks-vscode/src/telemetry/PACKAGE_MANAGER_DETECTION.md new file mode 100644 index 000000000..6f11445d3 --- /dev/null +++ b/packages/databricks-vscode/src/telemetry/PACKAGE_MANAGER_DETECTION.md @@ -0,0 +1,86 @@ +# Telemetry: Python package-manager detection + +Measurement-only instrumentation that records which Python package/environment +manager(s) a project uses, so we can size the **real** distribution of +pip / conda / uv / poetry usage across Databricks VS Code users and prioritize +the VPEX setup-flow investment with first-party data instead of public-survey +estimates. + +This event does **not** change any setup behaviour. It is detection only. 
+ +## Event + +| | | +| ------------------- | ----------------------------------------------------------------- | +| **Event name** | `python_env.setup.detected` | +| **Defined in** | `src/telemetry/constants.ts` (`Events.PYTHON_ENV_SETUP_DETECTED`) | +| **Emitted from** | `src/language/PackageManagerTelemetry.ts` (`emitDetection`) | +| **Detection logic** | `src/language/packageManagerDetection.ts` (pure, unit-tested) | + +Telemetry is transported via the existing `Telemetry` client +(`@vscode/extension-telemetry`). As with every event, properties are prefixed +with `event.` and the user's `telemetry.telemetryLevel` opt-out is honoured by +the client — nothing is emitted when telemetry is disabled. Standard context +(extension/telemetry schema version, OS, hashed/anonymized user metadata) is +attached automatically by the client and is **not** part of this event's own +schema. + +## Schema (the `event.*` fields) + +| Field | Type | Notes | +| ------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------- | +| `managersDetected` | `string[]` | All managers with a firing signal, e.g. `["uv","pip"]`. Subset of `uv \| poetry \| pip \| conda`. JSON-stringified by the transport. | +| `primaryManager` | enum | `uv \| poetry \| pip \| conda \| unknown`. Priority when several apply: **uv > poetry > conda > pip**. `unknown` when no signal fires. | +| `signals` | `string[]` | Closed set of signal ids that fired (see below). JSON-stringified by the transport. | +| `pythonVersion` | `string?` | Interpreter version, **major.minor only** (e.g. `"3.11"`). Omitted if unknown. | +| `interpreterSource` | enum | `uv \| conda \| system \| venv \| unknown`. How the active interpreter was provisioned. | +| `hasLockfile` | `boolean` | True if `uv.lock` or `poetry.lock` was found. | +| `targetCompute` | enum | `cluster \| serverless \| none`. **No** cluster IDs/names. 
| +| `setupTrigger` | enum | `auto_open \| explicit_command \| run \| debug`. Which touchpoint fired it. | + +### `signals` value domain (closed set) + +`uv.lock`, `pyproject.tool.uv`, `uv.onPath`, `interpreter.uv`, `poetry.lock`, +`pyproject.tool.poetry`, `poetry.onPath`, `requirements.txt`, `constraints.txt`, +`pyproject.pipOnly`, `interpreter.venv`, `environment.yml`, `conda.prefix`, +`interpreter.conda`. + +> `*.onPath` are **weak** signals: they record that a tool is installed, but do +> not by themselves attribute the project to that manager. Attribution requires +> a project-local marker (lockfile, `pyproject` section, or interpreter source). +> They are part of the closed set the classifier accepts, but are **not emitted +> in practice**: the collector does not probe PATH (running an external `uv`/ +> `poetry` binary purely for a non-attributing signal is not worth the cost), so +> `uv.onPath` / `poetry.onPath` will not appear in real data unless a future +> collector populates them. + +## Where it fires + +1. **`auto_open`** — first environment check on project open + (`EnvironmentDependenciesVerifier.check`). +2. **`explicit_command`** — the "set up environment" command + (`databricks.environment.setup` → `EnvironmentCommands.setup`). +3. **`run` / `debug`** — first Run/Debug with Databricks Connect + (`RunCommands.runFileUsingDbconnect` / `debugFileUsingDbconnect`). + +Emissions are **deduplicated per session** on `(setupTrigger, projectRoot)`, so +a single project open does not inflate counts. The same project can still emit +once per distinct trigger (e.g. one `auto_open` and one `run`). + +## Privacy + +Only categorical/enum data and the closed-set signal ids are emitted. No file +paths, package names, project names, cluster names, or usernames. Detection is +best-effort and non-blocking: any failure degrades to `unknown` and is +swallowed, never thrown into the setup/run flow. + +## Suggested analysis + +- Share of projects by `primaryManager`. 
+- Co-occurrence from `managersDetected` (e.g. uv+pip, conda+pip, poetry+uv). +- `hasLockfile` and `pythonVersion` distributions to inform per-manager flow + depth. + +Dedupe at analysis time on the anonymized session id + `projectRoot` is not +possible (no path is sent); rely on the in-session dedupe above and treat each +event as one `(session, trigger)` observation. diff --git a/packages/databricks-vscode/src/telemetry/constants.ts b/packages/databricks-vscode/src/telemetry/constants.ts index 8fbc124cf..e13b4ee60 100644 --- a/packages/databricks-vscode/src/telemetry/constants.ts +++ b/packages/databricks-vscode/src/telemetry/constants.ts @@ -24,6 +24,7 @@ export enum Events { WORKFLOW_RUN = "workflowRun", DBCONNECT_RUN = "dbconnectRun", OPEN_RESOURCE_EXTERNALLY = "openResourceExternally", + PYTHON_ENV_SETUP_DETECTED = "python_env.setup.detected", } /* eslint-enable @typescript-eslint/naming-convention */ @@ -43,6 +44,17 @@ export type WorkflowTaskType = "python" | "notebook" | "unknown"; export type LaunchType = "run" | "debug"; export type ComputeType = "cluster" | "serverless"; +/** A Python package/environment manager detected for a project. */ +export type PackageManagerName = "uv" | "poetry" | "pip" | "conda"; +/** Best-guess primary manager, or "unknown" when no signal fires. */ +export type PrimaryManagerName = PackageManagerName | "unknown"; +/** How the active interpreter was provisioned. */ +export type InterpreterSource = "uv" | "conda" | "system" | "venv" | "unknown"; +/** The compute targeted at the time of detection. */ +export type TargetCompute = ComputeType | "none"; +/** What triggered a package-manager detection emission. */ +export type SetupTrigger = "auto_open" | "explicit_command" | "run" | "debug"; + /** Documentation about all of the properties and metrics of the event. 
*/ type EventDescription = {[K in keyof T]?: {comment?: string}}; @@ -209,6 +221,50 @@ export class EventTypes { comment: "The resource type", }, }; + [Events.PYTHON_ENV_SETUP_DETECTED]: EventType<{ + managersDetected: PackageManagerName[]; + primaryManager: PrimaryManagerName; + signals: string[]; + pythonVersion?: string; + interpreterSource: InterpreterSource; + hasLockfile: boolean; + targetCompute: TargetCompute; + setupTrigger: SetupTrigger; + }> = { + comment: + "The Python package/environment manager(s) detected for a project at setup time. " + + "Measurement only: emits categorical data to size the real distribution of " + + "pip/conda/uv/poetry usage across users. Contains no paths, package names, or other PII.", + managersDetected: { + comment: + 'All package managers with at least one firing signal, e.g. ["uv","pip"]', + }, + primaryManager: { + comment: + "Best-guess primary manager (uv > poetry > conda > pip), or unknown", + }, + signals: { + comment: + 'The closed-set signal identifiers that fired, e.g. ["uv.lock","pyproject.tool.uv"]', + }, + pythonVersion: { + comment: + 'Detected interpreter version, major.minor only (e.g. 
"3.11"), if available', + }, + interpreterSource: { + comment: "How the active interpreter was provisioned", + }, + hasLockfile: { + comment: "Whether a uv.lock or poetry.lock was found", + }, + targetCompute: { + comment: + "The compute targeted at detection time (no cluster IDs/names)", + }, + setupTrigger: { + comment: "Which setup touchpoint triggered detection", + }, + }; } /** diff --git a/packages/databricks-vscode/src/telemetry/packageManagerExtensions.ts b/packages/databricks-vscode/src/telemetry/packageManagerExtensions.ts new file mode 100644 index 000000000..b0aad7f10 --- /dev/null +++ b/packages/databricks-vscode/src/telemetry/packageManagerExtensions.ts @@ -0,0 +1,51 @@ +import {Events, Telemetry} from "."; +import { + PackageManagerName, + InterpreterSource, + TargetCompute, + SetupTrigger, +} from "./constants"; +import {PackageManagerDetection} from "../language/packageManagerDetection"; + +/** + * Context for a package-manager detection that is not part of the detection + * result itself: the interpreter version, the targeted compute, and what + * triggered the emission. + */ +export interface PackageManagerDetectionContext { + pythonVersion?: string; + targetCompute: TargetCompute; + trigger: SetupTrigger; +} + +declare module "." { + interface Telemetry { + /** + * Record a package-manager detection as a PYTHON_ENV_SETUP_DETECTED + * event. This is the emit half only: callers gather the signals and run + * the pure {@link detectPackageManagers} classifier, then hand the + * result here. Keeping the collection out of Telemetry keeps this client + * free of disk/Python-extension dependencies. 
+ */ + recordPackageManagerDetection( + detection: PackageManagerDetection, + context: PackageManagerDetectionContext + ): void; + } +} + +Telemetry.prototype.recordPackageManagerDetection = function ( + detection: PackageManagerDetection, + context: PackageManagerDetectionContext +) { + this.recordEvent(Events.PYTHON_ENV_SETUP_DETECTED, { + managersDetected: detection.managers as PackageManagerName[], + primaryManager: detection.primary, + signals: detection.signals, + pythonVersion: context.pythonVersion, + interpreterSource: detection.interpreterSource as InterpreterSource, + hasLockfile: detection.hasLockfile, + targetCompute: context.targetCompute, + setupTrigger: context.trigger, + }); +};