From d5f7567c38cd7baa38d5681cec13c81cd3bf2967 Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Thu, 11 Jun 2026 20:23:21 +0200 Subject: [PATCH 1/3] Only show installable projects in 'databricks labs list' 'databricks labs list' showed every non-archived, non-fork repository in the databrickslabs GitHub org (currently 39), but only repositories that ship a labs.yml manifest at the root of their release tag can actually be installed (currently 8). Everything else failed 'databricks labs install' with a not-found error. Filter the listing to repositories that have a root labs.yml on their default branch, checked concurrently via raw.githubusercontent.com (not subject to the low unauthenticated GitHub API rate limit) and cached for 24 hours like the repository list itself. Co-authored-by: Isaac --- cmd/labs/list.go | 75 ++++++++++++++++--- cmd/labs/list_test.go | 39 ++++++++++ ...tabrickslabs-installable-repositories.json | 21 ++++++ 3 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json diff --git a/cmd/labs/list.go b/cmd/labs/list.go index 171f956a1e7..1a80cb1867a 100644 --- a/cmd/labs/list.go +++ b/cmd/labs/list.go @@ -2,11 +2,20 @@ package labs import ( "context" + "errors" + "time" "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/localcache" "github.com/databricks/cli/cmd/labs/project" "github.com/databricks/cli/libs/cmdio" "github.com/spf13/cobra" + "golang.org/x/sync/errgroup" +) + +const ( + labsOrg = "databrickslabs" + installableCacheTTL = 24 * time.Hour ) type labsMeta struct { @@ -20,14 +29,68 @@ func allRepos(ctx context.Context) (github.Repositories, error) { if err != nil { return nil, err } - cache := github.NewRepositoryCache("databrickslabs", cacheDir) + cache := github.NewRepositoryCache(labsOrg, cacheDir) return cache.Load(ctx) } +// installableRepos returns the org repositories that `databricks labs 
install` can +// actually install. Most repositories in the org don't ship a labs.yml manifest +// (e.g. libraries published to package indexes), so listing them would only +// advertise projects that fail to install. +func installableRepos(ctx context.Context) (github.Repositories, error) { + cacheDir, err := project.PathInLabs(ctx) + if err != nil { + return nil, err + } + cache := localcache.NewLocalCache[github.Repositories](cacheDir, labsOrg+"-installable-repositories", installableCacheTTL) + return cache.Load(ctx, func() (github.Repositories, error) { + repos, err := allRepos(ctx) + if err != nil { + return nil, err + } + return filterInstallable(ctx, repos) + }) +} + +// filterInstallable keeps repositories that have a root labs.yml manifest on their +// default branch. The manifest is fetched from raw.githubusercontent.com, which is +// not subject to the low unauthenticated GitHub API rate limit. +func filterInstallable(ctx context.Context, repos github.Repositories) (github.Repositories, error) { + installable := make([]bool, len(repos)) + g, gctx := errgroup.WithContext(ctx) + g.SetLimit(10) + for i, repo := range repos { + if repo.IsArchived || repo.IsFork { + continue + } + g.Go(func() error { + _, err := github.ReadFileFromRef(gctx, labsOrg, repo.Name, repo.DefaultBranch, "labs.yml") + if errors.Is(err, github.ErrNotFound) { + return nil + } + if err != nil { + return err + } + installable[i] = true + return nil + }) + } + if err := g.Wait(); err != nil { + return nil, err + } + var out github.Repositories + for i, repo := range repos { + if installable[i] { + out = append(out, repo) + } + } + return out, nil +} + func newListCommand() *cobra.Command { return &cobra.Command{ Use: "list", - Short: "List all labs", + Short: "List labs that can be installed", Annotations: map[string]string{ "template": cmdio.Heredoc(` Name Description @@ -37,18 +100,12 @@ func newListCommand() *cobra.Command { }, RunE: func(cmd *cobra.Command, args []string) error { ctx 
:= cmd.Context() - repositories, err := allRepos(ctx) + repositories, err := installableRepos(ctx) if err != nil { return err } var info []labsMeta for _, v := range repositories { - if v.IsArchived { - continue - } - if v.IsFork { - continue - } description := v.Description if len(description) > 50 { description = description[:50] + "..." diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go index d1f763a7f33..93120eef0f8 100644 --- a/cmd/labs/list_test.go +++ b/cmd/labs/list_test.go @@ -1,10 +1,14 @@ package labs_test import ( + "net/http" + "net/http/httptest" "testing" + "github.com/databricks/cli/cmd/labs/github" "github.com/databricks/cli/internal/testcli" "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -15,4 +19,39 @@ func TestListingWorks(t *testing.T) { stdout, _, err := c.Run() require.NoError(t, err) require.Contains(t, stdout.String(), "ucx") + // blueprint is in the repositories cache fixture but not in the + // installable-repositories cache fixture, proving the latter is rendered. 
+ require.NotContains(t, stdout.String(), "blueprint") +} + +func TestListingFiltersReposWithoutLabsYml(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/users/databrickslabs/repos": + _, err := w.Write([]byte(`[ + {"name": "ucx", "description": "Unity Catalog Migrations", "default_branch": "main"}, + {"name": "brickster", "description": "R interface to Databricks", "default_branch": "main"} + ]`)) + assert.NoError(t, err) + case "/databrickslabs/ucx/main/labs.yml": + _, err := w.Write([]byte("name: ucx")) + assert.NoError(t, err) + case "/databrickslabs/brickster/main/labs.yml": + w.WriteHeader(http.StatusNotFound) + default: + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + } + })) + defer server.Close() + ctx := t.Context() + ctx = github.WithApiOverride(ctx, server.URL) + ctx = github.WithUserContentOverride(ctx, server.URL) + ctx = env.WithUserHomeDir(ctx, t.TempDir()) + + c := testcli.NewRunner(t, ctx, "labs", "list") + stdout, _, err := c.Run() + require.NoError(t, err) + require.Contains(t, stdout.String(), "ucx") + require.NotContains(t, stdout.String(), "brickster") } diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json new file mode 100644 index 00000000000..063ca11bd5d --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json @@ -0,0 +1,21 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "name": "ucx", + "description": "Unity Catalog Migrations", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/ucx", + "clone_url": "https://github.com/databrickslabs/ucx.git", + 
"ssh_url": "git@github.com:databrickslabs/ucx.git", + "license": { + "name": "Other" + } + } + ] +} From 8ed1722aa45642d569e8ae587daab2b871aa604e Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Thu, 11 Jun 2026 20:42:43 +0200 Subject: [PATCH 2/3] Add changelog entry Co-authored-by: Isaac --- NEXT_CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index fe3511e4b21..6c9187806c6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,6 +6,7 @@ ### CLI * Show a once-per-day notice after a command when a newer CLI release is available, with a link to the release and the upgrade command for the detected install method. Suppressed for non-interactive/CI runs, JSON output, the Databricks Runtime, and development builds, and can be disabled with `DATABRICKS_CLI_DISABLE_UPDATE_CHECK` ([#5470](https://github.com/databricks/cli/pull/5470)). +* `databricks labs list` now only shows projects that can be installed (those shipping a `labs.yml` manifest), and `databricks labs install` explains when a project does not provide one instead of failing with a generic "not found" error ([#5559](https://github.com/databricks/cli/pull/5559), [#5560](https://github.com/databricks/cli/pull/5560)). ### Bundles * Remove API enum values and types that are still in development from the `databricks-bundles` Python package; these were never accepted by the backend ([#5484](https://github.com/databricks/cli/pull/5484)). 
From ffe4dccb696a658c6f587ea14c45683930f6f287 Mon Sep 17 00:00:00 2001 From: Jan Rose Date: Wed, 17 Jun 2026 00:41:55 +0200 Subject: [PATCH 3/3] Use databricks-cli-installable topic on repos --- NEXT_CHANGELOG.md | 2 +- cmd/labs/list.go | 60 +++++-------------- cmd/labs/list_test.go | 29 +++------ ...tabrickslabs-installable-repositories.json | 21 ------- .../labs/databrickslabs-repositories.json | 4 +- 5 files changed, 27 insertions(+), 89 deletions(-) delete mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 6c9187806c6..3d13ab9ec21 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -6,7 +6,7 @@ ### CLI * Show a once-per-day notice after a command when a newer CLI release is available, with a link to the release and the upgrade command for the detected install method. Suppressed for non-interactive/CI runs, JSON output, the Databricks Runtime, and development builds, and can be disabled with `DATABRICKS_CLI_DISABLE_UPDATE_CHECK` ([#5470](https://github.com/databricks/cli/pull/5470)). -* `databricks labs list` now only shows projects that can be installed (those shipping a `labs.yml` manifest), and `databricks labs install` explains when a project does not provide one instead of failing with a generic "not found" error ([#5559](https://github.com/databricks/cli/pull/5559), [#5560](https://github.com/databricks/cli/pull/5560)). +* `databricks labs list` now only shows projects that can be installed ([#5560](https://github.com/databricks/cli/pull/5560)). ### Bundles * Remove API enum values and types that are still in development from the `databricks-bundles` Python package; these were never accepted by the backend ([#5484](https://github.com/databricks/cli/pull/5484)).
diff --git a/cmd/labs/list.go b/cmd/labs/list.go index 1a80cb1867a..2e709b7b385 100644 --- a/cmd/labs/list.go +++ b/cmd/labs/list.go @@ -2,20 +2,21 @@ package labs import ( "context" - "errors" - "time" + "slices" "github.com/databricks/cli/cmd/labs/github" - "github.com/databricks/cli/cmd/labs/localcache" "github.com/databricks/cli/cmd/labs/project" "github.com/databricks/cli/libs/cmdio" "github.com/spf13/cobra" - "golang.org/x/sync/errgroup" ) const ( - labsOrg = "databrickslabs" - installableCacheTTL = 24 * time.Hour + labsOrg = "databrickslabs" + + // installableTopic is the GitHub repository topic that labs maintainers add to + // projects installable via `databricks labs install`. The repositories API + // returns topics inline, so filtering on it costs no extra requests. + installableTopic = "databricks-cli-installable" ) type labsMeta struct { @@ -34,53 +35,20 @@ func allRepos(ctx context.Context) (github.Repositories, error) { } // installableRepos returns the org repositories that `databricks labs install` can -// actually install. Most repositories in the org don't ship a labs.yml manifest -// (e.g. libraries published to package indexes), so listing them would only -// advertise projects that fail to install. +// install. Most repositories don't ship a labs.yml manifest (e.g. libraries +// published to package indexes); maintainers tag the installable ones with +// installableTopic so the listing doesn't advertise projects that fail to install. 
func installableRepos(ctx context.Context) (github.Repositories, error) { - cacheDir, err := project.PathInLabs(ctx) + repos, err := allRepos(ctx) if err != nil { return nil, err } - cache := localcache.NewLocalCache[github.Repositories](cacheDir, labsOrg+"-installable-repositories", installableCacheTTL) - return cache.Load(ctx, func() (github.Repositories, error) { - repos, err := allRepos(ctx) - if err != nil { - return nil, err - } - return filterInstallable(ctx, repos) - }) -} - -// filterInstallable keeps repositories that have a root labs.yml manifest on their -// default branch. The manifest is fetched from raw.githubusercontent.com, which is -// not subject to the low unauthenticated GitHub API rate limit. -func filterInstallable(ctx context.Context, repos github.Repositories) (github.Repositories, error) { - installable := make([]bool, len(repos)) - g, gctx := errgroup.WithContext(ctx) - g.SetLimit(10) - for i, repo := range repos { + var out github.Repositories + for _, repo := range repos { if repo.IsArchived || repo.IsFork { continue } - g.Go(func() error { - _, err := github.ReadFileFromRef(gctx, labsOrg, repo.Name, repo.DefaultBranch, "labs.yml") - if errors.Is(err, github.ErrNotFound) { - return nil - } - if err != nil { - return err - } - installable[i] = true - return nil - }) - } - if err := g.Wait(); err != nil { - return nil, err - } - var out github.Repositories - for i, repo := range repos { - if installable[i] { + if slices.Contains(repo.Topics, installableTopic) { out = append(out, repo) } } diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go index 93120eef0f8..f3908b896e1 100644 --- a/cmd/labs/list_test.go +++ b/cmd/labs/list_test.go @@ -19,34 +19,23 @@ func TestListingWorks(t *testing.T) { stdout, _, err := c.Run() require.NoError(t, err) require.Contains(t, stdout.String(), "ucx") - // blueprint is in the repositories cache fixture but not in the - // installable-repositories cache fixture, proving the latter is rendered. 
+ // blueprint is in the repositories cache fixture but lacks the + // databricks-cli-installable topic, proving the topic filter is applied. require.NotContains(t, stdout.String(), "blueprint") } -func TestListingFiltersReposWithoutLabsYml(t *testing.T) { +func TestListingFiltersReposWithoutTopic(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - switch r.URL.Path { - case "/users/databrickslabs/repos": - _, err := w.Write([]byte(`[ - {"name": "ucx", "description": "Unity Catalog Migrations", "default_branch": "main"}, - {"name": "brickster", "description": "R interface to Databricks", "default_branch": "main"} - ]`)) - assert.NoError(t, err) - case "/databrickslabs/ucx/main/labs.yml": - _, err := w.Write([]byte("name: ucx")) - assert.NoError(t, err) - case "/databrickslabs/brickster/main/labs.yml": - w.WriteHeader(http.StatusNotFound) - default: - t.Logf("Requested: %s", r.URL.Path) - t.FailNow() - } + assert.Equal(t, "/users/databrickslabs/repos", r.URL.Path) + _, err := w.Write([]byte(`[ + {"name": "ucx", "description": "Unity Catalog Migrations", "topics": ["databricks-cli-installable"]}, + {"name": "brickster", "description": "R interface to Databricks", "topics": []} + ]`)) + assert.NoError(t, err) })) defer server.Close() ctx := t.Context() ctx = github.WithApiOverride(ctx, server.URL) - ctx = github.WithUserContentOverride(ctx, server.URL) ctx = env.WithUserHomeDir(ctx, t.TempDir()) c := testcli.NewRunner(t, ctx, "labs", "list") diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json deleted file mode 100644 index 063ca11bd5d..00000000000 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-installable-repositories.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "refreshed_at": 
"2033-01-01T00:00:00.92857+02:00", - "data": [ - { - "name": "ucx", - "description": "Unity Catalog Migrations", - "language": "Python", - "default_branch": "main", - "stargazers_count": 100500, - "fork": false, - "archived": false, - "topics": [], - "html_url": "https://github.com/databrickslabs/ucx", - "clone_url": "https://github.com/databrickslabs/ucx.git", - "ssh_url": "git@github.com:databrickslabs/ucx.git", - "license": { - "name": "Other" - } - } - ] -} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json index 896ebecc59e..11a96ac42a7 100644 --- a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json @@ -25,7 +25,9 @@ "stargazers_count": 100500, "fork": false, "archived": false, - "topics": [], + "topics": [ + "databricks-cli-installable" + ], "html_url": "https://github.com/databrickslabs/ucx", "clone_url": "https://github.com/databrickslabs/ucx.git", "ssh_url": "git@github.com:databrickslabs/ucx.git",