Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 35 additions & 15 deletions entity/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,28 @@ const (
// BuildStatusUnknown is the unreachable state. It is set by default when the structure is initialized. It should never be seen in the system.
BuildStatusUnknown BuildStatus = ""

// BuildStatusQueued indicates the build has been scheduled but not yet started.
BuildStatusQueued BuildStatus = "queued"
// BuildStatusAccepted indicates the build has been accepted by the CI provider.
BuildStatusAccepted BuildStatus = "accepted"

// BuildStatusRunning indicates the build is currently executing.
BuildStatusRunning BuildStatus = "running"

// BuildStatusPassed indicates the build completed successfully.
// BuildStatusSucceeded indicates the build completed successfully.
// This is a terminal state.
BuildStatusPassed BuildStatus = "passed"
BuildStatusSucceeded BuildStatus = "succeeded"

// BuildStatusFailed indicates the build completed with failures.
// This is a terminal state.
BuildStatusFailed BuildStatus = "failed"

// BuildStatusCancelled indicates the build was cancelled before completion.
// BuildStatusCancelled indicates the build was cancelled by SubmitQueue.
// This is a terminal state.
// Note: If the build system cancels a build for external reasons (e.g., timeout, resource limits),
// this should be reported as BuildStatusFailed, not BuildStatusCancelled.
BuildStatusCancelled BuildStatus = "cancelled"

// BuildStatusBlocked indicates the build is waiting for manual approval or unblocking.
// Some CI systems (like BuildKite) support manual approval steps.
BuildStatusBlocked BuildStatus = "blocked"
)

// IsTerminal returns true if the build state represents a final state (passed, failed, or cancelled).
// IsTerminal returns true if the build state represents a final state (succeeded, failed, or cancelled).
// Terminal states indicate the build has finished and will not change state again.
// Note: BuildStatusBlocked is NOT terminal as blocked builds can be unblocked and continue execution.
func (s BuildStatus) IsTerminal() bool {
return s == BuildStatusPassed || s == BuildStatusFailed || s == BuildStatusCancelled
return s == BuildStatusSucceeded || s == BuildStatusFailed || s == BuildStatusCancelled
}


Expand All @@ -61,3 +55,29 @@ type Build struct {
// Status represents the state of the build lifecycle this build is in.
Status BuildStatus
}

// ChangeAction defines the action to perform on a change submitted to the build system.
type ChangeAction string

const (
// ChangeActionUnknown is the sentinel value for uninitialized actions.
ChangeActionUnknown ChangeAction = ""
// ChangeActionApply applies the change to the target branch.
ChangeActionApply ChangeAction = "apply"
// ChangeActionValidate applies the change first, and then validates the change by running respective validation/test suites.
ChangeActionValidate ChangeAction = "validate"
)

// BuildChange represents a code change to be processed by the build system.
// This is used by BuildManager to specify what changes to build and what action to perform.
type BuildChange struct {
// Change is a list of URLs where the provider is encoded in the schema part.
// Example: "github://uber/submitqueue/pull/123/abc123def"
Change Change
// Action specifies what operation to perform on this change.
Action ChangeAction
}

// BuildMetadata contains additional metadata about a build returned by the build system.
// The specific keys and values are implementation-defined.
type BuildMetadata map[string]string
18 changes: 4 additions & 14 deletions entity/build_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ func TestBuildStatus_IsTerminal(t *testing.T) {
expected bool
}{
{
name: "passed is terminal",
status: BuildStatusPassed,
name: "succeeded is terminal",
status: BuildStatusSucceeded,
expected: true,
},
{
Expand All @@ -28,18 +28,8 @@ func TestBuildStatus_IsTerminal(t *testing.T) {
expected: true,
},
{
name: "queued is not terminal",
status: BuildStatusQueued,
expected: false,
},
{
name: "running is not terminal",
status: BuildStatusRunning,
expected: false,
},
{
name: "blocked is not terminal",
status: BuildStatusBlocked,
name: "accepted is not terminal",
status: BuildStatusAccepted,
expected: false,
},
{
Expand Down
19 changes: 19 additions & 0 deletions extension/build/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load("@rules_go//go:def.bzl", "go_library")

exports_files(
["build_manager.go"],
visibility = ["//extension/build/mock:__pkg__"],
)

go_library(
name = "build",
srcs = [
"build_manager.go",
"errors.go",
],
importpath = "github.com/uber/submitqueue/extension/build",
visibility = ["//visibility:public"],
deps = [
"//entity",
],
)
72 changes: 72 additions & 0 deletions extension/build/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Build Manager

The BuildManager extension provides a clean abstraction for integrating with CI/CD systems like BuildKite, Jenkins, and others. It allows the Orchestrator service to schedule builds, poll their status, and cancel running builds without being coupled to any specific CI provider.

## Interface

### BuildManager

Schedules builds, polls their status, and cancels running builds.

```go
type BuildManager interface {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

plz avoid copying the code to the docs.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

examples on "how to" are ok

Schedule(ctx context.Context, queueName string, changes []entity.BuildChange) (string, error)
Poll(ctx context.Context, buildID string) (entity.BuildStatus, entity.BuildMetadata, error)
CancelBuild(ctx context.Context, buildID string) error
Close() error
}
```

- **Schedule**: Submits changes to the CI provider for processing. Returns a unique build ID.
- **Poll**: Retrieves the current status and metadata of a build.
- **CancelBuild**: Requests cancellation of a build (asynchronous, does not wait for completion).
- **Close**: Gracefully shuts down the build manager and releases resources.

### Entities

```go
type BuildChange struct {
Change entity.Change // List of URIs with provider encoded in schema
Action ChangeAction // "apply" or "validate"
}

type BuildMetadata map[string]string // Implementation-defined metadata (e.g., build URL, duration)
```

### Errors

- **ErrBuildNotFound**: Returned by Poll and CancelBuild when the build doesn't exist.
- **ErrInvalidRequest**: Returned by Schedule when validation fails.

## Usage

```go
mgr := buildkite.NewBuildManager(config)
defer mgr.Close()

// Schedule a build
changes := []entity.BuildChange{
{Change: entity.Change{URIs: []string{"github://uber/repo/pull/123/abc"}}, Action: entity.ChangeActionValidate},
}
buildID, err := mgr.Schedule(ctx, "my-queue", changes)

// Poll for status
status, metadata, err := mgr.Poll(ctx, buildID)
if status.IsTerminal() {
fmt.Println("Build finished:", status)
}

// Cancel if needed
err = mgr.CancelBuild(ctx, buildID)
```

## Implementing a Backend

1. Create `extension/build/{backend}/` directory
2. Implement the `BuildManager` interface
3. Map `entity.BuildChange` actions to backend-specific job configurations
4. Handle build status mapping to `entity.BuildStatus` values

**Thread-safety**: All implementations must be thread-safe and support concurrent operations.

**Singleton Design**: Implementations are long-lived singletons (one per build provider) initialized at service startup, similar to Storage and other extension components. They should manage connection pooling, caching, and other resources for the lifetime of the service.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also important to note how transient errors are handled. Are the implementation expected to reestablish the connection if it is lost, and give back errors on interface requests in the interim?

72 changes: 72 additions & 0 deletions extension/build/build_manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package build

//go:generate mockgen -source=build_manager.go -destination=mock/build_manager.go -package=mock

import (
"context"

"github.com/uber/submitqueue/entity"
)

// BuildManager is a vendor-agnostic interface for managing builds with external CI/CD providers.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we avoid saying CI/CD completely in this interface as it can be anything as per impl...

// Implementations provide integration with specific CI systems (BuildKite, Jenkins, etc.)
// to schedule builds, poll their status, and cancel running builds.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

poll is again a semantics of impl

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yet we made it a part of the interface.

//
// Implementations are long-lived singletons (one per build provider) initialized at service
// startup, similar to Storage and other extension components. They should manage connection
// pooling, caching, and other resources for the lifetime of the service.
//
Comment on lines +15 to +18
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would avoid saying this, maybe simplify it.

BuildManager is responsible for triggering, cancelling and checking status of builds generated by underlying implementations.

Implementations must be thread-safe.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I asked for so I take this comment.
I think comments need to provide an instruction of how the implementation will be used - as it will ultimately dictate the implementation itself. Can the implementation hold heavy state in the object, or should it use a shared state elsewhere?
A requirement on just thread safety may not be enough to create efficient implementations, as someone implementing them may oversynchronize for the sake of safety.

// All implementations must be thread-safe and support concurrent operations.
type BuildManager interface {
// Schedule submits a list of changes to the CI provider for processing.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no CI in docs

// Each change specifies an action (validate or apply) to perform.
//
// The implementation is responsible for:
// - Looking up the job name from the queue configuration
// - Creating appropriate builds/jobs for each change based on its action
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

jobs is very impl specific? so we should just say build

// - Handling dependencies between changes (order may be significant)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generally order is always important

//
// Parameters:
// - ctx: Request context for cancellation and timeouts
// - queueName: Name of the queue processing these changes. Used to look up job configuration.
// - changes: List of changes to process. Order may be significant for dependencies.
//
// Returns:
// - string: Unique build ID that can be used with Poll and CancelBuild methods
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

replace Poll with BuildStatus or for checking the build status

// - error: ErrInvalidRequest if validation fails
Schedule(ctx context.Context, queueName string, changes []entity.BuildChange) (string, error)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it make sense to say just "TriggerBuild", whether it's done right away vs scheduled...it's impl details?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is another constraint that we need to put on the implementation by explicitly stating the expected behavior in comments. Which is: implementation should be asynchronous, return fast, and Poll is to be used to check statuses.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

which means that "done right away" is not an option to the implementation.
If we believe that the implementation can deduce builds which already happen based on provided change, it can return the build status along with the buildID.
"123, accepted, nil"
"124, succeeded, nil"
So SQ will save on not doing the next polling call.


// Poll retrieves the current status of a build from the CI provider.
// This is a synchronous call that queries the provider's API.
//
// Parameters:
// - buildID: Build ID string
//
// Returns:
// - BuildStatus: Current state of the build
// - BuildMetadata: Additional metadata about the build (e.g., build URL, commit SHA, duration)
// - error: ErrBuildNotFound if the build doesn't exist
Poll(ctx context.Context, buildID string) (entity.BuildStatus, entity.BuildMetadata, error)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rename this to `BuildStatus".. poller is just an impl detail?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not going to nitpick names, but here again is where behavior matters. We can say in the comments, that unlike "Schedule" this implementation might be synchronous and potentially lengthy.
Full abstractions are not efficient unless they are implemented according to expected usage.
Unfortunately, the language itself has limited ways to contract this so we have to rely on conventions (explicit in comments).


// CancelBuild requests cancellation of a build.
//
// This operation is asynchronous and does not wait for the cancellation to complete.
// The implementation should initiate the cancellation request with the CI provider
// and return immediately.
//
// SubmitQueue will mark the build as BuildStatusCancelled immediately without waiting
// for confirmation from the build system.
//
// Parameters:
// - buildID: Build ID string
//
// Returns:
// - error: ErrBuildNotFound if the build doesn't exist
CancelBuild(ctx context.Context, buildID string) error
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can drop Build and say Cancel


// Close gracefully shuts down the build manager.
// Implementations should close HTTP clients and clean up resources.
// After Close is called, all other methods should return errors.
// Close is idempotent and safe to call multiple times.
Close() error
}
41 changes: 41 additions & 0 deletions extension/build/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package build

import (
"errors"
"fmt"
)

// ErrBuildNotFound is returned when a build does not exist in the CI provider.
// This can occur if:
// - The build ID is invalid or malformed
// - The build was deleted from the provider
// - The build never existed
var ErrBuildNotFound = errors.New("build not found")

// IsBuildNotFound returns true if any error in the error chain is ErrBuildNotFound.
func IsBuildNotFound(err error) bool {
return errors.Is(err, ErrBuildNotFound)
}

// WrapBuildNotFound wraps ErrBuildNotFound with the original error from the build provider.
// This preserves the original error details while marking it as a "not found" error.
func WrapBuildNotFound(err error) error {
return fmt.Errorf("%w: %w", ErrBuildNotFound, err)
}

// ErrInvalidRequest is returned when Schedule parameters fail validation.
// This can occur when:
// - queueName is empty or invalid
// - changes list is empty
// - changes contain invalid Change entities or Actions
var ErrInvalidRequest = errors.New("invalid request")

// IsInvalidRequest returns true if any error in the error chain is ErrInvalidRequest.
func IsInvalidRequest(err error) bool {
return errors.Is(err, ErrInvalidRequest)
}

// WrapInvalidRequest wraps ErrInvalidRequest with a descriptive error message.
func WrapInvalidRequest(err error) error {
return fmt.Errorf("%w: %w", ErrInvalidRequest, err)
}
28 changes: 28 additions & 0 deletions extension/build/mock/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
load("@rules_go//extras:gomock.bzl", "gomock")
load("@rules_go//go:def.bzl", "go_library")

_MOCKGEN = "@org_uber_go_mock//mockgen"

gomock(
name = "mock_build_manager_src",
out = "build_manager_mock.go",
mockgen_tool = _MOCKGEN,
package = "mock",
source = "//extension/build:build_manager.go",
source_importpath = "github.com/uber/submitqueue/extension/build",
)

# gazelle:ignore
go_library(
name = "mock",
srcs = [
":mock_build_manager_src",
],
importpath = "github.com/uber/submitqueue/extension/build/mock",
visibility = ["//visibility:public"],
deps = [
"//entity",
"//extension/build",
"@org_uber_go_mock//gomock",
],
)