From ae9a9469bbf292eec7aa8a34b8874b6a908a7b14 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 6 Apr 2026 15:23:35 -0700 Subject: [PATCH 01/17] add: dep Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- actions/k8s/app_scheme.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 actions/k8s/app_scheme.go diff --git a/actions/k8s/app_scheme.go b/actions/k8s/app_scheme.go new file mode 100644 index 0000000000..9f3d450d49 --- /dev/null +++ b/actions/k8s/app_scheme.go @@ -0,0 +1,13 @@ +package k8s + +import ( + "k8s.io/client-go/kubernetes/scheme" + servingv1 "knative.dev/serving/pkg/apis/serving/v1" +) + +// InitAppScheme registers Knative Serving types (Service, Route, Configuration, Revision) +// into the client-go scheme so that the K8s client can manage KService CRDs. +// Must be called before creating any K8s clients that interact with apps. +func InitAppScheme() error { + return servingv1.AddToScheme(scheme.Scheme) +} From a2cc59fcb18ac736897e9cf430c7ae8802efd016 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:09:02 -0700 Subject: [PATCH 02/17] restructure Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- actions/k8s/app_scheme.go | 13 ------------- flytestdlib/app/db.go | 0 2 files changed, 13 deletions(-) delete mode 100644 actions/k8s/app_scheme.go create mode 100644 flytestdlib/app/db.go diff --git a/actions/k8s/app_scheme.go b/actions/k8s/app_scheme.go deleted file mode 100644 index 9f3d450d49..0000000000 --- a/actions/k8s/app_scheme.go +++ /dev/null @@ -1,13 +0,0 @@ -package k8s - -import ( - "k8s.io/client-go/kubernetes/scheme" - servingv1 "knative.dev/serving/pkg/apis/serving/v1" -) - -// InitAppScheme registers Knative Serving types (Service, Route, Configuration, Revision) -// into the client-go scheme so that the K8s client can manage KService CRDs. -// Must be called before creating any K8s clients that interact with apps. -func InitAppScheme() error { - return servingv1.AddToScheme(scheme.Scheme) -} diff --git a/flytestdlib/app/db.go b/flytestdlib/app/db.go new file mode 100644 index 0000000000..e69de29bb2 From 3d8f5e1a2bcf44bcf4148d9dc048e0a76c0ded41 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:17:47 -0700 Subject: [PATCH 03/17] wip Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/app.go | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100644 app/app.go diff --git a/app/app.go b/app/app.go new file mode 100644 index 0000000000..44cd9c49c7 --- /dev/null +++ b/app/app.go @@ -0,0 +1,398 @@ +package app + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + servingv1 "knative.dev/serving/pkg/apis/serving/v1" + "google.golang.org/protobuf/proto" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" + ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/flyteorg/flyte/v2/actions/config" + "github.com/flyteorg/flyte/v2/flytestdlib/logger" + flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" +) + +const ( + labelAppManaged = "flyte.org/app-managed" + labelProject = "flyte.org/project" + labelDomain = "flyte.org/domain" + labelAppName = "flyte.org/app-name" + + annotationSpecSHA = "flyte.org/spec-sha" + annotationAppID = "flyte.org/app-id" + + maxScaleZero = "0" + + // maxKServiceNameLen is the Kubernetes DNS label limit. + maxKServiceNameLen = 63 +) + +// AppK8sClientInterface defines the KService lifecycle operations for the App service. +type AppK8sClientInterface interface { + // Deploy creates or updates the KService for the given app. Idempotent — skips + // the update if the spec SHA annotation is unchanged. + Deploy(ctx context.Context, app *flyteapp.App) error + + // Stop scales the KService to zero by setting max-scale=0. The KService CRD + // is kept so the app can be restarted later. + Stop(ctx context.Context, appID *flyteapp.Identifier) error + + // GetStatus reads the KService and maps its conditions to a DeploymentStatus. + // Returns a Status with STOPPED if the KService does not exist. + GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) + + // List returns all apps (spec + live status) for the given project/domain scope. + List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) +} + +// AppK8sClient implements AppK8sClientInterface using controller-runtime. +type AppK8sClient struct { + k8sClient client.WithWatch + cache ctrlcache.Cache + namespace string + cfg *config.AppConfig +} + +// NewAppK8sClient creates a new AppK8sClient. +func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *config.AppConfig) *AppK8sClient { + return &AppK8sClient{ + k8sClient: k8sClient, + cache: cache, + namespace: cfg.Namespace, + cfg: cfg, + } +} + +// Deploy creates or updates the KService for the given app. +func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { + appID := app.GetMetadata().GetId() + name := kserviceName(appID) + + ksvc, err := c.buildKService(app) + if err != nil { + return fmt.Errorf("failed to build KService for app %s: %w", name, err) + } + + existing := &servingv1.Service{} + err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, existing) + if k8serrors.IsNotFound(err) { + if err := c.k8sClient.Create(ctx, ksvc); err != nil { + return fmt.Errorf("failed to create KService %s: %w", name, err) + } + logger.Infof(ctx, "Created KService %s/%s", c.namespace, name) + return nil + } + if err != nil { + return fmt.Errorf("failed to get KService %s: %w", name, err) + } + + // Skip update if spec has not changed. + if existing.Annotations[annotationSpecSHA] == ksvc.Annotations[annotationSpecSHA] { + logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", c.namespace, name) + return nil + } + + existing.Spec = ksvc.Spec + existing.Labels = ksvc.Labels + existing.Annotations = ksvc.Annotations + if err := c.k8sClient.Update(ctx, existing); err != nil { + return fmt.Errorf("failed to update KService %s: %w", name, err) + } + logger.Infof(ctx, "Updated KService %s/%s", c.namespace, name) + return nil +} + +// Stop sets max-scale=0 on the KService, scaling it to zero without deleting it. +func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) error { + name := kserviceName(appID) + patch := []byte(`{"spec":{"template":{"metadata":{"annotations":{"autoscaling.knative.dev/max-scale":"0"}}}}}`) + ksvc := &servingv1.Service{} + ksvc.Name = name + ksvc.Namespace = c.namespace + if err := c.k8sClient.Patch(ctx, ksvc, client.RawPatch(types.MergePatchType, patch)); err != nil { + if k8serrors.IsNotFound(err) { + // Already stopped/deleted — treat as success. + return nil + } + return fmt.Errorf("failed to patch KService %s to stop: %w", name, err) + } + logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", c.namespace, name) + return nil +} + +// GetStatus reads the KService and maps its conditions to a flyteapp.Status proto. +func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) { + name := kserviceName(appID) + ksvc := &servingv1.Service{} + if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, ksvc); err != nil { + if k8serrors.IsNotFound(err) { + return statusWithPhase(flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, "KService not found"), nil + } + return nil, fmt.Errorf("failed to get KService %s: %w", name, err) + } + return kserviceToStatus(ksvc), nil +} + +// List returns all apps for the given project/domain by listing KServices with label selectors. +func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) { + list := &servingv1.ServiceList{} + if err := c.k8sClient.List(ctx, list, + client.InNamespace(c.namespace), + client.MatchingLabels{ + labelProject: project, + labelDomain: domain, + }, + ); err != nil { + return nil, fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) + } + + apps := make([]*flyteapp.App, 0, len(list.Items)) + for i := range list.Items { + a, err := kserviceToApp(&list.Items[i]) + if err != nil { + logger.Warnf(ctx, "Skipping KService %s: failed to convert to app: %v", list.Items[i].Name, err) + continue + } + apps = append(apps, a) + } + return apps, nil +} + +// --- Helpers --- + +// kserviceName builds the KService name from an app identifier. +// Format: "{project}-{domain}-{name}", truncated to 63 chars. +func kserviceName(id *flyteapp.Identifier) string { + name := fmt.Sprintf("%s-%s-%s", id.GetProject(), id.GetDomain(), id.GetName()) + if len(name) > maxKServiceNameLen { + name = name[:maxKServiceNameLen] + } + return strings.ToLower(name) +} + +// specSHA computes a SHA256 digest of the serialized App Spec proto. +func specSHA(spec *flyteapp.Spec) (string, error) { + b, err := proto.Marshal(spec) + if err != nil { + return "", fmt.Errorf("failed to marshal spec: %w", err) + } + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:8]), nil // 8 bytes = 16 hex chars, enough for change detection +} + +// buildKService constructs a Knative Service manifest from an App proto. +func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, error) { + appID := app.GetMetadata().GetId() + spec := app.GetSpec() + name := kserviceName(appID) + + sha, err := specSHA(spec) + if err != nil { + return nil, err + } + + podSpec, err := buildPodSpec(spec) + if err != nil { + return nil, err + } + + templateAnnotations := buildAutoscalingAnnotations(spec, c.cfg) + + timeoutSecs := c.cfg.DefaultRequestTimeout.Seconds() + if t := spec.GetTimeouts().GetRequestTimeout(); t != nil { + timeoutSecs = t.AsDuration().Seconds() + if timeoutSecs > c.cfg.MaxRequestTimeout.Seconds() { + timeoutSecs = c.cfg.MaxRequestTimeout.Seconds() + } + } + timeoutSecsInt := int64(timeoutSecs) + + ksvc := &servingv1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: c.namespace, + Labels: map[string]string{ + labelAppManaged: "true", + labelProject: appID.GetProject(), + labelDomain: appID.GetDomain(), + labelAppName: appID.GetName(), + }, + Annotations: map[string]string{ + annotationSpecSHA: sha, + annotationAppID: fmt.Sprintf("%s/%s/%s", appID.GetProject(), appID.GetDomain(), appID.GetName()), + }, + }, + Spec: servingv1.ServiceSpec{ + ConfigurationSpec: servingv1.ConfigurationSpec{ + Template: servingv1.RevisionTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: templateAnnotations, + }, + Spec: servingv1.RevisionSpec{ + PodSpec: podSpec, + TimeoutSeconds: &timeoutSecsInt, + }, + }, + }, + }, + } + return ksvc, nil +} + +// buildPodSpec constructs a corev1.PodSpec from an App Spec. +// Supports Container payload only for now; K8sPod support can be added in a follow-up. +func buildPodSpec(spec *flyteapp.Spec) (corev1.PodSpec, error) { + switch p := spec.GetAppPayload().(type) { + case *flyteapp.Spec_Container: + c := p.Container + container := corev1.Container{ + Name: "app", + Image: c.GetImage(), + Args: c.GetArgs(), + } + for _, e := range c.GetEnv() { + container.Env = append(container.Env, corev1.EnvVar{ + Name: e.GetKey(), + Value: e.GetValue(), + }) + } + return corev1.PodSpec{Containers: []corev1.Container{container}}, nil + + case *flyteapp.Spec_Pod: + // K8sPod payloads are not yet supported — the pod spec serialization + // from flyteplugins is needed for a complete implementation. + return corev1.PodSpec{}, fmt.Errorf("K8sPod app payload is not yet supported") + + default: + return corev1.PodSpec{}, fmt.Errorf("app spec has no payload (container or pod required)") + } +} + +// buildAutoscalingAnnotations returns the Knative autoscaling annotations for the revision template. +func buildAutoscalingAnnotations(spec *flyteapp.Spec, cfg *config.AppConfig) map[string]string { + annotations := map[string]string{} + autoscaling := spec.GetAutoscaling() + if autoscaling == nil { + return annotations + } + + if r := autoscaling.GetReplicas(); r != nil { + annotations["autoscaling.knative.dev/min-scale"] = fmt.Sprintf("%d", r.GetMin()) + annotations["autoscaling.knative.dev/max-scale"] = fmt.Sprintf("%d", r.GetMax()) + } + + if m := autoscaling.GetScalingMetric(); m != nil { + switch metric := m.GetMetric().(type) { + case *flyteapp.ScalingMetric_RequestRate: + annotations["autoscaling.knative.dev/metric"] = "rps" + annotations["autoscaling.knative.dev/target"] = fmt.Sprintf("%d", metric.RequestRate.GetTargetValue()) + case *flyteapp.ScalingMetric_Concurrency: + annotations["autoscaling.knative.dev/metric"] = "concurrency" + annotations["autoscaling.knative.dev/target"] = fmt.Sprintf("%d", metric.Concurrency.GetTargetValue()) + } + } + + if p := autoscaling.GetScaledownPeriod(); p != nil { + annotations["autoscaling.knative.dev/window"] = p.AsDuration().String() + } + + return annotations +} + +// statusWithPhase builds a flyteapp.Status with a single Condition set to the given phase. +func statusWithPhase(phase flyteapp.Status_DeploymentStatus, message string) *flyteapp.Status { + return &flyteapp.Status{ + Conditions: []*flyteapp.Condition{ + { + DeploymentStatus: phase, + Message: message, + LastTransitionTime: timestamppb.Now(), + }, + }, + } +} + +// kserviceToStatus maps a KService's conditions to a flyteapp.Status proto. +func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { + var phase flyteapp.Status_DeploymentStatus + var message string + + // Check if max-scale=0 is set — explicitly stopped by the control plane. + if ann := ksvc.Spec.Template.Annotations; ann != nil { + if ann["autoscaling.knative.dev/max-scale"] == maxScaleZero { + phase = flyteapp.Status_DEPLOYMENT_STATUS_STOPPED + message = "App scaled to zero" + } + } + + if phase == flyteapp.Status_DEPLOYMENT_STATUS_UNSPECIFIED { + switch { + case ksvc.IsReady(): + phase = flyteapp.Status_DEPLOYMENT_STATUS_ACTIVE + case ksvc.IsFailed(): + phase = flyteapp.Status_DEPLOYMENT_STATUS_FAILED + if c := ksvc.Status.GetCondition(servingv1.ServiceConditionReady); c != nil { + message = c.Message + } + case ksvc.Status.LatestCreatedRevisionName != ksvc.Status.LatestReadyRevisionName: + phase = flyteapp.Status_DEPLOYMENT_STATUS_DEPLOYING + default: + phase = flyteapp.Status_DEPLOYMENT_STATUS_PENDING + } + } + + status := statusWithPhase(phase, message) + + // Populate ingress URL from KService route status. + if url := ksvc.Status.URL; url != nil { + status.Ingress = &flyteapp.Ingress{ + PublicUrl: url.String(), + } + } + + // Populate current replica count and K8s namespace metadata. + status.CurrentReplicas = uint32(len(ksvc.Status.Traffic)) + status.K8SMetadata = &flyteapp.K8SMetadata{ + Namespace: ksvc.Namespace, + } + + return status +} + +// kserviceToApp reconstructs a flyteapp.App from a KService by reading the +// app identifier from annotations and the live status from KService conditions. +func kserviceToApp(ksvc *servingv1.Service) (*flyteapp.App, error) { + appIDStr, ok := ksvc.Annotations[annotationAppID] + if !ok { + return nil, fmt.Errorf("KService %s missing %s annotation", ksvc.Name, annotationAppID) + } + + // annotation format: "{project}/{domain}/{name}" + parts := strings.SplitN(appIDStr, "/", 3) + if len(parts) != 3 { + return nil, fmt.Errorf("KService %s has malformed %s annotation: %q", ksvc.Name, annotationAppID, appIDStr) + } + + appID := &flyteapp.Identifier{ + Project: parts[0], + Domain: parts[1], + Name: parts[2], + } + + return &flyteapp.App{ + Metadata: &flyteapp.Meta{ + Id: appID, + }, + Status: kserviceToStatus(ksvc), + }, nil +} From dc0ec0561b10b642f9b47b54c745fb1d9b372676 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:23:07 -0700 Subject: [PATCH 04/17] restruct Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- {app => actions/k8s}/app.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename {app => actions/k8s}/app.go (99%) diff --git a/app/app.go b/actions/k8s/app.go similarity index 99% rename from app/app.go rename to actions/k8s/app.go index 44cd9c49c7..b47a5bcafa 100644 --- a/app/app.go +++ b/actions/k8s/app.go @@ -1,4 +1,4 @@ -package app +package k8s import ( "context" From 75966ec3370c58c7373f5ddf62bfdd4fa8ae02a9 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:44:05 -0700 Subject: [PATCH 05/17] fix Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- actions/k8s/app.go | 398 ---------------------------- app/internal/k8s/app_client.go | 174 +++++------- app/internal/k8s/app_client_test.go | 209 ++++----------- 3 files changed, 127 insertions(+), 654 deletions(-) delete mode 100644 actions/k8s/app.go diff --git a/actions/k8s/app.go b/actions/k8s/app.go deleted file mode 100644 index b47a5bcafa..0000000000 --- a/actions/k8s/app.go +++ /dev/null @@ -1,398 +0,0 @@ -package k8s - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "fmt" - "strings" - - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - servingv1 "knative.dev/serving/pkg/apis/serving/v1" - "google.golang.org/protobuf/proto" - timestamppb "google.golang.org/protobuf/types/known/timestamppb" - ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/flyteorg/flyte/v2/actions/config" - "github.com/flyteorg/flyte/v2/flytestdlib/logger" - flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" -) - -const ( - labelAppManaged = "flyte.org/app-managed" - labelProject = "flyte.org/project" - labelDomain = "flyte.org/domain" - labelAppName = "flyte.org/app-name" - - annotationSpecSHA = "flyte.org/spec-sha" - annotationAppID = "flyte.org/app-id" - - maxScaleZero = "0" - - // maxKServiceNameLen is the Kubernetes DNS label limit. - maxKServiceNameLen = 63 -) - -// AppK8sClientInterface defines the KService lifecycle operations for the App service. -type AppK8sClientInterface interface { - // Deploy creates or updates the KService for the given app. Idempotent — skips - // the update if the spec SHA annotation is unchanged. - Deploy(ctx context.Context, app *flyteapp.App) error - - // Stop scales the KService to zero by setting max-scale=0. The KService CRD - // is kept so the app can be restarted later. - Stop(ctx context.Context, appID *flyteapp.Identifier) error - - // GetStatus reads the KService and maps its conditions to a DeploymentStatus. - // Returns a Status with STOPPED if the KService does not exist. - GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) - - // List returns all apps (spec + live status) for the given project/domain scope. - List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) -} - -// AppK8sClient implements AppK8sClientInterface using controller-runtime. -type AppK8sClient struct { - k8sClient client.WithWatch - cache ctrlcache.Cache - namespace string - cfg *config.AppConfig -} - -// NewAppK8sClient creates a new AppK8sClient. -func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *config.AppConfig) *AppK8sClient { - return &AppK8sClient{ - k8sClient: k8sClient, - cache: cache, - namespace: cfg.Namespace, - cfg: cfg, - } -} - -// Deploy creates or updates the KService for the given app. -func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { - appID := app.GetMetadata().GetId() - name := kserviceName(appID) - - ksvc, err := c.buildKService(app) - if err != nil { - return fmt.Errorf("failed to build KService for app %s: %w", name, err) - } - - existing := &servingv1.Service{} - err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, existing) - if k8serrors.IsNotFound(err) { - if err := c.k8sClient.Create(ctx, ksvc); err != nil { - return fmt.Errorf("failed to create KService %s: %w", name, err) - } - logger.Infof(ctx, "Created KService %s/%s", c.namespace, name) - return nil - } - if err != nil { - return fmt.Errorf("failed to get KService %s: %w", name, err) - } - - // Skip update if spec has not changed. - if existing.Annotations[annotationSpecSHA] == ksvc.Annotations[annotationSpecSHA] { - logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", c.namespace, name) - return nil - } - - existing.Spec = ksvc.Spec - existing.Labels = ksvc.Labels - existing.Annotations = ksvc.Annotations - if err := c.k8sClient.Update(ctx, existing); err != nil { - return fmt.Errorf("failed to update KService %s: %w", name, err) - } - logger.Infof(ctx, "Updated KService %s/%s", c.namespace, name) - return nil -} - -// Stop sets max-scale=0 on the KService, scaling it to zero without deleting it. -func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) error { - name := kserviceName(appID) - patch := []byte(`{"spec":{"template":{"metadata":{"annotations":{"autoscaling.knative.dev/max-scale":"0"}}}}}`) - ksvc := &servingv1.Service{} - ksvc.Name = name - ksvc.Namespace = c.namespace - if err := c.k8sClient.Patch(ctx, ksvc, client.RawPatch(types.MergePatchType, patch)); err != nil { - if k8serrors.IsNotFound(err) { - // Already stopped/deleted — treat as success. - return nil - } - return fmt.Errorf("failed to patch KService %s to stop: %w", name, err) - } - logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", c.namespace, name) - return nil -} - -// GetStatus reads the KService and maps its conditions to a flyteapp.Status proto. -func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) { - name := kserviceName(appID) - ksvc := &servingv1.Service{} - if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, ksvc); err != nil { - if k8serrors.IsNotFound(err) { - return statusWithPhase(flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, "KService not found"), nil - } - return nil, fmt.Errorf("failed to get KService %s: %w", name, err) - } - return kserviceToStatus(ksvc), nil -} - -// List returns all apps for the given project/domain by listing KServices with label selectors. -func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) { - list := &servingv1.ServiceList{} - if err := c.k8sClient.List(ctx, list, - client.InNamespace(c.namespace), - client.MatchingLabels{ - labelProject: project, - labelDomain: domain, - }, - ); err != nil { - return nil, fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) - } - - apps := make([]*flyteapp.App, 0, len(list.Items)) - for i := range list.Items { - a, err := kserviceToApp(&list.Items[i]) - if err != nil { - logger.Warnf(ctx, "Skipping KService %s: failed to convert to app: %v", list.Items[i].Name, err) - continue - } - apps = append(apps, a) - } - return apps, nil -} - -// --- Helpers --- - -// kserviceName builds the KService name from an app identifier. -// Format: "{project}-{domain}-{name}", truncated to 63 chars. -func kserviceName(id *flyteapp.Identifier) string { - name := fmt.Sprintf("%s-%s-%s", id.GetProject(), id.GetDomain(), id.GetName()) - if len(name) > maxKServiceNameLen { - name = name[:maxKServiceNameLen] - } - return strings.ToLower(name) -} - -// specSHA computes a SHA256 digest of the serialized App Spec proto. -func specSHA(spec *flyteapp.Spec) (string, error) { - b, err := proto.Marshal(spec) - if err != nil { - return "", fmt.Errorf("failed to marshal spec: %w", err) - } - sum := sha256.Sum256(b) - return hex.EncodeToString(sum[:8]), nil // 8 bytes = 16 hex chars, enough for change detection -} - -// buildKService constructs a Knative Service manifest from an App proto. -func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, error) { - appID := app.GetMetadata().GetId() - spec := app.GetSpec() - name := kserviceName(appID) - - sha, err := specSHA(spec) - if err != nil { - return nil, err - } - - podSpec, err := buildPodSpec(spec) - if err != nil { - return nil, err - } - - templateAnnotations := buildAutoscalingAnnotations(spec, c.cfg) - - timeoutSecs := c.cfg.DefaultRequestTimeout.Seconds() - if t := spec.GetTimeouts().GetRequestTimeout(); t != nil { - timeoutSecs = t.AsDuration().Seconds() - if timeoutSecs > c.cfg.MaxRequestTimeout.Seconds() { - timeoutSecs = c.cfg.MaxRequestTimeout.Seconds() - } - } - timeoutSecsInt := int64(timeoutSecs) - - ksvc := &servingv1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: c.namespace, - Labels: map[string]string{ - labelAppManaged: "true", - labelProject: appID.GetProject(), - labelDomain: appID.GetDomain(), - labelAppName: appID.GetName(), - }, - Annotations: map[string]string{ - annotationSpecSHA: sha, - annotationAppID: fmt.Sprintf("%s/%s/%s", appID.GetProject(), appID.GetDomain(), appID.GetName()), - }, - }, - Spec: servingv1.ServiceSpec{ - ConfigurationSpec: servingv1.ConfigurationSpec{ - Template: servingv1.RevisionTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: templateAnnotations, - }, - Spec: servingv1.RevisionSpec{ - PodSpec: podSpec, - TimeoutSeconds: &timeoutSecsInt, - }, - }, - }, - }, - } - return ksvc, nil -} - -// buildPodSpec constructs a corev1.PodSpec from an App Spec. -// Supports Container payload only for now; K8sPod support can be added in a follow-up. -func buildPodSpec(spec *flyteapp.Spec) (corev1.PodSpec, error) { - switch p := spec.GetAppPayload().(type) { - case *flyteapp.Spec_Container: - c := p.Container - container := corev1.Container{ - Name: "app", - Image: c.GetImage(), - Args: c.GetArgs(), - } - for _, e := range c.GetEnv() { - container.Env = append(container.Env, corev1.EnvVar{ - Name: e.GetKey(), - Value: e.GetValue(), - }) - } - return corev1.PodSpec{Containers: []corev1.Container{container}}, nil - - case *flyteapp.Spec_Pod: - // K8sPod payloads are not yet supported — the pod spec serialization - // from flyteplugins is needed for a complete implementation. - return corev1.PodSpec{}, fmt.Errorf("K8sPod app payload is not yet supported") - - default: - return corev1.PodSpec{}, fmt.Errorf("app spec has no payload (container or pod required)") - } -} - -// buildAutoscalingAnnotations returns the Knative autoscaling annotations for the revision template. -func buildAutoscalingAnnotations(spec *flyteapp.Spec, cfg *config.AppConfig) map[string]string { - annotations := map[string]string{} - autoscaling := spec.GetAutoscaling() - if autoscaling == nil { - return annotations - } - - if r := autoscaling.GetReplicas(); r != nil { - annotations["autoscaling.knative.dev/min-scale"] = fmt.Sprintf("%d", r.GetMin()) - annotations["autoscaling.knative.dev/max-scale"] = fmt.Sprintf("%d", r.GetMax()) - } - - if m := autoscaling.GetScalingMetric(); m != nil { - switch metric := m.GetMetric().(type) { - case *flyteapp.ScalingMetric_RequestRate: - annotations["autoscaling.knative.dev/metric"] = "rps" - annotations["autoscaling.knative.dev/target"] = fmt.Sprintf("%d", metric.RequestRate.GetTargetValue()) - case *flyteapp.ScalingMetric_Concurrency: - annotations["autoscaling.knative.dev/metric"] = "concurrency" - annotations["autoscaling.knative.dev/target"] = fmt.Sprintf("%d", metric.Concurrency.GetTargetValue()) - } - } - - if p := autoscaling.GetScaledownPeriod(); p != nil { - annotations["autoscaling.knative.dev/window"] = p.AsDuration().String() - } - - return annotations -} - -// statusWithPhase builds a flyteapp.Status with a single Condition set to the given phase. -func statusWithPhase(phase flyteapp.Status_DeploymentStatus, message string) *flyteapp.Status { - return &flyteapp.Status{ - Conditions: []*flyteapp.Condition{ - { - DeploymentStatus: phase, - Message: message, - LastTransitionTime: timestamppb.Now(), - }, - }, - } -} - -// kserviceToStatus maps a KService's conditions to a flyteapp.Status proto. -func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { - var phase flyteapp.Status_DeploymentStatus - var message string - - // Check if max-scale=0 is set — explicitly stopped by the control plane. - if ann := ksvc.Spec.Template.Annotations; ann != nil { - if ann["autoscaling.knative.dev/max-scale"] == maxScaleZero { - phase = flyteapp.Status_DEPLOYMENT_STATUS_STOPPED - message = "App scaled to zero" - } - } - - if phase == flyteapp.Status_DEPLOYMENT_STATUS_UNSPECIFIED { - switch { - case ksvc.IsReady(): - phase = flyteapp.Status_DEPLOYMENT_STATUS_ACTIVE - case ksvc.IsFailed(): - phase = flyteapp.Status_DEPLOYMENT_STATUS_FAILED - if c := ksvc.Status.GetCondition(servingv1.ServiceConditionReady); c != nil { - message = c.Message - } - case ksvc.Status.LatestCreatedRevisionName != ksvc.Status.LatestReadyRevisionName: - phase = flyteapp.Status_DEPLOYMENT_STATUS_DEPLOYING - default: - phase = flyteapp.Status_DEPLOYMENT_STATUS_PENDING - } - } - - status := statusWithPhase(phase, message) - - // Populate ingress URL from KService route status. - if url := ksvc.Status.URL; url != nil { - status.Ingress = &flyteapp.Ingress{ - PublicUrl: url.String(), - } - } - - // Populate current replica count and K8s namespace metadata. - status.CurrentReplicas = uint32(len(ksvc.Status.Traffic)) - status.K8SMetadata = &flyteapp.K8SMetadata{ - Namespace: ksvc.Namespace, - } - - return status -} - -// kserviceToApp reconstructs a flyteapp.App from a KService by reading the -// app identifier from annotations and the live status from KService conditions. -func kserviceToApp(ksvc *servingv1.Service) (*flyteapp.App, error) { - appIDStr, ok := ksvc.Annotations[annotationAppID] - if !ok { - return nil, fmt.Errorf("KService %s missing %s annotation", ksvc.Name, annotationAppID) - } - - // annotation format: "{project}/{domain}/{name}" - parts := strings.SplitN(appIDStr, "/", 3) - if len(parts) != 3 { - return nil, fmt.Errorf("KService %s has malformed %s annotation: %q", ksvc.Name, annotationAppID, appIDStr) - } - - appID := &flyteapp.Identifier{ - Project: parts[0], - Domain: parts[1], - Name: parts[2], - } - - return &flyteapp.App{ - Metadata: &flyteapp.Meta{ - Id: appID, - }, - Status: kserviceToStatus(ksvc), - }, nil -} diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index ce24654367..bd6b189b7d 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -18,7 +18,7 @@ import ( ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/flyteorg/flyte/v2/app/config" + "github.com/flyteorg/flyte/v2/actions/config" "github.com/flyteorg/flyte/v2/flytestdlib/logger" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" ) @@ -49,14 +49,11 @@ type AppK8sClientInterface interface { Stop(ctx context.Context, appID *flyteapp.Identifier) error // GetStatus reads the KService and maps its conditions to a DeploymentStatus. - // Returns a not-found error (checkable with k8serrors.IsNotFound) if the KService does not exist. + // Returns a Status with STOPPED if the KService does not exist. GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) - // List returns apps for the given project/domain scope with optional pagination. - // If appName is non-empty, only the app with that name is returned. - // limit=0 means no limit. token is the K8s continue token from a previous call. - // Returns the apps, the continue token for the next page (empty if last page), and any error. - List(ctx context.Context, project, domain, appName string, limit uint32, token string) ([]*flyteapp.App, string, error) + // List returns all apps (spec + live status) for the given project/domain scope. + List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) // Delete removes the KService CRD entirely. The app must be re-created from scratch. // Use Stop to scale to zero while preserving the KService. @@ -69,15 +66,15 @@ type AppK8sClientInterface interface { DeleteReplica(ctx context.Context, replicaID *flyteapp.ReplicaIdentifier) error // Watch returns a channel of WatchResponse events for KServices matching the - // given project/domain scope. If appName is non-empty, only events for that - // specific app are returned. The channel is closed when ctx is cancelled. - Watch(ctx context.Context, project, domain, appName string) (<-chan *flyteapp.WatchResponse, error) + // given project/domain scope. The channel is closed when ctx is cancelled. + Watch(ctx context.Context, project, domain string) (<-chan *flyteapp.WatchResponse, error) } // AppK8sClient implements AppK8sClientInterface using controller-runtime. type AppK8sClient struct { k8sClient client.WithWatch cache ctrlcache.Cache + namespace string cfg *config.AppConfig } @@ -86,20 +83,14 @@ func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *con return &AppK8sClient{ k8sClient: k8sClient, cache: cache, + namespace: cfg.Namespace, cfg: cfg, } } -// appNamespace returns the K8s namespace for a given project/domain pair. -// Follows the same convention as the Actions and Secret services: "{project}-{domain}". -func appNamespace(project, domain string) string { - return fmt.Sprintf("%s-%s", project, domain) -} - // Deploy creates or updates the KService for the given app. func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { appID := app.GetMetadata().GetId() - ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc, err := c.buildKService(app) @@ -108,12 +99,12 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { } existing := &servingv1.Service{} - err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: ns}, existing) + err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, existing) if k8serrors.IsNotFound(err) { if err := c.k8sClient.Create(ctx, ksvc); err != nil { return fmt.Errorf("failed to create KService %s: %w", name, err) } - logger.Infof(ctx, "Created KService %s/%s", ns, name) + logger.Infof(ctx, "Created KService %s/%s", c.namespace, name) return nil } if err != nil { @@ -122,7 +113,7 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { // Skip update if spec has not changed. if existing.Annotations[annotationSpecSHA] == ksvc.Annotations[annotationSpecSHA] { - logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", ns, name) + logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", c.namespace, name) return nil } @@ -132,18 +123,17 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { if err := c.k8sClient.Update(ctx, existing); err != nil { return fmt.Errorf("failed to update KService %s: %w", name, err) } - logger.Infof(ctx, "Updated KService %s/%s", ns, name) + logger.Infof(ctx, "Updated KService %s/%s", c.namespace, name) return nil } // Stop sets max-scale=0 on the KService, scaling it to zero without deleting it. func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) error { - ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) patch := []byte(`{"spec":{"template":{"metadata":{"annotations":{"autoscaling.knative.dev/max-scale":"0"}}}}}`) ksvc := &servingv1.Service{} ksvc.Name = name - ksvc.Namespace = ns + ksvc.Namespace = c.namespace if err := c.k8sClient.Patch(ctx, ksvc, client.RawPatch(types.MergePatchType, patch)); err != nil { if k8serrors.IsNotFound(err) { // Already stopped/deleted — treat as success. @@ -151,45 +141,44 @@ func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) err } return fmt.Errorf("failed to patch KService %s to stop: %w", name, err) } - logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", ns, name) + logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", c.namespace, name) return nil } // Delete removes the KService CRD for the given app entirely. func (c *AppK8sClient) Delete(ctx context.Context, appID *flyteapp.Identifier) error { - ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc := &servingv1.Service{} ksvc.Name = name - ksvc.Namespace = ns + ksvc.Namespace = c.namespace if err := c.k8sClient.Delete(ctx, ksvc); err != nil { if k8serrors.IsNotFound(err) { return nil } return fmt.Errorf("failed to delete KService %s: %w", name, err) } - logger.Infof(ctx, "Deleted KService %s/%s", ns, name) + logger.Infof(ctx, "Deleted KService %s/%s", c.namespace, name) return nil } // Watch returns a channel of WatchResponse events for KServices in the given -// project/domain scope. If appName is non-empty, only events for that specific -// app are returned. The channel is closed when ctx is cancelled or the -// underlying watch terminates. -func (c *AppK8sClient) Watch(ctx context.Context, project, domain, appName string) (<-chan *flyteapp.WatchResponse, error) { - ns := appNamespace(project, domain) - - labels := map[string]string{labelAppManaged: "true"} - if appName != "" { - labels[labelAppName] = strings.ToLower(appName) +// project/domain scope. Pass empty strings to watch all managed KServices. +// The channel is closed when ctx is cancelled or the underlying watch terminates. +func (c *AppK8sClient) Watch(ctx context.Context, project, domain string) (<-chan *flyteapp.WatchResponse, error) { + labels := client.MatchingLabels{labelAppManaged: "true"} + if project != "" { + labels[labelProject] = project + } + if domain != "" { + labels[labelDomain] = domain } watcher, err := c.k8sClient.Watch(ctx, &servingv1.ServiceList{}, - client.InNamespace(ns), - client.MatchingLabels(labels), + client.InNamespace(c.namespace), + labels, ) if err != nil { - return nil, fmt.Errorf("failed to start KService watch in namespace %s: %w", ns, err) + return nil, fmt.Errorf("failed to start KService watch for %s/%s: %w", project, domain, err) } ch := make(chan *flyteapp.WatchResponse, 64) @@ -204,7 +193,7 @@ func (c *AppK8sClient) Watch(ctx context.Context, project, domain, appName strin if !ok { return } - resp := c.kserviceEventToWatchResponse(ctx, event) + resp := kserviceEventToWatchResponse(event) if resp == nil { continue } @@ -221,12 +210,12 @@ func (c *AppK8sClient) Watch(ctx context.Context, project, domain, appName strin // kserviceEventToWatchResponse maps a K8s watch event to a flyteapp.WatchResponse. // Returns nil for event types that should not be forwarded (Error, Bookmark). -func (c *AppK8sClient) kserviceEventToWatchResponse(ctx context.Context, event k8swatch.Event) *flyteapp.WatchResponse { +func kserviceEventToWatchResponse(event k8swatch.Event) *flyteapp.WatchResponse { ksvc, ok := event.Object.(*servingv1.Service) if !ok { return nil } - app, err := c.kserviceToApp(ctx, ksvc) + app, err := kserviceToApp(ksvc) if err != nil { // KService is not managed by us — skip it. return nil @@ -257,70 +246,52 @@ func (c *AppK8sClient) kserviceEventToWatchResponse(ctx context.Context, event k // GetStatus reads the KService and maps its conditions to a flyteapp.Status proto. func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) { - ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc := &servingv1.Service{} - if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: ns}, ksvc); err != nil { + if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, ksvc); err != nil { if k8serrors.IsNotFound(err) { - return nil, fmt.Errorf("KService %s not found: %w", name, err) + return statusWithPhase(flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, "KService not found"), nil } return nil, fmt.Errorf("failed to get KService %s: %w", name, err) } - return c.kserviceToStatus(ctx, ksvc), nil + return kserviceToStatus(ksvc), nil } -// List returns apps for the given project/domain scope with optional pagination. -func (c *AppK8sClient) List(ctx context.Context, project, domain, appName string, limit uint32, token string) ([]*flyteapp.App, string, error) { - ns := appNamespace(project, domain) - - matchLabels := client.MatchingLabels{labelAppManaged: "true"} - if appName != "" { - matchLabels[labelAppName] = strings.ToLower(appName) - } - listOpts := []client.ListOption{ - client.InNamespace(ns), - matchLabels, - } - if limit > 0 { - listOpts = append(listOpts, client.Limit(int64(limit))) - } - if token != "" { - listOpts = append(listOpts, client.Continue(token)) - } - +// List returns all apps for the given project/domain by listing KServices with label selectors. +func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) { list := &servingv1.ServiceList{} - if err := c.k8sClient.List(ctx, list, listOpts...); err != nil { - return nil, "", fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) + if err := c.k8sClient.List(ctx, list, + client.InNamespace(c.namespace), + client.MatchingLabels{ + labelProject: project, + labelDomain: domain, + }, + ); err != nil { + return nil, fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) } apps := make([]*flyteapp.App, 0, len(list.Items)) for i := range list.Items { - a, err := c.kserviceToApp(ctx, &list.Items[i]) + a, err := kserviceToApp(&list.Items[i]) if err != nil { logger.Warnf(ctx, "Skipping KService %s: failed to convert to app: %v", list.Items[i].Name, err) continue } apps = append(apps, a) } - return apps, list.Continue, nil + return apps, nil } // --- Helpers --- -// kserviceName returns the KService name for an app. Since each app is deployed -// to its own project/domain namespace, the name only needs to be unique within -// that namespace — the app name alone suffices. -// Names are lower-cased and capped at 63 chars (K8s DNS label limit). For names -// that exceed 63 chars, the first 54 chars are kept and an 8-char SHA256 suffix -// is appended to avoid collisions between names with a long common prefix. +// kserviceName builds the KService name from an app identifier. +// Format: "{project}-{domain}-{name}", truncated to 63 chars. func kserviceName(id *flyteapp.Identifier) string { - name := strings.ToLower(id.GetName()) - if len(name) <= maxKServiceNameLen { - return name + name := fmt.Sprintf("%s-%s-%s", id.GetProject(), id.GetDomain(), id.GetName()) + if len(name) > maxKServiceNameLen { + name = name[:maxKServiceNameLen] } - sum := sha256.Sum256([]byte(name)) - suffix := hex.EncodeToString(sum[:4]) // 4 bytes = 8 hex chars - return name[:maxKServiceNameLen-9] + "-" + suffix + return strings.ToLower(name) } // specSHA computes a SHA256 digest of the serialized App Spec proto. @@ -338,7 +309,6 @@ func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, err appID := app.GetMetadata().GetId() spec := app.GetSpec() name := kserviceName(appID) - ns := appNamespace(appID.GetProject(), appID.GetDomain()) sha, err := specSHA(spec) if err != nil { @@ -364,7 +334,7 @@ func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, err ksvc := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: ns, + Namespace: c.namespace, Labels: map[string]string{ labelAppManaged: "true", labelProject: appID.GetProject(), @@ -467,8 +437,7 @@ func statusWithPhase(phase flyteapp.Status_DeploymentStatus, message string) *fl } // kserviceToStatus maps a KService's conditions to a flyteapp.Status proto. -// It fetches the latest ready Revision to read the accurate ActualReplicas count. -func (c *AppK8sClient) kserviceToStatus(ctx context.Context, ksvc *servingv1.Service) *flyteapp.Status { +func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { var phase flyteapp.Status_DeploymentStatus var message string @@ -505,15 +474,8 @@ func (c *AppK8sClient) kserviceToStatus(ctx context.Context, ksvc *servingv1.Ser } } - // Populate current replica count from the latest ready Revision. - if revName := ksvc.Status.LatestReadyRevisionName; revName != "" { - rev := &servingv1.Revision{} - if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: revName, Namespace: ksvc.Namespace}, rev); err == nil { - if rev.Status.ActualReplicas != nil { - status.CurrentReplicas = uint32(*rev.Status.ActualReplicas) - } - } - } + // Populate current replica count and K8s namespace metadata. + status.CurrentReplicas = uint32(len(ksvc.Status.Traffic)) status.K8SMetadata = &flyteapp.K8SMetadata{ Namespace: ksvc.Namespace, } @@ -521,13 +483,17 @@ func (c *AppK8sClient) kserviceToStatus(ctx context.Context, ksvc *servingv1.Ser return status } -// GetReplicas lists the pods currently backing the given app. +// GetReplicas lists the pods currently backing the given app by matching +// the flyte.org/project, flyte.org/domain, and flyte.org/app-name labels. func (c *AppK8sClient) GetReplicas(ctx context.Context, appID *flyteapp.Identifier) ([]*flyteapp.Replica, error) { - ns := appNamespace(appID.GetProject(), appID.GetDomain()) podList := &corev1.PodList{} if err := c.k8sClient.List(ctx, podList, - client.InNamespace(ns), - client.MatchingLabels{labelAppName: appID.GetName()}, + client.InNamespace(c.namespace), + client.MatchingLabels{ + labelProject: appID.GetProject(), + labelDomain: appID.GetDomain(), + labelAppName: appID.GetName(), + }, ); err != nil { return nil, fmt.Errorf("failed to list pods for app %s/%s/%s: %w", appID.GetProject(), appID.GetDomain(), appID.GetName(), err) @@ -542,18 +508,16 @@ func (c *AppK8sClient) GetReplicas(ctx context.Context, appID *flyteapp.Identifi // DeleteReplica force-deletes a specific pod. Knative will schedule a replacement automatically. func (c *AppK8sClient) DeleteReplica(ctx context.Context, replicaID *flyteapp.ReplicaIdentifier) error { - appID := replicaID.GetAppId() - ns := appNamespace(appID.GetProject(), appID.GetDomain()) pod := &corev1.Pod{} pod.Name = replicaID.GetName() - pod.Namespace = ns + pod.Namespace = c.namespace if err := c.k8sClient.Delete(ctx, pod); err != nil { if k8serrors.IsNotFound(err) { return nil } - return fmt.Errorf("failed to delete pod %s/%s: %w", ns, replicaID.GetName(), err) + return fmt.Errorf("failed to delete pod %s/%s: %w", c.namespace, replicaID.GetName(), err) } - logger.Infof(ctx, "Deleted replica pod %s/%s", ns, replicaID.GetName()) + logger.Infof(ctx, "Deleted replica pod %s/%s", c.namespace, replicaID.GetName()) return nil } @@ -611,7 +575,7 @@ func podDeploymentStatus(pod *corev1.Pod) (string, string) { // kserviceToApp reconstructs a flyteapp.App from a KService by reading the // app identifier from annotations and the live status from KService conditions. -func (c *AppK8sClient) kserviceToApp(ctx context.Context, ksvc *servingv1.Service) (*flyteapp.App, error) { +func kserviceToApp(ksvc *servingv1.Service) (*flyteapp.App, error) { appIDStr, ok := ksvc.Annotations[annotationAppID] if !ok { return nil, fmt.Errorf("KService %s missing %s annotation", ksvc.Name, annotationAppID) @@ -633,6 +597,6 @@ func (c *AppK8sClient) kserviceToApp(ctx context.Context, ksvc *servingv1.Servic Metadata: &flyteapp.Meta{ Id: appID, }, - Status: c.kserviceToStatus(ctx, ksvc), + Status: kserviceToStatus(ksvc), }, nil } diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index 7fa30910c2..2a7bc2c16d 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -2,8 +2,6 @@ package k8s import ( "context" - "crypto/sha256" - "encoding/hex" "testing" "time" @@ -18,12 +16,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "github.com/flyteorg/flyte/v2/app/config" + "github.com/flyteorg/flyte/v2/actions/config" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" ) -// testScheme builds a runtime.Scheme with Knative and core types registered. +// testScheme builds a runtime.Scheme with Knative types registered. func testScheme(t *testing.T) *runtime.Scheme { t.Helper() s := runtime.NewScheme() @@ -32,19 +30,6 @@ func testScheme(t *testing.T) *runtime.Scheme { return s } -// testRevision builds a Knative Revision object with a given ActualReplicas count. -func testRevision(name, namespace string, actualReplicas int32) *servingv1.Revision { - return &servingv1.Revision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - Status: servingv1.RevisionStatus{ - ActualReplicas: &actualReplicas, - }, - } -} - // testClient builds an AppK8sClient backed by a fake K8s client. func testClient(t *testing.T, objs ...client.Object) *AppK8sClient { t.Helper() @@ -53,12 +38,15 @@ func testClient(t *testing.T, objs ...client.Object) *AppK8sClient { WithScheme(s). WithObjects(objs...). Build() + cfg := &config.AppConfig{ + Namespace: "flyte-apps", + DefaultRequestTimeout: 5 * time.Minute, + MaxRequestTimeout: time.Hour, + } return &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{ - DefaultRequestTimeout: 5 * time.Minute, - MaxRequestTimeout: time.Hour, - }, + namespace: cfg.Namespace, + cfg: cfg, } } @@ -91,7 +79,7 @@ func TestDeploy_Create(t *testing.T) { ksvc := &servingv1.Service{} err = c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc) require.NoError(t, err) assert.Equal(t, "proj", ksvc.Labels[labelProject]) assert.Equal(t, "dev", ksvc.Labels[labelDomain]) @@ -111,7 +99,7 @@ func TestDeploy_UpdateOnSpecChange(t *testing.T) { ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) assert.Equal(t, "nginx:2.0", ksvc.Spec.Template.Spec.Containers[0].Image) } @@ -123,14 +111,14 @@ func TestDeploy_SkipUpdateWhenUnchanged(t *testing.T) { // Get initial resource version. ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) initialRV := ksvc.ResourceVersion // Deploy same spec — should be a no-op. require.NoError(t, c.Deploy(context.Background(), app)) require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) assert.Equal(t, initialRV, ksvc.ResourceVersion, "resource version should not change on no-op deploy") } @@ -144,7 +132,7 @@ func TestStop(t *testing.T) { ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) assert.Equal(t, "0", ksvc.Spec.Template.Annotations["autoscaling.knative.dev/max-scale"]) } @@ -165,7 +153,7 @@ func TestDelete(t *testing.T) { ksvc := &servingv1.Service{} err := c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc) + client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc) assert.True(t, k8serrors.IsNotFound(err)) } @@ -179,9 +167,9 @@ func TestGetStatus_NotFound(t *testing.T) { c := testClient(t) id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "missing"} status, err := c.GetStatus(context.Background(), id) - require.Error(t, err) - assert.True(t, k8serrors.IsNotFound(err)) - assert.Nil(t, status) + require.NoError(t, err) + require.Len(t, status.Conditions, 1) + assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, status.Conditions[0].DeploymentStatus) } func TestGetStatus_Stopped(t *testing.T) { @@ -198,57 +186,17 @@ func TestGetStatus_Stopped(t *testing.T) { assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, status.Conditions[0].DeploymentStatus) } -func TestGetStatus_CurrentReplicas(t *testing.T) { - s := testScheme(t) - // Pre-populate a KService with LatestReadyRevisionName already set in status, - // and the corresponding Revision with ActualReplicas=4. - ksvc := &servingv1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "myapp", - Namespace: "proj-dev", - Labels: map[string]string{ - labelAppManaged: "true", - labelProject: "proj", - labelDomain: "dev", - labelAppName: "myapp", - }, - Annotations: map[string]string{ - annotationAppID: "proj/dev/myapp", - }, - }, - } - ksvc.Status.LatestReadyRevisionName = "myapp-00001" - - rev := testRevision("myapp-00001", "proj-dev", 4) - - fc := fake.NewClientBuilder(). - WithScheme(s). - WithObjects(ksvc, rev). - WithStatusSubresource(ksvc). - Build() - c := &AppK8sClient{ - k8sClient: fc, - cfg: &config.AppConfig{}, - } - - id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} - status, err := c.GetStatus(context.Background(), id) - require.NoError(t, err) - assert.Equal(t, uint32(4), status.CurrentReplicas) -} - func TestList(t *testing.T) { s := testScheme(t) // Pre-populate two KServices with different project labels. ksvc1 := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "app1", - Namespace: "proj-dev", + Name: "proj-dev-app1", + Namespace: "flyte-apps", Labels: map[string]string{ - labelAppManaged: "true", - labelProject: "proj", - labelDomain: "dev", - labelAppName: "app1", + labelProject: "proj", + labelDomain: "dev", + labelAppName: "app1", }, Annotations: map[string]string{ annotationAppID: "proj/dev/app1", @@ -257,13 +205,12 @@ func TestList(t *testing.T) { } ksvc2 := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "app2", - Namespace: "other-dev", + Name: "other-dev-app2", + Namespace: "flyte-apps", Labels: map[string]string{ - labelAppManaged: "true", - labelProject: "other", - labelDomain: "dev", - labelAppName: "app2", + labelProject: "other", + labelDomain: "dev", + labelAppName: "app2", }, Annotations: map[string]string{ annotationAppID: "other/dev/app2", @@ -277,64 +224,30 @@ func TestList(t *testing.T) { Build() c := &AppK8sClient{ k8sClient: fc, + namespace: "flyte-apps", cfg: &config.AppConfig{ + Namespace: "flyte-apps", DefaultRequestTimeout: 5 * time.Minute, MaxRequestTimeout: time.Hour, }, } - apps, nextToken, err := c.List(context.Background(), "proj", "dev", "", 0, "") + apps, err := c.List(context.Background(), "proj", "dev") require.NoError(t, err) - assert.Empty(t, nextToken) require.Len(t, apps, 1) assert.Equal(t, "proj", apps[0].Metadata.Id.Project) assert.Equal(t, "app1", apps[0].Metadata.Id.Name) } -func TestList_ByAppName(t *testing.T) { - s := testScheme(t) - ksvc1 := &servingv1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "app1", - Namespace: "proj-dev", - Labels: map[string]string{ - labelAppManaged: "true", - labelProject: "proj", - labelDomain: "dev", - labelAppName: "app1", - }, - Annotations: map[string]string{annotationAppID: "proj/dev/app1"}, - }, - } - ksvc2 := &servingv1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: "app2", - Namespace: "proj-dev", - Labels: map[string]string{ - labelAppManaged: "true", - labelProject: "proj", - labelDomain: "dev", - labelAppName: "app2", - }, - Annotations: map[string]string{annotationAppID: "proj/dev/app2"}, - }, - } - fc := fake.NewClientBuilder().WithScheme(s).WithObjects(ksvc1, ksvc2).Build() - c := &AppK8sClient{k8sClient: fc, cfg: &config.AppConfig{}} - - apps, _, err := c.List(context.Background(), "proj", "dev", "app1", 0, "") - require.NoError(t, err) - require.Len(t, apps, 1) - assert.Equal(t, "app1", apps[0].Metadata.Id.Name) -} - func TestGetReplicas(t *testing.T) { s := testScheme(t) pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "myapp-abc", - Namespace: "proj-dev", + Name: "proj-dev-myapp-abc", + Namespace: "flyte-apps", Labels: map[string]string{ + labelProject: "proj", + labelDomain: "dev", labelAppName: "myapp", }, }, @@ -348,14 +261,15 @@ func TestGetReplicas(t *testing.T) { fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{}, + namespace: "flyte-apps", + cfg: &config.AppConfig{Namespace: "flyte-apps"}, } id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} replicas, err := c.GetReplicas(context.Background(), id) require.NoError(t, err) require.Len(t, replicas, 1) - assert.Equal(t, "myapp-abc", replicas[0].Metadata.Id.Name) + assert.Equal(t, "proj-dev-myapp-abc", replicas[0].Metadata.Id.Name) assert.Equal(t, "ACTIVE", replicas[0].Status.DeploymentStatus) } @@ -363,32 +277,33 @@ func TestDeleteReplica(t *testing.T) { s := testScheme(t) pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "myapp-abc", - Namespace: "proj-dev", + Name: "proj-dev-myapp-abc", + Namespace: "flyte-apps", }, } fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{}, + namespace: "flyte-apps", + cfg: &config.AppConfig{Namespace: "flyte-apps"}, } replicaID := &flyteapp.ReplicaIdentifier{ AppId: &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"}, - Name: "myapp-abc", + Name: "proj-dev-myapp-abc", } require.NoError(t, c.DeleteReplica(context.Background(), replicaID)) err := fc.Get(context.Background(), - client.ObjectKey{Name: "myapp-abc", Namespace: "proj-dev"}, &corev1.Pod{}) + client.ObjectKey{Name: "proj-dev-myapp-abc", Namespace: "flyte-apps"}, &corev1.Pod{}) assert.True(t, k8serrors.IsNotFound(err)) } func TestKserviceEventToWatchResponse(t *testing.T) { ksvc := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "myapp", - Namespace: "proj-dev", + Name: "proj-dev-myapp", + Namespace: "flyte-apps", Annotations: map[string]string{ annotationAppID: "proj/dev/myapp", }, @@ -407,10 +322,9 @@ func TestKserviceEventToWatchResponse(t *testing.T) { {k8swatch.Bookmark, true, ""}, } - c := testClient(t) for _, tt := range tests { t.Run(string(tt.eventType), func(t *testing.T) { - resp := c.kserviceEventToWatchResponse(context.Background(), k8swatch.Event{ + resp := kserviceEventToWatchResponse(k8swatch.Event{ Type: tt.eventType, Object: ksvc, }) @@ -435,29 +349,22 @@ func TestKserviceEventToWatchResponse(t *testing.T) { func TestKserviceName(t *testing.T) { tests := []struct { - name string - want string + project, domain, name string + want string }{ - {"myapp", "myapp"}, - {"MyApp", "myapp"}, - // v1 and v2 variants stay distinct — no truncation collision. - {"my-long-service-name-v1", "my-long-service-name-v1"}, - {"my-long-service-name-v2", "my-long-service-name-v2"}, - // Names over 63 chars get a hash suffix instead of blind truncation. + {"proj", "dev", "myapp", "proj-dev-myapp"}, + {"P", "D", "N", "p-d-n"}, + // Long name should be truncated to 63 chars. { - "this-is-a-very-long-app-name-that-exceeds-the-kubernetes-dns-label-limit", - func() string { - name := "this-is-a-very-long-app-name-that-exceeds-the-kubernetes-dns-label-limit" - sum := sha256.Sum256([]byte(name)) - return name[:54] + "-" + hex.EncodeToString(sum[:4]) - }(), + "verylongprojectname", + "verylongdomainname", + "verylongappnamethatexceedslimit", + "verylongprojectname-verylongdomainname-verylongappnamethatexcee"[:63], }, } for _, tt := range tests { - id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: tt.name} - got := kserviceName(id) - assert.Equal(t, tt.want, got) - assert.LessOrEqual(t, len(got), maxKServiceNameLen) + id := &flyteapp.Identifier{Project: tt.project, Domain: tt.domain, Name: tt.name} + assert.Equal(t, tt.want, kserviceName(id)) } } From e36a59f1eeb421f45fc4da570f269c030bd92089 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Tue, 7 Apr 2026 15:10:02 -0700 Subject: [PATCH 06/17] move config Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/config/config.go | 5 ++--- app/internal/k8s/app_client.go | 2 +- app/internal/k8s/app_client_test.go | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/app/config/config.go b/app/config/config.go index cc2085c4ee..f8f9caa116 100644 --- a/app/config/config.go +++ b/app/config/config.go @@ -7,9 +7,8 @@ type AppConfig struct { // Enabled controls whether the app deployment controller is started. Enabled bool `json:"enabled" pflag:",Enable app deployment controller"` - // BaseDomain is the base domain used to generate public URLs for apps. - // Apps are exposed at "{name}-{project}-{domain}.{base_domain}". - BaseDomain string `json:"baseDomain" pflag:",Base domain for app public URLs"` + // Namespace is the K8s namespace where KService CRDs are created. + Namespace string `json:"namespace" pflag:",Namespace for app KServices"` // DefaultRequestTimeout is the request timeout applied to apps that don't specify one. DefaultRequestTimeout time.Duration `json:"defaultRequestTimeout" pflag:",Default request timeout for apps"` diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index bd6b189b7d..0850d92cf0 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -18,7 +18,7 @@ import ( ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/flyteorg/flyte/v2/actions/config" + "github.com/flyteorg/flyte/v2/app/config" "github.com/flyteorg/flyte/v2/flytestdlib/logger" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" ) diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index 2a7bc2c16d..99be7ba9ec 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -16,7 +16,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "github.com/flyteorg/flyte/v2/actions/config" + "github.com/flyteorg/flyte/v2/app/config" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" ) From 278be3b3ca9f622b006b7b301a802f34bde18b12 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:06:24 -0700 Subject: [PATCH 07/17] remove db.go Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- flytestdlib/app/db.go | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 flytestdlib/app/db.go diff --git a/flytestdlib/app/db.go b/flytestdlib/app/db.go deleted file mode 100644 index e69de29bb2..0000000000 From 882eaa19a018bbdd784319a9144f2c69d33e4d17 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 10:59:34 -0700 Subject: [PATCH 08/17] address comments Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/config/config.go | 3 - app/internal/k8s/app_client.go | 133 ++++++++++++---------- app/internal/k8s/app_client_test.go | 167 ++++++++++++++++++---------- 3 files changed, 184 insertions(+), 119 deletions(-) diff --git a/app/config/config.go b/app/config/config.go index f8f9caa116..99feec4549 100644 --- a/app/config/config.go +++ b/app/config/config.go @@ -7,9 +7,6 @@ type AppConfig struct { // Enabled controls whether the app deployment controller is started. Enabled bool `json:"enabled" pflag:",Enable app deployment controller"` - // Namespace is the K8s namespace where KService CRDs are created. - Namespace string `json:"namespace" pflag:",Namespace for app KServices"` - // DefaultRequestTimeout is the request timeout applied to apps that don't specify one. DefaultRequestTimeout time.Duration `json:"defaultRequestTimeout" pflag:",Default request timeout for apps"` diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index 0850d92cf0..c9b1baf268 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -49,7 +49,7 @@ type AppK8sClientInterface interface { Stop(ctx context.Context, appID *flyteapp.Identifier) error // GetStatus reads the KService and maps its conditions to a DeploymentStatus. - // Returns a Status with STOPPED if the KService does not exist. + // Returns a not-found error (checkable with k8serrors.IsNotFound) if the KService does not exist. GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) // List returns all apps (spec + live status) for the given project/domain scope. @@ -74,7 +74,6 @@ type AppK8sClientInterface interface { type AppK8sClient struct { k8sClient client.WithWatch cache ctrlcache.Cache - namespace string cfg *config.AppConfig } @@ -83,14 +82,20 @@ func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *con return &AppK8sClient{ k8sClient: k8sClient, cache: cache, - namespace: cfg.Namespace, cfg: cfg, } } +// appNamespace returns the K8s namespace for a given project/domain pair. +// Follows the same convention as the Actions and Secret services: "{project}-{domain}". +func appNamespace(project, domain string) string { + return fmt.Sprintf("%s-%s", project, domain) +} + // Deploy creates or updates the KService for the given app. func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { appID := app.GetMetadata().GetId() + ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc, err := c.buildKService(app) @@ -99,12 +104,12 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { } existing := &servingv1.Service{} - err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, existing) + err = c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: ns}, existing) if k8serrors.IsNotFound(err) { if err := c.k8sClient.Create(ctx, ksvc); err != nil { return fmt.Errorf("failed to create KService %s: %w", name, err) } - logger.Infof(ctx, "Created KService %s/%s", c.namespace, name) + logger.Infof(ctx, "Created KService %s/%s", ns, name) return nil } if err != nil { @@ -113,7 +118,7 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { // Skip update if spec has not changed. if existing.Annotations[annotationSpecSHA] == ksvc.Annotations[annotationSpecSHA] { - logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", c.namespace, name) + logger.Debugf(ctx, "KService %s/%s spec unchanged, skipping update", ns, name) return nil } @@ -123,17 +128,18 @@ func (c *AppK8sClient) Deploy(ctx context.Context, app *flyteapp.App) error { if err := c.k8sClient.Update(ctx, existing); err != nil { return fmt.Errorf("failed to update KService %s: %w", name, err) } - logger.Infof(ctx, "Updated KService %s/%s", c.namespace, name) + logger.Infof(ctx, "Updated KService %s/%s", ns, name) return nil } // Stop sets max-scale=0 on the KService, scaling it to zero without deleting it. func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) error { + ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) patch := []byte(`{"spec":{"template":{"metadata":{"annotations":{"autoscaling.knative.dev/max-scale":"0"}}}}}`) ksvc := &servingv1.Service{} ksvc.Name = name - ksvc.Namespace = c.namespace + ksvc.Namespace = ns if err := c.k8sClient.Patch(ctx, ksvc, client.RawPatch(types.MergePatchType, patch)); err != nil { if k8serrors.IsNotFound(err) { // Already stopped/deleted — treat as success. @@ -141,44 +147,38 @@ func (c *AppK8sClient) Stop(ctx context.Context, appID *flyteapp.Identifier) err } return fmt.Errorf("failed to patch KService %s to stop: %w", name, err) } - logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", c.namespace, name) + logger.Infof(ctx, "Stopped KService %s/%s (max-scale=0)", ns, name) return nil } // Delete removes the KService CRD for the given app entirely. func (c *AppK8sClient) Delete(ctx context.Context, appID *flyteapp.Identifier) error { + ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc := &servingv1.Service{} ksvc.Name = name - ksvc.Namespace = c.namespace + ksvc.Namespace = ns if err := c.k8sClient.Delete(ctx, ksvc); err != nil { if k8serrors.IsNotFound(err) { return nil } return fmt.Errorf("failed to delete KService %s: %w", name, err) } - logger.Infof(ctx, "Deleted KService %s/%s", c.namespace, name) + logger.Infof(ctx, "Deleted KService %s/%s", ns, name) return nil } // Watch returns a channel of WatchResponse events for KServices in the given -// project/domain scope. Pass empty strings to watch all managed KServices. -// The channel is closed when ctx is cancelled or the underlying watch terminates. +// project/domain scope. The channel is closed when ctx is cancelled or the +// underlying watch terminates. func (c *AppK8sClient) Watch(ctx context.Context, project, domain string) (<-chan *flyteapp.WatchResponse, error) { - labels := client.MatchingLabels{labelAppManaged: "true"} - if project != "" { - labels[labelProject] = project - } - if domain != "" { - labels[labelDomain] = domain - } - + ns := appNamespace(project, domain) watcher, err := c.k8sClient.Watch(ctx, &servingv1.ServiceList{}, - client.InNamespace(c.namespace), - labels, + client.InNamespace(ns), + client.MatchingLabels{labelAppManaged: "true"}, ) if err != nil { - return nil, fmt.Errorf("failed to start KService watch for %s/%s: %w", project, domain, err) + return nil, fmt.Errorf("failed to start KService watch in namespace %s: %w", ns, err) } ch := make(chan *flyteapp.WatchResponse, 64) @@ -193,7 +193,7 @@ func (c *AppK8sClient) Watch(ctx context.Context, project, domain string) (<-cha if !ok { return } - resp := kserviceEventToWatchResponse(event) + resp := c.kserviceEventToWatchResponse(ctx, event) if resp == nil { continue } @@ -210,12 +210,12 @@ func (c *AppK8sClient) Watch(ctx context.Context, project, domain string) (<-cha // kserviceEventToWatchResponse maps a K8s watch event to a flyteapp.WatchResponse. // Returns nil for event types that should not be forwarded (Error, Bookmark). -func kserviceEventToWatchResponse(event k8swatch.Event) *flyteapp.WatchResponse { +func (c *AppK8sClient) kserviceEventToWatchResponse(ctx context.Context, event k8swatch.Event) *flyteapp.WatchResponse { ksvc, ok := event.Object.(*servingv1.Service) if !ok { return nil } - app, err := kserviceToApp(ksvc) + app, err := c.kserviceToApp(ctx, ksvc) if err != nil { // KService is not managed by us — skip it. return nil @@ -246,33 +246,33 @@ func kserviceEventToWatchResponse(event k8swatch.Event) *flyteapp.WatchResponse // GetStatus reads the KService and maps its conditions to a flyteapp.Status proto. func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) { + ns := appNamespace(appID.GetProject(), appID.GetDomain()) name := kserviceName(appID) ksvc := &servingv1.Service{} - if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: c.namespace}, ksvc); err != nil { + if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: name, Namespace: ns}, ksvc); err != nil { if k8serrors.IsNotFound(err) { - return statusWithPhase(flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, "KService not found"), nil + return nil, fmt.Errorf("KService %s not found: %w", name, err) } return nil, fmt.Errorf("failed to get KService %s: %w", name, err) } - return kserviceToStatus(ksvc), nil + return c.kserviceToStatus(ctx, ksvc), nil } -// List returns all apps for the given project/domain by listing KServices with label selectors. +// List returns all apps for the given project/domain by listing KServices in the +// project/domain namespace. func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) { + ns := appNamespace(project, domain) list := &servingv1.ServiceList{} if err := c.k8sClient.List(ctx, list, - client.InNamespace(c.namespace), - client.MatchingLabels{ - labelProject: project, - labelDomain: domain, - }, + client.InNamespace(ns), + client.MatchingLabels{labelAppManaged: "true"}, ); err != nil { return nil, fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) } apps := make([]*flyteapp.App, 0, len(list.Items)) for i := range list.Items { - a, err := kserviceToApp(&list.Items[i]) + a, err := c.kserviceToApp(ctx, &list.Items[i]) if err != nil { logger.Warnf(ctx, "Skipping KService %s: failed to convert to app: %v", list.Items[i].Name, err) continue @@ -284,14 +284,20 @@ func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*fly // --- Helpers --- -// kserviceName builds the KService name from an app identifier. -// Format: "{project}-{domain}-{name}", truncated to 63 chars. +// kserviceName returns the KService name for an app. Since each app is deployed +// to its own project/domain namespace, the name only needs to be unique within +// that namespace — the app name alone suffices. +// Names are lower-cased and capped at 63 chars (K8s DNS label limit). For names +// that exceed 63 chars, the first 54 chars are kept and an 8-char SHA256 suffix +// is appended to avoid collisions between names with a long common prefix. func kserviceName(id *flyteapp.Identifier) string { - name := fmt.Sprintf("%s-%s-%s", id.GetProject(), id.GetDomain(), id.GetName()) - if len(name) > maxKServiceNameLen { - name = name[:maxKServiceNameLen] + name := strings.ToLower(id.GetName()) + if len(name) <= maxKServiceNameLen { + return name } - return strings.ToLower(name) + sum := sha256.Sum256([]byte(name)) + suffix := hex.EncodeToString(sum[:4]) // 4 bytes = 8 hex chars + return name[:maxKServiceNameLen-9] + "-" + suffix } // specSHA computes a SHA256 digest of the serialized App Spec proto. @@ -309,6 +315,7 @@ func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, err appID := app.GetMetadata().GetId() spec := app.GetSpec() name := kserviceName(appID) + ns := appNamespace(appID.GetProject(), appID.GetDomain()) sha, err := specSHA(spec) if err != nil { @@ -334,7 +341,7 @@ func (c *AppK8sClient) buildKService(app *flyteapp.App) (*servingv1.Service, err ksvc := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: name, - Namespace: c.namespace, + Namespace: ns, Labels: map[string]string{ labelAppManaged: "true", labelProject: appID.GetProject(), @@ -437,7 +444,8 @@ func statusWithPhase(phase flyteapp.Status_DeploymentStatus, message string) *fl } // kserviceToStatus maps a KService's conditions to a flyteapp.Status proto. -func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { +// It fetches the latest ready Revision to read the accurate ActualReplicas count. +func (c *AppK8sClient) kserviceToStatus(ctx context.Context, ksvc *servingv1.Service) *flyteapp.Status { var phase flyteapp.Status_DeploymentStatus var message string @@ -474,8 +482,15 @@ func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { } } - // Populate current replica count and K8s namespace metadata. - status.CurrentReplicas = uint32(len(ksvc.Status.Traffic)) + // Populate current replica count from the latest ready Revision. + if revName := ksvc.Status.LatestReadyRevisionName; revName != "" { + rev := &servingv1.Revision{} + if err := c.k8sClient.Get(ctx, client.ObjectKey{Name: revName, Namespace: ksvc.Namespace}, rev); err == nil { + if rev.Status.ActualReplicas != nil { + status.CurrentReplicas = uint32(*rev.Status.ActualReplicas) + } + } + } status.K8SMetadata = &flyteapp.K8SMetadata{ Namespace: ksvc.Namespace, } @@ -483,17 +498,13 @@ func kserviceToStatus(ksvc *servingv1.Service) *flyteapp.Status { return status } -// GetReplicas lists the pods currently backing the given app by matching -// the flyte.org/project, flyte.org/domain, and flyte.org/app-name labels. +// GetReplicas lists the pods currently backing the given app. func (c *AppK8sClient) GetReplicas(ctx context.Context, appID *flyteapp.Identifier) ([]*flyteapp.Replica, error) { + ns := appNamespace(appID.GetProject(), appID.GetDomain()) podList := &corev1.PodList{} if err := c.k8sClient.List(ctx, podList, - client.InNamespace(c.namespace), - client.MatchingLabels{ - labelProject: appID.GetProject(), - labelDomain: appID.GetDomain(), - labelAppName: appID.GetName(), - }, + client.InNamespace(ns), + client.MatchingLabels{labelAppName: appID.GetName()}, ); err != nil { return nil, fmt.Errorf("failed to list pods for app %s/%s/%s: %w", appID.GetProject(), appID.GetDomain(), appID.GetName(), err) @@ -508,16 +519,18 @@ func (c *AppK8sClient) GetReplicas(ctx context.Context, appID *flyteapp.Identifi // DeleteReplica force-deletes a specific pod. Knative will schedule a replacement automatically. func (c *AppK8sClient) DeleteReplica(ctx context.Context, replicaID *flyteapp.ReplicaIdentifier) error { + appID := replicaID.GetAppId() + ns := appNamespace(appID.GetProject(), appID.GetDomain()) pod := &corev1.Pod{} pod.Name = replicaID.GetName() - pod.Namespace = c.namespace + pod.Namespace = ns if err := c.k8sClient.Delete(ctx, pod); err != nil { if k8serrors.IsNotFound(err) { return nil } - return fmt.Errorf("failed to delete pod %s/%s: %w", c.namespace, replicaID.GetName(), err) + return fmt.Errorf("failed to delete pod %s/%s: %w", ns, replicaID.GetName(), err) } - logger.Infof(ctx, "Deleted replica pod %s/%s", c.namespace, replicaID.GetName()) + logger.Infof(ctx, "Deleted replica pod %s/%s", ns, replicaID.GetName()) return nil } @@ -575,7 +588,7 @@ func podDeploymentStatus(pod *corev1.Pod) (string, string) { // kserviceToApp reconstructs a flyteapp.App from a KService by reading the // app identifier from annotations and the live status from KService conditions. -func kserviceToApp(ksvc *servingv1.Service) (*flyteapp.App, error) { +func (c *AppK8sClient) kserviceToApp(ctx context.Context, ksvc *servingv1.Service) (*flyteapp.App, error) { appIDStr, ok := ksvc.Annotations[annotationAppID] if !ok { return nil, fmt.Errorf("KService %s missing %s annotation", ksvc.Name, annotationAppID) @@ -597,6 +610,6 @@ func kserviceToApp(ksvc *servingv1.Service) (*flyteapp.App, error) { Metadata: &flyteapp.Meta{ Id: appID, }, - Status: kserviceToStatus(ksvc), + Status: c.kserviceToStatus(ctx, ksvc), }, nil } diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index 99be7ba9ec..706e21c0d1 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -2,6 +2,8 @@ package k8s import ( "context" + "crypto/sha256" + "encoding/hex" "testing" "time" @@ -21,7 +23,7 @@ import ( flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" ) -// testScheme builds a runtime.Scheme with Knative types registered. +// testScheme builds a runtime.Scheme with Knative and core types registered. func testScheme(t *testing.T) *runtime.Scheme { t.Helper() s := runtime.NewScheme() @@ -30,6 +32,19 @@ func testScheme(t *testing.T) *runtime.Scheme { return s } +// testRevision builds a Knative Revision object with a given ActualReplicas count. +func testRevision(name, namespace string, actualReplicas int32) *servingv1.Revision { + return &servingv1.Revision{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Status: servingv1.RevisionStatus{ + ActualReplicas: &actualReplicas, + }, + } +} + // testClient builds an AppK8sClient backed by a fake K8s client. func testClient(t *testing.T, objs ...client.Object) *AppK8sClient { t.Helper() @@ -38,15 +53,12 @@ func testClient(t *testing.T, objs ...client.Object) *AppK8sClient { WithScheme(s). WithObjects(objs...). Build() - cfg := &config.AppConfig{ - Namespace: "flyte-apps", - DefaultRequestTimeout: 5 * time.Minute, - MaxRequestTimeout: time.Hour, - } return &AppK8sClient{ k8sClient: fc, - namespace: cfg.Namespace, - cfg: cfg, + cfg: &config.AppConfig{ + DefaultRequestTimeout: 5 * time.Minute, + MaxRequestTimeout: time.Hour, + }, } } @@ -79,7 +91,7 @@ func TestDeploy_Create(t *testing.T) { ksvc := &servingv1.Service{} err = c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc) require.NoError(t, err) assert.Equal(t, "proj", ksvc.Labels[labelProject]) assert.Equal(t, "dev", ksvc.Labels[labelDomain]) @@ -99,7 +111,7 @@ func TestDeploy_UpdateOnSpecChange(t *testing.T) { ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) assert.Equal(t, "nginx:2.0", ksvc.Spec.Template.Spec.Containers[0].Image) } @@ -111,14 +123,14 @@ func TestDeploy_SkipUpdateWhenUnchanged(t *testing.T) { // Get initial resource version. ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) initialRV := ksvc.ResourceVersion // Deploy same spec — should be a no-op. require.NoError(t, c.Deploy(context.Background(), app)) require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) assert.Equal(t, initialRV, ksvc.ResourceVersion, "resource version should not change on no-op deploy") } @@ -132,7 +144,7 @@ func TestStop(t *testing.T) { ksvc := &servingv1.Service{} require.NoError(t, c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc)) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc)) assert.Equal(t, "0", ksvc.Spec.Template.Annotations["autoscaling.knative.dev/max-scale"]) } @@ -153,7 +165,7 @@ func TestDelete(t *testing.T) { ksvc := &servingv1.Service{} err := c.k8sClient.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp", Namespace: "flyte-apps"}, ksvc) + client.ObjectKey{Name: "myapp", Namespace: "proj-dev"}, ksvc) assert.True(t, k8serrors.IsNotFound(err)) } @@ -167,9 +179,9 @@ func TestGetStatus_NotFound(t *testing.T) { c := testClient(t) id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "missing"} status, err := c.GetStatus(context.Background(), id) - require.NoError(t, err) - require.Len(t, status.Conditions, 1) - assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, status.Conditions[0].DeploymentStatus) + require.Error(t, err) + assert.True(t, k8serrors.IsNotFound(err)) + assert.Nil(t, status) } func TestGetStatus_Stopped(t *testing.T) { @@ -186,17 +198,57 @@ func TestGetStatus_Stopped(t *testing.T) { assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_STOPPED, status.Conditions[0].DeploymentStatus) } +func TestGetStatus_CurrentReplicas(t *testing.T) { + s := testScheme(t) + // Pre-populate a KService with LatestReadyRevisionName already set in status, + // and the corresponding Revision with ActualReplicas=4. + ksvc := &servingv1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "myapp", + Namespace: "proj-dev", + Labels: map[string]string{ + labelAppManaged: "true", + labelProject: "proj", + labelDomain: "dev", + labelAppName: "myapp", + }, + Annotations: map[string]string{ + annotationAppID: "proj/dev/myapp", + }, + }, + } + ksvc.Status.LatestReadyRevisionName = "myapp-00001" + + rev := testRevision("myapp-00001", "proj-dev", 4) + + fc := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(ksvc, rev). + WithStatusSubresource(ksvc). + Build() + c := &AppK8sClient{ + k8sClient: fc, + cfg: &config.AppConfig{}, + } + + id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} + status, err := c.GetStatus(context.Background(), id) + require.NoError(t, err) + assert.Equal(t, uint32(4), status.CurrentReplicas) +} + func TestList(t *testing.T) { s := testScheme(t) // Pre-populate two KServices with different project labels. ksvc1 := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "proj-dev-app1", - Namespace: "flyte-apps", + Name: "app1", + Namespace: "proj-dev", Labels: map[string]string{ - labelProject: "proj", - labelDomain: "dev", - labelAppName: "app1", + labelAppManaged: "true", + labelProject: "proj", + labelDomain: "dev", + labelAppName: "app1", }, Annotations: map[string]string{ annotationAppID: "proj/dev/app1", @@ -205,12 +257,13 @@ func TestList(t *testing.T) { } ksvc2 := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "other-dev-app2", - Namespace: "flyte-apps", + Name: "app2", + Namespace: "other-dev", Labels: map[string]string{ - labelProject: "other", - labelDomain: "dev", - labelAppName: "app2", + labelAppManaged: "true", + labelProject: "other", + labelDomain: "dev", + labelAppName: "app2", }, Annotations: map[string]string{ annotationAppID: "other/dev/app2", @@ -224,9 +277,7 @@ func TestList(t *testing.T) { Build() c := &AppK8sClient{ k8sClient: fc, - namespace: "flyte-apps", cfg: &config.AppConfig{ - Namespace: "flyte-apps", DefaultRequestTimeout: 5 * time.Minute, MaxRequestTimeout: time.Hour, }, @@ -243,11 +294,9 @@ func TestGetReplicas(t *testing.T) { s := testScheme(t) pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "proj-dev-myapp-abc", - Namespace: "flyte-apps", + Name: "myapp-abc", + Namespace: "proj-dev", Labels: map[string]string{ - labelProject: "proj", - labelDomain: "dev", labelAppName: "myapp", }, }, @@ -261,15 +310,14 @@ func TestGetReplicas(t *testing.T) { fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - namespace: "flyte-apps", - cfg: &config.AppConfig{Namespace: "flyte-apps"}, + cfg: &config.AppConfig{}, } id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} replicas, err := c.GetReplicas(context.Background(), id) require.NoError(t, err) require.Len(t, replicas, 1) - assert.Equal(t, "proj-dev-myapp-abc", replicas[0].Metadata.Id.Name) + assert.Equal(t, "myapp-abc", replicas[0].Metadata.Id.Name) assert.Equal(t, "ACTIVE", replicas[0].Status.DeploymentStatus) } @@ -277,33 +325,32 @@ func TestDeleteReplica(t *testing.T) { s := testScheme(t) pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "proj-dev-myapp-abc", - Namespace: "flyte-apps", + Name: "myapp-abc", + Namespace: "proj-dev", }, } fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - namespace: "flyte-apps", - cfg: &config.AppConfig{Namespace: "flyte-apps"}, + cfg: &config.AppConfig{}, } replicaID := &flyteapp.ReplicaIdentifier{ AppId: &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"}, - Name: "proj-dev-myapp-abc", + Name: "myapp-abc", } require.NoError(t, c.DeleteReplica(context.Background(), replicaID)) err := fc.Get(context.Background(), - client.ObjectKey{Name: "proj-dev-myapp-abc", Namespace: "flyte-apps"}, &corev1.Pod{}) + client.ObjectKey{Name: "myapp-abc", Namespace: "proj-dev"}, &corev1.Pod{}) assert.True(t, k8serrors.IsNotFound(err)) } func TestKserviceEventToWatchResponse(t *testing.T) { ksvc := &servingv1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: "proj-dev-myapp", - Namespace: "flyte-apps", + Name: "myapp", + Namespace: "proj-dev", Annotations: map[string]string{ annotationAppID: "proj/dev/myapp", }, @@ -322,9 +369,10 @@ func TestKserviceEventToWatchResponse(t *testing.T) { {k8swatch.Bookmark, true, ""}, } + c := testClient(t) for _, tt := range tests { t.Run(string(tt.eventType), func(t *testing.T) { - resp := kserviceEventToWatchResponse(k8swatch.Event{ + resp := c.kserviceEventToWatchResponse(context.Background(), k8swatch.Event{ Type: tt.eventType, Object: ksvc, }) @@ -349,22 +397,29 @@ func TestKserviceEventToWatchResponse(t *testing.T) { func TestKserviceName(t *testing.T) { tests := []struct { - project, domain, name string - want string + name string + want string }{ - {"proj", "dev", "myapp", "proj-dev-myapp"}, - {"P", "D", "N", "p-d-n"}, - // Long name should be truncated to 63 chars. + {"myapp", "myapp"}, + {"MyApp", "myapp"}, + // v1 and v2 variants stay distinct — no truncation collision. + {"my-long-service-name-v1", "my-long-service-name-v1"}, + {"my-long-service-name-v2", "my-long-service-name-v2"}, + // Names over 63 chars get a hash suffix instead of blind truncation. { - "verylongprojectname", - "verylongdomainname", - "verylongappnamethatexceedslimit", - "verylongprojectname-verylongdomainname-verylongappnamethatexcee"[:63], + "this-is-a-very-long-app-name-that-exceeds-the-kubernetes-dns-label-limit", + func() string { + name := "this-is-a-very-long-app-name-that-exceeds-the-kubernetes-dns-label-limit" + sum := sha256.Sum256([]byte(name)) + return name[:54] + "-" + hex.EncodeToString(sum[:4]) + }(), }, } for _, tt := range tests { - id := &flyteapp.Identifier{Project: tt.project, Domain: tt.domain, Name: tt.name} - assert.Equal(t, tt.want, kserviceName(id)) + id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: tt.name} + got := kserviceName(id) + assert.Equal(t, tt.want, got) + assert.LessOrEqual(t, len(got), maxKServiceNameLen) } } From b271ee8f7bf8921d3f4610b4f63bc605ddd5aeb9 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:39:11 -0700 Subject: [PATCH 09/17] impl: internal service Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/config/config.go | 4 +++ app/internal/k8s/app_client.go | 48 ++++++++++++++++++++--------- app/internal/k8s/app_client_test.go | 3 +- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/app/config/config.go b/app/config/config.go index 99feec4549..cc2085c4ee 100644 --- a/app/config/config.go +++ b/app/config/config.go @@ -7,6 +7,10 @@ type AppConfig struct { // Enabled controls whether the app deployment controller is started. Enabled bool `json:"enabled" pflag:",Enable app deployment controller"` + // BaseDomain is the base domain used to generate public URLs for apps. + // Apps are exposed at "{name}-{project}-{domain}.{base_domain}". + BaseDomain string `json:"baseDomain" pflag:",Base domain for app public URLs"` + // DefaultRequestTimeout is the request timeout applied to apps that don't specify one. DefaultRequestTimeout time.Duration `json:"defaultRequestTimeout" pflag:",Default request timeout for apps"` diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index c9b1baf268..a716c539d8 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -52,8 +52,10 @@ type AppK8sClientInterface interface { // Returns a not-found error (checkable with k8serrors.IsNotFound) if the KService does not exist. GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) - // List returns all apps (spec + live status) for the given project/domain scope. - List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) + // List returns apps for the given project/domain scope with optional pagination. + // limit=0 means no limit. token is the K8s continue token from a previous call. + // Returns the apps, the continue token for the next page (empty if last page), and any error. + List(ctx context.Context, project, domain string, limit uint32, token string) ([]*flyteapp.App, string, error) // Delete removes the KService CRD entirely. The app must be re-created from scratch. // Use Stop to scale to zero while preserving the KService. @@ -66,8 +68,9 @@ type AppK8sClientInterface interface { DeleteReplica(ctx context.Context, replicaID *flyteapp.ReplicaIdentifier) error // Watch returns a channel of WatchResponse events for KServices matching the - // given project/domain scope. The channel is closed when ctx is cancelled. - Watch(ctx context.Context, project, domain string) (<-chan *flyteapp.WatchResponse, error) + // given project/domain scope. If appName is non-empty, only events for that + // specific app are returned. The channel is closed when ctx is cancelled. + Watch(ctx context.Context, project, domain, appName string) (<-chan *flyteapp.WatchResponse, error) } // AppK8sClient implements AppK8sClientInterface using controller-runtime. @@ -169,13 +172,20 @@ func (c *AppK8sClient) Delete(ctx context.Context, appID *flyteapp.Identifier) e } // Watch returns a channel of WatchResponse events for KServices in the given -// project/domain scope. The channel is closed when ctx is cancelled or the +// project/domain scope. If appName is non-empty, only events for that specific +// app are returned. The channel is closed when ctx is cancelled or the // underlying watch terminates. -func (c *AppK8sClient) Watch(ctx context.Context, project, domain string) (<-chan *flyteapp.WatchResponse, error) { +func (c *AppK8sClient) Watch(ctx context.Context, project, domain, appName string) (<-chan *flyteapp.WatchResponse, error) { ns := appNamespace(project, domain) + + labels := map[string]string{labelAppManaged: "true"} + if appName != "" { + labels[labelAppName] = strings.ToLower(appName) + } + watcher, err := c.k8sClient.Watch(ctx, &servingv1.ServiceList{}, client.InNamespace(ns), - client.MatchingLabels{labelAppManaged: "true"}, + client.MatchingLabels(labels), ) if err != nil { return nil, fmt.Errorf("failed to start KService watch in namespace %s: %w", ns, err) @@ -258,16 +268,24 @@ func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier return c.kserviceToStatus(ctx, ksvc), nil } -// List returns all apps for the given project/domain by listing KServices in the -// project/domain namespace. -func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*flyteapp.App, error) { +// List returns apps for the given project/domain scope with optional pagination. +func (c *AppK8sClient) List(ctx context.Context, project, domain string, limit uint32, token string) ([]*flyteapp.App, string, error) { ns := appNamespace(project, domain) - list := &servingv1.ServiceList{} - if err := c.k8sClient.List(ctx, list, + + listOpts := []client.ListOption{ client.InNamespace(ns), client.MatchingLabels{labelAppManaged: "true"}, - ); err != nil { - return nil, fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) + } + if limit > 0 { + listOpts = append(listOpts, client.Limit(int64(limit))) + } + if token != "" { + listOpts = append(listOpts, client.Continue(token)) + } + + list := &servingv1.ServiceList{} + if err := c.k8sClient.List(ctx, list, listOpts...); err != nil { + return nil, "", fmt.Errorf("failed to list KServices for %s/%s: %w", project, domain, err) } apps := make([]*flyteapp.App, 0, len(list.Items)) @@ -279,7 +297,7 @@ func (c *AppK8sClient) List(ctx context.Context, project, domain string) ([]*fly } apps = append(apps, a) } - return apps, nil + return apps, list.Continue, nil } // --- Helpers --- diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index 706e21c0d1..e0d22d4f77 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -283,8 +283,9 @@ func TestList(t *testing.T) { }, } - apps, err := c.List(context.Background(), "proj", "dev") + apps, nextToken, err := c.List(context.Background(), "proj", "dev", 0, "") require.NoError(t, err) + assert.Empty(t, nextToken) require.Len(t, apps, 1) assert.Equal(t, "proj", apps[0].Metadata.Id.Project) assert.Equal(t, "app1", apps[0].Metadata.Id.Name) From 9126a630738741d08004fd3eb9d915dfc7700700 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:39:59 -0700 Subject: [PATCH 10/17] restructure Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/config/config.go | 25 +++++++++++-------- app/internal/config/config.go | 19 ++++++++++++++ app/internal/k8s/app_client.go | 8 +++--- app/internal/k8s/app_client_test.go | 12 ++++----- app/internal/service/internal_app_service.go | 6 ++--- .../service/internal_app_service_test.go | 6 ++--- app/internal/setup.go | 4 +-- 7 files changed, 51 insertions(+), 29 deletions(-) create mode 100644 app/internal/config/config.go diff --git a/app/config/config.go b/app/config/config.go index cc2085c4ee..6532f30b5c 100644 --- a/app/config/config.go +++ b/app/config/config.go @@ -2,18 +2,21 @@ package config import "time" -// AppConfig holds configuration for the App deployment controller. +// AppConfig holds configuration for the control plane AppService. type AppConfig struct { - // Enabled controls whether the app deployment controller is started. - Enabled bool `json:"enabled" pflag:",Enable app deployment controller"` + // InternalAppServiceURL is the base URL of the InternalAppService (data plane). + // In unified mode this is overridden by the shared mux BaseURL. + InternalAppServiceURL string `json:"internalAppServiceUrl" pflag:",URL of the internal app service"` - // BaseDomain is the base domain used to generate public URLs for apps. - // Apps are exposed at "{name}-{project}-{domain}.{base_domain}". - BaseDomain string `json:"baseDomain" pflag:",Base domain for app public URLs"` - - // DefaultRequestTimeout is the request timeout applied to apps that don't specify one. - DefaultRequestTimeout time.Duration `json:"defaultRequestTimeout" pflag:",Default request timeout for apps"` + // CacheTTL is the TTL for the in-memory app status cache. + // Defaults to 30s. Set to 0 to disable caching. + CacheTTL time.Duration `json:"cacheTtl" pflag:",TTL for app status cache"` +} - // MaxRequestTimeout is the hard cap on request timeout (Knative max is 3600s). - MaxRequestTimeout time.Duration `json:"maxRequestTimeout" pflag:",Maximum allowed request timeout for apps"` +// DefaultAppConfig returns the default control plane AppConfig. +func DefaultAppConfig() *AppConfig { + return &AppConfig{ + InternalAppServiceURL: "http://localhost:8091", + CacheTTL: 30 * time.Second, + } } diff --git a/app/internal/config/config.go b/app/internal/config/config.go new file mode 100644 index 0000000000..17e9e01dc2 --- /dev/null +++ b/app/internal/config/config.go @@ -0,0 +1,19 @@ +package config + +import "time" + +// InternalAppConfig holds configuration for the data plane app deployment controller. +type InternalAppConfig struct { + // Enabled controls whether the app deployment controller is started. + Enabled bool `json:"enabled" pflag:",Enable app deployment controller"` + + // BaseDomain is the base domain used to generate public URLs for apps. + // Apps are exposed at "{name}-{project}-{domain}.{base_domain}". + BaseDomain string `json:"baseDomain" pflag:",Base domain for app public URLs"` + + // DefaultRequestTimeout is the request timeout applied to apps that don't specify one. + DefaultRequestTimeout time.Duration `json:"defaultRequestTimeout" pflag:",Default request timeout for apps"` + + // MaxRequestTimeout is the hard cap on request timeout (Knative max is 3600s). + MaxRequestTimeout time.Duration `json:"maxRequestTimeout" pflag:",Maximum allowed request timeout for apps"` +} diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index a716c539d8..bb40a12c38 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -18,7 +18,7 @@ import ( ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/flyteorg/flyte/v2/app/config" + "github.com/flyteorg/flyte/v2/app/internal/config" "github.com/flyteorg/flyte/v2/flytestdlib/logger" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" ) @@ -77,11 +77,11 @@ type AppK8sClientInterface interface { type AppK8sClient struct { k8sClient client.WithWatch cache ctrlcache.Cache - cfg *config.AppConfig + cfg *config.InternalAppConfig } // NewAppK8sClient creates a new AppK8sClient. -func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *config.AppConfig) *AppK8sClient { +func NewAppK8sClient(k8sClient client.WithWatch, cache ctrlcache.Cache, cfg *config.InternalAppConfig) *AppK8sClient { return &AppK8sClient{ k8sClient: k8sClient, cache: cache, @@ -418,7 +418,7 @@ func buildPodSpec(spec *flyteapp.Spec) (corev1.PodSpec, error) { } // buildAutoscalingAnnotations returns the Knative autoscaling annotations for the revision template. -func buildAutoscalingAnnotations(spec *flyteapp.Spec, cfg *config.AppConfig) map[string]string { +func buildAutoscalingAnnotations(spec *flyteapp.Spec, cfg *config.InternalAppConfig) map[string]string { annotations := map[string]string{} autoscaling := spec.GetAutoscaling() if autoscaling == nil { diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index e0d22d4f77..06f49a3e56 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -18,7 +18,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "github.com/flyteorg/flyte/v2/app/config" + "github.com/flyteorg/flyte/v2/app/internal/config" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" ) @@ -55,7 +55,7 @@ func testClient(t *testing.T, objs ...client.Object) *AppK8sClient { Build() return &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{ + cfg: &config.InternalAppConfig{ DefaultRequestTimeout: 5 * time.Minute, MaxRequestTimeout: time.Hour, }, @@ -228,7 +228,7 @@ func TestGetStatus_CurrentReplicas(t *testing.T) { Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{}, + cfg: &config.InternalAppConfig{}, } id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} @@ -277,7 +277,7 @@ func TestList(t *testing.T) { Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{ + cfg: &config.InternalAppConfig{ DefaultRequestTimeout: 5 * time.Minute, MaxRequestTimeout: time.Hour, }, @@ -311,7 +311,7 @@ func TestGetReplicas(t *testing.T) { fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{}, + cfg: &config.InternalAppConfig{}, } id := &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} @@ -333,7 +333,7 @@ func TestDeleteReplica(t *testing.T) { fc := fake.NewClientBuilder().WithScheme(s).WithObjects(pod).Build() c := &AppK8sClient{ k8sClient: fc, - cfg: &config.AppConfig{}, + cfg: &config.InternalAppConfig{}, } replicaID := &flyteapp.ReplicaIdentifier{ diff --git a/app/internal/service/internal_app_service.go b/app/internal/service/internal_app_service.go index 3d75198a40..dc847ec482 100644 --- a/app/internal/service/internal_app_service.go +++ b/app/internal/service/internal_app_service.go @@ -9,7 +9,7 @@ import ( timestamppb "google.golang.org/protobuf/types/known/timestamppb" k8serrors "k8s.io/apimachinery/pkg/api/errors" - appconfig "github.com/flyteorg/flyte/v2/app/config" + appconfig "github.com/flyteorg/flyte/v2/app/internal/config" appk8s "github.com/flyteorg/flyte/v2/app/internal/k8s" "github.com/flyteorg/flyte/v2/flytestdlib/logger" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" @@ -22,11 +22,11 @@ import ( type InternalAppService struct { appconnect.UnimplementedAppServiceHandler k8s appk8s.AppK8sClientInterface - cfg *appconfig.AppConfig + cfg *appconfig.InternalAppConfig } // NewInternalAppService creates a new InternalAppService. -func NewInternalAppService(k8s appk8s.AppK8sClientInterface, cfg *appconfig.AppConfig) *InternalAppService { +func NewInternalAppService(k8s appk8s.AppK8sClientInterface, cfg *appconfig.InternalAppConfig) *InternalAppService { return &InternalAppService{k8s: k8s, cfg: cfg} } diff --git a/app/internal/service/internal_app_service_test.go b/app/internal/service/internal_app_service_test.go index 08f7159e8e..c47fa16c9b 100644 --- a/app/internal/service/internal_app_service_test.go +++ b/app/internal/service/internal_app_service_test.go @@ -15,7 +15,7 @@ import ( kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/schema" - appconfig "github.com/flyteorg/flyte/v2/app/config" + appconfig "github.com/flyteorg/flyte/v2/app/internal/config" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" @@ -77,8 +77,8 @@ func (m *mockAppK8sClient) Watch(ctx context.Context, project, domain, appName s // --- helpers --- -func testCfg() *appconfig.AppConfig { - return &appconfig.AppConfig{ +func testCfg() *appconfig.InternalAppConfig { + return &appconfig.InternalAppConfig{ Enabled: true, BaseDomain: "apps.example.com", DefaultRequestTimeout: 5 * time.Minute, diff --git a/app/internal/setup.go b/app/internal/setup.go index 619bcb91d2..7c7ec7a0c5 100644 --- a/app/internal/setup.go +++ b/app/internal/setup.go @@ -8,7 +8,7 @@ import ( stdlibapp "github.com/flyteorg/flyte/v2/flytestdlib/app" "github.com/flyteorg/flyte/v2/flytestdlib/logger" - appconfig "github.com/flyteorg/flyte/v2/app/config" + appconfig "github.com/flyteorg/flyte/v2/app/internal/config" appk8s "github.com/flyteorg/flyte/v2/app/internal/k8s" "github.com/flyteorg/flyte/v2/app/internal/service" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" @@ -17,7 +17,7 @@ import ( // Setup registers the InternalAppService handler on the SetupContext mux. // It is mounted at /internal to avoid collision with the control plane // AppService, which shares the same proto service definition. -func Setup(ctx context.Context, sc *stdlibapp.SetupContext, cfg *appconfig.AppConfig) error { +func Setup(ctx context.Context, sc *stdlibapp.SetupContext, cfg *appconfig.InternalAppConfig) error { if !cfg.Enabled { logger.Infof(ctx, "InternalAppService disabled (apps.enabled=false), skipping setup") return nil From 6053502938fb358ec54d7d1efc9cdce32b945afc Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:41:50 -0700 Subject: [PATCH 11/17] remove apps from runs and create app_service Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/service/app_service.go | 189 ++++++++++++++++++++++++++++++++++++ app/setup.go | 36 +++++++ runs/service/app_service.go | 80 --------------- runs/setup.go | 21 +--- 4 files changed, 227 insertions(+), 99 deletions(-) create mode 100644 app/service/app_service.go create mode 100644 app/setup.go delete mode 100644 runs/service/app_service.go diff --git a/app/service/app_service.go b/app/service/app_service.go new file mode 100644 index 0000000000..d3a85607e0 --- /dev/null +++ b/app/service/app_service.go @@ -0,0 +1,189 @@ +package service + +import ( + "context" + "fmt" + "sync" + "time" + + "connectrpc.com/connect" + + flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" + "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" +) + +// AppService is the control plane implementation of AppServiceHandler. +// It proxies all RPCs to InternalAppService (data plane) and maintains a +// per-instance TTL cache to reduce cross-plane RPC calls on Get. +type AppService struct { + appconnect.UnimplementedAppServiceHandler + internalClient appconnect.AppServiceClient + cache *appCache +} + +// NewAppService creates a new AppService. +// cacheTTL=0 disables caching (every Get calls InternalAppService). +func NewAppService(internalClient appconnect.AppServiceClient, cacheTTL time.Duration) *AppService { + return &AppService{ + internalClient: internalClient, + cache: newAppCache(cacheTTL), + } +} + +// Ensure AppService satisfies the generated handler interface. +var _ appconnect.AppServiceHandler = (*AppService)(nil) + +// Create forwards to InternalAppService and invalidates the cache entry. +func (s *AppService) Create( + ctx context.Context, + req *connect.Request[flyteapp.CreateRequest], +) (*connect.Response[flyteapp.CreateResponse], error) { + resp, err := s.internalClient.Create(ctx, req) + if err != nil { + return nil, err + } + s.cache.invalidate(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + return resp, nil +} + +// Get returns the app, using the cache on hit and calling InternalAppService on miss. +func (s *AppService) Get( + ctx context.Context, + req *connect.Request[flyteapp.GetRequest], +) (*connect.Response[flyteapp.GetResponse], error) { + appID, ok := req.Msg.GetIdentifier().(*flyteapp.GetRequest_AppId) + if ok && appID.AppId != nil { + if app, hit := s.cache.get(cacheKey(appID.AppId)); hit { + return connect.NewResponse(&flyteapp.GetResponse{App: app}), nil + } + } + + resp, err := s.internalClient.Get(ctx, req) + if err != nil { + return nil, err + } + if ok && appID.AppId != nil { + s.cache.set(cacheKey(appID.AppId), resp.Msg.GetApp()) + } + return resp, nil +} + +// Update forwards to InternalAppService and invalidates the cache entry. +func (s *AppService) Update( + ctx context.Context, + req *connect.Request[flyteapp.UpdateRequest], +) (*connect.Response[flyteapp.UpdateResponse], error) { + resp, err := s.internalClient.Update(ctx, req) + if err != nil { + return nil, err + } + s.cache.invalidate(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + return resp, nil +} + +// Delete forwards to InternalAppService and invalidates the cache entry. +func (s *AppService) Delete( + ctx context.Context, + req *connect.Request[flyteapp.DeleteRequest], +) (*connect.Response[flyteapp.DeleteResponse], error) { + resp, err := s.internalClient.Delete(ctx, req) + if err != nil { + return nil, err + } + s.cache.invalidate(cacheKey(req.Msg.GetAppId())) + return resp, nil +} + +// List always forwards to InternalAppService — results vary by filter/pagination. +func (s *AppService) List( + ctx context.Context, + req *connect.Request[flyteapp.ListRequest], +) (*connect.Response[flyteapp.ListResponse], error) { + return s.internalClient.List(ctx, req) +} + +// Watch proxies the server-streaming Watch RPC to InternalAppService. +func (s *AppService) Watch( + ctx context.Context, + req *connect.Request[flyteapp.WatchRequest], + stream *connect.ServerStream[flyteapp.WatchResponse], +) error { + clientStream, err := s.internalClient.Watch(ctx, req) + if err != nil { + return connect.NewError(connect.CodeInternal, err) + } + defer clientStream.Close() + for clientStream.Receive() { + if err := stream.Send(clientStream.Msg()); err != nil { + return err + } + } + return clientStream.Err() +} + +// UpdateStatus forwards directly to InternalAppService (no cache interaction). +func (s *AppService) UpdateStatus( + ctx context.Context, + req *connect.Request[flyteapp.UpdateStatusRequest], +) (*connect.Response[flyteapp.UpdateStatusResponse], error) { + return s.internalClient.UpdateStatus(ctx, req) +} + +// --- Cache --- + +type cacheEntry struct { + app *flyteapp.App + expiresAt time.Time +} + +type appCache struct { + mu sync.RWMutex + items map[string]*cacheEntry + ttl time.Duration +} + +func newAppCache(ttl time.Duration) *appCache { + return &appCache{ + items: make(map[string]*cacheEntry), + ttl: ttl, + } +} + +// get returns the cached App for key, or (nil, false) if missing or expired. +func (c *appCache) get(key string) (*flyteapp.App, bool) { + if c.ttl == 0 { + return nil, false + } + c.mu.RLock() + entry, ok := c.items[key] + c.mu.RUnlock() + if !ok || time.Now().After(entry.expiresAt) { + return nil, false + } + return entry.app, true +} + +// set writes the App to the cache with the configured TTL. +func (c *appCache) set(key string, app *flyteapp.App) { + if c.ttl == 0 { + return + } + c.mu.Lock() + c.items[key] = &cacheEntry{app: app, expiresAt: time.Now().Add(c.ttl)} + c.mu.Unlock() +} + +// invalidate removes the cache entry for key. +func (c *appCache) invalidate(key string) { + c.mu.Lock() + delete(c.items, key) + c.mu.Unlock() +} + +// cacheKey returns a stable string key for an app identifier. +func cacheKey(id *flyteapp.Identifier) string { + if id == nil { + return "" + } + return fmt.Sprintf("%s/%s/%s", id.GetProject(), id.GetDomain(), id.GetName()) +} diff --git a/app/setup.go b/app/setup.go new file mode 100644 index 0000000000..31fcdd8d6a --- /dev/null +++ b/app/setup.go @@ -0,0 +1,36 @@ +package app + +import ( + "context" + "net/http" + + stdlibapp "github.com/flyteorg/flyte/v2/flytestdlib/app" + "github.com/flyteorg/flyte/v2/flytestdlib/logger" + + appconfig "github.com/flyteorg/flyte/v2/app/config" + "github.com/flyteorg/flyte/v2/app/service" + "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" +) + +// Setup registers the control plane AppService handler on the SetupContext mux. +// In unified mode (sc.BaseURL set), the proxy routes to InternalAppService on +// the same mux via the /internal prefix — no network hop. In split mode, +// cfg.InternalAppServiceURL points at the data plane host. +func Setup(ctx context.Context, sc *stdlibapp.SetupContext, cfg *appconfig.AppConfig) error { + internalAppURL := cfg.InternalAppServiceURL + if sc.BaseURL != "" { + internalAppURL = sc.BaseURL + } + + internalClient := appconnect.NewAppServiceClient( + http.DefaultClient, + internalAppURL+"/internal", + ) + + appSvc := service.NewAppService(internalClient, cfg.CacheTTL) + path, handler := appconnect.NewAppServiceHandler(appSvc) + sc.Mux.Handle(path, handler) + logger.Infof(ctx, "Mounted AppService at %s", path) + + return nil +} diff --git a/runs/service/app_service.go b/runs/service/app_service.go deleted file mode 100644 index dbccfe3f9a..0000000000 --- a/runs/service/app_service.go +++ /dev/null @@ -1,80 +0,0 @@ -package service - -import ( - "context" - "errors" - - "connectrpc.com/connect" - - flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" - "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" -) - -// AppService is a dummy implementation that returns empty responses for all endpoints. -type AppService struct { - appconnect.UnimplementedAppServiceHandler -} - -func NewAppService() *AppService { - return &AppService{} -} - -var _ appconnect.AppServiceHandler = (*AppService)(nil) - -func (s *AppService) Create( - ctx context.Context, - req *connect.Request[flyteapp.CreateRequest], -) (*connect.Response[flyteapp.CreateResponse], error) { - return nil, connect.NewError(connect.CodeUnimplemented, errors.New("App service is not implemented")) -} - -func (s *AppService) Get( - ctx context.Context, - req *connect.Request[flyteapp.GetRequest], -) (*connect.Response[flyteapp.GetResponse], error) { - return connect.NewResponse(&flyteapp.GetResponse{}), nil -} - -func (s *AppService) Update( - ctx context.Context, - req *connect.Request[flyteapp.UpdateRequest], -) (*connect.Response[flyteapp.UpdateResponse], error) { - return connect.NewResponse(&flyteapp.UpdateResponse{}), nil -} - -func (s *AppService) UpdateStatus( - ctx context.Context, - req *connect.Request[flyteapp.UpdateStatusRequest], -) (*connect.Response[flyteapp.UpdateStatusResponse], error) { - return connect.NewResponse(&flyteapp.UpdateStatusResponse{}), nil -} - -func (s *AppService) Delete( - ctx context.Context, - req *connect.Request[flyteapp.DeleteRequest], -) (*connect.Response[flyteapp.DeleteResponse], error) { - return connect.NewResponse(&flyteapp.DeleteResponse{}), nil -} - -func (s *AppService) List( - ctx context.Context, - req *connect.Request[flyteapp.ListRequest], -) (*connect.Response[flyteapp.ListResponse], error) { - return connect.NewResponse(&flyteapp.ListResponse{}), nil -} - -func (s *AppService) Watch( - ctx context.Context, - req *connect.Request[flyteapp.WatchRequest], - stream *connect.ServerStream[flyteapp.WatchResponse], -) error { - return nil -} - -func (s *AppService) Lease( - ctx context.Context, - req *connect.Request[flyteapp.LeaseRequest], - stream *connect.ServerStream[flyteapp.LeaseResponse], -) error { - return nil -} diff --git a/runs/setup.go b/runs/setup.go index 85e90f1723..67d753ad74 100644 --- a/runs/setup.go +++ b/runs/setup.go @@ -9,8 +9,7 @@ import ( "github.com/flyteorg/flyte/v2/flytestdlib/app" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/actions/actionsconnect" - flyteappconnect "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" - "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/auth/authconnect" +"github.com/flyteorg/flyte/v2/gen/go/flyteidl2/auth/authconnect" projectpb "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/project" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/project/projectconnect" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/task/taskconnect" @@ -22,7 +21,6 @@ import ( "github.com/flyteorg/flyte/v2/runs/repository/impl" "github.com/flyteorg/flyte/v2/runs/repository/interfaces" "github.com/flyteorg/flyte/v2/runs/repository/models" - "github.com/flyteorg/flyte/v2/runs/scheduler" "github.com/flyteorg/flyte/v2/runs/service" "github.com/flyteorg/flyte/v2/flytestdlib/logger" @@ -93,12 +91,7 @@ func Setup(ctx context.Context, sc *app.SetupContext) error { sc.Mux.Handle(authMetadataPath, authMetadataHandler) logger.Infof(ctx, "Mounted AuthMetadataService at %s", authMetadataPath) - appSvc := service.NewAppService() - appPath, appHandler := flyteappconnect.NewAppServiceHandler(appSvc) - sc.Mux.Handle(appPath, appHandler) - logger.Infof(ctx, "Mounted AppService at %s", appPath) - - triggerSvc := service.NewTriggerService(repo) +triggerSvc := service.NewTriggerService() triggerPath, triggerHandler := triggerconnect.NewTriggerServiceHandler(triggerSvc) sc.Mux.Handle(triggerPath, triggerHandler) logger.Infof(ctx, "Mounted TriggerService at %s", triggerPath) @@ -130,16 +123,6 @@ func Setup(ctx context.Context, sc *app.SetupContext) error { return fmt.Errorf("runs: failed to seed projects: %w", err) } - if cfg.TriggerScheduler.Enabled { - runsURL := cfg.ActionsServiceURL - if sc.BaseURL != "" { - runsURL = sc.BaseURL - } - worker := scheduler.Start(ctx, repo.TriggerRepo(), cfg.TriggerScheduler, runsURL) - sc.AddWorker("trigger-scheduler", worker) - logger.Infof(ctx, "Registered trigger-scheduler worker") - } - sc.AddReadyCheck(func(r *http.Request) error { if err := sc.DB.PingContext(r.Context()); err != nil { return fmt.Errorf("database ping failed: %w", err) From 73a86ace88a3748e283589d22bbff889c274b2c2 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:42:57 -0700 Subject: [PATCH 12/17] add:tests Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/service/app_service_test.go | 292 ++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 app/service/app_service_test.go diff --git a/app/service/app_service_test.go b/app/service/app_service_test.go new file mode 100644 index 0000000000..d72f988a15 --- /dev/null +++ b/app/service/app_service_test.go @@ -0,0 +1,292 @@ +package service + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "connectrpc.com/connect" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" + "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" + flytecoreapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/core" +) + +// mockInternalClient is a testify mock for appconnect.AppServiceClient. +type mockInternalClient struct { + mock.Mock +} + +func (m *mockInternalClient) Create(ctx context.Context, req *connect.Request[flyteapp.CreateRequest]) (*connect.Response[flyteapp.CreateResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.CreateResponse]), args.Error(1) +} + +func (m *mockInternalClient) Get(ctx context.Context, req *connect.Request[flyteapp.GetRequest]) (*connect.Response[flyteapp.GetResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.GetResponse]), args.Error(1) +} + +func (m *mockInternalClient) Update(ctx context.Context, req *connect.Request[flyteapp.UpdateRequest]) (*connect.Response[flyteapp.UpdateResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.UpdateResponse]), args.Error(1) +} + +func (m *mockInternalClient) UpdateStatus(ctx context.Context, req *connect.Request[flyteapp.UpdateStatusRequest]) (*connect.Response[flyteapp.UpdateStatusResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.UpdateStatusResponse]), args.Error(1) +} + +func (m *mockInternalClient) Delete(ctx context.Context, req *connect.Request[flyteapp.DeleteRequest]) (*connect.Response[flyteapp.DeleteResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.DeleteResponse]), args.Error(1) +} + +func (m *mockInternalClient) List(ctx context.Context, req *connect.Request[flyteapp.ListRequest]) (*connect.Response[flyteapp.ListResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.Response[flyteapp.ListResponse]), args.Error(1) +} + +func (m *mockInternalClient) Watch(ctx context.Context, req *connect.Request[flyteapp.WatchRequest]) (*connect.ServerStreamForClient[flyteapp.WatchResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.ServerStreamForClient[flyteapp.WatchResponse]), args.Error(1) +} + +func (m *mockInternalClient) Lease(ctx context.Context, req *connect.Request[flyteapp.LeaseRequest]) (*connect.ServerStreamForClient[flyteapp.LeaseResponse], error) { + args := m.Called(ctx, req) + if args.Get(0) == nil { + return nil, args.Error(1) + } + return args.Get(0).(*connect.ServerStreamForClient[flyteapp.LeaseResponse]), args.Error(1) +} + +// --- helpers --- + +func testAppID() *flyteapp.Identifier { + return &flyteapp.Identifier{Project: "proj", Domain: "dev", Name: "myapp"} +} + +func testApp() *flyteapp.App { + return &flyteapp.App{ + Metadata: &flyteapp.Meta{Id: testAppID()}, + Spec: &flyteapp.Spec{ + AppPayload: &flyteapp.Spec_Container{ + Container: &flytecoreapp.Container{Image: "nginx:latest"}, + }, + }, + Status: &flyteapp.Status{ + Conditions: []*flyteapp.Condition{ + {DeploymentStatus: flyteapp.Status_DEPLOYMENT_STATUS_ACTIVE}, + }, + }, + } +} + +// --- Get with cache --- + +func TestGet_CacheMiss_CallsInternal(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + appID := testAppID() + app := testApp() + internal.On("Get", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.GetResponse{App: app}), nil, + ) + + resp, err := svc.Get(context.Background(), connect.NewRequest(&flyteapp.GetRequest{ + Identifier: &flyteapp.GetRequest_AppId{AppId: appID}, + })) + require.NoError(t, err) + assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_ACTIVE, resp.Msg.App.Status.Conditions[0].DeploymentStatus) + internal.AssertExpectations(t) +} + +func TestGet_CacheHit_SkipsInternal(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + // Pre-populate cache. + appID := testAppID() + svc.cache.set(cacheKey(appID), testApp()) + + // Internal should NOT be called. + resp, err := svc.Get(context.Background(), connect.NewRequest(&flyteapp.GetRequest{ + Identifier: &flyteapp.GetRequest_AppId{AppId: appID}, + })) + require.NoError(t, err) + assert.Equal(t, flyteapp.Status_DEPLOYMENT_STATUS_ACTIVE, resp.Msg.App.Status.Conditions[0].DeploymentStatus) + internal.AssertNotCalled(t, "Get") +} + +func TestGet_CacheExpired_CallsInternal(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 1*time.Millisecond) + + appID := testAppID() + svc.cache.set(cacheKey(appID), testApp()) + time.Sleep(5 * time.Millisecond) // let TTL expire + + app := testApp() + internal.On("Get", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.GetResponse{App: app}), nil, + ) + + _, err := svc.Get(context.Background(), connect.NewRequest(&flyteapp.GetRequest{ + Identifier: &flyteapp.GetRequest_AppId{AppId: appID}, + })) + require.NoError(t, err) + internal.AssertExpectations(t) +} + +// --- Create / Update / Delete invalidate cache --- + +func TestCreate_InvalidatesCache(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + app := testApp() + // Pre-populate cache so we can confirm it's cleared. + svc.cache.set(cacheKey(app.Metadata.Id), app) + + internal.On("Create", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.CreateResponse{App: app}), nil, + ) + + _, err := svc.Create(context.Background(), connect.NewRequest(&flyteapp.CreateRequest{App: app})) + require.NoError(t, err) + + _, hit := svc.cache.get(cacheKey(app.Metadata.Id)) + assert.False(t, hit, "cache should be invalidated after Create") + internal.AssertExpectations(t) +} + +func TestUpdate_InvalidatesCache(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + app := testApp() + svc.cache.set(cacheKey(app.Metadata.Id), app) + + internal.On("Update", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.UpdateResponse{App: app}), nil, + ) + + _, err := svc.Update(context.Background(), connect.NewRequest(&flyteapp.UpdateRequest{App: app})) + require.NoError(t, err) + + _, hit := svc.cache.get(cacheKey(app.Metadata.Id)) + assert.False(t, hit, "cache should be invalidated after Update") + internal.AssertExpectations(t) +} + +func TestDelete_InvalidatesCache(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + appID := testAppID() + svc.cache.set(cacheKey(appID), testApp()) + + internal.On("Delete", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.DeleteResponse{}), nil, + ) + + _, err := svc.Delete(context.Background(), connect.NewRequest(&flyteapp.DeleteRequest{AppId: appID})) + require.NoError(t, err) + + _, hit := svc.cache.get(cacheKey(appID)) + assert.False(t, hit, "cache should be invalidated after Delete") + internal.AssertExpectations(t) +} + +// --- List always forwards --- + +func TestList_AlwaysCallsInternal(t *testing.T) { + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + internal.On("List", mock.Anything, mock.Anything).Return( + connect.NewResponse(&flyteapp.ListResponse{Apps: []*flyteapp.App{testApp()}}), nil, + ) + + resp, err := svc.List(context.Background(), connect.NewRequest(&flyteapp.ListRequest{})) + require.NoError(t, err) + assert.Len(t, resp.Msg.Apps, 1) + internal.AssertExpectations(t) +} + +// --- Watch streams through --- + +func TestWatch_ProxiesStream(t *testing.T) { + // Use a real httptest server to exercise the streaming path. + internal := &mockInternalClient{} + svc := NewAppService(internal, 30*time.Second) + + path, handler := appconnect.NewAppServiceHandler(svc) + mux := http.NewServeMux() + mux.Handle(path, handler) + server := httptest.NewServer(mux) + t.Cleanup(server.Close) + + // Mount InternalAppService on the same test server at /internal so the + // proxy can route to it. + internalSvcPath, internalSvcHandler := appconnect.NewAppServiceHandler( + &echoWatchService{app: testApp()}, + ) + mux.Handle("/internal"+internalSvcPath, http.StripPrefix("/internal", internalSvcHandler)) + + // Point AppService proxy at the internal path on the same server. + svc.internalClient = appconnect.NewAppServiceClient(http.DefaultClient, server.URL+"/internal") + + client := appconnect.NewAppServiceClient(http.DefaultClient, server.URL) + stream, err := client.Watch(context.Background(), connect.NewRequest(&flyteapp.WatchRequest{})) + require.NoError(t, err) + + require.True(t, stream.Receive()) + assert.Equal(t, "myapp", stream.Msg().GetCreateEvent().GetApp().GetMetadata().GetId().GetName()) + stream.Close() +} + +// echoWatchService sends one CreateEvent then closes the stream. +type echoWatchService struct { + appconnect.UnimplementedAppServiceHandler + app *flyteapp.App +} + +func (e *echoWatchService) Watch( + _ context.Context, + _ *connect.Request[flyteapp.WatchRequest], + stream *connect.ServerStream[flyteapp.WatchResponse], +) error { + return stream.Send(&flyteapp.WatchResponse{ + Event: &flyteapp.WatchResponse_CreateEvent{ + CreateEvent: &flyteapp.CreateEvent{App: e.app}, + }, + }) +} From 535b24a36361237b73b20443bf1b764f2992a76b Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:06:32 -0700 Subject: [PATCH 13/17] fix Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/service/app_service.go | 8 -------- app/service/app_service_test.go | 6 +----- runs/config/config.go | 17 ----------------- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/app/service/app_service.go b/app/service/app_service.go index d3a85607e0..e9d9e14074 100644 --- a/app/service/app_service.go +++ b/app/service/app_service.go @@ -121,14 +121,6 @@ func (s *AppService) Watch( return clientStream.Err() } -// UpdateStatus forwards directly to InternalAppService (no cache interaction). -func (s *AppService) UpdateStatus( - ctx context.Context, - req *connect.Request[flyteapp.UpdateStatusRequest], -) (*connect.Response[flyteapp.UpdateStatusResponse], error) { - return s.internalClient.UpdateStatus(ctx, req) -} - // --- Cache --- type cacheEntry struct { diff --git a/app/service/app_service_test.go b/app/service/app_service_test.go index d72f988a15..e985a89568 100644 --- a/app/service/app_service_test.go +++ b/app/service/app_service_test.go @@ -47,11 +47,7 @@ func (m *mockInternalClient) Update(ctx context.Context, req *connect.Request[fl } func (m *mockInternalClient) UpdateStatus(ctx context.Context, req *connect.Request[flyteapp.UpdateStatusRequest]) (*connect.Response[flyteapp.UpdateStatusResponse], error) { - args := m.Called(ctx, req) - if args.Get(0) == nil { - return nil, args.Error(1) - } - return args.Get(0).(*connect.Response[flyteapp.UpdateStatusResponse]), args.Error(1) + return nil, connect.NewError(connect.CodeUnimplemented, nil) } func (m *mockInternalClient) Delete(ctx context.Context, req *connect.Request[flyteapp.DeleteRequest]) (*connect.Response[flyteapp.DeleteResponse], error) { diff --git a/runs/config/config.go b/runs/config/config.go index 590dcb0a6d..9c2ab1a73b 100644 --- a/runs/config/config.go +++ b/runs/config/config.go @@ -32,9 +32,6 @@ var defaultConfig = &Config{ ExecutionQPS: 10.0, ExecutionBurst: 20, }, - Apps: AppsConfig{ - InternalAppServiceURL: "http://localhost:8091", - }, } var configSection = config.MustRegisterSection(configSectionKey, defaultConfig) @@ -66,8 +63,6 @@ type Config struct { // TriggerScheduler configures the cron-based trigger scheduler worker. TriggerScheduler TriggerSchedulerConfig `json:"triggerScheduler"` - // Apps holds configuration for the App service. - Apps AppsConfig `json:"apps"` } // ServerConfig holds HTTP server configuration @@ -100,18 +95,6 @@ type TriggerSchedulerConfig struct { ExecutionBurst int `json:"executionBurst" pflag:",Burst size for CreateRun rate limiter"` } -// AppsConfig holds configuration for the App service in the runs (control plane). -type AppsConfig struct { - // PublicURLPattern is a Go template for generating public ingress URLs. - // Available variables: {{.Name}}, {{.Project}}, {{.Domain}} - // Example: "https://{{.Name}}-{{.Project}}.apps.flyte.example.com" - PublicURLPattern string `json:"publicUrlPattern" pflag:",URL pattern for app ingress"` - - // InternalAppServiceURL is the base URL of the InternalAppService (actions data plane). - // In unified mode this is overridden by sc.BaseURL. - InternalAppServiceURL string `json:"internalAppServiceUrl" pflag:",URL of the internal app service"` -} - // GetConfig returns the parsed runs configuration func GetConfig() *Config { return configSection.GetConfig().(*Config) From 6e12f57c964ceb3547ddf4bb8b71076212456321 Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:11:52 -0700 Subject: [PATCH 14/17] comments - update cache Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/service/app_service.go | 82 +++++++++------------------------ app/service/app_service_test.go | 16 +++---- go.mod | 1 + go.sum | 2 + runs/setup.go | 4 +- 5 files changed, 34 insertions(+), 71 deletions(-) diff --git a/app/service/app_service.go b/app/service/app_service.go index e9d9e14074..55b7073811 100644 --- a/app/service/app_service.go +++ b/app/service/app_service.go @@ -3,10 +3,10 @@ package service import ( "context" "fmt" - "sync" "time" "connectrpc.com/connect" + "github.com/hashicorp/golang-lru/v2/expirable" flyteapp "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/app/appconnect" @@ -18,15 +18,20 @@ import ( type AppService struct { appconnect.UnimplementedAppServiceHandler internalClient appconnect.AppServiceClient - cache *appCache + // cache is nil when cacheTTL=0 (caching disabled). + cache *expirable.LRU[string, *flyteapp.App] } // NewAppService creates a new AppService. // cacheTTL=0 disables caching (every Get calls InternalAppService). func NewAppService(internalClient appconnect.AppServiceClient, cacheTTL time.Duration) *AppService { + var cache *expirable.LRU[string, *flyteapp.App] + if cacheTTL > 0 { + cache = expirable.NewLRU[string, *flyteapp.App](0, nil, cacheTTL) + } return &AppService{ internalClient: internalClient, - cache: newAppCache(cacheTTL), + cache: cache, } } @@ -42,7 +47,9 @@ func (s *AppService) Create( if err != nil { return nil, err } - s.cache.invalidate(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + if s.cache != nil { + s.cache.Remove(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + } return resp, nil } @@ -52,8 +59,8 @@ func (s *AppService) Get( req *connect.Request[flyteapp.GetRequest], ) (*connect.Response[flyteapp.GetResponse], error) { appID, ok := req.Msg.GetIdentifier().(*flyteapp.GetRequest_AppId) - if ok && appID.AppId != nil { - if app, hit := s.cache.get(cacheKey(appID.AppId)); hit { + if ok && appID.AppId != nil && s.cache != nil { + if app, hit := s.cache.Get(cacheKey(appID.AppId)); hit { return connect.NewResponse(&flyteapp.GetResponse{App: app}), nil } } @@ -62,8 +69,8 @@ func (s *AppService) Get( if err != nil { return nil, err } - if ok && appID.AppId != nil { - s.cache.set(cacheKey(appID.AppId), resp.Msg.GetApp()) + if ok && appID.AppId != nil && s.cache != nil { + s.cache.Add(cacheKey(appID.AppId), resp.Msg.GetApp()) } return resp, nil } @@ -77,7 +84,9 @@ func (s *AppService) Update( if err != nil { return nil, err } - s.cache.invalidate(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + if s.cache != nil { + s.cache.Remove(cacheKey(req.Msg.GetApp().GetMetadata().GetId())) + } return resp, nil } @@ -90,7 +99,9 @@ func (s *AppService) Delete( if err != nil { return nil, err } - s.cache.invalidate(cacheKey(req.Msg.GetAppId())) + if s.cache != nil { + s.cache.Remove(cacheKey(req.Msg.GetAppId())) + } return resp, nil } @@ -121,57 +132,6 @@ func (s *AppService) Watch( return clientStream.Err() } -// --- Cache --- - -type cacheEntry struct { - app *flyteapp.App - expiresAt time.Time -} - -type appCache struct { - mu sync.RWMutex - items map[string]*cacheEntry - ttl time.Duration -} - -func newAppCache(ttl time.Duration) *appCache { - return &appCache{ - items: make(map[string]*cacheEntry), - ttl: ttl, - } -} - -// get returns the cached App for key, or (nil, false) if missing or expired. -func (c *appCache) get(key string) (*flyteapp.App, bool) { - if c.ttl == 0 { - return nil, false - } - c.mu.RLock() - entry, ok := c.items[key] - c.mu.RUnlock() - if !ok || time.Now().After(entry.expiresAt) { - return nil, false - } - return entry.app, true -} - -// set writes the App to the cache with the configured TTL. -func (c *appCache) set(key string, app *flyteapp.App) { - if c.ttl == 0 { - return - } - c.mu.Lock() - c.items[key] = &cacheEntry{app: app, expiresAt: time.Now().Add(c.ttl)} - c.mu.Unlock() -} - -// invalidate removes the cache entry for key. -func (c *appCache) invalidate(key string) { - c.mu.Lock() - delete(c.items, key) - c.mu.Unlock() -} - // cacheKey returns a stable string key for an app identifier. func cacheKey(id *flyteapp.Identifier) string { if id == nil { diff --git a/app/service/app_service_test.go b/app/service/app_service_test.go index e985a89568..7b623b8ff8 100644 --- a/app/service/app_service_test.go +++ b/app/service/app_service_test.go @@ -130,7 +130,7 @@ func TestGet_CacheHit_SkipsInternal(t *testing.T) { // Pre-populate cache. appID := testAppID() - svc.cache.set(cacheKey(appID), testApp()) + svc.cache.Add(cacheKey(appID), testApp()) // Internal should NOT be called. resp, err := svc.Get(context.Background(), connect.NewRequest(&flyteapp.GetRequest{ @@ -146,7 +146,7 @@ func TestGet_CacheExpired_CallsInternal(t *testing.T) { svc := NewAppService(internal, 1*time.Millisecond) appID := testAppID() - svc.cache.set(cacheKey(appID), testApp()) + svc.cache.Add(cacheKey(appID), testApp()) time.Sleep(5 * time.Millisecond) // let TTL expire app := testApp() @@ -169,7 +169,7 @@ func TestCreate_InvalidatesCache(t *testing.T) { app := testApp() // Pre-populate cache so we can confirm it's cleared. - svc.cache.set(cacheKey(app.Metadata.Id), app) + svc.cache.Add(cacheKey(app.Metadata.Id), app) internal.On("Create", mock.Anything, mock.Anything).Return( connect.NewResponse(&flyteapp.CreateResponse{App: app}), nil, @@ -178,7 +178,7 @@ func TestCreate_InvalidatesCache(t *testing.T) { _, err := svc.Create(context.Background(), connect.NewRequest(&flyteapp.CreateRequest{App: app})) require.NoError(t, err) - _, hit := svc.cache.get(cacheKey(app.Metadata.Id)) + _, hit := svc.cache.Get(cacheKey(app.Metadata.Id)) assert.False(t, hit, "cache should be invalidated after Create") internal.AssertExpectations(t) } @@ -188,7 +188,7 @@ func TestUpdate_InvalidatesCache(t *testing.T) { svc := NewAppService(internal, 30*time.Second) app := testApp() - svc.cache.set(cacheKey(app.Metadata.Id), app) + svc.cache.Add(cacheKey(app.Metadata.Id), app) internal.On("Update", mock.Anything, mock.Anything).Return( connect.NewResponse(&flyteapp.UpdateResponse{App: app}), nil, @@ -197,7 +197,7 @@ func TestUpdate_InvalidatesCache(t *testing.T) { _, err := svc.Update(context.Background(), connect.NewRequest(&flyteapp.UpdateRequest{App: app})) require.NoError(t, err) - _, hit := svc.cache.get(cacheKey(app.Metadata.Id)) + _, hit := svc.cache.Get(cacheKey(app.Metadata.Id)) assert.False(t, hit, "cache should be invalidated after Update") internal.AssertExpectations(t) } @@ -207,7 +207,7 @@ func TestDelete_InvalidatesCache(t *testing.T) { svc := NewAppService(internal, 30*time.Second) appID := testAppID() - svc.cache.set(cacheKey(appID), testApp()) + svc.cache.Add(cacheKey(appID), testApp()) internal.On("Delete", mock.Anything, mock.Anything).Return( connect.NewResponse(&flyteapp.DeleteResponse{}), nil, @@ -216,7 +216,7 @@ func TestDelete_InvalidatesCache(t *testing.T) { _, err := svc.Delete(context.Background(), connect.NewRequest(&flyteapp.DeleteRequest{AppId: appID})) require.NoError(t, err) - _, hit := svc.cache.get(cacheKey(appID)) + _, hit := svc.cache.Get(cacheKey(appID)) assert.False(t, hit, "cache should be invalidated after Delete") internal.AssertExpectations(t) } diff --git a/go.mod b/go.mod index d08b80b45f..d2f2463920 100644 --- a/go.mod +++ b/go.mod @@ -154,6 +154,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect diff --git a/go.sum b/go.sum index 70757065de..d2ef7ade54 100644 --- a/go.sum +++ b/go.sum @@ -366,6 +366,8 @@ github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= diff --git a/runs/setup.go b/runs/setup.go index 67d753ad74..15004ae38e 100644 --- a/runs/setup.go +++ b/runs/setup.go @@ -9,7 +9,7 @@ import ( "github.com/flyteorg/flyte/v2/flytestdlib/app" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/actions/actionsconnect" -"github.com/flyteorg/flyte/v2/gen/go/flyteidl2/auth/authconnect" + "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/auth/authconnect" projectpb "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/project" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/project/projectconnect" "github.com/flyteorg/flyte/v2/gen/go/flyteidl2/task/taskconnect" @@ -91,7 +91,7 @@ func Setup(ctx context.Context, sc *app.SetupContext) error { sc.Mux.Handle(authMetadataPath, authMetadataHandler) logger.Infof(ctx, "Mounted AuthMetadataService at %s", authMetadataPath) -triggerSvc := service.NewTriggerService() + triggerSvc := service.NewTriggerService() triggerPath, triggerHandler := triggerconnect.NewTriggerServiceHandler(triggerSvc) sc.Mux.Handle(triggerPath, triggerHandler) logger.Infof(ctx, "Mounted TriggerService at %s", triggerPath) From f3269d3ace3243d0888a1908d100ce2295dd1b2a Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:49:45 -0700 Subject: [PATCH 15/17] fix conflict Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/internal/k8s/app_client.go | 11 ++++++++--- runs/setup.go | 11 +++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/app/internal/k8s/app_client.go b/app/internal/k8s/app_client.go index bb40a12c38..ad3eeca95e 100644 --- a/app/internal/k8s/app_client.go +++ b/app/internal/k8s/app_client.go @@ -53,9 +53,10 @@ type AppK8sClientInterface interface { GetStatus(ctx context.Context, appID *flyteapp.Identifier) (*flyteapp.Status, error) // List returns apps for the given project/domain scope with optional pagination. + // If appName is non-empty, only the app with that name is returned. // limit=0 means no limit. token is the K8s continue token from a previous call. // Returns the apps, the continue token for the next page (empty if last page), and any error. - List(ctx context.Context, project, domain string, limit uint32, token string) ([]*flyteapp.App, string, error) + List(ctx context.Context, project, domain, appName string, limit uint32, token string) ([]*flyteapp.App, string, error) // Delete removes the KService CRD entirely. The app must be re-created from scratch. // Use Stop to scale to zero while preserving the KService. @@ -269,12 +270,16 @@ func (c *AppK8sClient) GetStatus(ctx context.Context, appID *flyteapp.Identifier } // List returns apps for the given project/domain scope with optional pagination. -func (c *AppK8sClient) List(ctx context.Context, project, domain string, limit uint32, token string) ([]*flyteapp.App, string, error) { +func (c *AppK8sClient) List(ctx context.Context, project, domain, appName string, limit uint32, token string) ([]*flyteapp.App, string, error) { ns := appNamespace(project, domain) + matchLabels := client.MatchingLabels{labelAppManaged: "true"} + if appName != "" { + matchLabels[labelAppName] = strings.ToLower(appName) + } listOpts := []client.ListOption{ client.InNamespace(ns), - client.MatchingLabels{labelAppManaged: "true"}, + matchLabels, } if limit > 0 { listOpts = append(listOpts, client.Limit(int64(limit))) diff --git a/runs/setup.go b/runs/setup.go index 15004ae38e..8a0c45792e 100644 --- a/runs/setup.go +++ b/runs/setup.go @@ -21,6 +21,7 @@ import ( "github.com/flyteorg/flyte/v2/runs/repository/impl" "github.com/flyteorg/flyte/v2/runs/repository/interfaces" "github.com/flyteorg/flyte/v2/runs/repository/models" + "github.com/flyteorg/flyte/v2/runs/scheduler" "github.com/flyteorg/flyte/v2/runs/service" "github.com/flyteorg/flyte/v2/flytestdlib/logger" @@ -123,6 +124,16 @@ func Setup(ctx context.Context, sc *app.SetupContext) error { return fmt.Errorf("runs: failed to seed projects: %w", err) } + if cfg.TriggerScheduler.Enabled { + runsURL := cfg.ActionsServiceURL + if sc.BaseURL != "" { + runsURL = sc.BaseURL + } + worker := scheduler.Start(ctx, repo.TriggerRepo(), cfg.TriggerScheduler, runsURL) + sc.AddWorker("trigger-scheduler", worker) + logger.Infof(ctx, "Registered trigger-scheduler worker") + } + sc.AddReadyCheck(func(r *http.Request) error { if err := sc.DB.PingContext(r.Context()); err != nil { return fmt.Errorf("database ping failed: %w", err) From 0dee79af41092336095cbac4496b0a5342d5e1bb Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:51:57 -0700 Subject: [PATCH 16/17] go mod tidy Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index d2f2463920..acc2fdce87 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ require ( github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.1.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 github.com/hashicorp/golang-lru v1.0.2 + github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/imdario/mergo v0.3.16 github.com/jackc/pgconn v1.14.3 github.com/jackc/pgx/v5 v5.7.6 @@ -154,7 +155,6 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect - github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect From de9ff926fdf9957e62abb0d9bc7822eb6151e1ed Mon Sep 17 00:00:00 2001 From: "M. Adil Fayyaz" <62440954+AdilFayyaz@users.noreply.github.com> Date: Mon, 20 Apr 2026 13:02:08 -0700 Subject: [PATCH 17/17] fix stale signatures Signed-off-by: M. Adil Fayyaz <62440954+AdilFayyaz@users.noreply.github.com> --- app/internal/k8s/app_client_test.go | 2 +- runs/setup.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/internal/k8s/app_client_test.go b/app/internal/k8s/app_client_test.go index 06f49a3e56..05e9f9d7d4 100644 --- a/app/internal/k8s/app_client_test.go +++ b/app/internal/k8s/app_client_test.go @@ -283,7 +283,7 @@ func TestList(t *testing.T) { }, } - apps, nextToken, err := c.List(context.Background(), "proj", "dev", 0, "") + apps, nextToken, err := c.List(context.Background(), "proj", "dev", "", 0, "") require.NoError(t, err) assert.Empty(t, nextToken) require.Len(t, apps, 1) diff --git a/runs/setup.go b/runs/setup.go index 8a0c45792e..88f0663954 100644 --- a/runs/setup.go +++ b/runs/setup.go @@ -92,7 +92,7 @@ func Setup(ctx context.Context, sc *app.SetupContext) error { sc.Mux.Handle(authMetadataPath, authMetadataHandler) logger.Infof(ctx, "Mounted AuthMetadataService at %s", authMetadataPath) - triggerSvc := service.NewTriggerService() + triggerSvc := service.NewTriggerService(repo) triggerPath, triggerHandler := triggerconnect.NewTriggerServiceHandler(triggerSvc) sc.Mux.Handle(triggerPath, triggerHandler) logger.Infof(ctx, "Mounted TriggerService at %s", triggerPath)