Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas
---

- feat(controller): expose low-cardinality AgentTeams controller metrics and optional Helm ServiceMonitor.
- feat(controller): add Matrix AppService Human SSO identity provisioning with hash-derived Matrix IDs, AppService login, deletion deactivation, and Team admin/member identity resolution from Human status.
- fix(agent): update file-sharing path guidance for CoPaw and Team Leader agents to use `/root/hiclaw-fs/agents/...` instead of the retired `/root/.hiclaw-worker/...` path.
- fix(copaw): harden Matrix channel control-command handling, task-thread routing, NO_REPLY suppression, and cancellation noise handling.
- feat(controller): add OpenKruise Sandbox backend support for Workers via `spec.backendRuntime=sandbox`, including SandboxClaim lifecycle, status watches, CRD schema, and Helm RBAC/env wiring.
Expand Down Expand Up @@ -33,3 +34,4 @@ Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas
- **Remote Worker applied target auth**: Remote Worker authentication now prefers the status-pinned deployment target and falls back to spec only before first provisioning, so spec target edits do not immediately break the running remote Worker or trust a target before it is applied.
- **Remote Worker lifecycle boundary**: Workers now record the applied deployment target in status, reject running target changes until the Worker is Stopped, clean up using the applied target, and register remote Pod watches for Worker/Team status updates.
- **Team Worker CR decoupling**: Worker identity enrichment and Worker REST APIs now resolve `spec.workerMembers` references, and Teams reject sharing the same referenced Worker CR before injecting coordination context.
- **Matrix AppService integration**: SSO Human Team admins now resolve through the Human identity source, and Matrix AppService transaction push routes are wired into the controller registration path.
9 changes: 9 additions & 0 deletions helm/hiclaw/crds/humans.agentteams.io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ spec:
items:
type: string
description: Standalone workers this human can access (L2/L3)
identitySource:
type: object
nullable: true
properties:
issuer:
type: string
subject:
type: string
required: [issuer, subject]
note:
type: string
status:
Expand Down
3 changes: 3 additions & 0 deletions helm/hiclaw/crds/workers.agentteams.io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ spec:
lastHeartbeat:
type: string
format: date-time
lastActiveAt:
type: string
format: date-time
message:
type: string
exposedPorts:
Expand Down
21 changes: 14 additions & 7 deletions hiclaw-controller/api/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ type WorkerStatus struct {
RoomID string `json:"roomID,omitempty"`
ContainerState string `json:"containerState,omitempty"`
LastHeartbeat string `json:"lastHeartbeat,omitempty"`
LastActiveAt string `json:"lastActiveAt,omitempty"`
Message string `json:"message,omitempty"`
ExposedPorts []ExposedPortStatus `json:"exposedPorts,omitempty"`

Expand Down Expand Up @@ -564,13 +565,19 @@ type Human struct {
}

type HumanSpec struct {
DisplayName string `json:"displayName"`
Username string `json:"username,omitempty"`
Email string `json:"email,omitempty"`
PermissionLevel int `json:"permissionLevel"` // 1=Admin, 2=Team, 3=Worker
AccessibleTeams []string `json:"accessibleTeams,omitempty"`
AccessibleWorkers []string `json:"accessibleWorkers,omitempty"`
Note string `json:"note,omitempty"`
DisplayName string `json:"displayName"`
Username string `json:"username,omitempty"`
Email string `json:"email,omitempty"`
PermissionLevel int `json:"permissionLevel"` // 1=Admin, 2=Team, 3=Worker
AccessibleTeams []string `json:"accessibleTeams,omitempty"`
AccessibleWorkers []string `json:"accessibleWorkers,omitempty"`
IdentitySource *IdentitySourceSpec `json:"identitySource,omitempty"`
Note string `json:"note,omitempty"`
}

// IdentitySourceSpec names the external identity a Human is bound to, as an
// issuer/subject pair. It is referenced from HumanSpec.IdentitySource, where
// it is optional; when it is set, the CRD schema requires both fields.
type IdentitySourceSpec struct {
// Issuer is serialized as "issuer".
// NOTE(review): presumably the SSO identity provider's issuer — confirm.
Issuer string `json:"issuer"`
// Subject is serialized as "subject".
// NOTE(review): presumably the provider-scoped user identifier — confirm.
Subject string `json:"subject"`
}

type HumanStatus struct {
Expand Down
20 changes: 20 additions & 0 deletions hiclaw-controller/api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions hiclaw-controller/config/crd/humans.agentteams.io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ spec:
items:
type: string
description: Standalone workers this human can access (L2/L3)
identitySource:
type: object
nullable: true
properties:
issuer:
type: string
subject:
type: string
required: [issuer, subject]
note:
type: string
status:
Expand Down
3 changes: 3 additions & 0 deletions hiclaw-controller/config/crd/workers.agentteams.io.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ spec:
lastHeartbeat:
type: string
format: date-time
lastActiveAt:
type: string
format: date-time
message:
type: string
exposedPorts:
Expand Down
1 change: 1 addition & 0 deletions hiclaw-controller/internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ func (a *App) Start(ctx context.Context) error {
AppServiceToken: a.cfg.MatrixAppServiceASToken,
AppServiceHSToken: a.cfg.MatrixAppServiceHSToken,
AppServiceSenderLocalpart: a.cfg.MatrixAppServiceSenderLocalpart,
AppServicePushURL: a.cfg.MatrixAppServicePushURL,
MatrixDomain: a.cfg.MatrixDomain,
},
}
Expand Down
12 changes: 12 additions & 0 deletions hiclaw-controller/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"

v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1"
"github.com/hiclaw/hiclaw-controller/internal/agentconfig"
Expand Down Expand Up @@ -144,6 +145,7 @@ type Config struct {
MatrixAppServiceHSToken string
MatrixAppServiceSenderLocalpart string
MatrixAppServiceUserNamespaceRegex string
MatrixAppServicePushURL string

// Auto-generation tracking (not exported to env / child containers)
MatrixAppServiceASTokenAutoGenerated bool `json:"-"`
Expand Down Expand Up @@ -420,6 +422,7 @@ func LoadConfig() *Config {
// Tokens must be provided via env vars (set by install script or manually).
// We do NOT auto-generate at runtime to prevent token drift across restarts.
if cfg.MatrixAppServiceEnabled {
cfg.MatrixAppServicePushURL = appServicePushURL(cfg.ControllerURL)
if cfg.MatrixAppServiceASToken == "" {
panic("HICLAW_MATRIX_APPSERVICE_AS_TOKEN is required when AppService mode is enabled; run install script or set env var")
}
Expand Down Expand Up @@ -700,9 +703,18 @@ func (c *Config) MatrixConfig() matrix.Config {
AppServiceHSToken: c.MatrixAppServiceHSToken,
AppServiceSenderLocalpart: c.MatrixAppServiceSenderLocalpart,
AppServiceUserNamespaceRegex: c.MatrixAppServiceUserNamespaceRegex,
AppServicePushURL: c.MatrixAppServicePushURL,
}
}

// appServicePushURL builds the Matrix AppService push endpoint from the
// controller's base URL. Surrounding whitespace and any trailing slashes are
// removed from controllerURL before "/_matrix/app/v1" is appended. If nothing
// remains after trimming, it returns the empty string so callers can tell that
// no push URL is available.
func appServicePushURL(controllerURL string) string {
	base := strings.TrimSpace(controllerURL)
	base = strings.TrimRight(base, "/")
	if base != "" {
		return base + "/_matrix/app/v1"
	}
	return ""
}

func (c *Config) GatewayConfig() gateway.Config {
return gateway.Config{
ConsoleURL: c.HigressBaseURL,
Expand Down
10 changes: 10 additions & 0 deletions hiclaw-controller/internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,16 @@ func TestLoadConfigPrefersAbstractInfraEnv(t *testing.T) {
}
}

// TestMatrixConfigIncludesAppServicePushURL checks that MatrixConfig passes the
// configured AppService push URL through, and that appServicePushURL drops the
// trailing slash of the controller URL before appending the AppService path.
func TestMatrixConfigIncludesAppServicePushURL(t *testing.T) {
	const want = "http://controller.example.com:8090/_matrix/app/v1"

	cfg := &Config{
		MatrixAppServicePushURL: appServicePushURL("http://controller.example.com:8090/"),
	}

	got := cfg.MatrixConfig().AppServicePushURL
	if got != want {
		t.Fatalf("AppServicePushURL = %q, want %q", got, want)
	}
}

func TestLoadConfigUsesSharedAdminCredentialsForHigress(t *testing.T) {
t.Setenv("HICLAW_ADMIN_USER", "shared-admin")
t.Setenv("HICLAW_ADMIN_PASSWORD", "shared-secret")
Expand Down
63 changes: 59 additions & 4 deletions hiclaw-controller/internal/controller/human_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@ package controller

import (
"context"
"errors"
"fmt"
"time"

v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1"
"github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity"
_ "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity/externalsso"
_ "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity/legacypassword"
"github.com/hiclaw/hiclaw-controller/internal/matrix"
"github.com/hiclaw/hiclaw-controller/internal/metrics"
"github.com/hiclaw/hiclaw-controller/internal/service"
kerrors "k8s.io/apimachinery/pkg/util/errors"
Expand All @@ -15,6 +21,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// appServiceNotReadyRequeue is the requeue delay reconcileHumanNormal uses
// when infra reconciliation reports matrix.ErrAppServiceNotReady. In that case
// the reconcile returns no error and is retried after this interval.
const appServiceNotReadyRequeue = 5 * time.Second

// HumanReconciler reconciles Human resources using Service-layer orchestration.
//
// Unlike Worker/Manager, a Human has no backend container and no gateway
Expand Down Expand Up @@ -56,29 +64,40 @@ func (r *HumanReconciler) Reconcile(ctx context.Context, req reconcile.Request)
return
}

human.Status.Phase = computeHumanPhase(&human, reterr)
if reterr == nil {
human.Status.Message = ""
if human.Status.Phase != "Degraded" {
human.Status.Message = ""
}
} else {
human.Status.Message = reterr.Error()
}
human.Status.Phase = computeHumanPhase(&human, reterr)

if err := r.Status().Patch(ctx, &human, patchBase); err != nil {
logger.Error(err, "failed to patch human status")
logger.Error(err, "failed to patch human status; CR will appear to have no status",
"name", human.Name, "phase", human.Status.Phase, "matrixUserID", human.Status.MatrixUserID)
reterr = kerrors.NewAggregate([]error{reterr, err})
return
}
logger.Info("human status patched",
"name", human.Name, "phase", human.Status.Phase,
"matrixUserID", human.Status.MatrixUserID, "reconcileFailed", reterr != nil)
}()

if !human.DeletionTimestamp.IsZero() {
if controllerutil.ContainsFinalizer(&human, finalizerName) {
if err := r.resolveHumanScope(s); err != nil && human.Status.MatrixUserID == "" {
logger.Error(err, "failed to resolve deleting human identity; continuing best-effort cleanup", "name", human.Name)
}
return r.reconcileHumanDelete(ctx, s)
}
return reconcile.Result{}, nil
}

if !controllerutil.ContainsFinalizer(&human, finalizerName) {
base := human.DeepCopy()
controllerutil.AddFinalizer(&human, finalizerName)
if err := r.Update(ctx, &human); err != nil {
if err := r.Patch(ctx, &human, client.MergeFrom(base)); err != nil {
return reconcile.Result{}, err
}
}
Expand All @@ -92,7 +111,17 @@ func (r *HumanReconciler) Reconcile(ctx context.Context, req reconcile.Request)
// phases log errors but never return them, so a transient Matrix hiccup
// on room invite/kick does not block the next reconcile.
func (r *HumanReconciler) reconcileHumanNormal(ctx context.Context, s *humanScope) (reconcile.Result, error) {
if err := r.resolveHumanScope(s); err != nil {
s.human.Status.Phase = "Degraded"
s.human.Status.Message = err.Error()
return reconcile.Result{RequeueAfter: reconcileInterval}, nil
}
if err := r.reconcileHumanInfra(ctx, s); err != nil {
if errors.Is(err, matrix.ErrAppServiceNotReady) {
log.FromContext(ctx).Info("Matrix AppService not active yet; requeueing human provisioning",
"name", s.human.Name)
return reconcile.Result{RequeueAfter: appServiceNotReadyRequeue}, nil
}
return reconcile.Result{RequeueAfter: reconcileInterval}, err
}
r.reconcileHumanRooms(ctx, s)
Expand All @@ -101,6 +130,32 @@ func (r *HumanReconciler) reconcileHumanNormal(ctx context.Context, s *humanScop
return reconcile.Result{RequeueAfter: reconcileInterval}, nil
}

// resolveHumanScope derives the Human's identity from its spec and name and
// records it on the scope (s.identity and s.username). It returns the
// resolver's error unchanged, or an error when the derived Matrix user ID
// differs from the one already stored in status. When the resolved identity
// does not manage an initial password, Status.InitialPassword is cleared.
func (r *HumanReconciler) resolveHumanScope(s *humanScope) error {
	deps := humanidentity.Deps{Provisioner: r.Provisioner}
	identity, err := humanidentity.ResolveHuman(&s.human.Spec, s.human.Name, deps)
	if err != nil {
		return err
	}

	// Once a Matrix account exists, the derived MXID is the human's stable
	// identity. Any change to it — switching to/from SSO, editing
	// identitySource.subject, or renaming the legacy username — means a
	// different account. Re-provisioning in place would leave Status.Rooms
	// pointing at the previous user's memberships, so the rooms phase would
	// treat them as already observed and never invite/join the new user,
	// leaving a Human that looks Active but whose new identity is in no
	// rooms. Block the switch and require recreating the CR instead.
	if recorded := s.human.Status.MatrixUserID; recorded != "" && recorded != identity.MatrixUserID {
		return fmt.Errorf("identitySource changed; recreate CR to switch identity")
	}

	s.identity = identity
	s.username = identity.MatrixLocalpart
	if identity.ManagesInitialPassword {
		return nil
	}
	// Identities that do not manage an initial password must not keep one in status.
	s.human.Status.InitialPassword = ""
	return nil
}

func (r *HumanReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&v1beta1.Human{}).
Expand Down
Loading
Loading