From b7863cdb7e1e18c7ab37f96bffad710ce3cdd62a Mon Sep 17 00:00:00 2001 From: shiyiyue1102 Date: Sat, 4 Jul 2026 20:28:41 +0800 Subject: [PATCH 1/4] feat(controller): add Matrix Human SSO identity Change-Id: I6fd3f33fc66bd1dd480345cddeffa584b9392dbb --- changelog/current.md | 1 + helm/hiclaw/crds/humans.agentteams.io.yaml | 9 + helm/hiclaw/crds/workers.agentteams.io.yaml | 3 + hiclaw-controller/api/v1beta1/types.go | 21 +- .../api/v1beta1/zz_generated.deepcopy.go | 20 + .../config/crd/humans.agentteams.io.yaml | 9 + .../config/crd/workers.agentteams.io.yaml | 3 + .../internal/controller/human_controller.go | 63 ++- .../controller/human_controller_test.go | 158 +++++- .../controller/human_reconcile_delete.go | 12 +- .../controller/human_reconcile_infra.go | 49 +- .../controller/human_reconcile_rooms.go | 9 +- .../internal/controller/human_scope.go | 17 +- .../humanidentity/externalsso/source.go | 125 +++++ .../humanidentity/identity_source.go | 237 ++++++++ .../humanidentity/legacypassword/source.go | 58 ++ .../internal/matrix/appservice.go | 8 +- .../internal/matrix/appservice_config.go | 35 ++ hiclaw-controller/internal/matrix/client.go | 299 +++++++++- .../internal/matrix/client_test.go | 212 ++++++- hiclaw-controller/internal/matrix/types.go | 15 +- .../internal/server/appservice_handler.go | 446 +++++++++++++-- .../server/appservice_handler_test.go | 529 ++++++++++++++++++ .../server/appservice_mgmt_handler.go | 76 +++ .../internal/service/interfaces.go | 21 + .../internal/service/provisioner_human.go | 157 +++++- .../internal/service/provisioner_team_test.go | 11 + .../internal/service/room_meta.go | 122 ++++ .../test/testutil/mocks/human_provisioner.go | 159 +++++- 29 files changed, 2719 insertions(+), 165 deletions(-) create mode 100644 hiclaw-controller/internal/controller/humanidentity/externalsso/source.go create mode 100644 hiclaw-controller/internal/controller/humanidentity/identity_source.go create mode 100644 
hiclaw-controller/internal/controller/humanidentity/legacypassword/source.go create mode 100644 hiclaw-controller/internal/matrix/appservice_config.go create mode 100644 hiclaw-controller/internal/server/appservice_handler_test.go create mode 100644 hiclaw-controller/internal/server/appservice_mgmt_handler.go create mode 100644 hiclaw-controller/internal/service/room_meta.go diff --git a/changelog/current.md b/changelog/current.md index d22ee32b3..88faa0463 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -5,6 +5,7 @@ Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas --- - feat(controller): expose low-cardinality AgentTeams controller metrics and optional Helm ServiceMonitor. +- feat(controller): add Matrix AppService Human SSO identity provisioning with hash-derived Matrix IDs, AppService login, deletion deactivation, and Team admin/member identity resolution from Human status. - fix(agent): update file-sharing path guidance for CoPaw and Team Leader agents to use `/root/hiclaw-fs/agents/...` instead of the retired `/root/.hiclaw-worker/...` path. - fix(copaw): harden Matrix channel control-command handling, task-thread routing, NO_REPLY suppression, and cancellation noise handling. - feat(controller): add OpenKruise Sandbox backend support for Workers via `spec.backendRuntime=sandbox`, including SandboxClaim lifecycle, status watches, CRD schema, and Helm RBAC/env wiring. 
diff --git a/helm/hiclaw/crds/humans.agentteams.io.yaml b/helm/hiclaw/crds/humans.agentteams.io.yaml index e1fe25a20..63f64f809 100644 --- a/helm/hiclaw/crds/humans.agentteams.io.yaml +++ b/helm/hiclaw/crds/humans.agentteams.io.yaml @@ -39,6 +39,15 @@ spec: items: type: string description: Standalone workers this human can access (L2/L3) + identitySource: + type: object + nullable: true + properties: + issuer: + type: string + subject: + type: string + required: [issuer, subject] note: type: string status: diff --git a/helm/hiclaw/crds/workers.agentteams.io.yaml b/helm/hiclaw/crds/workers.agentteams.io.yaml index 3dd73cfd6..3a62e9b73 100644 --- a/helm/hiclaw/crds/workers.agentteams.io.yaml +++ b/helm/hiclaw/crds/workers.agentteams.io.yaml @@ -267,6 +267,9 @@ spec: lastHeartbeat: type: string format: date-time + lastActiveAt: + type: string + format: date-time message: type: string exposedPorts: diff --git a/hiclaw-controller/api/v1beta1/types.go b/hiclaw-controller/api/v1beta1/types.go index 4adbc0d33..61e3d3275 100644 --- a/hiclaw-controller/api/v1beta1/types.go +++ b/hiclaw-controller/api/v1beta1/types.go @@ -248,6 +248,7 @@ type WorkerStatus struct { RoomID string `json:"roomID,omitempty"` ContainerState string `json:"containerState,omitempty"` LastHeartbeat string `json:"lastHeartbeat,omitempty"` + LastActiveAt string `json:"lastActiveAt,omitempty"` Message string `json:"message,omitempty"` ExposedPorts []ExposedPortStatus `json:"exposedPorts,omitempty"` @@ -564,13 +565,19 @@ type Human struct { } type HumanSpec struct { - DisplayName string `json:"displayName"` - Username string `json:"username,omitempty"` - Email string `json:"email,omitempty"` - PermissionLevel int `json:"permissionLevel"` // 1=Admin, 2=Team, 3=Worker - AccessibleTeams []string `json:"accessibleTeams,omitempty"` - AccessibleWorkers []string `json:"accessibleWorkers,omitempty"` - Note string `json:"note,omitempty"` + DisplayName string `json:"displayName"` + Username string 
`json:"username,omitempty"` + Email string `json:"email,omitempty"` + PermissionLevel int `json:"permissionLevel"` // 1=Admin, 2=Team, 3=Worker + AccessibleTeams []string `json:"accessibleTeams,omitempty"` + AccessibleWorkers []string `json:"accessibleWorkers,omitempty"` + IdentitySource *IdentitySourceSpec `json:"identitySource,omitempty"` + Note string `json:"note,omitempty"` +} + +type IdentitySourceSpec struct { + Issuer string `json:"issuer"` + Subject string `json:"subject"` } type HumanStatus struct { diff --git a/hiclaw-controller/api/v1beta1/zz_generated.deepcopy.go b/hiclaw-controller/api/v1beta1/zz_generated.deepcopy.go index 8604a6ede..a66add0ce 100644 --- a/hiclaw-controller/api/v1beta1/zz_generated.deepcopy.go +++ b/hiclaw-controller/api/v1beta1/zz_generated.deepcopy.go @@ -203,6 +203,11 @@ func (in *HumanSpec) DeepCopyInto(out *HumanSpec) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.IdentitySource != nil { + in, out := &in.IdentitySource, &out.IdentitySource + *out = new(IdentitySourceSpec) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HumanSpec. @@ -225,6 +230,21 @@ func (in *HumanStatus) DeepCopyInto(out *HumanStatus) { } } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *IdentitySourceSpec) DeepCopyInto(out *IdentitySourceSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IdentitySourceSpec. +func (in *IdentitySourceSpec) DeepCopy() *IdentitySourceSpec { + if in == nil { + return nil + } + out := new(IdentitySourceSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HumanStatus. 
func (in *HumanStatus) DeepCopy() *HumanStatus { if in == nil { diff --git a/hiclaw-controller/config/crd/humans.agentteams.io.yaml b/hiclaw-controller/config/crd/humans.agentteams.io.yaml index e1fe25a20..63f64f809 100644 --- a/hiclaw-controller/config/crd/humans.agentteams.io.yaml +++ b/hiclaw-controller/config/crd/humans.agentteams.io.yaml @@ -39,6 +39,15 @@ spec: items: type: string description: Standalone workers this human can access (L2/L3) + identitySource: + type: object + nullable: true + properties: + issuer: + type: string + subject: + type: string + required: [issuer, subject] note: type: string status: diff --git a/hiclaw-controller/config/crd/workers.agentteams.io.yaml b/hiclaw-controller/config/crd/workers.agentteams.io.yaml index 3dd73cfd6..3a62e9b73 100644 --- a/hiclaw-controller/config/crd/workers.agentteams.io.yaml +++ b/hiclaw-controller/config/crd/workers.agentteams.io.yaml @@ -267,6 +267,9 @@ spec: lastHeartbeat: type: string format: date-time + lastActiveAt: + type: string + format: date-time message: type: string exposedPorts: diff --git a/hiclaw-controller/internal/controller/human_controller.go b/hiclaw-controller/internal/controller/human_controller.go index 0aeb94f96..0ea4bd500 100644 --- a/hiclaw-controller/internal/controller/human_controller.go +++ b/hiclaw-controller/internal/controller/human_controller.go @@ -2,9 +2,15 @@ package controller import ( "context" + "errors" + "fmt" "time" v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity" + _ "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity/externalsso" + _ "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity/legacypassword" + "github.com/hiclaw/hiclaw-controller/internal/matrix" "github.com/hiclaw/hiclaw-controller/internal/metrics" "github.com/hiclaw/hiclaw-controller/internal/service" kerrors "k8s.io/apimachinery/pkg/util/errors" @@ -15,6 +21,8 @@ import ( 
"sigs.k8s.io/controller-runtime/pkg/reconcile" ) +const appServiceNotReadyRequeue = 5 * time.Second + // HumanReconciler reconciles Human resources using Service-layer orchestration. // // Unlike Worker/Manager, a Human has no backend container and no gateway @@ -56,29 +64,40 @@ func (r *HumanReconciler) Reconcile(ctx context.Context, req reconcile.Request) return } - human.Status.Phase = computeHumanPhase(&human, reterr) if reterr == nil { - human.Status.Message = "" + if human.Status.Phase != "Degraded" { + human.Status.Message = "" + } } else { human.Status.Message = reterr.Error() } + human.Status.Phase = computeHumanPhase(&human, reterr) if err := r.Status().Patch(ctx, &human, patchBase); err != nil { - logger.Error(err, "failed to patch human status") + logger.Error(err, "failed to patch human status; CR will appear to have no status", + "name", human.Name, "phase", human.Status.Phase, "matrixUserID", human.Status.MatrixUserID) reterr = kerrors.NewAggregate([]error{reterr, err}) + return } + logger.Info("human status patched", + "name", human.Name, "phase", human.Status.Phase, + "matrixUserID", human.Status.MatrixUserID, "reconcileFailed", reterr != nil) }() if !human.DeletionTimestamp.IsZero() { if controllerutil.ContainsFinalizer(&human, finalizerName) { + if err := r.resolveHumanScope(s); err != nil && human.Status.MatrixUserID == "" { + logger.Error(err, "failed to resolve deleting human identity; continuing best-effort cleanup", "name", human.Name) + } return r.reconcileHumanDelete(ctx, s) } return reconcile.Result{}, nil } if !controllerutil.ContainsFinalizer(&human, finalizerName) { + base := human.DeepCopy() controllerutil.AddFinalizer(&human, finalizerName) - if err := r.Update(ctx, &human); err != nil { + if err := r.Patch(ctx, &human, client.MergeFrom(base)); err != nil { return reconcile.Result{}, err } } @@ -92,7 +111,17 @@ func (r *HumanReconciler) Reconcile(ctx context.Context, req reconcile.Request) // phases log errors but never return them, 
so a transient Matrix hiccup // on room invite/kick does not block the next reconcile. func (r *HumanReconciler) reconcileHumanNormal(ctx context.Context, s *humanScope) (reconcile.Result, error) { + if err := r.resolveHumanScope(s); err != nil { + s.human.Status.Phase = "Degraded" + s.human.Status.Message = err.Error() + return reconcile.Result{RequeueAfter: reconcileInterval}, nil + } if err := r.reconcileHumanInfra(ctx, s); err != nil { + if errors.Is(err, matrix.ErrAppServiceNotReady) { + log.FromContext(ctx).Info("Matrix AppService not active yet; requeueing human provisioning", + "name", s.human.Name) + return reconcile.Result{RequeueAfter: appServiceNotReadyRequeue}, nil + } return reconcile.Result{RequeueAfter: reconcileInterval}, err } r.reconcileHumanRooms(ctx, s) @@ -101,6 +130,32 @@ func (r *HumanReconciler) reconcileHumanNormal(ctx context.Context, s *humanScop return reconcile.Result{RequeueAfter: reconcileInterval}, nil } +func (r *HumanReconciler) resolveHumanScope(s *humanScope) error { + resolved, err := humanidentity.ResolveHuman(&s.human.Spec, s.human.Name, humanidentity.Deps{ + Provisioner: r.Provisioner, + }) + if err != nil { + return err + } + // Once a Matrix account exists, the derived MXID is the human's stable + // identity. Any change to it — switching to/from SSO, editing + // identitySource.subject, or renaming the legacy username — means a + // different account. Re-provisioning in place would leave Status.Rooms + // pointing at the previous user's memberships, so the rooms phase would + // treat them as already observed and never invite/join the new user, + // leaving a Human that looks Active but whose new identity is in no + // rooms. Block the switch and require recreating the CR instead. 
+ if s.human.Status.MatrixUserID != "" && s.human.Status.MatrixUserID != resolved.MatrixUserID { + return fmt.Errorf("identitySource changed; recreate CR to switch identity") + } + s.identity = resolved + s.username = resolved.MatrixLocalpart + if !resolved.ManagesInitialPassword { + s.human.Status.InitialPassword = "" + } + return nil +} + func (r *HumanReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&v1beta1.Human{}). diff --git a/hiclaw-controller/internal/controller/human_controller_test.go b/hiclaw-controller/internal/controller/human_controller_test.go index 08244ec9a..fb1317a13 100644 --- a/hiclaw-controller/internal/controller/human_controller_test.go +++ b/hiclaw-controller/internal/controller/human_controller_test.go @@ -106,9 +106,9 @@ func newReadyTeam(name, roomID string) *v1beta1.Team { // double the initial-provisioning latency and create opportunities for // partial-state visibility. // -// LoginAsHuman must NOT be called during first-time create: EnsureHumanUser -// already returns a usable access token, so a fresh provisioning reconcile -// costs exactly one Matrix device session. +// Steady-state login must NOT be called during first-time create: +// EnsurePrecreated already returns a usable access token, so a fresh +// provisioning reconcile costs exactly one Matrix device session. 
func TestHumanReconciler_Create_HappyPath(t *testing.T) { worker := newReadyWorker("w1", "!room-w1:localhost") team := newReadyTeam("t1", "!room-t1:localhost") @@ -130,9 +130,9 @@ func TestHumanReconciler_Create_HappyPath(t *testing.T) { t.Errorf("EnsureHumanUser called %d times, want 1: %+v", len(rig.prov.Calls.EnsureHumanUser), rig.prov.Calls.EnsureHumanUser) } - if len(rig.prov.Calls.LoginAsHuman) != 0 { - t.Errorf("LoginAsHuman should not be called during first-time create (EnsureHumanUser already returned a token); got %d calls", - len(rig.prov.Calls.LoginAsHuman)) + if len(rig.prov.Calls.LoginWithPassword) != 0 { + t.Errorf("LoginWithPassword should not be called during first-time create (EnsurePrecreated already returned a token); got %d calls", + len(rig.prov.Calls.LoginWithPassword)) } if len(rig.prov.Calls.InviteToRoom) != 2 { @@ -176,6 +176,53 @@ func TestHumanReconciler_Create_HappyPath(t *testing.T) { } } +// TestHumanReconciler_FinalizerPatchPreservesIdentitySource locks in the +// Worker-style MergeFrom patch when adding the cleanup finalizer. A full +// Update would rewrite the entire spec from the in-memory object and can +// drop fields the typed client failed to round-trip (notably +// spec.identitySource), silently converting SSO Humans into legacy ones. 
+func TestHumanReconciler_FinalizerPatchPreservesIdentitySource(t *testing.T) { + issuer := "https://idp.example.com/pool" + subject := "user-subject-123" + human := newHuman("sso-user", v1beta1.HumanSpec{ + DisplayName: "SSO User", + Username: "sso-user", + PermissionLevel: 1, + IdentitySource: &v1beta1.IdentitySourceSpec{ + Issuer: issuer, + Subject: subject, + }, + }) + + rig := newHumanRig(t, human) + rig.prov.AppServiceEnabled = true + + out, _, err := rig.reconcile("sso-user") + if err != nil { + t.Fatalf("reconcile: %v", err) + } + + if out.Spec.IdentitySource == nil { + t.Fatal("spec.identitySource was dropped while adding finalizer") + } + if out.Spec.IdentitySource.Issuer != issuer { + t.Errorf("spec.identitySource.issuer=%q, want %q", out.Spec.IdentitySource.Issuer, issuer) + } + if out.Spec.IdentitySource.Subject != subject { + t.Errorf("spec.identitySource.subject=%q, want %q", out.Spec.IdentitySource.Subject, subject) + } + if len(rig.prov.Calls.RegisterLegacyUser) != 0 { + t.Errorf("RegisterLegacyUser should not be called for SSO human, got %d calls", + len(rig.prov.Calls.RegisterLegacyUser)) + } + if len(rig.prov.Calls.RegisterAppServiceUser) != 1 { + t.Errorf("RegisterAppServiceUser calls=%d, want 1", len(rig.prov.Calls.RegisterAppServiceUser)) + } + if out.Status.InitialPassword != "" { + t.Errorf("Status.InitialPassword=%q, want empty for SSO human", out.Status.InitialPassword) + } +} + // TestHumanReconciler_Update_AddRoom adds a new AccessibleTeam to an // already-Active Human and asserts that only the new room triggers // Invite/Join (no duplicate calls against rooms already in Status.Rooms). @@ -213,12 +260,17 @@ func TestHumanReconciler_Update_AddRoom(t *testing.T) { t.Fatalf("reconcile: %v", err) } - // Steady-state login, not EnsureHumanUser. 
+ // Steady-state token acquisition goes through the legacy_password + // identity source's EnsureUserToken -> LoginWithPassword, not the + // EnsureHumanUser composite and not the legacy LoginAsHuman shim. if len(rig.prov.Calls.EnsureHumanUser) != 0 { t.Errorf("EnsureHumanUser should not be called on update, got %d calls", len(rig.prov.Calls.EnsureHumanUser)) } - if len(rig.prov.Calls.LoginAsHuman) != 1 { - t.Errorf("LoginAsHuman calls=%d, want 1", len(rig.prov.Calls.LoginAsHuman)) + if len(rig.prov.Calls.LoginWithPassword) != 1 { + t.Errorf("LoginWithPassword calls=%d, want 1", len(rig.prov.Calls.LoginWithPassword)) + } + if len(rig.prov.Calls.LoginAsHuman) != 0 { + t.Errorf("LoginAsHuman (legacy shim) should not be used by the human steady-state path, got %d calls", len(rig.prov.Calls.LoginAsHuman)) } // Exactly one new room's worth of work: the team room. @@ -315,13 +367,13 @@ func TestHumanReconciler_Update_PendingResource(t *testing.T) { t.Errorf("no joins expected for pending worker, got: %+v", rig.prov.Calls.JoinRoomAs) } // With lazy login, a reconcile that has no new rooms to /join must - // not trigger LoginAsHuman either — EnsureHumanUser on this + // not trigger a steady-state login either — EnsurePrecreated on this // first-time pass already seeded scope.userToken, but there's no // work for it to do, and we must not generate a fresh device when // the spec is effectively a no-op. 
- if len(rig.prov.Calls.LoginAsHuman) != 0 { - t.Errorf("LoginAsHuman should not be called when there are no rooms to join, got %d", - len(rig.prov.Calls.LoginAsHuman)) + if len(rig.prov.Calls.LoginWithPassword) != 0 { + t.Errorf("LoginWithPassword should not be called when there are no rooms to join, got %d", + len(rig.prov.Calls.LoginWithPassword)) } if len(out.Status.Rooms) != 0 { t.Errorf("Status.Rooms=%v, want empty (worker still pending)", out.Status.Rooms) @@ -336,7 +388,7 @@ func TestHumanReconciler_Update_PendingResource(t *testing.T) { // TestHumanReconciler_SteadyState_NoLogin locks in the device-bloat // fix: once a Human is Active and Status.Rooms matches the desired set, // periodic reconciles (driven by reconcileInterval every 5 minutes) -// must NOT call LoginAsHuman. Every Login call without a device_id +// must NOT call LoginWithPassword. Every Login call without a device_id // creates a new Matrix device session on the homeserver; under the // pre-fix behavior a single Human would accumulate ~288 orphan devices // per day. The invariant "desired == observed ⇒ zero Matrix writes" @@ -366,9 +418,9 @@ func TestHumanReconciler_SteadyState_NoLogin(t *testing.T) { t.Errorf("EnsureHumanUser must not be called on steady-state reconcile, got %d", len(rig.prov.Calls.EnsureHumanUser)) } - if len(rig.prov.Calls.LoginAsHuman) != 0 { - t.Errorf("LoginAsHuman must not be called when desired == observed; device bloat regression! got %d", - len(rig.prov.Calls.LoginAsHuman)) + if len(rig.prov.Calls.LoginWithPassword) != 0 { + t.Errorf("LoginWithPassword must not be called when desired == observed; device bloat regression! 
got %d", + len(rig.prov.Calls.LoginWithPassword)) } if len(rig.prov.Calls.InviteToRoom) != 0 { t.Errorf("no invites expected on steady-state, got %+v", rig.prov.Calls.InviteToRoom) @@ -463,7 +515,9 @@ func TestHumanReconciler_Login_StalePassword(t *testing.T) { human.Finalizers = []string{finalizerName} rig := newHumanRig(t, human, worker) - rig.prov.LoginAsHumanFn = func(ctx context.Context, name, password string) (string, error) { + // Steady-state token acquisition uses LoginWithPassword; simulate the + // stored password no longer working after the user rotated it. + rig.prov.LoginWithPasswordFn = func(ctx context.Context, name, password string) (string, error) { return "", errors.New("M_FORBIDDEN: invalid password") } @@ -475,8 +529,8 @@ func TestHumanReconciler_Login_StalePassword(t *testing.T) { if len(rig.prov.Calls.EnsureHumanUser) != 0 { t.Errorf("EnsureHumanUser must not be called on stale password; got %d calls", len(rig.prov.Calls.EnsureHumanUser)) } - if len(rig.prov.Calls.LoginAsHuman) != 1 { - t.Errorf("LoginAsHuman should be attempted once, got %d", len(rig.prov.Calls.LoginAsHuman)) + if len(rig.prov.Calls.LoginWithPassword) != 1 { + t.Errorf("LoginWithPassword should be attempted once, got %d", len(rig.prov.Calls.LoginWithPassword)) } // Admin-only invite happened; no /join. @@ -497,6 +551,70 @@ func TestHumanReconciler_Login_StalePassword(t *testing.T) { } } +// TestHumanReconciler_RevertSSOToLegacy_Blocked covers the identity-switch +// guard for the SSO→legacy direction. A Human that was provisioned through +// an external SSO identity carries an SSO-derived MatrixUserID and a set of +// rooms that user already joined. If spec.identitySource is then removed, +// the spec resolves to the legacy_password source whose username-derived +// MXID differs from the recorded one. 
+// +// Re-provisioning in place would be unsafe: reconcileHumanInfra would swap +// MatrixUserID to the new legacy account, but Status.Rooms still lists the +// old SSO user's memberships, so the rooms phase would treat every desired +// room as already observed and never invite/join the new user — leaving a +// Human that reports Active with rooms while the new identity is in none of +// them. The guard must instead degrade the Human and keep all prior state +// intact so the operator can recreate the CR. +func TestHumanReconciler_RevertSSOToLegacy_Blocked(t *testing.T) { + worker := newReadyWorker("w1", "!room-w1:localhost") + human := newHuman("sso-user", v1beta1.HumanSpec{ + Username: "sso-user", + AccessibleWorkers: []string{"w1"}, + }) + // Previously provisioned via SSO: recorded MXID is the SSO hash-derived + // localpart, which differs from the legacy "@sso-user:localhost" the + // now-identitySource-less spec resolves to. + ssoUserID := "@ssohash00112233445566778899aabb:localhost" + human.Status.MatrixUserID = ssoUserID + human.Status.Rooms = []string{"!room-w1:localhost"} + human.Status.Phase = "Active" + human.Finalizers = []string{finalizerName} + + rig := newHumanRig(t, human, worker) + + out, _, err := rig.reconcile("sso-user") + if err != nil { + t.Fatalf("reconcile should degrade (non-fatal), got error: %v", err) + } + + if out.Status.Phase != "Degraded" { + t.Errorf("Status.Phase=%q, want Degraded", out.Status.Phase) + } + if out.Status.Message == "" { + t.Error("Status.Message should explain the blocked identity switch") + } + // Identity must not be silently swapped to the legacy account. + if out.Status.MatrixUserID != ssoUserID { + t.Errorf("Status.MatrixUserID=%q, want unchanged %q", out.Status.MatrixUserID, ssoUserID) + } + // No new account may be provisioned under the legacy username. 
+ if len(rig.prov.Calls.EnsureHumanUser) != 0 || len(rig.prov.Calls.RegisterLegacyUser) != 0 || + len(rig.prov.Calls.RegisterAppServiceUser) != 0 { + t.Errorf("no provisioning expected on blocked switch; EnsureHumanUser=%d RegisterLegacyUser=%d RegisterAppServiceUser=%d", + len(rig.prov.Calls.EnsureHumanUser), len(rig.prov.Calls.RegisterLegacyUser), len(rig.prov.Calls.RegisterAppServiceUser)) + } + // The rooms phase must not run, so no membership churn for either user. + if len(rig.prov.Calls.InviteToRoom) != 0 || len(rig.prov.Calls.JoinRoomAs) != 0 || + len(rig.prov.Calls.KickFromRoom) != 0 { + t.Errorf("no room mutations expected on blocked switch; invites=%+v joins=%+v kicks=%+v", + rig.prov.Calls.InviteToRoom, rig.prov.Calls.JoinRoomAs, rig.prov.Calls.KickFromRoom) + } + // Prior room state is preserved untouched. + if len(out.Status.Rooms) != 1 || out.Status.Rooms[0] != "!room-w1:localhost" { + t.Errorf("Status.Rooms=%v, want preserved [!room-w1:localhost]", out.Status.Rooms) + } +} + // --- helpers --- func invitedRoomSet(calls []mocks.RoomMembershipCall) map[string]bool { diff --git a/hiclaw-controller/internal/controller/human_reconcile_delete.go b/hiclaw-controller/internal/controller/human_reconcile_delete.go index e22796689..3a30ced02 100644 --- a/hiclaw-controller/internal/controller/human_reconcile_delete.go +++ b/hiclaw-controller/internal/controller/human_reconcile_delete.go @@ -3,6 +3,7 @@ package controller import ( "context" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -26,7 +27,7 @@ func (r *HumanReconciler) reconcileHumanDelete(ctx context.Context, s *humanScop humanUserID := h.Status.MatrixUserID if humanUserID == "" { - humanUserID = r.Provisioner.MatrixUserID(s.username) + humanUserID = s.identity.MatrixUserID } for _, roomID := range h.Status.Rooms { if err := 
r.Provisioner.ForceLeaveRoom(ctx, humanUserID, roomID); err != nil { @@ -35,14 +36,21 @@ func (r *HumanReconciler) reconcileHumanDelete(ctx context.Context, s *humanScop } } + if s.identity.Source != nil { + if err := s.identity.Source.EnsureDeactivated(ctx, &h.Spec, &h.Status); err != nil { + return reconcile.Result{RequeueAfter: reconcileInterval}, err + } + } + if r.Legacy != nil { if err := r.Legacy.RemoveFromHumansRegistry(ctx, h.Name); err != nil { logger.Error(err, "failed to remove human from registry (non-fatal)") } } + base := h.DeepCopy() controllerutil.RemoveFinalizer(h, finalizerName) - if err := r.Update(ctx, h); err != nil { + if err := r.Patch(ctx, h, client.MergeFrom(base)); err != nil { return reconcile.Result{}, err } diff --git a/hiclaw-controller/internal/controller/human_reconcile_infra.go b/hiclaw-controller/internal/controller/human_reconcile_infra.go index 7749b1102..e45c452b6 100644 --- a/hiclaw-controller/internal/controller/human_reconcile_infra.go +++ b/hiclaw-controller/internal/controller/human_reconcile_infra.go @@ -35,35 +35,50 @@ import ( func (r *HumanReconciler) reconcileHumanInfra(ctx context.Context, s *humanScope) error { h := s.human username := s.username - expectedUserID := r.Provisioner.MatrixUserID(username) + expectedUserID := s.identity.MatrixUserID + logger := log.FromContext(ctx).WithValues( + "name", h.Name, + "identitySource", s.identity.Source.Key(), + "matrixUserID", expectedUserID, + "matrixLocalpart", s.identity.MatrixLocalpart, + ) - needsProvision := h.Status.MatrixUserID == "" || h.Status.MatrixUserID != expectedUserID + needsProvision := h.Status.MatrixUserID == "" || + (s.identity.ManagesInitialPassword && h.Status.MatrixUserID != expectedUserID) if needsProvision { - creds, err := r.Provisioner.EnsureHumanUser(ctx, username) + logger.Info("provisioning Matrix account for human", + "currentStatusMatrixUserID", h.Status.MatrixUserID, + "managesInitialPassword", s.identity.ManagesInitialPassword) + creds, err 
:= s.identity.Source.EnsurePrecreated(ctx, &h.Spec, h.Name) if err != nil { + logger.Error(err, "matrix account provisioning failed; status.matrixUserID will stay empty") return fmt.Errorf("matrix registration failed: %w", err) } - h.Status.MatrixUserID = creds.UserID - h.Status.InitialPassword = creds.Password + if creds.UserID != "" && creds.UserID != expectedUserID { + logger.Error(nil, "matrix registration returned unexpected user id", + "registeredUserID", creds.UserID) + return fmt.Errorf("matrix registration returned %s, want %s", creds.UserID, expectedUserID) + } + h.Status.MatrixUserID = expectedUserID + if s.identity.ManagesInitialPassword { + h.Status.InitialPassword = creds.Password + } else { + h.Status.InitialPassword = "" + } s.userToken = creds.AccessToken - log.FromContext(ctx).Info("human created", - "name", h.Name, "username", username, "matrixUserID", creds.UserID) + logger.Info("human Matrix account provisioned; status.matrixUserID set", + "username", username, + "created", creds.Created, + "hasAccessToken", creds.AccessToken != "") + } else if !s.identity.ManagesInitialPassword { + h.Status.InitialPassword = "" } // Sync Matrix profile displayName on first provisioning and when spec changes. 
shouldSyncDisplayName := needsProvision || h.Status.DisplayNameSyncedGeneration != h.Generation if shouldSyncDisplayName { - token := s.userToken - if token == "" && h.Status.InitialPassword != "" { - if t, err := r.Provisioner.LoginAsHuman(ctx, username, h.Status.InitialPassword); err == nil { - token = t - s.userToken = t - } else { - log.FromContext(ctx).Info("human login failed before displayName sync; skipping this cycle", - "name", h.Name, "username", username, "err", err.Error()) - } - } + token := r.ensureUserToken(ctx, s) if token != "" { if err := r.Provisioner.SetDisplayName(ctx, h.Status.MatrixUserID, token, h.Spec.DisplayName); err != nil { log.FromContext(ctx).Error(err, "failed to sync human displayName (non-fatal)", diff --git a/hiclaw-controller/internal/controller/human_reconcile_rooms.go b/hiclaw-controller/internal/controller/human_reconcile_rooms.go index ca05137f6..fabc61475 100644 --- a/hiclaw-controller/internal/controller/human_reconcile_rooms.go +++ b/hiclaw-controller/internal/controller/human_reconcile_rooms.go @@ -37,7 +37,7 @@ func (r *HumanReconciler) reconcileHumanRooms(ctx context.Context, s *humanScope matrixUserID := h.Status.MatrixUserID if matrixUserID == "" { - matrixUserID = r.Provisioner.MatrixUserID(s.username) + matrixUserID = s.identity.MatrixUserID } // Start with currently-observed rooms; we'll prune removals below. @@ -107,11 +107,14 @@ func (r *HumanReconciler) ensureUserToken(ctx context.Context, s *humanScope) st // reconciles fall through to here. Without a stored password we // cannot log in, so return empty and let the caller fall back to // admin-only invite. 
- if !r.Provisioner.MatrixAppServiceEnabled() && s.human.Status.InitialPassword == "" { + if s.identity.Source == nil { + return "" + } + if s.identity.ManagesInitialPassword && !r.Provisioner.MatrixAppServiceEnabled() && s.human.Status.InitialPassword == "" { return "" } - token, err := r.Provisioner.LoginAsHuman(ctx, s.username, s.human.Status.InitialPassword) + token, err := s.identity.Source.EnsureUserToken(ctx, &s.human.Spec, &s.human.Status, s.human.Name) if err != nil { log.FromContext(ctx).Info("human login with stored password failed; continuing with admin-only room management", "name", s.human.Name, "err", err.Error()) diff --git a/hiclaw-controller/internal/controller/human_scope.go b/hiclaw-controller/internal/controller/human_scope.go index 89fe26267..6fc19134b 100644 --- a/hiclaw-controller/internal/controller/human_scope.go +++ b/hiclaw-controller/internal/controller/human_scope.go @@ -4,6 +4,7 @@ import ( "context" v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -15,12 +16,14 @@ type humanScope struct { human *v1beta1.Human username string patchBase client.Patch + identity humanidentity.ResolvedIdentity // userToken is the Human's own Matrix access token for this reconcile - // pass, obtained either from EnsureHumanUser (first-time) or - // LoginAsHuman (steady-state). Empty when login failed (e.g. the user - // changed their password in Element); rooms phase then degrades to - // admin-only invite without /join. + // pass, obtained either from the identity source's EnsurePrecreated + // (first-time) or EnsureUserToken (steady-state, e.g. LoginWithPassword + // for legacy_password). Empty when login failed (e.g. the user changed + // their password in Element); rooms phase then degrades to admin-only + // invite without /join. 
userToken string } @@ -32,7 +35,13 @@ type humanScope struct { // (reconcile is stuck before it can report a real state), or the // previous Phase otherwise (transient errors keep us in "Active"). func computeHumanPhase(h *v1beta1.Human, reconcileErr error) string { + if h.Status.Phase == "Degraded" && h.Status.Message != "" { + return "Degraded" + } if reconcileErr != nil { + if h.Status.Phase == "Degraded" { + return "Degraded" + } if h.Status.MatrixUserID == "" { return "Failed" } diff --git a/hiclaw-controller/internal/controller/humanidentity/externalsso/source.go b/hiclaw-controller/internal/controller/humanidentity/externalsso/source.go new file mode 100644 index 000000000..f2d144e9c --- /dev/null +++ b/hiclaw-controller/internal/controller/humanidentity/externalsso/source.go @@ -0,0 +1,125 @@ +package externalsso + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + + v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +const matrixLocalpartHashBytes = 16 + +type source struct { + deps humanidentity.Deps +} + +func init() { + humanidentity.Register(humanidentity.KeyExternalSSO, func(deps humanidentity.Deps) humanidentity.IdentitySource { + return source{deps: deps} + }) +} + +func (s source) Key() string { + return humanidentity.KeyExternalSSO +} + +func (s source) DeriveMatrixUserID(spec *v1beta1.HumanSpec, _ string) (string, error) { + localpart, err := s.matrixLocalpart(spec) + if err != nil { + return "", err + } + return s.deps.Provisioner.MatrixUserID(localpart), nil +} + +func (s source) EnsurePrecreated(ctx context.Context, spec *v1beta1.HumanSpec, metadataName string) (humanidentity.Credentials, error) { + logger := log.FromContext(ctx).WithValues("identitySource", humanidentity.KeyExternalSSO, "human", metadataName) + + if !s.deps.Provisioner.MatrixAppServiceEnabled() { + logger.Error(nil, "cannot create 
Matrix account for SSO human: Matrix AppService mode is disabled (set AGENTTEAMS_MATRIX_APPSERVICE_ENABLED)") + return humanidentity.Credentials{}, fmt.Errorf("external_sso requires AppService mode") + } + + localpart, err := s.matrixLocalpart(spec) + if err != nil { + logger.Error(err, "failed to derive Matrix localpart from identitySource (issuer/subject)") + return humanidentity.Credentials{}, err + } + expectedUserID, err := s.DeriveMatrixUserID(spec, metadataName) + if err != nil { + logger.Error(err, "failed to derive Matrix user ID from identitySource") + return humanidentity.Credentials{}, err + } + + logger.Info("creating Matrix account for SSO human via AppService register", + "issuer", spec.IdentitySource.Issuer, + "subject", spec.IdentitySource.Subject, + "matrixLocalpart", localpart, + "matrixUserID", expectedUserID) + + creds, err := s.deps.Provisioner.RegisterAppServiceUser(ctx, localpart) + if err != nil { + logger.Error(err, "AppService registration failed for SSO human", + "matrixLocalpart", localpart, "matrixUserID", expectedUserID) + return humanidentity.Credentials{}, err + } + + logger.Info("Matrix account ready for SSO human", + "matrixUserID", expectedUserID, + "registeredUserID", creds.UserID, + "created", creds.Created, + "hasAccessToken", creds.AccessToken != "") + + return humanidentity.Credentials{ + UserID: expectedUserID, + AccessToken: creds.AccessToken, + Password: "", + Created: creds.Created, + }, nil +} + +func (s source) ManagesInitialPassword() bool { + return false +} + +func (s source) EnsureUserToken(ctx context.Context, spec *v1beta1.HumanSpec, _ *v1beta1.HumanStatus, _ string) (string, error) { + if !s.deps.Provisioner.MatrixAppServiceEnabled() { + return "", fmt.Errorf("external_sso requires AppService mode") + } + localpart, err := s.matrixLocalpart(spec) + if err != nil { + return "", err + } + return s.deps.Provisioner.LoginAppServiceUser(ctx, localpart) +} + +func (s source) EnsureDeactivated(ctx context.Context, spec 
*v1beta1.HumanSpec, status *v1beta1.HumanStatus) error { + userID := status.MatrixUserID + if userID == "" { + derived, err := s.DeriveMatrixUserID(spec, "") + if err != nil { + return err + } + userID = derived + } + return s.deps.Provisioner.DeactivateHumanUser(ctx, userID) +} + +func (s source) matrixLocalpart(spec *v1beta1.HumanSpec) (string, error) { + if spec.IdentitySource == nil { + return "", fmt.Errorf("identitySource is required for external_sso") + } + issuer := spec.IdentitySource.Issuer + subject := spec.IdentitySource.Subject + if issuer == "" { + return "", fmt.Errorf("identitySource.issuer must not be empty") + } + if subject == "" { + return "", fmt.Errorf("identitySource.subject must not be empty") + } + digest := sha256.Sum256([]byte(issuer + "\x00" + subject)) + return hex.EncodeToString(digest[:matrixLocalpartHashBytes]), nil +} diff --git a/hiclaw-controller/internal/controller/humanidentity/identity_source.go b/hiclaw-controller/internal/controller/humanidentity/identity_source.go new file mode 100644 index 000000000..6d0d75eda --- /dev/null +++ b/hiclaw-controller/internal/controller/humanidentity/identity_source.go @@ -0,0 +1,237 @@ +// Package humanidentity hosts the per-Human identity-source registry +// and the IdentitySource interface that decouples HumanReconciler's +// main reconcile loop from any specific identity provider. +// +// The package exists to satisfy three hard architectural constraints: +// +// - The HumanReconciler main loop must NOT branch on identity type. +// All differences (how a Matrix user ID is derived, whether a +// password is assigned on first registration, how steady-state +// access tokens are obtained) are expressed as different return +// values from a single uniform interface. +// +// - The controller code must NOT name any specific identity +// provider (no "agent_identity", "keycloak", "dingtalk" string +// literals). 
import (
	"context"
	"fmt"
	"sort"
	"strings"
	"sync"

	v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1"
	"github.com/hiclaw/hiclaw-controller/internal/service"
)
+ Created bool +} + +const ( + KeyLegacyPassword = "legacy_password" + KeyExternalSSO = "external_sso" +) + +// ResolvedIdentity is the per-Human identity selection result consumed by +// HumanReconciler. It carries both the strategy implementation and the stable +// Matrix identity derived from the current Human spec. +type ResolvedIdentity struct { + Source IdentitySource + MatrixUserID string + MatrixLocalpart string + ManagesInitialPassword bool +} + +// IdentitySource is the contract every identity-protocol implementation +// satisfies. Methods are listed in roughly the order the reconciler +// invokes them. +// +// Implementations MUST be stateless w.r.t. the Human CR — every method +// receives the inputs it needs as parameters. Per-cluster configuration +// (homeserver domain, Tuwunel client, etc.) lives in Deps and is bound +// at registry-resolve time. +type IdentitySource interface { + // Key returns the registry key this implementation registered + // itself under. Used for audit/event labelling so the reconciler + // can record "human X is being driven by identity source Y" + // without a separate enum. + Key() string + + // DeriveMatrixUserID computes the deterministic Matrix user ID + // the homeserver will assign to this Human. Pure function: must + // produce the same output for the same input across reconciles + // AND across processes (cross-language alignment with Tuwunel + // matters for the SSO flow). + DeriveMatrixUserID(spec *v1beta1.HumanSpec, metadataName string) (string, error) + + // EnsurePrecreated creates (or recognises an existing) Matrix + // account for this Human and returns the credentials needed by + // the rest of the reconcile pass. Side effects beyond Matrix + // account state must NOT leak — for example, a SSO + // implementation must not assign a password just because the + // underlying register call returned Created=true. 
+ EnsurePrecreated(ctx context.Context, spec *v1beta1.HumanSpec, metadataName string) (Credentials, error) + + // ManagesInitialPassword reports whether this identity source + // owns the user's Matrix password. When true, the reconciler + // will persist Credentials.Password into Status.InitialPassword + // on first creation; when false it will not, regardless of + // what Credentials.Password contains. The double-gate is a + // defence-in-depth check: a buggy implementation that returns + // a non-empty Password while reporting ManagesInitialPassword= + // false still does not leak the password into Status. + ManagesInitialPassword() bool + + // EnsureUserToken returns a fresh user-scoped access token used + // by the rooms phase to /join private rooms. Returns ("", nil) + // when the controller cannot obtain one (e.g. the user rotated + // their password via Element); callers degrade to admin-only + // invite without surfacing it as an error. + EnsureUserToken(ctx context.Context, spec *v1beta1.HumanSpec, status *v1beta1.HumanStatus, metadataName string) (string, error) + + // EnsureDeactivated performs identity-source-specific cleanup before the + // finalizer is removed. Room-level cleanup (force-leave) is the + // reconciler's responsibility; this hook covers anything else + // the implementation owns. + EnsureDeactivated(ctx context.Context, spec *v1beta1.HumanSpec, status *v1beta1.HumanStatus) error +} + +// Deps is the shared dependency container all implementations receive +// at construction time. New fields can be added without breaking +// existing implementations because each implementation reads only the +// fields it needs. +type Deps struct { + // Provisioner exposes the decomposed Matrix-side primitives + // (RegisterAppServiceUser, SetUserPassword, LoginAppServiceUser, + // etc.). 
All identity sources route Matrix operations through it + // rather than holding a direct matrix.Client reference, so the + // service layer remains the single integration boundary. + Provisioner service.HumanProvisioner + + // Domain is the Matrix server domain part — needed by SSO-style + // implementations that compute MXIDs without going through + // Provisioner.MatrixUserID(localpart). + Domain string +} + +// FactoryFn constructs an IdentitySource bound to the given Deps. Each +// implementation registers a factory rather than a singleton so the +// registry can be re-instantiated cheaply per reconciler instance. +type FactoryFn func(deps Deps) IdentitySource + +// ========================================================================= +// Registry +// ========================================================================= + +var ( + registryMu sync.RWMutex + registry = map[string]FactoryFn{} +) + +// Register adds a factory under the given key. Intended to be called +// from package-level init(); double-registration panics so wiring +// mistakes surface at process start, not at reconcile time. +func Register(key string, factory FactoryFn) { + registryMu.Lock() + defer registryMu.Unlock() + if _, exists := registry[key]; exists { + panic(fmt.Sprintf("humanidentity: duplicate registration for key %q", key)) + } + registry[key] = factory +} + +// Resolve constructs the IdentitySource registered under key, bound to +// deps. Returns an error when no implementation has been registered +// under key — callers should treat that as a configuration bug. 
// matrixLocalpart returns the localpart of a Matrix user ID written as
// "@localpart:domain".
//
// The ID must begin with "@" and contain a ":" preceded by a non-empty
// localpart; otherwise an error is returned. Whatever follows the first ":"
// is not inspected.
func matrixLocalpart(matrixUserID string) (string, error) {
	if len(matrixUserID) == 0 || matrixUserID[0] != '@' {
		return "", fmt.Errorf("matrix user id %q must start with @", matrixUserID)
	}

	// Split once on the first ":" after the sigil.
	localpart, _, hasSeparator := strings.Cut(matrixUserID[1:], ":")
	if !hasSeparator || localpart == "" {
		return "", fmt.Errorf("matrix user id %q must include localpart and domain", matrixUserID)
	}
	return localpart, nil
}
+func Keys() []string { + registryMu.RLock() + defer registryMu.RUnlock() + out := make([]string, 0, len(registry)) + for k := range registry { + out = append(out, k) + } + return out +} diff --git a/hiclaw-controller/internal/controller/humanidentity/legacypassword/source.go b/hiclaw-controller/internal/controller/humanidentity/legacypassword/source.go new file mode 100644 index 000000000..8332ee2c9 --- /dev/null +++ b/hiclaw-controller/internal/controller/humanidentity/legacypassword/source.go @@ -0,0 +1,58 @@ +package legacypassword + +import ( + "context" + + v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity" +) + +type source struct { + deps humanidentity.Deps +} + +func init() { + humanidentity.Register(humanidentity.KeyLegacyPassword, func(deps humanidentity.Deps) humanidentity.IdentitySource { + return source{deps: deps} + }) +} + +func (s source) Key() string { + return humanidentity.KeyLegacyPassword +} + +func (s source) DeriveMatrixUserID(spec *v1beta1.HumanSpec, metadataName string) (string, error) { + return s.deps.Provisioner.MatrixUserID(spec.EffectiveUsername(metadataName)), nil +} + +func (s source) EnsurePrecreated(ctx context.Context, spec *v1beta1.HumanSpec, metadataName string) (humanidentity.Credentials, error) { + creds, err := s.deps.Provisioner.EnsureHumanUser(ctx, spec.EffectiveUsername(metadataName)) + if err != nil { + return humanidentity.Credentials{}, err + } + return humanidentity.Credentials{ + UserID: creds.UserID, + AccessToken: creds.AccessToken, + Password: creds.Password, + Created: creds.Created, + }, nil +} + +func (s source) ManagesInitialPassword() bool { + return true +} + +func (s source) EnsureUserToken(ctx context.Context, spec *v1beta1.HumanSpec, status *v1beta1.HumanStatus, metadataName string) (string, error) { + username := spec.EffectiveUsername(metadataName) + if s.deps.Provisioner.MatrixAppServiceEnabled() { + return 
s.deps.Provisioner.LoginAppServiceUser(ctx, username) + } + if status.InitialPassword == "" { + return "", nil + } + return s.deps.Provisioner.LoginWithPassword(ctx, username, status.InitialPassword) +} + +func (s source) EnsureDeactivated(context.Context, *v1beta1.HumanSpec, *v1beta1.HumanStatus) error { + return nil +} diff --git a/hiclaw-controller/internal/matrix/appservice.go b/hiclaw-controller/internal/matrix/appservice.go index 5988bd90a..c38e405bf 100644 --- a/hiclaw-controller/internal/matrix/appservice.go +++ b/hiclaw-controller/internal/matrix/appservice.go @@ -24,7 +24,7 @@ import ( // DO NOT enable AppService mode against a shared or pre-existing // homeserver that also hosts non-HiClaw users. Doing so would let the // as_token impersonate those users. Instead set -// HICLAW_MATRIX_APPSERVICE_USER_NAMESPACE_REGEX to a restrictive regex +// AGENTTEAMS_MATRIX_APPSERVICE_USER_NAMESPACE_REGEX to a restrictive regex // (e.g. "@hiclaw-.*:") that covers only HiClaw-managed localparts, // and ensure HiClaw-managed users are created under that prefix. 
// AppserviceConfig holds the configuration for registering the controller
// as a Matrix Application Service with the homeserver (Conduwuit/Tuwunel).
type AppserviceConfig struct {
	Enabled bool
	ID      string // e.g. "agentteams-watcher"
	ASToken string // appservice → homeserver authentication token
	HSToken string // homeserver → appservice authentication token
	URL     string // controller HTTP endpoint reachable from homeserver
}

// EnsureTokens generates a value for every token that is still empty. Each
// generated token is the hex encoding of 32 random bytes (64 characters).
// Tokens that already hold a value are never overwritten, so repeated calls
// are harmless.
func (c *AppserviceConfig) EnsureTokens() {
	// ASToken first, then HSToken.
	for _, slot := range []*string{&c.ASToken, &c.HSToken} {
		if *slot == "" {
			*slot = randomHex(32)
		}
	}
}

// randomHex returns the hex encoding of n bytes read from crypto/rand, i.e.
// a string of 2*n characters. It panics if the random source reports an error.
func randomHex(n int) string {
	raw := make([]byte, n)
	_, err := rand.Read(raw)
	if err != nil {
		panic("crypto/rand: " + err.Error())
	}
	return hex.EncodeToString(raw)
}
// MessageEvent is one m.room.message timeline event that mentions at least
// one user, as collected by SyncMessages.
type MessageEvent struct {
	RoomID   string   // ID of the joined room whose timeline contained the event
	EventID  string   // event_id of the message event
	Sender   string   // sender of the message event
	Mentions []string // user IDs listed under the event content's m.mentions.user_ids
}

// SyncMessagesResult is the outcome of a single SyncMessages call.
type SyncMessagesResult struct {
	NextBatch string         // next_batch token from the /sync response
	Events    []MessageEvent // mention-bearing message events found in joined rooms
}
type TuwunelClient struct { config Config @@ -314,13 +349,18 @@ func (c *TuwunelClient) EnsureAppServiceUser(ctx context.Context, username strin Error string `json:"error"` } + logger := log.FromContext(ctx).WithValues("matrixUserID", c.UserID(username), "localpart", username) + statusCode, _, err := c.doJSONWithASToken(ctx, http.MethodPost, "/_matrix/client/v3/register", regBody, ®Resp) if err != nil { + logger.Error(err, "AppService register request failed (transport)") return nil, fmt.Errorf("AS register user %s: %w", username, err) } if statusCode == http.StatusOK || statusCode == http.StatusCreated { + logger.Info("AppService registered new Matrix account", + "httpStatus", statusCode, "registeredUserID", regResp.UserID, "hasAccessToken", regResp.AccessToken != "") return &UserCredentials{ UserID: regResp.UserID, AccessToken: regResp.AccessToken, @@ -331,8 +371,14 @@ func (c *TuwunelClient) EnsureAppServiceUser(ctx context.Context, username strin // User already exists → fall back to AS login if regResp.ErrCode == "M_USER_IN_USE" { + logger.Info("Matrix account already exists; falling back to AppService login", "httpStatus", statusCode) token, loginErr := c.LoginAppServiceUser(ctx, username) if loginErr != nil { + if errors.Is(loginErr, ErrAppServiceNotReady) { + logger.Info("Matrix AppService token not active yet during login fallback; will retry") + return nil, loginErr + } + logger.Error(loginErr, "AppService login failed for existing Matrix account") return nil, fmt.Errorf("AS user %s exists but AS login failed: %w", username, loginErr) } return &UserCredentials{ @@ -343,6 +389,17 @@ func (c *TuwunelClient) EnsureAppServiceUser(ctx context.Context, username strin }, nil } + // Startup race: homeserver does not recognize the as_token yet. This is + // transient and self-heals once cluster init registers/verifies the + // AppService, so report it as retryable instead of a hard error. 
+ if statusCode == http.StatusUnauthorized && regResp.ErrCode == "M_UNKNOWN_TOKEN" { + logger.Info("Matrix AppService token not active yet; will retry once it is registered/verified", + "httpStatus", statusCode) + return nil, fmt.Errorf("AS register user %s: %w", username, ErrAppServiceNotReady) + } + + logger.Error(nil, "AppService register rejected by homeserver", + "httpStatus", statusCode, "errcode", regResp.ErrCode, "error", regResp.Error) return nil, fmt.Errorf("AS register user %s: %s (%s)", username, regResp.ErrCode, regResp.Error) } @@ -369,6 +426,9 @@ func (c *TuwunelClient) LoginAppServiceUser(ctx context.Context, username string return "", fmt.Errorf("AS login %s: %w", username, err) } if statusCode != http.StatusOK { + if statusCode == http.StatusUnauthorized && resp.ErrCode == "M_UNKNOWN_TOKEN" { + return "", fmt.Errorf("AS login %s: %w", username, ErrAppServiceNotReady) + } return "", fmt.Errorf("AS login %s: HTTP %d %s %s: %s", username, statusCode, resp.ErrCode, resp.Error, truncate(respBody, 500)) } @@ -421,7 +481,9 @@ func (c *TuwunelClient) SetDisplayName(ctx context.Context, userID, accessToken, func (c *TuwunelClient) CreateRoom(ctx context.Context, req CreateRoomRequest) (*RoomInfo, error) { token := req.CreatorToken + tokenSource := "explicit" if token == "" { + tokenSource = "admin" var err error token, err = c.ensureAdminToken(ctx) if err != nil { @@ -447,16 +509,18 @@ func (c *TuwunelClient) CreateRoom(ctx context.Context, req CreateRoomRequest) ( } } + initialState := append([]StateEvent(nil), req.InitialState...) 
if req.E2EE { - body["initial_state"] = []map[string]interface{}{ - { - "type": "m.room.encryption", - "state_key": "", - "content": map[string]string{ - "algorithm": "m.megolm.v1.aes-sha2", - }, + initialState = append(initialState, StateEvent{ + Type: "m.room.encryption", + StateKey: "", + Content: map[string]interface{}{ + "algorithm": "m.megolm.v1.aes-sha2", }, - } + }) + } + if len(initialState) > 0 { + body["initial_state"] = initialState } var resp struct { @@ -495,10 +559,68 @@ func (c *TuwunelClient) CreateRoom(ctx context.Context, req CreateRoomRequest) ( return &RoomInfo{RoomID: existingID, Created: false}, nil } + if statusCode == http.StatusForbidden || resp.ErrCode == "M_FORBIDDEN" { + c.logCreateRoomFailureDiagnostics(ctx, req, token, tokenSource, statusCode, resp.ErrCode, resp.Error, respBody) + } + return nil, fmt.Errorf("create room %q: HTTP %d %s %s: %s", req.Name, statusCode, resp.ErrCode, resp.Error, truncate(respBody, 500)) } +func (c *TuwunelClient) logCreateRoomFailureDiagnostics(ctx context.Context, req CreateRoomRequest, token, tokenSource string, statusCode int, errCode, errText string, respBody []byte) { + senderUserID := "" + senderPowerLevel := 0 + senderPowerLevelFound := false + whoamiErr := "" + if token != "" { + if userID, err := c.accessTokenUserID(ctx, token); err != nil { + whoamiErr = err.Error() + } else { + senderUserID = userID + senderPowerLevel, senderPowerLevelFound = req.PowerLevels[userID] + } + } + + expectedAdminUserID := c.UserID(c.config.AdminUser) + expectedAdminPowerLevel, expectedAdminPowerLevelFound := req.PowerLevels[expectedAdminUserID] + + log.FromContext(ctx).Info("Matrix createRoom rejected", + "roomName", req.Name, + "roomAliasName", req.RoomAliasName, + "httpStatus", statusCode, + "errcode", errCode, + "error", errText, + "response", truncate(respBody, 500), + "tokenSource", tokenSource, + "senderUserID", senderUserID, + "senderWhoamiError", whoamiErr, + "senderPowerLevel", senderPowerLevel, + 
"senderPowerLevelFound", senderPowerLevelFound, + "expectedAdminUserID", expectedAdminUserID, + "expectedAdminPowerLevel", expectedAdminPowerLevel, + "expectedAdminPowerLevelFound", expectedAdminPowerLevelFound, + "powerLevels", req.PowerLevels, + "invite", req.Invite) +} + +func (c *TuwunelClient) accessTokenUserID(ctx context.Context, accessToken string) (string, error) { + var resp struct { + UserID string `json:"user_id"` + } + statusCode, respBody, err := c.doJSON(ctx, http.MethodGet, + "/_matrix/client/v3/account/whoami", accessToken, nil, &resp) + if err != nil { + return "", fmt.Errorf("whoami: %w", err) + } + if statusCode != http.StatusOK { + return "", fmt.Errorf("whoami: HTTP %d: %s", statusCode, truncate(respBody, 200)) + } + if resp.UserID == "" { + return "", errors.New("whoami: empty user_id") + } + return resp.UserID, nil +} + // ResolveRoomAlias implements Client.ResolveRoomAlias. func (c *TuwunelClient) ResolveRoomAlias(ctx context.Context, alias string) (string, bool, error) { token, err := c.ensureAdminToken(ctx) @@ -559,6 +681,55 @@ func (c *TuwunelClient) DeleteRoomAlias(ctx context.Context, alias string) error alias, statusCode, resp.ErrCode, resp.Error, truncate(respBody, 500)) } +func (c *TuwunelClient) SetRoomName(ctx context.Context, roomID, name, userToken string) error { + token := userToken + if token == "" { + var err error + token, err = c.ensureAdminToken(ctx) + if err != nil { + return fmt.Errorf("set room name %s: %w", roomID, err) + } + } + encodedRoom := encodeRoomID(roomID) + body := map[string]string{"name": name} + statusCode, respBody, err := c.doJSON(ctx, http.MethodPut, + fmt.Sprintf("/_matrix/client/v3/rooms/%s/state/m.room.name/", encodedRoom), + token, body, nil) + if err != nil { + return fmt.Errorf("set room name %s: %w", roomID, err) + } + if statusCode != http.StatusOK && statusCode != http.StatusCreated { + return fmt.Errorf("set room name %s: HTTP %d: %s", roomID, statusCode, truncate(respBody, 500)) + } + return 
nil +} + +func (c *TuwunelClient) SetRoomState(ctx context.Context, roomID, eventType, stateKey string, content map[string]interface{}, userToken string) error { + token := userToken + if token == "" { + var err error + token, err = c.ensureAdminToken(ctx) + if err != nil { + return fmt.Errorf("set room state %s %s: %w", roomID, eventType, err) + } + } + if content == nil { + content = map[string]interface{}{} + } + encodedRoom := encodeRoomID(roomID) + path := fmt.Sprintf("/_matrix/client/v3/rooms/%s/state/%s/%s", + encodedRoom, url.PathEscape(eventType), url.PathEscape(stateKey)) + statusCode, respBody, err := c.doJSON(ctx, http.MethodPut, path, token, content, nil) + if err != nil { + return fmt.Errorf("set room state %s %s: %w", roomID, eventType, err) + } + if statusCode != http.StatusOK && statusCode != http.StatusCreated { + return fmt.Errorf("set room state %s %s: HTTP %d: %s", + roomID, eventType, statusCode, truncate(respBody, 500)) + } + return nil +} + func (c *TuwunelClient) JoinRoom(ctx context.Context, roomID, userToken string) error { encodedRoom := encodeRoomID(roomID) statusCode, respBody, err := c.doJSON(ctx, http.MethodPost, @@ -831,16 +1002,80 @@ func (c *TuwunelClient) ListJoinedRooms(ctx context.Context, userToken string) ( return resp.JoinedRooms, nil } +func (c *TuwunelClient) SyncMessages(ctx context.Context, since string, timeout time.Duration) (*SyncMessagesResult, error) { + token, err := c.ensureAdminToken(ctx) + if err != nil { + return nil, err + } + q := url.Values{} + q.Set("timeout", fmt.Sprintf("%d", timeout.Milliseconds())) + if since != "" { + q.Set("since", since) + } + path := "/_matrix/client/v3/sync?" 
+ q.Encode() + + var resp struct { + NextBatch string `json:"next_batch"` + Rooms struct { + Join map[string]struct { + Timeline struct { + Events []struct { + Type string `json:"type"` + EventID string `json:"event_id"` + Sender string `json:"sender"` + Content struct { + Mentions struct { + UserIDs []string `json:"user_ids"` + } `json:"m.mentions"` + } `json:"content"` + } `json:"events"` + } `json:"timeline"` + } `json:"join"` + } `json:"rooms"` + } + statusCode, respBody, err := c.doJSON(ctx, http.MethodGet, path, token, nil, &resp) + if err != nil { + return nil, fmt.Errorf("sync messages: %w", err) + } + if statusCode != http.StatusOK { + return nil, fmt.Errorf("sync messages: HTTP %d: %s", statusCode, truncate(respBody, 500)) + } + out := &SyncMessagesResult{NextBatch: resp.NextBatch} + for roomID, room := range resp.Rooms.Join { + for _, event := range room.Timeline.Events { + if event.Type != "m.room.message" || len(event.Content.Mentions.UserIDs) == 0 { + continue + } + out.Events = append(out.Events, MessageEvent{ + RoomID: roomID, + EventID: event.EventID, + Sender: event.Sender, + Mentions: event.Content.Mentions.UserIDs, + }) + } + } + return out, nil +} + // doJSON performs an HTTP request with JSON body/response. // Returns the HTTP status code, the raw response body, and any transport/decode error. // If respOut is nil, the response body is not decoded (but still read and returned). // The raw body is always returned (possibly nil) so callers can include it in // diagnostic error messages even when respOut is set. 
// matrixOperation maps an HTTP method and request path (an optional query
// string is ignored) to a fixed operation name used as a metrics label.
// Requests that match none of the known client-server endpoints are reported
// as "unknown", so the set of label values stays bounded.
func matrixOperation(method, path string) string {
	route, _, _ := strings.Cut(path, "?")

	// The method guards are mutually exclusive, so grouping by method keeps
	// the relative match order of the rules within each method.
	switch method {
	case http.MethodPost:
		switch {
		case route == "/_matrix/client/v3/register":
			return "register_user"
		case route == "/_matrix/client/v3/login":
			return "login"
		case route == "/_matrix/client/v3/createRoom":
			return "create_room"
		case strings.HasSuffix(route, "/join"):
			return "join_room"
		case strings.HasSuffix(route, "/leave"):
			return "leave_room"
		case strings.HasSuffix(route, "/invite"):
			return "invite"
		case strings.HasSuffix(route, "/kick"):
			return "kick"
		}
	case http.MethodPut:
		switch {
		case strings.Contains(route, "/profile/") && strings.HasSuffix(route, "/displayname"):
			return "set_display_name"
		case strings.Contains(route, "/state/m.room.name/"):
			// Must be tested before the generic "/state/" rule.
			return "set_room_name"
		case strings.Contains(route, "/state/"):
			return "set_room_state"
		case strings.Contains(route, "/send/m.room.message/"):
			return "send_message"
		}
	case http.MethodGet:
		switch {
		case strings.HasPrefix(route, "/_matrix/client/v3/directory/room/"):
			return "resolve_room_alias"
		case strings.HasSuffix(route, "/members"):
			return "list_room_members"
		case route == "/_matrix/client/v3/joined_rooms":
			return "list_joined_rooms"
		case route == "/_matrix/client/v3/sync":
			return "sync_messages"
		}
	case http.MethodDelete:
		if strings.HasPrefix(route, "/_matrix/client/v3/directory/room/") {
			return "delete_room_alias"
		}
	}
	return "unknown"
}
func encodeRoomID(roomID string) string { return strings.ReplaceAll(roomID, "!", "%21") diff --git a/hiclaw-controller/internal/matrix/client_test.go b/hiclaw-controller/internal/matrix/client_test.go index 8fc0804ed..1c04700de 100644 --- a/hiclaw-controller/internal/matrix/client_test.go +++ b/hiclaw-controller/internal/matrix/client_test.go @@ -8,6 +8,9 @@ import ( "sync/atomic" "testing" "time" + + appmetrics "github.com/hiclaw/hiclaw-controller/internal/metrics" + "github.com/prometheus/client_golang/prometheus/testutil" ) func TestEnsureUser_NewRegistration(t *testing.T) { @@ -53,6 +56,76 @@ func TestEnsureUser_NewRegistration(t *testing.T) { } } +func TestMatrixOperationUsesBoundedLabels(t *testing.T) { + tests := []struct { + name string + method string + path string + want string + }{ + { + name: "room state", + method: http.MethodPut, + path: "/_matrix/client/v3/rooms/%21abc%3Ad/state/io.hiclaw.meta/", + want: "set_room_state", + }, + { + name: "send message", + method: http.MethodPut, + path: "/_matrix/client/v3/rooms/%21abc%3Ad/send/m.room.message/hc-123", + want: "send_message", + }, + { + name: "sync query", + method: http.MethodGet, + path: "/_matrix/client/v3/sync?since=s1&timeout=1000", + want: "sync_messages", + }, + { + name: "unknown", + method: http.MethodPatch, + path: "/_matrix/client/v3/rooms/%21abc%3Ad/custom", + want: "unknown", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := matrixOperation(tt.method, tt.path); got != tt.want { + t.Fatalf("matrixOperation() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestDoJSONRecordsUpstreamMetrics(t *testing.T) { + appmetrics.UpstreamRequestDuration.Reset() + appmetrics.UpstreamRequests.Reset() + appmetrics.UpstreamRequestErrors.Reset() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, `{"errcode":"M_UNKNOWN","error":"boom"}`, http.StatusInternalServerError) + })) + defer server.Close() + + 
c := NewTuwunelClient(Config{ServerURL: server.URL, Domain: "test.domain"}, server.Client()) + statusCode, _, err := c.doJSON(context.Background(), http.MethodPost, + "/_matrix/client/v3/createRoom", "token", map[string]string{"name": "room"}, nil) + if err != nil { + t.Fatalf("doJSON: %v", err) + } + if statusCode != http.StatusInternalServerError { + t.Fatalf("statusCode = %d, want 500", statusCode) + } + + if got := testutil.ToFloat64(appmetrics.UpstreamRequests.WithLabelValues("matrix", "create_room", "error", "5xx")); got != 1 { + t.Fatalf("upstream_requests_total = %v, want 1", got) + } + if got := testutil.ToFloat64(appmetrics.UpstreamRequestErrors.WithLabelValues("matrix", "create_room", "http")); got != 1 { + t.Fatalf("upstream_request_errors_total = %v, want 1", got) + } +} + func TestEnsureUser_ExistingUser(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { @@ -171,6 +244,64 @@ func TestCreateRoom(t *testing.T) { } } +func TestCreateRoom_InitialStateAndE2EE(t *testing.T) { + var gotBody map[string]interface{} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/_matrix/client/v3/createRoom" { + t.Errorf("unexpected path: %s", r.URL.Path) + w.WriteHeader(http.StatusNotFound) + return + } + if err := json.NewDecoder(r.Body).Decode(&gotBody); err != nil { + t.Fatalf("decode body: %v", err) + } + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]string{"room_id": "!room123:test.domain"}) + })) + defer server.Close() + + c := NewTuwunelClient(Config{ + ServerURL: server.URL, + Domain: "test.domain", + }, server.Client()) + + _, err := c.CreateRoom(context.Background(), CreateRoomRequest{ + Name: "Team: alpha", + InitialState: []StateEvent{{ + Type: "room.meta", + StateKey: "", + Content: map[string]interface{}{ + "roomKind": "team_room", + }, + }}, + E2EE: true, + CreatorToken: "creator-token", + }) + 
if err != nil { + t.Fatalf("CreateRoom: %v", err) + } + + initialState, ok := gotBody["initial_state"].([]interface{}) + if !ok { + t.Fatalf("initial_state=%T, want []interface{}", gotBody["initial_state"]) + } + if len(initialState) != 2 { + t.Fatalf("initial_state length=%d, want 2", len(initialState)) + } + meta := initialState[0].(map[string]interface{}) + if meta["type"] != "room.meta" { + t.Fatalf("first state type=%v, want room.meta", meta["type"]) + } + content := meta["content"].(map[string]interface{}) + if content["roomKind"] != "team_room" { + t.Fatalf("roomKind=%v, want team_room", content["roomKind"]) + } + encryption := initialState[1].(map[string]interface{}) + if encryption["type"] != "m.room.encryption" { + t.Fatalf("second state type=%v, want m.room.encryption", encryption["type"]) + } +} + func TestCreateRoom_WithAlias(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.Path != "/_matrix/client/v3/createRoom" { @@ -182,8 +313,8 @@ func TestCreateRoom_WithAlias(t *testing.T) { if err := json.NewDecoder(r.Body).Decode(&body); err != nil { t.Fatalf("decode body: %v", err) } - if body["room_alias_name"] != "hiclaw-worker-alice" { - t.Errorf("room_alias_name = %v, want hiclaw-worker-alice", body["room_alias_name"]) + if body["room_alias_name"] != "agentteams-worker-alice" { + t.Errorf("room_alias_name = %v, want agentteams-worker-alice", body["room_alias_name"]) } w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(map[string]string{"room_id": "!new:test.domain"}) @@ -193,7 +324,7 @@ func TestCreateRoom_WithAlias(t *testing.T) { c := NewTuwunelClient(Config{ServerURL: server.URL, Domain: "test.domain"}, server.Client()) info, err := c.CreateRoom(context.Background(), CreateRoomRequest{ Name: "Worker: alice", - RoomAliasName: "hiclaw-worker-alice", + RoomAliasName: "agentteams-worker-alice", CreatorToken: "tok", }) if err != nil { @@ -220,7 +351,7 @@ func 
TestCreateRoom_AliasInUse_ResolvesExisting(t *testing.T) { "errcode": "M_ROOM_IN_USE", "error": "Room alias already exists.", }) - case "/_matrix/client/v3/directory/room/#hiclaw-worker-alice:test.domain": + case "/_matrix/client/v3/directory/room/#agentteams-worker-alice:test.domain": resolveCalls++ w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(map[string]interface{}{ @@ -241,7 +372,7 @@ func TestCreateRoom_AliasInUse_ResolvesExisting(t *testing.T) { info, err := c.CreateRoom(context.Background(), CreateRoomRequest{ Name: "Worker: alice", - RoomAliasName: "hiclaw-worker-alice", + RoomAliasName: "agentteams-worker-alice", }) if err != nil { t.Fatalf("CreateRoom: %v", err) @@ -346,6 +477,77 @@ func TestDeleteRoomAlias_Success(t *testing.T) { } } +func TestSetRoomName(t *testing.T) { + var gotBody map[string]string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/_matrix/client/v3/login": + adminLoginHandler(t, w) + case "/_matrix/client/v3/rooms/!room:d/state/m.room.name/": + if r.Method != http.MethodPut { + t.Errorf("method = %s, want PUT", r.Method) + } + if auth := r.Header.Get("Authorization"); auth != "Bearer admin-token" { + t.Errorf("Authorization = %q, want Bearer admin-token", auth) + } + if err := json.NewDecoder(r.Body).Decode(&gotBody); err != nil { + t.Errorf("decode body: %v", err) + } + w.WriteHeader(http.StatusOK) + w.Write([]byte("{}")) + default: + t.Errorf("unexpected path: %s", r.URL.Path) + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + c := NewTuwunelClient(Config{ + ServerURL: server.URL, Domain: "d", AdminUser: "a", AdminPassword: "p", + }, server.Client()) + if err := c.SetRoomName(context.Background(), "!room:d", "Team: alpha [deleted]", ""); err != nil { + t.Fatalf("SetRoomName: %v", err) + } + if gotBody["name"] != "Team: alpha [deleted]" { + t.Fatalf("name body=%v", gotBody) + } +} + +func TestSetRoomState(t *testing.T) { + var 
gotBody map[string]interface{} + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/_matrix/client/v3/rooms/!room:d/state/room.meta/": + if r.Method != http.MethodPut { + t.Errorf("method = %s, want PUT", r.Method) + } + if auth := r.Header.Get("Authorization"); auth != "Bearer user-token" { + t.Errorf("Authorization = %q, want Bearer user-token", auth) + } + if err := json.NewDecoder(r.Body).Decode(&gotBody); err != nil { + t.Errorf("decode body: %v", err) + } + w.WriteHeader(http.StatusOK) + w.Write([]byte("{}")) + default: + t.Errorf("unexpected path: %s", r.URL.Path) + w.WriteHeader(http.StatusNotFound) + } + })) + defer server.Close() + + c := NewTuwunelClient(Config{ServerURL: server.URL, Domain: "d"}, server.Client()) + err := c.SetRoomState(context.Background(), "!room:d", "room.meta", "", map[string]interface{}{ + "roomKind": "team_room", + }, "user-token") + if err != nil { + t.Fatalf("SetRoomState: %v", err) + } + if gotBody["roomKind"] != "team_room" { + t.Fatalf("roomKind body=%v", gotBody) + } +} + // adminLoginHandler returns a handler that responds to admin login with a // fixed token, allowing tests that exercise admin-driven endpoints. func adminLoginHandler(t *testing.T, w http.ResponseWriter) { diff --git a/hiclaw-controller/internal/matrix/types.go b/hiclaw-controller/internal/matrix/types.go index 1c8a24a5a..afdd9cd4f 100644 --- a/hiclaw-controller/internal/matrix/types.go +++ b/hiclaw-controller/internal/matrix/types.go @@ -5,7 +5,7 @@ import "crypto/rand" // Config holds connection parameters for a Matrix homeserver. type Config struct { ServerURL string // internal Matrix CS API URL, e.g. http://tuwunel:6167 - Domain string // Matrix domain for user IDs, e.g. matrix-local.hiclaw.io:8080 + Domain string // Matrix domain for user IDs, e.g. 
// StateEvent describes a Matrix state event included in createRoom.initial_state.
// The struct is JSON-encoded with the field names given in the tags below.
type StateEvent struct {
	Type     string                 `json:"type"`      // event type, serialized as "type"
	StateKey string                 `json:"state_key"` // state key, serialized as "state_key" (no omitempty, so "" is sent as-is)
	Content  map[string]interface{} `json:"content"`   // event content object, serialized as "content"
}
import (
	"context"
	"crypto/subtle"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"sync"
	"time"

	v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/util/retry"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/log"
)
-func NewAppServiceHandler(cfg matrix.Config) *AppServiceHandler { - return &AppServiceHandler{matrixCfg: cfg} +// --- Transaction push endpoint --- + +// matrixEvent mirrors the subset of a Matrix event we care about. +type matrixEvent struct { + Type string `json:"type"` + RoomID string `json:"room_id"` + EventID string `json:"event_id"` + Sender string `json:"sender"` + Content struct { + Mentions *struct { + UserIDs []string `json:"user_ids"` + } `json:"m.mentions"` + } `json:"content"` } -// rotateTokenRequest is the JSON body for POST /api/v1/appservice/rotate-token. -type rotateTokenRequest struct { - ASToken string `json:"as_token"` - HSToken string `json:"hs_token"` +type transactionBody struct { + Events []matrixEvent `json:"events"` } -// RotateToken rotates the Matrix AppService as_token (and optionally hs_token). -// It creates a temporary TuwunelClient with the new token, unregisters the old -// registration (via admin command, which does not require the old as_token), -// registers with the new token, and verifies with a smoke test. -// -// NOTE: This only updates the homeserver registration. The caller must also -// update the controller env var / Secret and restart for the new token to -// take effect permanently. -func (h *AppServiceHandler) RotateToken(w http.ResponseWriter, r *http.Request) { - var req rotateTokenRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request body: %v", err), http.StatusBadRequest) +// HandleTransactions handles PUT /_matrix/app/v1/transactions/{txnId}. +// The homeserver pushes batches of events here; we filter for m.room.message +// events with m.mentions, then wake matching sleeping workers. +func (h *AppserviceHandler) HandleTransactions(w http.ResponseWriter, r *http.Request) { + logger := log.FromContext(r.Context()).WithName("appservice") + txnID := txnIDFromPath(r.URL.Path) + + // Authenticate: verify hs_token from Authorization header. 
+ if !h.verifyHSToken(r) { + // Surface as Info (not Error): a 403 here usually means the HS + // reached us with a stale or wrong hs_token after a re-register; + // it's the canonical signal for "network ok but token mismatch". + logger.Info("appservice transaction rejected: invalid hs_token", + "txnID", txnID, "remoteAddr", r.RemoteAddr) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusForbidden) + fmt.Fprint(w, `{"errcode":"M_FORBIDDEN","error":"invalid hs_token"}`) return } - if req.ASToken == "" { - http.Error(w, "as_token is required", http.StatusBadRequest) + + var body transactionBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + logger.Error(err, "failed to decode transaction body", "txnID", txnID) + // Return 200 anyway to avoid infinite retries from homeserver. + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "{}") return } - // Build a new config with the rotated tokens. - newCfg := h.matrixCfg - newCfg.AppServiceToken = req.ASToken - if req.HSToken != "" { - newCfg.AppServiceHSToken = req.HSToken + // V(1) entry log so operators enabling verbose logs can confirm + // Tuwunel → controller push is reaching us at all (the most common + // failure mode is the request never arriving, e.g. wrong appservice + // URL or cross-cluster network not routable). 
+ logger.V(1).Info("appservice transaction received", + "txnID", txnID, "totalEvents", len(body.Events)) + + mentionCount := 0 + for _, event := range body.Events { + if event.Type != "m.room.message" { + continue + } + if event.Content.Mentions == nil || len(event.Content.Mentions.UserIDs) == 0 { + continue + } + mentionCount++ + for _, userID := range event.Content.Mentions.UserIDs { + if err := h.handleMention(r.Context(), event.RoomID, event.EventID, event.Sender, userID); err != nil { + logger.Error(err, "handle mention event", + "txnID", txnID, "roomID", event.RoomID, "eventID", event.EventID, + "sender", event.Sender, "mentionedUser", userID) + } + } } - // Create a temporary client with the new token. - client := matrix.NewTuwunelClient(newCfg, nil) + if mentionCount > 0 { + logger.Info("appservice transaction processed", + "txnID", txnID, + "totalEvents", len(body.Events), "mentionEvents", mentionCount) + } - ctx := r.Context() + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "{}") +} - // Build registration with new tokens and register (includes unregister fallback). - reg := matrix.RenderAppServiceRegistration(newCfg) - if err := client.RegisterAppService(ctx, reg); err != nil { - http.Error(w, fmt.Sprintf("appservice registration failed: %v", err), http.StatusInternalServerError) - return +// txnIDFromPath extracts the trailing path segment from +// /_matrix/app/v1/transactions/{txnId}. Best-effort: returns "" on miss. +func txnIDFromPath(p string) string { + if i := strings.LastIndex(p, "/"); i >= 0 && i < len(p)-1 { + return p[i+1:] } + return "" +} - // Verify the new token works. - if err := client.AppServiceSmokeTest(ctx); err != nil { - http.Error(w, fmt.Sprintf("smoke test failed after rotation: %v", err), http.StatusInternalServerError) - return +// HandleUserQuery handles GET /_matrix/app/v1/users/{userId}. +// We don't manage virtual users; always return empty object. 
+func (h *AppserviceHandler) HandleUserQuery(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "{}") +} + +// HandleRoomQuery handles GET /_matrix/app/v1/rooms/{roomAlias}. +// We don't manage room aliases; always return empty object. +func (h *AppserviceHandler) HandleRoomQuery(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprint(w, "{}") +} + +// --- Internal logic --- + +func (h *AppserviceHandler) verifyHSToken(r *http.Request) bool { + auth := r.Header.Get("Authorization") + if !strings.HasPrefix(auth, "Bearer ") { + // Also check query param (some homeserver implementations). + return r.URL.Query().Get("access_token") == h.hsToken } + return strings.TrimPrefix(auth, "Bearer ") == h.hsToken +} + +func (h *AppserviceHandler) handleMention(ctx context.Context, roomID, eventID, sender, userID string) error { + logger := log.FromContext(ctx).WithName("appservice") - resp := map[string]string{ - "message": "appservice token rotated successfully; update your env file / Secret and restart the controller", + // Dedup by roomID/eventID/userID. 
+ if eventID != "" { + key := fmt.Sprintf("%s/%s/%s", roomID, eventID, userID) + h.mu.Lock() + if _, ok := h.seen[key]; ok { + h.mu.Unlock() + logger.V(1).Info("mention skipped: duplicate event", + "roomID", roomID, "eventID", eventID, "mentionedUser", userID) + return nil + } + h.seen[key] = struct{}{} + h.mu.Unlock() } - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) + + logger.V(1).Info("mention dispatch", + "roomID", roomID, "eventID", eventID, + "sender", sender, "mentionedUser", userID) + + if err := h.wakeStandaloneWorker(ctx, roomID, userID); err != nil { + return fmt.Errorf("wake standalone worker: %w", err) + } + if err := h.wakeTeamWorker(ctx, roomID, userID); err != nil { + return fmt.Errorf("wake team worker: %w", err) + } + return nil +} + +// wakeStandaloneWorker wakes a standalone Worker whose MatrixUserID matches +// the mentioned user, provided the mention occurred in the worker's own room. +func (h *AppserviceHandler) wakeStandaloneWorker(ctx context.Context, roomID, userID string) error { + logger := log.FromContext(ctx).WithName("appservice") + + var workers v1beta1.WorkerList + if err := h.client.List(ctx, &workers, client.InNamespace(h.namespace)); err != nil { + logger.Error(err, "list standalone Worker CRs failed", + "namespace", h.namespace, "mentionedUser", userID) + return err + } + var ( + matchedUser int // CRs whose Status.MatrixUserID equals the mentioned user + notSleeping int // matched but already Running/Stopped + roomMismatch int // matched + Sleeping but mention came from foreign room + wokenUp int + ) + for _, worker := range workers.Items { + if worker.Status.MatrixUserID != userID { + continue + } + matchedUser++ + if worker.Spec.DesiredState() != "Sleeping" { + notSleeping++ + logger.V(1).Info("mention skipped: standalone worker not Sleeping", + "worker", worker.Name, "desiredState", worker.Spec.DesiredState()) + continue + } + // Permission: mention must come from worker's own room. 
+ if worker.Status.RoomID != "" && roomID != "" && worker.Status.RoomID != roomID { + roomMismatch++ + logger.V(1).Info("mention rejected: not worker own room", + "worker", worker.Name, "workerRoom", worker.Status.RoomID, + "mentionRoom", roomID) + continue + } + now := h.now().UTC().Format(time.RFC3339) + if err := h.setStandaloneWorkerRunning(ctx, worker.Name, now); err != nil { + logger.Error(err, "set standalone worker Running failed", + "worker", worker.Name, "room", roomID) + return err + } + wokenUp++ + logger.Info("worker woken by mention", + "worker", worker.Name, "room", roomID, "type", "standalone") + } + if matchedUser == 0 { + // No standalone Worker advertises this Matrix user. Could be a + // team worker (handled by wakeTeamWorker next) or a stale mention + // for a deleted CR — verbose only to avoid log spam. + logger.V(1).Info("no standalone worker matched mention", + "mentionedUser", userID, "scanned", len(workers.Items)) + } else if wokenUp == 0 { + logger.V(1).Info("standalone mention had no effect", + "mentionedUser", userID, + "matched", matchedUser, "notSleeping", notSleeping, "roomMismatch", roomMismatch) + } + return nil +} + +// wakeTeamWorker wakes a team worker whose MatrixUserID matches the mentioned +// user. Permission boundary: the mention must occur in either the worker's +// own DM room OR the team's shared room. Mentions from other rooms (e.g. +// another team's room) are rejected — this prevents cross-team wake. 
+func (h *AppserviceHandler) wakeTeamWorker(ctx context.Context, roomID, userID string) error { + logger := log.FromContext(ctx).WithName("appservice") + + var teams v1beta1.TeamList + if err := h.client.List(ctx, &teams, client.InNamespace(h.namespace)); err != nil { + logger.Error(err, "list Team CRs failed", + "namespace", h.namespace, "mentionedUser", userID) + return err + } + var ( + matchedUser int + notSleeping int + roomMismatch int + noSpec int + wokenUp int + ) + for _, team := range teams.Items { + teamRoomID := team.Status.TeamRoomID + for _, member := range team.Status.Members { + if member.Role != roleTeamWorker || member.MatrixUserID != userID { + continue + } + matchedUser++ + + // === Permission boundary === + // Mention must come from one of: + // 1. member.RoomID — worker's own DM room + // 2. teamRoomID — team's shared room + // Anything else (e.g. another team's room) is rejected. + if roomID != "" { + allowed := false + if member.RoomID != "" && member.RoomID == roomID { + allowed = true + } + if teamRoomID != "" && teamRoomID == roomID { + allowed = true + } + // When both are empty, allow as fallback (bootstrapping). 
+ if member.RoomID == "" && teamRoomID == "" { + allowed = true + } + if !allowed { + roomMismatch++ + logger.V(1).Info("mention rejected: room not in allowed set", + "roomID", roomID, "worker", member.Name, + "team", team.Name, "workerRoom", member.RoomID, + "teamRoom", teamRoomID) + continue + } + } + + if !isDecoupledTeamWorker(&team, member.Name) { + noSpec++ + logger.V(1).Info("mention skipped: team worker ref not found", + "worker", member.Name, "team", team.Name) + continue + } + var worker v1beta1.Worker + if err := h.client.Get(ctx, types.NamespacedName{Name: member.Name, Namespace: h.namespace}, &worker); err != nil { + noSpec++ + logger.V(1).Info("mention skipped: worker CR not found", + "worker", member.Name, "team", team.Name) + continue + } + if worker.Spec.DesiredState() != "Sleeping" { + notSleeping++ + logger.V(1).Info("mention skipped: team worker not Sleeping", + "worker", member.Name, "team", team.Name, + "desiredState", worker.Spec.DesiredState()) + continue + } + now := h.now().UTC().Format(time.RFC3339) + if err := h.setStandaloneWorkerRunning(ctx, member.Name, now); err != nil { + logger.Error(err, "set team worker Running failed", + "team", team.Name, "worker", member.Name, "room", roomID) + return err + } + wokenUp++ + logger.Info("worker woken by mention", + "worker", member.Name, "team", team.Name, + "room", roomID, "type", "team") + } + } + if matchedUser == 0 { + logger.V(1).Info("no team worker matched mention", + "mentionedUser", userID, "scannedTeams", len(teams.Items)) + } else if wokenUp == 0 { + logger.V(1).Info("team mention had no effect", + "mentionedUser", userID, + "matched", matchedUser, "notSleeping", notSleeping, + "roomMismatch", roomMismatch, "noSpec", noSpec) + } + return nil +} + +// --- CR mutation helpers (mirrored from mention_watcher.go) --- + +func (h *AppserviceHandler) setStandaloneWorkerRunning(ctx context.Context, name, lastActiveAt string) error { + logger := log.FromContext(ctx).WithName("appservice") + 
// isLastActiveNewer reports whether the RFC 3339 timestamp next should
// replace current.
//
// An empty or unparsable next never wins; an empty or unparsable current
// always loses to a valid next. Otherwise next must be strictly later than
// current.
func isLastActiveNewer(next, current string) bool {
	switch {
	case next == "":
		return false
	case current == "":
		return true
	}

	candidate, err := time.Parse(time.RFC3339, next)
	if err != nil {
		return false
	}
	baseline, err := time.Parse(time.RFC3339, current)
	// A stored value that cannot be parsed is treated as older.
	return err != nil || candidate.After(baseline)
}
index 000000000..64e6f9aee --- /dev/null +++ b/hiclaw-controller/internal/server/appservice_handler_test.go @@ -0,0 +1,529 @@ +package server + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + "time" + + v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func newAppserviceTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + scheme := runtime.NewScheme() + if err := v1beta1.AddToScheme(scheme); err != nil { + t.Fatalf("add scheme: %v", err) + } + return scheme +} + +func txnBody(t *testing.T, events []matrixEvent) *bytes.Buffer { + t.Helper() + body := transactionBody{Events: events} + b, err := json.Marshal(body) + if err != nil { + t.Fatalf("marshal: %v", err) + } + return bytes.NewBuffer(b) +} + +func mentionEvent(roomID, eventID, sender string, userIDs []string) matrixEvent { + ev := matrixEvent{ + Type: "m.room.message", + RoomID: roomID, + EventID: eventID, + Sender: sender, + } + ev.Content.Mentions = &struct { + UserIDs []string `json:"user_ids"` + }{UserIDs: userIDs} + return ev +} + +// --- Standalone Worker Tests --- + +func TestAppserviceWakesStandaloneWorkerFromOwnRoom(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha-dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@alpha-dev:example.com", + RoomID: "!worker-dm:example.com", + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}). + WithObjects(worker). 
+ Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + handler.now = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2026-05-12T10:20:00Z") + return t + } + + body := txnBody(t, []matrixEvent{ + mentionEvent("!worker-dm:example.com", "$ev1", "@human:example.com", []string{"@alpha-dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn1", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", rec.Code, rec.Body.String()) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + if updated.Spec.DesiredState() != "Running" { + t.Fatalf("state=%q, want Running", updated.Spec.DesiredState()) + } + if updated.Status.LastActiveAt != "2026-05-12T10:20:00Z" { + t.Fatalf("lastActiveAt=%q, want updated", updated.Status.LastActiveAt) + } +} + +func TestAppserviceRejectsStandaloneFromWrongRoom(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha-dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@alpha-dev:example.com", + RoomID: "!worker-dm:example.com", + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}). + WithObjects(worker). 
+ Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + + body := txnBody(t, []matrixEvent{ + mentionEvent("!other-room:example.com", "$ev2", "@human:example.com", []string{"@alpha-dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn2", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + // Should NOT have been woken — still Sleeping. + if updated.Spec.DesiredState() != "Sleeping" { + t.Fatalf("state=%q, want Sleeping (wrong room should be rejected)", updated.Spec.DesiredState()) + } +} + +// --- Team Worker Tests --- + +func TestAppserviceWakesTeamWorkerFromDMRoom(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@dev:example.com", + RoomID: "!dev-dm:example.com", + }, + } + team := &v1beta1.Team{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha", Namespace: "default"}, + Spec: v1beta1.TeamSpec{ + WorkerMembers: []v1beta1.TeamWorkerRef{ + {Name: "lead", Role: "team_leader"}, + {Name: "dev", Role: "worker"}, + }, + }, + Status: v1beta1.TeamStatus{ + TeamRoomID: "!team-room:example.com", + Members: []v1beta1.TeamMemberStatus{{ + Name: "dev", + Role: "worker", + MatrixUserID: "@dev:example.com", + RoomID: "!dev-dm:example.com", + }}, + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}, &v1beta1.Team{}). + WithObjects(worker, team). 
+ Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + handler.now = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2026-05-12T10:20:00Z") + return t + } + + // Mention from worker's DM room — should also be allowed. + body := txnBody(t, []matrixEvent{ + mentionEvent("!dev-dm:example.com", "$ev4", "@human:example.com", []string{"@dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn4", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + if updated.Spec.DesiredState() != "Running" { + t.Fatalf("state=%q, want Running", updated.Spec.DesiredState()) + } +} + +func TestAppserviceWakesDecoupledTeamWorkerFromTeamRoom(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@dev:example.com", + RoomID: "!dev-dm:example.com", + }, + } + team := &v1beta1.Team{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha", Namespace: "default"}, + Spec: v1beta1.TeamSpec{ + WorkerMembers: []v1beta1.TeamWorkerRef{ + {Name: "lead", Role: "team_leader"}, + {Name: "dev", Role: "worker"}, + }, + }, + Status: v1beta1.TeamStatus{ + TeamRoomID: "!team-room:example.com", + Members: []v1beta1.TeamMemberStatus{{ + Name: "dev", + Role: "worker", + MatrixUserID: "@dev:example.com", + RoomID: "!dev-dm:example.com", + }}, + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). 
+ WithStatusSubresource(&v1beta1.Worker{}, &v1beta1.Team{}). + WithObjects(worker, team). + Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + handler.now = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2026-05-12T10:20:00Z") + return t + } + + body := txnBody(t, []matrixEvent{ + mentionEvent("!team-room:example.com", "$ev-decoupled", "@human:example.com", []string{"@dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn-decoupled", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + if updated.Spec.DesiredState() != "Running" { + t.Fatalf("state=%q, want Running", updated.Spec.DesiredState()) + } + if updated.Status.LastActiveAt != "2026-05-12T10:20:00Z" { + t.Fatalf("lastActiveAt=%q, want updated", updated.Status.LastActiveAt) + } +} + +func TestAppserviceRejectsCrossTeamMention(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@dev:example.com", + RoomID: "!dev-dm:example.com", + }, + } + team := &v1beta1.Team{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha", Namespace: "default"}, + Spec: v1beta1.TeamSpec{ + WorkerMembers: []v1beta1.TeamWorkerRef{ + {Name: "lead", Role: "team_leader"}, + {Name: "dev", Role: "worker"}, + }, + }, + Status: v1beta1.TeamStatus{ + TeamRoomID: "!team-room:example.com", + Members: []v1beta1.TeamMemberStatus{{ + Name: "dev", + Role: "worker", + MatrixUserID: 
"@dev:example.com", + RoomID: "!dev-dm:example.com", + }}, + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}, &v1beta1.Team{}). + WithObjects(worker, team). + Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + + // Mention from a DIFFERENT team's room — should be rejected. + body := txnBody(t, []matrixEvent{ + mentionEvent("!other-team-room:example.com", "$ev5", "@human:example.com", []string{"@dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn5", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + // Should NOT have been woken — cross-team mention rejected. 
+ if updated.Spec.DesiredState() != "Sleeping" { + t.Fatalf("state=%q, want Sleeping (cross-team should be rejected)", updated.Spec.DesiredState()) + } +} + +// --- Auth Tests --- + +func TestAppserviceRejectsInvalidHSToken(t *testing.T) { + scheme := newAppserviceTestScheme(t) + k8sClient := fake.NewClientBuilder().WithScheme(scheme).Build() + handler := NewAppserviceHandler("correct-token", k8sClient, "default") + + body := txnBody(t, []matrixEvent{ + mentionEvent("!room:example.com", "$ev", "@sender:example.com", []string{"@target:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn", body) + req.Header.Set("Authorization", "Bearer wrong-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusForbidden { + t.Fatalf("expected 403, got %d", rec.Code) + } +} + +func TestAppserviceAcceptsQueryParamToken(t *testing.T) { + scheme := newAppserviceTestScheme(t) + k8sClient := fake.NewClientBuilder().WithScheme(scheme).Build() + handler := NewAppserviceHandler("correct-token", k8sClient, "default") + + body := txnBody(t, []matrixEvent{}) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn?access_token=correct-token", body) + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200 with query param token, got %d", rec.Code) + } +} + +// --- Dedup Tests --- + +func TestAppserviceDeduplicatesEvents(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha-dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@alpha-dev:example.com", + RoomID: "!worker-dm:example.com", + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}). + WithObjects(worker). 
+ Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + handler.now = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2026-05-12T10:20:00Z") + return t + } + + // First request — should wake the worker. + body1 := txnBody(t, []matrixEvent{ + mentionEvent("!worker-dm:example.com", "$same-event", "@human:example.com", []string{"@alpha-dev:example.com"}), + }) + req1 := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn1", body1) + req1.Header.Set("Authorization", "Bearer test-hs-token") + rec1 := httptest.NewRecorder() + handler.HandleTransactions(rec1, req1) + + var after1 v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &after1); err != nil { + t.Fatalf("get worker: %v", err) + } + if after1.Spec.DesiredState() != "Running" { + t.Fatalf("state=%q after first, want Running", after1.Spec.DesiredState()) + } + + // Reset worker back to Sleeping to detect if second request processes. + after1.Spec.State = &sleeping + if err := k8sClient.Update(context.Background(), &after1); err != nil { + t.Fatalf("reset: %v", err) + } + + // Second request with SAME event — should be deduped, worker stays Sleeping. 
+ body2 := txnBody(t, []matrixEvent{ + mentionEvent("!worker-dm:example.com", "$same-event", "@human:example.com", []string{"@alpha-dev:example.com"}), + }) + req2 := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn2", body2) + req2.Header.Set("Authorization", "Bearer test-hs-token") + rec2 := httptest.NewRecorder() + handler.HandleTransactions(rec2, req2) + + var after2 v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &after2); err != nil { + t.Fatalf("get worker: %v", err) + } + if after2.Spec.DesiredState() != "Sleeping" { + t.Fatalf("state=%q after dedup, want Sleeping (should have been skipped)", after2.Spec.DesiredState()) + } +} + +// --- Non-Sleeping Ignore Tests --- + +func TestAppserviceIgnoresRunningWorker(t *testing.T) { + scheme := newAppserviceTestScheme(t) + // Worker is Running — should NOT be modified. + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha-dev", Namespace: "default"}, + // State nil → defaults to Running. + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@alpha-dev:example.com", + RoomID: "!worker-dm:example.com", + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}). + WithObjects(worker). 
+ Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + + body := txnBody(t, []matrixEvent{ + mentionEvent("!worker-dm:example.com", "$ev", "@human:example.com", []string{"@alpha-dev:example.com"}), + }) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + // State should remain Running (nil). + if updated.Spec.DesiredState() != "Running" { + t.Fatalf("state=%q, want Running (should be unchanged)", updated.Spec.DesiredState()) + } +} + +// --- Non-message Event Filtering --- + +func TestAppserviceIgnoresNonMessageEvents(t *testing.T) { + scheme := newAppserviceTestScheme(t) + sleeping := "Sleeping" + worker := &v1beta1.Worker{ + ObjectMeta: metav1.ObjectMeta{Name: "alpha-dev", Namespace: "default"}, + Spec: v1beta1.WorkerSpec{State: &sleeping}, + Status: v1beta1.WorkerStatus{ + MatrixUserID: "@alpha-dev:example.com", + RoomID: "!worker-dm:example.com", + }, + } + k8sClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&v1beta1.Worker{}). + WithObjects(worker). + Build() + + handler := NewAppserviceHandler("test-hs-token", k8sClient, "default") + + // Send a non-message event type — should be ignored. 
+ ev := matrixEvent{ + Type: "m.room.member", + RoomID: "!worker-dm:example.com", + EventID: "$ev", + Sender: "@human:example.com", + } + body := txnBody(t, []matrixEvent{ev}) + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn", body) + req.Header.Set("Authorization", "Bearer test-hs-token") + rec := httptest.NewRecorder() + + handler.HandleTransactions(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var updated v1beta1.Worker + if err := k8sClient.Get(context.Background(), client.ObjectKey{Name: "alpha-dev", Namespace: "default"}, &updated); err != nil { + t.Fatalf("get worker: %v", err) + } + if updated.Spec.DesiredState() != "Sleeping" { + t.Fatalf("state=%q, want Sleeping (non-message events should be ignored)", updated.Spec.DesiredState()) + } +}
diff --git a/hiclaw-controller/internal/server/appservice_mgmt_handler.go b/hiclaw-controller/internal/server/appservice_mgmt_handler.go
new file mode 100644
index 000000000..0f8df850f
--- /dev/null
+++ b/hiclaw-controller/internal/server/appservice_mgmt_handler.go
@@ -0,0 +1,88 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+
+	"github.com/hiclaw/hiclaw-controller/internal/matrix"
+)
+
+// AppServiceHandler handles AppService management endpoints.
+type AppServiceHandler struct {
+	matrixCfg matrix.Config
+}
+
+// NewAppServiceHandler creates an AppServiceHandler.
+func NewAppServiceHandler(cfg matrix.Config) *AppServiceHandler {
+	return &AppServiceHandler{matrixCfg: cfg}
+}
+
+// rotateTokenRequest is the JSON body for POST /api/v1/appservice/rotate-token.
+type rotateTokenRequest struct {
+	ASToken string `json:"as_token"`
+	HSToken string `json:"hs_token"`
+}
+
+// RotateToken rotates the Matrix AppService as_token (and optionally hs_token).
+// It creates a temporary TuwunelClient with the new token, unregisters the old
+// registration (via admin command, which does not require the old as_token),
+// registers with the new token, and verifies with a smoke test.
+//
+// Responses: 400 when the body is not valid JSON, exceeds the size cap, or
+// omits as_token; 500 when registration or the smoke test fails; otherwise
+// 200 with a JSON {"message": ...} body. An empty hs_token keeps the
+// currently configured one.
+//
+// NOTE: This only updates the homeserver registration. The caller must also
+// update the controller env var / Secret and restart for the new token to
+// take effect permanently.
+func (h *AppServiceHandler) RotateToken(w http.ResponseWriter, r *http.Request) {
+	// The body is two short tokens; cap it so an oversized or never-ending
+	// upload cannot tie up the handler.
+	const maxBodyBytes = 64 << 10
+	r.Body = http.MaxBytesReader(w, r.Body, maxBodyBytes)
+
+	var req rotateTokenRequest
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(w, fmt.Sprintf("invalid request body: %v", err), http.StatusBadRequest)
+		return
+	}
+	if req.ASToken == "" {
+		http.Error(w, "as_token is required", http.StatusBadRequest)
+		return
+	}
+
+	// Build a new config with the rotated tokens.
+	newCfg := h.matrixCfg
+	newCfg.AppServiceToken = req.ASToken
+	if req.HSToken != "" {
+		newCfg.AppServiceHSToken = req.HSToken
+	}
+
+	// Create a temporary client with the new token.
+	client := matrix.NewTuwunelClient(newCfg, nil)
+
+	ctx := r.Context()
+
+	// Build registration with new tokens and register (includes unregister fallback).
+	reg := matrix.RenderAppServiceRegistration(newCfg)
+	if err := client.RegisterAppService(ctx, reg); err != nil {
+		http.Error(w, fmt.Sprintf("appservice registration failed: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	// Verify the new token works.
+	if err := client.AppServiceSmokeTest(ctx); err != nil {
+		http.Error(w, fmt.Sprintf("smoke test failed after rotation: %v", err), http.StatusInternalServerError)
+		return
+	}
+
+	resp := map[string]string{
+		"message": "appservice token rotated successfully; update your env file / Secret and restart the controller",
+	}
+	w.Header().Set("Content-Type", "application/json")
+	// The rotation already succeeded and the response is committed; an encode
+	// error here can only mean the client went away, so it is dropped.
+	_ = json.NewEncoder(w).Encode(resp)
+}
diff --git a/hiclaw-controller/internal/service/interfaces.go b/hiclaw-controller/internal/service/interfaces.go index 07828b5ba..b9d8122f0 100644 --- a/hiclaw-controller/internal/service/interfaces.go +++ b/hiclaw-controller/internal/service/interfaces.go @@ -151,6 +151,22 @@ type HumanProvisioner interface { // room management on this reconcile pass. LoginAsHuman(ctx context.Context, username, password string) (string, error) + // RegisterAppServiceUser registers (or logs in to) a Matrix account + // via the AppService API. Returns Created=true on first registration. + RegisterAppServiceUser(ctx context.Context, username string) (*HumanCredentials, error) + + // RegisterLegacyUser registers via the registration_token flow. + RegisterLegacyUser(ctx context.Context, username string) (*HumanCredentials, error) + + // SetUserPassword writes a password for the given user via the admin bot. + SetUserPassword(ctx context.Context, userID, password string) error + + // LoginAppServiceUser obtains a token via AS login (no password). + LoginAppServiceUser(ctx context.Context, username string) (string, error) + + // LoginWithPassword obtains a token via the password login flow. + LoginWithPassword(ctx context.Context, username, password string) (string, error) + // SetDisplayName updates the Matrix profile displayname for the user. // Requires a user-scoped access token. SetDisplayName(ctx context.Context, userID, accessToken, displayName string) error @@ -175,6 +191,10 @@ type HumanProvisioner interface { // of roomID via "!admin users force-leave-room".
Fire-and-forget at // the bot layer, but the admin message delivery itself is confirmed. ForceLeaveRoom(ctx context.Context, userID, roomID string) error + + // DeactivateHumanUser disables a Human Matrix account after membership removal. + DeactivateHumanUser(ctx context.Context, userID string) error + MatrixAppServiceEnabled() bool } @@ -185,6 +205,7 @@ type HumanCredentials struct { UserID string AccessToken string Password string + Created bool } // Compile-time interface satisfaction checks. diff --git a/hiclaw-controller/internal/service/provisioner_human.go b/hiclaw-controller/internal/service/provisioner_human.go index 67b537cdd..76069171d 100644 --- a/hiclaw-controller/internal/service/provisioner_human.go +++ b/hiclaw-controller/internal/service/provisioner_human.go @@ -8,43 +8,137 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" ) +// ========================================================================= +// Decomposed primitives — five explicit one-action calls +// ========================================================================= +// +// The original EnsureHumanUser / LoginAsHuman composites bundled +// "register + set password" and "AS-or-password login" into single +// black boxes. That coupling made it impossible to express +// per-identity-type behaviour (a SSO Human must register without ever +// being assigned a password, for example) without growing if/else +// branches inside the composite. The decomposition below splits each +// composite into the smallest semantic unit so callers — both legacy +// reconcile paths and future identity-source implementations — can +// pick exactly the steps they need. +// +// All five methods are pure adapters over internal/matrix; the +// decision about *whether* to invoke a given step lives at the call +// site. + +// RegisterAppServiceUser performs a single AS-register call. 
When the +// account already exists (M_USER_IN_USE) the underlying client falls +// back to LoginAppServiceUser and reports Created=false. The returned +// HumanCredentials carries an empty Password — the AS protocol does +// not assign one and callers that want password login must follow up +// with SetUserPassword explicitly. +func (p *Provisioner) RegisterAppServiceUser(ctx context.Context, username string) (*HumanCredentials, error) { + uc, err := p.matrix.EnsureAppServiceUser(ctx, username) + if err != nil { + return nil, fmt.Errorf("AS register human %s: %w", username, err) + } + return &HumanCredentials{ + UserID: uc.UserID, + AccessToken: uc.AccessToken, + Password: "", + Created: uc.Created, + }, nil +} + +// RegisterLegacyUser performs a single registration_token-based +// register; on M_USER_IN_USE the underlying client falls through to +// orphan-recovery (admin reset-password + login). The returned +// HumanCredentials always carries a Password since legacy auth has no +// AS bypass. +func (p *Provisioner) RegisterLegacyUser(ctx context.Context, username string) (*HumanCredentials, error) { + uc, err := p.matrix.EnsureUser(ctx, matrix.EnsureUserRequest{Username: username}) + if err != nil { + return nil, fmt.Errorf("register legacy human %s: %w", username, err) + } + return &HumanCredentials{ + UserID: uc.UserID, + AccessToken: uc.AccessToken, + Password: uc.Password, + Created: uc.Created, + }, nil +} + +// SetUserPassword writes a password for an existing Matrix account via +// the admin bot. Best-effort — admin command delivery is confirmed but +// the bot itself executes the reset asynchronously. Callers that must +// confirm propagation are expected to test by attempting a login +// afterwards. 
+func (p *Provisioner) SetUserPassword(ctx context.Context, userID, password string) error { + return p.matrix.SetPasswordAsAdmin(ctx, userID, password) +} + +// LoginAppServiceUser obtains a fresh access token via the AS login +// flow (no password required). Used by both legacy_password and +// external_sso identity sources when the controller runs in AS mode. +func (p *Provisioner) LoginAppServiceUser(ctx context.Context, username string) (string, error) { + return p.matrix.LoginAppServiceUser(ctx, username) +} + +// LoginWithPassword obtains a fresh access token via the password +// login flow. Used by legacy_password when AS mode is disabled and +// the controller has the user's stored InitialPassword. +func (p *Provisioner) LoginWithPassword(ctx context.Context, username, password string) (string, error) { + return p.matrix.Login(ctx, username, password) +} + +// ========================================================================= +// Composite wrappers retained for incremental migration +// ========================================================================= +// +// EnsureHumanUser and LoginAsHuman remain as backward-compatible +// shims over the new primitives. In-tree callers that need +// per-identity-type behaviour migrate to the primitives directly via +// the humanidentity registry (see internal/controller/humanidentity). +// The wrappers are kept so the WorkerProvisioner / HumanProvisioner +// interface contracts stay stable for the team-admin login path and +// the existing mock-driven tests. +// +// IMPORTANT (P0-2 legacy fix): the AS branch below now calls +// SetUserPassword **only** when RegisterAppServiceUser actually +// created a new account. The previous implementation reset the +// password on every reconcile that hit this method, which would +// silently overwrite any password the user had rotated via Element +// the moment the controller decided to "re-provision". 
+ // EnsureHumanUser registers (or logs in) a Matrix account for a Human CR. // See HumanProvisioner.EnsureHumanUser for the contract around when this -// must be called. This implementation is a thin adapter around -// matrix.Client.EnsureUser — Humans have no persisted WorkerCredentials -// envelope (unlike Workers/Managers), so the caller is responsible for -// recording the returned password in the CR status if needed. +// must be called. This implementation now routes through the explicit +// register / set-password primitives so the "set password" side effect +// is only triggered on first creation. func (p *Provisioner) EnsureHumanUser(ctx context.Context, username string) (*HumanCredentials, error) { if p.MatrixAppServiceEnabled() { - uc, err := p.matrix.EnsureAppServiceUser(ctx, username) + creds, err := p.RegisterAppServiceUser(ctx, username) if err != nil { return nil, fmt.Errorf("ensure human AS user %s: %w", username, err) } - // Set an initial password so the human can log in via Element. - password, err := matrix.GeneratePassword(16) - if err != nil { - return nil, fmt.Errorf("generate human password: %w", err) + // Only assign an initial password on first registration. When + // the account already existed (Created=false) we return the + // AS-issued token without resetting whatever password the user + // may have rotated via Element. 
+ if creds.Created { + password, err := matrix.GeneratePassword(16) + if err != nil { + return nil, fmt.Errorf("generate human password: %w", err) + } + if err := p.SetUserPassword(ctx, creds.UserID, password); err != nil { + return nil, fmt.Errorf("set human password via admin: %w", err) + } + creds.Password = password } - if err := p.matrix.SetPasswordAsAdmin(ctx, uc.UserID, password); err != nil { - return nil, fmt.Errorf("set human password via admin: %w", err) - } - return &HumanCredentials{ - UserID: uc.UserID, - AccessToken: uc.AccessToken, - Password: password, - }, nil + return creds, nil } // Legacy path - uc, err := p.matrix.EnsureUser(ctx, matrix.EnsureUserRequest{Username: username}) + creds, err := p.RegisterLegacyUser(ctx, username) if err != nil { return nil, fmt.Errorf("ensure human matrix user %s: %w", username, err) } - return &HumanCredentials{ - UserID: uc.UserID, - AccessToken: uc.AccessToken, - Password: uc.Password, - }, nil + return creds, nil } // LoginAsHuman obtains a fresh access token for an already-provisioned @@ -55,11 +149,15 @@ func (p *Provisioner) EnsureHumanUser(ctx context.Context, username string) (*Hu // overwrite any password the user changed via Element. func (p *Provisioner) LoginAsHuman(ctx context.Context, username, password string) (string, error) { if p.MatrixAppServiceEnabled() { - return p.matrix.LoginAppServiceUser(ctx, username) + return p.LoginAppServiceUser(ctx, username) } - return p.matrix.Login(ctx, username, password) + return p.LoginWithPassword(ctx, username, password) } +// ========================================================================= +// Other Matrix-side operations Humans need (unchanged) +// ========================================================================= + // SetDisplayName updates the Matrix profile displayname for a human user. 
func (p *Provisioner) SetDisplayName(ctx context.Context, userID, accessToken, displayName string) error { return p.matrix.SetDisplayName(ctx, userID, accessToken, displayName) @@ -94,3 +192,12 @@ func (p *Provisioner) ForceLeaveRoom(ctx context.Context, userID, roomID string) log.FromContext(ctx).Info("sending tuwunel force-leave-room admin command", "room", roomID, "user", userID, "command", cmd) return p.matrix.AdminCommand(ctx, cmd) } + +// DeactivateHumanUser disables a Matrix account through the Tuwunel admin bot. +// Tuwunel owns the exact deactivate/revoke semantics; the controller treats a +// successful command delivery as the offboard handoff point. +func (p *Provisioner) DeactivateHumanUser(ctx context.Context, userID string) error { + cmd := fmt.Sprintf("!admin users deactivate %s", userID) + log.FromContext(ctx).Info("sending tuwunel human deactivate admin command", "user", userID, "command", cmd) + return p.matrix.AdminCommand(ctx, cmd) +} diff --git a/hiclaw-controller/internal/service/provisioner_team_test.go b/hiclaw-controller/internal/service/provisioner_team_test.go index ae35ba954..4c33d488f 100644 --- a/hiclaw-controller/internal/service/provisioner_team_test.go +++ b/hiclaw-controller/internal/service/provisioner_team_test.go @@ -6,6 +6,7 @@ import ( "reflect" "strings" "testing" + "time" v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" "github.com/hiclaw/hiclaw-controller/internal/matrix" @@ -63,6 +64,12 @@ func (f *fakeTeamMatrix) ResolveRoomAlias(context.Context, string) (string, bool func (f *fakeTeamMatrix) DeleteRoomAlias(context.Context, string) error { return nil } +func (f *fakeTeamMatrix) SetRoomName(context.Context, string, string, string) error { return nil } + +func (f *fakeTeamMatrix) SetRoomState(context.Context, string, string, string, map[string]interface{}, string) error { + return nil +} + func (f *fakeTeamMatrix) JoinRoom(_ context.Context, roomID, token string) error { f.joins = append(f.joins, 
roomUserCall{roomID: roomID, userID: token}) return nil @@ -137,6 +144,10 @@ func (f *fakeTeamMatrix) KickFromRoomWithToken(_ context.Context, roomID, userID return nil } +func (f *fakeTeamMatrix) SyncMessages(context.Context, string, time.Duration) (*matrix.SyncMessagesResult, error) { + return &matrix.SyncMessagesResult{}, nil +} + func (f *fakeTeamMatrix) UserID(localpart string) string { return "@" + localpart + ":localhost" } diff --git a/hiclaw-controller/internal/service/room_meta.go b/hiclaw-controller/internal/service/room_meta.go new file mode 100644 index 000000000..0fed6404f --- /dev/null +++ b/hiclaw-controller/internal/service/room_meta.go @@ -0,0 +1,122 @@ +package service + +import ( + v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + "github.com/hiclaw/hiclaw-controller/internal/matrix" +) + +const roomMetaEventType = "room.meta" + +func roomMetaState(content map[string]interface{}) []matrix.StateEvent { + return []matrix.StateEvent{{ + Type: roomMetaEventType, + StateKey: "", + Content: content, + }} +} + +func teamRoomMeta(req TeamRoomRequest, teamAdminID, leaderMatrixID string, userIDForName func(string) string) map[string]interface{} { + meta := baseRoomMeta("team_room") + if req.TeamName != "" { + meta["teamName"] = req.TeamName + } + if req.AdminSpec != nil && teamAdminID != "" { + meta["teamAdmin"] = namedUserMeta(teamAdminID, req.AdminSpec.Name) + } + if req.LeaderName != "" && leaderMatrixID != "" { + meta["leaderWorker"] = workerUserMeta(leaderMatrixID, req.LeaderName) + } + if members := humanMemberMeta(req.HumanMembers, userIDForName); len(members) > 0 { + meta["humanMembers"] = members + } + return meta +} + +func leaderDMRoomMeta(req TeamRoomRequest, teamAdminID, leaderMatrixID string) map[string]interface{} { + meta := baseRoomMeta("direct_room") + if req.TeamName != "" { + meta["teamName"] = req.TeamName + } + if req.AdminSpec != nil && teamAdminID != "" { + meta["teamAdmin"] = namedUserMeta(teamAdminID, 
req.AdminSpec.Name) + } + if req.LeaderName != "" && leaderMatrixID != "" { + meta["leaderWorker"] = workerUserMeta(leaderMatrixID, req.LeaderName) + } + return meta +} + +func workerRoomMeta(req WorkerProvisionRequest, workerMatrixID, leaderMatrixID string) map[string]interface{} { + meta := baseRoomMeta("worker_room") + if req.TeamName != "" { + meta["teamName"] = req.TeamName + } + if req.Name != "" { + meta["workerName"] = req.Name + } + if req.Role == "team_leader" && workerMatrixID != "" { + meta["leaderWorker"] = workerUserMeta(workerMatrixID, req.Name) + } else if req.TeamLeaderName != "" && leaderMatrixID != "" { + meta["leaderWorker"] = workerUserMeta(leaderMatrixID, req.TeamLeaderName) + } + return meta +} + +func managerDMRoomMeta(managerName, managerMatrixID, adminMatrixID, adminName string) map[string]interface{} { + meta := baseRoomMeta("direct_room") + if managerName != "" { + meta["managerName"] = managerName + } + if managerMatrixID != "" { + meta["manager"] = namedUserMeta(managerMatrixID, "manager") + } + if adminMatrixID != "" { + meta["admin"] = namedUserMeta(adminMatrixID, adminName) + } + return meta +} + +func baseRoomMeta(kind string) map[string]interface{} { + return map[string]interface{}{ + "schemaVersion": 1, + "roomKind": kind, + "lifecycle": "persistent", + "createdBy": "hiclaw", + } +} + +func namedUserMeta(userID, name string) map[string]interface{} { + out := map[string]interface{}{"userId": userID} + if name != "" { + out["name"] = name + } + return out +} + +func workerUserMeta(userID, workerName string) map[string]interface{} { + out := map[string]interface{}{"userId": userID} + if workerName != "" { + out["workerName"] = workerName + } + return out +} + +func humanMemberMeta(members []v1beta1.TeamMemberSpec, userIDForName func(string) string) []map[string]interface{} { + out := make([]map[string]interface{}, 0, len(members)) + seen := make(map[string]struct{}, len(members)) + for _, member := range members { + userID := 
member.MatrixUserID + if userID == "" && member.Name != "" && userIDForName != nil { + userID = userIDForName(member.Name) + } + if userID == "" { + continue + } + if _, ok := seen[userID]; ok { + continue + } + seen[userID] = struct{}{} + out = append(out, namedUserMeta(userID, member.Name)) + } + return out +} diff --git a/hiclaw-controller/test/testutil/mocks/human_provisioner.go b/hiclaw-controller/test/testutil/mocks/human_provisioner.go index cd56765b6..ef416f572 100644 --- a/hiclaw-controller/test/testutil/mocks/human_provisioner.go +++ b/hiclaw-controller/test/testutil/mocks/human_provisioner.go @@ -15,23 +15,44 @@ import ( type MockHumanProvisioner struct { mu sync.Mutex + // Composite shims (kept for the team-admin path and legacy tests). EnsureHumanUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) LoginAsHumanFn func(ctx context.Context, name, password string) (string, error) - MatrixUserIDFn func(name string) string - InviteToRoomFn func(ctx context.Context, roomID, userID string) error - JoinRoomAsFn func(ctx context.Context, roomID, userToken string) error - KickFromRoomFn func(ctx context.Context, roomID, userID, reason string) error - ForceLeaveRoomFn func(ctx context.Context, userID, roomID string) error - SetDisplayNameFn func(ctx context.Context, userID, accessToken, displayName string) error + + // Decomposed primitives. 
+ RegisterAppServiceUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) + RegisterLegacyUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) + SetUserPasswordFn func(ctx context.Context, userID, password string) error + LoginAppServiceUserFn func(ctx context.Context, name string) (string, error) + LoginWithPasswordFn func(ctx context.Context, name, password string) (string, error) + + MatrixUserIDFn func(name string) string + InviteToRoomFn func(ctx context.Context, roomID, userID string) error + JoinRoomAsFn func(ctx context.Context, roomID, userToken string) error + KickFromRoomFn func(ctx context.Context, roomID, userID, reason string) error + ForceLeaveRoomFn func(ctx context.Context, userID, roomID string) error + DeactivateHumanUserFn func(ctx context.Context, userID string) error + SetDisplayNameFn func(ctx context.Context, userID, accessToken, displayName string) error + + // AppServiceEnabled toggles MatrixAppServiceEnabled() — needed by + // the legacy_password identity source to choose between AS and + // password registration paths in tests. + AppServiceEnabled bool Calls struct { - EnsureHumanUser []string - LoginAsHuman []LoginAsHumanCall - SetDisplayName []SetDisplayNameCall - InviteToRoom []RoomMembershipCall - JoinRoomAs []JoinRoomAsCall - KickFromRoom []KickFromRoomCall - ForceLeaveRoom []ForceLeaveRoomCall + EnsureHumanUser []string + LoginAsHuman []LoginAsHumanCall + RegisterAppServiceUser []string + RegisterLegacyUser []string + SetUserPassword []SetUserPasswordCall + LoginAppServiceUser []string + LoginWithPassword []LoginAsHumanCall + SetDisplayName []SetDisplayNameCall + InviteToRoom []RoomMembershipCall + JoinRoomAs []JoinRoomAsCall + KickFromRoom []KickFromRoomCall + ForceLeaveRoom []ForceLeaveRoomCall + DeactivateHumanUser []string } } @@ -41,6 +62,12 @@ type LoginAsHumanCall struct { Password string } +// SetUserPasswordCall records (userID, password) passed to SetUserPassword. 
+type SetUserPasswordCall struct { + UserID string + Password string +} + // SetDisplayNameCall records SetDisplayName input arguments. type SetDisplayNameCall struct { UserID string @@ -94,12 +121,19 @@ func (m *MockHumanProvisioner) Reset() { m.clearCallsLocked() m.EnsureHumanUserFn = nil m.LoginAsHumanFn = nil + m.RegisterAppServiceUserFn = nil + m.RegisterLegacyUserFn = nil + m.SetUserPasswordFn = nil + m.LoginAppServiceUserFn = nil + m.LoginWithPasswordFn = nil m.MatrixUserIDFn = nil m.InviteToRoomFn = nil m.JoinRoomAsFn = nil m.KickFromRoomFn = nil m.ForceLeaveRoomFn = nil + m.DeactivateHumanUserFn = nil m.SetDisplayNameFn = nil + m.AppServiceEnabled = false } // ClearCalls resets call records only, preserving Fn overrides. @@ -111,13 +145,19 @@ func (m *MockHumanProvisioner) ClearCalls() { func (m *MockHumanProvisioner) clearCallsLocked() { m.Calls = struct { - EnsureHumanUser []string - LoginAsHuman []LoginAsHumanCall - SetDisplayName []SetDisplayNameCall - InviteToRoom []RoomMembershipCall - JoinRoomAs []JoinRoomAsCall - KickFromRoom []KickFromRoomCall - ForceLeaveRoom []ForceLeaveRoomCall + EnsureHumanUser []string + LoginAsHuman []LoginAsHumanCall + RegisterAppServiceUser []string + RegisterLegacyUser []string + SetUserPassword []SetUserPasswordCall + LoginAppServiceUser []string + LoginWithPassword []LoginAsHumanCall + SetDisplayName []SetDisplayNameCall + InviteToRoom []RoomMembershipCall + JoinRoomAs []JoinRoomAsCall + KickFromRoom []KickFromRoomCall + ForceLeaveRoom []ForceLeaveRoomCall + DeactivateHumanUser []string }{} } @@ -133,6 +173,7 @@ func (m *MockHumanProvisioner) EnsureHumanUser(ctx context.Context, name string) UserID: "@" + name + ":localhost", AccessToken: "mock-human-token-" + name, Password: "mock-human-pw-" + name, + Created: true, }, nil } @@ -147,6 +188,71 @@ func (m *MockHumanProvisioner) LoginAsHuman(ctx context.Context, name, password return "mock-human-token-" + name, nil } +func (m *MockHumanProvisioner) 
RegisterAppServiceUser(ctx context.Context, name string) (*service.HumanCredentials, error) { + m.mu.Lock() + m.Calls.RegisterAppServiceUser = append(m.Calls.RegisterAppServiceUser, name) + fn := m.RegisterAppServiceUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return &service.HumanCredentials{ + UserID: "@" + name + ":localhost", + AccessToken: "mock-as-token-" + name, + Password: "", + Created: true, + }, nil +} + +func (m *MockHumanProvisioner) RegisterLegacyUser(ctx context.Context, name string) (*service.HumanCredentials, error) { + m.mu.Lock() + m.Calls.RegisterLegacyUser = append(m.Calls.RegisterLegacyUser, name) + fn := m.RegisterLegacyUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return &service.HumanCredentials{ + UserID: "@" + name + ":localhost", + AccessToken: "mock-legacy-token-" + name, + Password: "mock-human-pw-" + name, + Created: true, + }, nil +} + +func (m *MockHumanProvisioner) SetUserPassword(ctx context.Context, userID, password string) error { + m.mu.Lock() + m.Calls.SetUserPassword = append(m.Calls.SetUserPassword, SetUserPasswordCall{UserID: userID, Password: password}) + fn := m.SetUserPasswordFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID, password) + } + return nil +} + +func (m *MockHumanProvisioner) LoginAppServiceUser(ctx context.Context, name string) (string, error) { + m.mu.Lock() + m.Calls.LoginAppServiceUser = append(m.Calls.LoginAppServiceUser, name) + fn := m.LoginAppServiceUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return "mock-as-token-" + name, nil +} + +func (m *MockHumanProvisioner) LoginWithPassword(ctx context.Context, name, password string) (string, error) { + m.mu.Lock() + m.Calls.LoginWithPassword = append(m.Calls.LoginWithPassword, LoginAsHumanCall{Name: name, Password: password}) + fn := m.LoginWithPasswordFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name, password) + } + return "mock-pw-token-" + name, nil +} + func (m 
*MockHumanProvisioner) MatrixUserID(name string) string { m.mu.Lock() fn := m.MatrixUserIDFn @@ -212,8 +318,19 @@ func (m *MockHumanProvisioner) ForceLeaveRoom(ctx context.Context, userID, roomI return nil } +func (m *MockHumanProvisioner) DeactivateHumanUser(ctx context.Context, userID string) error { + m.mu.Lock() + m.Calls.DeactivateHumanUser = append(m.Calls.DeactivateHumanUser, userID) + fn := m.DeactivateHumanUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID) + } + return nil +} + func (m *MockHumanProvisioner) MatrixAppServiceEnabled() bool { - return false + return m.AppServiceEnabled } // Compile-time interface satisfaction check. From 66c151a0535900c127b3b510050110aa29accada Mon Sep 17 00:00:00 2001 From: shiyiyue1102 Date: Sat, 4 Jul 2026 21:05:54 +0800 Subject: [PATCH 2/4] test: wait for human status in integration Change-Id: I7abd7064cb2832e261151bfff596c91b7b78aa45 --- tests/test-19-human-and-team-admin.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test-19-human-and-team-admin.sh b/tests/test-19-human-and-team-admin.sh index e768a9b6a..feedd873d 100644 --- a/tests/test-19-human-and-team-admin.sh +++ b/tests/test-19-human-and-team-admin.sh @@ -99,8 +99,13 @@ assert_eq "${TEST_HUMAN}" "${HUMAN_NAME_CHK}" "Human CR has correct name" log_info "Waiting for controller to reconcile Human..." 
HUMAN_TIMEOUT=90; HUMAN_ELAPSED=0 HUMAN_CREATED=false +HUMAN_PHASE="" +HUMAN_STATUS_MXID="" while [ "${HUMAN_ELAPSED}" -lt "${HUMAN_TIMEOUT}" ]; do - if exec_in_manager cat /var/log/hiclaw/hiclaw-controller-error.log 2>/dev/null | grep -q "human created.*${TEST_HUMAN}"; then + HUMAN_STATUS=$(exec_in_agent hiclaw get humans "${TEST_HUMAN}" -o json 2>/dev/null || echo "{}") + HUMAN_PHASE=$(echo "${HUMAN_STATUS}" | jq -r '.phase // empty' 2>/dev/null) + HUMAN_STATUS_MXID=$(echo "${HUMAN_STATUS}" | jq -r '.matrixUserID // empty' 2>/dev/null) + if [ "${HUMAN_PHASE}" = "Active" ] && [ -n "${HUMAN_STATUS_MXID}" ]; then HUMAN_CREATED=true break fi @@ -111,6 +116,7 @@ if [ "${HUMAN_CREATED}" = true ]; then log_pass "HumanReconciler created human (took ~${HUMAN_ELAPSED}s)" else log_fail "HumanReconciler did not create human within ${HUMAN_TIMEOUT}s" + log_info "Last Human status: phase='${HUMAN_PHASE}' matrixUserID='${HUMAN_STATUS_MXID}'" exec_in_manager cat /var/log/hiclaw/hiclaw-controller-error.log 2>/dev/null | grep "${TEST_HUMAN}" | tail -5 fi From f131add9f767840afd58b593c09af90346bf4189 Mon Sep 17 00:00:00 2001 From: shiyiyue1102 Date: Sat, 4 Jul 2026 23:09:35 +0800 Subject: [PATCH 3/4] fix(controller): wire sso admin and appservice push Change-Id: Ia3dc013401cc1dde8d11f8c48832ca853c9d299a --- changelog/current.md | 1 + hiclaw-controller/internal/app/app.go | 1 + hiclaw-controller/internal/config/config.go | 12 + .../internal/config/config_test.go | 10 + .../internal/controller/team_controller.go | 28 ++- .../controller/team_controller_test.go | 61 +++++ .../internal/initializer/initializer.go | 2 + .../server/appservice_handler_test.go | 21 ++ hiclaw-controller/internal/server/http.go | 6 + .../test/testutil/mocks/provisioner.go | 212 +++++++++++++++++- 10 files changed, 348 insertions(+), 6 deletions(-) diff --git a/changelog/current.md b/changelog/current.md index 88faa0463..ba6227ade 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -34,3 +34,4 @@ 
Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas - **Remote Worker applied target auth**: Remote Worker authentication now prefers the status-pinned deployment target and falls back to spec only before first provisioning, so spec target edits do not immediately break the running remote Worker or trust a target before it is applied. - **Remote Worker lifecycle boundary**: Workers now record the applied deployment target in status, reject running target changes until the Worker is Stopped, clean up using the applied target, and register remote Pod watches for Worker/Team status updates. - **Team Worker CR decoupling**: Worker identity enrichment and Worker REST APIs now resolve `spec.workerMembers` references, and Teams reject sharing the same referenced Worker CR before injecting coordination context. +- **Matrix AppService integration**: SSO Human Team admins now resolve through the Human identity source, and Matrix AppService transaction push routes are wired into the controller registration path. 
diff --git a/hiclaw-controller/internal/app/app.go b/hiclaw-controller/internal/app/app.go index 982ce7b61..a6f4e15f1 100644 --- a/hiclaw-controller/internal/app/app.go +++ b/hiclaw-controller/internal/app/app.go @@ -199,6 +199,7 @@ func (a *App) Start(ctx context.Context) error { AppServiceToken: a.cfg.MatrixAppServiceASToken, AppServiceHSToken: a.cfg.MatrixAppServiceHSToken, AppServiceSenderLocalpart: a.cfg.MatrixAppServiceSenderLocalpart, + AppServicePushURL: a.cfg.MatrixAppServicePushURL, MatrixDomain: a.cfg.MatrixDomain, }, } diff --git a/hiclaw-controller/internal/config/config.go b/hiclaw-controller/internal/config/config.go index e17bbb930..ff756561c 100644 --- a/hiclaw-controller/internal/config/config.go +++ b/hiclaw-controller/internal/config/config.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "strconv" + "strings" v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" "github.com/hiclaw/hiclaw-controller/internal/agentconfig" @@ -144,6 +145,7 @@ type Config struct { MatrixAppServiceHSToken string MatrixAppServiceSenderLocalpart string MatrixAppServiceUserNamespaceRegex string + MatrixAppServicePushURL string // Auto-generation tracking (not exported to env / child containers) MatrixAppServiceASTokenAutoGenerated bool `json:"-"` @@ -420,6 +422,7 @@ func LoadConfig() *Config { // Tokens must be provided via env vars (set by install script or manually). // We do NOT auto-generate at runtime to prevent token drift across restarts. 
if cfg.MatrixAppServiceEnabled { + cfg.MatrixAppServicePushURL = appServicePushURL(cfg.ControllerURL) if cfg.MatrixAppServiceASToken == "" { panic("HICLAW_MATRIX_APPSERVICE_AS_TOKEN is required when AppService mode is enabled; run install script or set env var") } @@ -700,9 +703,18 @@ func (c *Config) MatrixConfig() matrix.Config { AppServiceHSToken: c.MatrixAppServiceHSToken, AppServiceSenderLocalpart: c.MatrixAppServiceSenderLocalpart, AppServiceUserNamespaceRegex: c.MatrixAppServiceUserNamespaceRegex, + AppServicePushURL: c.MatrixAppServicePushURL, } } +func appServicePushURL(controllerURL string) string { + controllerURL = strings.TrimRight(strings.TrimSpace(controllerURL), "/") + if controllerURL == "" { + return "" + } + return controllerURL + "/_matrix/app/v1" +} + func (c *Config) GatewayConfig() gateway.Config { return gateway.Config{ ConsoleURL: c.HigressBaseURL, diff --git a/hiclaw-controller/internal/config/config_test.go b/hiclaw-controller/internal/config/config_test.go index 0321eadc1..a297912da 100644 --- a/hiclaw-controller/internal/config/config_test.go +++ b/hiclaw-controller/internal/config/config_test.go @@ -179,6 +179,16 @@ func TestLoadConfigPrefersAbstractInfraEnv(t *testing.T) { } } +func TestMatrixConfigIncludesAppServicePushURL(t *testing.T) { + cfg := &Config{ + MatrixAppServicePushURL: appServicePushURL("http://controller.example.com:8090/"), + } + + if got, want := cfg.MatrixConfig().AppServicePushURL, "http://controller.example.com:8090/_matrix/app/v1"; got != want { + t.Fatalf("AppServicePushURL = %q, want %q", got, want) + } +} + func TestLoadConfigUsesSharedAdminCredentialsForHigress(t *testing.T) { t.Setenv("HICLAW_ADMIN_USER", "shared-admin") t.Setenv("HICLAW_ADMIN_PASSWORD", "shared-secret") diff --git a/hiclaw-controller/internal/controller/team_controller.go b/hiclaw-controller/internal/controller/team_controller.go index 830403253..9563644ed 100644 --- a/hiclaw-controller/internal/controller/team_controller.go +++ 
b/hiclaw-controller/internal/controller/team_controller.go @@ -13,6 +13,7 @@ import ( "github.com/hiclaw/hiclaw-controller/internal/agentconfig" "github.com/hiclaw/hiclaw-controller/internal/auth" "github.com/hiclaw/hiclaw-controller/internal/backend" + "github.com/hiclaw/hiclaw-controller/internal/controller/humanidentity" "github.com/hiclaw/hiclaw-controller/internal/executor" "github.com/hiclaw/hiclaw-controller/internal/gateway" "github.com/hiclaw/hiclaw-controller/internal/metrics" @@ -137,25 +138,42 @@ func (r *TeamReconciler) resolveTeamAdminActor(ctx context.Context, t *v1beta1.T return teamAdminActor{}, fmt.Errorf("load team admin human %s/%s: %w", key.Namespace, key.Name, err) } - username := human.Spec.EffectiveUsername(human.Name) - matrixUserID := r.Provisioner.MatrixUserID(username) + humanProv, ok := r.Provisioner.(service.HumanProvisioner) + if !ok { + return teamAdminActor{}, fmt.Errorf("team admin human %s/%s requires HumanProvisioner support", key.Namespace, key.Name) + } + identity, err := humanidentity.ResolveHuman(&human.Spec, human.Name, humanidentity.Deps{Provisioner: humanProv}) + if err != nil { + return teamAdminActor{}, fmt.Errorf("resolve team admin human %s/%s identity: %w", key.Namespace, key.Name, err) + } + matrixUserID := human.Status.MatrixUserID + if matrixUserID == "" { + matrixUserID = identity.MatrixUserID + } + if matrixUserID != identity.MatrixUserID { + return teamAdminActor{}, fmt.Errorf("team admin human %s/%s status.matrixUserID %q does not match resolved identity %q", + key.Namespace, key.Name, matrixUserID, identity.MatrixUserID) + } if t.Spec.Admin.MatrixUserID != "" && t.Spec.Admin.MatrixUserID != matrixUserID { return teamAdminActor{}, fmt.Errorf("team admin matrixUserId %q does not match Human %s/%s matrix user %q", t.Spec.Admin.MatrixUserID, key.Namespace, key.Name, matrixUserID) } - if !r.Provisioner.MatrixAppServiceEnabled() && human.Status.InitialPassword == "" { + if identity.ManagesInitialPassword && 
!r.Provisioner.MatrixAppServiceEnabled() && human.Status.InitialPassword == "" { return teamAdminActor{}, fmt.Errorf("team admin human %s/%s has no initial password; cannot obtain Matrix token", key.Namespace, key.Name) } - token, err := r.Provisioner.LoginAsHuman(ctx, username, human.Status.InitialPassword) + token, err := identity.Source.EnsureUserToken(ctx, &human.Spec, &human.Status, human.Name) if err != nil { return teamAdminActor{}, fmt.Errorf("login as team admin human %s/%s: %w", key.Namespace, key.Name, err) } + if token == "" { + return teamAdminActor{}, fmt.Errorf("team admin human %s/%s has no Matrix token", key.Namespace, key.Name) + } return teamAdminActor{ MatrixUserID: matrixUserID, Token: token, - Username: username, + Username: identity.MatrixLocalpart, }, nil } diff --git a/hiclaw-controller/internal/controller/team_controller_test.go b/hiclaw-controller/internal/controller/team_controller_test.go index 19ec3b3fe..83d30d5a3 100644 --- a/hiclaw-controller/internal/controller/team_controller_test.go +++ b/hiclaw-controller/internal/controller/team_controller_test.go @@ -2,6 +2,8 @@ package controller import ( "context" + "crypto/sha256" + "encoding/hex" "encoding/json" "testing" @@ -324,6 +326,65 @@ func TestReconcileMemberInfraUsesCRNameForCredentialKey(t *testing.T) { } } +func TestResolveTeamAdminActor_ExternalSSOHumanUsesResolvedIdentity(t *testing.T) { + issuer := "https://sso.example.com" + subject := "user-123" + localpart := testSSOLocalpart(issuer, subject) + matrixUserID := "@" + localpart + ":localhost" + human := &v1beta1.Human{ + ObjectMeta: metav1.ObjectMeta{Name: "alice", Namespace: "default"}, + Spec: v1beta1.HumanSpec{ + Username: "legacy-alice", + IdentitySource: &v1beta1.IdentitySourceSpec{ + Issuer: issuer, + Subject: subject, + }, + }, + Status: v1beta1.HumanStatus{ + Phase: "Active", + MatrixUserID: matrixUserID, + }, + } + prov := mocks.NewMockProvisioner() + prov.AppServiceEnabled = true + r := &TeamReconciler{ + Client: 
newTeamTestClient(t, human), + Provisioner: prov, + } + team := &v1beta1.Team{ + ObjectMeta: metav1.ObjectMeta{Name: "team-a", Namespace: "default"}, + Spec: v1beta1.TeamSpec{ + Admin: &v1beta1.TeamAdminSpec{Name: "alice", MatrixUserID: matrixUserID}, + }, + } + + actor, err := r.resolveTeamAdminActor(context.Background(), team) + if err != nil { + t.Fatalf("resolveTeamAdminActor: %v", err) + } + if actor.MatrixUserID != matrixUserID { + t.Fatalf("MatrixUserID=%q, want %q", actor.MatrixUserID, matrixUserID) + } + if actor.Username != localpart { + t.Fatalf("Username=%q, want resolved SSO localpart %q", actor.Username, localpart) + } + if actor.Token != "mock-as-token-"+localpart { + t.Fatalf("Token=%q, want AppService token for resolved SSO localpart", actor.Token) + } + if len(prov.Calls.LoginAppServiceUser) != 1 || prov.Calls.LoginAppServiceUser[0] != localpart { + t.Fatalf("LoginAppServiceUser calls=%v, want [%s]", prov.Calls.LoginAppServiceUser, localpart) + } + if len(prov.Calls.LoginAsHuman) != 0 || len(prov.Calls.LoginWithPassword) != 0 { + t.Fatalf("legacy login must not be used for SSO admin, LoginAsHuman=%v LoginWithPassword=%v", + prov.Calls.LoginAsHuman, prov.Calls.LoginWithPassword) + } +} + +func testSSOLocalpart(issuer, subject string) string { + digest := sha256.Sum256([]byte(issuer + "\x00" + subject)) + return hex.EncodeToString(digest[:16]) +} + func TestReconcileMemberRefreshUsesCRNameCredentialAndRuntimeMatrixName(t *testing.T) { prov := mocks.NewMockProvisioner() state := &MemberState{} diff --git a/hiclaw-controller/internal/initializer/initializer.go b/hiclaw-controller/internal/initializer/initializer.go index 28a05aeec..7f2b17ccf 100644 --- a/hiclaw-controller/internal/initializer/initializer.go +++ b/hiclaw-controller/internal/initializer/initializer.go @@ -41,6 +41,7 @@ type Config struct { AppServiceToken string AppServiceHSToken string AppServiceSenderLocalpart string + AppServicePushURL string MatrixDomain string // needed for AS 
registration YAML // Provider selection — drives which initialization steps run. @@ -221,6 +222,7 @@ func (i *Initializer) registerAppService(ctx context.Context) error { AppServiceToken: i.Config.AppServiceToken, AppServiceHSToken: i.Config.AppServiceHSToken, AppServiceSenderLocalpart: i.Config.AppServiceSenderLocalpart, + AppServicePushURL: i.Config.AppServicePushURL, } reg := matrix.RenderAppServiceRegistration(cfg) if err := i.Matrix.RegisterAppService(ctx, reg); err != nil { diff --git a/hiclaw-controller/internal/server/appservice_handler_test.go b/hiclaw-controller/internal/server/appservice_handler_test.go index 64e6f9aee..dd908df6d 100644 --- a/hiclaw-controller/internal/server/appservice_handler_test.go +++ b/hiclaw-controller/internal/server/appservice_handler_test.go @@ -10,6 +10,8 @@ import ( "time" v1beta1 "github.com/hiclaw/hiclaw-controller/api/v1beta1" + authpkg "github.com/hiclaw/hiclaw-controller/internal/auth" + "github.com/hiclaw/hiclaw-controller/internal/matrix" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -48,6 +50,25 @@ func mentionEvent(roomID, eventID, sender string, userIDs []string) matrixEvent return ev } +func TestNewHTTPServerRegistersAppserviceTransactionRoute(t *testing.T) { + k8s := fake.NewClientBuilder().WithScheme(newAppserviceTestScheme(t)).Build() + srv := NewHTTPServer(":0", ServerDeps{ + Client: k8s, + Namespace: "default", + AuthMw: authpkg.NewMiddleware(nil, nil, nil, nil, ""), + MatrixConfig: matrix.Config{AppServiceEnabled: true, AppServiceHSToken: "correct-token"}, + }) + + req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn-from-mux", txnBody(t, nil)) + req.Header.Set("Authorization", "Bearer correct-token") + rec := httptest.NewRecorder() + srv.Mux.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%q, want 200 from mounted AppService transaction route", rec.Code, 
rec.Body.String()) + } +} + // --- Standalone Worker Tests --- func TestAppserviceWakesStandaloneWorkerFromOwnRoom(t *testing.T) { diff --git a/hiclaw-controller/internal/server/http.go b/hiclaw-controller/internal/server/http.go index 684031bec..edefad95c 100644 --- a/hiclaw-controller/internal/server/http.go +++ b/hiclaw-controller/internal/server/http.go @@ -119,6 +119,12 @@ func NewHTTPServer(addr string, deps ServerDeps) *HTTPServer { // --- AppService management --- ash := NewAppServiceHandler(deps.MatrixConfig) mux.Handle("POST /api/v1/appservice/rotate-token", mw.RequireAuthz(authpkg.ActionUpdate, "appservice", nil)(http.HandlerFunc(ash.RotateToken))) + if deps.MatrixConfig.AppServiceEnabled && deps.MatrixConfig.AppServiceHSToken != "" { + asEvents := NewAppserviceHandler(deps.MatrixConfig.AppServiceHSToken, deps.Client, deps.Namespace) + mux.Handle("PUT /_matrix/app/v1/transactions/{txnId}", http.HandlerFunc(asEvents.HandleTransactions)) + mux.Handle("GET /_matrix/app/v1/users/{userId}", http.HandlerFunc(asEvents.HandleUserQuery)) + mux.Handle("GET /_matrix/app/v1/rooms/{roomAlias}", http.HandlerFunc(asEvents.HandleRoomQuery)) + } // --- Docker API passthrough (embedded mode only) --- if deps.KubeMode == "embedded" && deps.SocketPath != "" { diff --git a/hiclaw-controller/test/testutil/mocks/provisioner.go b/hiclaw-controller/test/testutil/mocks/provisioner.go index 5cc7b3b6e..8040b9fd3 100644 --- a/hiclaw-controller/test/testutil/mocks/provisioner.go +++ b/hiclaw-controller/test/testutil/mocks/provisioner.go @@ -29,9 +29,22 @@ type MockProvisioner struct { DeleteWorkerRoomFn func(ctx context.Context, roomID string) error MatrixUserIDFn func(name string) string LoginAsHumanFn func(ctx context.Context, username, password string) (string, error) + EnsureHumanUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) + RegisterAppServiceUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) + 
RegisterLegacyUserFn func(ctx context.Context, name string) (*service.HumanCredentials, error) + SetUserPasswordFn func(ctx context.Context, userID, password string) error + LoginAppServiceUserFn func(ctx context.Context, name string) (string, error) + LoginWithPasswordFn func(ctx context.Context, name, password string) (string, error) + SetDisplayNameFn func(ctx context.Context, userID, accessToken, displayName string) error + InviteToRoomFn func(ctx context.Context, roomID, userID string) error + JoinRoomAsFn func(ctx context.Context, roomID, userToken string) error + KickFromRoomFn func(ctx context.Context, roomID, userID, reason string) error + ForceLeaveRoomFn func(ctx context.Context, userID, roomID string) error + DeactivateHumanUserFn func(ctx context.Context, userID string) error ProvisionTeamRoomsFn func(ctx context.Context, req service.TeamRoomRequest) (*service.TeamRoomResult, error) DeleteTeamRoomAliasesFn func(ctx context.Context, teamName, leaderName string) error DeleteWorkerRoomAliasFn func(ctx context.Context, workerName string) error + AppServiceEnabled bool Calls struct { ProvisionWorker []service.WorkerProvisionRequest @@ -50,6 +63,18 @@ type MockProvisioner struct { LeaveAllWorkerRooms []string DeleteWorkerRoom []string LoginAsHuman []humanLoginCall + EnsureHumanUser []string + RegisterAppServiceUser []string + RegisterLegacyUser []string + SetUserPassword []userPasswordCall + LoginAppServiceUser []string + LoginWithPassword []humanLoginCall + SetDisplayName []displayNameCall + InviteToRoom []roomMembershipCall + JoinRoomAs []joinRoomAsCall + KickFromRoom []kickFromRoomCall + ForceLeaveRoom []roomMembershipCall + DeactivateHumanUser []string ProvisionTeamRooms []service.TeamRoomRequest DeleteTeamRoomAliases []string DeleteWorkerRoomAlias []string @@ -71,6 +96,33 @@ type humanLoginCall struct { Password string } +type userPasswordCall struct { + UserID string + Password string +} + +type displayNameCall struct { + UserID string + AccessToken 
string + DisplayName string +} + +type roomMembershipCall struct { + RoomID string + UserID string +} + +type joinRoomAsCall struct { + RoomID string + UserToken string +} + +type kickFromRoomCall struct { + RoomID string + UserID string + Reason string +} + func NewMockProvisioner() *MockProvisioner { return &MockProvisioner{} } @@ -97,9 +149,22 @@ func (m *MockProvisioner) Reset() { m.DeleteWorkerRoomFn = nil m.MatrixUserIDFn = nil m.LoginAsHumanFn = nil + m.EnsureHumanUserFn = nil + m.RegisterAppServiceUserFn = nil + m.RegisterLegacyUserFn = nil + m.SetUserPasswordFn = nil + m.LoginAppServiceUserFn = nil + m.LoginWithPasswordFn = nil + m.SetDisplayNameFn = nil + m.InviteToRoomFn = nil + m.JoinRoomAsFn = nil + m.KickFromRoomFn = nil + m.ForceLeaveRoomFn = nil + m.DeactivateHumanUserFn = nil m.ProvisionTeamRoomsFn = nil m.DeleteTeamRoomAliasesFn = nil m.DeleteWorkerRoomAliasFn = nil + m.AppServiceEnabled = false } // ClearCalls resets call records only, preserving Fn overrides. @@ -127,6 +192,18 @@ func (m *MockProvisioner) clearCallsLocked() { LeaveAllWorkerRooms []string DeleteWorkerRoom []string LoginAsHuman []humanLoginCall + EnsureHumanUser []string + RegisterAppServiceUser []string + RegisterLegacyUser []string + SetUserPassword []userPasswordCall + LoginAppServiceUser []string + LoginWithPassword []humanLoginCall + SetDisplayName []displayNameCall + InviteToRoom []roomMembershipCall + JoinRoomAs []joinRoomAsCall + KickFromRoom []kickFromRoomCall + ForceLeaveRoom []roomMembershipCall + DeactivateHumanUser []string ProvisionTeamRooms []service.TeamRoomRequest DeleteTeamRoomAliases []string DeleteWorkerRoomAlias []string @@ -339,6 +416,138 @@ func (m *MockProvisioner) LoginAsHuman(ctx context.Context, username, password s return "mock-human-token-" + username, nil } +func (m *MockProvisioner) EnsureHumanUser(ctx context.Context, name string) (*service.HumanCredentials, error) { + m.mu.Lock() + m.Calls.EnsureHumanUser = append(m.Calls.EnsureHumanUser, name) + 
fn := m.EnsureHumanUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return &service.HumanCredentials{UserID: m.MatrixUserID(name), AccessToken: "mock-human-token-" + name, Password: "mock-human-pw-" + name, Created: true}, nil +} + +func (m *MockProvisioner) RegisterAppServiceUser(ctx context.Context, name string) (*service.HumanCredentials, error) { + m.mu.Lock() + m.Calls.RegisterAppServiceUser = append(m.Calls.RegisterAppServiceUser, name) + fn := m.RegisterAppServiceUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return &service.HumanCredentials{UserID: m.MatrixUserID(name), AccessToken: "mock-as-token-" + name, Created: true}, nil +} + +func (m *MockProvisioner) RegisterLegacyUser(ctx context.Context, name string) (*service.HumanCredentials, error) { + m.mu.Lock() + m.Calls.RegisterLegacyUser = append(m.Calls.RegisterLegacyUser, name) + fn := m.RegisterLegacyUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return &service.HumanCredentials{UserID: m.MatrixUserID(name), AccessToken: "mock-legacy-token-" + name, Password: "mock-human-pw-" + name, Created: true}, nil +} + +func (m *MockProvisioner) SetUserPassword(ctx context.Context, userID, password string) error { + m.mu.Lock() + m.Calls.SetUserPassword = append(m.Calls.SetUserPassword, userPasswordCall{UserID: userID, Password: password}) + fn := m.SetUserPasswordFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID, password) + } + return nil +} + +func (m *MockProvisioner) LoginAppServiceUser(ctx context.Context, name string) (string, error) { + m.mu.Lock() + m.Calls.LoginAppServiceUser = append(m.Calls.LoginAppServiceUser, name) + fn := m.LoginAppServiceUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name) + } + return "mock-as-token-" + name, nil +} + +func (m *MockProvisioner) LoginWithPassword(ctx context.Context, name, password string) (string, error) { + m.mu.Lock() + m.Calls.LoginWithPassword = 
append(m.Calls.LoginWithPassword, humanLoginCall{Username: name, Password: password}) + fn := m.LoginWithPasswordFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, name, password) + } + return "mock-pw-token-" + name, nil +} + +func (m *MockProvisioner) SetDisplayName(ctx context.Context, userID, accessToken, displayName string) error { + m.mu.Lock() + m.Calls.SetDisplayName = append(m.Calls.SetDisplayName, displayNameCall{UserID: userID, AccessToken: accessToken, DisplayName: displayName}) + fn := m.SetDisplayNameFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID, accessToken, displayName) + } + return nil +} + +func (m *MockProvisioner) InviteToRoom(ctx context.Context, roomID, userID string) error { + m.mu.Lock() + m.Calls.InviteToRoom = append(m.Calls.InviteToRoom, roomMembershipCall{RoomID: roomID, UserID: userID}) + fn := m.InviteToRoomFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, roomID, userID) + } + return nil +} + +func (m *MockProvisioner) JoinRoomAs(ctx context.Context, roomID, userToken string) error { + m.mu.Lock() + m.Calls.JoinRoomAs = append(m.Calls.JoinRoomAs, joinRoomAsCall{RoomID: roomID, UserToken: userToken}) + fn := m.JoinRoomAsFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, roomID, userToken) + } + return nil +} + +func (m *MockProvisioner) KickFromRoom(ctx context.Context, roomID, userID, reason string) error { + m.mu.Lock() + m.Calls.KickFromRoom = append(m.Calls.KickFromRoom, kickFromRoomCall{RoomID: roomID, UserID: userID, Reason: reason}) + fn := m.KickFromRoomFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, roomID, userID, reason) + } + return nil +} + +func (m *MockProvisioner) ForceLeaveRoom(ctx context.Context, userID, roomID string) error { + m.mu.Lock() + m.Calls.ForceLeaveRoom = append(m.Calls.ForceLeaveRoom, roomMembershipCall{RoomID: roomID, UserID: userID}) + fn := m.ForceLeaveRoomFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID, roomID) + } + return nil +} + +func (m *MockProvisioner) 
DeactivateHumanUser(ctx context.Context, userID string) error { + m.mu.Lock() + m.Calls.DeactivateHumanUser = append(m.Calls.DeactivateHumanUser, userID) + fn := m.DeactivateHumanUserFn + m.mu.Unlock() + if fn != nil { + return fn(ctx, userID) + } + return nil +} + func (m *MockProvisioner) ProvisionTeamRooms(ctx context.Context, req service.TeamRoomRequest) (*service.TeamRoomResult, error) { m.mu.Lock() m.Calls.ProvisionTeamRooms = append(m.Calls.ProvisionTeamRooms, req) @@ -395,7 +604,8 @@ func (m *MockProvisioner) ServiceAccountCallCounts() (ensure, delete int) { } func (m *MockProvisioner) MatrixAppServiceEnabled() bool { - return false + return m.AppServiceEnabled } var _ service.WorkerProvisioner = (*MockProvisioner)(nil) +var _ service.HumanProvisioner = (*MockProvisioner)(nil) From 39d64a5eebd6bfe8080fd50d3b3bff7e3e6c23a9 Mon Sep 17 00:00:00 2001 From: shiyiyue1102 Date: Sun, 5 Jul 2026 06:29:37 +0800 Subject: [PATCH 4/4] fix(controller): keep appservice url at controller base Change-Id: Id3e81362ea8aad28dad8052f83f460be0d7b7046 --- changelog/current.md | 2 +- hiclaw-controller/internal/config/config.go | 5 +++-- hiclaw-controller/internal/config/config_test.go | 12 +++++++++++- .../internal/server/appservice_handler_test.go | 13 ++++++++++++- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/changelog/current.md b/changelog/current.md index ba6227ade..fb29328c6 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -34,4 +34,4 @@ Record image-affecting changes to `manager/`, `worker/`, `copaw/`, `openclaw-bas - **Remote Worker applied target auth**: Remote Worker authentication now prefers the status-pinned deployment target and falls back to spec only before first provisioning, so spec target edits do not immediately break the running remote Worker or trust a target before it is applied. 
- **Remote Worker lifecycle boundary**: Workers now record the applied deployment target in status, reject running target changes until the Worker is Stopped, clean up using the applied target, and register remote Pod watches for Worker/Team status updates. - **Team Worker CR decoupling**: Worker identity enrichment and Worker REST APIs now resolve `spec.workerMembers` references, and Teams reject sharing the same referenced Worker CR before injecting coordination context. -- **Matrix AppService integration**: SSO Human Team admins now resolve through the Human identity source, and Matrix AppService transaction push routes are wired into the controller registration path. +- **Matrix AppService integration**: SSO Human Team admins now resolve through the Human identity source, Matrix AppService transaction push routes are wired into the controller registration path, and registration keeps the homeserver-facing controller URL as the endpoint base. diff --git a/hiclaw-controller/internal/config/config.go b/hiclaw-controller/internal/config/config.go index ff756561c..d76fa59cb 100644 --- a/hiclaw-controller/internal/config/config.go +++ b/hiclaw-controller/internal/config/config.go @@ -422,7 +422,8 @@ func LoadConfig() *Config { // Tokens must be provided via env vars (set by install script or manually). // We do NOT auto-generate at runtime to prevent token drift across restarts. 
if cfg.MatrixAppServiceEnabled { - cfg.MatrixAppServicePushURL = appServicePushURL(cfg.ControllerURL) + matrixControllerURL := firstNonEmpty(os.Getenv("AGENTTEAMS_MATRIX_APPSERVICE_CONTROLLER_URL"), cfg.ControllerURL) + cfg.MatrixAppServicePushURL = appServicePushURL(matrixControllerURL) if cfg.MatrixAppServiceASToken == "" { panic("HICLAW_MATRIX_APPSERVICE_AS_TOKEN is required when AppService mode is enabled; run install script or set env var") } @@ -712,7 +713,7 @@ func appServicePushURL(controllerURL string) string { if controllerURL == "" { return "" } - return controllerURL + "/_matrix/app/v1" + return controllerURL } func (c *Config) GatewayConfig() gateway.Config { diff --git a/hiclaw-controller/internal/config/config_test.go b/hiclaw-controller/internal/config/config_test.go index a297912da..88166fc71 100644 --- a/hiclaw-controller/internal/config/config_test.go +++ b/hiclaw-controller/internal/config/config_test.go @@ -184,7 +184,17 @@ func TestMatrixConfigIncludesAppServicePushURL(t *testing.T) { MatrixAppServicePushURL: appServicePushURL("http://controller.example.com:8090/"), } - if got, want := cfg.MatrixConfig().AppServicePushURL, "http://controller.example.com:8090/_matrix/app/v1"; got != want { + if got, want := cfg.MatrixConfig().AppServicePushURL, "http://controller.example.com:8090"; got != want { + t.Fatalf("AppServicePushURL = %q, want %q", got, want) + } +} + +func TestLoadConfigUsesMatrixAppServiceControllerURLOverride(t *testing.T) { + t.Setenv("AGENTTEAMS_MATRIX_APPSERVICE_CONTROLLER_URL", "http://matrix-facing-controller:8090/") + + cfg := LoadConfig() + + if got, want := cfg.MatrixConfig().AppServicePushURL, "http://matrix-facing-controller:8090"; got != want { t.Fatalf("AppServicePushURL = %q, want %q", got, want) } } diff --git a/hiclaw-controller/internal/server/appservice_handler_test.go b/hiclaw-controller/internal/server/appservice_handler_test.go index dd908df6d..e39d2313e 100644 --- 
a/hiclaw-controller/internal/server/appservice_handler_test.go +++ b/hiclaw-controller/internal/server/appservice_handler_test.go @@ -52,6 +52,17 @@ func mentionEvent(roomID, eventID, sender string, userIDs []string) matrixEvent func TestNewHTTPServerRegistersAppserviceTransactionRoute(t *testing.T) { k8s := fake.NewClientBuilder().WithScheme(newAppserviceTestScheme(t)).Build() + pushURL := "http://controller.example.com:8090" + reg := matrix.RenderAppServiceRegistration(matrix.Config{ + AppServiceID: "agentteams-controller", + AppServiceToken: "as-token", + AppServiceHSToken: "correct-token", + AppServiceSenderLocalpart: "agentteams-controller", + AppServicePushURL: pushURL, + }) + if reg.URL == nil { + t.Fatal("registration URL is nil") + } srv := NewHTTPServer(":0", ServerDeps{ Client: k8s, Namespace: "default", @@ -59,7 +70,7 @@ func TestNewHTTPServerRegistersAppserviceTransactionRoute(t *testing.T) { MatrixConfig: matrix.Config{AppServiceEnabled: true, AppServiceHSToken: "correct-token"}, }) - req := httptest.NewRequest(http.MethodPut, "/_matrix/app/v1/transactions/txn-from-mux", txnBody(t, nil)) + req := httptest.NewRequest(http.MethodPut, *reg.URL+"/_matrix/app/v1/transactions/txn-from-mux", txnBody(t, nil)) req.Header.Set("Authorization", "Bearer correct-token") rec := httptest.NewRecorder() srv.Mux.ServeHTTP(rec, req)