From ccd1ae8f97021b6d960319e466d81ac0e6b5284a Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 11:35:50 +1100 Subject: [PATCH 1/6] security: add SSH host key verification with TOFU Replace StrictHostKeyChecking=no with accept-new mode using a pixels-managed known_hosts file at ~/.config/pixels/known_hosts. SSH connections now use the container hostname (px-) instead of IP to avoid stale entries from IP reuse across container churn. Lifecycle methods (Create, Start, RestoreSnapshot, Delete) clean up both hostname and IP entries from known_hosts to prevent false rejections. Opt out via strict_host_keys = false under [ssh] config. Co-Authored-By: Claude Opus 4.6 --- cmd/root.go | 3 ++ internal/config/config.go | 20 +++++++- internal/ssh/ssh.go | 44 ++++++++++++++---- internal/ssh/ssh_test.go | 81 ++++++++++++++++++++++++++++++++- sandbox/truenas/backend.go | 29 ++++++++++-- sandbox/truenas/config.go | 6 ++- sandbox/truenas/exec.go | 47 +++++++++---------- sandbox/truenas/network.go | 20 ++++---- sandbox/truenas/resolve.go | 17 ++++--- sandbox/truenas/resolve_test.go | 11 ++--- 10 files changed, 209 insertions(+), 69 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index e93a26b..6700607 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -79,6 +79,9 @@ func sandboxConfig() map[string]string { if cfg.SSH.Key != "" { m["ssh_key"] = cfg.SSH.Key } + if cfg.SSH.StrictHostKeysEnabled() { + m["ssh_known_hosts"] = config.KnownHostsPath() + } m["provision"] = strconv.FormatBool(cfg.Provision.IsEnabled()) m["devtools"] = strconv.FormatBool(cfg.Provision.DevToolsEnabled()) if cfg.Network.Egress != "" { diff --git a/internal/config/config.go b/internal/config/config.go index 88dd977..0c07e6d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -55,8 +55,18 @@ type Defaults struct { } type SSH struct { - User string `toml:"user" env:"PIXELS_SSH_USER"` - Key string `toml:"key" env:"PIXELS_SSH_KEY"` + User string `toml:"user" env:"PIXELS_SSH_USER"` + Key string `toml:"key" env:"PIXELS_SSH_KEY"` + StrictHostKeys *bool `toml:"strict_host_keys" env:"PIXELS_SSH_STRICT_HOST_KEYS"` +} + +// StrictHostKeysEnabled returns whether SSH host key verification is enabled. +// Defaults to true when not explicitly set. +func (s *SSH) StrictHostKeysEnabled() bool { + if s.StrictHostKeys == nil { + return true + } + return *s.StrictHostKeys } type Checkpoint struct { @@ -203,6 +213,12 @@ func (t *TrueNAS) InsecureSkipVerifyValue() bool { return *t.InsecureSkipVerify } +// KnownHostsPath returns the path to the pixels-managed SSH known_hosts file. +func KnownHostsPath() string { + dir := filepath.Dir(configPath()) + return filepath.Join(dir, "known_hosts") +} + func expandHome(path string) string { if strings.HasPrefix(path, "~/") { if home, err := os.UserHomeDir(); err == nil { diff --git a/internal/ssh/ssh.go b/internal/ssh/ssh.go index 8933f65..9f2cfe7 100644 --- a/internal/ssh/ssh.go +++ b/internal/ssh/ssh.go @@ -15,10 +15,11 @@ import ( // ConnConfig holds the parameters for an SSH connection. type ConnConfig struct { - Host string - User string - KeyPath string - Env map[string]string // optional, for SetEnv forwarding + Host string + User string + KeyPath string + Env map[string]string // optional, for SetEnv forwarding + KnownHostsFile string // when set, uses accept-new host key verification } // WaitReady polls the host's SSH port until it accepts connections or the timeout expires. @@ -115,12 +116,22 @@ func TestAuth(ctx context.Context, cc ConnConfig) error { // It is exported for use by callers that need to construct custom exec.Cmd // with non-standard Stdin/Stdout/Stderr (e.g. sandbox backends). func Args(cc ConnConfig) []string { - args := []string{ - "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=" + os.DevNull, + var args []string + if cc.KnownHostsFile != "" { + args = []string{ + "-o", "StrictHostKeyChecking=accept-new", + "-o", "UserKnownHostsFile=" + cc.KnownHostsFile, + } + } else { + args = []string{ + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=" + os.DevNull, + } + } + args = append(args, "-o", "PasswordAuthentication=no", "-o", "LogLevel=ERROR", - } + ) if cc.KeyPath != "" { args = append(args, "-i", cc.KeyPath) } @@ -150,6 +161,23 @@ func Args(cc ConnConfig) []string { return args } +// RemoveKnownHost removes all entries for the given host from the known_hosts +// file. This is used to clean up stale entries when containers are +// created, destroyed, or restored from snapshots. It is a no-op if the +// known_hosts file does not exist. +func RemoveKnownHost(knownHostsFile, host string) error { + if knownHostsFile == "" { + return nil + } + if _, err := os.Stat(knownHostsFile); errors.Is(err, os.ErrNotExist) { + return nil + } + cmd := exec.Command("ssh-keygen", "-R", host, "-f", knownHostsFile) + cmd.Stdout = io.Discard + cmd.Stderr = io.Discard + return cmd.Run() +} + // consoleArgs builds SSH arguments for an interactive console session. // When remoteCmd is non-empty, -t is inserted to force PTY allocation // and the command is appended after user@host. diff --git a/internal/ssh/ssh_test.go b/internal/ssh/ssh_test.go index e6e30f4..d128299 100644 --- a/internal/ssh/ssh_test.go +++ b/internal/ssh/ssh_test.go @@ -29,7 +29,7 @@ func TestSSHArgs(t *testing.T) { } }) - t.Run("uses os.DevNull for UserKnownHostsFile", func(t *testing.T) { + t.Run("uses os.DevNull for UserKnownHostsFile when no KnownHostsFile", func(t *testing.T) { args := Args(ConnConfig{Host: "10.0.0.1", User: "pixel"}) want := "UserKnownHostsFile=" + os.DevNull found := false @@ -42,6 +42,47 @@ func TestSSHArgs(t *testing.T) { if !found { t.Errorf("sshArgs should contain %q, got %v", want, args) } + // Also verify StrictHostKeyChecking=no in this mode. + foundStrict := false + for _, a := range args { + if a == "StrictHostKeyChecking=no" { + foundStrict = true + } + } + if !foundStrict { + t.Errorf("expected StrictHostKeyChecking=no, got %v", args) + } + }) + + t.Run("accept-new with KnownHostsFile", func(t *testing.T) { + khFile := "/tmp/pixels-test-known-hosts" + args := Args(ConnConfig{Host: "10.0.0.1", User: "pixel", KnownHostsFile: khFile}) + + // Should use accept-new instead of no. + foundAcceptNew := false + for _, a := range args { + if a == "StrictHostKeyChecking=accept-new" { + foundAcceptNew = true + } + if a == "StrictHostKeyChecking=no" { + t.Error("should not use StrictHostKeyChecking=no when KnownHostsFile is set") + } + } + if !foundAcceptNew { + t.Errorf("expected StrictHostKeyChecking=accept-new, got %v", args) + } + + // Should use the provided known hosts file. + want := "UserKnownHostsFile=" + khFile + found := false + for _, a := range args { + if a == want { + found = true + } + } + if !found { + t.Errorf("expected %q, got %v", want, args) + } }) t.Run("without key", func(t *testing.T) { @@ -113,6 +154,44 @@ func TestSSHArgs(t *testing.T) { }) } +func TestRemoveKnownHost(t *testing.T) { + t.Run("no-op when file is empty string", func(t *testing.T) { + if err := RemoveKnownHost("", "10.0.0.1"); err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + + t.Run("no-op when file does not exist", func(t *testing.T) { + if err := RemoveKnownHost("/tmp/nonexistent-known-hosts-file", "10.0.0.1"); err != nil { + t.Errorf("expected no error for missing file, got %v", err) + } + }) + + t.Run("removes entry from existing file", func(t *testing.T) { + dir := t.TempDir() + khFile := dir + "/known_hosts" + content := "10.0.0.1 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITestKey\n10.0.0.2 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOtherKey\n" + if err := os.WriteFile(khFile, []byte(content), 0o600); err != nil { + t.Fatal(err) + } + + if err := RemoveKnownHost(khFile, "10.0.0.1"); err != nil { + t.Fatalf("RemoveKnownHost: %v", err) + } + + data, err := os.ReadFile(khFile) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(data), "10.0.0.1") { + t.Errorf("expected 10.0.0.1 to be removed, file contains: %s", data) + } + if !strings.Contains(string(data), "10.0.0.2") { + t.Errorf("expected 10.0.0.2 to remain, file contains: %s", data) + } + }) +} + func TestConsoleArgs(t *testing.T) { t.Run("no remote command", func(t *testing.T) { cc := ConnConfig{Host: "10.0.0.1", User: "pixel", KeyPath: "/tmp/key"} diff --git a/sandbox/truenas/backend.go b/sandbox/truenas/backend.go index 86248e2..7aec122 100644 --- a/sandbox/truenas/backend.go +++ b/sandbox/truenas/backend.go @@ -122,8 +122,10 @@ func (t *TrueNAS) Create(ctx context.Context, opts sandbox.CreateOpts) (*sandbox // Wait for SSH readiness. if ip != "" { - cc := ssh.ConnConfig{Host: ip, User: t.cfg.sshUser, KeyPath: t.cfg.sshKey} - _ = t.ssh.WaitReady(ctx, cc.Host, 90*time.Second, nil) + // Remove stale known_hosts entries — the container was just created. + ssh.RemoveKnownHost(t.cfg.knownHosts, ip) + ssh.RemoveKnownHost(t.cfg.knownHosts, full) + _ = t.ssh.WaitReady(ctx, full, 90*time.Second, nil) } return &sandbox.Instance{ @@ -177,7 +179,10 @@ func (t *TrueNAS) Start(ctx context.Context, name string) error { ip := ipFromAliases(inst.Aliases) if ip != "" { - _ = t.ssh.WaitReady(ctx, ip, 30*time.Second, nil) + // Remove stale known_hosts entries — host key may differ after restart. + ssh.RemoveKnownHost(t.cfg.knownHosts, ip) + ssh.RemoveKnownHost(t.cfg.knownHosts, full) + _ = t.ssh.WaitReady(ctx, full, 30*time.Second, nil) } return nil } @@ -199,12 +204,25 @@ func (t *TrueNAS) Delete(ctx context.Context, name string) error { // Best-effort stop. _ = t.client.Virt.StopInstance(ctx, full, tnapi.StopVirtInstanceOpts{Timeout: 30}) + // Resolve IP before deletion so we can clean up the known_hosts entry. + inst, _ := t.client.Virt.GetInstance(ctx, full) + var ip string + if inst != nil { + ip = ipFromAliases(inst.Aliases) + } + // Retry delete (Incus storage release timing). if err := retry.Do(ctx, 3, 2*time.Second, func(ctx context.Context) error { return t.client.Virt.DeleteInstance(ctx, full) }); err != nil { return fmt.Errorf("deleting %s: %w", name, err) } + + // Clean up known_hosts entries for the now-dead container. + if ip != "" { + ssh.RemoveKnownHost(t.cfg.knownHosts, ip) + } + ssh.RemoveKnownHost(t.cfg.knownHosts, full) return nil } @@ -279,7 +297,10 @@ func (t *TrueNAS) RestoreSnapshot(ctx context.Context, name, label string) error ip := ipFromAliases(inst.Aliases) if ip != "" { - _ = t.ssh.WaitReady(ctx, ip, 30*time.Second, nil) + // Remove stale known_hosts entries — snapshot restore changes the host key. + ssh.RemoveKnownHost(t.cfg.knownHosts, ip) + ssh.RemoveKnownHost(t.cfg.knownHosts, full) + _ = t.ssh.WaitReady(ctx, full, 30*time.Second, nil) } return nil } diff --git a/sandbox/truenas/config.go b/sandbox/truenas/config.go index b6106b3..a7d8a9a 100644 --- a/sandbox/truenas/config.go +++ b/sandbox/truenas/config.go @@ -24,8 +24,9 @@ type tnConfig struct { nicType string parent string - sshUser string - sshKey string + sshUser string + sshKey string + knownHosts string datasetPrefix string @@ -108,6 +109,7 @@ func parseCfg(m map[string]string) (*tnConfig, error) { if v := m["ssh_key"]; v != "" { c.sshKey = v } + c.knownHosts = m["ssh_known_hosts"] if v := m["dataset_prefix"]; v != "" { c.datasetPrefix = v diff --git a/sandbox/truenas/exec.go b/sandbox/truenas/exec.go index 4d5bd05..e4be928 100644 --- a/sandbox/truenas/exec.go +++ b/sandbox/truenas/exec.go @@ -15,8 +15,7 @@ import ( // custom Stdin/Stdout/Stderr, it builds a custom exec.Cmd using ssh.Args(). // Otherwise it delegates to ssh.Exec. func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) (int, error) { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return 1, err } @@ -26,10 +25,11 @@ func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) ( } cc := ssh.ConnConfig{ - Host: ip, - User: user, - KeyPath: t.cfg.sshKey, - Env: envToMap(opts.Env), + Host: prefixed(name), + User: user, + KeyPath: t.cfg.sshKey, + Env: envToMap(opts.Env), + KnownHostsFile: t.cfg.knownHosts, } hasCustomIO := opts.Stdin != nil || opts.Stdout != nil || opts.Stderr != nil @@ -54,29 +54,29 @@ func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) ( // Output executes a command and returns its combined stdout. func (t *TrueNAS) Output(ctx context.Context, name string, cmd []string) ([]byte, error) { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return nil, err } cc := ssh.ConnConfig{ - Host: ip, - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, + Host: prefixed(name), + User: t.cfg.sshUser, + KeyPath: t.cfg.sshKey, + KnownHostsFile: t.cfg.knownHosts, } return t.ssh.OutputQuiet(ctx, cc, cmd) } // Console attaches an interactive console session. func (t *TrueNAS) Console(ctx context.Context, name string, opts sandbox.ConsoleOpts) error { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return err } cc := ssh.ConnConfig{ - Host: ip, - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, - Env: envToMap(opts.Env), + Host: prefixed(name), + User: t.cfg.sshUser, + KeyPath: t.cfg.sshKey, + Env: envToMap(opts.Env), + KnownHostsFile: t.cfg.knownHosts, } remoteCmd := strings.Join(opts.RemoteCmd, " ") return ssh.Console(cc, remoteCmd) @@ -85,19 +85,20 @@ func (t *TrueNAS) Console(ctx context.Context, name string, opts sandbox.Console // Ready waits until the instance is reachable via SSH. If key auth fails, // it pushes the current machine's SSH public key via the TrueNAS file API. func (t *TrueNAS) Ready(ctx context.Context, name string, timeout time.Duration) error { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return err } - if err := t.ssh.WaitReady(ctx, ip, timeout, nil); err != nil { + host := prefixed(name) + if err := t.ssh.WaitReady(ctx, host, timeout, nil); err != nil { return err } // Test key auth and push the key if it fails. cc := ssh.ConnConfig{ - Host: ip, - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, + Host: host, + User: t.cfg.sshUser, + KeyPath: t.cfg.sshKey, + KnownHostsFile: t.cfg.knownHosts, } if err := ssh.TestAuth(ctx, cc); err != nil { pubKey := readSSHPubKey(t.cfg.sshKey) diff --git a/sandbox/truenas/network.go b/sandbox/truenas/network.go index d4ce059..57459bf 100644 --- a/sandbox/truenas/network.go +++ b/sandbox/truenas/network.go @@ -19,12 +19,11 @@ import ( // script, safe-apt wrapper, restricted sudoers via the TrueNAS API, then // SSHes in to install nftables and resolve domains. func (t *TrueNAS) SetEgressMode(ctx context.Context, name string, mode sandbox.EgressMode) error { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return err } - cc := ssh.ConnConfig{Host: ip, User: "root", KeyPath: t.cfg.sshKey} full := prefixed(name) + cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} switch mode { case sandbox.EgressUnrestricted: @@ -106,12 +105,11 @@ func (t *TrueNAS) SetEgressMode(ctx context.Context, name string, mode sandbox.E // AllowDomain adds a domain to the egress allowlist and re-resolves. func (t *TrueNAS) AllowDomain(ctx context.Context, name, domain string) error { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return err } - cc := ssh.ConnConfig{Host: ip, User: "root", KeyPath: t.cfg.sshKey} full := prefixed(name) + cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} // Ensure egress infrastructure exists. code, _ := t.ssh.ExecQuiet(ctx, cc, []string{"test -f /etc/pixels-egress-domains"}) @@ -152,12 +150,11 @@ func (t *TrueNAS) AllowDomain(ctx context.Context, name, domain string) error { // DenyDomain removes a domain from the egress allowlist and re-resolves. func (t *TrueNAS) DenyDomain(ctx context.Context, name, domain string) error { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return err } - cc := ssh.ConnConfig{Host: ip, User: "root", KeyPath: t.cfg.sshKey} full := prefixed(name) + cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} out, err := t.ssh.OutputQuiet(ctx, cc, []string{"cat /etc/pixels-egress-domains"}) if err != nil { @@ -190,11 +187,10 @@ func (t *TrueNAS) DenyDomain(ctx context.Context, name, domain string) error { // GetPolicy returns the current egress policy for an instance. func (t *TrueNAS) GetPolicy(ctx context.Context, name string) (*sandbox.Policy, error) { - ip, err := t.resolveRunningIP(ctx, name) - if err != nil { + if err := t.ensureRunning(ctx, name); err != nil { return nil, err } - cc := ssh.ConnConfig{Host: ip, User: "root", KeyPath: t.cfg.sshKey} + cc := ssh.ConnConfig{Host: prefixed(name), User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} code, _ := t.ssh.ExecQuiet(ctx, cc, []string{"test -f /etc/pixels-egress-domains"}) if code != 0 { diff --git a/sandbox/truenas/resolve.go b/sandbox/truenas/resolve.go index ec4ca84..d3a076c 100644 --- a/sandbox/truenas/resolve.go +++ b/sandbox/truenas/resolve.go @@ -20,26 +20,25 @@ func unprefixed(name string) string { return strings.TrimPrefix(name, containerPrefix) } -// resolveRunningIP returns the IP of a running container via the API. -func (t *TrueNAS) resolveRunningIP(ctx context.Context, name string) (string, error) { +// ensureRunning verifies the container is running and has a network address. +func (t *TrueNAS) ensureRunning(ctx context.Context, name string) error { full := prefixed(name) instance, err := t.client.Virt.GetInstance(ctx, full) if err != nil { - return "", fmt.Errorf("looking up %s: %w", name, err) + return fmt.Errorf("looking up %s: %w", name, err) } if instance == nil { - return "", fmt.Errorf("instance %q not found", name) + return fmt.Errorf("instance %q not found", name) } if instance.Status != "RUNNING" { - return "", fmt.Errorf("instance %q is %s — start it first", name, instance.Status) + return fmt.Errorf("instance %q is %s — start it first", name, instance.Status) } - ip := ipFromAliases(instance.Aliases) - if ip == "" { - return "", fmt.Errorf("no IP address for %s", name) + if ipFromAliases(instance.Aliases) == "" { + return fmt.Errorf("no IP address for %s", name) } - return ip, nil + return nil } // ipFromAliases extracts the first IPv4 address from a VirtInstance's aliases. diff --git a/sandbox/truenas/resolve_test.go b/sandbox/truenas/resolve_test.go index 184e12c..44c176a 100644 --- a/sandbox/truenas/resolve_test.go +++ b/sandbox/truenas/resolve_test.go @@ -8,16 +8,15 @@ import ( tnapi "github.com/deevus/truenas-go" ) -func TestResolveRunningIP(t *testing.T) { +func TestEnsureRunning(t *testing.T) { tests := []struct { name string instance *tnapi.VirtInstance getErr error - wantIP string wantErr string }{ { - name: "API lookup", + name: "running with IP", instance: &tnapi.VirtInstance{ Name: "px-test", Status: "RUNNING", @@ -25,7 +24,6 @@ func TestResolveRunningIP(t *testing.T) { {Type: "INET", Address: "192.168.1.50"}, }, }, - wantIP: "192.168.1.50", }, { name: "API error", @@ -72,7 +70,7 @@ func TestResolveRunningIP(t *testing.T) { }, } - ip, err := tn.resolveRunningIP(context.Background(), "test") + err := tn.ensureRunning(context.Background(), "test") if tt.wantErr != "" { if err == nil { t.Fatal("expected error, got nil") @@ -85,9 +83,6 @@ func TestResolveRunningIP(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if ip != tt.wantIP { - t.Errorf("ip = %q, want %q", ip, tt.wantIP) - } }) } } From 8cad86d69f4af4695d964ba959b06d56342fddc9 Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 11:40:11 +1100 Subject: [PATCH 2/6] test: add coverage for StrictHostKeys config and KnownHostsPath Co-Authored-By: Claude Opus 4.6 --- internal/config/config_test.go | 72 ++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 8e3f15e..fca0601 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -507,6 +507,78 @@ func TestConfigPathDefault(t *testing.T) { } } +func TestStrictHostKeysEnabled(t *testing.T) { + tests := []struct { + name string + val *bool + want bool + }{ + {"nil defaults to true", nil, true}, + {"explicit true", boolPtr(true), true}, + {"explicit false", boolPtr(false), false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := SSH{StrictHostKeys: tt.val} + if got := s.StrictHostKeysEnabled(); got != tt.want { + t.Errorf("StrictHostKeysEnabled() = %v, want %v", got, tt.want) + } + }) + } +} + +func boolPtr(v bool) *bool { return &v } + +func TestStrictHostKeysFromFile(t *testing.T) { + dir := t.TempDir() + t.Setenv("XDG_CONFIG_HOME", dir) + + cfgDir := filepath.Join(dir, "pixels") + if err := os.MkdirAll(cfgDir, 0o755); err != nil { + t.Fatal(err) + } + + content := ` +[ssh] +strict_host_keys = false +` + if err := os.WriteFile(filepath.Join(cfgDir, "config.toml"), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + cfg, err := Load() + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.SSH.StrictHostKeysEnabled() { + t.Error("StrictHostKeysEnabled() = true, want false (set in TOML)") + } +} + +func TestStrictHostKeysEnvOverride(t *testing.T) { + t.Setenv("XDG_CONFIG_HOME", t.TempDir()) + t.Setenv("PIXELS_SSH_STRICT_HOST_KEYS", "false") + + cfg, err := Load() + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.SSH.StrictHostKeysEnabled() { + t.Error("StrictHostKeysEnabled() = true, want false (env override)") + } +} + +func TestKnownHostsPath(t *testing.T) { + dir := t.TempDir() + t.Setenv("XDG_CONFIG_HOME", dir) + + got := KnownHostsPath() + want := filepath.Join(dir, "pixels", "known_hosts") + if got != want { + t.Errorf("KnownHostsPath() = %q, want %q", got, want) + } +} + func TestExpandHome(t *testing.T) { home, err := os.UserHomeDir() if err != nil { From 13540c954c22777a240a7beae79900cdd41a05ba Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 12:01:15 +1100 Subject: [PATCH 3/6] refactor: idiomatic Go cleanup for SSH and TrueNAS backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add NewConnConfig constructor for secure-by-default ConnConfig - Rename KnownHostsFile → KnownHostsPath (Go convention) - Return *tnapi.VirtInstance from ensureRunning to avoid redundant API calls - Rewrite RemoveKnownHost using golang.org/x/crypto/ssh instead of ssh-keygen - Add defaultKnownHostsPath() fallback so Args() is always secure - Replace silent error swallowing (_ = err) with warnf logging - Fix provision.NewRunner to accept knownHostsPath parameter Co-Authored-By: Claude Opus 4.6 --- internal/provision/provision.go | 4 +- internal/provision/provision_test.go | 2 +- internal/ssh/ssh.go | 101 +++++++++++++++++++++------ internal/ssh/ssh_test.go | 38 +++++----- sandbox/truenas/backend.go | 15 ++-- sandbox/truenas/exec.go | 40 +++-------- sandbox/truenas/network.go | 16 ++--- sandbox/truenas/resolve.go | 15 ++-- sandbox/truenas/resolve_test.go | 5 +- sandbox/truenas/truenas.go | 12 ++++ 10 files changed, 151 insertions(+), 97 deletions(-) diff --git a/internal/provision/provision.go b/internal/provision/provision.go index 419fc3f..9f353e3 100644 --- a/internal/provision/provision.go +++ b/internal/provision/provision.go @@ -72,13 +72,13 @@ type Runner struct { } // NewRunner creates a Runner that executes commands over SSH. -func NewRunner(host, user, keyPath string) *Runner { +func NewRunner(host, user, keyPath, knownHostsPath string) *Runner { return &Runner{ Host: host, User: user, KeyPath: keyPath, exec: &sshExecutor{ - cc: ssh.ConnConfig{Host: host, User: user, KeyPath: keyPath}, + cc: ssh.NewConnConfig(host, user, keyPath, knownHostsPath), }, } } diff --git a/internal/provision/provision_test.go b/internal/provision/provision_test.go index 5354cc1..258bea2 100644 --- a/internal/provision/provision_test.go +++ b/internal/provision/provision_test.go @@ -16,7 +16,7 @@ func TestZmxCmd(t *testing.T) { } func TestNewRunner(t *testing.T) { - r := NewRunner("10.0.0.1", "root", "/tmp/key") + r := NewRunner("10.0.0.1", "root", "/tmp/key", "/tmp/known_hosts") if r.Host != "10.0.0.1" { t.Errorf("Host = %q, want %q", r.Host, "10.0.0.1") } diff --git a/internal/ssh/ssh.go b/internal/ssh/ssh.go index 9f2cfe7..9903570 100644 --- a/internal/ssh/ssh.go +++ b/internal/ssh/ssh.go @@ -1,6 +1,7 @@ package ssh import ( + "bytes" "context" "errors" "fmt" @@ -8,18 +9,32 @@ import ( "net" "os" "os/exec" + "path/filepath" "sort" "strings" "time" + + gossh "golang.org/x/crypto/ssh" ) // ConnConfig holds the parameters for an SSH connection. +// Use NewConnConfig to construct — it ensures secure defaults. type ConnConfig struct { Host string User string KeyPath string Env map[string]string // optional, for SetEnv forwarding - KnownHostsFile string // when set, uses accept-new host key verification + KnownHostsPath string // path to known_hosts file for accept-new verification +} + +// NewConnConfig creates a ConnConfig with the given parameters. +func NewConnConfig(host, user, keyPath, knownHostsPath string) ConnConfig { + return ConnConfig{ + Host: host, + User: user, + KeyPath: keyPath, + KnownHostsPath: knownHostsPath, + } } // WaitReady polls the host's SSH port until it accepts connections or the timeout expires. @@ -116,22 +131,16 @@ func TestAuth(ctx context.Context, cc ConnConfig) error { // It is exported for use by callers that need to construct custom exec.Cmd // with non-standard Stdin/Stdout/Stderr (e.g. sandbox backends). func Args(cc ConnConfig) []string { - var args []string - if cc.KnownHostsFile != "" { - args = []string{ - "-o", "StrictHostKeyChecking=accept-new", - "-o", "UserKnownHostsFile=" + cc.KnownHostsFile, - } - } else { - args = []string{ - "-o", "StrictHostKeyChecking=no", - "-o", "UserKnownHostsFile=" + os.DevNull, - } + knownHosts := cc.KnownHostsPath + if knownHosts == "" { + knownHosts = defaultKnownHostsPath() } - args = append(args, + args := []string{ + "-o", "StrictHostKeyChecking=accept-new", + "-o", "UserKnownHostsFile=" + knownHosts, "-o", "PasswordAuthentication=no", "-o", "LogLevel=ERROR", - ) + } if cc.KeyPath != "" { args = append(args, "-i", cc.KeyPath) } @@ -164,18 +173,66 @@ func Args(cc ConnConfig) []string { // RemoveKnownHost removes all entries for the given host from the known_hosts // file. This is used to clean up stale entries when containers are // created, destroyed, or restored from snapshots. It is a no-op if the -// known_hosts file does not exist. -func RemoveKnownHost(knownHostsFile, host string) error { - if knownHostsFile == "" { +// known_hosts file does not exist or the path is empty. +func RemoveKnownHost(knownHostsPath, host string) error { + if knownHostsPath == "" || host == "" { return nil } - if _, err := os.Stat(knownHostsFile); errors.Is(err, os.ErrNotExist) { + data, err := os.ReadFile(knownHostsPath) + if errors.Is(err, os.ErrNotExist) { return nil } - cmd := exec.Command("ssh-keygen", "-R", host, "-f", knownHostsFile) - cmd.Stdout = io.Discard - cmd.Stderr = io.Discard - return cmd.Run() + if err != nil { + return fmt.Errorf("reading known_hosts: %w", err) + } + + var kept []byte + rest := data + for len(rest) > 0 { + // Find the next line boundary to preserve the raw line (including newline). + end := bytes.IndexByte(rest, '\n') + var rawLine []byte + if end == -1 { + rawLine = rest + rest = nil + } else { + rawLine = rest[:end+1] + rest = rest[end+1:] + } + + // Try to parse the line as a known_hosts entry. + _, hosts, _, _, _, parseErr := gossh.ParseKnownHosts(rawLine) + if parseErr != nil { + // Not a valid entry (comment, blank line) — keep it. + kept = append(kept, rawLine...) + continue + } + + // Check if this entry matches the host to remove. + match := false + for _, h := range hosts { + if h == host { + match = true + break + } + } + if !match { + kept = append(kept, rawLine...) + } + } + + return os.WriteFile(knownHostsPath, kept, 0o600) +} + +// defaultKnownHostsPath returns the default known_hosts path in the pixels +// config directory. This ensures Args() is always secure even if +// KnownHostsPath is not explicitly set on ConnConfig. +func defaultKnownHostsPath() string { + if dir := os.Getenv("XDG_CONFIG_HOME"); dir != "" { + return filepath.Join(dir, "pixels", "known_hosts") + } + dir, _ := os.UserConfigDir() + return filepath.Join(dir, "pixels", "known_hosts") } // consoleArgs builds SSH arguments for an interactive console session. diff --git a/internal/ssh/ssh_test.go b/internal/ssh/ssh_test.go index d128299..6aa7649 100644 --- a/internal/ssh/ssh_test.go +++ b/internal/ssh/ssh_test.go @@ -29,34 +29,28 @@ func TestSSHArgs(t *testing.T) { } }) - t.Run("uses os.DevNull for UserKnownHostsFile when no KnownHostsFile", func(t *testing.T) { + t.Run("always uses accept-new even without explicit KnownHostsPath", func(t *testing.T) { args := Args(ConnConfig{Host: "10.0.0.1", User: "pixel"}) - want := "UserKnownHostsFile=" + os.DevNull - found := false + foundAcceptNew := false for _, a := range args { - if a == want { - found = true - break + if a == "StrictHostKeyChecking=accept-new" { + foundAcceptNew = true } - } - if !found { - t.Errorf("sshArgs should contain %q, got %v", want, args) - } - // Also verify StrictHostKeyChecking=no in this mode. - foundStrict := false - for _, a := range args { if a == "StrictHostKeyChecking=no" { - foundStrict = true + t.Error("should never use StrictHostKeyChecking=no") + } + if strings.Contains(a, os.DevNull) { + t.Errorf("should never use DevNull for known hosts, got %q", a) } } - if !foundStrict { - t.Errorf("expected StrictHostKeyChecking=no, got %v", args) + if !foundAcceptNew { + t.Errorf("expected StrictHostKeyChecking=accept-new, got %v", args) } }) - t.Run("accept-new with KnownHostsFile", func(t *testing.T) { + t.Run("accept-new with KnownHostsPath", func(t *testing.T) { khFile := "/tmp/pixels-test-known-hosts" - args := Args(ConnConfig{Host: "10.0.0.1", User: "pixel", KnownHostsFile: khFile}) + args := Args(ConnConfig{Host: "10.0.0.1", User: "pixel", KnownHostsPath: khFile}) // Should use accept-new instead of no. foundAcceptNew := false @@ -65,7 +59,7 @@ func TestSSHArgs(t *testing.T) { foundAcceptNew = true } if a == "StrictHostKeyChecking=no" { - t.Error("should not use StrictHostKeyChecking=no when KnownHostsFile is set") + t.Error("should not use StrictHostKeyChecking=no when KnownHostsPath is set") } } if !foundAcceptNew { @@ -170,8 +164,10 @@ func TestRemoveKnownHost(t *testing.T) { t.Run("removes entry from existing file", func(t *testing.T) { dir := t.TempDir() khFile := dir + "/known_hosts" - content := "10.0.0.1 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITestKey\n10.0.0.2 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOtherKey\n" - if err := os.WriteFile(khFile, []byte(content), 0o600); err != nil { + // Use valid ssh-ed25519 key data (32 bytes base64-encoded with key type prefix). + key1 := "10.0.0.1 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBVlGh5YxGBMp/DO3OjAHsMR0DVQS2DJnpOqaGP2MkNl\n" + key2 := "10.0.0.2 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKxvLhGmlN1sdag3FISwEVfAGwC+v3+x0v6qIFNyGmNd\n" + if err := os.WriteFile(khFile, []byte(key1+key2), 0o600); err != nil { t.Fatal(err) } diff --git a/sandbox/truenas/backend.go b/sandbox/truenas/backend.go index 7aec122..62b8fa8 100644 --- a/sandbox/truenas/backend.go +++ b/sandbox/truenas/backend.go @@ -93,8 +93,7 @@ func (t *TrueNAS) Create(ctx context.Context, opts sandbox.CreateOpts) (*sandbox if needsProvision { if err := t.client.Provision(ctx, full, provOpts); err != nil { - // Non-fatal: continue without provisioning. - _ = err + t.warnf("provision %s: %v", name, err) } else if pubKey != "" { // Restart so rc.local runs on boot. _ = t.client.Virt.StopInstance(ctx, full, tnapi.StopVirtInstanceOpts{Timeout: 30}) @@ -125,7 +124,9 @@ func (t *TrueNAS) Create(ctx context.Context, opts sandbox.CreateOpts) (*sandbox // Remove stale known_hosts entries — the container was just created. ssh.RemoveKnownHost(t.cfg.knownHosts, ip) ssh.RemoveKnownHost(t.cfg.knownHosts, full) - _ = t.ssh.WaitReady(ctx, full, 90*time.Second, nil) + if err := t.ssh.WaitReady(ctx, full, 90*time.Second, nil); err != nil { + t.warnf("ssh wait %s: %v", name, err) + } } return &sandbox.Instance{ @@ -182,7 +183,9 @@ func (t *TrueNAS) Start(ctx context.Context, name string) error { // Remove stale known_hosts entries — host key may differ after restart. ssh.RemoveKnownHost(t.cfg.knownHosts, ip) ssh.RemoveKnownHost(t.cfg.knownHosts, full) - _ = t.ssh.WaitReady(ctx, full, 30*time.Second, nil) + if err := t.ssh.WaitReady(ctx, full, 30*time.Second, nil); err != nil { + t.warnf("ssh wait %s: %v", name, err) + } } return nil } @@ -300,7 +303,9 @@ func (t *TrueNAS) RestoreSnapshot(ctx context.Context, name, label string) error // Remove stale known_hosts entries — snapshot restore changes the host key. ssh.RemoveKnownHost(t.cfg.knownHosts, ip) ssh.RemoveKnownHost(t.cfg.knownHosts, full) - _ = t.ssh.WaitReady(ctx, full, 30*time.Second, nil) + if err := t.ssh.WaitReady(ctx, full, 30*time.Second, nil); err != nil { + t.warnf("ssh wait %s: %v", name, err) + } } return nil } diff --git a/sandbox/truenas/exec.go b/sandbox/truenas/exec.go index e4be928..0502bae 100644 --- a/sandbox/truenas/exec.go +++ b/sandbox/truenas/exec.go @@ -15,7 +15,7 @@ import ( // custom Stdin/Stdout/Stderr, it builds a custom exec.Cmd using ssh.Args(). // Otherwise it delegates to ssh.Exec. func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) (int, error) { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return 1, err } @@ -24,13 +24,8 @@ func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) ( user = "root" } - cc := ssh.ConnConfig{ - Host: prefixed(name), - User: user, - KeyPath: t.cfg.sshKey, - Env: envToMap(opts.Env), - KnownHostsFile: t.cfg.knownHosts, - } + cc := ssh.NewConnConfig(prefixed(name), user, t.cfg.sshKey, t.cfg.knownHosts) + cc.Env = envToMap(opts.Env) hasCustomIO := opts.Stdin != nil || opts.Stdout != nil || opts.Stderr != nil if hasCustomIO { @@ -54,30 +49,20 @@ func (t *TrueNAS) Run(ctx context.Context, name string, opts sandbox.ExecOpts) ( // Output executes a command and returns its combined stdout. func (t *TrueNAS) Output(ctx context.Context, name string, cmd []string) ([]byte, error) { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return nil, err } - cc := ssh.ConnConfig{ - Host: prefixed(name), - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, - KnownHostsFile: t.cfg.knownHosts, - } + cc := ssh.NewConnConfig(prefixed(name), t.cfg.sshUser, t.cfg.sshKey, t.cfg.knownHosts) return t.ssh.OutputQuiet(ctx, cc, cmd) } // Console attaches an interactive console session. func (t *TrueNAS) Console(ctx context.Context, name string, opts sandbox.ConsoleOpts) error { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return err } - cc := ssh.ConnConfig{ - Host: prefixed(name), - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, - Env: envToMap(opts.Env), - KnownHostsFile: t.cfg.knownHosts, - } + cc := ssh.NewConnConfig(prefixed(name), t.cfg.sshUser, t.cfg.sshKey, t.cfg.knownHosts) + cc.Env = envToMap(opts.Env) remoteCmd := strings.Join(opts.RemoteCmd, " ") return ssh.Console(cc, remoteCmd) } @@ -85,7 +70,7 @@ func (t *TrueNAS) Console(ctx context.Context, name string, opts sandbox.Console // Ready waits until the instance is reachable via SSH. If key auth fails, // it pushes the current machine's SSH public key via the TrueNAS file API. func (t *TrueNAS) Ready(ctx context.Context, name string, timeout time.Duration) error { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return err } host := prefixed(name) @@ -94,12 +79,7 @@ func (t *TrueNAS) Ready(ctx context.Context, name string, timeout time.Duration) } // Test key auth and push the key if it fails. - cc := ssh.ConnConfig{ - Host: host, - User: t.cfg.sshUser, - KeyPath: t.cfg.sshKey, - KnownHostsFile: t.cfg.knownHosts, - } + cc := ssh.NewConnConfig(host, t.cfg.sshUser, t.cfg.sshKey, t.cfg.knownHosts) if err := ssh.TestAuth(ctx, cc); err != nil { pubKey := readSSHPubKey(t.cfg.sshKey) if pubKey == "" { diff --git a/sandbox/truenas/network.go b/sandbox/truenas/network.go index 57459bf..21dbc91 100644 --- a/sandbox/truenas/network.go +++ b/sandbox/truenas/network.go @@ -19,11 +19,11 @@ import ( // script, safe-apt wrapper, restricted sudoers via the TrueNAS API, then // SSHes in to install nftables and resolve domains. func (t *TrueNAS) SetEgressMode(ctx context.Context, name string, mode sandbox.EgressMode) error { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return err } full := prefixed(name) - cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} + cc := ssh.NewConnConfig(full, "root", t.cfg.sshKey, t.cfg.knownHosts) switch mode { case sandbox.EgressUnrestricted: @@ -105,11 +105,11 @@ func (t *TrueNAS) SetEgressMode(ctx context.Context, name string, mode sandbox.E // AllowDomain adds a domain to the egress allowlist and re-resolves. func (t *TrueNAS) AllowDomain(ctx context.Context, name, domain string) error { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return err } full := prefixed(name) - cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} + cc := ssh.NewConnConfig(full, "root", t.cfg.sshKey, t.cfg.knownHosts) // Ensure egress infrastructure exists. code, _ := t.ssh.ExecQuiet(ctx, cc, []string{"test -f /etc/pixels-egress-domains"}) @@ -150,11 +150,11 @@ func (t *TrueNAS) AllowDomain(ctx context.Context, name, domain string) error { // DenyDomain removes a domain from the egress allowlist and re-resolves. func (t *TrueNAS) DenyDomain(ctx context.Context, name, domain string) error { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return err } full := prefixed(name) - cc := ssh.ConnConfig{Host: full, User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} + cc := ssh.NewConnConfig(full, "root", t.cfg.sshKey, t.cfg.knownHosts) out, err := t.ssh.OutputQuiet(ctx, cc, []string{"cat /etc/pixels-egress-domains"}) if err != nil { @@ -187,10 +187,10 @@ func (t *TrueNAS) DenyDomain(ctx context.Context, name, domain string) error { // GetPolicy returns the current egress policy for an instance. func (t *TrueNAS) GetPolicy(ctx context.Context, name string) (*sandbox.Policy, error) { - if err := t.ensureRunning(ctx, name); err != nil { + if _, err := t.ensureRunning(ctx, name); err != nil { return nil, err } - cc := ssh.ConnConfig{Host: prefixed(name), User: "root", KeyPath: t.cfg.sshKey, KnownHostsFile: t.cfg.knownHosts} + cc := ssh.NewConnConfig(prefixed(name), "root", t.cfg.sshKey, t.cfg.knownHosts) code, _ := t.ssh.ExecQuiet(ctx, cc, []string{"test -f /etc/pixels-egress-domains"}) if code != 0 { diff --git a/sandbox/truenas/resolve.go b/sandbox/truenas/resolve.go index d3a076c..6ffd771 100644 --- a/sandbox/truenas/resolve.go +++ b/sandbox/truenas/resolve.go @@ -20,25 +20,26 @@ func unprefixed(name string) string { return strings.TrimPrefix(name, containerPrefix) } -// ensureRunning verifies the container is running and has a network address. -func (t *TrueNAS) ensureRunning(ctx context.Context, name string) error { +// ensureRunning verifies the container is running and has a network address, +// returning the instance for callers that need its metadata (e.g. IP). +func (t *TrueNAS) ensureRunning(ctx context.Context, name string) (*tnapi.VirtInstance, error) { full := prefixed(name) instance, err := t.client.Virt.GetInstance(ctx, full) if err != nil { - return fmt.Errorf("looking up %s: %w", name, err) + return nil, fmt.Errorf("looking up %s: %w", name, err) } if instance == nil { - return fmt.Errorf("instance %q not found", name) + return nil, fmt.Errorf("instance %q not found", name) } if instance.Status != "RUNNING" { - return fmt.Errorf("instance %q is %s — start it first", name, instance.Status) + return nil, fmt.Errorf("instance %q is %s — start it first", name, instance.Status) } if ipFromAliases(instance.Aliases) == "" { - return fmt.Errorf("no IP address for %s", name) + return nil, fmt.Errorf("no IP address for %s", name) } - return nil + return instance, nil } // ipFromAliases extracts the first IPv4 address from a VirtInstance's aliases. diff --git a/sandbox/truenas/resolve_test.go b/sandbox/truenas/resolve_test.go index 44c176a..5b270ed 100644 --- a/sandbox/truenas/resolve_test.go +++ b/sandbox/truenas/resolve_test.go @@ -70,7 +70,7 @@ func TestEnsureRunning(t *testing.T) { }, } - err := tn.ensureRunning(context.Background(), "test") + inst, err := tn.ensureRunning(context.Background(), "test") if tt.wantErr != "" { if err == nil { t.Fatal("expected error, got nil") @@ -83,6 +83,9 @@ func TestEnsureRunning(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } + if inst == nil { + t.Fatal("expected non-nil instance") + } }) } } diff --git a/sandbox/truenas/truenas.go b/sandbox/truenas/truenas.go index c5d701f..60905d7 100644 --- a/sandbox/truenas/truenas.go +++ b/sandbox/truenas/truenas.go @@ -4,6 +4,9 @@ package truenas import ( "context" + "fmt" + "io" + "os" "github.com/deevus/pixels/sandbox" ) @@ -23,6 +26,13 @@ type TrueNAS struct { client *Client cfg *tnConfig ssh sshRunner + warn io.Writer +} + +func (t *TrueNAS) warnf(format string, a ...any) { + if t.warn != nil { + fmt.Fprintf(t.warn, "pixels: "+format+"\n", a...) + } } // New creates a TrueNAS sandbox backend from a flat config map. @@ -41,6 +51,7 @@ func New(cfg map[string]string) (*TrueNAS, error) { client: client, cfg: c, ssh: realSSH{}, + warn: os.Stderr, }, nil } @@ -54,6 +65,7 @@ func NewForTest(client *Client, ssh sshRunner, cfg map[string]string) (*TrueNAS, client: client, cfg: c, ssh: ssh, + warn: os.Stderr, }, nil } From 6529884648823823862726ad5ba65f205094a488 Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 12:11:06 +1100 Subject: [PATCH 4/6] refactor: simplify RemoveKnownHost to prefix match Replace golang.org/x/crypto/ssh ParseKnownHosts loop with a simple bytes.HasPrefix filter. We control the known_hosts file format, so full crypto parsing is unnecessary. Removes the x/crypto dependency. Co-Authored-By: Claude Opus 4.6 --- internal/ssh/ssh.go | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/internal/ssh/ssh.go b/internal/ssh/ssh.go index 9903570..be68da3 100644 --- a/internal/ssh/ssh.go +++ b/internal/ssh/ssh.go @@ -13,8 +13,6 @@ import ( "sort" "strings" "time" - - gossh "golang.org/x/crypto/ssh" ) // ConnConfig holds the parameters for an SSH connection. @@ -186,38 +184,11 @@ func RemoveKnownHost(knownHostsPath, host string) error { return fmt.Errorf("reading known_hosts: %w", err) } + prefix := []byte(host + " ") var kept []byte - rest := data - for len(rest) > 0 { - // Find the next line boundary to preserve the raw line (including newline). - end := bytes.IndexByte(rest, '\n') - var rawLine []byte - if end == -1 { - rawLine = rest - rest = nil - } else { - rawLine = rest[:end+1] - rest = rest[end+1:] - } - - // Try to parse the line as a known_hosts entry. - _, hosts, _, _, _, parseErr := gossh.ParseKnownHosts(rawLine) - if parseErr != nil { - // Not a valid entry (comment, blank line) — keep it. - kept = append(kept, rawLine...) - continue - } - - // Check if this entry matches the host to remove. - match := false - for _, h := range hosts { - if h == host { - match = true - break - } - } - if !match { - kept = append(kept, rawLine...) + for _, line := range bytes.SplitAfter(data, []byte("\n")) { + if !bytes.HasPrefix(line, prefix) { + kept = append(kept, line...) } } From 380497ffa8f376c0a6fef4bccba88f19a29e0a22 Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 12:13:18 +1100 Subject: [PATCH 5/6] refactor: use config.KnownHostsPath() instead of duplicate helper Co-Authored-By: Claude Opus 4.6 --- internal/ssh/ssh.go | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/internal/ssh/ssh.go b/internal/ssh/ssh.go index be68da3..6fd59a9 100644 --- a/internal/ssh/ssh.go +++ b/internal/ssh/ssh.go @@ -9,10 +9,11 @@ import ( "net" "os" "os/exec" - "path/filepath" "sort" "strings" "time" + + "github.com/deevus/pixels/internal/config" ) // ConnConfig holds the parameters for an SSH connection. @@ -131,7 +132,7 @@ func TestAuth(ctx context.Context, cc ConnConfig) error { func Args(cc ConnConfig) []string { knownHosts := cc.KnownHostsPath if knownHosts == "" { - knownHosts = defaultKnownHostsPath() + knownHosts = config.KnownHostsPath() } args := []string{ "-o", "StrictHostKeyChecking=accept-new", @@ -195,16 +196,6 @@ func RemoveKnownHost(knownHostsPath, host string) error { return os.WriteFile(knownHostsPath, kept, 0o600) } -// defaultKnownHostsPath returns the default known_hosts path in the pixels -// config directory. This ensures Args() is always secure even if -// KnownHostsPath is not explicitly set on ConnConfig. -func defaultKnownHostsPath() string { - if dir := os.Getenv("XDG_CONFIG_HOME"); dir != "" { - return filepath.Join(dir, "pixels", "known_hosts") - } - dir, _ := os.UserConfigDir() - return filepath.Join(dir, "pixels", "known_hosts") -} // consoleArgs builds SSH arguments for an interactive console session. // When remoteCmd is non-empty, -t is inserted to force PTY allocation From 37253f7c1a947b6795893f6a5c832405bdd00a45 Mon Sep 17 00:00:00 2001 From: Simon Hartcher Date: Thu, 5 Mar 2026 12:18:01 +1100 Subject: [PATCH 6/6] refactor: extract clearAndRefreshHostKey and clearKnownHosts helpers DRY up the repeated remove-known-hosts-then-wait-ssh pattern across Create, Start, RestoreSnapshot, and Delete in backend.go. Co-Authored-By: Claude Opus 4.6 --- sandbox/truenas/backend.go | 43 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/sandbox/truenas/backend.go b/sandbox/truenas/backend.go index 62b8fa8..8007e62 100644 --- a/sandbox/truenas/backend.go +++ b/sandbox/truenas/backend.go @@ -16,6 +16,23 @@ import ( "github.com/deevus/pixels/sandbox" ) +// clearAndRefreshHostKey removes stale known_hosts entries for both IP and hostname, +// then waits for SSH readiness. +func (t *TrueNAS) clearAndRefreshHostKey(ctx context.Context, name, ip, hostname string, timeout time.Duration) { + t.clearKnownHosts(ip, hostname) + if err := t.ssh.WaitReady(ctx, hostname, timeout, nil); err != nil { + t.warnf("ssh wait %s: %v", name, err) + } +} + +// clearKnownHosts removes known_hosts entries for both IP and hostname. +func (t *TrueNAS) clearKnownHosts(ip, hostname string) { + if ip != "" { + ssh.RemoveKnownHost(t.cfg.knownHosts, ip) + } + ssh.RemoveKnownHost(t.cfg.knownHosts, hostname) +} + // Create creates a new container instance with the full provisioning flow: // NIC resolution, instance creation, provisioning, restart, IP poll, SSH wait. // When opts.Bare is true, only the instance is created (no provisioning or SSH wait). @@ -121,12 +138,7 @@ func (t *TrueNAS) Create(ctx context.Context, opts sandbox.CreateOpts) (*sandbox // Wait for SSH readiness. if ip != "" { - // Remove stale known_hosts entries — the container was just created. - ssh.RemoveKnownHost(t.cfg.knownHosts, ip) - ssh.RemoveKnownHost(t.cfg.knownHosts, full) - if err := t.ssh.WaitReady(ctx, full, 90*time.Second, nil); err != nil { - t.warnf("ssh wait %s: %v", name, err) - } + t.clearAndRefreshHostKey(ctx, name, ip, full, 90*time.Second) } return &sandbox.Instance{ @@ -180,12 +192,7 @@ func (t *TrueNAS) Start(ctx context.Context, name string) error { ip := ipFromAliases(inst.Aliases) if ip != "" { - // Remove stale known_hosts entries — host key may differ after restart. - ssh.RemoveKnownHost(t.cfg.knownHosts, ip) - ssh.RemoveKnownHost(t.cfg.knownHosts, full) - if err := t.ssh.WaitReady(ctx, full, 30*time.Second, nil); err != nil { - t.warnf("ssh wait %s: %v", name, err) - } + t.clearAndRefreshHostKey(ctx, name, ip, full, 30*time.Second) } return nil } @@ -222,10 +229,7 @@ func (t *TrueNAS) Delete(ctx context.Context, name string) error { } // Clean up known_hosts entries for the now-dead container. - if ip != "" { - ssh.RemoveKnownHost(t.cfg.knownHosts, ip) - } - ssh.RemoveKnownHost(t.cfg.knownHosts, full) + t.clearKnownHosts(ip, full) return nil } @@ -300,12 +304,7 @@ func (t *TrueNAS) RestoreSnapshot(ctx context.Context, name, label string) error ip := ipFromAliases(inst.Aliases) if ip != "" { - // Remove stale known_hosts entries — snapshot restore changes the host key. - ssh.RemoveKnownHost(t.cfg.knownHosts, ip) - ssh.RemoveKnownHost(t.cfg.knownHosts, full) - if err := t.ssh.WaitReady(ctx, full, 30*time.Second, nil); err != nil { - t.warnf("ssh wait %s: %v", name, err) - } + t.clearAndRefreshHostKey(ctx, name, ip, full, 30*time.Second) } return nil }