diff --git a/.jules/sentinel.md b/.jules/sentinel.md index 3f6e490..d236974 100644 --- a/.jules/sentinel.md +++ b/.jules/sentinel.md @@ -31,3 +31,8 @@ Action: Wrap `net.Listen` with `syscall.Umask(0117)` to strictly enforce `0660` Risk: The daemon's internal socket listener (`handleSocketConnection`) read unlimited data from incoming connections before unmarshalling JSON. A local authenticated attacker (e.g., a compromised `ccrunner` user) could cause a Denial of Service (DoS) by sending a massive payload (e.g., gigabytes of data), forcing the daemon to allocate excessive memory and potentially triggering an Out-Of-Memory (OOM) crash. Learning: Never trust the size of incoming data, even from "trusted" local users. `json.Decoder` reads from the stream until it finds a valid object or error, but it buffers data. Without an `io.LimitReader`, a decoder can be coerced into reading indefinitely. Action: Implemented a strict 1MB read limit (`io.LimitReader`) on the socket connection before passing it to the JSON decoder. This is sufficient for legitimate execution reports (stdout/stderr are capped at 256KB each) but prevents memory exhaustion attacks. + +## 2026-01-27 - Unbounded Socket Connection DoS +Risk: The daemon accepted an unlimited number of concurrent socket connections. A malicious local user could flood the daemon with connection requests, exhausting file descriptors or creating thousands of goroutines, leading to a crash or system instability. +Learning: `net.Listener.Accept()` loops do not inherently limit concurrency. Spawning a new goroutine for every connection without a semaphore or worker pool allows clients to dictate the server's resource usage. +Action: Implemented a semaphore-based limit (`maxConcurrentConnections = 50`) in `startSocketListener` to strictly bound the number of active request handlers. 
diff --git a/cmd/daemon.go b/cmd/daemon.go index 0ca04a0..b9076b1 100644 --- a/cmd/daemon.go +++ b/cmd/daemon.go @@ -3,6 +3,7 @@ package cmd import ( "bytes" "encoding/json" + "errors" "fmt" "io" "log" @@ -42,6 +43,9 @@ var ( // socketReadTimeout prevents Slowloris-style DoS attacks on the unix socket. // It is a variable to allow overriding in tests. socketReadTimeout = 5 * time.Second + // maxConcurrentConnections limits the number of active socket handlers. + // Variable for testing overrides. + maxConcurrentConnections = 50 ) var daemonCmd = &cobra.Command{ @@ -522,14 +526,26 @@ func (d *daemon) startSocketListener() { d.connMu.Unlock() } + // Semaphore to limit concurrent connections + sem := make(chan struct{}, maxConcurrentConnections) + for { conn, err := listener.Accept() if err != nil { + if errors.Is(err, net.ErrClosed) { + return + } log.Printf("Socket accept error: %v", err) continue } - go d.handleSocketConnection(conn) + // Acquire semaphore (blocks if limit reached) + sem <- struct{}{} + + go func(c net.Conn) { + defer func() { <-sem }() // Release semaphore + d.handleSocketConnection(c) + }(conn) } } diff --git a/cmd/security_concurrency_test.go b/cmd/security_concurrency_test.go new file mode 100644 index 0000000..243d727 --- /dev/null +++ b/cmd/security_concurrency_test.go @@ -0,0 +1,172 @@ +package cmd + +import ( + "net" + "os" + "path/filepath" + "runtime" + "sync" + "testing" + "time" +) + +// TestConcurrentConnectionLimit verifies that the daemon limits the number of +// concurrent socket connections to prevent DoS (resource exhaustion). 
+func TestConcurrentConnectionLimit(t *testing.T) { + // Setup temp directory for socket + tmpDir := t.TempDir() + socketName := filepath.Join(tmpDir, "cc-agent-test.sock") + + // Override globals + originalSocketPath := socketPath + originalMaxConns := maxConcurrentConnections + socketPath = socketName + maxConcurrentConnections = 2 // Strict limit for this test + defer func() { + socketPath = originalSocketPath + maxConcurrentConnections = originalMaxConns + }() + + // Initialize daemon + d := &daemon{ + // No need for real backend connection + } + + // Start listener + ready := make(chan struct{}) + go func() { + close(ready) + d.startSocketListener() + }() + <-ready + + // Wait for socket file to appear + socketReady := false + for i := 0; i < 20; i++ { + if _, err := os.Stat(socketName); err == nil { + socketReady = true + break + } + time.Sleep(10 * time.Millisecond) + } + if !socketReady { + t.Fatal("Socket file not created in time") + } + + // Ensure cleanup + defer func() { + if d.shutdown != nil { + d.shutdown() + } + }() + + // Helper to count goroutines + getGoroutines := func() int { + time.Sleep(50 * time.Millisecond) // Allow scheduler to settle + return runtime.NumGoroutine() + } + + initialGo := getGoroutines() + t.Logf("Initial goroutines: %d", initialGo) + + var wg sync.WaitGroup + // Channel to signal when we are done with connections + doneCh := make(chan struct{}) + defer close(doneCh) + + // Helper to connect and hold + connectAndHold := func(id string) net.Conn { + conn, err := net.Dial("unix", socketName) + if err != nil { + t.Logf("[%s] Dial failed: %v", id, err) + return nil + } + + wg.Add(1) + go func() { + defer wg.Done() + // Send partial JSON to trigger handler but block on Decode + conn.Write([]byte(`{"jobId": "` + id + `"`)) + + // Wait until test is done + <-doneCh + conn.Close() + }() + return conn + } + + // 1. 
Fill the slots (2 connections) + t.Log("Connecting Client 1...") + c1 := connectAndHold("1") + if c1 == nil { t.Fatal("Failed to connect client 1") } + + t.Log("Connecting Client 2...") + c2 := connectAndHold("2") + if c2 == nil { t.Fatal("Failed to connect client 2") } + + // Allow handlers to start + time.Sleep(100 * time.Millisecond) + + currentGo := getGoroutines() + t.Logf("Goroutines after 2 conns: %d (Delta: %d)", currentGo, currentGo-initialGo) + + // We expect roughly 2 new goroutines (handlers) + if currentGo < initialGo+2 { + t.Logf("Warning: Expected goroutine count to increase by at least 2") + } + + // 2. Try 3rd connection + // This should be Accepted by OS but blocked by application semaphore + t.Log("Connecting Client 3 (should be queued)...") + + // Dial directly: the OS accepts the connection into the listener backlog + // even while the semaphore is full, so Dial does not block here. + conn3, err := net.Dial("unix", socketName) + if err != nil { + t.Fatalf("Failed to dial 3rd connection: %v", err) + } + + // Write data to ensure it would be processed if accepted + conn3.Write([]byte(`{"jobId": "3"`)) + + // Wait a bit + time.Sleep(100 * time.Millisecond) + + go3 := getGoroutines() + t.Logf("Goroutines after 3 conns: %d (Delta from 2 conns: %d)", go3, go3-currentGo) + + // If properly limited, go3 should be equal to currentGo (or very close, NO new handler goroutine). + // If unbounded, go3 should be roughly currentGo + 1. + + if go3 > currentGo { + // This assertion checks if the limit is working. + // If go3 > currentGo, it means a new handler was spawned -> Unbounded. + // We expect this to be false AFTER the fix. + t.Errorf("Concurrency limit exceeded! New goroutine spawned for 3rd connection.") + } else { + t.Log("Success: No new goroutine spawned for 3rd connection.") + } + + // 3. Close Client 1 to free a slot + t.Log("Closing Client 1...") + // The helper goroutine blocks on doneCh, so close the connection directly; + // the handler will see EOF and release its semaphore slot.
+ // Closing c1 from the client side makes the handler's read return EOF, + // so the handler exits and releases its semaphore slot. + c1.Close() + + // Wait for handler to exit and new one to start + time.Sleep(200 * time.Millisecond) + + goFinal := getGoroutines() + t.Logf("Goroutines after closing 1: %d", goFinal) + + // We expect goFinal to be roughly equal to currentGo + // (one died, one picked up). + + // Clean up 3rd conn + conn3.Close() +}