-
Notifications
You must be signed in to change notification settings - Fork 14
feat(worker): add ExternalWorker retry with exponential backoff #84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b1f5935
d33ea18
f58d8de
3f02f76
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -143,3 +143,133 @@ func TestExternalWorker_IsHealthy_AfterListenerClose(t *testing.T) { | |
|
|
||
| // Verify ExternalWorker satisfies workerHandle interface at compile time. | ||
| var _ workerHandle = (*ExternalWorker)(nil) | ||
|
|
||
| // --- New tests for ExternalWorkerOptions and retry logic --- | ||
|
|
||
| func TestExternalWorkerWithOptions_Defaults(t *testing.T) { | ||
| w := NewExternalWorkerWithOptions(ExternalWorkerOptions{ | ||
| SocketPath: "/tmp/test.sock", | ||
| }) | ||
| if w.connectTimeout != defaultConnectTimeout { | ||
| t.Errorf("expected default connect timeout %v, got %v", defaultConnectTimeout, w.connectTimeout) | ||
| } | ||
| if w.maxRetries != defaultMaxRetries { | ||
| t.Errorf("expected default max retries %d, got %d", defaultMaxRetries, w.maxRetries) | ||
| } | ||
| if w.retryInterval != defaultRetryInterval { | ||
| t.Errorf("expected default retry interval %v, got %v", defaultRetryInterval, w.retryInterval) | ||
| } | ||
| } | ||
|
|
||
| func TestExternalWorkerWithOptions_CustomValues(t *testing.T) { | ||
| w := NewExternalWorkerWithOptions(ExternalWorkerOptions{ | ||
| SocketPath: "/tmp/custom.sock", | ||
| ConnectTimeout: 2 * time.Second, | ||
| MaxRetries: 3, | ||
| RetryInterval: 100 * time.Millisecond, | ||
| }) | ||
| if w.socketPath != "/tmp/custom.sock" { | ||
| t.Errorf("expected socketPath /tmp/custom.sock, got %q", w.socketPath) | ||
| } | ||
| if w.connectTimeout != 2*time.Second { | ||
| t.Errorf("expected connect timeout 2s, got %v", w.connectTimeout) | ||
| } | ||
| if w.maxRetries != 3 { | ||
| t.Errorf("expected max retries 3, got %d", w.maxRetries) | ||
| } | ||
| if w.retryInterval != 100*time.Millisecond { | ||
| t.Errorf("expected retry interval 100ms, got %v", w.retryInterval) | ||
| } | ||
| } | ||
|
|
||
| func TestNewExternalWorker_BackwardCompat(t *testing.T) { | ||
| w := NewExternalWorker("/tmp/compat.sock", 3*time.Second) | ||
| if w.maxRetries != 1 { | ||
|
||
| t.Errorf("expected maxRetries=1 for backward compat, got %d", w.maxRetries) | ||
| } | ||
| if w.connectTimeout != 3*time.Second { | ||
| t.Errorf("expected connectTimeout 3s, got %v", w.connectTimeout) | ||
| } | ||
| } | ||
|
|
||
| func TestExternalWorker_Start_RetrySuccess(t *testing.T) { | ||
| f, err := os.CreateTemp("/tmp", "pyproc-retry-*.sock") | ||
| if err != nil { | ||
| t.Fatal(err) | ||
| } | ||
| sockPath := f.Name() | ||
| _ = f.Close() | ||
| _ = os.Remove(sockPath) | ||
|
Comment on lines
+196
to
+202
|
||
| t.Cleanup(func() { _ = os.Remove(sockPath) }) | ||
|
|
||
| // Start listener after a short delay to simulate slow sidecar startup | ||
| lnCh := make(chan net.Listener, 1) | ||
| go func() { | ||
| time.Sleep(300 * time.Millisecond) | ||
| ln, listenErr := net.Listen("unix", sockPath) | ||
| if listenErr != nil { | ||
| return | ||
| } | ||
| lnCh <- ln | ||
| for { | ||
| conn, acceptErr := ln.Accept() | ||
| if acceptErr != nil { | ||
| return | ||
| } | ||
| _ = conn.Close() | ||
| } | ||
| }() | ||
| t.Cleanup(func() { | ||
| select { | ||
| case ln := <-lnCh: | ||
| _ = ln.Close() | ||
| default: | ||
| } | ||
| }) | ||
|
|
||
| w := NewExternalWorkerWithOptions(ExternalWorkerOptions{ | ||
| SocketPath: sockPath, | ||
| ConnectTimeout: 100 * time.Millisecond, | ||
| MaxRetries: 5, | ||
| RetryInterval: 100 * time.Millisecond, | ||
| }) | ||
| err = w.Start(context.Background()) | ||
| if err != nil { | ||
| t.Fatalf("expected Start with retry to succeed, got: %v", err) | ||
| } | ||
| if ExternalWorkerState(w.state.Load()) != ExternalWorkerRunning { | ||
| t.Error("expected Running state") | ||
| } | ||
| } | ||
|
|
||
| func TestExternalWorker_Start_AllRetriesFail(t *testing.T) { | ||
| w := NewExternalWorkerWithOptions(ExternalWorkerOptions{ | ||
| SocketPath: "/tmp/pyproc-noexist-retry.sock", | ||
| ConnectTimeout: 50 * time.Millisecond, | ||
| MaxRetries: 3, | ||
| RetryInterval: 50 * time.Millisecond, | ||
| }) | ||
| err := w.Start(context.Background()) | ||
| if err == nil { | ||
| t.Fatal("expected Start to fail after all retries") | ||
| } | ||
| } | ||
|
|
||
| func TestExternalWorker_Start_ContextCancelled(t *testing.T) { | ||
| ctx, cancel := context.WithCancel(context.Background()) | ||
| go func() { | ||
| time.Sleep(100 * time.Millisecond) | ||
| cancel() | ||
| }() | ||
|
|
||
| w := NewExternalWorkerWithOptions(ExternalWorkerOptions{ | ||
| SocketPath: "/tmp/pyproc-cancel-retry.sock", | ||
| ConnectTimeout: 50 * time.Millisecond, | ||
| MaxRetries: 10, | ||
| RetryInterval: 200 * time.Millisecond, | ||
| }) | ||
| err := w.Start(ctx) | ||
| if err == nil { | ||
| t.Fatal("expected Start to fail when context cancelled") | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This hardcoded
MaxRetries: 1keeps the legacy single-attempt behavior for everyNewExternalWorkercall, andnewExternalPoolstill constructs workers viaNewExternalWorker(pkg/pyproc/pool.go), so the sidecar startup path inExternalModedoes not actually get the new retry/backoff behavior and will still fail immediately when the Python socket appears shortly after pool startup.Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Intentional design choice. This PR adds the retry mechanism to ExternalWorker as a building block. The backward-compatible NewExternalWorker (MaxRetries=1) preserves existing behavior for all current callers including pool.go.
Integrating retry into the Pool's ExternalMode path requires adding retry configuration to PoolOptions and updating newExternalPool to use NewExternalWorkerWithOptions. That is planned as a follow-up PR to keep changes atomic and reviewable (one responsibility per PR).
Users who need retry today can construct workers directly with NewExternalWorkerWithOptions.