Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ replace (
require (
cuelang.org/go v0.15.1
github.com/Masterminds/semver/v3 v3.4.0
github.com/cenkalti/backoff/v5 v5.0.3
github.com/dlclark/regexp2 v1.11.0
github.com/docker/cli v27.5.1+incompatible
github.com/fluxcd/pkg/oci v0.43.1
Expand Down Expand Up @@ -100,7 +101,6 @@ require (
github.com/aws/smithy-go v1.24.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
Comment thread
Pranav-d33 marked this conversation as resolved.
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chai2010/gettext-go v1.0.3 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
Expand Down
94 changes: 94 additions & 0 deletions retry/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package retry

import (
"errors"

"github.com/cenkalti/backoff/v5"
meshkiterrors "github.com/meshery/meshkit/errors"
)

// ErrInvalidConfig is returned when retry configuration validation fails.
// Use errors.Is(err, ErrInvalidConfig) to distinguish config errors from
// operation failures.
var ErrInvalidConfig = errors.New("retry: invalid config")

const (
ErrRetryCode = "meshkit-10001"
ErrContextCode = "meshkit-10002"
ErrConfigCode = "meshkit-10003"
)

type retryError struct {
inner error
meshkit *meshkiterrors.Error
}

func (e *retryError) Error() string {
return e.meshkit.Error()
}

func (e *retryError) Unwrap() []error {
return []error{e.inner, e.meshkit}
}

func ErrRetry(err error) error {
return &retryError{
inner: err,
meshkit: meshkiterrors.New(ErrRetryCode, meshkiterrors.Alert,
[]string{"Retry operation failed"},
[]string{err.Error()},
[]string{"Operation did not succeed within retry limits"},
[]string{"Check the underlying operation and retry configuration"}),
}
}

func ErrContext(err error) error {
return &retryError{
inner: err,
meshkit: meshkiterrors.New(ErrContextCode, meshkiterrors.Alert,
[]string{"Context canceled or deadline exceeded"},
[]string{err.Error()},
[]string{"Operation timed out or context was canceled"},
[]string{"Check context timeout and ensure the operation completes in time"}),
}
}

func ErrConfig(err error) error {
return &retryError{
inner: err,
meshkit: meshkiterrors.New(ErrConfigCode, meshkiterrors.Alert,
[]string{"Invalid retry configuration"},
[]string{err.Error()},
[]string{"One or more config values are invalid"},
[]string{"Ensure all retry configuration values are correct"}),
}
}

// ErrorDecision controls retry behaviour for a single error.
type ErrorDecision int

const (
DecisionRetry ErrorDecision = iota
DecisionStop
)

// ErrorClassifier returns the retry decision for a given error.
// Return DecisionStop for errors that should not be retried (e.g. HTTP 4xx,
// validation failures, auth errors). Return DecisionRetry for transient
// errors (timeouts, 5xx, rate limits).
//
// Ignored when the operation explicitly returns Permanent(err).
type ErrorClassifier func(err error) ErrorDecision

// Permanent wraps err to signal no further retries should be attempted.
// Use for non-transient errors (HTTP 4xx, auth failures, validation errors).
// Do NOT use for context-cancellation; return ctx.Err() directly.
func Permanent(err error) error {
return backoff.Permanent(err)
}

// IsPermanent reports whether err is (or wraps) a PermanentError.
func IsPermanent(err error) bool {
var pErr *backoff.PermanentError
return errors.As(err, &pErr)
}
101 changes: 101 additions & 0 deletions retry/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package retry
Comment thread
Pranav-d33 marked this conversation as resolved.

import (
"time"

"github.com/meshery/meshkit/logger"
)

const (
DefaultInitialInterval = 500 * time.Millisecond
DefaultMaxInterval = 30 * time.Second
DefaultMaxElapsedTime = 2 * time.Minute
DefaultMultiplier = 1.5
DefaultRandomizationFactor = 0.3 // Never set to 0 in production
)

type Config struct {
MaxAttempts uint
InitialInterval time.Duration
MaxInterval time.Duration
MaxElapsedTime time.Duration
Multiplier float64
RandomizationFactor float64
Notifier func(err error, wait time.Duration)
ErrorClassifier ErrorClassifier
}

func defaultConfig() Config {
return Config{
InitialInterval: DefaultInitialInterval,
MaxInterval: DefaultMaxInterval,
MaxElapsedTime: DefaultMaxElapsedTime,
Multiplier: DefaultMultiplier,
RandomizationFactor: DefaultRandomizationFactor,
}
}

type Option func(*Config)

// WithMaxAttempts sets a hard cap on total calls (includes first attempt).
func WithMaxAttempts(n uint) Option {
return func(c *Config) { c.MaxAttempts = n }
}

func WithInitialInterval(d time.Duration) Option {
return func(c *Config) { c.InitialInterval = d }
}

func WithMaxInterval(d time.Duration) Option {
return func(c *Config) { c.MaxInterval = d }
}

// WithMaxElapsedTime sets wall-clock deadline. Pass 0 to disable.
func WithMaxElapsedTime(d time.Duration) Option {
return func(c *Config) { c.MaxElapsedTime = d }
}

func WithMultiplier(m float64) Option {
return func(c *Config) { c.Multiplier = m }
}

// WithJitter overrides randomization factor (range: 0.0-1.0). Do not set to 0.0 in production.
func WithJitter(f float64) Option {
return func(c *Config) { c.RandomizationFactor = f }
}

// WithErrorClassifier provides a decision function for classifying errors as
// retryable (DecisionRetry) or terminal (DecisionStop). When set, every error
// returned by the operation (except those explicitly wrapped with Permanent)
// is passed to this function. If it returns DecisionStop, the error is treated
// as permanent and the retry loop stops immediately.
//
// Example:
//
// retry.Do(ctx, op,
// retry.WithErrorClassifier(func(err error) retry.ErrorDecision {
// var status *myHTTPError
// if errors.As(err, &status) {
// if status.Code >= 500 {
// return retry.DecisionRetry
// }
// return retry.DecisionStop
// }
// return retry.DecisionRetry
// }),
// )
func WithErrorClassifier(classifier ErrorClassifier) Option {
return func(c *Config) { c.ErrorClassifier = classifier }
}
Comment thread
Pranav-d33 marked this conversation as resolved.

func WithNotifier(n func(err error, wait time.Duration)) Option {
return func(c *Config) { c.Notifier = n }
}

// WithLogNotifier emits a Warn log entry on each retry via MeshKit's logger.Handler.
func WithLogNotifier(log logger.Handler) Option {
return WithNotifier(func(err error, wait time.Duration) {
log.Infof("retry: transient error; retrying in %s", wait.Round(time.Millisecond))
log.Warn(err)
})
}
101 changes: 101 additions & 0 deletions retry/retry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package retry
Comment thread
Pranav-d33 marked this conversation as resolved.

import (
"context"
"errors"
"fmt"
"math"

"github.com/cenkalti/backoff/v5"
)

type Operation func(ctx context.Context) error

// Do executes op with exponential backoff until success, permanent error,
// context cancellation, or budget exhaustion. Config via opts (default:
// 500ms initial, 1.5x growth, 30% jitter, 2min cap).
//
// When a ErrorClassifier is configured via WithErrorClassifier, every non-nil
// error from op (except those explicitly wrapped with Permanent) is passed to
// the classifier before the retry decision is made.
func Do(ctx context.Context, op Operation, opts ...Option) error {
Comment thread
Pranav-d33 marked this conversation as resolved.
if err := ctx.Err(); err != nil {
return ErrContext(err)
}
cfg := defaultConfig()
Comment thread
Pranav-d33 marked this conversation as resolved.
for _, o := range opts {
o(&cfg)
}

if err := validateConfig(cfg); err != nil {
return ErrConfig(err)
}

apply := op
if cfg.ErrorClassifier != nil {
apply = func(ctx context.Context) error {
err := op(ctx)
if err == nil {
return nil
}
var pErr *backoff.PermanentError
if errors.As(err, &pErr) {
return err
}
if cfg.ErrorClassifier(err) == DecisionStop {
return backoff.Permanent(err)
}
return err
}
}

retryOpts := []backoff.RetryOption{
backoff.WithBackOff(buildBackOff(cfg)),
backoff.WithMaxElapsedTime(cfg.MaxElapsedTime),
backoff.WithNotify(cfg.Notifier),
}
if cfg.MaxAttempts > 0 {
retryOpts = append(retryOpts, backoff.WithMaxTries(cfg.MaxAttempts))
}

_, err := backoff.Retry(ctx, func() (struct{}, error) {
return struct{}{}, apply(ctx)
}, retryOpts...)
if err != nil {
return ErrRetry(err)
}
return nil
}

func validateConfig(cfg Config) error {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The config validation is a good addition, but the returned errors are currently string-only.

Right now callers can only distinguish invalid retry configuration from an operation failure by inspecting the error message. That is brittle. Since retry.Do can now fail before the operation even runs, callers should have a reliable way to detect that class of failure.

I’d suggest exposing a sentinel error, for example:

var ErrInvalidConfig = errors.New("retry: invalid config")

Then wrap it from validateConfig:

return fmt.Errorf("%w: InitialInterval must be > 0, got %v", ErrInvalidConfig, cfg.InitialInterval)

This lets consumers write:

if errors.Is(err, retry.ErrInvalidConfig) {
    // fail startup / reject bad config
}

if cfg.InitialInterval <= 0 {
return fmt.Errorf("%w: InitialInterval must be > 0, got %v", ErrInvalidConfig, cfg.InitialInterval)
}
if cfg.MaxInterval <= 0 {
return fmt.Errorf("%w: MaxInterval must be > 0, got %v", ErrInvalidConfig, cfg.MaxInterval)
}
if cfg.MaxInterval < cfg.InitialInterval {
return fmt.Errorf("%w: MaxInterval (%v) must be >= InitialInterval (%v)", ErrInvalidConfig, cfg.MaxInterval, cfg.InitialInterval)
}
if cfg.MaxElapsedTime < 0 {
return fmt.Errorf("%w: MaxElapsedTime must be >= 0, got %v", ErrInvalidConfig, cfg.MaxElapsedTime)
}
if math.IsNaN(cfg.Multiplier) || math.IsInf(cfg.Multiplier, 0) || cfg.Multiplier < 1 {
return fmt.Errorf("%w: Multiplier must be finite and >= 1, got %v", ErrInvalidConfig, cfg.Multiplier)
}
if math.IsNaN(cfg.RandomizationFactor) || cfg.RandomizationFactor < 0 || cfg.RandomizationFactor > 1 {
return fmt.Errorf("%w: RandomizationFactor must be finite and in [0,1], got %v", ErrInvalidConfig, cfg.RandomizationFactor)
}
return nil
}

// buildBackOff constructs a backoff policy from Config.
func buildBackOff(cfg Config) backoff.BackOff {
b := backoff.NewExponentialBackOff()
b.InitialInterval = cfg.InitialInterval
b.MaxInterval = cfg.MaxInterval
b.Multiplier = cfg.Multiplier
b.RandomizationFactor = cfg.RandomizationFactor

return b
}
Loading