Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions cmd/daemon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package main
import (
"context"
"fmt"
"log/slog"
"os"
"os/signal"
"path"
Expand Down Expand Up @@ -201,6 +202,14 @@ func initConfig(ctx context.Context) {
doctor.CheckErr(ctx, err)
}

// Install the slog→logx bridge so the daemon kernel (which logs via the
// stdlib log/slog seam, in preparation for the pkg/daemonkit extraction)
// emits to the same zerolog sinks logx just configured — console, the
// rotating file, and journald. Must run after logx.Initialize so the
// bridge resolves the fully configured logger. CLI/workflows keep using
// logx directly.
slog.SetDefault(slog.New(logx.NewSlogHandler()))

activateProxy(ctx)
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/BurntSushi/toml v1.6.0
github.com/Masterminds/semver/v3 v3.5.0
github.com/automa-saga/automa v0.10.0
github.com/automa-saga/logx v0.4.0
github.com/automa-saga/logx v0.5.0
github.com/bluet/syspkg v0.1.7
github.com/charmbracelet/bubbles v1.0.0
github.com/charmbracelet/bubbletea v1.3.10
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/automa-saga/automa v0.10.0 h1:K3cBRwVl7MgHFre4FYmnLZMYsphoUOnoGJ50hjDe4NY=
github.com/automa-saga/automa v0.10.0/go.mod h1:AVzNGE+ci5sAJ7ao3ZkmC+CMe4kpwJhE6AloBJv3Udc=
github.com/automa-saga/logx v0.4.0 h1:5KPFX1vfnJtFvm18a9pwwDlqwgALimKjVmR3G1zjytM=
github.com/automa-saga/logx v0.4.0/go.mod h1:inlus7bMGEUD5wIEgtoKcGO5Ulhl93CHkzUBlqJJK6I=
github.com/automa-saga/logx v0.5.0 h1:yuFC7JlEQRbEKr+6t9eg1kXAvRIIgzdu9Bz7a0gSJYo=
github.com/automa-saga/logx v0.5.0/go.mod h1:8S9d499t8uro8yO3blI1hruXLEYWzrBp9ndCI/Ik/88=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY=
Expand Down
48 changes: 22 additions & 26 deletions internal/daemon/core/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ package core
import (
"context"
"fmt"
"log/slog"
"sync"
"time"

"github.com/automa-saga/logx"
)

// Back-off and degradation parameters for SupervisedMonitor. Declared as
Expand Down Expand Up @@ -164,42 +163,40 @@ func SupervisedMonitor(ctx context.Context, m MonitorRunner, tracker *StatusTrac
// ctx cancelled → clean shutdown, do not restart.
if ctx.Err() != nil {
setState("stopped")
logx.As().Info().
Str("reason", "MonitorStopped").
Str("monitor", m.Name()).
Msg("Monitor stopped cleanly")
slog.Info("Monitor stopped cleanly",
"reason", "MonitorStopped",
"monitor", m.Name())
return
}

// nil return without ctx cancellation → also clean exit.
if err == nil {
setState("stopped")
logx.As().Info().
Str("reason", "MonitorExited").
Str("monitor", m.Name()).
Msg("Monitor exited without error and without context cancellation — not restarting")
slog.Info("Monitor exited without error and without context cancellation — not restarting",
"reason", "MonitorExited",
"monitor", m.Name())
return
}

// Crash path.
consecutiveCrashes++

logx.As().Error().Err(err).
Str("reason", "MonitorCrash").
Str("monitor", m.Name()).
Int("consecutive_crashes", consecutiveCrashes).
Dur("backoff", backoff).
Msg("Monitor crashed — restarting after back-off")
slog.Error("Monitor crashed — restarting after back-off",
"error", err,
"reason", "MonitorCrash",
"monitor", m.Name(),
"consecutive_crashes", consecutiveCrashes,
"backoff", backoff)

// Emit MonitorDegraded at every supervisedDegradedThreshold consecutive
// crashes so ops is alerted at crash #5, #10, #15, …
if consecutiveCrashes%supervisedDegradedThreshold == 0 {
logx.As().Error().Err(err).
Str("reason", "MonitorDegraded").
Str("monitor", m.Name()).
Int("consecutive_crashes", consecutiveCrashes).
Dur("current_backoff", backoff).
Msg("Monitor is crashing repeatedly — operator intervention may be required")
slog.Error("Monitor is crashing repeatedly — operator intervention may be required",
"error", err,
"reason", "MonitorDegraded",
"monitor", m.Name(),
"consecutive_crashes", consecutiveCrashes,
"current_backoff", backoff)
}

// A stable run resets both the back-off and the consecutive-crash counter.
Expand All @@ -213,10 +210,9 @@ func SupervisedMonitor(ctx context.Context, m MonitorRunner, tracker *StatusTrac
select {
case <-ctx.Done():
setState("stopped")
logx.As().Info().
Str("reason", "MonitorStopped").
Str("monitor", m.Name()).
Msg("Monitor restart cancelled — context done")
slog.Info("Monitor restart cancelled — context done",
"reason", "MonitorStopped",
"monitor", m.Name())
return
case <-time.After(backoff):
}
Expand Down
8 changes: 4 additions & 4 deletions internal/daemon/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ package daemon
import (
"context"
"errors"
"log/slog"
"net"
"net/http"
"os"
"path/filepath"
"time"

"github.com/automa-saga/logx"
"github.com/hashgraph/solo-weaver/internal/daemon/core"
)

Expand Down Expand Up @@ -102,20 +102,20 @@ func (s *Server) Start(ctx context.Context) error {
return err
}

logx.As().Info().Str("reason", "ServerStarted").Str("sock", sockPath).Msg("Daemon socket server listening")
slog.Info("Daemon socket server listening", "reason", "ServerStarted", "sock", sockPath)

serveErr := make(chan error, 1)
go func() {
if err := s.srv.Serve(ln); err != nil && !errors.Is(err, http.ErrServerClosed) {
logx.As().Error().Err(err).Str("reason", "ServerStopped").Msg("Daemon socket server exited with error")
slog.Error("Daemon socket server exited with error", "error", err, "reason", "ServerStopped")
serveErr <- err
}
close(serveErr)
}()

select {
case <-ctx.Done():
logx.As().Info().Str("reason", "ServerStopped").Msg("Daemon socket server shutting down")
slog.Info("Daemon socket server shutting down", "reason", "ServerStopped")
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
shutdownErr := s.srv.Shutdown(shutdownCtx)
Expand Down
16 changes: 8 additions & 8 deletions internal/workflows/steps/consensus_migration_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ package steps
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"time"

"github.com/hashgraph/solo-weaver/internal/daemon/consensus"
"github.com/joomcode/errorx"
)

// SoakStart sends POST /consensus_node/migration/soak/start to the daemon socket
Expand All @@ -18,7 +18,7 @@ import (
func SoakStart(sockPath string, req consensus.SoakStartRequest) (*consensus.SoakStartResponse, error) {
body, err := json.Marshal(req)
if err != nil {
return nil, fmt.Errorf("marshal soak start request: %w", err)
return nil, errorx.InternalError.Wrap(err, "marshal soak start request")
}

resp, err := socketClient(sockPath).Post(
Expand All @@ -27,7 +27,7 @@ func SoakStart(sockPath string, req consensus.SoakStartRequest) (*consensus.Soak
bytes.NewReader(body),
)
if err != nil {
return nil, fmt.Errorf("soak start: %w", err)
return nil, errorx.ExternalError.Wrap(err, "soak start")
}
defer resp.Body.Close()

Expand All @@ -37,7 +37,7 @@ func SoakStart(sockPath string, req consensus.SoakStartRequest) (*consensus.Soak

var out consensus.SoakStartResponse
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
return nil, fmt.Errorf("decode soak start response: %w", err)
return nil, errorx.IllegalFormat.Wrap(err, "decode soak start response")
}
return &out, nil
}
Expand All @@ -54,12 +54,12 @@ func SoakStop(sockPath string, keepState bool) error {

req, err := http.NewRequest(http.MethodDelete, url, nil)
if err != nil {
return fmt.Errorf("build soak stop request: %w", err)
return errorx.InternalError.Wrap(err, "build soak stop request")
}

resp, err := soakSocketClient(sockPath).Do(req)
if err != nil {
return fmt.Errorf("soak stop: %w", err)
return errorx.ExternalError.Wrap(err, "soak stop")
}
defer resp.Body.Close()

Expand Down Expand Up @@ -93,9 +93,9 @@ func decodeAPIError(resp *http.Response) error {
}
_ = json.NewDecoder(resp.Body).Decode(&body)
if body.Error != "" {
return fmt.Errorf("daemon returned %d: %s", resp.StatusCode, body.Error)
return errorx.ExternalError.New("daemon returned %d: %s", resp.StatusCode, body.Error)
}
return fmt.Errorf("daemon returned unexpected status %d", resp.StatusCode)
return errorx.ExternalError.New("daemon returned unexpected status %d", resp.StatusCode)
}

// soakClientTimeout is used by the soak client calls. Longer than the default
Expand Down
6 changes: 4 additions & 2 deletions internal/workflows/steps/step_daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ func InstallDaemonBinaryStep(src DaemonBinarySource, paths models.WeaverPaths) *
return automa.StepFailureReport(stp.Id(),
automa.WithError(errorx.InternalError.Wrap(err, "failed to initialise daemon installer").
WithProperty(models.ErrPropertyResolution, []string{
"The provisioner binary may be built without a catalog entry for solo-provisioner-daemon",
"Re-install the provisioner: sudo solo-provisioner install",
"Auto-download is unavailable until an official solo-provisioner-daemon release is published",
"Build the daemon locally and install it with --daemon-bin:",
" task build:daemon GOOS=linux GOARCH=<arch>",
" sudo solo-provisioner daemon service install --daemon-bin=<path-to-binary>",
})))
}

Expand Down
23 changes: 23 additions & 0 deletions pkg/software/config_it_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,29 @@ func Test_Config_GetSoftwareByName_Integration(t *testing.T) {
require.Error(t, err, "Should return error for non-existent software")
}

// Test_Config_DaemonAutoDownloadUnavailable_Integration pins down the contract that
// the embedded infrastructure catalog carries no entry for the
// solo-provisioner-daemon binary. No daemon release has been published yet, so the
// auto-download path (taken when --daemon-bin is not supplied) has to fail instead
// of attempting a broken download. NewDaemonInstaller resolves the catalog entry up
// front, which is why the missing entry is expected to surface as an error from the
// constructor as well as from the direct lookup.
//
// Once a real daemon release with checksums is published and the entry is re-added
// to the catalog, replace this test with one asserting that the entry resolves.
func Test_Config_DaemonAutoDownloadUnavailable_Integration(t *testing.T) {
	catalog, loadErr := LoadInfrastructureCatalog()
	require.NoError(t, loadErr)

	// Direct lookup: the daemon artifact must be absent from the catalog.
	_, lookupErr := catalog.GetHostArtifact(DaemonBinaryName)
	require.Error(t, lookupErr,
		"solo-provisioner-daemon must NOT have a catalog entry until a real release exists; "+
			"its presence would re-enable a broken auto-download path")

	// Constructor path: building the installer must fail for the same reason.
	_, installerErr := NewDaemonInstaller()
	require.Error(t, installerErr,
		"NewDaemonInstaller must fail without a catalog entry so the install step fails "+
			"cleanly when no --daemon-bin is supplied")
}

// Test_Config_GetDefaultVersion_Integration tests getting the default version from actual artifacts
func Test_Config_GetDefaultVersion_Integration(t *testing.T) {
config, err := LoadInfrastructureCatalog()
Expand Down
15 changes: 0 additions & 15 deletions pkg/software/infrastructure-catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -244,21 +244,6 @@ host:
algorithm: 'sha256'
checksum: '9bb6c7e85c3166ad698ee11042706b2ffbce4b9d017ef96bb1ced3962d88256a'

- name: solo-provisioner-daemon
default: "0.0.0"
versions:
0.0.0:
binaries:
- name: 'solo-provisioner-daemon'
url: 'https://github.com/hashgraph/solo-weaver/releases/download/daemon-v{{.VERSION}}/solo-provisioner-daemon-{{.OS}}-{{.ARCH}}'
linux:
amd64:
algorithm: 'sha256'
checksum: ''
arm64:
algorithm: 'sha256'
checksum: ''

- name: teleport
default: "18.6.4"
versions:
Expand Down
Loading