Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- **`ww shell` "AI agents:" startup hint.** The line `AI agents: ipfs cat /ipns/releases.wetware.run/.agents/prompt.md` is gone from `src/cli/shell.rs`. The hint pointed at a host-shell command (`ipfs cat`) that's awkward to surface from inside a Glia REPL (the user can't paste it), and the obvious Glia-form rewrite — `(perform fs :read-str "/ipns/…")` — fails today because the WASI fs interceptor (`crates/cell/src/fs_intercept.rs:481-520`) only recognizes `ipfs/<CID>/…` paths (`parse_ipfs_path` at line 72 strips the `ipfs/` prefix; there's no `ipns/` sibling). `/ipfs/<CID>/…` reads through the cap *do* work — `open_ipfs` lazily materializes content from the pinset cache — so a hint pointing at a stable CID would work today; what's missing is IPNS resolution at the intercept layer (or a sibling cap method that calls Kubo `name/resolve` first, then routes through the existing pinset path). Restoring a pasteable Glia-form hint is the natural reward for that follow-up.

### Fixed
- **CI IPFS release publishing now tolerates slow pod staging.** The publish helper uses a unique pod staging path, retries `kubectl cp`, keeps repo stats and pod cleanup best-effort, and logs each production publish phase so slow k3s API behavior is diagnosable without changing release pin retention semantics.
- **WAGI HTTP requests now time out and kill hung cells (#535).** `HttpListener` bounds each spawned request's stdin/stdout/wait phase with a 30s wall-clock timeout, returns `504 Gateway Timeout` on expiry, and calls `Process.kill()` best-effort while preserving the existing oversized-response kill path.
- **Epoch advances now update the live CidTree root before broadcasting (#536).** Daemon startup wires the runtime `CidTree` into `EpochService`, so epoch commit handling swaps the virtual filesystem root to the committed event CID before the delayed epoch notification is released.
- **`/status` host introspection now degrades instead of hanging during cold start (#534).** Bounded best-effort timeouts around `host.id`, `host.addrs`, and `host.peers` let slow startup host RPCs render individual JSON fields as `null` instead of blocking the status response indefinitely.
Expand Down
52 changes: 46 additions & 6 deletions scripts/ipfs_publish_release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ set -euo pipefail
: "${POD:?POD is required}"

# Tunable defaults. Each value may be overridden from the environment; the
# `:-` form only fills in a value when the variable is unset or empty, so
# every variable must be assigned exactly once or the first default wins.

# Release tree already staged on this host; it is the source of the pod copy.
REMOTE_RELEASE_TREE="${REMOTE_RELEASE_TREE:-/tmp/ww-release-tree}"
# Per-run staging path inside the pod. The epoch seconds plus this shell's PID
# keep concurrent or repeated publishes from sharing a directory.
POD_RELEASE_TREE="${POD_RELEASE_TREE:-/tmp/ww-release-tree-publish-$(date +%s)-$$}"
# File that is handed to the pod-side pin state script.
STATE_FILE="${WW_RELEASE_PIN_STATE:-/data/ipfs/ww-release-pins.txt}"
# Retention count handed to the pod-side pin state script.
RETAIN="${WW_RELEASE_PIN_RETAIN:-10}"
# Request timeout for kubectl calls whose failure must abort the publish.
KUBECTL_TIMEOUT="${KUBECTL_TIMEOUT:-10m}"
# Shorter request timeout for calls that are allowed to fail (stats, cleanup).
KUBECTL_BEST_EFFORT_TIMEOUT="${KUBECTL_BEST_EFFORT_TIMEOUT:-45s}"

case "$RETAIN" in
''|*[!0-9]*)
Expand All @@ -27,41 +28,78 @@ k() {
kubectl --request-timeout="$KUBECTL_TIMEOUT" "$@"
}

# Run kubectl with the short best-effort request timeout.
# Globals:   KUBECTL_BEST_EFFORT_TIMEOUT (read)
# Arguments: the kubectl sub-command and its arguments, passed through as-is
# Returns:   kubectl's exit status
best_effort_k() {
  # Prepend the timeout flag to the caller's arguments, then hand over the lot.
  set -- "--request-timeout=${KUBECTL_BEST_EFFORT_TIMEOUT}" "$@"
  kubectl "$@"
}

# Execute a command inside the target pod through the `k` wrapper.
# Globals:   POD (read)
# Arguments: the command and arguments to run in the pod
# Returns:   the exit status of `k exec`
pod() {
  # Rewrite the argument list into a full `exec <pod> -- <command...>` call.
  set -- exec "${POD}" -- "$@"
  k "$@"
}

# Emit a progress line on stderr, prefixed with "ipfs-publish: ".
# Arguments: message words; they are joined into a single line
# Outputs:   one line on stderr, nothing on stdout
log() {
  local message="$*"
  printf 'ipfs-publish: %s\n' "${message}" 1>&2
}

# Remove the pod staging directory. Intended to run from the EXIT trap.
# Globals: POD, POD_RELEASE_TREE (read)
# Returns: always 0
cleanup() {
  # Deliberately best-effort: a single attempt with the short timeout, output
  # discarded and failure ignored, so a slow or unreachable API server cannot
  # hold up script exit or change its exit status.
  best_effort_k exec "$POD" -- rm -rf "$POD_RELEASE_TREE" >/dev/null 2>&1 || true
}
trap cleanup EXIT

# Print the last line of `ipfs repo stat --size-only` from the pod.
# Globals: POD (read)
# Outputs: the final output line with carriage returns removed, or nothing
#          when the call fails
# Returns: always 0; this is informational and must never abort a publish
repo_stat_size() {
  # The pod-side command is wrapped in `timeout 30` when the pod image has
  # `timeout`. stderr is discarded so a failed call yields empty output.
  best_effort_k exec "$POD" -- sh -c 'if command -v timeout >/dev/null 2>&1; then timeout 30 ipfs repo stat --size-only; else ipfs repo stat --size-only; fi' 2>/dev/null \
    | tail -n 1 \
    | tr -d '\r' \
    || true
}

# Copy the staged release tree into the pod, retrying on failure.
# Makes up to three attempts with a linear backoff (20s, then 40s).
# Globals: REMOTE_RELEASE_TREE, POD, POD_RELEASE_TREE (read)
# Returns: 0 as soon as one copy succeeds, 1 when all attempts fail
copy_release_tree() {
  local attempt backoff

  for attempt in 1 2 3; do
    log "copying release tree into pod staging path $POD_RELEASE_TREE (attempt $attempt)"

    # A failed attempt can leave a partial staging directory behind, and
    # kubectl cp copies *into* a destination that already exists as a
    # directory. Retrying on top of it would nest the tree one level deeper
    # and change the CID that `ipfs add -r` produces, so clear it first. If
    # clearing fails, skip the copy for this attempt rather than risk that.
    # NOTE(review): kubectl documents --retries for copies *from* a container,
    # so it may be a no-op here; it is kept because the outer loop does the
    # retrying and the flag is harmless - confirm.
    if [ "$attempt" -gt 1 ] && ! pod rm -rf "$POD_RELEASE_TREE"; then
      log "could not clear partial pod staging path $POD_RELEASE_TREE before retry"
    elif k cp --retries=3 "$REMOTE_RELEASE_TREE" "$POD:$POD_RELEASE_TREE"; then
      return 0
    fi

    # No point sleeping after the final attempt.
    if [ "$attempt" -lt 3 ]; then
      backoff="$((attempt * 20))"
      log "release tree copy failed; retrying in ${backoff}s"
      sleep "$backoff"
    fi
  done

  return 1
}

# Nothing to publish unless the release tree was staged on this host first.
if [ ! -d "$REMOTE_RELEASE_TREE" ]; then
  echo "ERROR: release tree is missing on VPS: $REMOTE_RELEASE_TREE" >&2
  exit 1
fi

log "collecting repo stat before publish (best effort)"
repo_stat_before="$(repo_stat_size)"

# The tree is copied exactly once, through the retrying helper. Copying it a
# second time into the same staging path would place it inside the first copy.
copy_release_tree

log "adding release tree to IPFS with implicit pinning disabled"
# -Q prints only the final hash; tail/tr guard against extra lines and the
# carriage returns that can come back through kubectl exec.
CID="$(pod ipfs add --pin=false -rQ --cid-version=1 "$POD_RELEASE_TREE" | tail -n 1 | tr -d '\r')"
if [ -z "$CID" ]; then
  echo "ERROR: ipfs add produced an empty CID" >&2
  exit 1
fi

echo "CID=$CID"
# Pin explicitly before publishing the IPNS name, so the name is only ever
# pointed at a CID this node has already pinned.
log "pinning release CID $CID"
pod ipfs pin add "$CID"
log "publishing IPNS ww-release to $CID"
pod ipfs name publish --key=ww-release "/ipfs/$CID"

log "announcing release CID to the DHT (best effort)"
# A slow or failed provide only delays discovery, so it warns instead of
# failing the publish.
if ! pod sh -c "if command -v timeout >/dev/null 2>&1; then timeout 60 ipfs routing provide -r '$CID'; else ipfs routing provide -r '$CID'; fi"; then
  echo "WARNING: provide announce timed out or failed; DHT propagation may lag" >&2
fi

log "updating managed release pin state"
state_output="$(
k exec "$POD" -- sh -s -- "$CID" "$RETAIN" "$STATE_FILE" <<'POD_STATE_SH'
set -eu
Expand Down Expand Up @@ -152,11 +190,13 @@ printf '%s\n' "$state_output"


# Pull UNPINNED_COUNT=<n> out of the pod-side state script output. awk keeps the
# last matching line's value, and `value + 0` prints 0 when no line matched.
unpinned_count="$(printf '%s\n' "$state_output" | awk -F= '$1 == "UNPINNED_COUNT" { value=$2 } END { print value + 0 }')"
# Only run repo GC when at least one stale release pin was removed.
if [ "$unpinned_count" -gt 0 ]; then
log "running IPFS repo GC after managed stale release unpins"
# GC is bounded to 120s when the pod has `timeout`; a failure here only warns.
if ! pod sh -c 'if command -v timeout >/dev/null 2>&1; then timeout 120 ipfs repo gc; else ipfs repo gc; fi'; then
echo "WARNING: ipfs repo gc timed out or failed after stale release unpins" >&2
fi
fi

log "collecting repo stat after cleanup (best effort)"
repo_stat_after="$(repo_stat_size)"
# Remove the release tree staged on this host.
rm -rf "$REMOTE_RELEASE_TREE"

Expand Down
5 changes: 5 additions & 0 deletions tests/test_ipfs_release_publish.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ grep -Fq "[ ! -f \"\$state_file\" ]" "$PUBLISH_SCRIPT" \
|| fail "release script must handle first run without bulk cleanup"
# Static check: the publish script must contain a repo GC invocation.
grep -Fq 'ipfs repo gc' "$PUBLISH_SCRIPT" \
|| fail "release script must run repo gc after stale unpins"
# Static check: the default pod staging path must be the per-run unique form.
# The pattern is single-quoted on purpose so $(date +%s) and $$ are matched
# literally rather than expanded by this test.
# shellcheck disable=SC2016
grep -Fq 'POD_RELEASE_TREE:-/tmp/ww-release-tree-publish-$(date +%s)-$$' "$PUBLISH_SCRIPT" \
|| fail "release script must use a unique pod staging path"
# Static check: the copy into the pod must go through `k cp --retries=3`.
grep -Fq 'k cp --retries=3' "$PUBLISH_SCRIPT" \
|| fail "release script must retry kubectl cp under slow k3s API behavior"

# Look up where the pin and the IPNS publish appear in the publish script.
# NOTE(review): line_number is defined outside this view; these two values are
# presumably compared later to assert that the pin precedes the publish - confirm.
pin_add_line="$(line_number "ipfs pin add \"\$CID\"" "$PUBLISH_SCRIPT")"
publish_line="$(line_number "ipfs name publish --key=ww-release \"/ipfs/\$CID\"" "$PUBLISH_SCRIPT")"
Expand Down