From d266f65e6b6cd56aec05c2312787a4ac85c9f80a Mon Sep 17 00:00:00 2001 From: Hamza Khalid Date: Wed, 18 Feb 2026 17:02:25 +0500 Subject: [PATCH 1/4] fix: silent error path --- crates/multithread/src/multithread.rs | 29 ++++++++++++++++---- crates/zk-prover/src/actors/proof_request.rs | 8 ++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/crates/multithread/src/multithread.rs b/crates/multithread/src/multithread.rs index a62106fbc3..61d682673c 100644 --- a/crates/multithread/src/multithread.rs +++ b/crates/multithread/src/multithread.rs @@ -46,8 +46,7 @@ use e3_zk_prover::{Provable, ZkBackend, ZkProver}; use fhe::bfv::PublicKey; use fhe_traits::DeserializeParametrized; use rand::Rng; -use tracing::error; -use tracing::info; +use tracing::{error, info}; /// Multithread actor pub struct Multithread { @@ -187,13 +186,33 @@ async fn handle_compute_request_event( let msg_string = msg.to_string(); let job_name = msg_string.clone(); let (msg, ctx) = msg.into_components(); - // We spawn a thread on rayon moving to "sync"-land + let request_snapshot = msg.clone(); - let (result, duration) = pool + let pool_result = pool .spawn(job_name, TaskTimeouts::default(), move || { handle_compute_request(rng, cipher, zk_prover, msg) }) - .await?; + .await; + + let (result, duration) = match pool_result { + Ok(v) => v, + Err(pool_err) => { + error!( + "Task pool error for compute request '{}': {pool_err}", + msg_string + ); + let error_kind = match &request_snapshot.request { + ComputeRequestKind::Zk(_) => ComputeRequestErrorKind::Zk( + ZkEventError::ProofGenerationFailed(format!("Pool error: {pool_err}")), + ), + ComputeRequestKind::TrBFV(_) => ComputeRequestErrorKind::TrBFV( + e3_trbfv::TrBFVError::GenPkShareAndSkSss(format!("Pool error: {pool_err}")), + ), + }; + bus.publish(ComputeRequestError::new(error_kind, request_snapshot), ctx)?; + return Ok(()); + } + }; if let Some(report) = report { report.do_send(TrackDuration::new(msg_string, duration)) diff --git 
a/crates/zk-prover/src/actors/proof_request.rs b/crates/zk-prover/src/actors/proof_request.rs index 2cf1d8d35f..088478d1bd 100644 --- a/crates/zk-prover/src/actors/proof_request.rs +++ b/crates/zk-prover/src/actors/proof_request.rs @@ -134,6 +134,10 @@ impl ProofRequestActor { ec: &EventContext, ) { let Some(pending) = self.pending_threshold.remove(correlation_id) else { + error!( + "Received PkBfv ComputeResponse with correlation_id {:?} but no matching pending request found.", + correlation_id + ); return; }; @@ -214,6 +218,10 @@ impl ProofRequestActor { ec: &EventContext, ) { let Some(pending) = self.pending.remove(&correlation_id) else { + error!( + "Received PkBfv ComputeResponse with correlation_id {:?} but no matching pending request found.", + correlation_id + ); return; }; From 56a64ab1cf527eb63abb54df192d5722f3696c23 Mon Sep 17 00:00:00 2001 From: Hamza Khalid Date: Wed, 18 Feb 2026 17:37:36 +0500 Subject: [PATCH 2/4] chore: debug logs --- crates/net/src/net_interface.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/net/src/net_interface.rs b/crates/net/src/net_interface.rs index 246a3c87eb..d5d32b2df0 100644 --- a/crates/net/src/net_interface.rs +++ b/crates/net/src/net_interface.rs @@ -377,7 +377,7 @@ async fn process_swarm_event( match record { Ok(record) => { let key = ContentHash(record.key.to_vec()); - info!("PUT RECORD SUCCESS: {:?}", key); + debug!("PUT RECORD SUCCESS: {:?}", key); event_tx.send(NetEvent::DhtPutRecordSucceeded { key, correlation_id, @@ -411,7 +411,7 @@ async fn process_swarm_event( peer_id, topic, })) => { - info!("Peer {} subscribed to {}", peer_id, topic); + debug!("Peer {} subscribed to {}", peer_id, topic); let count = swarm.behaviour().gossipsub.mesh_peers(&topic).count(); event_tx.send(NetEvent::GossipSubscribed { count, topic })?; } @@ -425,7 +425,7 @@ async fn process_swarm_event( }, .. 
})) => { - info!("Incoming sync request received (id={})", request_id); + debug!("Incoming sync request received (id={})", request_id); // received a request for events event_tx.send(NetEvent::SyncRequestReceived(SyncRequestReceived { @@ -635,7 +635,7 @@ fn handle_put_record( Ok(qid) => { // QueryId is returned synchronously and we immediately add it to the correlator so race conditions should not be an issue. correlator.track(qid, correlation_id); - info!("PUT RECORD OK qid={:?} cid={}", qid, correlation_id); + debug!("PUT RECORD OK qid={:?} cid={}", qid, correlation_id); } Err(error) => { event_tx.send(NetEvent::DhtPutRecordError { @@ -660,7 +660,7 @@ fn handle_get_record( // QueryId is returned synchronously and we immediately add it to the correlator so race conditions should not be an issue. correlator.track(query_id, correlation_id); - info!( + debug!( "GET RECORD CORRELATED! query_id={:?} correlation_id={}", query_id, correlation_id ); @@ -687,7 +687,7 @@ fn handle_outgoing_sync_request( correlation_id: CorrelationId, value: SyncRequestValue, ) -> Result<()> { - info!("Outgoing sync request (cid={})", correlation_id); + debug!("Outgoing sync request (cid={})", correlation_id); // TODO: // This is a first pass. 
// Lots of stuff to work through here: From 41fce35851551b79ef7c5f46228ec50b64abe479 Mon Sep 17 00:00:00 2001 From: Hamza Khalid Date: Wed, 18 Feb 2026 17:54:46 +0500 Subject: [PATCH 3/4] fix: update ci to run on self-hosted --- .github/workflows/ci.yml | 38 ++++++++++---------- crates/multithread/src/multithread.rs | 19 ++++++++-- crates/multithread/src/pool.rs | 37 ++++++++++++++----- crates/zk-prover/src/actors/proof_request.rs | 2 +- 4 files changed, 64 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3dbe36e654..b16eeb45b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ permissions: jobs: rust_unit: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 @@ -76,7 +76,7 @@ jobs: run: 'cargo test --lib && cargo test --doc' rust_integration: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 @@ -119,7 +119,7 @@ jobs: run: 'cargo test --test integration -- --nocapture' zk_prover_integration: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 @@ -148,7 +148,7 @@ jobs: run: 'cargo test -p e3-zk-prover --features integration-tests --test integration_tests -- --nocapture' build_e3_support_risc0: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 - name: Generate tags @@ -185,7 +185,7 @@ jobs: type=gha,mode=max,scope=e3-support build_ciphernode_image: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 @@ -225,7 +225,7 @@ jobs: type=gha,mode=max,scope=ciphernode test_contracts: - runs-on: 'ubuntu-latest' + runs-on: self-hosted steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -260,7 +260,7 @@ jobs: echo "✅ Passed" >> $GITHUB_STEP_SUMMARY test_net: - runs-on: 'ubuntu-latest' + runs-on: self-hosted steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -273,7 +273,7 @@ jobs: echo "✅ Passed" >> 
$GITHUB_STEP_SUMMARY integration_prebuild: - runs-on: 'ubuntu-latest' + runs-on: self-hosted steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -338,7 +338,7 @@ jobs: ciphernode_integration_test: needs: [integration_prebuild, build_enclave_cli, build_sdk] - runs-on: 'ubuntu-latest' + runs-on: self-hosted strategy: matrix: test-suite: [base, persist] @@ -400,7 +400,7 @@ jobs: echo "✅ Passed" >> $GITHUB_STEP_SUMMARY build_enclave_cli: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 with: @@ -431,7 +431,7 @@ jobs: retention-days: 1 crisp_unit: - runs-on: ubuntu-latest + runs-on: self-hosted needs: [build_crisp_sdk] steps: - uses: actions/checkout@v4 @@ -504,7 +504,7 @@ jobs: run: 'pnpm test:contracts' crisp_e2e: - runs-on: ubuntu-latest + runs-on: self-hosted needs: [build_enclave_cli, build_crisp_sdk] steps: - uses: actions/checkout@v4 @@ -612,7 +612,7 @@ jobs: retention-days: 30 test_enclave_circuits: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 with: @@ -638,7 +638,7 @@ jobs: if-no-files-found: warn build_e3_support_dev: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 with: @@ -667,7 +667,7 @@ jobs: if-no-files-found: error build_sdk: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 with: @@ -716,7 +716,7 @@ jobs: if-no-files-found: warn build_crisp_sdk: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - uses: actions/checkout@v4 @@ -760,7 +760,7 @@ jobs: if-no-files-found: warn template_integration: - runs-on: ubuntu-latest + runs-on: self-hosted needs: [build_enclave_cli, build_e3_support_dev, build_sdk] steps: - uses: actions/checkout@v4 @@ -820,7 +820,7 @@ jobs: pnpm test:integration test_enclave_init: - runs-on: ubuntu-latest + runs-on: self-hosted needs: [build_enclave_cli, build_e3_support_dev] steps: - name: Install pnpm @@ -857,7 +857,7 @@ jobs: enclave init mycitest --verbose --template=${{ 
github.server_url }}/${{ github.repository }}.git#${BRANCH}:templates/default contrib-readme-job: - runs-on: ubuntu-latest + runs-on: self-hosted name: Populate Contributors List # Only run on main branch to avoid branch conflicts if: github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'contrib-readme-action') diff --git a/crates/multithread/src/multithread.rs b/crates/multithread/src/multithread.rs index 61d682673c..096bd7db37 100644 --- a/crates/multithread/src/multithread.rs +++ b/crates/multithread/src/multithread.rs @@ -205,9 +205,22 @@ async fn handle_compute_request_event( ComputeRequestKind::Zk(_) => ComputeRequestErrorKind::Zk( ZkEventError::ProofGenerationFailed(format!("Pool error: {pool_err}")), ), - ComputeRequestKind::TrBFV(_) => ComputeRequestErrorKind::TrBFV( - e3_trbfv::TrBFVError::GenPkShareAndSkSss(format!("Pool error: {pool_err}")), - ), + ComputeRequestKind::TrBFV(ref trbfv_req) => { + let msg = format!("Pool error: {pool_err}"); + ComputeRequestErrorKind::TrBFV(match trbfv_req { + TrBFVRequest::GenPkShareAndSkSss(_) => TrBFVError::GenPkShareAndSkSss(msg), + TrBFVRequest::GenEsiSss(_) => TrBFVError::GenEsiSss(msg), + TrBFVRequest::CalculateDecryptionKey(_) => { + TrBFVError::CalculateDecryptionKey(msg) + } + TrBFVRequest::CalculateDecryptionShare(_) => { + TrBFVError::CalculateDecryptionShare(msg) + } + TrBFVRequest::CalculateThresholdDecryption(_) => { + TrBFVError::CalculateThresholdDecryption(msg) + } + }) + } }; bus.publish(ComputeRequestError::new(error_kind, request_snapshot), ctx)?; return Ok(()); diff --git a/crates/multithread/src/pool.rs b/crates/multithread/src/pool.rs index 152b3a25b5..d6fe9d7956 100644 --- a/crates/multithread/src/pool.rs +++ b/crates/multithread/src/pool.rs @@ -27,6 +27,9 @@ pub enum TaskPoolError { #[error("{0}")] RecvError(RecvError), + + #[error("Task panicked: {0}")] + Panic(String), } impl TaskPool { @@ -86,18 +89,34 @@ impl TaskPool { // This uses channels to track pending and 
complete tasks when // using the thread pool let (tx, rx) = tokio::sync::oneshot::channel(); - self.thread_pool.spawn(|| { - let t = op(); - // try to return the result and it's duration note this is sync as it is a oneshot sender. - if let Err(res) = tx.send(t) { - error!( - "There was an error sending the result from the multithread actor: result = {:?}", - res - ); + self.thread_pool.spawn(move || { + // Catch panics inside the Rayon thread so we can report them + // as errors instead of silently dropping the oneshot sender. + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(op)); + match result { + Ok(t) => { + if let Err(res) = tx.send(Ok(t)) { + error!( + "There was an error sending the result from the multithread actor: result = {:?}", + res + ); + } + } + Err(panic_info) => { + let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::<String>() { + s.clone() + } else { + "unknown panic".to_string() + }; + error!("Rayon task panicked: {}", panic_msg); + let _ = tx.send(Err(TaskPoolError::Panic(panic_msg))); + } } }); - let output = rx.await.map_err(|r| TaskPoolError::RecvError(r))?; + let output = rx.await.map_err(|r| TaskPoolError::RecvError(r))??; warning_handle.abort(); diff --git a/crates/zk-prover/src/actors/proof_request.rs b/crates/zk-prover/src/actors/proof_request.rs index 088478d1bd..170942d18b 100644 --- a/crates/zk-prover/src/actors/proof_request.rs +++ b/crates/zk-prover/src/actors/proof_request.rs @@ -135,7 +135,7 @@ impl ProofRequestActor { ) { let Some(pending) = self.pending_threshold.remove(correlation_id) else { error!( - "Received PkBfv ComputeResponse with correlation_id {:?} but no matching pending request found.", + "Received PkGeneration ComputeResponse with correlation_id {:?} but no matching pending request found.", correlation_id ); return; From 8db49917e71b2ad147c027f3020552992b347233 Mon Sep 17 00:00:00 2001 From: Hamza Khalid Date: Wed, 18
Feb 2026 18:05:36 +0500 Subject: [PATCH 4/4] fix: revert to ubuntu --- .github/workflows/ci.yml | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b16eeb45b0..3dbe36e654 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ permissions: jobs: rust_unit: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -76,7 +76,7 @@ jobs: run: 'cargo test --lib && cargo test --doc' rust_integration: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -119,7 +119,7 @@ jobs: run: 'cargo test --test integration -- --nocapture' zk_prover_integration: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -148,7 +148,7 @@ jobs: run: 'cargo test -p e3-zk-prover --features integration-tests --test integration_tests -- --nocapture' build_e3_support_risc0: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Generate tags @@ -185,7 +185,7 @@ jobs: type=gha,mode=max,scope=e3-support build_ciphernode_image: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -225,7 +225,7 @@ jobs: type=gha,mode=max,scope=ciphernode test_contracts: - runs-on: self-hosted + runs-on: 'ubuntu-latest' steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -260,7 +260,7 @@ jobs: echo "✅ Passed" >> $GITHUB_STEP_SUMMARY test_net: - runs-on: self-hosted + runs-on: 'ubuntu-latest' steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -273,7 +273,7 @@ jobs: echo "✅ Passed" >> $GITHUB_STEP_SUMMARY integration_prebuild: - runs-on: self-hosted + runs-on: 'ubuntu-latest' steps: - name: 'Check out the repo' uses: 'actions/checkout@v4' @@ -338,7 +338,7 @@ jobs: ciphernode_integration_test: needs: [integration_prebuild, build_enclave_cli, build_sdk] - runs-on: self-hosted + runs-on: 
'ubuntu-latest' strategy: matrix: test-suite: [base, persist] @@ -400,7 +400,7 @@ jobs: echo "✅ Passed" >> $GITHUB_STEP_SUMMARY build_enclave_cli: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -431,7 +431,7 @@ jobs: retention-days: 1 crisp_unit: - runs-on: self-hosted + runs-on: ubuntu-latest needs: [build_crisp_sdk] steps: - uses: actions/checkout@v4 @@ -504,7 +504,7 @@ jobs: run: 'pnpm test:contracts' crisp_e2e: - runs-on: self-hosted + runs-on: ubuntu-latest needs: [build_enclave_cli, build_crisp_sdk] steps: - uses: actions/checkout@v4 @@ -612,7 +612,7 @@ jobs: retention-days: 30 test_enclave_circuits: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -638,7 +638,7 @@ jobs: if-no-files-found: warn build_e3_support_dev: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -667,7 +667,7 @@ jobs: if-no-files-found: error build_sdk: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -716,7 +716,7 @@ jobs: if-no-files-found: warn build_crisp_sdk: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -760,7 +760,7 @@ jobs: if-no-files-found: warn template_integration: - runs-on: self-hosted + runs-on: ubuntu-latest needs: [build_enclave_cli, build_e3_support_dev, build_sdk] steps: - uses: actions/checkout@v4 @@ -820,7 +820,7 @@ jobs: pnpm test:integration test_enclave_init: - runs-on: self-hosted + runs-on: ubuntu-latest needs: [build_enclave_cli, build_e3_support_dev] steps: - name: Install pnpm @@ -857,7 +857,7 @@ jobs: enclave init mycitest --verbose --template=${{ github.server_url }}/${{ github.repository }}.git#${BRANCH}:templates/default contrib-readme-job: - runs-on: self-hosted + runs-on: ubuntu-latest name: Populate Contributors List # Only run on main branch to avoid branch conflicts if: github.ref == 'refs/heads/main' && 
!contains(github.event.head_commit.message, 'contrib-readme-action')