diff --git a/Cargo.lock b/Cargo.lock index e04dfcb..49b0140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -242,6 +242,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "async-channel" version = "2.5.0" @@ -662,6 +668,37 @@ dependencies = [ "tower-service", ] +[[package]] +name = "aya" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d18bc4e506fbb85ab7392ed993a7db4d1a452c71b75a246af4a80ab8c9d2dd50" +dependencies = [ + "assert_matches", + "aya-obj", + "bitflags 2.11.0", + "bytes", + "libc", + "log", + "object 0.36.7", + "once_cell", + "thiserror 1.0.69", +] + +[[package]] +name = "aya-obj" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51b96c5a8ed8705b40d655273bc4212cbbf38d4e3be2788f36306f154523ec7" +dependencies = [ + "bytes", + "core-error", + "hashbrown 0.15.5", + "log", + "object 0.36.7", + "thiserror 1.0.69", +] + [[package]] name = "backtrace" version = "0.3.76" @@ -672,7 +709,7 @@ dependencies = [ "cfg-if", "libc", "miniz_oxide", - "object", + "object 0.37.3", "rustc-demangle", "windows-link 0.2.1", ] @@ -1015,6 +1052,15 @@ version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" +[[package]] +name = "core-error" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efcdb2972eb64230b4c50646d8498ff73f5128d196a90c7236eec4cbe8619b8f" +dependencies = [ + "version_check", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1383,18 +1429,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "fastrand" version = "2.4.1" @@ -1759,21 +1793,14 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.1.5", ] @@ -1788,15 +1815,6 @@ dependencies = [ "foldhash 0.2.0", ] -[[package]] -name = "hashlink" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" -dependencies = [ - "hashbrown 0.14.5", -] - [[package]] name = "heck" version = "0.5.0" @@ -2392,17 +2410,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "libsqlite3-sys" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - [[package]] name = "libz-ng-sys" version = "1.1.28" @@ -2769,17 +2776,17 @@ dependencies = [ name = "nullnet-client" version = "0.1.0" dependencies = [ + "aya", "chrono", "clap", "etherparse", "futures", "gag", "ipnetwork", + "libc", "listeners", "network-interface", "nfq", - "notify", - "nullnet-firewall", "nullnet-grpc-lib", "nullnet-liberror", "rtnetlink", @@ -2789,16 +2796,6 @@ dependencies = [ "tun-rs", ] -[[package]] -name = "nullnet-firewall" -version = "0.2.2" -source = "git+https://github.com/GyulyVGC/nullnet-firewall.git#e3289fc49ecf1a38f7b5b5ccd2370958823ba02d" -dependencies = [ - "chrono", - "etherparse", - "rusqlite", -] - [[package]] name = "nullnet-grpc-lib" version = "0.1.0" @@ -2923,6 +2920,18 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "crc32fast", + "hashbrown 0.15.5", + "indexmap 2.13.1", + "memchr", +] + [[package]] name = "object" version = "0.37.3" @@ -3862,20 +3871,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "rusqlite" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" -dependencies = [ - "bitflags 2.11.0", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "smallvec", -] - [[package]] name = "rust-embed" version = "8.11.0" diff --git a/README.md b/README.md index ed93287..c8504f0 100644 --- a/README.md +++ b/README.md @@ -115,11 +115,10 @@ The repository should be cloned under `/root` so the provided `setup-*.sh` scrip ### nullnet-client - set environment variables (in `members/nullnet-client/.env`; set `CONTROL_SERVICE_ADDR` to the IP - of `nullnet-server`, `ETH_NAME` to the ethernet interface to monitor) + of `nullnet-server`). The uplink interface is auto-detected from the host's default route. ``` CONTROL_SERVICE_ADDR=192.168.1.100 CONTROL_SERVICE_PORT=50051 - ETH_NAME=ens18 ``` - service configuration must be stored at `members/nullnet-client/services.toml`. Each entry diff --git a/docs/ebpf-firewall-traffic.md b/docs/ebpf-firewall-traffic.md new file mode 100644 index 0000000..aac0d88 --- /dev/null +++ b/docs/ebpf-firewall-traffic.md @@ -0,0 +1,93 @@ +# eBPF host firewall — required traffic on a co-located host + +The `ebpf` branch attaches a **default-deny** TC classifier to the host's primary +ethernet interface (`ebpf/src/main.rs`, loaded by `members/nullnet-client/src/ebpf/mod.rs`). +The classifier is **strict nullnet-only** and **stateless** (no conntrack): every +"allow" must cover return traffic by port/endpoint, since there is no connection +tracking. + +The firewall is loaded by the **nullnet-client**, which in practice **always runs +alongside the nullnet-server on the same host**. That co-location is the problem +this doc addresses: the strict allowlist drops nearly all of the server's own +traffic, plus the host's management and operational traffic. + +## What the firewall allows today + +The kernel program (`ebpf/src/main.rs`) permits only: + +- **ARP** — next-hop resolution (`main.rs:72`) +- **TCP to/from `SERVER_IP:CONTROL_PORT`** — gRPC control plane, either direction + (`verdict_control_plane`, `main.rs:100-109`) +- **UDP 4789 (VXLAN) or 9999 (forward) to/from a known peer** — data plane + (`verdict_data_plane`, `main.rs:114-125`) + +Everything else is `TC_ACT_SHOT`, including **all ICMP and all IPv6** +(`main.rs:91,94`). + +## Complete traffic table + +App traffic is from a code trace; operational/host rows are standard requirements +for a real Linux box that the stateless firewall would also drop. + +| # | Traffic | Proto / Port | Direction | Currently | Severity | Notes | +|---|---|---|---|---|---|---| +| 1 | ARP | L2 | both | ✅ allowed | — | next-hop resolution (`main.rs:72`) | +| 2 | gRPC control plane | TCP 50051 | both | ✅ allowed | — | the one app flow that works (`verdict_control_plane`) | +| 3 | VXLAN data plane | UDP 4789 | both (peers) | ✅ allowed | — | known peers only | +| 4 | Forward data plane | UDP 9999 | both (peers) | ✅ allowed | — | known peers only | +| 5 | SSH admin access | TCP 22 | inbound | ❌ dropped | 🔴 critical | enabling over SSH kills the session — console-only today | +| 6 | Dashboard + SSE | TCP 8080 | inbound | ❌ dropped | 🔴 critical | single HTTPS listener (`http_server/mod.rs:24`) | +| 7 | ACME / Let's Encrypt | TCP 443 | outbound (+return) | ❌ dropped | 🔴 critical | issuance + 12h renewal (`cert/authority.rs`, `cert_renewal.rs:25`) | +| 8 | DNS-01 provider APIs | TCP 443 | outbound (+return) | ❌ dropped | 🔴 critical | Cloudflare/Route53/Google/OVH/etc (`cert/dns_providers/*`) | +| 9 | DNS resolution | UDP/TCP 53 | outbound (+return) | ❌ dropped | 🔴 critical | needed by 7, 8, and hostname `CONTROL_SERVICE_ADDR` on restart | +| 10 | DHCP lease renewal | UDP 67/68 | both | ❌ dropped | 🔴 critical | cloud Ubuntu default; lease expiry = **host loses its IP**. Moot only if static IP pinned | +| 11 | NTP time sync | UDP 123 | outbound (+return) | ❌ dropped | 🟠 high | clock drift breaks TLS/ACME validation | +| 12 | ICMP (echo, frag-needed/PMTUD, unreachable, time-exceeded) | ICMP | both | ❌ dropped | 🟠 high | breaks ping health checks + Path-MTU discovery; worsens the VXLAN MTU black hole | +| 13 | IPv6 + ICMPv6 (ND/RA) | IPv6 | both | ❌ dropped | 🟠 high | only ARP+IPv4 matched; on a v6 network, ND is mandatory (the v6 "ARP") → total v6 blackout | +| 14 | OS package updates | TCP 80/443 | outbound (+return) | ❌ dropped | 🟡 medium | apt / unattended-upgrades; security patches | +| 15 | Cloud metadata (IMDS) | TCP 80 → 169.254.169.254 | outbound | ❌ dropped | 🟡 medium | cloud-init / SSM / waagent / guest agents need it on AWS/Azure/GCP | +| 16 | Cloud provider agents | TCP 443 | outbound | ❌ dropped | 🟡 medium | SSM agent, waagent, ops agent — if present on the image | +| 17 | Mosh (if used instead of SSH) | UDP 60000-61000 | both | ❌ dropped | ⚪ optional | only if you use mosh | + +## Confirmed NOT needed + +Verified absent in both binaries by code trace — these will never appear in an +allowlist: + +- No remote database (no sqlx/postgres/mysql/redis/mongo) +- No telemetry / metrics export (no opentelemetry/otlp/prometheus/sentry/statsd) +- No webhooks, SMTP, or external notifications +- No object storage / S3 +- No app self-update or package fetching at runtime + +Events are in-memory only (`members/nullnet-server/src/events.rs`) and exposed +solely via the 8080 SSE stream (row 6). + +## Notable nuances + +- **Statelessness.** No conntrack means return traffic for any new allow (e.g. + outbound 443) must be matched by an explicit rule. The existing control-plane + rule already does this by matching `src/dst == SERVER_IP:CONTROL_PORT` in either + direction. +- **`CONTROL_SERVICE_ADDR` resolution.** The client's *initial* resolution runs + before the firewall is attached, but a process restart (the supervisor restarts + on control-channel drop) re-resolves. If the address is a **hostname**, that + restart needs DNS (row 9); a **literal IP** avoids it. +- **Loopback is not filtered.** Only the primary ethernet interface is hooked, so + `localhost:8080` on the box still works — but remote browser access to the + dashboard does not. + +## Implication + +Rows 5–13 are effectively mandatory for a co-located server host to function and +stay manageable; 14–16 for it to stay patched and cloud-integrated. Once SSH, +8080, 443, 53, 123, DHCP, ICMP, and IPv6/ICMPv6 are allowed, the "strict +nullnet-only" property no longer holds on the server box — it becomes a +conventional allowlist. + +The core decision: + +- **(a) Server-mode flag** — a gated mode that opens rows 5–16, keeping pure + client-only hosts strict. +- **(b) Don't attach the firewall on the co-located host** — only lock down + true client-only nodes. diff --git a/ebpf/src/main.rs b/ebpf/src/main.rs index e78e766..7175db7 100644 --- a/ebpf/src/main.rs +++ b/ebpf/src/main.rs @@ -1,16 +1,132 @@ #![no_std] #![no_main] -use aya_ebpf::{bindings::TC_ACT_SHOT, macros::classifier, programs::TcContext}; +use aya_ebpf::{ + bindings::{TC_ACT_OK, TC_ACT_SHOT}, + macros::{classifier, map}, + maps::HashMap, + programs::TcContext, +}; +use core::mem; +use network_types::{ + eth::{EthHdr, EtherType}, + ip::{IpProto, Ipv4Hdr}, + tcp::TcpHdr, + udp::UdpHdr, +}; + +// Host-NIC default-deny firewall (strict nullnet-only mode). Attached to TC +// ingress + egress on the host's primary interface. Only nullnet traffic is +// allowed; everything else on that NIC is dropped: +// - ARP (required for next-hop resolution) +// - TCP to/from SERVER_IP:PORT (nullnet control plane / gRPC) +// - UDP 4789/9999 to/from a peer (nullnet data plane: VXLAN / forward) +// Peers are added/removed from PEERS by userspace as the control channel +// installs/tears down VXLAN/VLAN edges. + +const VXLAN_PORT: u16 = 4789; +const FORWARD_PORT: u16 = 9999; + +// Allowlist of peer underlay IPs (host-order `u32::from(Ipv4Addr)` keys, which +// is exactly what `u32::from_be_bytes(ipv4_header.src_addr)` yields here). +#[map] +static PEERS: HashMap = HashMap::with_max_entries(4096, 0); + +// Set from userspace at load time (see members/nullnet-client/src/ebpf). +// SERVER_IP is host-order (`u32::from(Ipv4Addr)`); CONTROL_PORT is host-order. +#[unsafe(no_mangle)] +static SERVER_IP: u32 = 0; +#[unsafe(no_mangle)] +static CONTROL_PORT: u16 = 0; -/// Unconditional drop classifier. Currently defined but not attached anywhere; -/// kept as the seed for a future "block traffic not associated with a nullnet -/// flow" feature that will hook this (gated by a policy) somewhere on the -/// host's network path. Trigger detection has moved to the userspace NFQUEUE -/// listener — see members/nullnet-client/src/nfqueue/. #[classifier] -pub fn nullnet_drop(_ctx: TcContext) -> i32 { - TC_ACT_SHOT +pub fn nullnet_firewall(ctx: TcContext) -> i32 { + // A malformed / too-short frame can't be nullnet traffic → drop (strict). + match try_firewall(&ctx) { + Ok(ret) => ret, + Err(()) => TC_ACT_SHOT, + } +} + +#[inline] +fn ptr_at(ctx: &TcContext, offset: usize) -> Result<*const T, ()> { + let start = ctx.data(); + let end = ctx.data_end(); + let len = mem::size_of::(); + + if start + offset + len > end { + return Err(()); + } + + Ok((start + offset) as *const T) +} + +#[inline] +fn try_firewall(ctx: &TcContext) -> Result { + let eth_header: *const EthHdr = ptr_at(ctx, 0)?; + let ether_type = EtherType::try_from(unsafe { (*eth_header).ether_type }).map_err(|_| ())?; + + match ether_type { + // ARP must pass: without next-hop resolution nothing flows, nullnet + // control/data plane included. + EtherType::Arp => Ok(TC_ACT_OK), + EtherType::Ipv4 => { + let ipv4_header: *const Ipv4Hdr = ptr_at(ctx, EthHdr::LEN)?; + let src = u32::from_be_bytes(unsafe { (*ipv4_header).src_addr }); + let dst = u32::from_be_bytes(unsafe { (*ipv4_header).dst_addr }); + + match unsafe { (*ipv4_header).proto } { + IpProto::Tcp => { + let tcp_header: *const TcpHdr = ptr_at(ctx, EthHdr::LEN + Ipv4Hdr::LEN)?; + let src_port = u16::from_be_bytes(unsafe { (*tcp_header).source }); + let dst_port = u16::from_be_bytes(unsafe { (*tcp_header).dest }); + Ok(verdict_control_plane(src, dst, src_port, dst_port)) + } + IpProto::Udp => { + let udp_header: *const UdpHdr = ptr_at(ctx, EthHdr::LEN + Ipv4Hdr::LEN)?; + let src_port = u16::from_be_bytes(unsafe { (*udp_header).src }); + let dst_port = u16::from_be_bytes(unsafe { (*udp_header).dst }); + Ok(verdict_data_plane(src, dst, src_port, dst_port)) + } + _ => Ok(TC_ACT_SHOT), + } + } + _ => Ok(TC_ACT_SHOT), + } +} + +// Control plane: TCP where the server endpoint is on the control port, in +// either direction (egress to it, or its return traffic on ingress). +#[inline] +fn verdict_control_plane(src: u32, dst: u32, src_port: u16, dst_port: u16) -> i32 { + let server = unsafe { core::ptr::read_volatile(&SERVER_IP) }; + let ctrl_port = unsafe { core::ptr::read_volatile(&CONTROL_PORT) }; + if (dst == server && dst_port == ctrl_port) || (src == server && src_port == ctrl_port) { + TC_ACT_OK + } else { + TC_ACT_SHOT + } +} + +// Data plane: UDP on the VXLAN (4789) or forward (9999) port with a known peer +// as either endpoint. VXLAN's destination port is 4789 in both directions; the +// forward socket uses 9999 on both ends — checking src or dst covers both. +#[inline] +fn verdict_data_plane(src: u32, dst: u32, src_port: u16, dst_port: u16) -> i32 { + let on_data_port = dst_port == VXLAN_PORT + || src_port == VXLAN_PORT + || dst_port == FORWARD_PORT + || src_port == FORWARD_PORT; + if on_data_port && (is_peer(src) || is_peer(dst)) { + TC_ACT_OK + } else { + TC_ACT_SHOT + } +} + +#[inline] +fn is_peer(ip: u32) -> bool { + unsafe { PEERS.get(&ip) }.is_some() } #[panic_handler] diff --git a/members/nullnet-client/Cargo.toml b/members/nullnet-client/Cargo.toml index 79affbb..825b496 100644 --- a/members/nullnet-client/Cargo.toml +++ b/members/nullnet-client/Cargo.toml @@ -6,12 +6,10 @@ license.workspace = true authors.workspace = true [dependencies] -nullnet-firewall = { git = "https://github.com/GyulyVGC/nullnet-firewall.git" } tun-rs = { version = "2.8.1", features = ["async_tokio"] } etherparse = "0.19.0" clap = { version = "4.6.0", features = ["derive"] } tokio = { workspace = true, features = ["net", "sync", "rt-multi-thread", "macros", "io-util", "time", "fs", "process"] } -notify.workspace = true serde = { version = "1.0.228", default-features = false, features = ["derive", "alloc"] } nullnet-liberror.workspace = true nullnet-grpc-lib.workspace = true @@ -23,4 +21,6 @@ futures = "0.3.32" network-interface = "2.0.5" gag.workspace = true chrono.workspace = true -nfq = "0.2" \ No newline at end of file +nfq = "0.2" +aya = "0.13" +libc = "0.2" \ No newline at end of file diff --git a/members/nullnet-client/firewall/firewall.txt b/members/nullnet-client/firewall/firewall.txt deleted file mode 100644 index 5670de0..0000000 --- a/members/nullnet-client/firewall/firewall.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Firewall rules -# Rules preceded by '+' have higher priority (quick rules) - -IN ACCEPT --dport 22 -IN ACCEPT --proto 1 --icmp-type 8 -IN ACCEPT --dport 3000,3001 - - -# Accept iperf traffic but don't log it!!! - -IN ACCEPT --dport 5001,5201 --log-level off -OUT ACCEPT --dport 5001,5201 --log-level off -IN ACCEPT --sport 5001,5201 --log-level off -OUT ACCEPT --sport 5001,5201 --log-level off diff --git a/members/nullnet-client/src/cli.rs b/members/nullnet-client/src/cli.rs index 0608efb..2e9edbb 100644 --- a/members/nullnet-client/src/cli.rs +++ b/members/nullnet-client/src/cli.rs @@ -7,9 +7,6 @@ pub struct Args { /// Maximum Transmission Unit (bytes) #[arg(long, default_value_t = 42500)] pub mtu: u16, - /// Path of the file defining firewall rules (it should be inside a dedicated folder) - #[arg(long, default_value_t = String::from("firewall/firewall.txt"))] - pub firewall_path: String, /// Number of asynchronous tasks to use (AKA coroutines) #[arg(long, default_value_t = 2, value_parser=clap::value_parser!(u8).range(2..))] pub num_tasks: u8, diff --git a/members/nullnet-client/src/commands/mod.rs b/members/nullnet-client/src/commands/mod.rs index 9e32b8f..86198e8 100644 --- a/members/nullnet-client/src/commands/mod.rs +++ b/members/nullnet-client/src/commands/mod.rs @@ -64,6 +64,24 @@ pub(crate) async fn find_ethernet_ip(rtnetlink_handle: &RtNetLinkHandle) -> Opti netlink::find_ethernet_ip(&rtnetlink_handle.handle).await } +/// Returns the name of the interface carrying `ip`, so the eBPF firewall can +/// attach to the same NIC the forward socket binds to. +pub(crate) fn find_ethernet_interface(ip: Ipv4Addr) -> Option { + use network_interface::{NetworkInterface, NetworkInterfaceConfig}; + use std::net::IpAddr; + + NetworkInterface::show() + .ok()? + .into_iter() + .find_map(|iface| { + iface + .addr + .iter() + .any(|addr| matches!(addr.ip(), IpAddr::V4(v4) if v4 == ip)) + .then_some(iface.name) + }) +} + #[derive(Clone)] pub(crate) struct RtNetLinkHandle { handle: Handle, diff --git a/members/nullnet-client/src/commands/netlink.rs b/members/nullnet-client/src/commands/netlink.rs index 71bb44e..3bd7d3f 100644 --- a/members/nullnet-client/src/commands/netlink.rs +++ b/members/nullnet-client/src/commands/netlink.rs @@ -4,7 +4,8 @@ use ipnetwork::Ipv4Network; use nullnet_liberror::{Error, ErrorHandler, Location, location}; use rtnetlink::packet_route::address::AddressAttribute; use rtnetlink::packet_route::link::{LinkAttribute, LinkMessage}; -use rtnetlink::{Handle, LinkUnspec, LinkVeth}; +use rtnetlink::packet_route::route::{RouteAttribute, RouteHeader}; +use rtnetlink::{Handle, LinkUnspec, LinkVeth, RouteMessageBuilder}; use std::net::{IpAddr, Ipv4Addr}; #[derive(Debug)] @@ -127,22 +128,58 @@ async fn set_interface_up(handle: &Handle, interface: &str) { } } +/// IPv4 of the host's uplink: the interface carrying the default route. More +/// robust than scanning for the first private IP, since nullnet's own +/// br0/tap/veth/vxlan interfaces never own the default route. pub(super) async fn find_ethernet_ip(handle: &Handle) -> Option { - let mut links = handle.address().get().execute(); - while let Some(msg_res) = links.next().await { - if let Ok(msg) = msg_res - && let Some(addr) = msg.attributes.iter().find_map(|attr| { - if let AddressAttribute::Local(ip) = attr - && let IpAddr::V4(ipv4) = ip - && ipv4.is_private() - { - Some(*ipv4) - } else { - None - } - }) + let oif = default_route_oif(handle).await?; + ipv4_on_link(handle, oif).await +} + +/// Output-interface index of the lowest-metric IPv4 default route (main table). +async fn default_route_oif(handle: &Handle) -> Option { + let route_msg = RouteMessageBuilder::::new().build(); + let mut routes = handle.route().get(route_msg).execute(); + let mut best: Option<(u32, u32)> = None; // (oif, metric) + while let Some(route) = routes.next().await { + let Ok(route) = route else { continue }; + // default route: zero-length destination prefix in the main table + if route.header.destination_prefix_length != 0 + || route.header.table != RouteHeader::RT_TABLE_MAIN { - return Some(addr); + continue; + } + let mut oif = None; + let mut metric = 0; + for attr in &route.attributes { + match attr { + RouteAttribute::Oif(index) => oif = Some(*index), + RouteAttribute::Priority(p) => metric = *p, + _ => {} + } + } + if let Some(oif) = oif + && best.is_none_or(|(_, best_metric)| metric < best_metric) + { + best = Some((oif, metric)); + } + } + best.map(|(oif, _)| oif) +} + +/// First IPv4 address assigned to the interface with index `oif`. +async fn ipv4_on_link(handle: &Handle, oif: u32) -> Option { + let mut addrs = handle.address().get().execute(); + while let Some(msg) = addrs.next().await { + let Ok(msg) = msg else { continue }; + if msg.header.index != oif { + continue; + } + if let Some(ipv4) = msg.attributes.iter().find_map(|attr| match attr { + AddressAttribute::Local(IpAddr::V4(ipv4)) => Some(*ipv4), + _ => None, + }) { + return Some(ipv4); } } None diff --git a/members/nullnet-client/src/control_channel.rs b/members/nullnet-client/src/control_channel.rs index ccfa058..35005fe 100644 --- a/members/nullnet-client/src/control_channel.rs +++ b/members/nullnet-client/src/control_channel.rs @@ -1,4 +1,5 @@ use crate::commands::{RtNetLinkHandle, configure_access_port, dnat, remove_vlan}; +use crate::ebpf::{FirewallPeers, NetId}; use crate::host_mappings::HostMappingsState; use crate::peers::peer::{Peers, VethKey}; use crate::triggers::TriggersState; @@ -36,6 +37,7 @@ pub(crate) async fn control_channel( rtnetlink_handle: RtNetLinkHandle, triggers_state: Arc, host_mappings_state: Arc, + firewall_peers: Arc, ) -> Result<(), Error> { let (outbound, grpc_rx) = mpsc::channel(64); let mut inbound = server @@ -54,6 +56,7 @@ pub(crate) async fn control_channel( let outbound = outbound.clone(); let host_mappings_state = host_mappings_state.clone(); let server = server.clone(); + let firewall_peers = firewall_peers.clone(); match message.message { Some(net_message::Message::VlanSetup(vlan_setup)) => { tokio::spawn(async move { @@ -64,6 +67,7 @@ pub(crate) async fn control_channel( outbound, host_mappings_state, server, + firewall_peers, ) .await; }); @@ -76,6 +80,7 @@ pub(crate) async fn control_channel( peers, host_mappings_state, server, + firewall_peers, ) .await; }); @@ -89,6 +94,7 @@ pub(crate) async fn control_channel( triggers_state, host_mappings_state, server, + firewall_peers, ) .await; }); @@ -101,6 +107,7 @@ pub(crate) async fn control_channel( triggers_state, host_mappings_state, server, + firewall_peers, ); }); } @@ -133,6 +140,7 @@ async fn handle_vlan_setup( outbound: Sender, host_mappings_state: Arc, grpc: NullnetGrpcInterface, + firewall_peers: Arc, ) -> Result<(), Error> { let msg_id = &message .msg_id @@ -182,6 +190,9 @@ async fn handle_vlan_setup( .await .insert(VethKey::new(remote_veth, vlan_id), remote_ip); + // allow this peer's data-plane traffic through the host firewall + firewall_peers.add(NetId::Vlan(vlan_id), remote_ip); + // add host mapping if needed if let Some(host_mapping) = &message.host_mapping { if add_host_mapping(host_mapping, None).is_err() { @@ -224,6 +235,7 @@ async fn handle_vlan_teardown( peers: Arc>, host_mappings_state: Arc, grpc: NullnetGrpcInterface, + firewall_peers: Arc, ) -> Result<(), Error> { let vlan_id = u16::try_from(message.vlan_id) .handle_err(location!()) @@ -250,6 +262,9 @@ async fn handle_vlan_teardown( // remove peer peers.write().await.remove(vlan_id); + // drop this peer's firewall allowance (refcounted; only removed if unused) + firewall_peers.remove(NetId::Vlan(vlan_id)); + // remove host mapping if one was installed at setup if let Some(host_mapping) = host_mappings_state.take_vlan(vlan_id) { let _ = remove_host_mapping(&host_mapping, None); @@ -264,6 +279,7 @@ async fn handle_vxlan_setup( triggers_state: Arc, host_mappings_state: Arc, grpc: NullnetGrpcInterface, + firewall_peers: Arc, ) -> Result<(), Error> { let msg_id = &message .msg_id @@ -289,6 +305,10 @@ async fn handle_vxlan_setup( .parse::() .handle_err(location!())?; + // allow this peer's data-plane (VXLAN underlay) traffic through the host + // firewall before the tunnel comes up, so the first packets aren't dropped. + firewall_peers.add(NetId::Vxlan(vxlan_id), remote_ip); + // setup VXLAN on this machine (optionally attaching a Docker container) let init_t = std::time::Instant::now(); let mut cmd = std::process::Command::new("./vxlan_scripts/vxlan-setup.sh"); @@ -429,7 +449,11 @@ fn handle_vxlan_teardown( triggers_state: Arc, host_mappings_state: Arc, grpc: NullnetGrpcInterface, + firewall_peers: Arc, ) { + // drop this peer's firewall allowance (refcounted; only removed if unused) + firewall_peers.remove(NetId::Vxlan(message.vxlan_id)); + // remove DNAT before tearing the tunnel down so existing flows reset // cleanly. The `container_ip` matches the `-s` we used at install time. if let Some((_container, port, overlay_ip, container_ip)) = diff --git a/members/nullnet-client/src/craft/mod.rs b/members/nullnet-client/src/craft/mod.rs deleted file mode 100644 index 79f42b6..0000000 --- a/members/nullnet-client/src/craft/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod reject_payloads; diff --git a/members/nullnet-client/src/craft/reject_payloads.rs b/members/nullnet-client/src/craft/reject_payloads.rs deleted file mode 100644 index 52ddaf0..0000000 --- a/members/nullnet-client/src/craft/reject_payloads.rs +++ /dev/null @@ -1,194 +0,0 @@ -use etherparse::icmpv4::DestUnreachableHeader; -use etherparse::{ - Icmpv4Header, Icmpv4Type, IpFragOffset, IpNumber, LaxPacketHeaders, LinkExtHeader, LinkHeader, - NetHeaders, TcpOptions, TransportHeader, -}; -use std::net::SocketAddr; -use std::sync::Arc; -use tokio::net::UdpSocket; - -/// Sends a proper message to gracefully acknowledge a peer that a packet was rejected, -/// based on the observed protocol: -/// - in case of TCP, a packet with RST and ACK flag is sent -/// - in case of UDP, an ICMP port unreachable message is sent -/// - in case of other protocols, an ICMP host unreachable message is sent -pub async fn send_termination_message( - packet: &[u8], - socket: &Arc, - remote_socket: SocketAddr, -) { - let Ok(headers) = LaxPacketHeaders::from_ethernet(packet) else { - return; - }; - let Some(NetHeaders::Ipv4(ip_header, _)) = &headers.net else { - return; - }; - let IpNumber(proto) = ip_header.protocol; - - match proto { - 6 => Box::pin(send_tcp_rst(headers, socket, remote_socket)).await, - 17 => { - // port unreachable - let icmp_type = Icmpv4Type::DestinationUnreachable(DestUnreachableHeader::Port); - Box::pin(send_destination_unreachable( - packet, - headers, - socket, - icmp_type, - remote_socket, - )) - .await; - } - _ => { - // host unreachable - let icmp_type = Icmpv4Type::DestinationUnreachable(DestUnreachableHeader::Host); - Box::pin(send_destination_unreachable( - packet, - headers, - socket, - icmp_type, - remote_socket, - )) - .await; - } - } -} - -async fn send_destination_unreachable( - packet: &[u8], - headers: LaxPacketHeaders<'_>, - socket: &Arc, - icmp_type: Icmpv4Type, - remote_socket: SocketAddr, -) { - let Some(LinkHeader::Ethernet2(mut ethernet_header)) = headers.link else { - return; - }; - std::mem::swap( - &mut ethernet_header.source, - &mut ethernet_header.destination, - ); - let ethernet_header_bytes = ethernet_header.to_bytes(); - - let link_exts = &headers.link_exts; - let link_exts_bytes: Vec = link_exts - .iter() - .flat_map(|ext| match ext { - LinkExtHeader::Vlan(e) => e.to_bytes().to_vec(), - LinkExtHeader::Macsec(e) => e.to_bytes().to_vec(), - }) - .collect(); - - let Some(NetHeaders::Ipv4(mut ip_header, _)) = headers.net else { - return; - }; - let original_ip_header_bytes = ip_header.to_bytes(); - let size_up_to_ip_header = - ethernet_header_bytes.len() + link_exts_bytes.len() + original_ip_header_bytes.len(); - let icmp_payload = [ - original_ip_header_bytes.as_slice(), - packet - .get(size_up_to_ip_header..size_up_to_ip_header + 8) - .unwrap_or(&[]), - ] - .concat(); - ip_header.identification = 0; - ip_header.fragment_offset = IpFragOffset::ZERO; - std::mem::swap(&mut ip_header.source, &mut ip_header.destination); - ip_header.total_len = 56; // 20 (ip header) + 8 (icmp header) + 28 (original ip header + first 8 bytes of data) - ip_header.header_checksum = ip_header.calc_header_checksum(); - let ip_header_bytes = ip_header.to_bytes(); - - let mut icmp_header = Icmpv4Header::new(icmp_type); - icmp_header.update_checksum(&icmp_payload); - let icmp_header_bytes = icmp_header.to_bytes(); - - #[rustfmt::skip] - let pkt_response = [ - ðernet_header_bytes[..], - &link_exts_bytes[..], - &ip_header_bytes[..], - &icmp_header_bytes[..], - &icmp_payload[..], - ].concat(); - - socket - .send_to(&pkt_response, remote_socket) - .await - .unwrap_or(0); -} - -async fn send_tcp_rst( - headers: LaxPacketHeaders<'_>, - socket: &Arc, - remote_socket: SocketAddr, -) { - let Some(LinkHeader::Ethernet2(mut ethernet_header)) = headers.link else { - return; - }; - std::mem::swap( - &mut ethernet_header.source, - &mut ethernet_header.destination, - ); - let ethernet_header_bytes = ethernet_header.to_bytes(); - - let link_exts = &headers.link_exts; - let link_exts_bytes: Vec = link_exts - .iter() - .flat_map(|ext| match ext { - LinkExtHeader::Vlan(e) => e.to_bytes().to_vec(), - LinkExtHeader::Macsec(e) => e.to_bytes().to_vec(), - }) - .collect(); - - let Some(NetHeaders::Ipv4(mut ip_header, _)) = headers.net else { - return; - }; - ip_header.identification = 0; - ip_header.fragment_offset = IpFragOffset::ZERO; - std::mem::swap(&mut ip_header.source, &mut ip_header.destination); - ip_header.total_len = 40; - ip_header.header_checksum = ip_header.calc_header_checksum(); - let ip_header_bytes = ip_header.to_bytes(); - - let Some(TransportHeader::Tcp(mut tcp_header)) = headers.transport else { - return; - }; - let src_port_orig = tcp_header.source_port; - let seq_num_orig = tcp_header.sequence_number; - tcp_header.source_port = tcp_header.destination_port; - tcp_header.destination_port = src_port_orig; - tcp_header.sequence_number = tcp_header.acknowledgment_number; - tcp_header.acknowledgment_number = if tcp_header.syn { - seq_num_orig.wrapping_add(1) - } else { - seq_num_orig.wrapping_add(u32::try_from(headers.payload.slice().len()).unwrap_or(0)) - }; - tcp_header.ack = true; - tcp_header.rst = true; - tcp_header.cwr = false; - tcp_header.ns = false; - tcp_header.psh = false; - tcp_header.syn = false; - tcp_header.fin = false; - tcp_header.urg = false; - tcp_header.urgent_pointer = 0; - tcp_header.options = TcpOptions::new(); - tcp_header.checksum = tcp_header - .calc_checksum_ipv4(&ip_header, &[]) - .unwrap_or_default(); - let tcp_header_bytes = tcp_header.to_bytes(); - - #[rustfmt::skip] - let pkt_response = [ - ðernet_header_bytes[..], - &link_exts_bytes[..], - &ip_header_bytes[..], - &tcp_header_bytes[..], - ].concat(); - - socket - .send_to(&pkt_response, remote_socket) - .await - .unwrap_or(0); -} diff --git a/members/nullnet-client/src/ebpf/mod.rs b/members/nullnet-client/src/ebpf/mod.rs new file mode 100644 index 0000000..4f0877c --- /dev/null +++ b/members/nullnet-client/src/ebpf/mod.rs @@ -0,0 +1,178 @@ +//! Host-NIC eBPF default-deny firewall (strict nullnet-only mode). +//! +//! Loads the `nullnet_firewall` TC classifier and attaches it to ingress + +//! egress on the host's primary interface. Only nullnet control-plane (gRPC to +//! the server) and data-plane (VXLAN/forward to known peers) traffic is +//! allowed; everything else on that NIC is dropped. ARP is always allowed +//! (required for next-hop resolution). Peers are added/removed from the `PEERS` +//! map by the control channel as VXLAN/VLAN edges come and go. +//! +//! Loading mirrors the previous aya-based loader: raise the memlock rlimit, +//! `EbpfLoader` with `set_global`, ensure a clsact qdisc, then load+attach the +//! `SchedClassifier`. + +use aya::Ebpf; +use aya::maps::{HashMap as AyaHashMap, MapData}; +use nullnet_liberror::{Error, ErrorHandler, Location, location}; +use std::collections::HashMap; +use std::net::Ipv4Addr; +use std::sync::{Arc, Mutex}; + +const PROGRAM_NAME: &str = "nullnet_firewall"; + +/// Identifies the edge a peer allowlist entry belongs to, so a teardown (which +/// carries only the net id, not the remote IP) can decrement the right peer. +#[derive(Hash, Eq, PartialEq, Clone, Copy)] +pub enum NetId { + Vlan(u16), + Vxlan(u32), +} + +/// Live handle to the attached firewall. Holds the loaded `Ebpf` (dropping it +/// detaches the program) and the peer allowlist shared with the control +/// channel. Keep it alive for the whole run. +pub struct Firewall { + // held only to keep the loaded program + attached links alive (drop = + // detach); never read directly. + #[allow(dead_code)] + bpf: Ebpf, + pub peers: Arc, +} + +/// Refcounted peer allowlist backing the `PEERS` BPF map. Multiple edges can +/// reference the same underlay IP; an IP is only removed from the map once the +/// last edge using it is torn down. +pub struct FirewallPeers { + inner: Mutex, +} + +struct PeerInner { + map: AyaHashMap, + refcounts: HashMap, + by_id: HashMap, +} + +impl PeerInner { + fn incr(&mut self, key: u32) { + let count = self.refcounts.entry(key).or_insert(0); + *count += 1; + if *count == 1 { + let _ = self.map.insert(key, 0u8, 0); + } + } + + fn decr(&mut self, key: u32) { + if let Some(count) = self.refcounts.get_mut(&key) { + *count -= 1; + if *count == 0 { + self.refcounts.remove(&key); + let _ = self.map.remove(&key); + } + } + } +} + +impl FirewallPeers { + fn new(map: AyaHashMap) -> Self { + Self { + inner: Mutex::new(PeerInner { + map, + refcounts: HashMap::new(), + by_id: HashMap::new(), + }), + } + } + + /// Allow data-plane traffic to/from `peer` for edge `id` (refcounted). + pub fn add(&self, id: NetId, peer: Ipv4Addr) { + let key = u32::from(peer); + let mut inner = self.inner.lock().unwrap(); + match inner.by_id.insert(id, key) { + // edge already mapped to this peer: nothing to do + Some(old) if old == key => {} + // edge re-pointed at a different peer: move the refcount + Some(old) => { + inner.decr(old); + inner.incr(key); + } + None => inner.incr(key), + } + } + + /// Drop edge `id`'s reference; removes the peer once no edge needs it. + pub fn remove(&self, id: NetId) { + let mut inner = self.inner.lock().unwrap(); + if let Some(key) = inner.by_id.remove(&id) { + inner.decr(key); + } + } +} + +pub fn enable(iface: &str, server_ip: Ipv4Addr, control_port: u16) -> Result { + use aya::EbpfLoader; + use aya::programs::{SchedClassifier, TcAttachType, tc}; + + raise_memlock_rlimit(); + + // Bind the globals to locals: `set_global` holds a borrow until `load()`, + // so a temporary (e.g. `&u32::from(..)`) would dangle. + let server_ip_be = u32::from(server_ip); + let mut loader = EbpfLoader::new(); + loader.set_global("SERVER_IP", &server_ip_be, true); + loader.set_global("CONTROL_PORT", &control_port, true); + let mut bpf = loader + .load(aya::include_bytes_aligned!(env!( + "NULLNET_BIN_PATH", + "NULLNET_BIN_PATH not set — build via `cargo xtask build`" + ))) + .handle_err(location!())?; + + // clsact carries both the ingress and egress TC hooks; idempotent. + match tc::qdisc_add_clsact(iface) { + Ok(()) => println!("[ebpf] clsact qdisc added on {iface}"), + Err(e) => println!("[ebpf] clsact qdisc add returned: {e} (ok if already present)"), + } + + let program: &mut SchedClassifier = bpf + .program_mut(PROGRAM_NAME) + .ok_or("nullnet_firewall program not found in bytecode") + .handle_err(location!())? + .try_into() + .handle_err(location!())?; + program.load().handle_err(location!())?; + + // Same program enforces both directions; the filter logic is symmetric + // (matches on src or dst), so a single load attached twice suffices and + // keeps one shared PEERS map. + program + .attach(iface, TcAttachType::Ingress) + .handle_err(location!())?; + program + .attach(iface, TcAttachType::Egress) + .handle_err(location!())?; + println!("[ebpf] nullnet_firewall attached to {iface} (ingress + egress)"); + + let peers_map: AyaHashMap = bpf + .take_map("PEERS") + .ok_or("PEERS map not found in bytecode") + .handle_err(location!())? + .try_into() + .handle_err(location!())?; + + Ok(Firewall { + bpf, + peers: Arc::new(FirewallPeers::new(peers_map)), + }) +} + +fn raise_memlock_rlimit() { + let rlim = libc::rlimit { + rlim_cur: libc::RLIM_INFINITY, + rlim_max: libc::RLIM_INFINITY, + }; + let ret = unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlim) }; + if ret != 0 { + let err = std::io::Error::last_os_error(); + eprintln!("[ebpf] setrlimit(RLIMIT_MEMLOCK) failed: {err} (eBPF load may fail)"); + } +} diff --git a/members/nullnet-client/src/env.rs b/members/nullnet-client/src/env.rs index 0551c4d..20c599e 100644 --- a/members/nullnet-client/src/env.rs +++ b/members/nullnet-client/src/env.rs @@ -13,10 +13,3 @@ pub static CONTROL_SERVICE_PORT: std::sync::LazyLock = std::sync::LazyLock: str.parse().unwrap_or(50051) }); - -// pub static ETH_NAME: std::sync::LazyLock = std::sync::LazyLock::new(|| { -// std::env::var("ETH_NAME").unwrap_or_else(|_| { -// println!("'ETH_NAME' environment variable not set"); -// "ens18".to_string() -// }) -// }); diff --git a/members/nullnet-client/src/forward/receive.rs b/members/nullnet-client/src/forward/receive.rs index ad90662..66e0944 100644 --- a/members/nullnet-client/src/forward/receive.rs +++ b/members/nullnet-client/src/forward/receive.rs @@ -1,45 +1,24 @@ use std::sync::Arc; -use nullnet_firewall::{Firewall, FirewallAction, FirewallDirection}; use tokio::net::UdpSocket; -use tokio::sync::RwLock; use tun_rs::AsyncDevice; -use crate::craft::reject_payloads::send_termination_message; use crate::forward::frame::Frame; -/// Handles incoming network packets (receives packets from the socket and sends them to the TAP interface), -/// ensuring the firewall rules are correctly observed. -pub async fn receive( - device: &Arc, - socket: &Arc, - firewall: &Arc>, -) { +/// Handles incoming network packets (receives packets from the socket and +/// writes them to the TAP interface). +pub async fn receive(device: &Arc, socket: &Arc) { let mut frame = Frame::new(); - let mut remote_socket; loop { // wait until there is an incoming packet on the socket (packets on the socket are raw IP) - let Ok((s, r)) = socket.recv_from(&mut frame.frame).await else { + let Ok((s, _)) = socket.recv_from(&mut frame.frame).await else { continue; }; - (frame.size, remote_socket) = (s, r); + frame.size = s; if frame.size > 0 { - let pkt_data = frame.pkt_data(); - match firewall - .read() - .await - .resolve_packet(pkt_data, FirewallDirection::IN) - { - FirewallAction::ACCEPT => { - // write packet to the kernel - device.send(pkt_data).await.unwrap_or(0); - } - FirewallAction::REJECT => { - send_termination_message(pkt_data, socket, remote_socket).await; - } - FirewallAction::DENY => {} - } + // write packet to the kernel + device.send(frame.pkt_data()).await.unwrap_or(0); } } } diff --git a/members/nullnet-client/src/forward/send.rs b/members/nullnet-client/src/forward/send.rs index 2854580..26fad06 100644 --- a/members/nullnet-client/src/forward/send.rs +++ b/members/nullnet-client/src/forward/send.rs @@ -1,5 +1,4 @@ use etherparse::{EtherType, LaxPacketHeaders, NetHeaders}; -use nullnet_firewall::{Firewall, FirewallAction, FirewallDirection}; use nullnet_liberror::{Error, ErrorHandler, Location, location}; use std::net::{Ipv4Addr, SocketAddr}; use std::sync::Arc; @@ -10,14 +9,9 @@ use tun_rs::AsyncDevice; use crate::forward::frame::Frame; use crate::peers::peer::{Peers, VethKey}; -/// Handles outgoing network packets (receives packets from the TAP interface and sends them to the socket), -/// ensuring the firewall rules are correctly observed. -pub async fn send( - device: &Arc, - socket: &Arc, - firewall: &Arc>, - peers: Arc>, -) { +/// Handles outgoing network packets (receives packets from the TAP interface +/// and sends them to the socket). +pub async fn send(device: &Arc, socket: &Arc, peers: Arc>) { let mut frame = Frame::new(); loop { // wait until there is a packet outgoing from kernel @@ -29,16 +23,7 @@ pub async fn send( let Ok(dst_socket) = get_dst_socket(pkt_data, &peers).await else { continue; }; - match firewall - .read() - .await - .resolve_packet(pkt_data, FirewallDirection::OUT) - { - FirewallAction::ACCEPT => { - socket.send_to(pkt_data, dst_socket).await.unwrap_or(0); - } - FirewallAction::DENY | FirewallAction::REJECT => {} - } + socket.send_to(pkt_data, dst_socket).await.unwrap_or(0); } } } diff --git a/members/nullnet-client/src/main.rs b/members/nullnet-client/src/main.rs index 1232b6d..6dcd53f 100644 --- a/members/nullnet-client/src/main.rs +++ b/members/nullnet-client/src/main.rs @@ -1,7 +1,9 @@ #![allow(clippy::used_underscore_binding)] use crate::cli::Args; -use crate::commands::{RtNetLinkHandle, cleanup_network, setup_br0}; +use crate::commands::{ + RtNetLinkHandle, cleanup_network, find_ethernet_interface, find_ethernet_ip, setup_br0, +}; use crate::control_channel::control_channel; use crate::env::{CONTROL_SERVICE_ADDR, CONTROL_SERVICE_PORT}; use crate::forward::receive::receive; @@ -11,19 +13,15 @@ use crate::local_endpoints::LocalEndpoints; use crate::peers::peer::Peers; use crate::triggers::TriggersState; use clap::Parser; -use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; -use nullnet_firewall::{DataLink, Firewall, FirewallError, LogLevel}; use nullnet_grpc_lib::NullnetGrpcInterface; use nullnet_grpc_lib::nullnet_grpc::{ - AgentEvent, AgentFirewallRulesLoadFailed, AgentServicesListUpdateFailed, - AgentServicesListUpdated, Net, Services, agent_event::Event as AgentEventKind, + AgentEvent, AgentServicesListUpdateFailed, AgentServicesListUpdated, Net, Services, + agent_event::Event as AgentEventKind, }; use nullnet_liberror::{Error, ErrorHandler, Location, location}; use std::collections::HashMap; -use std::ops::Sub; -use std::path::PathBuf; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Duration; use std::{panic, process}; use tokio::sync::mpsc::UnboundedSender; use tokio::sync::{Notify, RwLock}; @@ -32,7 +30,7 @@ use tun_rs::{DeviceBuilder, Layer}; mod cli; mod commands; mod control_channel; -mod craft; +mod ebpf; mod env; mod forward; mod host_mappings; @@ -64,11 +62,7 @@ async fn main() -> Result<(), Error> { })); // read CLI arguments - let Args { - firewall_path, - num_tasks, - .. - } = Args::parse(); + let Args { num_tasks, .. } = Args::parse(); // create a handle to execute netlink commands let rtnetlink_handle = RtNetLinkHandle::new()?; @@ -80,13 +74,6 @@ async fn main() -> Result<(), Error> { let peers = Arc::new(RwLock::new(Peers::default())); let peers_2 = peers.clone(); - // create firewall based on the defined rules - let mut firewall = Firewall::new(); - firewall.log_level(LogLevel::Db); - firewall.data_link(DataLink::Ethernet); - let firewall_shared = Arc::new(RwLock::new(firewall)); - set_firewall_rules(&firewall_shared, &firewall_path, true, None).await?; - // initialize gRPC connection let grpc_server = grpc_init().await?; let grpc_server2 = grpc_server.clone(); @@ -95,12 +82,28 @@ async fn main() -> Result<(), Error> { let net_type = grpc_server.network_type().await.handle_err(location!())?; if net_type.net() == Net::Vlan { - setup_tap(num_tasks, peers, &firewall_shared, &rtnetlink_handle).await?; + setup_tap(num_tasks, peers, &rtnetlink_handle).await?; setup_br0(&rtnetlink_handle).await; } print_info(net_type.net()); + // bring up the host-NIC eBPF default-deny firewall. Must happen before the + // control channel learns peers so its add()/remove() land in the PEERS map. + // Held alive for the whole run (drop = detach). Fails closed: any error + // aborts startup rather than running unprotected. + let ebpf_firewall = match setup_ebpf_firewall(&rtnetlink_handle).await { + Ok(fw) => { + println!("eBPF host firewall enabled (strict nullnet-only)"); + fw + } + Err(e) => { + eprintln!("Failed to enable eBPF firewall: {e:?}"); + process::exit(1); + } + }; + let firewall_peers = ebpf_firewall.peers.clone(); + // shared dedup + waiter state, keyed by (initiator_container, port). // The NFQUEUE listener marks Pending and awaits the Notify; the control // channel marks Active when the matching VxlanSetup lands. @@ -118,6 +121,7 @@ async fn main() -> Result<(), Error> { rtnetlink_handle, triggers_state_cc, host_mappings_state, + firewall_peers, ) .await { @@ -152,23 +156,15 @@ async fn main() -> Result<(), Error> { ); // declare services + push the port→service map to the NFQUEUE listener - // on each refresh. Clone the grpc handle: the original is still needed - // below for `set_firewall_rules`' event reporting. - let grpc_server_ds = grpc_server.clone(); + // on each refresh. tokio::spawn(async move { - declare_services(grpc_server_ds, config_tx, docker_changed) + declare_services(grpc_server, config_tx, docker_changed) .await .expect("Failed to declare services"); }); - // watch the file defining rules and update the firewall accordingly - set_firewall_rules( - &firewall_shared, - &firewall_path, - false, - Some(grpc_server.clone()), - ) - .await?; + // all work runs in the spawned tasks above; keep the process alive. + std::future::pending::<()>().await; Ok(()) } @@ -181,91 +177,46 @@ fn print_info(net: Net) { println!("{}\n", "=".repeat(40)); } -/// Loads and refreshes firewall rules whenever the corresponding file is updated. -/// `grpc` is only used in the reload path (watch loop); initial load happens before gRPC is up. -async fn set_firewall_rules( - firewall: &Arc>, - firewall_path: &str, - is_init: bool, - grpc: Option, -) -> Result<(), Error> { - let print_info = |result: &Result<(), FirewallError>, is_init: bool| match result { - Err(err) => { - println!("{err}"); - if is_init { - println!("Waiting for a valid firewall file..."); - } else { - println!("Firewall was not updated!"); - } - } - Ok(()) => { - if is_init { - println!("A valid firewall has been instantiated!"); - } else { - println!("Firewall has been updated!"); - } - } - }; - - if is_init { - let result = firewall.write().await.set_rules(firewall_path); - print_info(&result, is_init); - if result.is_ok() { - return Ok(()); - } +/// Resolve, attach, and return the host-NIC eBPF firewall. Fails closed: any +/// problem (unresolvable server, missing NIC, load error) aborts startup rather +/// than running unprotected. +async fn setup_ebpf_firewall(rtnetlink_handle: &RtNetLinkHandle) -> Result { + let server_ip = resolve_server_ip() + .ok_or("could not resolve CONTROL_SERVICE_ADDR to an IPv4 address") + .handle_err(location!())?; + if server_ip.is_unspecified() { + return Err( + "CONTROL_SERVICE_ADDR is unspecified (0.0.0.0); refusing to enable the \ + firewall as it would block the control plane", + ) + .handle_err(location!()); } - let firewall_path_owned = firewall_path.to_string(); - let mut firewall_directory = PathBuf::from(firewall_path); - firewall_directory.pop(); - - let (tx, rx) = std::sync::mpsc::channel(); - let mut watcher = RecommendedWatcher::new(tx, Config::default()).handle_err(location!())?; - watcher - .watch(&firewall_directory, RecursiveMode::Recursive) + let eth_ip = find_ethernet_ip(rtnetlink_handle) + .await + .ok_or("could not find the local ethernet IP") + .handle_err(location!())?; + let iface = find_ethernet_interface(eth_ip) + .ok_or("could not find the ethernet interface name") .handle_err(location!())?; - let mut last_update_time = Instant::now().sub(Duration::from_secs(60)); + let control_port = *CONTROL_SERVICE_PORT; + println!("Attaching eBPF firewall to {iface} (allow control plane {server_ip}:{control_port})"); + ebpf::enable(&iface, server_ip, control_port) +} - loop { - // only update rules if the event is related to a file change - if let Ok(Ok(Event { - kind: EventKind::Modify(_), - .. - })) = rx.recv() - { - // debounce duplicated events - if last_update_time.elapsed().as_millis() > 100 { - // ensure file changes are propagated - tokio::time::sleep(Duration::from_millis(100)).await; - let result = firewall.write().await.set_rules(&firewall_path_owned); - print_info(&result, is_init); - if let Err(ref err) = result - && let Some(ref g) = grpc - { - let g = g.clone(); - let path = firewall_path_owned.clone(); - let error_message = err.to_string(); - tokio::spawn(async move { - let _ = g - .report_event(AgentEvent { - event: Some(AgentEventKind::FirewallRulesLoadFailed( - AgentFirewallRulesLoadFailed { - path, - error_message, - }, - )), - }) - .await; - }); - } - if result.is_ok() && is_init { - return Ok(()); - } - last_update_time = Instant::now(); - } - } - } +/// Resolve `CONTROL_SERVICE_ADDR:CONTROL_SERVICE_PORT` to its first IPv4. +fn resolve_server_ip() -> Option { + use std::net::{IpAddr, ToSocketAddrs}; + let host = CONTROL_SERVICE_ADDR.as_str(); + let port = *CONTROL_SERVICE_PORT; + (host, port) + .to_socket_addrs() + .ok()? + .find_map(|sa| match sa.ip() { + IpAddr::V4(v4) => Some(v4), + IpAddr::V6(_) => None, + }) } async fn grpc_init() -> Result { @@ -439,7 +390,6 @@ async fn get_running_docker_containers() -> HashMap> { async fn setup_tap( num_tasks: u8, peers: Arc>, - firewall_shared: &Arc>, rtnetlink_handle: &RtNetLinkHandle, ) -> Result<(), Error> { // set up the local environment @@ -464,18 +414,16 @@ async fn setup_tap( let reader = reader_shared.clone(); let socket_1 = forward_socket.clone(); let socket_2 = socket_1.clone(); - let firewall_1 = firewall_shared.clone(); - let firewall_2 = firewall_shared.clone(); let peers_2 = peers.clone(); // handle incoming traffic tokio::spawn(async move { - Box::pin(receive(&writer, &socket_1, &firewall_1)).await; + Box::pin(receive(&writer, &socket_1)).await; }); // handle outgoing traffic tokio::spawn(async move { - Box::pin(send(&reader, &socket_2, &firewall_2, peers_2)).await; + Box::pin(send(&reader, &socket_2, peers_2)).await; }); }