diff --git a/ext/extconf.rb b/ext/extconf.rb
index 87985c7..e4f7bad 100755
--- a/ext/extconf.rb
+++ b/ext/extconf.rb
@@ -49,6 +49,7 @@ have_header("sys/wait.h")
 
 
 have_header("sys/eventfd.h")
+have_header("sys/signalfd.h") # Defines HAVE_SYS_SIGNALFD_H, which guards the signalfd fallback in the C sources.
 $srcs << "io/event/interrupt.c"
 
 have_func("rb_io_descriptor")
diff --git a/ext/io/event/selector/epoll.c b/ext/io/event/selector/epoll.c
index 6ef7a3f..8765703 100644
--- a/ext/io/event/selector/epoll.c
+++ b/ext/io/event/selector/epoll.c
@@ -12,6 +12,7 @@
 
 #include "pidfd.c"
 #include "../interrupt.h"
+#include "process_wait_signalfd.c" // Textually included; provides the static process_wait_signalfd_{open,check,close} helpers.
 
 enum {
 	DEBUG = 0,
@@ -476,43 +477,118 @@ VALUE process_wait_ensure(VALUE _arguments) {
 
 struct IO_Event_List_Type IO_Event_Selector_EPoll_process_wait_list_type = {};
 
+#ifdef HAVE_SYS_SIGNALFD_H
+struct process_wait_signalfd_arguments { // State shared by the transfer/ensure pair of the signalfd fallback.
+	struct IO_Event_Selector_EPoll *selector; // Selector whose event loop the fiber yields to.
+	struct IO_Event_Selector_EPoll_Waiting waiting; // Embedded by value; re-initialized on every loop iteration in transfer.
+	pid_t pid; // Child being waited for.
+	int flags; // Wait flags, forwarded unchanged to the status check.
+	int descriptor; // The signalfd descriptor (not a pidfd).
+	VALUE fiber; // Fiber recorded in the waiting entry.
+	sigset_t old_mask; // Mask restored by ensure. NOTE(review): overlapping waits on one thread each save/restore independently, so the first to finish may restore SIGCHLD while another still relies on it being blocked — confirm.
+};
+
+static
+VALUE process_wait_signalfd_transfer(VALUE _arguments) { // rb_ensure body: returns the child's status, or Qfalse if resumed without readiness.
+	struct process_wait_signalfd_arguments *arguments = (struct process_wait_signalfd_arguments *)_arguments;
+
+	while (1) { // Repeat until the status check for arguments->pid returns non-nil.
+		arguments->waiting = (struct IO_Event_Selector_EPoll_Waiting){
+			.list = {.type = &IO_Event_Selector_EPoll_process_wait_list_type},
+			.fiber = arguments->fiber,
+			.events = IO_EVENT_READABLE,
+		};
+
+		int result = IO_Event_Selector_EPoll_Waiting_register(arguments->selector, PIDT2NUM(arguments->pid), arguments->descriptor, &arguments->waiting);
+		if (result == -1) {
+			rb_sys_fail("process_wait_signalfd:IO_Event_Selector_EPoll_Waiting_register"); // Raises; the ensure callback still closes the descriptor.
+		}
+
+		IO_Event_Selector_loop_yield(&arguments->selector->backend);
+
+		IO_Event_Selector_EPoll_Waiting_cancel(&arguments->waiting); // Cancelled after every wake-up; the next iteration registers again.
+
+		if (!arguments->waiting.ready) return Qfalse; // Resumed without a readiness event: report Qfalse without checking the child.
+
+		VALUE status = process_wait_signalfd_check(arguments->descriptor, arguments->pid, arguments->flags);
+		if (status != Qnil) return status; // Qnil means our child has not exited yet, so wait again.
+	}
+}
+
+static
+VALUE process_wait_signalfd_ensure(VALUE _arguments) { // rb_ensure cleanup: runs whether transfer returned or raised.
+	struct process_wait_signalfd_arguments *arguments = (struct process_wait_signalfd_arguments *)_arguments;
+
+	IO_Event_List_free(&arguments->waiting.list);
+	arguments->waiting.fiber = 0;
+
+	process_wait_signalfd_close(arguments->descriptor, &arguments->old_mask); // Closes the signalfd and restores the saved signal mask.
+
+	return Qnil;
+}
+#endif
+
 VALUE IO_Event_Selector_EPoll_process_wait(VALUE self, VALUE fiber, VALUE _pid, VALUE _flags) {
 	struct IO_Event_Selector_EPoll *selector = NULL;
 	TypedData_Get_Struct(self, struct IO_Event_Selector_EPoll, &IO_Event_Selector_EPoll_Type, selector);
-	
+
 	pid_t pid = NUM2PIDT(_pid);
 	int flags = NUM2INT(_flags);
-	
+
 	int descriptor = pidfd_open(pid, 0);
-	
+
 	if (descriptor == -1) {
+#ifdef HAVE_SYS_SIGNALFD_H
+		if (errno == EPERM) { // Any other errno falls through to rb_sys_fail below.
+			// pidfd_open can fail with EPERM inside confined environments (e.g. snap).
+			// Fall back to signalfd with SIGCHLD:
+			VALUE status;
+			sigset_t old_mask;
+			int signalfd_descriptor = process_wait_signalfd_open(pid, flags, &old_mask, &status);
+			if (signalfd_descriptor < 0) return status; // -1: the child had already exited and status holds its result.
+
+			struct process_wait_signalfd_arguments signalfd_arguments = {
+				.selector = selector,
+				.waiting = {}, // Populated by process_wait_signalfd_transfer.
+				.pid = pid,
+				.flags = flags,
+				.descriptor = signalfd_descriptor,
+				.fiber = fiber,
+				.old_mask = old_mask,
+			};
+
+			RB_OBJ_WRITTEN(self, Qundef, fiber);
+
+			return rb_ensure(process_wait_signalfd_transfer, (VALUE)&signalfd_arguments, process_wait_signalfd_ensure, (VALUE)&signalfd_arguments);
+		}
+#endif
 		rb_sys_fail("IO_Event_Selector_EPoll_process_wait:pidfd_open");
 	}
-	
+
 	rb_update_max_fd(descriptor);
-	
+
 	// `pidfd_open` (above) may be edge triggered, so we need to check if the process is already exited, and if so, return immediately, otherwise we will block indefinitely.
 	VALUE status = IO_Event_Selector_process_status_wait(pid, flags);
 	if (status != Qnil) {
 		close(descriptor);
 		return status;
 	}
-	
+
 	struct IO_Event_Selector_EPoll_Waiting waiting = {
 		.list = {.type = &IO_Event_Selector_EPoll_process_wait_list_type},
 		.fiber = fiber,
 		.events = IO_EVENT_READABLE,
 	};
-	
+
 	RB_OBJ_WRITTEN(self, Qundef, fiber);
-	
+
 	int result = IO_Event_Selector_EPoll_Waiting_register(selector, _pid, descriptor, &waiting);
-	
+
 	if (result == -1) {
 		close(descriptor);
 		rb_sys_fail("IO_Event_Selector_EPoll_process_wait:IO_Event_Selector_EPoll_Waiting_register");
 	}
-	
+
 	struct process_wait_arguments process_wait_arguments = {
 		.selector = selector,
 		.pid = pid,
@@ -520,7 +596,7 @@ VALUE IO_Event_Selector_EPoll_process_wait(VALUE self, VALUE fiber, VALUE _pid,
 		.descriptor = descriptor,
 		.waiting = &waiting,
 	};
-	
+
 	return rb_ensure(process_wait_transfer, (VALUE)&process_wait_arguments, process_wait_ensure, (VALUE)&process_wait_arguments);
 }
 
diff --git a/ext/io/event/selector/process_wait_signalfd.c b/ext/io/event/selector/process_wait_signalfd.c
new file mode 100644
index 0000000..aa8ce21
--- /dev/null
+++ b/ext/io/event/selector/process_wait_signalfd.c
@@ -0,0 +1,64 @@
+// Released under the MIT License.
+// Copyright, 2026, by Samuel Williams.
+
+// Fallback for process_wait when pidfd_open(2) returns EPERM, e.g. inside snap
+// confinement (pre-snapd 2.75). Uses signalfd(2) + SIGCHLD instead.
+//
+// Included (not compiled separately) by epoll.c and uring.c, like pidfd.c.
+
+#ifdef HAVE_SYS_SIGNALFD_H
+#include <sys/signalfd.h> // signalfd(), SFD_* flags, struct signalfd_siginfo.
+#include <signal.h> // sigset_t, sigemptyset(), sigaddset(), pthread_sigmask(). NOTE(review): both header names were missing and are restored from usage — confirm.
+
+// Block SIGCHLD for this thread and create a signalfd.
+//
+// If the process has already exited, stores the status in *result and returns -1.
+// Otherwise returns the signalfd descriptor (>= 0).
+static int
+process_wait_signalfd_open(pid_t pid, int flags, sigset_t *old_mask, VALUE *result)
+{
+	sigset_t mask;
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGCHLD);
+	pthread_sigmask(SIG_BLOCK, &mask, old_mask); // *old_mask receives the previous mask so the caller can restore it later.
+
+	int descriptor = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK); // -1 asks for a new descriptor rather than updating an existing one.
+	if (descriptor == -1) {
+		int error = errno; // Capture first: errno is not guaranteed to survive the pthread_sigmask call below.
+		pthread_sigmask(SIG_SETMASK, old_mask, NULL);
+		rb_syserr_fail(error, "process_wait_signalfd_open:signalfd"); // Raises with signalfd's own error; does not return.
+	}
+	rb_update_max_fd(descriptor);
+
+	// Check if the process has already exited:
+	*result = IO_Event_Selector_process_status_wait(pid, flags); // NOTE(review): if this call can raise, the descriptor and mask are not restored here — confirm.
+	if (*result != Qnil) {
+		close(descriptor);
+		pthread_sigmask(SIG_SETMASK, old_mask, NULL);
+		return -1; // Nothing left for the caller to clean up on this path.
+	}
+	return descriptor; // Caller owns the descriptor and must pass old_mask to process_wait_signalfd_close.
+}
+
+// Drain the signalfd and check whether a specific process has exited.
+//
+// Returns the process status, or Qnil if it hasn't exited yet (the SIGCHLD was
+// for a different child).
+static VALUE
+process_wait_signalfd_check(int descriptor, pid_t pid, int flags)
+{
+	struct signalfd_siginfo info;
+	while (read(descriptor, &info, sizeof(info)) > 0) {} // Contents are discarded; the loop ends on the first read that returns <= 0.
+
+	return IO_Event_Selector_process_status_wait(pid, flags);
+}
+
+// Close the signalfd and restore the original signal mask.
+static void
+process_wait_signalfd_close(int descriptor, sigset_t *old_mask)
+{
+	close(descriptor);
+	pthread_sigmask(SIG_SETMASK, old_mask, NULL); // Reinstates the mask saved by process_wait_signalfd_open.
+}
+
+#endif
diff --git a/ext/io/event/selector/uring.c b/ext/io/event/selector/uring.c
index d342742..4c86070 100644
--- a/ext/io/event/selector/uring.c
+++ b/ext/io/event/selector/uring.c
@@ -12,6 +12,7 @@
 
 #include
 #include "pidfd.c"
+#include "process_wait_signalfd.c" // Textually included; provides the static process_wait_signalfd_{open,check,close} helpers.
 
 #include
 
@@ -457,27 +458,99 @@ VALUE process_wait_ensure(VALUE _arguments) {
 	return Qnil;
 }
 
+#ifdef HAVE_SYS_SIGNALFD_H
+struct process_wait_signalfd_arguments { // State shared by the transfer/ensure pair of the signalfd fallback.
+	struct IO_Event_Selector_URing *selector; // Selector whose event loop the fiber yields to.
+	struct IO_Event_Selector_URing_Waiting waiting; // Embedded by value; re-initialized on every loop iteration in transfer.
+	pid_t pid; // Child being waited for.
+	int flags; // Wait flags, forwarded unchanged to the status check.
+	int descriptor; // The signalfd descriptor (not a pidfd).
+	VALUE fiber; // Fiber recorded in the waiting entry.
+	sigset_t old_mask; // Signal mask restored by the ensure callback.
+};
+
+static
+VALUE process_wait_signalfd_transfer(VALUE _arguments) { // rb_ensure body: returns the child's status, or Qfalse when the wait yields a zero result.
+	struct process_wait_signalfd_arguments *arguments = (struct process_wait_signalfd_arguments *)_arguments;
+
+	while (1) { // Repeat until the status check for arguments->pid returns non-nil.
+		arguments->waiting = (struct IO_Event_Selector_URing_Waiting){.fiber = arguments->fiber};
+
+		struct IO_Event_Selector_URing_Completion *completion = IO_Event_Selector_URing_Completion_acquire(arguments->selector, &arguments->waiting);
+
+		struct io_uring_sqe *sqe = io_get_sqe(arguments->selector);
+		io_uring_prep_poll_add(sqe, arguments->descriptor, POLLIN|POLLHUP|POLLERR); // A fresh poll request is submitted on every iteration.
+		io_uring_sqe_set_data(sqe, completion);
+		io_uring_submit_pending(arguments->selector);
+
+		IO_Event_Selector_loop_yield(&arguments->selector->backend);
+
+		IO_Event_Selector_URing_Waiting_cancel(&arguments->waiting);
+
+		if (!arguments->waiting.result) return Qfalse; // A zero result is reported as Qfalse without checking the child.
+
+		VALUE status = process_wait_signalfd_check(arguments->descriptor, arguments->pid, arguments->flags);
+		if (status != Qnil) return status; // Qnil means our child has not exited yet, so wait again.
+	}
+}
+
+static
+VALUE process_wait_signalfd_ensure(VALUE _arguments) { // rb_ensure cleanup: runs whether transfer returned or raised.
+	struct process_wait_signalfd_arguments *arguments = (struct process_wait_signalfd_arguments *)_arguments;
+
+	IO_Event_Selector_URing_Waiting_cancel(&arguments->waiting);
+
+	process_wait_signalfd_close(arguments->descriptor, &arguments->old_mask); // Closes the signalfd and restores the saved signal mask.
+
+	return Qnil;
+}
+#endif
+
 VALUE IO_Event_Selector_URing_process_wait(VALUE self, VALUE fiber, VALUE _pid, VALUE _flags) {
 	struct IO_Event_Selector_URing *selector = NULL;
 	TypedData_Get_Struct(self, struct IO_Event_Selector_URing, &IO_Event_Selector_URing_Type, selector);
-	
+
 	pid_t pid = NUM2PIDT(_pid);
 	int flags = NUM2INT(_flags);
-	
+
 	int descriptor = pidfd_open(pid, 0);
 	if (descriptor < 0) {
+#ifdef HAVE_SYS_SIGNALFD_H
+		if (errno == EPERM) { // Any other errno falls through to rb_syserr_fail below.
+			// pidfd_open can fail with EPERM inside confined environments (e.g. snap).
+			// Fall back to signalfd with SIGCHLD:
+			VALUE status;
+			sigset_t old_mask;
+			int signalfd_descriptor = process_wait_signalfd_open(pid, flags, &old_mask, &status);
+			if (signalfd_descriptor < 0) return status; // -1: the child had already exited and status holds its result.
+
+			struct process_wait_signalfd_arguments signalfd_arguments = {
+				.selector = selector,
+				.waiting = {}, // Populated by process_wait_signalfd_transfer.
+				.pid = pid,
+				.flags = flags,
+				.descriptor = signalfd_descriptor,
+				.fiber = fiber,
+				.old_mask = old_mask,
+			};
+
+			RB_OBJ_WRITTEN(self, Qundef, fiber);
+
+			return rb_ensure(process_wait_signalfd_transfer, (VALUE)&signalfd_arguments, process_wait_signalfd_ensure, (VALUE)&signalfd_arguments);
+		}
+#endif
 		rb_syserr_fail(errno, "IO_Event_Selector_URing_process_wait:pidfd_open");
 	}
 	rb_update_max_fd(descriptor);
-	
+
 	struct IO_Event_Selector_URing_Waiting waiting = {
 		.fiber = fiber,
 	};
-	
+
 	RB_OBJ_WRITTEN(self, Qundef, fiber);
-	
+
 	struct IO_Event_Selector_URing_Completion *completion = IO_Event_Selector_URing_Completion_acquire(selector, &waiting);
-	
+
 	struct process_wait_arguments process_wait_arguments = {
 		.selector = selector,
 		.waiting = &waiting,
@@ -485,13 +558,13 @@ VALUE IO_Event_Selector_URing_process_wait(VALUE self, VALUE fiber, VALUE _pid,
 		.flags = flags,
 		.descriptor = descriptor,
 	};
-	
+
 	if (DEBUG) fprintf(stderr, "IO_Event_Selector_URing_process_wait:io_uring_prep_poll_add(%p)\n", (void*)fiber);
 	struct io_uring_sqe *sqe = io_get_sqe(selector);
 	io_uring_prep_poll_add(sqe, descriptor, POLLIN|POLLHUP|POLLERR);
 	io_uring_sqe_set_data(sqe, completion);
 	io_uring_submit_pending(selector);
-	
+
 	return rb_ensure(process_wait_transfer, (VALUE)&process_wait_arguments, process_wait_ensure, (VALUE)&process_wait_arguments);
 }
 
diff --git a/gems.rb b/gems.rb
index f6bc856..488a2b0 100644
--- a/gems.rb
+++ b/gems.rb
@@ -29,4 +29,5 @@
 	gem "bake-test"
 	gem "bake-test-external"
 	gem "async"
+	gem "fiddle" # Required by test/io/event/selector/process_wait_signalfd.rb to call prctl(2).
 end
diff --git a/test/io/event/selector/process_wait_signalfd.rb b/test/io/event/selector/process_wait_signalfd.rb
new file mode 100644
index 0000000..cab2c66
--- /dev/null
+++ b/test/io/event/selector/process_wait_signalfd.rb
@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+# Released under the MIT License.
+# Copyright, 2026, by Samuel Williams.
+
+require "io/event"
+require "io/event/selector"
+
+require "fiddle"
+
+# Install a seccomp-BPF filter that makes pidfd_open(2) return EPERM.
+# This simulates snap confinement (pre-snapd 2.75) where the seccomp profile
+# blocks pidfd_open via its seccomp filter.
+#
+# MUST be called after fork — the filter applies to the calling process and all
+# future children.
+def install_pidfd_open_seccomp_block
+	libc = Fiddle.dlopen(nil)
+	prctl = Fiddle::Function.new(
+		libc["prctl"],
+		[Fiddle::TYPE_INT, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG, Fiddle::TYPE_LONG],
+		Fiddle::TYPE_INT
+	)
+
+	# PR_SET_NO_NEW_PRIVS is required before installing a seccomp filter.
+	raise "prctl(PR_SET_NO_NEW_PRIVS) failed" unless prctl.call(38, 1, 0, 0, 0) == 0
+
+	# BPF program (4 instructions):
+	#   ld  [0]                  — load syscall number from seccomp_data
+	#   jeq #434, 0, 1           — if pidfd_open, fall through; else skip to ALLOW
+	#   ret #ERRNO|EPERM         — return EPERM
+	#   ret #ALLOW               — allow the syscall
+	filter = [
+		[0x20, 0, 0, 0],
+		[0x15, 0, 1, 434],
+		[0x06, 0, 0, 0x00050001],
+		[0x06, 0, 0, 0x7fff0000],
+	].map { |code, jt, jf, k| [code, jt, jf, k].pack("SCCL") }.join # struct sock_filter is host-endian: native "S"/"L", matching the native packing of sock_fprog below.
+
+	filter_ptr = Fiddle::Pointer.malloc(filter.bytesize)
+	filter_ptr[0, filter.bytesize] = filter
+
+	# struct sock_fprog { unsigned short len; struct sock_filter *filter; }
+	padding = Fiddle::SIZEOF_VOIDP - 2 # Bytes between the 2-byte len and the pointer-aligned filter field.
+	pack_ptr = Fiddle::SIZEOF_VOIDP == 8 ? "Q" : "L"
+	prog = [4].pack("S") + ("\0" * padding) + [filter_ptr.to_i].pack(pack_ptr) # len = 4 instructions.
+
+	prog_ptr = Fiddle::Pointer.malloc(prog.bytesize)
+	prog_ptr[0, prog.bytesize] = prog
+
+	# PR_SET_SECCOMP = 22, SECCOMP_MODE_FILTER = 2
+	raise "prctl(PR_SET_SECCOMP) failed" unless prctl.call(22, 2, prog_ptr.to_i, 0, 0) == 0
+end
+
+ProcessWaitSignalfd = Sus::Shared("process wait signalfd fallback") do
+	it "can wait for a process that has already exited" do
+		child = fork do
+			begin
+				install_pidfd_open_seccomp_block
+			rescue => error
+				$stderr.puts "Seccomp not available: #{error}"
+				exit!(2) # Exit status 2 is the "skip" signal read by skip_unless_seccomp_available.
+			end
+
+			loop_fiber = Fiber.current
+			sel = subject.new(loop_fiber)
+			result = nil
+
+			fiber = Fiber.new do
+				pid = Process.spawn("true") # NOTE(review): nothing here guarantees `true` has exited before process_wait runs — confirm this exercises the intended path.
+				result = sel.process_wait(Fiber.current, pid, 0)
+			end
+
+			fiber.transfer
+
+			while fiber.alive?
+				sel.select(1)
+			end
+
+			sel.close
+			exit!(result&.success? ? 0 : 1)
+		end
+
+		_, status = Process.wait2(child)
+		skip_unless_seccomp_available(status)
+		expect(status.success?).to be == true
+	end
+
+	it "can wait for a process that is still running" do
+		child = fork do
+			begin
+				install_pidfd_open_seccomp_block
+			rescue => error
+				$stderr.puts "Seccomp not available: #{error}"
+				exit!(2)
+			end
+
+			loop_fiber = Fiber.current
+			sel = subject.new(loop_fiber)
+			result = nil
+
+			fiber = Fiber.new do
+				pid = Process.spawn("sleep 0.01")
+				result = sel.process_wait(Fiber.current, pid, 0)
+			end
+
+			fiber.transfer
+
+			while fiber.alive?
+				sel.select(1)
+			end
+
+			sel.close
+			exit!(result&.success? ? 0 : 1)
+		end
+
+		_, status = Process.wait2(child)
+		skip_unless_seccomp_available(status)
+		expect(status.success?).to be == true
+	end
+
+	it "can wait for two processes sequentially" do
+		child = fork do
+			begin
+				install_pidfd_open_seccomp_block
+			rescue => error
+				$stderr.puts "Seccomp not available: #{error}"
+				exit!(2)
+			end
+
+			loop_fiber = Fiber.current
+			sel = subject.new(loop_fiber)
+			result1 = result2 = nil
+
+			fiber = Fiber.new do
+				pid1 = Process.spawn("sleep 0")
+				pid2 = Process.spawn("sleep 0")
+
+				result1 = sel.process_wait(Fiber.current, pid1, 0)
+				result2 = sel.process_wait(Fiber.current, pid2, 0)
+			end
+
+			fiber.transfer
+
+			while fiber.alive?
+				sel.select(1)
+			end
+
+			sel.close
+			exit!(result1&.success? && result2&.success? ? 0 : 1)
+		end
+
+		_, status = Process.wait2(child)
+		skip_unless_seccomp_available(status)
+		expect(status.success?).to be == true
+	end
+
+	def skip_unless_seccomp_available(status)
+		skip "seccomp filter not available" if status.exitstatus == 2
+	end
+end
+
+IO::Event::Selector.constants.each do |name|
+	next unless name == :EPoll || name == :URing # Only these two selectors are exercised by this file.
+
+	klass = IO::Event::Selector.const_get(name)
+
+	describe(klass, unique: "#{name}_signalfd") do
+		it_behaves_like ProcessWaitSignalfd
+	end
+end