From 7d28cc386dd39eb433c06f155b9de9639cfd72f8 Mon Sep 17 00:00:00 2001 From: Jack Fletcher Date: Sat, 2 May 2026 16:07:47 -0700 Subject: [PATCH 1/2] http_server: run internal HTTP server on dedicated worker thread The internal HTTP server was hardcoded to run on the main engine event loop (`use_caller_event_loop=FLB_TRUE`). This causes a deadlock when the exec input plugin curls the server's own endpoints, since the event loop blocks waiting for the child process while the server cannot respond on the same blocked loop. Before the v5.0 migration from Monkey to `flb_http_server`, Monkey ran on its own thread via `mk_start()`, so this pattern worked fine. Give the internal HTTP server its own worker thread by setting `use_caller_event_loop=FLB_FALSE`, using the same runtime path that HTTP input plugins already use. Fixes https://github.com/fluent/fluent-bit/issues/11769 Signed-off-by: Jack Fletcher --- src/http_server/flb_hs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/http_server/flb_hs.c b/src/http_server/flb_hs.c index f49d0fcb2e2..74aaeee9d17 100644 --- a/src/http_server/flb_hs.c +++ b/src/http_server/flb_hs.c @@ -370,9 +370,9 @@ struct flb_hs *flb_hs_create(const char *listen, const char *tcp_port, options.networking_flags = 0; flb_net_setup_init(&hs->net_setup); options.networking_setup = &hs->net_setup; - options.event_loop = config->evl; + options.event_loop = NULL; options.system_context = config; - options.use_caller_event_loop = FLB_TRUE; + options.use_caller_event_loop = FLB_FALSE; ret = flb_http_server_init_with_options(&hs->server, &options); if (ret != 0) { From 4bfb9c60dc3412e35e0e0177e71eb1fd7007b3a3 Mon Sep 17 00:00:00 2001 From: Jack Fletcher Date: Sat, 2 May 2026 16:09:16 -0700 Subject: [PATCH 2/2] tests: add integration test for internal HTTP server exec deadlock Add a test that verifies the internal HTTP server remains responsive after the exec input plugin makes a request to its own endpoints. 
Ref https://github.com/fluent/fluent-bit/issues/11769

Signed-off-by: Jack Fletcher
---
 .../internal_http_server_exec_deadlock.yaml   | 19 +++++
 ..._internal_http_server_exec_deadlock_001.py | 74 +++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
 create mode 100644 tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py

diff --git a/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml b/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
new file mode 100644
index 00000000000..27840f7a5f9
--- /dev/null
+++ b/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
@@ -0,0 +1,19 @@
+service:
+  flush: 1
+  grace: 1
+  log_level: info
+  http_server: on
+  http_listen: 127.0.0.1
+  http_port: ${FLUENT_BIT_HTTP_MONITORING_PORT}
+
+pipeline:
+  inputs:
+    - name: exec
+      tag: test
+      command: curl -s http://127.0.0.1:${FLUENT_BIT_HTTP_MONITORING_PORT}/api/v1/metrics/prometheus
+      interval_sec: 1
+      buf_size: 128k
+
+  outputs:
+    - name: "null"
+      match: "*"
diff --git a/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py b/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py
new file mode 100644
index 00000000000..4db4ece5087
--- /dev/null
+++ b/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py
@@ -0,0 +1,74 @@
+import os
+import time
+
+from utils.http_matrix import run_curl_request
+from utils.test_service import FluentBitTestService
+
+
+class Service:
+    """Fluent Bit instance running the exec self-request scenario config."""
+
+    def __init__(self):
+        # abspath() collapses the "../" hop out of the tests/ directory.
+        tests_dir = os.path.dirname(__file__)
+        relative_config = "../config/internal_http_server_exec_deadlock.yaml"
+        self.config_file = os.path.abspath(os.path.join(tests_dir, relative_config))
+        self.service = FluentBitTestService(self.config_file)
+
+    def start(self):
+        """Start Fluent Bit and record the monitoring endpoint's base URL."""
+        self.service.start()
+        self.flb = self.service.flb
+        self.base_url = f"http://127.0.0.1:{self.flb.http_monitoring_port}"
+
+    def stop(self):
+        """Stop the wrapped test service."""
+        self.service.stop()
+
+    def request(self, path, *, method="GET", http_mode="http1.1"):
+        """Send a curl request for ``path`` to the internal HTTP server."""
+        url = f"{self.base_url}{path}"
+        return run_curl_request(url, method=method, http_mode=http_mode)
+
+
+def test_http_server_responsive_after_exec_self_request():
+    """Check the built-in HTTP server survives an exec self-request.
+
+    The scenario config runs ``curl`` against the server's own metrics
+    endpoint through the exec input every second. When the server ran on
+    the main engine event loop, that loop blocked waiting for the child
+    process and could not answer it, so the server deadlocked.
+    """
+    uptime_path = "/api/v1/uptime"
+    service = Service()
+    service.start()
+
+    def healthy_uptime_response():
+        # A falsy return tells wait_for_condition to keep polling
+        # (presumably; it is defined in utils.test_service).
+        response = service.request(uptime_path)
+        if not response:
+            return None
+        is_ok = response["status_code"] == 200 and "uptime_sec" in response["body"]
+        return response if is_ok else None
+
+    try:
+        # Baseline: the endpoint answers right after startup.
+        baseline = service.request(uptime_path)
+        assert baseline["status_code"] == 200
+        assert "uptime_sec" in baseline["body"]
+
+        # Give exec (interval_sec=1) time to run curl at least once.
+        time.sleep(2)
+
+        # The server must still answer once exec has queried it.
+        result = service.service.wait_for_condition(
+            healthy_uptime_response,
+            timeout=10,
+            interval=1,
+            description="HTTP server responsive after exec self-request",
+        )
+        assert result["status_code"] == 200
+        assert "uptime_sec" in result["body"]
+    finally:
+        service.stop()