From b9d023ed257c10719753be6b3cf960019fe87fa0 Mon Sep 17 00:00:00 2001
From: Eduardo Silva
Date: Wed, 6 May 2026 21:41:34 -0600
Subject: [PATCH 1/2] in_exec: run collector in input thread

Signed-off-by: Eduardo Silva
---
 plugins/in_exec/in_exec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/plugins/in_exec/in_exec.c b/plugins/in_exec/in_exec.c
index 8a770d8720a..79aad4b2d58 100644
--- a/plugins/in_exec/in_exec.c
+++ b/plugins/in_exec/in_exec.c
@@ -482,6 +482,7 @@ static struct flb_config_map config_map[] = {
 struct flb_input_plugin in_exec_plugin = {
     .name         = "exec",
     .description  = "Exec Input",
+    .flags        = FLB_INPUT_THREADED,
     .cb_init      = in_exec_init,
     .cb_pre_run   = in_exec_prerun,
     .cb_collect   = in_exec_collect,

From 4482c184802a347374edb6830202ec0001619a8c Mon Sep 17 00:00:00 2001
From: Eduardo Silva
Date: Wed, 6 May 2026 21:41:41 -0600
Subject: [PATCH 2/2] tests: integration: cover exec self-request deadlock

Signed-off-by: Eduardo Silva
---
 .../internal_http_server_exec_deadlock.yaml   | 19 +++++
 ..._internal_http_server_exec_deadlock_001.py | 76 +++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
 create mode 100644 tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py

diff --git a/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml b/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
new file mode 100644
index 00000000000..27840f7a5f9
--- /dev/null
+++ b/tests/integration/scenarios/internal_http_server/config/internal_http_server_exec_deadlock.yaml
@@ -0,0 +1,19 @@
+service:
+  flush: 1
+  grace: 1
+  log_level: info
+  http_server: on
+  http_listen: 127.0.0.1
+  http_port: ${FLUENT_BIT_HTTP_MONITORING_PORT}
+
+pipeline:
+  inputs:
+    - name: exec
+      tag: test
+      command: curl -s http://127.0.0.1:${FLUENT_BIT_HTTP_MONITORING_PORT}/api/v1/metrics/prometheus
+      interval_sec: 1
+      buf_size: 128k
+
+  outputs:
+    - name: "null"
+      match: "*"
diff --git a/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py b/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py
new file mode 100644
index 00000000000..d9b68df79f4
--- /dev/null
+++ b/tests/integration/scenarios/internal_http_server/tests/test_internal_http_server_exec_deadlock_001.py
@@ -0,0 +1,76 @@
+import os
+import time
+
+from utils.http_matrix import run_curl_request
+from utils.test_service import FluentBitTestService
+
+UPTIME_PATH = "/api/v1/uptime"
+
+
+class Service:
+    """Fluent Bit test instance running the exec self-request scenario."""
+
+    def __init__(self):
+        self.config_file = os.path.abspath(
+            os.path.join(
+                os.path.dirname(__file__),
+                "../config/internal_http_server_exec_deadlock.yaml",
+            )
+        )
+        self.service = FluentBitTestService(self.config_file)
+
+    def start(self):
+        self.service.start()
+        self.flb = self.service.flb
+        self.base_url = f"http://127.0.0.1:{self.flb.http_monitoring_port}"
+
+    def stop(self):
+        self.service.stop()
+
+    def request(self, path, *, method="GET", http_mode="http1.1"):
+        return run_curl_request(
+            f"{self.base_url}{path}",
+            method=method,
+            http_mode=http_mode,
+        )
+
+
+def _healthy_uptime_response(service):
+    """Return the uptime response if the server answered correctly, else None."""
+    response = service.request(UPTIME_PATH)
+    if not response:
+        return None
+    if response["status_code"] == 200 and "uptime_sec" in response["body"]:
+        return response
+    return None
+
+
+def test_http_server_responsive_after_exec_self_request():
+    """The built-in HTTP server must remain responsive after the exec input
+    plugin makes an HTTP request to it. Before the fix, the exec child
+    process (curl) and the HTTP server shared the same event loop, causing
+    a deadlock that made the server permanently unresponsive."""
+
+    service = Service()
+
+    try:
+        service.start()
+
+        result = service.request(UPTIME_PATH)
+        assert result["status_code"] == 200
+        assert "uptime_sec" in result["body"]
+
+        # The exec input runs curl every second (interval_sec: 1); wait long
+        # enough that at least one self-request should have hit the server.
+        time.sleep(2)
+
+        result = service.service.wait_for_condition(
+            lambda: _healthy_uptime_response(service),
+            timeout=10,
+            interval=1,
+            description="HTTP server responsive after exec self-request",
+        )
+        assert result["status_code"] == 200
+        assert "uptime_sec" in result["body"]
+    finally:
+        service.stop()