From d4976a0e0159de502be52a5538e07677e3b08c0a Mon Sep 17 00:00:00 2001 From: Erik LaBianca Date: Mon, 11 May 2026 15:21:38 -0400 Subject: [PATCH 1/3] Add local server metadata introspection --- ds4_server.c | 354 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 337 insertions(+), 17 deletions(-) diff --git a/ds4_server.c b/ds4_server.c index 0ae9767..e096d71 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -7460,10 +7460,25 @@ typedef struct { static bool id_list_contains(const stop_list *ids, const char *id); static void id_list_push_unique(stop_list *ids, const char *id); +typedef struct { + const char *model_path; + ds4_backend backend; + int ctx_size; + int default_tokens; + int n_threads; + int routed_quant_bits; + bool mtp_enabled; + int mtp_draft_tokens; + bool quality; + bool warm_weights; +} server_runtime_config; + + struct server { ds4_engine *engine; ds4_session *session; int default_tokens; + server_runtime_config runtime; kv_disk_cache kv; tool_memory tool_mem; live_tool_state responses_live; @@ -11042,8 +11057,46 @@ typedef struct { int fd; } client_arg; -static void append_model_json_values(buf *b, int ctx, int default_tokens) { - const int max_completion = default_tokens < ctx ? default_tokens : ctx; +static int effective_max_completion_tokens(int ctx, int default_tokens) { + return default_tokens < ctx ? default_tokens : ctx; +} + +static void append_model_supported_parameters_json(buf *b) { + buf_puts(b, + "[" + "\"tools\"," + "\"tool_choice\"," + "\"max_tokens\"," + "\"temperature\"," + "\"top_p\"," + "\"top_k\"," + "\"min_p\"," + "\"stop\"," + "\"seed\"," + "\"stream\"," + "\"reasoning_effort\"]"); +} + +static void append_model_meta_json(buf *b, const server_runtime_config *rt) { + buf_puts(b, "{\"backend\":"); + json_escape(b, ds4_backend_name(rt->backend)); + buf_puts(b, ",\"routed_expert_quant_bits\":"); + if (rt->routed_quant_bits > 0) buf_printf(b, "%d", rt->routed_quant_bits); + else buf_puts(b, "null"); + buf_printf(b, + ",\"mtp\":%s," + "\"mtp_draft_tokens\":%d," + "\"reasoning\":true," + "\"reasoning_default\":\"high\"," + "\"reasoning_max_min_context\":%u}", + rt->mtp_enabled ? "true" : "false", + rt->mtp_draft_tokens, + ds4_think_max_min_context()); +} + +static void append_model_json_values(buf *b, int ctx, int default_tokens, + const server_runtime_config *rt) { + const int max_completion = effective_max_completion_tokens(ctx, default_tokens); buf_printf(b, "{\"id\":\"deepseek-v4-flash\"," "\"object\":\"model\"," @@ -11055,25 +11108,143 @@ static void append_model_json_values(buf *b, int ctx, int default_tokens) { "\"context_length\":%d," "\"max_completion_tokens\":%d," "\"is_moderated\":false}," - "\"supported_parameters\":[" - "\"tools\"," - "\"tool_choice\"," - "\"max_tokens\"," - "\"temperature\"," - "\"top_p\"," - "\"top_k\"," - "\"min_p\"," - "\"stop\"," - "\"seed\"," - "\"stream\"," - "\"reasoning_effort\"]}", + "\"supported_parameters\":", ctx, ctx, max_completion); + append_model_supported_parameters_json(b); + buf_puts(b, ",\"meta\":"); + append_model_meta_json(b, rt); + buf_putc(b, '}'); } static void append_model_json(buf *b, const server *s) { - append_model_json_values(b, ds4_session_ctx(s->session), s->default_tokens); + append_model_json_values(b, ds4_session_ctx(s->session), s->default_tokens, &s->runtime); +} + +static void append_nullable_string_json(buf *b, const char *s) { + if (s) json_escape(b, s); + else buf_puts(b, "null"); +} + +static void append_props_json(buf *b, server *s) { + const server_runtime_config *rt = &s->runtime; + const int ctx = rt->ctx_size > 0 ? rt->ctx_size : ds4_session_ctx(s->session); + const int default_tokens = rt->default_tokens > 0 ? rt->default_tokens : s->default_tokens; + const int effective_tokens = effective_max_completion_tokens(ctx, default_tokens); + ds4_context_memory mem = ds4_context_memory_estimate(rt->backend, ctx); + + int tool_ids = 0; + size_t tool_bytes = 0; + pthread_mutex_lock(&s->tool_mu); + tool_ids = s->tool_mem.entries; + tool_bytes = s->tool_mem.bytes; + pthread_mutex_unlock(&s->tool_mu); + + buf_puts(b, "{\"server\":{\"name\":\"ds4-server\"},\"model\":{"); + buf_puts(b, "\"id\":\"deepseek-v4-flash\",\"name\":\"DeepSeek V4 Flash\",\"path\":"); + append_nullable_string_json(b, rt->model_path); + buf_puts(b, ",\"routed_expert_quant_bits\":"); + if (rt->routed_quant_bits > 0) buf_printf(b, "%d", rt->routed_quant_bits); + else buf_puts(b, "null"); + buf_printf(b, ",\"mtp\":%s,\"mtp_draft_tokens\":%d}", + rt->mtp_enabled ? "true" : "false", + rt->mtp_draft_tokens); + + buf_puts(b, ",\"runtime\":{\"backend\":"); + json_escape(b, ds4_backend_name(rt->backend)); + buf_printf(b, + ",\"ctx_size\":%d," + "\"default_max_tokens\":%d," + "\"effective_max_completion_tokens\":%d," + "\"threads\":%d," + "\"quality\":%s," + "\"warm_weights\":%s}", + ctx, + default_tokens, + effective_tokens, + rt->n_threads, + rt->quality ? "true" : "false", + rt->warm_weights ? "true" : "false"); + + buf_printf(b, + ",\"reasoning\":{" + "\"supported_efforts\":[\"low\",\"medium\",\"high\",\"xhigh\",\"max\"]," + "\"aliases\":{\"low\":\"high\",\"medium\":\"high\",\"xhigh\":\"high\"}," + "\"default\":\"high\"," + "\"effective_default\":\"%s\"," + "\"think_max_min_context\":%u}", + ds4_think_mode_name(ds4_think_mode_for_context(DS4_THINK_HIGH, ctx)), + ds4_think_max_min_context()); + + buf_puts(b, + ",\"sampling\":{" + "\"defaults\":{\"temperature\":1,\"top_p\":1,\"top_k\":0,\"min_p\":0}," + "\"thinking_override\":{\"enabled\":true,\"temperature\":1,\"top_p\":1,\"top_k\":0,\"min_p\":0}," + "\"tool_protocol_sampling\":{\"structural_temperature\":0}}"); + + buf_printf(b, + ",\"context_memory\":{" + "\"total_bytes\":%llu," + "\"raw_bytes\":%llu," + "\"compressed_bytes\":%llu," + "\"scratch_bytes\":%llu," + "\"prefill_cap\":%u," + "\"raw_cap\":%u," + "\"comp_cap\":%u}", + (unsigned long long)mem.total_bytes, + (unsigned long long)mem.raw_bytes, + (unsigned long long)mem.compressed_bytes, + (unsigned long long)mem.scratch_bytes, + mem.prefill_cap, + mem.raw_cap, + mem.comp_cap); + + buf_puts(b, ",\"kv_disk_cache\":{"); + buf_printf(b, "\"enabled\":%s,\"dir\":", s->kv.enabled ? "true" : "false"); + append_nullable_string_json(b, s->kv.enabled ? s->kv.dir : NULL); + buf_printf(b, + ",\"budget_bytes\":%llu," + "\"reject_different_quant\":%s," + "\"policy\":{" + "\"min_tokens\":%d," + "\"cold_max_tokens\":%d," + "\"continued_interval_tokens\":%d," + "\"boundary_trim_tokens\":%d," + "\"boundary_align_tokens\":%d}," + "\"entries\":%d}", + (unsigned long long)(s->kv.enabled ? s->kv.budget_bytes : 0), + s->kv.reject_different_quant ? "true" : "false", + s->kv.opt.min_tokens, + s->kv.opt.cold_max_tokens, + s->kv.opt.continued_interval_tokens, + s->kv.opt.boundary_trim_tokens, + s->kv.opt.boundary_align_tokens, + s->kv.enabled ? s->kv.len : 0); + + buf_printf(b, + ",\"tool_replay\":{" + "\"exact_dsml_replay_enabled\":%s," + "\"max_ids\":%d," + "\"current_ids\":%d," + "\"current_bytes\":%llu}", + s->disable_exact_dsml_tool_replay ? "false" : "true", + s->tool_mem.max_entries, + tool_ids, + (unsigned long long)tool_bytes); + + buf_puts(b, + ",\"api\":{" + "\"endpoints\":[" + "\"GET /v1/models\"," + "\"GET /v1/models/deepseek-v4-flash\"," + "\"GET /props\"," + "\"POST /v1/chat/completions\"," + "\"POST /v1/completions\"," + "\"POST /v1/messages\"]," + "\"supported_request_parameters\":"); + append_model_supported_parameters_json(b); + buf_puts(b, "}}\n"); } static bool send_model(server *s, int fd) { @@ -11085,6 +11256,14 @@ static bool send_model(server *s, int fd) { return ok; } +static bool send_props(server *s, int fd) { + buf b = {0}; + append_props_json(&b, s); + bool ok = http_response(fd, 200, "application/json", b.ptr); + buf_free(&b); + return ok; +} + static bool send_models(server *s, int fd) { buf b = {0}; buf_puts(&b, "{\"object\":\"list\",\"data\":["); @@ -11102,6 +11281,22 @@ static void client_done(server *s) { pthread_mutex_unlock(&s->mu); } +static bool client_peer_is_loopback(int fd) { + struct sockaddr_storage ss; + socklen_t len = sizeof(ss); + if (getpeername(fd, (struct sockaddr *)&ss, &len) != 0) return false; + if (ss.ss_family == AF_INET) { + const struct sockaddr_in *in = (const struct sockaddr_in *)&ss; + uint32_t addr = ntohl(in->sin_addr.s_addr); + return (addr >> 24) == 127; + } + if (ss.ss_family == AF_INET6) { + const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)&ss; + return IN6_IS_ADDR_LOOPBACK(&in6->sin6_addr); + } + return false; +} + static void set_client_socket_nonblocking(int fd); static void *client_main(void *arg) { @@ -11126,6 +11321,12 @@ static void *client_main(void *arg) { http_request_free(&hr); goto done; } + if (!strcmp(hr.method, "GET") && !strcmp(hr.path, "/props")) { + if (client_peer_is_loopback(fd)) send_props(s, fd); + else http_error(fd, 404, "unknown endpoint"); + http_request_free(&hr); + goto done; + } request req; char err[160]; @@ -11550,6 +11751,18 @@ int main(int argc, char **argv) { s.engine = engine; s.session = session; s.default_tokens = cfg.default_tokens; + s.runtime = (server_runtime_config){ + .model_path = cfg.engine.model_path, + .backend = cfg.engine.backend, + .ctx_size = cfg.ctx_size, + .default_tokens = cfg.default_tokens, + .n_threads = cfg.engine.n_threads, + .routed_quant_bits = ds4_engine_routed_quant_bits(engine), + .mtp_enabled = cfg.engine.mtp_path != NULL, + .mtp_draft_tokens = cfg.engine.mtp_draft_tokens, + .quality = cfg.engine.quality, + .warm_weights = cfg.engine.warm_weights, + }; s.disable_exact_dsml_tool_replay = cfg.disable_exact_dsml_tool_replay; s.tool_mem.max_entries = cfg.tool_memory_max_ids; if (cfg.kv_disk_dir) { @@ -13690,18 +13903,122 @@ static void test_json_skip_has_nesting_limit(void) { } static void test_model_metadata_clamps_completion_to_context(void) { + server_runtime_config rt = { + .backend = DS4_BACKEND_CPU, + .routed_quant_bits = 2, + .mtp_enabled = false, + .mtp_draft_tokens = 1, + }; buf b = {0}; - append_model_json_values(&b, 32768, 393216); + append_model_json_values(&b, 32768, 393216, &rt); TEST_ASSERT(strstr(b.ptr, "\"context_length\":32768") != NULL); TEST_ASSERT(strstr(b.ptr, "\"max_completion_tokens\":32768") != NULL); buf_free(&b); - append_model_json_values(&b, 100000, 4096); + append_model_json_values(&b, 100000, 4096, &rt); TEST_ASSERT(strstr(b.ptr, "\"context_length\":100000") != NULL); TEST_ASSERT(strstr(b.ptr, "\"max_completion_tokens\":4096") != NULL); buf_free(&b); } +static void test_model_metadata_contains_meta_fields(void) { + server_runtime_config rt = { + .backend = DS4_BACKEND_CPU, + .routed_quant_bits = 4, + .mtp_enabled = true, + .mtp_draft_tokens = 2, + }; + buf b = {0}; + append_model_json_values(&b, 32768, 393216, &rt); + TEST_ASSERT(strstr(b.ptr, "\"meta\":{") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"backend\":\"cpu\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"routed_expert_quant_bits\":4") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"mtp\":true") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"mtp_draft_tokens\":2") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"reasoning\":true") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"reasoning_default\":\"high\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"reasoning_max_min_context\":393216") != NULL); + buf_free(&b); +} + +static void test_props_server_init(server *s) { + memset(s, 0, sizeof(*s)); + s->default_tokens = 393216; + s->runtime = (server_runtime_config){ + .model_path = "/tmp/ds4flash.gguf", + .backend = DS4_BACKEND_CPU, + .ctx_size = 32768, + .default_tokens = 393216, + .n_threads = 7, + .routed_quant_bits = 2, + .mtp_enabled = true, + .mtp_draft_tokens = 3, + .quality = true, + .warm_weights = true, + }; + s->tool_mem.max_entries = 1234; + pthread_mutex_init(&s->tool_mu, NULL); +} + +static void test_props_json_includes_runtime_and_api_metadata(void) { + server s; + test_props_server_init(&s); + s.kv.enabled = true; + s.kv.dir = xstrdup("/tmp/ds4-kv"); + s.kv.budget_bytes = 8192; + s.kv.reject_different_quant = true; + s.kv.opt = kv_cache_default_options(); + s.kv.len = 5; + + buf b = {0}; + append_props_json(&b, &s); + TEST_ASSERT(strstr(b.ptr, "\"server\":{\"name\":\"ds4-server\"}") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"path\":\"/tmp/ds4flash.gguf\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"backend\":\"cpu\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"ctx_size\":32768") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"default_max_tokens\":393216") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"effective_max_completion_tokens\":32768") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"threads\":7") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"quality\":true") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"warm_weights\":true") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"default\":\"high\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"think_max_min_context\":393216") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"context_memory\":{") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"total_bytes\":") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"raw_bytes\":") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"compressed_bytes\":") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"scratch_bytes\":") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"kv_disk_cache\":{") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"dir\":\"/tmp/ds4-kv\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"budget_bytes\":8192") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"entries\":5") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"exact_dsml_replay_enabled\":true") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"max_ids\":1234") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"GET /props\"") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"POST /v1/chat/completions\"") != NULL); + + buf_free(&b); + free(s.kv.dir); + pthread_mutex_destroy(&s.tool_mu); +} + +static void test_props_json_reports_disabled_disk_kv(void) { + server s; + test_props_server_init(&s); + s.disable_exact_dsml_tool_replay = true; + + buf b = {0}; + append_props_json(&b, &s); + TEST_ASSERT(strstr(b.ptr, "\"kv_disk_cache\":{\"enabled\":false") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"dir\":null") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"budget_bytes\":0") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"entries\":0") != NULL); + TEST_ASSERT(strstr(b.ptr, "\"exact_dsml_replay_enabled\":false") != NULL); + + buf_free(&b); + pthread_mutex_destroy(&s.tool_mu); +} + static void test_client_socket_nonblocking_flag(void) { int sv[2]; TEST_ASSERT(socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0); @@ -14468,6 +14785,9 @@ static void ds4_server_unit_tests_run(void) { test_stop_list_streaming_holds_and_trims_stop_text(); test_json_skip_has_nesting_limit(); test_model_metadata_clamps_completion_to_context(); + test_model_metadata_contains_meta_fields(); + test_props_json_includes_runtime_and_api_metadata(); + test_props_json_reports_disabled_disk_kv(); test_client_socket_nonblocking_flag(); test_thinking_state_tracks_prompt_and_generated_tags(); test_thinking_checkpoint_remember_gate(); From 3c4ad28d9d844eea366f1afd9b2d5318af86632a Mon Sep 17 00:00:00 2001 From: Erik LaBianca Date: Mon, 11 May 2026 16:03:35 -0400 Subject: [PATCH 2/3] Address /props review polish - Drop loopback gate on GET /props; the endpoint is now reachable like /v1/models, matching llama-server convention. Operators are responsible for binding to a trusted interface. - Remove ctx_size / default_tokens fallback ladders in append_props_json by relying on the runtime config being unconditionally populated in main(). - Switch MTP introspection to ds4_engine_has_mtp() and ds4_engine_mtp_draft_tokens() so the flag reflects engine state rather than coupling to the cfg.engine.mtp_path argv shape. - Pull sampling defaults (temperature, top_p, top_k, min_p, tool-call structural temperature) into named macros and use them in request_init, the thinking override path, and the /props payload. %g formatting keeps the current JSON byte-identical while letting future non-integer defaults serialize cleanly. - Note that the kv.len read in append_props_json is intentionally lockless and consistent with other kv.* reads in the function. - Comment the api.endpoints array as a sync point with client_main() routing. - Add a short header comment on append_props_json describing its payload sections. --- ds4_server.c | 70 ++++++++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/ds4_server.c b/ds4_server.c index e096d71..b8fac45 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -40,6 +40,14 @@ static volatile sig_atomic_t g_listen_fd = -1; #define DS4_SERVER_IO_TIMEOUT_SEC 10 #define DS4_SERVER_SEND_STALL_TIMEOUT_MS 2000 +/* Sampling defaults applied at request_init time and surfaced in /props. */ +#define DS4_DEFAULT_TEMPERATURE 1.0f +#define DS4_DEFAULT_TOP_P 1.0f +#define DS4_DEFAULT_TOP_K 0 +#define DS4_DEFAULT_MIN_P 0.0f +/* Structural temperature used during tool-call protocol encoding. */ +#define DS4_TOOL_STRUCTURAL_TEMPERATURE 0.0f + static void stop_signal_handler(int sig) { (void)sig; if (g_stop_requested) _exit(130); @@ -742,10 +750,10 @@ static void request_init(request *r, req_kind kind, int max_tokens) { r->api = API_OPENAI; r->model = xstrdup("deepseek-v4-flash"); r->max_tokens = max_tokens; - r->top_k = 0; - r->temperature = 1.0f; - r->top_p = 1.0f; - r->min_p = 0.0f; + r->top_k = DS4_DEFAULT_TOP_K; + r->temperature = DS4_DEFAULT_TEMPERATURE; + r->top_p = DS4_DEFAULT_TOP_P; + r->min_p = DS4_DEFAULT_MIN_P; r->think_mode = DS4_THINK_HIGH; } @@ -10471,13 +10479,13 @@ static void generate_job(server *s, job *j) { float top_p = j->req.top_p; float min_p = j->req.min_p; if (ds4_think_mode_enabled(j->req.think_mode)) { - temperature = 1.0f; - top_k = 0; - top_p = 1.0f; - min_p = 0.0f; + temperature = DS4_DEFAULT_TEMPERATURE; + top_k = DS4_DEFAULT_TOP_K; + top_p = DS4_DEFAULT_TOP_P; + min_p = DS4_DEFAULT_MIN_P; } if (in_tool_call && !dsml_decode_state_uses_payload_sampling(dsml_state)) { - temperature = 0.0f; + temperature = DS4_TOOL_STRUCTURAL_TEMPERATURE; } int token = ds4_session_sample(s->session, temperature, top_k, top_p, min_p, &rng); if (token == ds4_token_eos(s->engine)) { @@ -11127,10 +11135,12 @@ static void append_nullable_string_json(buf *b, const char *s) { else buf_puts(b, "null"); } +/* Emit /props introspection payload covering server identity, model, runtime config, + * reasoning, sampling defaults, context memory estimates, KV disk cache, tool replay, and API surface. */ static void append_props_json(buf *b, server *s) { const server_runtime_config *rt = &s->runtime; - const int ctx = rt->ctx_size > 0 ? rt->ctx_size : ds4_session_ctx(s->session); - const int default_tokens = rt->default_tokens > 0 ? rt->default_tokens : s->default_tokens; + const int ctx = rt->ctx_size; + const int default_tokens = rt->default_tokens; const int effective_tokens = effective_max_completion_tokens(ctx, default_tokens); ds4_context_memory mem = ds4_context_memory_estimate(rt->backend, ctx); @@ -11177,11 +11187,16 @@ static void append_props_json(buf *b, server *s) { ds4_think_mode_name(ds4_think_mode_for_context(DS4_THINK_HIGH, ctx)), ds4_think_max_min_context()); - buf_puts(b, + buf_printf(b, ",\"sampling\":{" - "\"defaults\":{\"temperature\":1,\"top_p\":1,\"top_k\":0,\"min_p\":0}," - "\"thinking_override\":{\"enabled\":true,\"temperature\":1,\"top_p\":1,\"top_k\":0,\"min_p\":0}," - "\"tool_protocol_sampling\":{\"structural_temperature\":0}}"); + "\"defaults\":{\"temperature\":%g,\"top_p\":%g,\"top_k\":%d,\"min_p\":%g}," + "\"thinking_override\":{\"enabled\":true,\"temperature\":%g,\"top_p\":%g,\"top_k\":%d,\"min_p\":%g}," + "\"tool_protocol_sampling\":{\"structural_temperature\":%g}}", + (double)DS4_DEFAULT_TEMPERATURE, (double)DS4_DEFAULT_TOP_P, + DS4_DEFAULT_TOP_K, (double)DS4_DEFAULT_MIN_P, + (double)DS4_DEFAULT_TEMPERATURE, (double)DS4_DEFAULT_TOP_P, + DS4_DEFAULT_TOP_K, (double)DS4_DEFAULT_MIN_P, + (double)DS4_TOOL_STRUCTURAL_TEMPERATURE); buf_printf(b, ",\"context_memory\":{" @@ -11200,6 +11215,7 @@ static void append_props_json(buf *b, server *s) { mem.raw_cap, mem.comp_cap); + /* kv.len is only mutated on the worker thread; lockless read here matches the rest of kv.* in this function. */ buf_puts(b, ",\"kv_disk_cache\":{"); buf_printf(b, "\"enabled\":%s,\"dir\":", s->kv.enabled ? "true" : "false"); append_nullable_string_json(b, s->kv.enabled ? s->kv.dir : NULL); @@ -11233,6 +11249,7 @@ static void append_props_json(buf *b, server *s) { tool_ids, (unsigned long long)tool_bytes); + // Keep this list in sync with the GET/POST routing in client_main(). buf_puts(b, ",\"api\":{" "\"endpoints\":[" @@ -11281,22 +11298,6 @@ static void client_done(server *s) { pthread_mutex_unlock(&s->mu); } -static bool client_peer_is_loopback(int fd) { - struct sockaddr_storage ss; - socklen_t len = sizeof(ss); - if (getpeername(fd, (struct sockaddr *)&ss, &len) != 0) return false; - if (ss.ss_family == AF_INET) { - const struct sockaddr_in *in = (const struct sockaddr_in *)&ss; - uint32_t addr = ntohl(in->sin_addr.s_addr); - return (addr >> 24) == 127; - } - if (ss.ss_family == AF_INET6) { - const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)&ss; - return IN6_IS_ADDR_LOOPBACK(&in6->sin6_addr); - } - return false; -} - static void set_client_socket_nonblocking(int fd); static void *client_main(void *arg) { @@ -11322,8 +11323,7 @@ static void *client_main(void *arg) { goto done; } if (!strcmp(hr.method, "GET") && !strcmp(hr.path, "/props")) { - if (client_peer_is_loopback(fd)) send_props(s, fd); - else http_error(fd, 404, "unknown endpoint"); + send_props(s, fd); http_request_free(&hr); goto done; } @@ -11758,8 +11758,8 @@ int main(int argc, char **argv) { .default_tokens = cfg.default_tokens, .n_threads = cfg.engine.n_threads, .routed_quant_bits = ds4_engine_routed_quant_bits(engine), - .mtp_enabled = cfg.engine.mtp_path != NULL, - .mtp_draft_tokens = cfg.engine.mtp_draft_tokens, + .mtp_enabled = ds4_engine_has_mtp(engine), + .mtp_draft_tokens = ds4_engine_mtp_draft_tokens(engine), .quality = cfg.engine.quality, .warm_weights = cfg.engine.warm_weights, }; From 33a474ac249cb6147a7bf6dfff76d79b191983de Mon Sep 17 00:00:00 2001 From: Erik LaBianca Date: Mon, 11 May 2026 16:04:56 -0400 Subject: [PATCH 3/3] Wrap over-long /props lockless-read comment The previous polish commit landed a 121-char single-line block comment, which is the longest single-line block comment in the file and past the ~90-char convention used elsewhere in ds4_server.c. Wrap it with the leading-asterisk style used by other multi-line block comments in this file. --- ds4_server.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ds4_server.c b/ds4_server.c index b8fac45..5c31b0f 100644 --- a/ds4_server.c +++ b/ds4_server.c @@ -11215,7 +11215,8 @@ static void append_props_json(buf *b, server *s) { mem.raw_cap, mem.comp_cap); - /* kv.len is only mutated on the worker thread; lockless read here matches the rest of kv.* in this function. */ + /* kv.len is only mutated on the worker thread; lockless read here + * matches the rest of kv.* in this function. */ buf_puts(b, ",\"kv_disk_cache\":{"); buf_printf(b, "\"enabled\":%s,\"dir\":", s->kv.enabled ? "true" : "false"); append_nullable_string_json(b, s->kv.enabled ? s->kv.dir : NULL);