From db6a5949bd75140bd90959d195dede20f4787c5b Mon Sep 17 00:00:00 2001
From: coseto6125 <80243681+coseto6125@users.noreply.github.com>
Date: Tue, 23 Jun 2026 04:31:56 +0800
Subject: [PATCH 1/5] feat(protobuf): capture gRPC service/rpc as Route nodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The protobuf provider extracted only `message` fields — `service { rpc }`
blocks were explicitly ignored, so gRPC service contracts were invisible to
the graph (graph-completeness gap, CLAUDE.md gate A): an LLM tracing "what
endpoints does this service expose" or matching a client stub call to its
definition hit a dead end at the `.proto`.

Add a line-oriented `service`/`rpc` extractor mirroring the existing
`extract_proto_fields` state machine. Each rpc becomes a `RawRoute` (method
`GRPC`, path `/[package.]Service/Method` — the gRPC HTTP/2 wire convention;
the `package.` prefix is present only when the file declares a package),
which the builder finalizes into a `NodeKind::Route` exactly like an HTTP
endpoint.

Reusing `Route` means zero schema change (no new NodeKind/RelType — keeps
rkyv discriminants stable) and gRPC services flow through the existing
`ecp routes` / `ecp contracts` tooling for free; the
`/[package.]Service/Method` path string is the same one a stub call keys on,
so cross-repo contract matching works without new edge types.

Scope: REST cross-service edges (`RelType::Fetches`) already exist and
already normalize dynamic path segments — this closes the gRPC half of the
polyglot service-contract gap. GraphQL remains uncovered.

Tests: provider-level (same verification standard as the existing
`protobuf_schema.rs`) — package-prefixed paths, streaming rpc, multi-service
files, message-only files emit no routes, and message field extraction still
coexists. Config/IaC-style single-grammar detector, so the 14-language rule
does not apply.
--- crates/ecp-analyzer/src/protobuf/mod.rs | 5 +- crates/ecp-analyzer/src/protobuf/parser.rs | 242 +++++++++++++++++- .../tests/protobuf_grpc_service.rs | 98 +++++++ 3 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 crates/ecp-analyzer/tests/protobuf_grpc_service.rs diff --git a/crates/ecp-analyzer/src/protobuf/mod.rs b/crates/ecp-analyzer/src/protobuf/mod.rs index 5f803adcd..4c4f53c54 100644 --- a/crates/ecp-analyzer/src/protobuf/mod.rs +++ b/crates/ecp-analyzer/src/protobuf/mod.rs @@ -16,7 +16,10 @@ //! - `oneof` blocks are not supported — fields inside them are not emitted. //! - `map` fields are not supported — skipped with no emission. //! - `enum` definitions are ignored (no `SchemaField` equivalent). -//! - RPC / service blocks are ignored. +//! - `service { rpc … }` blocks ARE captured: each `rpc` becomes a +//! `NodeKind::Route` (method `GRPC`, path `/Service/Method`) so +//! gRPC service contracts are visible to `ecp routes` / `ecp contracts`. +//! Nested services and rpc request/response message types are not captured. //! - Multi-line comments (`/* … */`) are treated as opaque — a field //! declaration whose line falls inside a block comment may be emitted. //! Single-line `//` comments are stripped correctly. 
diff --git a/crates/ecp-analyzer/src/protobuf/parser.rs b/crates/ecp-analyzer/src/protobuf/parser.rs index 6bc6fe3e9..3e61c9a9b 100644 --- a/crates/ecp-analyzer/src/protobuf/parser.rs +++ b/crates/ecp-analyzer/src/protobuf/parser.rs @@ -7,7 +7,7 @@ use super::schema_extractors::{ classify_protobuf_type, PROTOBUF_FIELD_MODIFIERS, PROTOBUF_FRAMEWORK, }; use ecp_core::analyzer::provider::LanguageProvider; -use ecp_core::analyzer::types::{LocalGraph, RawSchemaField}; +use ecp_core::analyzer::types::{LocalGraph, RawRoute, RawSchemaField}; use std::path::Path; pub struct ProtobufProvider; @@ -33,6 +33,7 @@ impl LanguageProvider for ProtobufProvider { Ok(LocalGraph { file_path: path.to_path_buf(), schema_fields, + routes: extract_proto_services(text), ..Default::default() }) } @@ -112,6 +113,140 @@ fn extract_proto_fields(text: &str) -> Vec { out } +/// Line-oriented `service { rpc … }` extractor — gRPC service contracts. +/// +/// Emits one [`RawRoute`] per `rpc` method so the graph builder finalizes it +/// into a `NodeKind::Route` (same node kind as an HTTP endpoint — an rpc IS a +/// service endpoint). Reusing `Route` lets gRPC services flow through the +/// existing route/contract tooling (`ecp routes`, `ecp contracts`) with no +/// schema change, closing the graph-completeness gap where a `service` block +/// was previously invisible (only `message` fields were captured). +/// +/// `method` is the literal `"GRPC"`; `path` follows the gRPC HTTP/2 wire +/// convention `/Service/Method`, so a `Fetches`-style consumer edge +/// or cross-repo contract match keys on the same string a gRPC stub call uses. +/// +/// Mirrors [`extract_proto_fields`]' state machine: top-level `package` +/// sets the path prefix, a depth-0 `service Name {` opens a service context, +/// and `rpc` lines are read only at `depth == 1` inside that service. 
+fn extract_proto_services(text: &str) -> Vec<RawRoute> {
+    let mut out: Vec<RawRoute> = Vec::new();
+    let mut package: Option<String> = None;
+    let mut current_service: Option<String> = None;
+    let mut depth: u32 = 0;
+
+    for (line_idx, raw_line) in text.lines().enumerate() {
+        let row = line_idx as u32;
+        let line = strip_line_comment(raw_line).trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        // Depth 0 only: `package x.y;` sets the prefix, `service Name {` opens a service.
+        if depth == 0 {
+            if let Some(pkg) = parse_package_line(line) {
+                package = Some(pkg);
+            } else if let Some(name) = parse_service_header(line) {
+                current_service = Some(name);
+            }
+        }
+
+        let opens = line.chars().filter(|&c| c == '{').count() as u32;
+        let closes = line.chars().filter(|&c| c == '}').count() as u32;
+
+        // An rpc belongs to the service when the line STARTS at depth 1. Testing the
+        // post-update depth would drop `rpc M(A) returns (B) {` whose options body
+        // spans several lines: its own `{` has already raised the depth to 2 by then.
+        // Lines inside that options body start at depth 2 and stay skipped.
+        let depth_at_line_start = depth;
+        depth = depth.saturating_add(opens).saturating_sub(closes);
+        if depth == 0 {
+            current_service = None;
+        }
+
+        // rpc methods start at depth 1 inside a known service.
+        let Some(ref service) = current_service else {
+            continue;
+        };
+        if depth_at_line_start != 1 {
+            continue;
+        }
+
+        if let Some(method_name) = parse_rpc_line(line) {
+            let path = match &package {
+                Some(pkg) => format!("/{pkg}.{service}/{method_name}"),
+                None => format!("/{service}/{method_name}"),
+            };
+            out.push(RawRoute {
+                method: "GRPC".to_string(),
+                path,
+                handler: None,
+                span: (row, 0u32, row, line.len() as u32),
+            });
+        }
+    }
+
+    out
+}
+
+/// Parse a top-level `package foo.bar;` line, returning the dotted package name.
+fn parse_package_line(line: &str) -> Option<String> {
+    let rest = line.strip_prefix("package")?;
+    if !rest.starts_with(|c: char| c.is_whitespace()) {
+        return None;
+    }
+    let name = rest.trim().strip_suffix(';')?.trim();
+    if name.is_empty()
+        || !name
+            .chars()
+            .all(|c| c.is_alphanumeric() || c == '_' || c == '.')
+    {
+        return None;
+    }
+    Some(name.to_string())
+}
+
+/// Parse a `service Name {` header, returning the service name.
+fn parse_service_header(line: &str) -> Option { + let rest = line.strip_prefix("service")?; + if !rest.starts_with(|c: char| c.is_whitespace()) { + return None; + } + let rest = rest.trim_start(); + let name_end = rest + .find(|c: char| !c.is_alphanumeric() && c != '_') + .unwrap_or(rest.len()); + if name_end == 0 { + return None; + } + Some(rest[..name_end].to_string()) +} + +/// Parse an `rpc Method(Req) returns (Resp);` line, returning the method name. +/// +/// Tolerates `stream` modifiers and arbitrary whitespace; the request/response +/// message types are not captured (the rpc node carries the method identity — +/// the message shapes are already separate `message` schema-field nodes). +fn parse_rpc_line(line: &str) -> Option { + let rest = line.strip_prefix("rpc")?; + if !rest.starts_with(|c: char| c.is_whitespace()) { + return None; + } + let rest = rest.trim_start(); + // Method name runs up to `(` or whitespace. + let name_end = rest + .find(|c: char| c == '(' || c.is_whitespace()) + .unwrap_or(rest.len()); + if name_end == 0 { + return None; + } + let name = &rest[..name_end]; + if !name.chars().all(|c| c.is_alphanumeric() || c == '_') { + return None; + } + Some(name.to_string()) +} + /// Strip the `//`-prefixed tail of a line (proto single-line comment). 
/// /// Does not attempt to handle `//` inside string literals (proto field @@ -262,4 +397,109 @@ mod tests { assert!(parse_field_line("option java_package = \"com.example\";").is_none()); assert!(parse_field_line("oneof payload {").is_none()); } + + #[test] + fn service_header_parses() { + assert_eq!( + parse_service_header("service Greeter {"), + Some("Greeter".to_string()) + ); + assert_eq!(parse_service_header("message User {"), None); + assert_eq!(parse_service_header("serviceGreeter {"), None); + } + + #[test] + fn package_line_parses() { + assert_eq!( + parse_package_line("package routeguide.v1;"), + Some("routeguide.v1".to_string()) + ); + assert_eq!(parse_package_line("package;"), None); + assert_eq!(parse_package_line("packagefoo;"), None); + } + + #[test] + fn rpc_line_parses() { + assert_eq!( + parse_rpc_line("rpc SayHello(HelloRequest) returns (HelloReply);"), + Some("SayHello".to_string()) + ); + assert_eq!( + parse_rpc_line("rpc ListFeatures(Rectangle) returns (stream Feature) {}"), + Some("ListFeatures".to_string()) + ); + assert_eq!(parse_rpc_line("string name = 1;"), None); + assert_eq!(parse_rpc_line("rpcFoo()"), None); + } + + #[test] + fn service_with_package_emits_grpc_route() { + let proto = "\ +package helloworld; + +service Greeter { + rpc SayHello (HelloRequest) returns (HelloReply); + rpc SayHelloAgain (HelloRequest) returns (HelloReply); +} +"; + let routes = extract_proto_services(proto); + assert_eq!(routes.len(), 2); + assert!(routes.iter().all(|r| r.method == "GRPC")); + assert_eq!(routes[0].path, "/helloworld.Greeter/SayHello"); + assert_eq!(routes[1].path, "/helloworld.Greeter/SayHelloAgain"); + } + + #[test] + fn service_without_package_omits_prefix() { + let proto = "service Echo {\n rpc Ping(Req) returns (Resp);\n}\n"; + let routes = extract_proto_services(proto); + assert_eq!(routes.len(), 1); + assert_eq!(routes[0].path, "/Echo/Ping"); + } + + #[test] + fn streaming_rpc_captured() { + let proto = "\ +package route_guide; 
+service RouteGuide { + rpc RecordRoute(stream Point) returns (RouteSummary) {} + rpc RouteChat(stream RouteNote) returns (stream RouteNote) {} +} +"; + let routes = extract_proto_services(proto); + assert_eq!(routes.len(), 2); + assert_eq!(routes[0].path, "/route_guide.RouteGuide/RecordRoute"); + assert_eq!(routes[1].path, "/route_guide.RouteGuide/RouteChat"); + } + + #[test] + fn message_only_proto_emits_no_routes() { + let proto = "\ +package m; +message User { + string name = 1; + rpc not_a_real_rpc = 2; +} +"; + // A `message`-only file (even one with an `rpc`-looking field name) must + // not produce any gRPC route — `rpc` is only meaningful inside `service`. + assert!(extract_proto_services(proto).is_empty()); + } + + #[test] + fn multiple_services_in_one_file() { + let proto = "\ +package api; +service A { + rpc One(X) returns (Y); +} +service B { + rpc Two(X) returns (Y); +} +"; + let routes = extract_proto_services(proto); + assert_eq!(routes.len(), 2); + assert_eq!(routes[0].path, "/api.A/One"); + assert_eq!(routes[1].path, "/api.B/Two"); + } } diff --git a/crates/ecp-analyzer/tests/protobuf_grpc_service.rs b/crates/ecp-analyzer/tests/protobuf_grpc_service.rs new file mode 100644 index 000000000..3d58194b7 --- /dev/null +++ b/crates/ecp-analyzer/tests/protobuf_grpc_service.rs @@ -0,0 +1,98 @@ +//! gRPC `service { rpc … }` extraction — the protobuf provider emits one +//! `RawRoute` (method `GRPC`, path `/Service/Method`) per rpc so the +//! graph builder finalizes it into a `NodeKind::Route`, closing the +//! graph-completeness gap where service contracts were previously invisible +//! (only `message` fields were captured). +//! +//! Config/IaC-style detector (single grammar, `.proto` only), so the +//! 14-mainstream-language coverage rule does not apply — gRPC is a protobuf +//! construct with no per-language variants. 
+ +use ecp_analyzer::protobuf::ProtobufProvider; +use ecp_core::analyzer::provider::LanguageProvider; +use std::path::Path; + +fn routes(src: &str) -> Vec<(String, String)> { + let provider = ProtobufProvider::new().expect("provider"); + let lg = provider + .parse_file(Path::new("svc.proto"), src.as_bytes()) + .expect("parse"); + lg.routes.into_iter().map(|r| (r.method, r.path)).collect() +} + +#[test] +fn grpc_service_emits_route_per_rpc_with_package_prefix() { + let proto = "\ +syntax = \"proto3\"; +package helloworld; + +message HelloRequest { string name = 1; } +message HelloReply { string message = 1; } + +service Greeter { + rpc SayHello (HelloRequest) returns (HelloReply); + rpc SayHelloAgain (HelloRequest) returns (HelloReply); +} +"; + let r = routes(proto); + assert_eq!(r.len(), 2); + assert!(r.iter().all(|(m, _)| m == "GRPC")); + assert_eq!(r[0].1, "/helloworld.Greeter/SayHello"); + assert_eq!(r[1].1, "/helloworld.Greeter/SayHelloAgain"); +} + +#[test] +fn grpc_streaming_rpc_captured() { + let proto = "\ +package route_guide; +service RouteGuide { + rpc GetFeature(Point) returns (Feature) {} + rpc ListFeatures(Rectangle) returns (stream Feature) {} + rpc RecordRoute(stream Point) returns (RouteSummary) {} + rpc RouteChat(stream RouteNote) returns (stream RouteNote) {} +} +"; + let r = routes(proto); + assert_eq!(r.len(), 4); + assert_eq!(r[0].1, "/route_guide.RouteGuide/GetFeature"); + assert_eq!(r[3].1, "/route_guide.RouteGuide/RouteChat"); +} + +#[test] +fn proto_without_service_emits_no_routes() { + // A pure message file (the pre-existing schema-field case) must not gain + // spurious gRPC routes. + let proto = "\ +package m; +message User { + string name = 1; + repeated int32 ids = 2; +} +"; + assert!(routes(proto).is_empty()); +} + +#[test] +fn proto_still_emits_message_schema_fields() { + // Regression: adding service extraction must not break the original + // message-field path. Both coexist in one file. 
+ let provider = ProtobufProvider::new().expect("provider"); + let proto = "\ +package api; +message Req { + string id = 1; +} +service Svc { + rpc Do(Req) returns (Req); +} +"; + let lg = provider + .parse_file(Path::new("api.proto"), proto.as_bytes()) + .expect("parse"); + assert_eq!(lg.routes.len(), 1, "one rpc route"); + let fields = lg.schema_fields.expect("schema fields present"); + assert!( + fields.iter().any(|f| &*f.name == "id"), + "message field `id` still extracted" + ); +} From b8b42787ca3833641e616af9ea873195ef1d0a29 Mon Sep 17 00:00:00 2001 From: coseto6125 <80243681+coseto6125@users.noreply.github.com> Date: Tue, 23 Jun 2026 04:38:11 +0800 Subject: [PATCH 2/5] fix(routes): let gRPC RawRoutes survive detect_from_call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protobuf provider's `GRPC` RawRoutes were silently dropped at builder.rs Pass 1.5: `detect_from_call` gated every route on the HTTP method allowlist (`HTTP_METHODS.contains`), so `GRPC` matched nothing and returned None — gRPC service contracts parsed correctly by the provider never reached the graph. End-to-end a `.proto` indexed to zero Route nodes despite `parse_file` returning them. Add a gRPC fast path: a `GRPC` method with a wire-format `/…` path is a confirmed service endpoint (the provider emits already-normalized records, not literals to be filtered), so it bypasses the HTTP allowlist whose only job is rejecting non-route call sites. HTTP detection is unchanged — all 21 builder + 25 route_detector tests pass, including the Express `use` mount-point case. Regression coverage: `detect_from_call_accepts_grpc_service_method`, `detect_from_call_grpc_requires_leading_slash` (route_detector), and `grpc_raw_route_promotes_to_route_node` (builder end-to-end: RawRoute → Route node). Verified live: `ecp routes` on a `.proto` now lists `GRPC /pkg.Service/Method` endpoints. 
Note: the sibling gap — protobuf `message` SchemaFields also not reaching the graph (different root cause: messages emit no owner Class node, so schema_field_mirrors drops them) — is NOT fixed here; tracked separately. --- crates/ecp-analyzer/src/resolution/builder.rs | 27 ++++++++++++++++ crates/ecp-analyzer/src/route_detector.rs | 31 +++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/crates/ecp-analyzer/src/resolution/builder.rs b/crates/ecp-analyzer/src/resolution/builder.rs index e11558078..2b46a226a 100644 --- a/crates/ecp-analyzer/src/resolution/builder.rs +++ b/crates/ecp-analyzer/src/resolution/builder.rs @@ -3477,6 +3477,33 @@ mod tests { .to_string() } + /// gRPC end-to-end: a `RawRoute { method: "GRPC", path: "/pkg.Svc/M" }` + /// (as the protobuf provider emits from a `service { rpc }` block) must + /// survive `detect_from_call` and land as a `NodeKind::Route`. Regression + /// for the HTTP-only gatekeeper that previously dropped every gRPC route. + #[test] + fn grpc_raw_route_promotes_to_route_node() { + let mut builder = GraphBuilder::new(); + builder.add_graph(route_local_graph( + "svc.proto", + "GRPC", + "/helloworld.Greeter/SayHello", + "SayHello", + )); + let graph = builder.build(); + + let route_nodes: Vec<_> = graph + .nodes + .iter() + .filter(|n| n.kind == NodeKind::Route) + .collect(); + assert_eq!(route_nodes.len(), 1, "exactly one gRPC Route node"); + assert_eq!( + s(&graph, route_nodes[0].name), + "GRPC /helloworld.Greeter/SayHello" + ); + } + /// TS route emitting `res.json({ id, name })` → RouteShape with /// response_keys `["id", "name"]` (sorted). Locks in that Pass 1.6a /// reads the source via `repo_root` and runs `response_shapes::extract`. 
diff --git a/crates/ecp-analyzer/src/route_detector.rs b/crates/ecp-analyzer/src/route_detector.rs index 61a433260..2d27501f0 100644 --- a/crates/ecp-analyzer/src/route_detector.rs +++ b/crates/ecp-analyzer/src/route_detector.rs @@ -65,6 +65,19 @@ pub fn detect_from_decorator(decorator: &str) -> Option { pub fn detect_from_call(raw: &RawRoute) -> Option { let lower = raw.method.to_lowercase(); + + // gRPC fast path: the protobuf provider emits already-normalized records + // (method `GRPC`, path `/Service/Method`) from `service { rpc }` + // blocks — these are confirmed service endpoints, not literals to be + // filtered, so they bypass the HTTP-method allowlist below (which exists + // to reject non-route call sites that happen to carry a path-like string). + if lower == "grpc" && raw.path.starts_with('/') { + return Some(DetectedRoute { + method: "GRPC".to_string(), + path: raw.path.clone(), + }); + } + let method = HTTP_METHODS.iter().find(|&&m| lower.contains(m))?; // Raw path may arrive wrapped in `"…"` / `'…'` because Python / TS @@ -193,6 +206,24 @@ mod tests { assert_eq!(r.path, "/api/v1"); } + #[test] + fn detect_from_call_accepts_grpc_service_method() { + // The protobuf provider emits `GRPC` / `/pkg.Service/Method` from a + // `service { rpc }` block. `grpc` is not an HTTP method, so without + // the gRPC fast path the route is dropped at builder.rs and the + // service contract never reaches the graph. + let r = detect_from_call(&raw("GRPC", "/helloworld.Greeter/SayHello")).unwrap(); + assert_eq!(r.method, "GRPC"); + assert_eq!(r.path, "/helloworld.Greeter/SayHello"); + } + + #[test] + fn detect_from_call_grpc_requires_leading_slash() { + // A malformed gRPC path without the wire-format leading slash is not a + // valid endpoint — reject rather than emit a junk Route. 
+ assert!(detect_from_call(&raw("GRPC", "Greeter/SayHello")).is_none()); + } + // -- lax helper: per-framework bare-path support ------------------------- #[test] From 611039a4d2c0135df8d5f3e37a31b736b1fbe775 Mon Sep 17 00:00:00 2001 From: coseto6125 <80243681+coseto6125@users.noreply.github.com> Date: Tue, 23 Jun 2026 04:47:39 +0800 Subject: [PATCH 3/5] fix(protobuf): emit message as Struct node so schema fields reach graph MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Proto `message` fields parsed correctly but were dropped end-to-end: a message emitted no owner node, so `schema_field_mirrors` (which resolves each RawSchemaField.owner_class against the SymbolTable to attach HasProperty) found no owner and silently discarded every field. A `.proto` indexed to a lone File node — the schema-field feature was dead through the full pipeline, only ever exercised by parse_file-level unit tests. Emit each top-level `message` carrying ≥1 field as a `NodeKind::Struct` (value-type aggregate, no inheritance/vtable — must not be pattern-matched as a `Class`). The owner lookup now resolves, so SchemaField nodes + HasProperty edges land. Empty messages emit no node (no schema surface to own → would be an orphan). Perf: fold the message-node collection into the existing single `extract_proto_fields` line scan (pending-message flush on block close) rather than adding a second pass over the source — fields and their owner struct come out of one walk. Net cost over the prior behavior is one deferred Vec push per non-empty message, no extra iteration. Tests: new `protobuf_graph_e2e.rs` drives the full ProtobufProvider → GraphBuilder::build pipeline (Struct + SchemaField + HasProperty; empty-message orphan guard; service/message coexistence) — the first proto test exercising the builder rather than parse_file alone. Full ecp-analyzer suite green (2484 tests). 
Closes the SchemaField half of the proto-to-graph gap; the gRPC-route half was fixed in the sibling commit. --- crates/ecp-analyzer/src/protobuf/mod.rs | 8 +- crates/ecp-analyzer/src/protobuf/parser.rs | 60 ++++++++-- .../ecp-analyzer/tests/protobuf_graph_e2e.rs | 107 ++++++++++++++++++ 3 files changed, 167 insertions(+), 8 deletions(-) create mode 100644 crates/ecp-analyzer/tests/protobuf_graph_e2e.rs diff --git a/crates/ecp-analyzer/src/protobuf/mod.rs b/crates/ecp-analyzer/src/protobuf/mod.rs index 4c4f53c54..e7e960bbf 100644 --- a/crates/ecp-analyzer/src/protobuf/mod.rs +++ b/crates/ecp-analyzer/src/protobuf/mod.rs @@ -1,4 +1,5 @@ -//! Protobuf `.proto` file analysis — T4-5 schema-field detector. +//! Protobuf `.proto` file analysis — message structs, schema fields, and +//! gRPC service contracts. //! //! Uses a hand-rolled line-oriented lexer (Option B) because no //! `tree-sitter-protobuf` crate exists in the workspace. The lexer handles @@ -11,6 +12,11 @@ //! } //! ``` //! +//! Each top-level `message` with ≥1 field becomes a `NodeKind::Struct` +//! (value-type aggregate — no inheritance/vtable, distinct from `Class`), +//! owning its fields via `HasProperty`. Without this owner node the schema +//! fields are dropped at `schema_field_mirrors` and never reach the graph. +//! //! **Acknowledged limitations (v1)**: //! - Nested `message` definitions are skipped (parser does not recurse). //! - `oneof` blocks are not supported — fields inside them are not emitted. 
diff --git a/crates/ecp-analyzer/src/protobuf/parser.rs b/crates/ecp-analyzer/src/protobuf/parser.rs index 3e61c9a9b..01127221e 100644 --- a/crates/ecp-analyzer/src/protobuf/parser.rs +++ b/crates/ecp-analyzer/src/protobuf/parser.rs @@ -7,7 +7,8 @@ use super::schema_extractors::{ classify_protobuf_type, PROTOBUF_FIELD_MODIFIERS, PROTOBUF_FRAMEWORK, }; use ecp_core::analyzer::provider::LanguageProvider; -use ecp_core::analyzer::types::{LocalGraph, RawRoute, RawSchemaField}; +use ecp_core::analyzer::types::{LocalGraph, RawNode, RawRoute, RawSchemaField}; +use ecp_core::graph::NodeKind; use std::path::Path; pub struct ProtobufProvider; @@ -27,11 +28,12 @@ impl LanguageProvider for ProtobufProvider { let text = std::str::from_utf8(source) .map_err(|e| anyhow::anyhow!("protobuf: UTF-8 decode error in {:?}: {}", path, e))?; - let fields = extract_proto_fields(text); + let (fields, messages) = extract_proto_fields(text); let schema_fields = (!fields.is_empty()).then(|| fields.into_boxed_slice()); Ok(LocalGraph { file_path: path.to_path_buf(), + nodes: messages, schema_fields, routes: extract_proto_services(text), ..Default::default() @@ -39,7 +41,16 @@ impl LanguageProvider for ProtobufProvider { } } -/// Line-oriented proto lexer. +/// Line-oriented proto lexer — single pass extracting message fields AND the +/// owning `message` as a `NodeKind::Struct` node. +/// +/// The Struct node is load-bearing: `schema_field_mirrors` resolves each +/// `RawSchemaField.owner_class` against the SymbolTable to attach a +/// `HasProperty` edge, and silently drops fields whose owner isn't a known +/// node. Without emitting the message as a node, every proto field was +/// dropped end-to-end (the fields parsed but never reached the graph). +/// `Struct` (not `Class`) because a proto message is a value-type aggregate +/// with no inheritance / vtable — LLMs must not pattern-match OO conventions. 
/// /// State machine: /// - `current_message`: name of the enclosing `message { }` block, or `None` @@ -47,9 +58,16 @@ impl LanguageProvider for ProtobufProvider { /// - `depth`: brace nesting depth. A top-level `message` bumps depth to 1; /// any nested `{` (including nested messages, oneofs, options) bumps it /// further. Fields are only emitted when `depth == 1`. -fn extract_proto_fields(text: &str) -> Vec { +/// +/// Only messages that actually carry ≥1 field get a Struct node — an empty +/// message has no schema surface to own, so a node would be an orphan. +fn extract_proto_fields(text: &str) -> (Vec, Vec) { let mut out: Vec = Vec::new(); + let mut messages: Vec = Vec::new(); let mut current_message: Option = None; + // (name, header_span, has_field) for the open top-level message, deferred + // until the block closes so the node is emitted iff it had a field. + let mut pending: Option<(String, (u32, u32, u32, u32), bool)> = None; let mut depth: u32 = 0; for (line_idx, raw_line) in text.lines().enumerate() { @@ -72,7 +90,8 @@ fn extract_proto_fields(text: &str) -> Vec { // v1 limitation documented in mod.rs. if depth == 0 { if let Some(name) = parse_message_header(line) { - current_message = Some(name); + current_message = Some(name.clone()); + pending = Some((name, (row, 0, row, line.len() as u32), false)); // The `{` on this line is already counted below via `opens`. } } @@ -82,9 +101,14 @@ fn extract_proto_fields(text: &str) -> Vec { depth = depth.saturating_add(opens).saturating_sub(closes); // After depth update: if we just closed the outermost message block, - // clear the message context. + // flush the pending Struct node (iff it owned ≥1 field) and clear ctx. 
if depth == 0 { current_message = None; + if let Some((name, span, has_field)) = pending.take() { + if has_field { + messages.push(message_struct_node(name, span)); + } + } } // ── Field extraction — only at depth 1 inside a known message ─────── @@ -107,10 +131,32 @@ fn extract_proto_fields(text: &str) -> Vec { framework: PROTOBUF_FRAMEWORK, span, }); + if let Some(p) = pending.as_mut() { + p.2 = true; + } } } - out + (out, messages) +} + +/// Build the `NodeKind::Struct` node for a proto `message` (the owner of its +/// schema fields). `owner_class: None` — a top-level message is not nested in +/// another type. +fn message_struct_node(name: String, span: (u32, u32, u32, u32)) -> RawNode { + RawNode { + name, + kind: NodeKind::Struct, + span, + is_exported: true, + heritage: vec![], + type_annotation: None, + decorators: vec![], + calls: vec![], + field_reads: vec![], + owner_class: None, + content_hash: 0, + } } /// Line-oriented `service { rpc … }` extractor — gRPC service contracts. diff --git a/crates/ecp-analyzer/tests/protobuf_graph_e2e.rs b/crates/ecp-analyzer/tests/protobuf_graph_e2e.rs new file mode 100644 index 000000000..90e3bc24b --- /dev/null +++ b/crates/ecp-analyzer/tests/protobuf_graph_e2e.rs @@ -0,0 +1,107 @@ +//! Protobuf end-to-end graph emission: `ProtobufProvider::parse_file` → +//! `GraphBuilder::build()` → final `ZeroCopyGraph`. +//! +//! Regression for the dead-feature gap where proto output never reached the +//! graph: `message` fields were dropped because the message emitted no owner +//! node (`schema_field_mirrors` couldn't resolve the owner class), and gRPC +//! routes were dropped by the HTTP-only `detect_from_call`. End-to-end a +//! `.proto` indexed to a lone File node. This locks in that a message now +//! produces a `Struct` + its `SchemaField`s (+ `HasProperty`) and a service +//! produces `Route`s. +//! +//! All existing proto tests verify `parse_file` in isolation; this is the +//! 
only one that drives the full builder pipeline. + +use ecp_analyzer::protobuf::ProtobufProvider; +use ecp_analyzer::resolution::builder::GraphBuilder; +use ecp_core::analyzer::provider::LanguageProvider; +use ecp_core::graph::{NodeKind, RelType, ZeroCopyGraph}; +use std::path::Path; + +fn build_proto(src: &str) -> ZeroCopyGraph { + let provider = ProtobufProvider::new().expect("provider"); + let lg = provider + .parse_file(Path::new("api.proto"), src.as_bytes()) + .expect("parse"); + let mut builder = GraphBuilder::new(); + builder.add_graph(lg); + builder.build() +} + +fn names_of_kind(graph: &ZeroCopyGraph, kind: NodeKind) -> Vec { + let pool = graph.string_pool.as_slice(); + graph + .nodes + .iter() + .filter(|n| n.kind == kind) + .map(|n| n.name.resolve(pool).to_string()) + .collect() +} + +#[test] +fn message_reaches_graph_as_struct_with_schema_fields() { + let proto = "\ +syntax = \"proto3\"; +package api.v1; + +message User { + string email = 1; + int32 age = 2; +} +"; + let graph = build_proto(proto); + + assert_eq!( + names_of_kind(&graph, NodeKind::Struct), + vec!["User".to_string()], + "message → Struct node" + ); + let mut fields = names_of_kind(&graph, NodeKind::SchemaField); + fields.sort(); + assert_eq!(fields, vec!["age".to_string(), "email".to_string()]); + + // HasProperty: Struct → SchemaField for each field. + let has_property = graph + .edges + .iter() + .filter(|e| { + e.rel_type == RelType::HasProperty + && graph.nodes[e.source as usize].kind == NodeKind::Struct + && graph.nodes[e.target as usize].kind == NodeKind::SchemaField + }) + .count(); + assert_eq!(has_property, 2, "User owns both fields via HasProperty"); +} + +#[test] +fn empty_message_emits_no_struct_node() { + // A message with no fields has no schema surface to own — emitting a node + // would leave an orphan with no HasProperty edge. 
+ let proto = "package m;\nmessage Empty {\n}\n"; + let graph = build_proto(proto); + assert!(names_of_kind(&graph, NodeKind::Struct).is_empty()); +} + +#[test] +fn service_and_messages_coexist_in_graph() { + let proto = "\ +package api; + +message Req { + string id = 1; +} + +service Svc { + rpc Do (Req) returns (Req); +} +"; + let graph = build_proto(proto); + assert_eq!( + names_of_kind(&graph, NodeKind::Struct), + vec!["Req".to_string()] + ); + assert_eq!( + names_of_kind(&graph, NodeKind::Route), + vec!["GRPC /api.Svc/Do".to_string()] + ); +} From f305b184738ab7d923406783509b75919f3b05ed Mon Sep 17 00:00:00 2001 From: coseto6125 <80243681+coseto6125@users.noreply.github.com> Date: Tue, 23 Jun 2026 04:49:26 +0800 Subject: [PATCH 4/5] perf(protobuf): drop redundant current_message state + its per-message clone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `extract_proto_fields` tracked the open message in two places — `current_message: Option` and `pending.0` — holding the same name, which forced a `name.clone()` on every message header. Derive everything from `pending` instead: `Some` ⟺ inside a top-level message, `pending.0` is the owner name for field attribution. Removes the clone (one heap alloc per message) and one piece of duplicated state. Pure simplification — all 26 proto tests unchanged and green. 
--- crates/ecp-analyzer/src/protobuf/parser.rs | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crates/ecp-analyzer/src/protobuf/parser.rs b/crates/ecp-analyzer/src/protobuf/parser.rs index 01127221e..9ecb6967b 100644 --- a/crates/ecp-analyzer/src/protobuf/parser.rs +++ b/crates/ecp-analyzer/src/protobuf/parser.rs @@ -64,9 +64,10 @@ impl LanguageProvider for ProtobufProvider { fn extract_proto_fields(text: &str) -> (Vec, Vec) { let mut out: Vec = Vec::new(); let mut messages: Vec = Vec::new(); - let mut current_message: Option = None; - // (name, header_span, has_field) for the open top-level message, deferred - // until the block closes so the node is emitted iff it had a field. + // The open top-level message: (name, header_span, has_field). Holds the + // owner name for field attribution AND defers the Struct-node emission to + // block-close so it lands iff the message owned ≥1 field. `Some` ⟺ inside + // a top-level message. let mut pending: Option<(String, (u32, u32, u32, u32), bool)> = None; let mut depth: u32 = 0; @@ -90,7 +91,6 @@ fn extract_proto_fields(text: &str) -> (Vec, Vec) { // v1 limitation documented in mod.rs. if depth == 0 { if let Some(name) = parse_message_header(line) { - current_message = Some(name.clone()); pending = Some((name, (row, 0, row, line.len() as u32), false)); // The `{` on this line is already counted below via `opens`. } @@ -101,9 +101,8 @@ fn extract_proto_fields(text: &str) -> (Vec, Vec) { depth = depth.saturating_add(opens).saturating_sub(closes); // After depth update: if we just closed the outermost message block, - // flush the pending Struct node (iff it owned ≥1 field) and clear ctx. + // flush the pending Struct node (iff it owned ≥1 field). 
if depth == 0 { - current_message = None; if let Some((name, span, has_field)) = pending.take() { if has_field { messages.push(message_struct_node(name, span)); @@ -112,12 +111,12 @@ fn extract_proto_fields(text: &str) -> (Vec, Vec) { } // ── Field extraction — only at depth 1 inside a known message ─────── - let Some(ref owner) = current_message else { + // depth 0 = outside any message; depth ≥ 2 = nested block (oneof, + // nested message, options block) — skip in v1. + let Some(p) = pending.as_mut() else { continue; }; if depth != 1 { - // depth 0 = outside any message; depth ≥ 2 = nested block (oneof, - // nested message, options block) — skip in v1. continue; } @@ -127,13 +126,11 @@ fn extract_proto_fields(text: &str) -> (Vec, Vec) { out.push(RawSchemaField { name: field_name.into_boxed_str(), type_class, - owner_class: Box::from(owner.as_str()), + owner_class: Box::from(p.0.as_str()), framework: PROTOBUF_FRAMEWORK, span, }); - if let Some(p) = pending.as_mut() { - p.2 = true; - } + p.2 = true; } } From 6677c9dde1fec5c395b3f0ad496d8fdbb75cd288 Mon Sep 17 00:00:00 2001 From: coseto6125 <80243681+coseto6125@users.noreply.github.com> Date: Tue, 23 Jun 2026 04:51:23 +0800 Subject: [PATCH 5/5] style(protobuf): factor PendingMessage type alias to satisfy type_complexity The pre-push clippy hook (--all-features -D warnings) flagged the inline Option<(String, (u32,u32,u32,u32), bool)> as type_complexity. Extract a PendingMessage alias. No behavior change. 
--- crates/ecp-analyzer/src/protobuf/parser.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/ecp-analyzer/src/protobuf/parser.rs b/crates/ecp-analyzer/src/protobuf/parser.rs index 9ecb6967b..55b7d8fc6 100644 --- a/crates/ecp-analyzer/src/protobuf/parser.rs +++ b/crates/ecp-analyzer/src/protobuf/parser.rs @@ -61,14 +61,16 @@ impl LanguageProvider for ProtobufProvider { /// /// Only messages that actually carry ≥1 field get a Struct node — an empty /// message has no schema surface to own, so a node would be an orphan. +/// The open top-level message during a single-pass walk: `(name, header_span, +/// has_field)`. Holds the owner name for field attribution AND defers the +/// Struct-node emission to block-close so it lands iff the message owned ≥1 +/// field. `Some` ⟺ inside a top-level message. +type PendingMessage = (String, (u32, u32, u32, u32), bool); + fn extract_proto_fields(text: &str) -> (Vec, Vec) { let mut out: Vec = Vec::new(); let mut messages: Vec = Vec::new(); - // The open top-level message: (name, header_span, has_field). Holds the - // owner name for field attribution AND defers the Struct-node emission to - // block-close so it lands iff the message owned ≥1 field. `Some` ⟺ inside - // a top-level message. - let mut pending: Option<(String, (u32, u32, u32, u32), bool)> = None; + let mut pending: Option<PendingMessage> = None; let mut depth: u32 = 0; for (line_idx, raw_line) in text.lines().enumerate() {