anthropics · iainmcgin · Apr 23, 2026 · Apr 22, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -14,6 +14,7 @@ exclude = [
     "benchmarks/gen-datasets",
     "benchmarks/buffa",
     "benchmarks/prost",
+    "benchmarks/prost-bytes",
     "benchmarks/google",
     "benchmarks/go",
     "examples/addressbook",

diff --git a/README.md b/README.md
@@ -146,72 +146,94 @@ let decoded: MyMessage = serde_json::from_str(&json).unwrap();
 
 ## Performance
 
-Throughput comparison across four representative message types, measured on an Intel Xeon Platinum 8488C (x86_64). Cross-implementation benchmarks run in Docker for toolchain consistency (`task bench-cross`). Higher is better.
+Throughput comparison across five representative message types, measured on an Intel Xeon Platinum 8488C (x86_64). Cross-implementation benchmarks run in Docker for toolchain consistency (`task bench-cross`). Higher is better.
 
 ### Binary decode
 
-![Binary decode throughput](benchmarks/charts/binary-decode.svg)
+![Binary decode — ApiResponse](benchmarks/charts/binary-decode-api_response.svg)
+![Binary decode — LogRecord](benchmarks/charts/binary-decode-log_record.svg)
+![Binary decode — AnalyticsEvent](benchmarks/charts/binary-decode-analytics_event.svg)
+![Binary decode — GoogleMessage1](benchmarks/charts/binary-decode-google_message1_proto3.svg)
+![Binary decode — MediaFrame](benchmarks/charts/binary-decode-media_frame.svg)
 
 <details><summary>Raw data (MiB/s)</summary>
 
-| Message | buffa | buffa (view) | prost | protobuf-v4 | Go |
-|---------|------:|------:|------:|------:|------:|
-| ApiResponse | 834 | 1,413 (+69%) | 766 (−8%) | 712 (−15%) | 270 (−68%) |
-| LogRecord | 768 | 1,921 (+150%) | 681 (−11%) | 873 (+14%) | 249 (−68%) |
-| AnalyticsEvent | 198 | 316 (+60%) | 252 (+28%) | 358 (+81%) | 91 (−54%) |
-| GoogleMessage1 | 1,024 | 1,322 (+29%) | 998 (−3%) | 648 (−37%) | 344 (−66%) |
+| Message | buffa | buffa (view) | prost | prost (bytes) | protobuf-v4 | Go |
+|---------|------:|------:|------:|------:|------:|------:|
+| ApiResponse | 862 | 1,475 (+71%) | 756 (−12%) | 676 (−22%) | 695 (−19%) | 269 (−69%) |
+| LogRecord | 722 | 1,984 (+175%) | 712 (−1%) | 676 (−6%) | 857 (+19%) | 247 (−66%) |
+| AnalyticsEvent | 199 | 320 (+61%) | 254 (+28%) | 194 (−3%) | 361 (+82%) | 88 (−56%) |
+| GoogleMessage1 | 1,014 | 1,341 (+32%) | 956 (−6%) | 931 (−8%) | 639 (−37%) | 338 (−67%) |
+| MediaFrame | 16,816 | 73,004 (+334%) | 9,648 (−43%) | 23,516 (+40%) | 17,633 (+5%) | 1,241 (−93%) |
 
 </details>
 
 ### Binary encode
 
-![Binary encode throughput](benchmarks/charts/binary-encode.svg)
+![Binary encode — ApiResponse](benchmarks/charts/binary-encode-api_response.svg)
+![Binary encode — LogRecord](benchmarks/charts/binary-encode-log_record.svg)
+![Binary encode — AnalyticsEvent](benchmarks/charts/binary-encode-analytics_event.svg)
+![Binary encode — GoogleMessage1](benchmarks/charts/binary-encode-google_message1_proto3.svg)
+![Binary encode — MediaFrame](benchmarks/charts/binary-encode-media_frame.svg)
 
 <details><summary>Raw data (MiB/s)</summary>
 
 | Message | buffa | prost | protobuf-v4 | Go |
 |---------|------:|------:|------:|------:|
-| ApiResponse | 2,613 | 1,680 (−36%) | 1,049 (−60%) | 556 (−79%) |
-| LogRecord | 4,102 | 3,000 (−27%) | 1,666 (−59%) | 302 (−93%) |
-| AnalyticsEvent | 656 | 366 (−44%) | 511 (−22%) | 159 (−76%) |
-| GoogleMessage1 | 2,644 | 1,867 (−29%) | 872 (−67%) | 358 (−86%) |
+| ApiResponse | 2,543 | 1,810 (−29%) | 1,013 (−60%) | 560 (−78%) |
+| LogRecord | 4,018 | 3,093 (−23%) | 1,642 (−59%) | 303 (−92%) |
+| AnalyticsEvent | 656 | 357 (−46%) | 511 (−22%) | 160 (−76%) |
+| GoogleMessage1 | 2,594 | 1,808 (−30%) | 869 (−67%) | 360 (−86%) |
+| MediaFrame | 45,990 | 38,514 (−16%) | 10,463 (−77%) | 1,647 (−96%) |
 
 </details>
 
 ### JSON encode
 
-![JSON encode throughput](benchmarks/charts/json-encode.svg)
+![JSON encode — ApiResponse](benchmarks/charts/json-encode-api_response.svg)
+![JSON encode — LogRecord](benchmarks/charts/json-encode-log_record.svg)
+![JSON encode — AnalyticsEvent](benchmarks/charts/json-encode-analytics_event.svg)
+![JSON encode — GoogleMessage1](benchmarks/charts/json-encode-google_message1_proto3.svg)
+![JSON encode — MediaFrame](benchmarks/charts/json-encode-media_frame.svg)
 
 <details><summary>Raw data (MiB/s)</summary>
 
 | Message | buffa | prost | Go |
 |---------|------:|------:|---:|
-| ApiResponse | 867 | 805 (−7%) | 116 (−87%) |
-| LogRecord | 1,312 | 1,083 (−17%) | 140 (−89%) |
-| AnalyticsEvent | 777 | 758 (−2%) | 51 (−93%) |
-| GoogleMessage1 | 1,021 | 830 (−19%) | 128 (−88%) |
+| ApiResponse | 875 | 943 (+8%) | 114 (−87%) |
+| LogRecord | 1,294 | 1,407 (+9%) | 136 (−89%) |
+| AnalyticsEvent | 786 | 843 (+7%) | 51 (−93%) |
+| GoogleMessage1 | 961 | 1,007 (+5%) | 122 (−87%) |
+| MediaFrame | 1,423 | 1,449 (+2%) | 206 (−86%) |
 
 </details>
 
 ### JSON decode
 
-![JSON decode throughput](benchmarks/charts/json-decode.svg)
+![JSON decode — ApiResponse](benchmarks/charts/json-decode-api_response.svg)
+![JSON decode — LogRecord](benchmarks/charts/json-decode-log_record.svg)
+![JSON decode — AnalyticsEvent](benchmarks/charts/json-decode-analytics_event.svg)
+![JSON decode — GoogleMessage1](benchmarks/charts/json-decode-google_message1_proto3.svg)
+![JSON decode — MediaFrame](benchmarks/charts/json-decode-media_frame.svg)
 
 <details><summary>Raw data (MiB/s)</summary>
 
 | Message | buffa | prost | Go |
 |---------|------:|------:|---:|
-| ApiResponse | 718 | 293 (−59%) | 70 (−90%) |
-| LogRecord | 797 | 690 (−13%) | 110 (−86%) |
-| AnalyticsEvent | 265 | 235 (−11%) | 46 (−83%) |
-| GoogleMessage1 | 646 | 255 (−60%) | 73 (−89%) |
+| ApiResponse | 706 | 303 (−57%) | 67 (−90%) |
+| LogRecord | 757 | 696 (−8%) | 107 (−86%) |
+| AnalyticsEvent | 268 | 233 (−13%) | 45 (−83%) |
+| GoogleMessage1 | 640 | 258 (−60%) | 70 (−89%) |
+| MediaFrame | 1,942 | 1,954 (+1%) | 262 (−87%) |
 
 </details>
 
-**Message types:** ApiResponse (~200 B, flat scalars), LogRecord (~1 KB, strings + map + nested message), AnalyticsEvent (~10 KB, deeply nested + repeated sub-messages), GoogleMessage1 (standard protobuf benchmark message).
+**Message types:** ApiResponse (~200 B, flat scalars), LogRecord (~1 KB, strings + map + nested message), AnalyticsEvent (~10 KB, deeply nested + repeated sub-messages), GoogleMessage1 (standard protobuf benchmark message), MediaFrame (~10 KB, dominated by `bytes` fields — primary body + chunked sub-blobs + named attachments).
 
 **Libraries:** prost 0.13 + pbjson 0.7, protobuf‑v4 (Google Rust/upb, v4.33.1), Go `google.golang.org/protobuf` v1.36.6. protobuf-v4 JSON is not included as it does not provide a JSON codec.
 
+**`prost (bytes)`** uses `prost-build`'s `.bytes(["."])` config so every proto `bytes` field is generated as `bytes::Bytes` instead of `Vec<u8>`, and decodes from a `bytes::Bytes` input to exercise `Bytes`' zero-copy `copy_to_bytes` slicing. The substitution only affects the decode path, so only decode numbers are reported — `prost (bytes)` encode tracks default `prost` by construction. On the four non-bytes messages, `prost (bytes)` is never faster than default `prost`: it is within ~5% on `LogRecord` and `GoogleMessage1`, and measurably slower on `ApiResponse` (~11%, where the per-message `Bytes::clone` refcount overhead isn't offset by any actual zero-copy) and `AnalyticsEvent` (~24%). On `MediaFrame` it runs ~2.4× faster than default `prost` at decode, confirming that prost's feature does pay off when it has bytes fields to work with. buffa views are in a different regime again: they borrow directly from the input buffer for strings, bytes, and nested message bodies, so `buffa (view)` on `MediaFrame` is ~3× the `prost (bytes)` number and ~4.3× `buffa`'s own owned decode. Views also benefit on the four non-bytes messages, where prost's `bytes` feature is inert.
+
 **Owned decode trade-offs:** buffa's owned decode is typically within ±10% of prost, trading a small throughput cost for features prost omits: unknown-field preservation by default, typed `EnumValue<E>` wrappers (not raw `i32`), and a type-stable decode loop that supports recursive message types without manual boxing. The zero-copy view path (`MyMessageView::decode_view`) sidesteps allocation entirely and is the recommended fast decode path. protobuf-v4's decode advantage on deeply-nested messages comes from upb's arena allocator — all sub-messages are bump-allocated in one arena rather than individually boxed.
 
 ## Conformance

diff --git a/Taskfile.yml b/Taskfile.yml
@@ -285,6 +285,15 @@ tasks:
     cmds:
       - cargo bench -- {{.CLI_ARGS}}
 
+  bench-prost-bytes:
+    desc: >-
+      Run the prost `bytes::Bytes` variant (prost-build `.bytes(["."])`
+      config + `Bytes` decode input). Intended as the fair comparison
+      point for buffa's view-based zero-copy decode.
+    dir: benchmarks/prost-bytes
+    cmds:
+      - cargo bench -- {{.CLI_ARGS}}
+
   # ── Cross-implementation comparison (Docker) ───────────────────────
   #
   # Docker is required for google-rs (needs cmake + specific protoc 33.1)
@@ -305,6 +314,12 @@ tasks:
       - docker build -t buffa-bench-prost -f benchmarks/Dockerfile.bench-prost .
       - docker run --rm buffa-bench-prost
 
+  bench-cross-prost-bytes:
+    desc: Run prost-bytes benchmarks in Docker (for bench-cross).
+    cmds:
+      - docker build -t buffa-bench-prost-bytes -f benchmarks/Dockerfile.bench-prost-bytes .
+      - docker run --rm buffa-bench-prost-bytes
+
   bench-cross-google:
     desc: Run Google protobuf-v4 benchmarks in Docker (for bench-cross).
     cmds:
@@ -325,7 +340,7 @@ tasks:
       fast dev-loop regression checking.
     cmds:
       - mkdir -p benchmarks/results
-      - rm -rf benchmarks/results/criterion-buffa benchmarks/results/criterion-prost
+      - rm -rf benchmarks/results/criterion-buffa benchmarks/results/criterion-prost benchmarks/results/criterion-prost-bytes
       - |
         echo "Running buffa benchmarks..."
         docker build -t buffa-bench-buffa -f benchmarks/Dockerfile.bench-buffa .
@@ -340,6 +355,13 @@ tasks:
         docker start -a "$CID" > benchmarks/results/prost.json
         docker cp "$CID:/workspace/benchmarks/prost/target/criterion/." benchmarks/results/criterion-prost/ || echo "(criterion extract failed)"
         docker rm "$CID" > /dev/null
+      - |
+        echo "Running prost-bytes benchmarks..."
+        docker build -t buffa-bench-prost-bytes -f benchmarks/Dockerfile.bench-prost-bytes .
+        CID=$(docker create buffa-bench-prost-bytes)
+        docker start -a "$CID" > benchmarks/results/prost-bytes.json
+        docker cp "$CID:/workspace/benchmarks/prost-bytes/target/criterion/." benchmarks/results/criterion-prost-bytes/ || echo "(criterion extract failed)"
+        docker rm "$CID" > /dev/null
       - |
         echo "Running google benchmarks..."
         docker build -t buffa-bench-google -f benchmarks/Dockerfile.bench-google .

diff --git a/benchmarks/Dockerfile.bench-prost-bytes b/benchmarks/Dockerfile.bench-prost-bytes
@@ -0,0 +1,25 @@
+# Build and run prost benchmarks with `prost-build`'s `.bytes(["."])`
+# substitution enabled, decoding from `bytes::Bytes` input. Intended as a
+# comparison point for buffa's view-based zero-copy decode — see the README's
+# Performance section.
+#
+# Usage:
+#   docker build -t buffa-bench-prost-bytes -f benchmarks/Dockerfile.bench-prost-bytes .
+#   docker run --rm buffa-bench-prost-bytes
+
+FROM rust:1.85-slim AS builder
+
+RUN apt-get update && apt-get install -y \
+    protobuf-compiler pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+COPY benchmarks/proto/        benchmarks/proto/
+COPY benchmarks/datasets/     benchmarks/datasets/
+COPY benchmarks/prost-bytes/  benchmarks/prost-bytes/
+
+RUN cargo install cargo-criterion \
+    && cargo bench --manifest-path benchmarks/prost-bytes/Cargo.toml --no-run
+
+ENTRYPOINT ["cargo", "criterion", "--manifest-path", "benchmarks/prost-bytes/Cargo.toml", "--bench", "protobuf", "--message-format=json", "--", "--warm-up-time", "3", "--measurement-time", "5"]
diff --git a/benchmarks/buffa/Cargo.lock b/benchmarks/buffa/Cargo.lock
diff --git a/benchmarks/buffa/benches/protobuf.rs b/benchmarks/buffa/benches/protobuf.rs
@@ -189,6 +189,24 @@ fn bench_google_message1_view(c: &mut Criterion) {
     group.finish();
 }
 
+fn bench_media_frame_view(c: &mut Criterion) {
+    let dataset = load_dataset(include_bytes!("../../datasets/media_frame.pb"));
+    let bytes = total_payload_bytes(&dataset);
+    let mut group = c.benchmark_group("buffa/media_frame");
+    group.throughput(Throughput::Bytes(bytes));
+
+    group.bench_function("decode_view", |b| {
+        b.iter(|| {
+            for payload in &dataset.payload {
+                let view = MediaFrameView::decode_view(payload).unwrap();
+                criterion::black_box(&view);
+            }
+        });
+    });
+
+    group.finish();
+}
+
 fn bench_api_response(c: &mut Criterion) {
     benchmark_decode::<ApiResponse>(
         c,
@@ -221,6 +239,14 @@ fn bench_google_message1(c: &mut Criterion) {
     );
 }
 
+fn bench_media_frame(c: &mut Criterion) {
+    benchmark_decode::<MediaFrame>(
+        c,
+        "buffa/media_frame",
+        include_bytes!("../../datasets/media_frame.pb"),
+    );
+}
+
 fn bench_api_response_json(c: &mut Criterion) {
     benchmark_json::<ApiResponse>(
         c,
@@ -253,12 +279,21 @@ fn bench_google_message1_json(c: &mut Criterion) {
     );
 }
 
+fn bench_media_frame_json(c: &mut Criterion) {
+    benchmark_json::<MediaFrame>(
+        c,
+        "buffa/media_frame",
+        include_bytes!("../../datasets/media_frame.pb"),
+    );
+}
+
 criterion_group!(
     owned,
     bench_api_response,
     bench_log_record,
     bench_analytics_event,
     bench_google_message1,
+    bench_media_frame,
 );
 
 criterion_group!(
@@ -267,6 +302,7 @@ criterion_group!(
     bench_log_record_view,
     bench_analytics_event_view,
     bench_google_message1_view,
+    bench_media_frame_view,
 );
 
 criterion_group!(
@@ -275,6 +311,7 @@ criterion_group!(
     bench_log_record_json,
     bench_analytics_event_json,
     bench_google_message1_json,
+    bench_media_frame_json,
 );
 
 criterion_main!(owned, views, json);
diff --git a/benchmarks/buffa/src/lib.rs b/benchmarks/buffa/src/lib.rs
@@ -1,13 +1,40 @@
 //! Generated protobuf types for buffa benchmarks.
 
+#[allow(
+    clippy::derivable_impls,
+    clippy::enum_variant_names,
+    clippy::match_single_binding,
+    clippy::upper_case_acronyms,
+    non_camel_case_types,
+    unused_imports,
+    dead_code
+)]
 pub mod bench {
     include!(concat!(env!("OUT_DIR"), "/bench_messages.rs"));
 }
 
+#[allow(
+    clippy::derivable_impls,
+    clippy::enum_variant_names,
+    clippy::match_single_binding,
+    clippy::upper_case_acronyms,
+    non_camel_case_types,
+    unused_imports,
+    dead_code
+)]
 pub mod benchmarks {
     include!(concat!(env!("OUT_DIR"), "/benchmarks.rs"));
 }
 
+#[allow(
+    clippy::derivable_impls,
+    clippy::enum_variant_names,
+    clippy::match_single_binding,
+    clippy::upper_case_acronyms,
+    non_camel_case_types,
+    unused_imports,
+    dead_code
+)]
 pub mod proto3 {
     include!(concat!(env!("OUT_DIR"), "/benchmark_message1_proto3.rs"));
 }