diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0889639..7500ae2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,7 @@ permissions: jobs: go: strategy: + fail-fast: false matrix: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} @@ -18,9 +19,9 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: jdx/mise-action@v2 with: - go-version-file: 'go.mod' + install_args: go golangci-lint - name: Build run: go build ./... @@ -29,7 +30,7 @@ jobs: run: go test ./... -count=1 -race -coverprofile=coverage.out - name: Lint - uses: golangci/golangci-lint-action@v7 + run: golangci-lint run - name: Upload coverage to Codecov if: matrix.os == 'ubuntu-latest' @@ -42,6 +43,7 @@ jobs: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Coverage summary + if: matrix.os == 'ubuntu-latest' uses: actions/github-script@v7 with: script: | @@ -56,6 +58,7 @@ jobs: dotnet: strategy: + fail-fast: false matrix: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 0000000..1ca9c4b --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,59 @@ +name: Fuzz + +on: + schedule: + - cron: '0 4 * * 1' # Monday 4am UTC + workflow_dispatch: + inputs: + fuzztime: + description: 'Fuzz duration per target (Go duration string)' + default: '5m' + type: string + +permissions: + contents: read + +jobs: + fuzz: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: + - FuzzDecode + - FuzzUnmarshalNew + - FuzzReadString + - FuzzParseIntLiteral + - FuzzParseType + steps: + - uses: actions/checkout@v4 + + - uses: jdx/mise-action@v2 + with: + install_args: go + + - name: Restore fuzz corpus + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-test-fuzz + encoding/testdata/fuzz + key: fuzz-corpus-${{ matrix.target }}-week-${{ github.run_number }} + restore-keys: | + fuzz-corpus-${{ matrix.target 
}}-week- + fuzz-corpus-${{ matrix.target }}- + + - name: Fuzz ${{ matrix.target }} + run: | + go test ./encoding/ \ + -fuzz=${{ matrix.target }} \ + -fuzztime=${{ inputs.fuzztime || '5m' }} \ + -race + + - name: Upload crash artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: fuzz-crash-${{ matrix.target }} + path: encoding/testdata/fuzz/${{ matrix.target }}/ + retention-days: 30 diff --git a/.golangci.yml b/.golangci.yml index a1f4427..87bdb14 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -13,8 +13,18 @@ linters: - unused - ineffassign - misspell + - gosec + - nilerr + - exhaustive settings: errcheck: exclude-functions: - (io.Closer).Close - (*bufio.Reader).UnreadByte + gosec: + excludes: + - G104 # unhandled errors — covered by errcheck linter with nolint directives + - G204 # subprocess with variable — used in test builds + - G304 # file open with variable — CLI args and test fixtures + exhaustive: + default-signifies-exhaustive: true diff --git a/.mise.toml b/.mise.toml index 8697280..08702c8 100644 --- a/.mise.toml +++ b/.mise.toml @@ -1,4 +1,5 @@ [tools] go = "1.25" +golangci-lint = "2.11.4" hugo = "latest" node = "22" diff --git a/README.md b/README.md index 65dfbb6..0b52857 100644 --- a/README.md +++ b/README.md @@ -59,40 +59,36 @@ type Config struct { Port int `pakt:"port"` } -data := []byte("host:str = 'localhost'\nport:int = 8080") -var cfg Config -if err := encoding.Unmarshal(data, &cfg); err != nil { - log.Fatal(err) -} +cfg, err := encoding.UnmarshalNew[Config](data) ``` -### Streaming Decode (Events) +### Streaming (UnitReader) ```go -dec := encoding.NewDecoder(reader) -defer dec.Close() -for { - ev, err := dec.Decode() - if err == io.EOF { break } - fmt.Println(ev.Kind, ev.Name, ev.Value) +ur := encoding.NewUnitReader(reader) +defer ur.Close() +for prop := range ur.Properties() { + switch prop.Name { + case "config": + cfg, err := encoding.ReadValue[Config](ur) + case "events": + for event := range 
encoding.PackItems[LogEvent](ur) { + process(event) + } + } } +if err := ur.Err(); err != nil { ... } ``` -### Streaming Unmarshal (large datasets) - -Process stream entries one at a time with constant memory: +### Event-Level Decode ```go dec := encoding.NewDecoder(reader) defer dec.Close() - -// Read top-level fields into a struct -for dec.More() { - var entry FSEntry - if err := dec.UnmarshalNext(&entry); err != nil { - break - } - process(entry) +for { + ev, err := dec.Decode() + if err == io.EOF { break } + fmt.Println(ev.Kind, ev.Name, string(ev.Value)) } ``` diff --git a/cli.go b/cli.go index 15e5e98..b1728ad 100644 --- a/cli.go +++ b/cli.go @@ -19,14 +19,12 @@ type CLI struct { // ParseCmd reads a PAKT file and emits streaming events to stdout. type ParseCmd struct { File string `arg:"" help:"Path to .pakt file (use - for stdin)." type:"existingfile"` - Spec string `short:"s" optional:"" help:"Path to .spec.pakt for projection." type:"existingfile" env:"PAKT_SPEC"` Format string `short:"f" enum:"text,json" default:"text" help:"Output format (text or json)." env:"PAKT_FORMAT"` } // ValidateCmd checks a PAKT file for errors without emitting events. type ValidateCmd struct { File string `arg:"" help:"Path to .pakt file (use - for stdin)." type:"existingfile"` - Spec string `short:"s" optional:"" help:"Path to .spec.pakt for projection." type:"existingfile" env:"PAKT_SPEC"` } // VersionCmd prints version information. 
@@ -41,17 +39,7 @@ func (c *ParseCmd) Run(cli *CLI) error { defer func() { _ = r.Close() }() dec := encoding.NewDecoder(r) - - if c.Spec != "" { - specFile, err := os.Open(c.Spec) - if err != nil { - return fmt.Errorf("opening spec: %w", err) - } - defer func() { _ = specFile.Close() }() - if err := dec.SetSpec(specFile); err != nil { - return fmt.Errorf("loading spec: %w", err) - } - } + defer dec.Close() jsonEnc := json.NewEncoder(os.Stdout) @@ -85,17 +73,7 @@ func (c *ValidateCmd) Run(cli *CLI) error { defer func() { _ = r.Close() }() dec := encoding.NewDecoder(r) - - if c.Spec != "" { - specFile, err := os.Open(c.Spec) - if err != nil { - return fmt.Errorf("opening spec: %w", err) - } - defer func() { _ = specFile.Close() }() - if err := dec.SetSpec(specFile); err != nil { - return fmt.Errorf("loading spec: %w", err) - } - } + defer dec.Close() hasErrors := false for { diff --git a/cli_test.go b/cli_test.go index 561e56e..3b61d57 100644 --- a/cli_test.go +++ b/cli_test.go @@ -130,19 +130,6 @@ func TestParseStdin(t *testing.T) { } } -func TestParseWithSpec(t *testing.T) { - cmd := exec.Command(binaryPath, "parse", "testdata/valid/full.pakt", - "--spec", "testdata/valid/spec-example.spec.pakt") - out, err := cmd.Output() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - lines := strings.Split(strings.TrimSpace(string(out)), "\n") - if len(lines) == 0 { - t.Fatal("expected output with spec projection, got none") - } -} - func TestFormatEnvVar(t *testing.T) { cmd := exec.Command(binaryPath, "parse", "testdata/valid/scalars.pakt") cmd.Env = append(os.Environ(), "PAKT_FORMAT=json") diff --git a/codecov.yml b/codecov.yml index 3d336e3..0d12f91 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,7 +9,7 @@ coverage: threshold: 2% patch: default: - target: 70% + target: 65% ignore: - "main.go" diff --git a/design/deserialization-design.md b/design/deserialization-design.md new file mode 100644 index 0000000..fc7fb93 --- /dev/null +++ 
b/design/deserialization-design.md @@ -0,0 +1,1592 @@ +# Deserialization Design — PAKT + +## Problem Statement + +What should deserialization look like for PAKT — a typed, streaming, self-describing data interchange format? This document is a design exploration: principles and API sketches for what a streaming-first deserialization architecture should be, independent of specific language implementations. + +**Design constraints:** +- **Streaming-first:** The entire design is streaming-first; materialization is sugar. This was a deliberate choice — PAKT's pack statements are the primary use case, not an advanced mode. +- **Custom deserializers:** Essential for real-world use; must participate in the stream (receive a reader, not pre-materialized data). Decided: per-field and per-host-type registration only — per-PAKT-type converters were rejected as too broad (can hijack unrelated target types). +- **No dynamic/untyped document model:** PAKT is typed; callers always have a target type. Deserializing into `any`/`object` is an error. +- **Cross-ecosystem consistency:** Share design principles across Go and .NET; API shape is fully idiomatic per-ecosystem. +- **Part 1** provides conceptual principles and pseudocode; **Part 2** provides concrete Go 1.25 and .NET 10 / C# 14 API designs. + +--- + +## 1. What Makes PAKT Different (and Why It Matters for Deserialization) + +Five characteristics of PAKT drive the deserialization design away from the JSON/YAML model: + +### 1.1 Self-Describing at the Property Level + +Every top-level statement carries its type: `server:{host:str, port:int} = {'localhost', 8080}`. The parser validates values against the type annotation during parsing. By the time the deserializer sees data, it's **guaranteed well-typed** per the annotation. + +**Implication:** The deserializer's job is *mapping*, not *validating*. It doesn't ask "is this really an int?" — the parser already checked. It asks "can I fit this PAKT int into a Go int32?" 
(narrowing) or "does this PAKT struct have a field the target type expects?" (compatibility). + +### 1.2 Keyed Struct Types, Positional Struct Values + +PAKT struct *types* are keyed — they declare named, typed fields: `{host:str, port:int}`. But struct *values* are positional — they contain bare values matched left-to-right against the type's field declarations: `{'localhost', 8080}`. The parser resolves value positions to field names using the type annotation before the deserializer ever sees the data. + +**Implication:** Unlike JSON (where the deserializer matches `"host"` keys to struct fields), in PAKT the parser has already done that mapping. The event stream delivers named, typed values — the names come from the type, not the value. Deserialization is a simpler mapping step. + +### 1.3 Packs Are the Streaming Primitive + +Pack statements (`<<`) deliver open-ended sequences of values, terminated by end-of-unit or the next statement. They're designed for streaming: log lines, rows, events. + +**Implication:** The deserialization API must make packs feel natural to process one element at a time. This isn't an "advanced" mode — it's the primary use case for pack statements. + +### 1.4 The Decoder Is Lossless; Interpretation Is Layered + +The spec (§0.1, Principle 3) says: *"A conforming decoder preserves all information... Policy decisions such as rejecting duplicates belong to higher-level consumers."* + +**Implication:** Deserialization IS the higher-level consumer. It makes policy decisions: duplicate handling, unknown-field handling, type coercion rules. These policies should be explicit and configurable, not hidden. + +### 1.5 Type Context Flows With the Data + +The spec (§0.1, Principle 2): *"Every value carries or inherits its type. The parser never guesses."* + +**Implication:** The deserializer can always compare the data's declared type with the target type *before* reading any values. 
This enables early, precise errors — "field `port` is declared `str` in the data but the target expects `int`" — rather than the "strconv.Atoi failed" errors you get with JSON. + +--- + +## 2. Design Principles for PAKT Deserialization + +Derived from PAKT's spec principles and the streaming-first constraint: + +### P1. The Stream Is the Primitive + +The most fundamental deserialization operation is: **read one value from the stream into a typed host-language target.** Everything else — reading a full unit, reading a pack — is built on repetition of this operation. + +There is no "buffer everything then map." The deserializer pulls from the stream, one value at a time. + +### P2. Property Headers Are the Navigation Layer + +A PAKT unit is a sequence of statements. Each statement has a header (name, type, assign/pack). The **statement header** is how the deserializer navigates: + +1. Read header → know what's coming (name, type, pack?) +2. Decide what to do (deserialize into field X, skip, stream elements) +3. Read values + +This is a **pull model**: the caller decides when to advance and what to read. The deserializer never reads ahead of the caller's request. + +### P3. Type Compatibility Is Checked Early + +Because PAKT carries type annotations, the deserializer should compare the data type with the target type **before reading values** — at the statement header or composite entry point. This gives precise, early errors. + +### P4. Custom Deserializers Participate in the Stream + +A custom deserializer receives a positioned reader and the declared PAKT type. It reads from the stream — it doesn't receive a pre-materialized value. This keeps the streaming contract intact: no hidden buffering. + +### P5. Policy Is Explicit + +Decisions that the spec leaves to "higher-level consumers" — duplicate handling, unknown fields, type coercion — must be visible and configurable. Default policies should be documented and unsurprising. + +--- + +## 3. 
The Deserialization Tiers + +### Tier 0: Event Stream (the decoder) + +**Already exists.** The decoder emits one event per grammatical construct. This is the building block but not a deserialization interface. + +``` +decoder = NewDecoder(stream) +while event = decoder.Decode(): + // EventAssignStart, EventScalarValue, EventStructStart, ... +``` + +**Who uses this:** Tool builders, formatters, custom stream processors. Not typical deserialization. + +--- + +### Tier 1: Unit Reader (the primary interface) + +The streaming-first deserialization primitive. Reads one statement at a time. Within a statement, reads one typed value (or iterates pack elements). + +```pseudocode +reader = NewUnitReader(stream) + +while reader.NextStatement(): + name = reader.Name() // "server", "events", etc. + type = reader.Type() // the PAKT type annotation + isPack = reader.IsPack() // true if << + + if isPack: + while reader.HasMore(): + item = reader.ReadValue<T>() // one pack element + process(item) + else: + value = reader.ReadValue<T>() // the single assign value + handle(name, value) +``` + +**Key properties:** +- **Pull-based.** The caller decides when to advance. +- **Type-aware.** `reader.Type()` gives the declared PAKT type before any value is read. +- **Generic over the target type.** `ReadValue<T>()` maps the PAKT value to `T` using the type metadata system (reflection, source generation, or custom deserializer). +- **Skip-friendly.** If the caller doesn't recognize a statement, they call `reader.Skip()` to advance past it without allocating. +- **Pack-native.** `HasMore()` + `ReadValue<T>()` is the natural pack iteration pattern. No special API — same `ReadValue<T>()`, just called in a loop. + +**Streaming contract:** At any point, only the current statement's current value is in flight. No look-ahead. Constant memory per nesting level. + +#### What `ReadValue<T>()` Does + +This is the core mapping operation. Given a PAKT type and value stream, produce a `T`: + +1. 
**Check compatibility** between PAKT type and `T`. If incompatible, error early. +2. **Scalars:** Read the scalar literal, convert to `T`. Validate narrowing (int overflow, etc.). +3. **Composites:** Push into the composite, read child values, map to `T`'s fields/elements. +4. **Custom deserializers:** If `T` has a registered custom deserializer, delegate to it. +5. **Nullable:** If the value is `nil`, set `T` to its null representation (pointer, Optional, etc.). + +#### Heterogeneous Units + +Real PAKT units often have different types for different statements: + +```pakt +name:str = 'myservice' +version:(int, int, int) = (2, 1, 0) +config:{host:str, port:int} = {'localhost', 8080} +events:[{ts:ts, level:str, msg:str}] << + {2026-06-01T14:30:00Z, 'info', 'started'} +``` + +The statement reader handles this naturally: + +```pseudocode +reader = NewUnitReader(stream) + +while reader.NextStatement(): + switch reader.Name(): + case "name": + name = reader.ReadValue() + case "version": + version = reader.ReadValue() + case "config": + config = reader.ReadValue() + case "events": + while reader.HasMore(): + event = reader.ReadValue() + process(event) + default: + reader.Skip() +``` + +--- + +### Tier 2: Whole-Unit Materialization (sugar) + +Built on Tier 1. Reads all statements in a unit and maps them to fields of a target struct. + +```pseudocode +func Unmarshal(data, target: &T): + reader = NewUnitReader(data) + fields = TypeMetadata.Fields() // cached field info + + while reader.NextStatement(): + field = fields.FindByPaktName(reader.Name()) + if field is None: + reader.Skip() // unknown field policy + continue + + if reader.IsPack: + collection = field.AsCollection() + while reader.HasMore(): + elem = reader.ReadValue() + collection.Add(elem) + else: + value = reader.ReadValue() + field.Set(target, value) +``` + +**This is sugar.** It loops `NextStatement()` and dispatches `ReadValue()` for each field. 
The implementation can be generated (source gen), reflected (runtime reflection), or hand-written — the pattern is the same. + +**Materialization is a convenience wrapper over the streaming reader, not a parallel implementation.** Both code paths should use the same underlying `ReadValue` logic. + +--- + +### Tier 3: Custom Deserializers + +A custom deserializer is a user-defined function that takes over the deserialization of a specific type. It participates in the stream — it receives a reader positioned at the value, not a pre-materialized result. + +#### The Interface + +```pseudocode +interface ValueDeserializer: + // Called when a PAKT value of a compatible type needs to be deserialized into T. + // `reader` is positioned at the start of the value. + // `paktType` is the declared PAKT type annotation. + // The deserializer MUST consume exactly one complete value from the reader. + Deserialize(reader: ValueReader, paktType: PaktType) → T +``` + +#### What the ValueReader Provides + +For scalars: +```pseudocode +reader.ScalarType() → str | int | dec | float | bool | uuid | date | ts | bin +reader.StringValue() → string // the raw text +reader.IntValue() → int64 // parsed int +reader.DecValue() → decimal // parsed decimal +reader.BoolValue() → bool // parsed bool +// etc. +``` + +For composites: +```pseudocode +reader.IsStruct() → bool +reader.StructFields() → iterator of (name: string, type: PaktType) +reader.ReadField() → T // read next struct field value + +reader.IsList() → bool +reader.ListElement() → PaktType // the element type +reader.ReadElement()→ T // read next list element +reader.HasMore() → bool // more elements? 
+ +reader.IsMap() → bool +reader.MapKeyType() → PaktType +reader.MapValueType() → PaktType +reader.ReadKey() → K +reader.ReadMapValue()→ V + +reader.IsTuple() → bool +reader.TupleElements() → []PaktType +reader.ReadElement()→ T // read next tuple element +``` + +#### Registration and Precedence + +Custom deserializers attach at two levels, with this precedence (highest first): + +1. **Per field:** "For this specific struct field, use this deserializer." +2. **Per host type:** "Whenever deserializing into type `T`, use this deserializer." + +Lower-precedence deserializers are only consulted if no higher-precedence one matches. + +#### Example: Custom Timestamp Deserializer + +```pseudocode +// A custom deserializer that parses PAKT timestamps into a domain-specific Instant type +struct InstantDeserializer implements ValueDeserializer: + Deserialize(reader, paktType): + raw = reader.StringValue() + return Instant.Parse(raw, myCustomFormat) + +// Registration (per host type) +options.RegisterDeserializer(InstantDeserializer{}) +``` + +#### Example: Custom Struct Deserializer (Validation) + +```pseudocode +// A custom deserializer that adds validation to a Config struct +struct ConfigDeserializer implements ValueDeserializer: + Deserialize(reader, paktType): + config = Config{} + for name, type in reader.StructFields(): + switch name: + case "host": + config.Host = reader.ReadField() + case "port": + port = reader.ReadField() + if port < 1 or port > 65535: + error("port out of range: {port}") + config.Port = port + default: + reader.SkipField() + return config +``` + +--- + +## 4. Key Design Decisions + +### 4.1 Type Compatibility Model + +Because PAKT annotations are validated at parse time, the deserializer deals with **mapping**, not **validation**. 
The compatibility rules: + +| Category | Rule | Example | +|----------|------|---------| +| **Exact match** | PAKT type matches host type directly | `int` → int64, `str` → string | +| **Narrowing** | PAKT type fits into a smaller host type | `int` → int32 (overflow check) | +| **Nullable** | PAKT `type?` maps to host nullable | `str?` → *string, Optional\<string\> | +| **Structural** | PAKT composite maps to host composite | `{host:str}` → Config{Host string} | +| **Extra fields** | Data has fields target doesn't | Skip silently (configurable) | +| **Missing fields** | Target has fields data doesn't | Zero value (configurable) | +| **Atom → enum** | PAKT atom set maps to host enum | `\|a,b,c\|` → enum{A,B,C} | +| **Custom** | Custom deserializer handles mapping | any → any (user-defined) | + +**Not supported (error):** +- PAKT `str` → host `int` (fundamental type mismatch) +- PAKT non-nullable `nil` (caught at parse time, never reaches deserializer) + +### 4.2 Unknown Property/Field Handling + +**Default policy:** Skip silently. This enables forward compatibility — new fields can be added to data without breaking old consumers. + +**Configurable policies:** +- `Skip` (default) — unknown fields ignored +- `Error` — unknown fields are an error (strict mode) + +### 4.2b Missing Field Handling + +**Default policy:** Zero value. If the target type expects a field that the PAKT data doesn't contain, the field retains its zero/default value. + +**Configurable policies:** +- `ZeroValue` (default) — missing fields get the type's zero value +- `Error` — missing required fields are an error (strict mode) + +### 4.3 Duplicate Property Handling + +The decoder preserves duplicates. The deserializer must choose a policy: + +**Default policy:** Last-wins for struct targets (consistent with most config systems). 
+ +**Configurable policies:** +- `LastWins` (default) — last value overwrites previous +- `FirstWins` — first value kept, subsequent ignored +- `Error` — duplicate is an error +- `Accumulate` — append to a collection (if target is a collection type) + +### 4.4 Atom Set Mapping + +PAKT atom sets (`|dev, staging, prod|`) are constrained string enumerations. Mapping options: + +- **String:** The simplest. Atom values are strings. No compile-time safety. +- **Enum:** Host language enum type. The deserializer validates that the atom value matches a known enum member. +- **Custom deserializer:** Full control. + +The default should be string (lowest friction). Enum mapping should be opt-in via type metadata (struct tags, attributes, etc.). + +### 4.5 Tuple Mapping + +PAKT tuples (`(int, str, bool)`) are heterogeneous and positional. Host language mapping depends on ecosystem: + +- **Go:** Struct with fields matched positionally. The first field gets the first tuple element, second gets second, etc. Field names are irrelevant — only count and types matter. A fixed-size array works when all elements share a type. +- **.NET:** `ValueTuple` or positional record. The tuple element types must match positionally. +- **Other:** Language-specific tuple/product types + +The key requirements: +1. The target type must declare exactly as many positional slots as the tuple has elements. +2. Each slot's type must be compatible with the corresponding tuple element's type. +3. Arity mismatch (too few or too many) is always an error — unlike structs, there's no concept of "unknown" or "missing" tuple elements. 
+ +### 4.6 Error Propagation + +Deserialization errors should include: +- **Source position** (line, column) from the PAKT data +- **Property context** (which statement name) +- **Field context** (which field within a composite) +- **The nature of the failure** (type mismatch, overflow, missing field, custom deserializer error) + +Errors are returned immediately (fail-fast), not accumulated. This is consistent with streaming — you can't "continue past" a deserialization error in a stream. + +--- + +## 5. The Streaming Architecture Visualized + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PAKT Byte Stream │ +└────────────────────────────┬────────────────────────────────────┘ + │ + ┌───────▼───────┐ + │ Decoder │ Tier 0: Events + │ (parser + │ EventAssignStart, EventScalarValue, ... + │ state │ [validates type annotations] + │ machine) │ + └───────┬───────┘ + │ + ┌────────▼────────┐ + │ Property │ Tier 1: Statements + │ Reader │ NextStatement() → Name, Type, IsPack + │ │ ReadValue() → one typed value + │ │ HasMore() → pack iteration + └───┬───────┬───┘ + │ │ + ┌─────────▼──┐ ┌─▼───────────────┐ + │ Unmarshal │ │ Custom │ Tier 2 & 3 + │ (sugar) │ │ Deserializers │ + │ │ │ (user-defined) │ + │ Loops │ │ Participate in │ + │ statements │ │ the stream │ + │ maps to │ │ │ + │ struct │ │ │ + │ fields │ │ │ + └────────────┘ └──────────────────┘ +``` + +### The critical invariant + +**Every tier reads from the same stream, in order, without buffering.** Materialization doesn't buffer-then-map; it loops the streaming primitives. Custom deserializers don't receive pre-read data; they read from the stream themselves. + +This means: +- Memory is O(nesting depth), not O(data size) +- Pack elements can be processed and discarded one at a time +- A custom deserializer in the middle of a struct doesn't break the streaming contract + +--- + +## 6. 
Pseudocode Sketches for Common Patterns + +### Pattern A: Config File (whole-unit materialization) + +```pakt +name:str = 'myservice' +host:str = 'localhost' +port:int = 8080 +debug:bool = false +``` + +```pseudocode +type Config struct { + Name string @pakt("name") + Host string @pakt("host") + Port int @pakt("port") + Debug bool @pakt("debug") +} + +config = Unmarshal(data) +// Uses Tier 2 (materialization) internally +``` + +### Pattern B: Streaming Log Processing (pack iteration) + +```pakt +events:[{ts:ts, level:|info,warn,error|, msg:str}] << + {2026-06-01T14:30:00Z, |info, 'server started'} + {2026-06-01T14:31:00Z, |warn, 'high latency'} + {2026-06-01T14:32:00Z, |error, 'connection lost'} +``` + +```pseudocode +reader = NewUnitReader(stream) + +while reader.NextStatement(): + if reader.Name() == "events" and reader.IsPack(): + while reader.HasMore(): + event = reader.ReadValue() + process(event) // constant memory per event +``` + +### Pattern C: Heterogeneous Unit (mixed statement types) + +```pakt +name:str = 'deployment-2026-06-01' +targets:[str] = ['us-east-1', 'eu-west-1'] +config:{replicas:int, image:str} = {3, 'myapp:latest'} +metrics: = <'cpu' ; 0.85, 'mem' ; 0.62> +``` + +```pseudocode +reader = NewUnitReader(stream) + +while reader.NextStatement(): + switch reader.Name(): + case "name": name = reader.ReadValue() + case "targets": targets = reader.ReadValue<[]string>() + case "config": config = reader.ReadValue() + case "metrics": metrics = reader.ReadValue() + default: reader.Skip() +``` + +### Pattern D: Custom Deserializer (semantic validation) + +```pakt +endpoint:{url:str, timeout:int, retries:int} = {'https://api.example.com', 30, 5} +``` + +```pseudocode +struct EndpointDeserializer implements ValueDeserializer: + Deserialize(reader, paktType): + ep = Endpoint{} + for name, type in reader.StructFields(): + switch name: + case "url": + raw = reader.ReadField() + ep.URL = ParseURL(raw) // domain-specific parsing + if ep.URL.Scheme != "https": + 
error("endpoint must use HTTPS") + case "timeout": + ep.Timeout = Duration(reader.ReadField(), Seconds) + case "retries": + n = reader.ReadField() + if n < 0 or n > 10: + error("retries must be 0-10") + ep.Retries = n + return ep +``` + +### Pattern E: Pack with Custom Deserializer (streaming + custom) + +```pakt +rows:[{id:int, data:bin, checksum:str}] << + {1, b'SGVsbG8=', 'sha256:abc123'} + {2, b'V29ybGQ=', 'sha256:def456'} +``` + +```pseudocode +struct VerifiedRowDeserializer implements ValueDeserializer: + Deserialize(reader, paktType): + row = VerifiedRow{} + for name, type in reader.StructFields(): + switch name: + case "id": row.ID = reader.ReadField() + case "data": row.Data = reader.ReadField() + case "checksum": row.Checksum = reader.ReadField() + // Verify integrity before returning + if not VerifyChecksum(row.Data, row.Checksum): + error("checksum mismatch for row {row.ID}") + return row + +// Usage: streaming with per-element verification +reader = NewUnitReader(stream) +while reader.NextStatement(): + if reader.Name() == "rows": + while reader.HasMore(): + row = reader.ReadValue() // custom deserializer runs + store(row) +``` + +--- + +## 7. Open Questions + +### Q1. Should ReadValue support reading into pre-existing values? + +Two modes: +- **Create:** `value = reader.ReadValue()` — allocates and returns a new T +- **Populate:** `reader.ReadValueInto(&existingT)` — populates an existing value + +Populate mode enables buffer reuse in hot loops (reuse the same struct for each pack element). This matters for performance in streaming scenarios. + +**Recommendation:** Support both. Create is the default for ergonomics. Populate is opt-in for performance-sensitive pack processing. + +### Q2. Should the Unit Reader expose the raw event stream? + +Some advanced callers may want to drop down to Tier 0 within a statement (e.g., to implement a custom event-driven processor). Should the statement reader expose its underlying decoder? 
+ +**Recommendation:** Yes, but as an explicit "escape hatch" that clearly documents the contract: once you take the raw decoder, you own advancing it correctly. + +### Q3. How should atom sets interact with custom deserializers? + +Atom sets are validated at parse time — the value is guaranteed to be one of the declared members. Should a custom deserializer for an atom-set-typed field receive the raw atom string, or a pre-validated enum value? + +**Recommendation:** The custom deserializer receives the raw atom string. It can trust the string is a valid member (the parser checked), but it does its own mapping to the host type. This keeps the custom deserializer interface uniform. + +### Q4. Pack element count — should it be available? + +For list packs, the producer doesn't declare an element count. The consumer reads until the pack ends. Should the reader expose a count hint (if known)? + +**Recommendation:** No. The streaming contract means you don't know the count until you've read everything. Callers who need a count should collect into a list. Providing a count hint would violate the streaming-first principle and couldn't be trusted anyway. + +### Q5. Property-level type checking — when and how? + +When `reader.ReadValue()` is called, when does the type check happen? + +- **Eager:** Compare PAKT type annotation with Config's type metadata before reading any values. Fail immediately if incompatible. +- **Lazy:** Read values and let individual field mismatches surface naturally. + +**Recommendation:** Eager for composites (check structural compatibility upfront), lazy for scalars (check at conversion time). This gives the best error messages without unnecessary overhead. + +--- + +--- + +# Part 2: Language-Specific API Design + +> **Constraint:** This API design gives no weight to existing implementations. It asks: given Go 1.25 and .NET 10 / C# 14, what's the ideal API for each ecosystem? + +--- + +## 8. 
Relevant Language & Runtime Features + +### 8.1 Go 1.25 + +| Feature | Relevance to PAKT | +|---------|-------------------| +| **`iter.Seq[V]` / `iter.Seq2[K,V]`** | Pack iteration and composite traversal return iterators. `for event := range reader.Properties()` is idiomatic. | +| **Range-over-func (stable)** | Custom iterators compose with `for...range`. Property readers and pack readers become rangeable. | +| **Generics (no core types)** | `ReadValue[T]()` is now practical. Generic deserialization functions with proper type constraints. | +| **Bounded `sync.Pool`** | Pooled readers, state machines, and buffers with memory pressure control. | +| **PGO (stable)** | Hot paths (scalar conversion, field lookup) optimizable from production profiles. | + +**Not available in Go:** Source generation, compile-time metaprogramming, ref structs, `Span<T>`. Go relies on runtime reflection or code generation tools (go generate). + +### 8.2 .NET 10 / C# 14 + +| Feature | Relevance to PAKT | +|---------|-------------------| +| **Partial constructors** | Source generator can emit constructor logic for deserialization targets. Generated partial ctors initialize type metadata without user boilerplate. | +| **Extension members** | `ReadOnlySpan<byte>.DeserializePakt<T>()` as an extension method/property block. Cleaner API surface without polluting the type. | +| **Implicit `Span<T>` conversions** | `byte[]`, `Memory<byte>`, and `ReadOnlySpan<byte>` all flow into deserializer APIs seamlessly. | +| **`ref struct`** | Reader type lives on the stack. Zero heap allocation for the reader itself. | +| **`IAsyncEnumerable<T>`** | Async pack iteration: `await foreach (var item in reader.ReadPack<T>())`. | +| **Source generators (incremental)** | Compile-time codegen for per-type deserialization delegates. No reflection at runtime. | +| **`field` keyword** | Simplifies generated property accessors in deserialized types. | + +--- + +## 9. 
Go API Design + +### 9.1 Package Structure + +``` +encoding/ # existing package: github.com/trippwill/pakt/encoding + decoder.go # Tier 0: event-level decoder (exists) + reader.go # Tier 1: UnitReader + unmarshal.go # Tier 2: Unmarshal / UnmarshalFrom + converter.go # Tier 3: ValueConverter interface + registry + options.go # DeserializeOptions (policies) + types.go # PaktType, TypeKind (exists) + errors.go # ParseError (exists) +``` + +### 9.2 Tier 0: Decoder (unchanged) + +The event-level decoder exists and is the foundation. No changes needed to its API. + +```go +type Decoder struct { /* ... */ } + +func NewDecoder(r io.Reader) *Decoder +func (d *Decoder) Decode() (Event, error) +func (d *Decoder) Close() +``` + +### 9.3 Tier 1: UnitReader — The Primary API + +The `UnitReader` wraps a decoder and provides a pull-based, statement-at-a-time interface. It's the primary way callers consume PAKT data. + +```go +// UnitReader reads PAKT statements one at a time from a stream. +// It is the primary deserialization interface. +type UnitReader struct { /* unexported fields */ } + +// NewUnitReader creates a reader from any io.Reader. +func NewUnitReader(r io.Reader, opts ...Option) *UnitReader + +// NewUnitReaderFromBytes creates a reader from a byte slice (zero-copy path). +func NewUnitReaderFromBytes(data []byte, opts ...Option) *UnitReader + +// Close releases all pooled resources. Must be called when done. +func (sr *UnitReader) Close() +``` + +#### Property Navigation + +```go +// Property represents a top-level statement header. +// It is valid only until the next iteration step of Properties or a call to Close. +type Property struct { + Name string // statement name (e.g., "server", "events") + Type Type // declared PAKT type annotation + IsPack bool // true if << (pack statement) +} + +// Properties returns an iterator over all statements in the unit. +// Each Property is valid only for the current iteration step. 
+// On error, iteration stops; call sr.Err() to retrieve the error. +// +// Usage: +// for stmt := range reader.Properties() { +// ... +// } +// if err := reader.Err(); err != nil { ... } +func (sr *UnitReader) Properties() iter.Seq[Property] + +// Err returns the first error encountered during iteration, +// or nil if iteration completed successfully. +func (sr *UnitReader) Err() error +``` + +#### Reading Values + +```go +// ReadValue reads the current statement's value (or current pack element) +// and deserializes it into a new value of type T. +// +// For assign statements: reads the single value. +// For pack statements: reads the next element. Call within PackItems loop. +func ReadValue[T any](sr *UnitReader) (T, error) + +// ReadValueInto reads the current value into an existing target. +// This enables buffer reuse in hot pack-processing loops. +func ReadValueInto[T any](sr *UnitReader, target *T) error + +// Skip advances past the current statement or pack element without +// allocating or deserializing. Use for unknown/unwanted statements. +func (sr *UnitReader) Skip() error +``` + +#### Pack Iteration + +```go +// PackItems returns an iterator over the elements of a pack statement. +// Each element is deserialized into type T. +// On error, iteration stops; call sr.Err() to retrieve the error. +// +// Early break: if the caller breaks out of the loop, the iterator +// drains the remaining pack elements (without deserializing them) +// so the reader is positioned at the next statement. +// +// Usage: +// for stmt := range reader.Properties() { +// if stmt.IsPack { +// for item := range PackItems[LogEvent](reader) { +// process(item) +// } +// if err := reader.Err(); err != nil { ... } +// } +// } +func PackItems[T any](sr *UnitReader) iter.Seq[T] + +// PackItemsInto returns an iterator that reuses a caller-provided buffer. +// On each iteration, the buffer is populated with the next element. 
+// The yielded pointer aliases the buffer — do not retain across iterations. +// Early break drains remaining pack elements. +func PackItemsInto[T any](sr *UnitReader, buf *T) iter.Seq[*T] +``` + +#### Complete Tier 1 Example + +```go +func processUnit(r io.Reader) error { + sr := encoding.NewUnitReader(r) + defer sr.Close() + + for stmt := range sr.Properties() { + switch stmt.Name { + case "name": + name, err := encoding.ReadValue[string](sr) + if err != nil { return err } + fmt.Println("Name:", name) + + case "config": + cfg, err := encoding.ReadValue[Config](sr) + if err != nil { return err } + startServer(cfg) + + case "events": + for event := range encoding.PackItems[LogEvent](sr) { + ingest(event) + } + if err := sr.Err(); err != nil { return err } + + default: + sr.Skip() + } + } + return sr.Err() +} +``` + +### 9.4 Tier 2: Whole-Unit Materialization + +Sugar over Tier 1. Reads all statements and maps to struct fields. + +```go +// Unmarshal deserializes a complete PAKT unit from bytes into a struct. +// This is convenience sugar over UnitReader. +func Unmarshal[T any](data []byte, opts ...Option) (T, error) + +// UnmarshalFrom deserializes a complete PAKT unit from a reader. +func UnmarshalFrom[T any](r io.Reader, opts ...Option) (T, error) +``` + +**Key difference from current API:** Returns the value instead of requiring a pre-allocated pointer. Uses generics to infer the return type. The pointer-based `UnmarshalInto` variant exists for buffer reuse: + +```go +// UnmarshalInto deserializes into an existing value. +// Useful when reusing buffers or populating embedded structs. 
+func UnmarshalInto[T any](data []byte, target *T, opts ...Option) error +``` + +#### Struct Tags + +```go +type Config struct { + Host string `pakt:"host"` + Port int `pakt:"port"` + Debug bool `pakt:"debug,omitempty"` + Labels []string `pakt:"labels"` + Meta map[string]string `pakt:"meta"` + Secret string `pakt:"-"` // skip +} +``` + +Tag syntax: `pakt:"name[,option]..."` where options are: +- `omitempty` — omit during marshal when zero +- `-` — skip field entirely + +#### Whole-Unit Example + +```go +type Deployment struct { + Name string `pakt:"name"` + Version [3]int `pakt:"version"` // tuple → fixed array + Config DeployConfig `pakt:"config"` + Metrics map[string]float64 `pakt:"metrics"` +} + +dep, err := encoding.Unmarshal[Deployment](data) +``` + +### 9.5 Tier 3: Custom Value Converters + +Custom converters receive a scoped `ValueReader` — not the full `UnitReader`. This gives them exactly enough API to read one value (scalar or composite) without access to statement-level navigation. + +```go +// ValueReader is a scoped view of the stream, positioned at a single value. +// It provides read access for scalars and navigation for composites. +// A ValueReader is only valid for the duration of the converter call. 
+type ValueReader struct { /* unexported: wraps *UnitReader */ } + +// --- Scalar access (only valid when positioned at a scalar) --- +func (vr *ValueReader) StringValue() (string, error) +func (vr *ValueReader) IntValue() (int64, error) +func (vr *ValueReader) DecValue() (string, error) // string to preserve precision +func (vr *ValueReader) FloatValue() (float64, error) +func (vr *ValueReader) BoolValue() (bool, error) +func (vr *ValueReader) BytesValue() ([]byte, error) +func (vr *ValueReader) IsNil() bool + +// --- Composite navigation --- +func (vr *ValueReader) StructFields() iter.Seq[FieldEntry] +func (vr *ValueReader) ListElements() iter.Seq[ValueReader] +func (vr *ValueReader) MapEntries() iter.Seq[MapValueEntry] +func (vr *ValueReader) TupleElements() iter.Seq[TupleValueEntry] + +// --- Delegated deserialization (for child values) --- +// ReadAs deserializes the current child value using the framework's +// type mapping, converters, and options. This is how converters compose. +func ReadAs[T any](vr *ValueReader) (T, error) + +// --- Skip --- +func (vr *ValueReader) Skip() error + +// --- Error --- +func (vr *ValueReader) Err() error + +type MapValueEntry struct { + Key ValueReader + Value ValueReader +} + +type TupleValueEntry struct { + Index int + Type Type +} +``` + +```go +// ValueConverter converts PAKT values to/from a specific Go type. +// Implementations receive a scoped ValueReader positioned at the value, +// not the full UnitReader. +type ValueConverter[T any] interface { + // FromPakt reads a PAKT value and returns T. + // The ValueReader is positioned at the start of the value. + // The converter MUST consume exactly one complete value. + FromPakt(vr *ValueReader, paktType Type) (T, error) + + // ToPakt writes a value of type T to the encoder. + ToPakt(enc *Encoder, value T) error +} +``` + +#### Registration + +```go +// RegisterConverter registers a ValueConverter for type T. 
+// When deserializing into T, the converter is used instead of +// the default reflection-based mapping. +func RegisterConverter[T any](c ValueConverter[T]) Option + +// Usage: +sr := encoding.NewUnitReader(r, + encoding.RegisterConverter[Instant](InstantConverter{}), + encoding.RegisterConverter[IPAddr](IPAddrConverter{}), +) +``` + +#### Converter Example: Validated Endpoint + +```go +type EndpointConverter struct{} + +func (EndpointConverter) FromPakt(vr *ValueReader, pt Type) (Endpoint, error) { + var ep Endpoint + + for field := range vr.StructFields() { + switch field.Name { + case "url": + raw, err := ReadAs[string](vr) + if err != nil { return ep, err } + u, err := url.Parse(raw) + if err != nil { return ep, fmt.Errorf("invalid URL: %w", err) } + if u.Scheme != "https" { + return ep, fmt.Errorf("endpoint must use HTTPS, got %s", u.Scheme) + } + ep.URL = u + + case "timeout": + secs, err := ReadAs[int64](vr) + if err != nil { return ep, err } + ep.Timeout = time.Duration(secs) * time.Second + + case "retries": + n, err := ReadAs[int](vr) + if err != nil { return ep, err } + if n < 0 || n > 10 { + return ep, fmt.Errorf("retries must be 0-10, got %d", n) + } + ep.Retries = n + + default: + vr.Skip() + } + } + if err := vr.Err(); err != nil { return ep, err } + return ep, nil +} +``` + +#### Composite Navigation Helpers + +These are methods on `ValueReader` (shown above) and also available as free functions for the `UnitReader` level: + +```go +// StructFields returns an iterator over the fields of a struct value. +// Each FieldEntry provides the field name and declared type. +// The caller reads each field's value via ReadAs or Skip. +// Errors stop iteration; call sr.Err() after. +func StructFields(sr *UnitReader) iter.Seq[FieldEntry] + +type FieldEntry struct { + Name string + Type Type +} + +// ListElements returns an iterator over elements of a list value. +// Errors stop iteration; call sr.Err() after. 
+func ListElements[T any](sr *UnitReader) iter.Seq[T] + +// MapEntries returns an iterator over key-value pairs of a map value. +// K is not constrained to comparable — iteration doesn't require hashing. +// Errors stop iteration; call sr.Err() after. +func MapEntries[K, V any](sr *UnitReader) iter.Seq[MapEntry[K, V]] + +type MapEntry[K, V any] struct { + Key K + Value V +} + +// TupleElements returns an iterator for heterogeneous tuples. +// Each entry provides the index and type; the caller reads each +// element with ReadAs of the appropriate type. +func TupleElements(sr *UnitReader) iter.Seq[TupleEntry] + +type TupleEntry struct { + Index int + Type Type +} +``` + +### 9.6 Options & Policies + +```go +type Option func(*options) + +// UnknownFieldPolicy controls behavior when PAKT data contains +// fields not present in the target struct. +func UnknownFields(policy FieldPolicy) Option + +type FieldPolicy int +const ( + SkipUnknown FieldPolicy = iota // default: silently skip + ErrorUnknown // return error on unknown field +) + +// MissingFieldPolicy controls behavior when the target struct has +// fields not present in the PAKT data. +func MissingFields(policy MissingPolicy) Option + +type MissingPolicy int +const ( + ZeroMissing MissingPolicy = iota // default: use zero value + ErrorMissing // return error on missing field +) + +// DuplicatePolicy controls behavior when PAKT data contains +// duplicate statement names or map keys. +func Duplicates(policy DuplicatePolicy) Option + +type DuplicatePolicy int +const ( + LastWins DuplicatePolicy = iota // default: last value wins + FirstWins // first value kept + ErrorDupes // return error on duplicate + Accumulate // append to collection (target must be slice/map) +) +``` + +### 9.7 Error Design + +```go +// DeserializeError wraps a parse error with deserialization context. 
+type DeserializeError struct { + Pos Pos // source position in the PAKT data + Property string // which statement (e.g., "config") + Field string // which field within a composite (e.g., "port") + Message string // human-readable description + Err error // wrapped underlying error (ParseError, type mismatch, etc.) +} + +func (e *DeserializeError) Error() string { + // "config.port (3:12): int64 overflow: value 999999999999999999999" +} +func (e *DeserializeError) Unwrap() error { return e.Err } +``` + +--- + +## 10. .NET API Design + +### 10.1 Namespace & Assembly Structure + +``` +Pakt/ + PaktReader.cs # Tier 0: token-level reader (exists, ref struct) + PaktUnitReader.cs # Tier 1: statement-level streaming + PaktSerializer.cs # Tier 2: whole-unit materialization + Serialization/ + PaktSerializerContext.cs # source-gen context base + PaktTypeInfo.cs # per-type metadata + delegates + PaktConverter.cs # Tier 3: custom converter base + PaktConverterAttribute.cs # field-level converter binding + PaktPropertyAttribute.cs # field name/order/ignore + DeserializeOptions.cs # policies +``` + +### 10.2 Tier 0: PaktReader (unchanged concept) + +The low-level token reader. A `ref struct` backed by `ReadOnlySpan`. Exists today. + +```csharp +public ref struct PaktReader +{ + public bool Read(); + public PaktTokenType TokenType { get; } + public PaktScalarType ScalarType { get; } + public string? StatementName { get; } + public PaktType? StatementType { get; } + // ... scalar accessors: GetString(), GetInt64(), etc. + public void Dispose(); +} +``` + +### 10.3 Tier 1: PaktUnitReader — The Primary API + +A higher-level reader that operates at the statement level. Unlike the raw `PaktReader`, this type is not a `ref struct` — it can be stored, passed, and used with `IAsyncEnumerable`. + +```csharp +/// +/// Reads PAKT statements one at a time from a stream. +/// This is the primary deserialization interface. 
+/// +public sealed class PaktUnitReader : IDisposable, IAsyncDisposable +{ + // --- Construction --- + + public static PaktUnitReader Create( + ReadOnlySpan data, + PaktSerializerContext context, + DeserializeOptions? options = null); + + public static PaktUnitReader Create( + Stream stream, + PaktSerializerContext context, + DeserializeOptions? options = null); + + // --- Property Navigation --- + + /// + /// Advances to the next statement. Returns false when the unit is exhausted. + /// + public bool ReadStatement(); + + /// + /// Async variant for stream-backed readers. + /// + public ValueTask ReadStatementAsync(CancellationToken ct = default); + + /// Current statement name (e.g., "server", "events"). + public string StatementName { get; } + + /// Current statement's declared PAKT type. + public PaktType StatementType { get; } + + /// True if the current statement uses pack syntax (<<). + public bool IsPack { get; } + + // --- Value Reading --- + + /// + /// Deserialize the current statement's value (or current pack element) as T. + /// + public T ReadValue(); + + /// + /// Skip the current statement or pack element without allocating. + /// + public void Skip(); + + // --- Pack Iteration --- + + /// + /// Returns an enumerable of pack elements, deserialized as T. + /// + public IEnumerable ReadPack(); + + /// + /// Returns an async enumerable of pack elements for stream-backed readers. 
+ /// + public IAsyncEnumerable ReadPackAsync(CancellationToken ct = default); + + // --- Resource Management --- + public void Dispose(); + public ValueTask DisposeAsync(); +} +``` + +#### Complete Tier 1 Example + +```csharp +await using var reader = PaktUnitReader.Create(stream, AppContext.Default); + +while (await reader.ReadStatementAsync()) +{ + switch (reader.StatementName) + { + case "name": + var name = reader.ReadValue(); + Console.WriteLine($"Name: {name}"); + break; + + case "config": + var cfg = reader.ReadValue(); + StartServer(cfg); + break; + + case "events": + await foreach (var evt in reader.ReadPackAsync()) + { + Ingest(evt); + } + break; + + default: + reader.Skip(); + break; + } +} +``` + +### 10.4 Tier 2: Whole-Unit Materialization + +Static convenience methods. Sugar over Tier 1. + +```csharp +public static class PaktSerializer +{ + /// + /// Deserialize a complete PAKT unit into T. + /// + public static T Deserialize( + ReadOnlySpan data, + PaktSerializerContext context, + DeserializeOptions? options = null); + + /// + /// Deserialize from a stream. + /// + public static ValueTask DeserializeAsync( + Stream stream, + PaktSerializerContext context, + DeserializeOptions? options = null, + CancellationToken ct = default); + + /// + /// Serialize T into a PAKT unit. + /// + public static byte[] Serialize( + T value, + PaktSerializerContext context, + string statementName); +} +``` + +#### Source-Generated Context + +```csharp +[PaktSerializable(typeof(ServerConfig))] +[PaktSerializable(typeof(LogEvent))] +[PaktSerializable(typeof(Deployment))] +public partial class AppContext : PaktSerializerContext { } + +// Generated by source generator: +// - PaktTypeInfo with Deserialize/Serialize delegates +// - PaktTypeInfo with Deserialize/Serialize delegates +// - etc. 
+// - GetTypeInfo() override dispatching to the correct info +// - Default static singleton +``` + +The source generator uses **partial constructors** (C# 14) to inject initialization: + +```csharp +// Generated code +public partial class AppContext +{ + // C# 14 partial constructor — generator provides the body + public partial AppContext() + { + // Initialize type info cache + _serverConfig = CreateServerConfigTypeInfo(); + _logEvent = CreateLogEventTypeInfo(); + // ... + } +} +``` + +#### Type Configuration Attributes + +```csharp +public class ServerConfig +{ + [PaktProperty("host")] // explicit PAKT name + public string HostName { get; set; } + + public int Port { get; set; } // default: "port" (lowercase first char) + + [PaktIgnore] // excluded from serialization + public string InternalId { get; set; } + + [PaktPropertyOrder(0)] // explicit serialization order + public string Region { get; set; } + + [PaktConverter(typeof(InstantConverter))] // per-field custom converter + public Instant CreatedAt { get; set; } +} +``` + +### 10.5 Tier 3: Custom Converters + +Custom converters receive the raw `PaktReader` (for zero-alloc reads) plus a `PaktConvertContext` that provides access to nested deserialization (for composing with the framework). + +```csharp +/// +/// Base class for custom PAKT value converters. +/// Converters participate in the stream — they read from the reader directly. +/// +public abstract class PaktConverter +{ + /// + /// Read a PAKT value from the reader and return T. + /// The reader is positioned at the start of the value. + /// The converter MUST consume exactly one complete value. + /// Use context.ReadAs<U>() to delegate child value deserialization + /// back to the framework (enables converter composition). + /// + public abstract T Read(ref PaktReader reader, PaktType declaredType, PaktConvertContext context); + + /// + /// Write a value of type T to the writer. 
+ /// + public abstract void Write(PaktWriter writer, T value); +} + +/// +/// Provides deserialization context to custom converters. +/// Enables converters to delegate child value deserialization +/// back to the framework (including other registered converters). +/// +public readonly ref struct PaktConvertContext +{ + /// + /// Deserialize a child value as U using the framework's type mapping, + /// registered converters, and options. + /// + public U ReadAs(ref PaktReader reader); + + /// Skip the current value without deserializing. + public void Skip(ref PaktReader reader); + + /// Access to the serializer context for type info lookup. + public PaktSerializerContext SerializerContext { get; } +} +``` + +#### Registration + +Two levels of precedence (highest first): + +```csharp +// 1. Per-field: via attribute +public class Config +{ + [PaktConverter(typeof(UrlConverter))] + public Uri Endpoint { get; set; } +} + +// 2. Per-type: via context options +var options = new DeserializeOptions +{ + Converters = { new InstantConverter(), new IPAddressConverter() } +}; +``` + +#### Converter Example: Validated Endpoint + +```csharp +public class EndpointConverter : PaktConverter +{ + public override Endpoint Read(ref PaktReader reader, PaktType declaredType, PaktConvertContext context) + { + var ep = new Endpoint(); + + // Expect struct start + reader.Read(); // StructStart + + while (reader.Read()) + { + if (reader.TokenType == PaktTokenType.StructEnd) break; + + switch (reader.CurrentName) + { + case "url": + reader.Read(); + var raw = reader.GetString(); + ep.Url = new Uri(raw); + if (ep.Url.Scheme != "https") + throw new PaktException("endpoint must use HTTPS"); + break; + + case "timeout": + reader.Read(); + ep.Timeout = TimeSpan.FromSeconds(reader.GetInt64()); + break; + + case "retries": + reader.Read(); + var n = (int)reader.GetInt64(); + if (n is < 0 or > 10) + throw new PaktException($"retries must be 0-10, got {n}"); + ep.Retries = n; + break; + + default: + 
context.Skip(ref reader); // use context for skip + break; + } + } + + return ep; + } +} +``` + +#### Composite Navigation Helpers + +Extension methods (using C# 14 extension members) for use in custom converters: + +```csharp +public static class PaktReaderExtensions +{ + extension(ref PaktReader reader) + { + /// + /// Enumerate struct fields. Yields (name, type) pairs. + /// Caller reads each field's value via reader methods or Skip. + /// + public IEnumerable StructFields() + { + while (reader.Read() && reader.TokenType != PaktTokenType.StructEnd) + yield return new(reader.CurrentName!, reader.CurrentType!); + } + + /// + /// Enumerate list elements as T. + /// + public IEnumerable ListElements(PaktSerializerContext ctx) + { + while (reader.Read() && reader.TokenType != PaktTokenType.ListEnd) + yield return ctx.GetTypeInfo()!.Deserialize!(ref reader); + } + + /// + /// Skip the current value (scalar or composite) entirely. + /// + public void SkipValue() { /* depth-aware skip */ } + } +} +``` + +### 10.6 Options & Policies + +```csharp +public sealed class DeserializeOptions +{ + /// + /// How to handle unknown fields in PAKT data. + /// Default: Skip. + /// + public UnknownFieldPolicy UnknownFields { get; init; } = UnknownFieldPolicy.Skip; + + /// + /// How to handle missing fields (target has fields data doesn't). + /// Default: ZeroValue. + /// + public MissingFieldPolicy MissingFields { get; init; } = MissingFieldPolicy.ZeroValue; + + /// + /// How to handle duplicate statement names. + /// Default: LastWins. + /// + public DuplicatePolicy Duplicates { get; init; } = DuplicatePolicy.LastWins; + + /// + /// Custom converters registered by target CLR type. 
+ /// + public IList Converters { get; } = new List(); +} + +public enum UnknownFieldPolicy { Skip, Error } +public enum MissingFieldPolicy { ZeroValue, Error } +public enum DuplicatePolicy { LastWins, FirstWins, Error, Accumulate } +``` + +### 10.7 Error Design + +```csharp +public class PaktDeserializeException : PaktException +{ + public string? StatementName { get; } + public string? FieldName { get; } + public PaktPosition Position { get; } + + // "config.port (3:12): Int64 overflow: value 999999999999999999999" + public override string Message { get; } +} +``` + +--- + +## 11. Cross-Cutting Design Patterns + +### 11.1 Streaming Architecture Invariant + +Both APIs enforce the same invariant: + +> **Every tier reads from the same stream, in order, without buffering.** Materialization loops the streaming primitives. Custom converters read from the stream themselves. + +In Go, this is achieved by having `Unmarshal` internally create a `UnitReader` and iterate it. In .NET, `PaktSerializer.Deserialize` internally creates a `PaktUnitReader`. 
+ +### 11.2 Type Metadata Caching + +| Concern | Go | .NET | +|---------|-----|------| +| Field mapping | `sync.Map` keyed by `reflect.Type` | Source-generated `PaktTypeInfo` | +| Field lookup | `map[string]*fieldInfo` (per-type) | Generated `switch` on field name | +| Type inference | `typeOfReflect(reflect.Type) Type` at runtime | `TypeModelBuilder` at compile-time | +| Converter lookup | Options chain checked at call site | Options chain checked at call site | + +### 11.3 Pack Processing Comparison + +| Pattern | Go | .NET | +|---------|-----|------| +| Iterate | `for item := range PackItems[T](sr)`, then check `sr.Err()` | `foreach (var item in reader.ReadPack())` | +| Async iterate | N/A (use goroutine + channel if needed) | `await foreach (var item in reader.ReadPackAsync())` | +| Buffer reuse | `PackItemsInto[T](sr, &buf)` | Not needed (struct value types are stack-allocated) | +| Early exit | `break` in range loop (yield returns false) | `break` in foreach (IEnumerable disposes) | + +### 11.4 Custom Converter Comparison + +| Concern | Go | .NET | +|---------|-----|------| +| Interface | `ValueConverter[T]` (generic interface) | `PaktConverter` (abstract class) | +| Receives | `*ValueReader` (scoped) + `Type` | `ref PaktReader` + `PaktType` + `PaktConvertContext` | +| Child dispatch | `ReadAs[U](vr)` free function | `context.ReadAs(ref reader)` method | +| Per-field | N/A (use a parent converter with `ReadAs`) | `[PaktConverter(typeof(...))]` attribute | +| Per-type | `RegisterConverter[T](c)` option | `options.Converters.Add(c)` | + +--- + +## 12. Open Questions (Updated) + +### Q1. Go: Should UnitReader be an interface? + +An interface would allow mock implementations for testing. But concrete types are idiomatic Go and enable inlining. **Recommendation:** Concrete type. Provide a test helper that creates a `UnitReader` from a string. + +### Q2. .NET: Streaming invariant for async paths + +The `PaktReader` is a `ref struct` (stack-only, zero-alloc). 
The `PaktUnitReader` needs to support `IAsyncEnumerable` for pack iteration, which requires heap state. The current design has `PaktUnitReader` as a class that internally manages the reader lifecycle. + +**Concern:** Async state machines can't hold `ref struct` fields. The `PaktUnitReader` must buffer at least one token's worth of state to bridge between its internal `PaktReader` and the async enumeration pattern. + +**Recommendation:** Accept this single-token bridge buffer as an implementation detail. The streaming invariant holds at the semantic level: callers still see one value at a time, and memory is O(nesting depth). The `ref struct PaktReader` remains available as the Tier 0 escape hatch for true zero-alloc synchronous scenarios. + +### Q3. Go: Scanner pattern — RESOLVED + +Use `iter.Seq[T]` with `sr.Err() error` checked after the loop. This is the scanner pattern, consistent with `bufio.Scanner` and idiomatic Go. + +### Q4. Go: Early break in pack iterators — RESOLVED + +When a caller breaks out of a `PackItems` loop, the iterator drains the remaining pack elements (skipping without deserializing) so the reader is positioned at the next statement. This is necessary to maintain the streaming invariant. + +### Q5. Both: Converter composition — RESOLVED + +Custom converters compose by delegating child values back to the framework: +- **Go:** `ReadAs[U](vr)` free function on `ValueReader` +- **.NET:** `context.ReadAs(ref reader)` on `PaktConvertContext` + +This enables a converter for `Config` to delegate its `Server` field to the framework (which may invoke another converter), without the parent converter needing to know about child converters. + +### Q6. .NET: Should ReadPack return IEnumerable or a custom type? + +`IEnumerable` is universal but boxes value types. A custom `PaktPackEnumerable` struct could avoid allocation. + +**Recommendation:** Return `IEnumerable` for simplicity. The per-element deserialization cost dwarfs enumerator allocation. 
For the async path, `IAsyncEnumerable` is required. + +### Q7. Map Pack Streaming + +Top-level map packs (`data: << 'a';1\n'b';2`) should be consumable through the same Tier 1 API. The pack iterator yields `MapEntry[K,V]` for map packs and `T` for list packs. The `Property.Type` tells the caller which kind of pack it is. + +**Go:** +```go +for stmt := range sr.Properties() { + if stmt.IsPack && stmt.Type.Kind() == TypeMap { + for entry := range PackItems[MapEntry[string, int]](sr) { + fmt.Printf("%s = %d\n", entry.Key, entry.Value) + } + } +} +``` + +**.NET:** +```csharp +if (reader.IsPack && reader.StatementType.IsMap) +{ + foreach (var entry in reader.ReadPack<MapEntry<string, int>>()) + Console.WriteLine($"{entry.Key} = {entry.Value}"); +} +``` + +### Q8. Behavior for `any`/`object`/interface targets + +Since there is no dynamic document model, attempting to deserialize into `any` (Go) or `object` (.NET) should be an error. The caller must always provide a concrete target type. + +**Recommendation:** Return a clear error: "cannot deserialize into interface type; provide a concrete target type." + + + diff --git a/dotnet/src/Pakt/PaktException.cs b/dotnet/src/Pakt/PaktException.cs index 368d5e6..4fd82c9 100644 --- a/dotnet/src/Pakt/PaktException.cs +++ b/dotnet/src/Pakt/PaktException.cs @@ -11,17 +11,14 @@ public enum PaktErrorCode /// Input ends before a construct is complete. UnexpectedEof = 1, - /// Reserved (formerly duplicate_name; see spec §6.1). - DuplicateName = 2, - /// A value does not conform to its declared type. - TypeMismatch = 3, + TypeMismatch = 2, /// nil assigned to a non-nullable type. - NilNonNullable = 4, + NilNonNullable = 3, /// Lexical or grammatical error (catch-all). 
- Syntax = 5, + Syntax = 4, } /// diff --git a/dotnet/tests/Pakt.Tests/CoreTypeTests.cs b/dotnet/tests/Pakt.Tests/CoreTypeTests.cs index 0810345..84d1396 100644 --- a/dotnet/tests/Pakt.Tests/CoreTypeTests.cs +++ b/dotnet/tests/Pakt.Tests/CoreTypeTests.cs @@ -128,10 +128,9 @@ public void Constructor_WithInnerException_Preserves() [Theory] [InlineData(PaktErrorCode.UnexpectedEof, 1)] - [InlineData(PaktErrorCode.DuplicateName, 2)] - [InlineData(PaktErrorCode.TypeMismatch, 3)] - [InlineData(PaktErrorCode.NilNonNullable, 4)] - [InlineData(PaktErrorCode.Syntax, 5)] + [InlineData(PaktErrorCode.TypeMismatch, 2)] + [InlineData(PaktErrorCode.NilNonNullable, 3)] + [InlineData(PaktErrorCode.Syntax, 4)] public void ErrorCodes_MatchSpecValues(PaktErrorCode code, int expected) { Assert.Equal(expected, (int)code); diff --git a/encoding/bench_test.go b/encoding/bench_test.go index 4919e7a..8924bc9 100644 --- a/encoding/bench_test.go +++ b/encoding/bench_test.go @@ -23,7 +23,6 @@ import ( "bytes" "encoding/json" "fmt" - "io" "math/rand" "reflect" "strconv" @@ -156,7 +155,7 @@ func benchInitSmall() { Verbose: false, Label: "production", } - fields, err := StructFields(reflect.TypeOf(benchSmallVal)) + fields, err := ReflectStructFields(reflect.TypeOf(benchSmallVal)) if err != nil { panic(err) } @@ -361,7 +360,7 @@ func benchFSBuildEncFields(ds benchFSDataset) []benchEncField { } func benchGenerateFS(n int) (benchFSDataset, []byte, []byte) { - rng := rand.New(rand.NewSource(42)) + rng := rand.New(rand.NewSource(42)) //nolint:gosec // deterministic seed for reproducible benchmarks extensions := []string{".csv", ".parquet", ".json", ".log", ".tmp", ".idx"} subdirs := []string{"incoming", "archive", "staging", "reports", "temp", "indexes"} @@ -511,7 +510,7 @@ func BenchmarkPAKTUnmarshalSmall(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { var v benchSmallDoc - Unmarshal(data, &v) //nolint:errcheck + UnmarshalNewInto(data, &v) //nolint:errcheck } } @@ -718,7 +717,7 @@ func 
BenchmarkPAKTUnmarshalFS1K(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { var v benchFSDataset - Unmarshal(data, &v) //nolint:errcheck + UnmarshalNewInto(data, &v) //nolint:errcheck } } @@ -799,7 +798,7 @@ func BenchmarkPAKTUnmarshalFS10K(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { var v benchFSDataset - Unmarshal(data, &v) //nolint:errcheck + UnmarshalNewInto(data, &v) //nolint:errcheck } } @@ -886,23 +885,16 @@ func benchStreamPAKT(b *testing.B, data []byte) { b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - dec := NewDecoder(bytes.NewReader(data)) - // Read header assigns into a struct, then stream pack elements. type header struct { Root string `pakt:"root"` Scanned string `pakt:"scanned"` Entries []benchFSEntry `pakt:"entries"` } - var h header - for dec.More() { - if err := dec.UnmarshalNext(&h); err != nil { - if err == io.EOF { - break - } - b.Fatal(err) - } + h, err := UnmarshalNew[header](data) + if err != nil { + b.Fatal(err) } - dec.Close() + _ = h } } @@ -920,3 +912,349 @@ func benchStreamJSON(b *testing.B, data []byte) { } } } + +// --------------------------------------------------------------------------- +// Financial Benchmark: Trade + Position data +// +// Domain: trade execution log with a map-pack of portfolio positions. +// Designed to stress non-string scalars (int, dec, bool, ts, uuid), +// atom sets, and embedded composites (list inside struct). 
+// --------------------------------------------------------------------------- + +type benchTrade struct { + Timestamp string `pakt:"timestamp" json:"timestamp"` + Ticker string `pakt:"ticker" json:"ticker"` + Side string `pakt:"side" json:"side"` + Quantity int64 `pakt:"quantity" json:"quantity"` + Price string `pakt:"price" json:"price"` // dec → string + Fees string `pakt:"fees" json:"fees"` // dec → string + Filled bool `pakt:"filled" json:"filled"` + Venue string `pakt:"venue" json:"venue"` + OrderID string `pakt:"order_id" json:"order_id"` + Tags []string `pakt:"tags" json:"tags"` +} + +type benchPosition struct { + Qty int64 `pakt:"qty" json:"qty"` + AvgCost string `pakt:"avg_cost" json:"avg_cost"` + UnrealizedPnl string `pakt:"unrealized_pnl" json:"unrealized_pnl"` + LastPrice string `pakt:"last_price" json:"last_price"` + Updated string `pakt:"updated" json:"updated"` +} + +type benchFinDataset struct { + Account string `pakt:"account" json:"account"` + AsOf string `pakt:"as_of" json:"as_of"` + Trades []benchTrade `pakt:"trades" json:"trades"` + Positions map[string]benchPosition `pakt:"positions" json:"positions"` +} + +var ( + benchFin1KPAKT []byte + benchFin1KJSON []byte + benchFin1KVal benchFinDataset + benchFin1KNDJSON []byte + + benchFin10KPAKT []byte + benchFin10KJSON []byte + benchFin10KVal benchFinDataset + benchFin10KNDJSON []byte +) + +func init() { + benchInitFin() +} + +func benchInitFin() { + benchFin1KVal, benchFin1KPAKT, benchFin1KJSON = benchGenerateFin(1000) + benchFin10KVal, benchFin10KPAKT, benchFin10KJSON = benchGenerateFin(10000) + benchFin1KNDJSON = benchGenerateNDJSON2(benchFin1KVal.Trades) + benchFin10KNDJSON = benchGenerateNDJSON2(benchFin10KVal.Trades) +} + +func benchGenerateNDJSON2[T any](items []T) []byte { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + for i := range items { + enc.Encode(items[i]) //nolint:errcheck + } + return buf.Bytes() +} + +func benchGenerateFin(n int) (benchFinDataset, []byte, []byte) { + rng 
:= rand.New(rand.NewSource(77)) //nolint:gosec // deterministic seed for reproducible benchmarks + + tickers := []string{"AAPL", "GOOG", "MSFT", "AMZN", "NVDA", "META", "TSLA", "JPM", "V", "UNH", + "XOM", "JNJ", "PG", "MA", "HD", "CVX", "MRK", "ABBV", "PEP", "KO"} + venues := []string{"NYSE", "NASDAQ", "BATS", "IEX", "EDGX", "MEMX"} + tagPool := []string{"algo", "manual", "dark-pool", "pre-market", "post-market", "block", "sweep", "iceberg"} + + baseTime := time.Date(2026, 3, 1, 9, 30, 0, 0, time.FixedZone("EST", -5*3600)) + + trades := make([]benchTrade, n) + for i := 0; i < n; i++ { + ticker := tickers[rng.Intn(len(tickers))] + + side := "buy" + if rng.Float64() < 0.45 { + side = "sell" + } + + qty := int64(rng.Intn(9900) + 100) + priceDollars := rng.Intn(400) + 10 + priceCents := rng.Intn(100) + price := fmt.Sprintf("%d.%02d", priceDollars, priceCents) + + feesCents := rng.Intn(500) + 1 + fees := fmt.Sprintf("%d.%02d", feesCents/100, feesCents%100) + + filled := rng.Float64() < 0.92 + venue := venues[rng.Intn(len(venues))] + + orderID := fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", + rng.Uint32(), rng.Uint32()&0xFFFF, 0x4000|rng.Uint32()&0x0FFF, + 0x8000|rng.Uint32()&0x3FFF, rng.Int63()&0xFFFFFFFFFFFF) + + // 1-3 tags per trade + numTags := rng.Intn(3) + 1 + tags := make([]string, numTags) + for j := range numTags { + tags[j] = tagPool[rng.Intn(len(tagPool))] + } + + offset := time.Duration(i*3+rng.Intn(3)) * time.Second + ts := baseTime.Add(offset) + + trades[i] = benchTrade{ + Timestamp: ts.Format(time.RFC3339), + Ticker: ticker, + Side: side, + Quantity: qty, + Price: price, + Fees: fees, + Filled: filled, + Venue: venue, + OrderID: orderID, + Tags: tags, + } + } + + // Build positions from unique tickers seen + positions := make(map[string]benchPosition) + for _, t := range tickers { + priceDollars := rng.Intn(400) + 10 + priceCents := rng.Intn(100) + costDollars := rng.Intn(400) + 10 + costCents := rng.Intn(100) + pnl := (priceDollars - costDollars) * 
(rng.Intn(5000) + 100) + + positions[t] = benchPosition{ + Qty: int64(rng.Intn(50000) + 100), + AvgCost: fmt.Sprintf("%d.%02d", costDollars, costCents), + UnrealizedPnl: fmt.Sprintf("%d.%02d", pnl, rng.Intn(100)), + LastPrice: fmt.Sprintf("%d.%02d", priceDollars, priceCents), + Updated: baseTime.Add(time.Duration(n*3) * time.Second).Format(time.RFC3339), + } + } + + val := benchFinDataset{ + Account: "ACCT-7734-PRIME", + AsOf: baseTime.Add(time.Duration(n*3) * time.Second).Format(time.RFC3339), + Trades: trades, + Positions: positions, + } + + // Build PAKT + var pb strings.Builder + pb.WriteString("account:str = 'ACCT-7734-PRIME'\n") + fmt.Fprintf(&pb, "as_of:ts = %s\n", val.AsOf) + + // Trades as list pack + pb.WriteString("trades:[{timestamp:ts, ticker:str, side:|buy, sell|, quantity:int, price:dec, fees:dec, filled:bool, venue:str, order_id:uuid, tags:[str]}] <<\n") + for i, tr := range trades { + if i > 0 { + pb.WriteByte('\n') + } + boolStr := "false" + if tr.Filled { + boolStr = "true" + } + // Build tags list + var tagBuf strings.Builder + tagBuf.WriteByte('[') + for j, tag := range tr.Tags { + if j > 0 { + tagBuf.WriteString(", ") + } + fmt.Fprintf(&tagBuf, "'%s'", tag) + } + tagBuf.WriteByte(']') + + fmt.Fprintf(&pb, " { %s, '%s', |%s, %d, %s, %s, %s, '%s', %s, %s }", + tr.Timestamp, tr.Ticker, tr.Side, tr.Quantity, tr.Price, tr.Fees, + boolStr, tr.Venue, tr.OrderID, tagBuf.String()) + } + pb.WriteString("\n") + + // Positions as map pack + pb.WriteString("positions: <<\n") + first := true + for ticker, pos := range positions { + if !first { + pb.WriteByte('\n') + } + first = false + fmt.Fprintf(&pb, " '%s' ; { %d, %s, %s, %s, %s }", + ticker, pos.Qty, pos.AvgCost, pos.UnrealizedPnl, pos.LastPrice, pos.Updated) + } + pb.WriteString("\n") + + jsonBytes, _ := json.Marshal(val) + return val, []byte(pb.String()), jsonBytes +} + +// --------------------------------------------------------------------------- +// Financial Benchmarks — 1K trades +// 
--------------------------------------------------------------------------- + +func BenchmarkPAKTDecodeFin1K(b *testing.B) { + runPAKTDecodeBenchmark(b, benchFin1KPAKT) +} + +func BenchmarkJSONDecodeFin1K(b *testing.B) { + data := benchFin1KJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v map[string]any + json.Unmarshal(data, &v) //nolint:errcheck + } +} + +func BenchmarkPAKTUnmarshalFin1K(b *testing.B) { + data := benchFin1KPAKT + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v benchFinDataset + UnmarshalNewInto(data, &v) //nolint:errcheck + } +} + +func BenchmarkJSONUnmarshalFin1K(b *testing.B) { + data := benchFin1KJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v benchFinDataset + json.Unmarshal(data, &v) //nolint:errcheck + } +} + +// --------------------------------------------------------------------------- +// Financial Benchmarks — 10K trades +// --------------------------------------------------------------------------- + +func BenchmarkPAKTDecodeFin10K(b *testing.B) { + runPAKTDecodeBenchmark(b, benchFin10KPAKT) +} + +func BenchmarkJSONDecodeFin10K(b *testing.B) { + data := benchFin10KJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v map[string]any + json.Unmarshal(data, &v) //nolint:errcheck + } +} + +func BenchmarkPAKTUnmarshalFin10K(b *testing.B) { + data := benchFin10KPAKT + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v benchFinDataset + UnmarshalNewInto(data, &v) //nolint:errcheck + } +} + +func BenchmarkJSONUnmarshalFin10K(b *testing.B) { + data := benchFin10KJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var v benchFinDataset + json.Unmarshal(data, &v) //nolint:errcheck + } +} + +// --------------------------------------------------------------------------- +// Financial Benchmarks — Streaming (one trade at a time) +// 
--------------------------------------------------------------------------- + +func BenchmarkPAKTStreamFin1K(b *testing.B) { + data := benchFin1KPAKT + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + sr := NewUnitReaderFromBytes(data) + for stmt := range sr.Properties() { + if stmt.Name == "trades" && stmt.IsPack { + for trade := range PackItems[benchTrade](sr) { + _ = trade + } + } + } + sr.Close() + } +} + +func BenchmarkPAKTStreamFin10K(b *testing.B) { + data := benchFin10KPAKT + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + sr := NewUnitReaderFromBytes(data) + for stmt := range sr.Properties() { + if stmt.Name == "trades" && stmt.IsPack { + for trade := range PackItems[benchTrade](sr) { + _ = trade + } + } + } + sr.Close() + } +} + +func BenchmarkJSONStreamFin1K(b *testing.B) { + data := benchFin1KNDJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + dec := json.NewDecoder(bytes.NewReader(data)) + for dec.More() { + var trade benchTrade + if err := dec.Decode(&trade); err != nil { + b.Fatal(err) + } + _ = trade + } + } +} + +func BenchmarkJSONStreamFin10K(b *testing.B) { + data := benchFin10KNDJSON + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + dec := json.NewDecoder(bytes.NewReader(data)) + for dec.More() { + var trade benchTrade + if err := dec.Decode(&trade); err != nil { + b.Fatal(err) + } + _ = trade + } + } +} diff --git a/encoding/bytesource.go b/encoding/bytesource.go index 833a9b7..fe5f7f7 100644 --- a/encoding/bytesource.go +++ b/encoding/bytesource.go @@ -6,8 +6,6 @@ import ( ) // byteSource abstracts the byte-level input operations used by the reader. -// Two implementations exist: bufioSource (wrapping bufio.Reader for streaming) -// and bytesSource (operating directly on []byte for Unmarshal). type byteSource interface { // PeekByte returns the next byte without consuming it. 
PeekByte() (byte, error) @@ -54,54 +52,3 @@ func (s *bufioSource) Discard(n int) { func (s *bufioSource) Reset(r io.Reader) { s.br.Reset(r) } - -// bytesSource operates directly on a []byte slice with zero buffering overhead. -type bytesSource struct { - data []byte - off int -} - -func newBytesSource(data []byte) *bytesSource { - return &bytesSource{data: data} -} - -func (s *bytesSource) PeekByte() (byte, error) { - if s.off >= len(s.data) { - return 0, io.EOF - } - return s.data[s.off], nil -} - -func (s *bytesSource) ReadByte() (byte, error) { - if s.off >= len(s.data) { - return 0, io.EOF - } - b := s.data[s.off] - s.off++ - return b, nil -} - -func (s *bytesSource) UnreadByte() error { - if s.off > 0 { - s.off-- - } - return nil -} - -func (s *bytesSource) Peek(n int) ([]byte, error) { - remaining := len(s.data) - s.off - if remaining <= 0 { - return nil, io.EOF - } - if n > remaining { - return s.data[s.off:], io.EOF - } - return s.data[s.off : s.off+n], nil -} - -func (s *bytesSource) Discard(n int) { - s.off += n - if s.off > len(s.data) { - s.off = len(s.data) - } -} diff --git a/encoding/converter.go b/encoding/converter.go new file mode 100644 index 0000000..37624f2 --- /dev/null +++ b/encoding/converter.go @@ -0,0 +1,138 @@ +package encoding + +import ( + "reflect" +) + +// ValueConverter converts PAKT values to/from a specific Go type. +// Implementations receive a scoped [ValueReader] positioned at the value, +// not the full [UnitReader]. +type ValueConverter[T any] interface { + // FromPakt reads a PAKT value and returns T. + // The ValueReader is positioned at the start of the value. + // The converter MUST consume exactly one complete value. + FromPakt(vr *ValueReader, paktType Type) (T, error) + + // ToPakt writes a value of type T to the encoder. + ToPakt(enc *Encoder, value T) error +} + +// RegisterConverter registers a [ValueConverter] for type T. 
+// When deserializing into T, the converter is used instead of the +// default reflection-based mapping. +func RegisterConverter[T any](c ValueConverter[T]) Option { + return func(o *options) { + reg := o.ensureConverters() + var zero T + reg.byType[reflect.TypeOf(&zero).Elem()] = c + } +} + +// ValueReader is a scoped view of the stream, positioned at a single value. +// It provides read access for scalars and navigation for composites. +// A ValueReader is only valid for the duration of the converter call. +type ValueReader struct { + sr *UnitReader + event Event // the initial event for this value +} + +// StringValue returns the scalar string value. +func (vr *ValueReader) StringValue() (string, error) { + if vr.event.Kind != EventScalarValue { + return "", &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + return vr.event.ValueString(), nil +} + +// IntValue returns the scalar integer value. +func (vr *ValueReader) IntValue() (int64, error) { + if vr.event.Kind != EventScalarValue { + return 0, &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + return parseIntLiteral(vr.event.ValueString()) +} + +// FloatValue returns the scalar float value. +func (vr *ValueReader) FloatValue() (float64, error) { + if vr.event.Kind != EventScalarValue { + return 0, &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + return parseFloatLiteral(vr.event.ValueString()) +} + +// BoolValue returns the scalar boolean value. +func (vr *ValueReader) BoolValue() (bool, error) { + if vr.event.Kind != EventScalarValue { + return false, &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + switch string(vr.event.Value) { + case "true": + return true, nil + case "false": + return false, nil + default: + return false, &DeserializeError{Pos: vr.event.Pos, Message: "invalid bool: " + vr.event.ValueString()} + } +} + +// DecValue returns the scalar decimal value as a string (preserving precision). 
+func (vr *ValueReader) DecValue() (string, error) { + if vr.event.Kind != EventScalarValue { + return "", &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + return vr.event.ValueString(), nil +} + +// BytesValue returns the scalar binary value as decoded bytes. +func (vr *ValueReader) BytesValue() ([]byte, error) { + if vr.event.Kind != EventScalarValue { + return nil, &DeserializeError{Pos: vr.event.Pos, Message: "not a scalar value"} + } + // The event value is hex-encoded for bin + target := reflect.New(reflect.TypeOf([]byte{})).Elem() + if err := setBinFromEvent(target, vr.event.ValueString()); err != nil { + return nil, err + } + return target.Bytes(), nil +} + +// IsNil returns true if the current value is nil. +func (vr *ValueReader) IsNil() bool { + return vr.event.Kind == EventScalarValue && vr.event.IsNilValue() +} + +// Skip consumes and discards the current value. +func (vr *ValueReader) Skip() error { + return skipValueEvent(vr.sr, vr.event) +} + +// Err returns the UnitReader's accumulated error. +func (vr *ValueReader) Err() error { + return vr.sr.Err() +} + +// ReadAs deserializes the current child value using the framework's +// type mapping, converters, and options. This is how converters compose: +// they delegate child values back to the framework. +func ReadAs[T any](vr *ValueReader) (T, error) { + // Read the next event from the stream for the child value. 
+ ev, err := vr.sr.nextEvent() + if err != nil { + var zero T + return zero, err + } + + var val T + target := reflect.ValueOf(&val).Elem() + if ev.Kind == EventScalarValue && ev.IsNilValue() { + if err := setNil(target); err != nil { + return val, err + } + return val, nil + } + target = allocPtr(target) + if err := handleValueEvent(vr.sr, ev, target); err != nil { + return val, err + } + return val, nil +} diff --git a/encoding/converter_test.go b/encoding/converter_test.go new file mode 100644 index 0000000..fef159f --- /dev/null +++ b/encoding/converter_test.go @@ -0,0 +1,348 @@ +package encoding + +import ( + "fmt" + "strings" + "testing" +) + +// --- test converter types --- + +type Celsius float64 + +type celsiusConverter struct{} + +func (c celsiusConverter) FromPakt(vr *ValueReader, pt Type) (Celsius, error) { + f, err := vr.FloatValue() + if err != nil { + return 0, err + } + return Celsius(f), nil +} + +func (c celsiusConverter) ToPakt(enc *Encoder, v Celsius) error { + return fmt.Errorf("ToPakt not implemented") +} + +// --- tests --- + +func TestRegisterConverterAndReadValue(t *testing.T) { + input := "temp:float = 3.65e1\n" + sr := NewUnitReader(strings.NewReader(input), + RegisterConverter[Celsius](celsiusConverter{})) + defer sr.Close() + + for stmt := range sr.Properties() { + if stmt.Name != "temp" { + t.Fatalf("expected 'temp', got %q", stmt.Name) + } + val, err := ReadValue[Celsius](sr) + if err != nil { + t.Fatal(err) + } + if val != Celsius(36.5) { + t.Errorf("expected 36.5, got %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestValueReaderStringValue(t *testing.T) { + tests := []struct { + name string + input string + want string + wantErr bool + }{ + {"valid", "msg:str = 'hello'\n", "hello", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sr := NewUnitReader(strings.NewReader(tt.input), + RegisterConverter[string](stringViaVR{})) + defer sr.Close() + + for range 
sr.Properties() { + val, err := ReadValue[string](sr) + if (err != nil) != tt.wantErr { + t.Fatalf("err=%v, wantErr=%v", err, tt.wantErr) + } + if val != tt.want { + t.Errorf("got %q, want %q", val, tt.want) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + }) + } +} + +type stringViaVR struct{} + +func (s stringViaVR) FromPakt(vr *ValueReader, pt Type) (string, error) { + return vr.StringValue() +} +func (s stringViaVR) ToPakt(enc *Encoder, v string) error { return nil } + +func TestValueReaderIntValue(t *testing.T) { + sr := NewUnitReader(strings.NewReader("n:int = 42\n"), + RegisterConverter[int64](intViaVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[int64](sr) + if err != nil { + t.Fatal(err) + } + if val != 42 { + t.Errorf("got %d, want 42", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type intViaVR struct{} + +func (iv intViaVR) FromPakt(vr *ValueReader, pt Type) (int64, error) { + return vr.IntValue() +} +func (iv intViaVR) ToPakt(enc *Encoder, v int64) error { return nil } + +func TestValueReaderFloatValue(t *testing.T) { + sr := NewUnitReader(strings.NewReader("rate:float = 2.5e0\n"), + RegisterConverter[float64](floatViaVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[float64](sr) + if err != nil { + t.Fatal(err) + } + if val != 2.5 { + t.Errorf("got %f, want 2.5", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type floatViaVR struct{} + +func (fv floatViaVR) FromPakt(vr *ValueReader, pt Type) (float64, error) { + return vr.FloatValue() +} +func (fv floatViaVR) ToPakt(enc *Encoder, v float64) error { return nil } + +func TestValueReaderBoolValue(t *testing.T) { + tests := []struct { + name string + input string + want bool + wantErr bool + }{ + {"true", "flag:bool = true\n", true, false}, + {"false", "flag:bool = false\n", false, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sr := 
NewUnitReader(strings.NewReader(tt.input), + RegisterConverter[bool](boolViaVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[bool](sr) + if (err != nil) != tt.wantErr { + t.Fatalf("err=%v, wantErr=%v", err, tt.wantErr) + } + if val != tt.want { + t.Errorf("got %v, want %v", val, tt.want) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + }) + } +} + +type boolViaVR struct{} + +func (bv boolViaVR) FromPakt(vr *ValueReader, pt Type) (bool, error) { + return vr.BoolValue() +} +func (bv boolViaVR) ToPakt(enc *Encoder, v bool) error { return nil } + +func TestValueReaderDecValue(t *testing.T) { + sr := NewUnitReader(strings.NewReader("price:dec = 19.99\n"), + RegisterConverter[string](decViaVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "19.99" { + t.Errorf("got %q, want '19.99'", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type decViaVR struct{} + +func (dv decViaVR) FromPakt(vr *ValueReader, pt Type) (string, error) { + return vr.DecValue() +} +func (dv decViaVR) ToPakt(enc *Encoder, v string) error { return nil } + +func TestValueReaderBytesValue(t *testing.T) { + // Use hex-encoded binary + sr := NewUnitReader(strings.NewReader("data:bin = x'48454c4c4f'\n"), + RegisterConverter[[]byte](bytesViaVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[[]byte](sr) + if err != nil { + t.Fatal(err) + } + if string(val) != "HELLO" { + t.Errorf("got %q, want 'HELLO'", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type bytesViaVR struct{} + +func (bv bytesViaVR) FromPakt(vr *ValueReader, pt Type) ([]byte, error) { + return vr.BytesValue() +} +func (bv bytesViaVR) ToPakt(enc *Encoder, v []byte) error { return nil } + +func TestValueReaderIsNil(t *testing.T) { + // Test IsNil returns false for non-nil values (nil values are intercepted before converter) + 
sr := NewUnitReader(strings.NewReader("label:str = 'hello'\n"), + RegisterConverter[string](nilAndErrCheckVR{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "hello" { + t.Errorf("expected 'hello', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type nilAndErrCheckVR struct{} + +func (n nilAndErrCheckVR) FromPakt(vr *ValueReader, pt Type) (string, error) { + if vr.IsNil() { + return "", nil + } + // Also exercise Err() + if vr.Err() != nil { + return "", vr.Err() + } + return vr.StringValue() +} +func (n nilAndErrCheckVR) ToPakt(enc *Encoder, v string) error { return nil } + +func TestValueReaderBoolValueInvalidLiteral(t *testing.T) { + // Force a converter that calls BoolValue on a non-boolean string + sr := NewUnitReader(strings.NewReader("flag:str = 'notbool'\n"), + RegisterConverter[bool](boolViaVR{})) + defer sr.Close() + + for range sr.Properties() { + _, err := ReadValue[bool](sr) + if err == nil { + t.Fatal("expected error for invalid bool literal") + } + } +} + +func TestValueReaderStringValueOnNonScalar(t *testing.T) { + // Converter receives a struct start event, StringValue should error + sr := NewUnitReader(strings.NewReader("s:{x:int} = {1}\n"), + RegisterConverter[dummy](structStringVR{})) + defer sr.Close() + + for range sr.Properties() { + _, err := ReadValue[dummy](sr) + if err == nil { + t.Fatal("expected error calling StringValue on non-scalar") + } + } +} + +type structStringVR struct{} + +func (sv structStringVR) FromPakt(vr *ValueReader, pt Type) (dummy, error) { + _, err := vr.StringValue() + return dummy{}, err +} +func (sv structStringVR) ToPakt(enc *Encoder, v dummy) error { return nil } + +type dummy struct{} + +// Test ReadAs — delegated deserialization from within a converter +type Wrapper struct { + Inner string +} + +func TestReadAsFromConverter(t *testing.T) { + // The struct has 2 fields. 
The converter reads the struct start, then delegates each field. + input := "data:{a:str, b:str} = {'hello', 'world'}\n" + sr := NewUnitReader(strings.NewReader(input), + RegisterConverter[Wrapper](structWrapperConverter{})) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[Wrapper](sr) + if err != nil { + t.Fatal(err) + } + if val.Inner != "hello+world" { + t.Errorf("got %q, want 'hello+world'", val.Inner) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +type structWrapperConverter struct{} + +func (sw structWrapperConverter) FromPakt(vr *ValueReader, pt Type) (Wrapper, error) { + // We're positioned at StructStart. Read two string children via ReadAs. + a, err := ReadAs[string](vr) + if err != nil { + return Wrapper{}, err + } + b, err := ReadAs[string](vr) + if err != nil { + return Wrapper{}, err + } + // Consume the struct end + _ = vr.Skip() + return Wrapper{Inner: a + "+" + b}, nil +} +func (sw structWrapperConverter) ToPakt(enc *Encoder, v Wrapper) error { return nil } diff --git a/encoding/decoder.go b/encoding/decoder.go index ff84043..34a2794 100644 --- a/encoding/decoder.go +++ b/encoding/decoder.go @@ -1,25 +1,15 @@ package encoding import ( - "fmt" "io" - "reflect" ) // Decoder reads a PAKT document from an input source and emits [Event] values -// one at a time, similar to [encoding/json.Decoder]. An optional spec -// projection may be applied via [Decoder.SetSpec] to filter and validate the -// source against a .spec.pakt definition. +// one at a time, similar to [encoding/json.Decoder]. type Decoder struct { r *reader sm *stateMachine - spec *Spec done bool // true after document fully parsed - - // pack unmarshal state - inPack bool // true while inside a pack statement - packList *ListType - packMap *MapType } // NewDecoder returns a Decoder that reads PAKT input from r. @@ -31,24 +21,6 @@ func NewDecoder(r io.Reader) *Decoder { } } -// SetSpec applies a spec projection to the decoder. 
The spec is parsed from r, -// which should contain a valid .spec.pakt document. Fields matching the spec -// are parsed and emitted; unmatched fields are skipped. Type mismatches between -// the document and spec produce an error. -// -// NOTE: The spec API is experimental and its contract may evolve. Currently, -// specs act as advisory filters — they control which fields are parsed and -// validate types, but do not enforce presence of fields. Use pointer struct -// fields to detect absent values. -func (d *Decoder) SetSpec(r io.Reader) error { - spec, err := ParseSpec(r) - if err != nil { - return err - } - d.spec = spec - return nil -} - // Close releases internal resources (such as pooled buffers) back to their // pools. Callers should defer Close after creating a Decoder. It is safe to // call Close multiple times. @@ -66,15 +38,7 @@ func (d *Decoder) Close() { // // On each call it returns the next [Event] in document order. When the // document is fully consumed, it returns a zero Event and [io.EOF]. -// If a spec is active, unmatched fields are silently skipped. func (d *Decoder) Decode() (Event, error) { - if d.spec != nil { - return d.decodeWithSpec() - } - return d.decodeDirect() -} - -func (d *Decoder) decodeDirect() (Event, error) { if d.done { return Event{}, io.EOF } @@ -96,195 +60,3 @@ func (d *Decoder) decodeDirect() (Event, error) { return ev, nil } - -// UnmarshalNext reads the next top-level statement from the PAKT source and -// stores the result in the value pointed to by v. It uses a visitor-driven -// path that bypasses Event creation, writing parsed values directly into -// struct fields. -// -// For assignment statements (name:type = value), v must be a pointer to a -// struct with a matching field. For pack statements (name:type <<), behavior -// depends on the target type: -// - Struct target: the pack is unmarshalled in full into a matching slice or -// map field. Use this when consuming an entire pack at once. 
-// - Direct target (e.g., pointer to a scalar or value type): the first call -// reads the pack header and the first element; subsequent calls each read -// one element. Use [Decoder.More] to drive the loop. -// -// Returns [io.EOF] when no more statements remain. -func (d *Decoder) UnmarshalNext(v any) error { - if d.done { - return io.EOF - } - if d.sm == nil { - d.sm = newStateMachine(d.r) - } - - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Pointer || rv.IsNil() { - return &ParseError{Message: "pakt: UnmarshalNext requires a non-nil pointer"} - } - rv = rv.Elem() - - // If we're mid-pack, read the next pack element. - if d.inPack { - return d.unmarshalNextPackElement(rv) - } - - // Read the next statement header. - var h statementHeader - var err error - - if d.spec != nil { - h, err = d.nextMatchedHeader() - } else { - d.r.skipInsignificant(true) - h, err = d.sm.readStatementHeader() - } - if err != nil { - if err == io.EOF { - d.done = true - return io.EOF - } - d.done = true - return err - } - - if h.pack { - // Enter pack mode. - d.inPack = true - if h.typ.List != nil { - d.packList = h.typ.List - } else { - d.packMap = h.typ.Map - } - // For a struct target, try to set the pack into a matching field. - if rv.Kind() == reflect.Struct { - return d.unmarshalPackIntoField(h, rv) - } - // For a direct target, read one element at a time. - return d.unmarshalNextPackElement(rv) - } - - // Assignment statement — unmarshal into matching struct field or directly. - if rv.Kind() == reflect.Struct { - info, cerr := cachedStructFields(rv.Type()) - if cerr != nil { - return cerr - } - fi, ok := info.fieldMap[h.name] - if !ok { - // Skip unknown statement body. - return d.r.skipStatementBody(h) - } - d.r.skipWS() - return d.sm.unmarshalValue(h.typ, rv.Field(fi.Index)) - } - - // Direct target — unmarshal the value into it. - d.r.skipWS() - return d.sm.unmarshalValue(h.typ, rv) -} - -// More reports whether there are more elements to read. 
When inside a pack -// statement, it reports whether additional pack elements remain. When at -// the top level, it reports whether more statements exist. -func (d *Decoder) More() bool { - if d.done { - return false - } - if d.inPack { - d.r.skipInsignificant(true) - b, err := d.r.peekByte() - if err != nil { - d.inPack = false - d.packList = nil - d.packMap = nil - return false - } - // NUL byte terminates the pack (end-of-unit per spec §10.1). - if b == 0 || !d.r.canStartValueInPack(b) { - d.inPack = false - d.packList = nil - d.packMap = nil - return false - } - return true - } - d.r.skipInsignificant(true) - b, err := d.r.peekByte() - if err != nil { - return false - } - // NUL byte at top level is end-of-unit (spec §10.1). - return b != 0 -} - -func (d *Decoder) nextMatchedHeader() (statementHeader, error) { - for { - d.r.skipInsignificant(true) - h, err := d.sm.readStatementHeader() - if err != nil { - return h, err - } - specType, ok := d.spec.Fields[h.name] - if !ok { - if err := d.r.skipStatementBody(h); err != nil { - return statementHeader{}, err - } - continue - } - if specType.String() != h.typ.String() { - return statementHeader{}, Wrapf(h.pos, ErrTypeMismatch, - "spec field %q expected type %s, got %s", h.name, specType.String(), h.typ.String()) - } - return h, nil - } -} - -func (d *Decoder) unmarshalPackIntoField(h statementHeader, rv reflect.Value) error { - info, err := cachedStructFields(rv.Type()) - if err != nil { - return err - } - fi, ok := info.fieldMap[h.name] - if !ok { - // Skip entire pack. 
- d.inPack = false - d.packList = nil - d.packMap = nil - return d.r.skipPackBody(h.typ) - } - target := rv.Field(fi.Index) - if d.packList != nil { - err = d.sm.unmarshalPackList(d.packList, target) - } else { - err = d.sm.unmarshalPackMap(d.packMap, target) - } - d.inPack = false - d.packList = nil - d.packMap = nil - return err -} - -func (d *Decoder) unmarshalNextPackElement(rv reflect.Value) error { - d.r.skipInsignificant(true) - if d.packList != nil { - err := d.sm.unmarshalValue(d.packList.Element, rv) - if err != nil { - return err - } - d.r.readSep() //nolint:errcheck - return nil - } - if d.packMap != nil { - // For map packs, caller gets key-value pairs. - err := d.sm.unmarshalValue(d.packMap.Value, rv) - if err != nil { - return err - } - d.r.readSep() //nolint:errcheck - return nil - } - return fmt.Errorf("pakt: not in a pack") -} diff --git a/encoding/decoder_test.go b/encoding/decoder_test.go index 3bde67e..b4992a6 100644 --- a/encoding/decoder_test.go +++ b/encoding/decoder_test.go @@ -2,6 +2,7 @@ package encoding import ( "io" + "slices" "strings" "testing" ) @@ -22,6 +23,8 @@ func decodeAll(t *testing.T, input string) []Event { if err != nil { t.Fatalf("Decode(): %v", err) } + // Clone borrowed Value bytes so they survive across Decode calls. 
+ ev.Value = slices.Clone(ev.Value) events = append(events, ev) } return events @@ -39,7 +42,7 @@ func TestDecodeSimpleStr(t *testing.T) { if events[0].Kind != EventAssignStart || events[0].Name != "name" { t.Fatalf("event[0] = %v", events[0]) } - if events[1].Kind != EventScalarValue || events[1].Value != "hello" { + if events[1].Kind != EventScalarValue || events[1].ValueString() != "hello" { t.Fatalf("event[1] = %v", events[1]) } if events[1].Name != "name" { @@ -55,8 +58,8 @@ func TestDecodeSimpleInt(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "42" { - t.Fatalf("value = %q, want %q", events[1].Value, "42") + if events[1].ValueString() != "42" { + t.Fatalf("value = %q, want %q", events[1].ValueString(), "42") } } @@ -65,7 +68,7 @@ func TestDecodeSimpleBool(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "true" { + if events[1].ValueString() != "true" { t.Fatalf("value = %q", events[1].Value) } } @@ -107,10 +110,10 @@ func TestDecodeStructAssignment(t *testing.T) { if events[1].Kind != EventStructStart { t.Fatalf("event[1] = %v", events[1]) } - if events[2].Kind != EventScalarValue || events[2].Name != "host" || events[2].Value != "localhost" { + if events[2].Kind != EventScalarValue || events[2].Name != "host" || events[2].ValueString() != "localhost" { t.Fatalf("event[2] = %v", events[2]) } - if events[3].Kind != EventScalarValue || events[3].Name != "port" || events[3].Value != "8080" { + if events[3].Kind != EventScalarValue || events[3].Name != "port" || events[3].ValueString() != "8080" { t.Fatalf("event[3] = %v", events[3]) } if events[4].Kind != EventStructEnd { @@ -128,7 +131,7 @@ func TestDecodeTupleAssignment(t *testing.T) { if len(events) != 7 { t.Fatalf("expected 7 events, got %d: %v", len(events), events) } - if events[2].Name != "[0]" || events[2].Value != "1" { + if events[2].Name != "[0]" || 
events[2].ValueString() != "1" { t.Fatalf("event[2] = %v", events[2]) } } @@ -162,10 +165,10 @@ func TestDecodeDuplicateRootNamePreserved(t *testing.T) { if len(events) != 6 { t.Fatalf("expected 6 events for two duplicate statements, got %d: %v", len(events), events) } - if events[0].Name != "name" || events[1].Value != "a" { + if events[0].Name != "name" || events[1].ValueString() != "a" { t.Fatalf("first statement not preserved: %v", events[:3]) } - if events[3].Name != "name" || events[4].Value != "b" { + if events[3].Name != "name" || events[4].ValueString() != "b" { t.Fatalf("second statement not preserved: %v", events[3:]) } } @@ -237,7 +240,7 @@ func TestDecodeBlockInlineEquivalence(t *testing.T) { if inlineEvents[i].Name != blockEvents[i].Name { t.Errorf("event[%d] name: inline=%q, block=%q", i, inlineEvents[i].Name, blockEvents[i].Name) } - if inlineEvents[i].Value != blockEvents[i].Value { + if inlineEvents[i].ValueString() != blockEvents[i].ValueString() { t.Errorf("event[%d] value: inline=%q, block=%q", i, inlineEvents[i].Value, blockEvents[i].Value) } if inlineEvents[i].ScalarType != blockEvents[i].ScalarType { @@ -260,7 +263,7 @@ func TestDecodeTupleBlockInlineEquivalence(t *testing.T) { if inlineEvents[i].Kind != blockEvents[i].Kind { t.Errorf("event[%d] kind: inline=%s, block=%s", i, inlineEvents[i].Kind, blockEvents[i].Kind) } - if inlineEvents[i].Value != blockEvents[i].Value { + if inlineEvents[i].ValueString() != blockEvents[i].ValueString() { t.Errorf("event[%d] value: inline=%q, block=%q", i, inlineEvents[i].Value, blockEvents[i].Value) } } @@ -321,8 +324,8 @@ func TestDecodeNullableScalar(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d: %v", len(events), events) } - if events[1].Value != "nil" { - t.Fatalf("value = %q, want %q", events[1].Value, "nil") + if events[1].ValueString() != "nil" { + t.Fatalf("value = %q, want %q", events[1].ValueString(), "nil") } if events[1].ScalarType != TypeStr { t.Fatalf("scalarType = 
%s, want TypeStr", events[1].ScalarType) @@ -335,8 +338,8 @@ func TestDecodeNullableWithValue(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "hello" { - t.Fatalf("value = %q, want %q", events[1].Value, "hello") + if events[1].ValueString() != "hello" { + t.Fatalf("value = %q, want %q", events[1].ValueString(), "hello") } } @@ -376,7 +379,7 @@ func TestDecodeEventStream(t *testing.T) { if ev.ScalarType != exp.scalarType { t.Errorf("event[%d]: scalarType=%s, want %s", i, ev.ScalarType, exp.scalarType) } - if ev.Value != exp.value { + if ev.ValueString() != exp.value { t.Errorf("event[%d]: value=%q, want %q", i, ev.Value, exp.value) } } @@ -392,8 +395,8 @@ func TestDecodeAtomAssignment(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d: %v", len(events), events) } - if events[1].Value != "active" { - t.Fatalf("value = %q, want %q", events[1].Value, "active") + if events[1].ValueString() != "active" { + t.Fatalf("value = %q, want %q", events[1].ValueString(), "active") } } @@ -407,8 +410,8 @@ func TestDecodeLeadingDotDecimal(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d: %v", len(events), events) } - if events[1].Value != ".99" { - t.Fatalf("value = %q, want %q", events[1].Value, ".99") + if events[1].ValueString() != ".99" { + t.Fatalf("value = %q, want %q", events[1].ValueString(), ".99") } } @@ -446,7 +449,7 @@ func TestDecodeNoWhitespaceAroundEquals(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "42" { + if events[1].ValueString() != "42" { t.Fatalf("value = %q", events[1].Value) } } diff --git a/encoding/doc.go b/encoding/doc.go index 3577dd3..81d74b9 100644 --- a/encoding/doc.go +++ b/encoding/doc.go @@ -1,14 +1,12 @@ // Package encoding implements the canonical Go library for the PAKT data // interchange format. 
It provides streaming decode, typed marshal/unmarshal, -// encoding, and spec-based projection. +// and encoding. // // # Decoder // // [Decoder] reads PAKT input from an [io.Reader] and emits [Event] values one // at a time. Each grammatical construct — assignment, pack, struct, tuple, -// list, map, scalar — maps to a distinct [EventKind]. An optional [Spec] -// projection filters the source to matched fields, skipping everything else -// without allocation. +// list, map, scalar — maps to a distinct [EventKind]. // // # Events // @@ -17,33 +15,36 @@ // - Composite values emit StructStart/End, TupleStart/End, ListStart/End, MapStart/End // - Scalar values emit ScalarValue with a [TypeKind] (integer, not string) // +// # UnitReader +// +// [UnitReader] is the primary deserialization interface. It wraps a +// [Decoder] and provides property-level navigation with iterator-based +// pack streaming: +// +// ur := encoding.NewUnitReader(r) +// defer ur.Close() +// for prop := range ur.Properties() { +// switch prop.Name { +// case "config": +// cfg, err := encoding.ReadValue[Config](ur) +// case "events": +// for event := range encoding.PackItems[LogEvent](ur) { +// process(event) +// } +// } +// } +// // # Marshal / Unmarshal // -// [Marshal] and [Unmarshal] convert between Go structs and PAKT text, using +// [Marshal] and [UnmarshalNew] convert between Go structs and PAKT text, using // struct tags (`pakt:"name"`) for field mapping. [Encoder] provides low-level // control over output formatting. // -// # Incremental Unmarshal -// -// For large datasets, [Decoder.UnmarshalNext] reads one top-level statement at a -// time and populates a Go value directly — no intermediate Event objects are -// created. 
Combined with [Decoder.More], this enables constant-memory processing -// of arbitrarily large pack (<<) statements: -// -// dec := encoding.NewDecoder(r) -// defer dec.Close() -// for dec.More() { -// var entry MyStruct -// if err := dec.UnmarshalNext(&entry); err != nil { ... } -// process(entry) -// } -// -// [Unmarshal] uses an optimized path that reads directly from the input []byte -// without buffering overhead. -// // # Errors // // Parse errors are reported as [*ParseError] with source position and a // numeric [ErrorCode] matching spec §11 categories. Use [errors.Is] to check // sentinel categories like [ErrUnexpectedEOF] or [ErrTypeMismatch]. +// Deserialization errors are reported as [*DeserializeError] with additional +// statement and field context. package encoding diff --git a/encoding/encoder_test.go b/encoding/encoder_test.go index dc3fa6d..a6593bf 100644 --- a/encoding/encoder_test.go +++ b/encoding/encoder_test.go @@ -3,6 +3,7 @@ package encoding import ( "bytes" "io" + "slices" "strings" "testing" ) @@ -51,6 +52,7 @@ func roundTrip(t *testing.T, name string, typ Type, v any) []Event { if err != nil { t.Fatalf("Decode failed on input %q: %v", buf.String(), err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } return events @@ -457,7 +459,7 @@ func TestRoundTripStr(t *testing.T) { if events[0].Kind != EventAssignStart || events[0].Name != "name" { t.Errorf("event[0] = %v", events[0]) } - if events[1].Kind != EventScalarValue || events[1].Value != "hello" { + if events[1].Kind != EventScalarValue || events[1].ValueString() != "hello" { t.Errorf("event[1] = %v", events[1]) } if events[2].Kind != EventAssignEnd { @@ -467,22 +469,22 @@ func TestRoundTripStr(t *testing.T) { func TestRoundTripInt(t *testing.T) { events := roundTrip(t, "n", scalarType(TypeInt), int64(-42)) - if events[1].Value != "-42" { - t.Errorf("got value %q, want %q", events[1].Value, "-42") + if events[1].ValueString() != "-42" { + t.Errorf("got value %q, want 
%q", events[1].ValueString(), "-42") } } func TestRoundTripBool(t *testing.T) { events := roundTrip(t, "b", scalarType(TypeBool), true) - if events[1].Value != "true" { - t.Errorf("got value %q, want %q", events[1].Value, "true") + if events[1].ValueString() != "true" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "true") } } func TestRoundTripDec(t *testing.T) { events := roundTrip(t, "d", scalarType(TypeDec), "1000.50") - if events[1].Value != "1000.50" { - t.Errorf("got value %q, want %q", events[1].Value, "1000.50") + if events[1].ValueString() != "1000.50" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "1000.50") } } @@ -497,37 +499,37 @@ func TestRoundTripFloat(t *testing.T) { func TestRoundTripUUID(t *testing.T) { uuid := "550e8400-e29b-41d4-a716-446655440000" events := roundTrip(t, "id", scalarType(TypeUUID), uuid) - if events[1].Value != uuid { + if events[1].ValueString() != uuid { t.Errorf("got value %q, want %q", events[1].Value, uuid) } } func TestRoundTripDate(t *testing.T) { events := roundTrip(t, "d", scalarType(TypeDate), "2026-06-01") - if events[1].Value != "2026-06-01" { - t.Errorf("got value %q, want %q", events[1].Value, "2026-06-01") + if events[1].ValueString() != "2026-06-01" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "2026-06-01") } } func TestRoundTripTs(t *testing.T) { events := roundTrip(t, "dt", scalarType(TypeTs), "2026-06-01T14:30:00Z") - if events[1].Value != "2026-06-01T14:30:00Z" { - t.Errorf("got value %q, want %q", events[1].Value, "2026-06-01T14:30:00Z") + if events[1].ValueString() != "2026-06-01T14:30:00Z" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "2026-06-01T14:30:00Z") } } func TestRoundTripNullable(t *testing.T) { events := roundTrip(t, "x", nullableScalar(TypeInt), nil) - if events[1].Value != "nil" { - t.Errorf("got value %q, want %q", events[1].Value, "nil") + if events[1].ValueString() != "nil" { + t.Errorf("got value %q, want %q", 
events[1].ValueString(), "nil") } } func TestRoundTripAtomSet(t *testing.T) { typ := Type{AtomSet: &AtomSet{Members: []string{"dev", "staging", "prod"}}} events := roundTrip(t, "env", typ, "prod") - if events[1].Value != "prod" { - t.Errorf("got value %q, want %q", events[1].Value, "prod") + if events[1].ValueString() != "prod" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "prod") } } @@ -543,10 +545,10 @@ func TestRoundTripStruct(t *testing.T) { if len(events) != 6 { t.Fatalf("expected 6 events, got %d: %v", len(events), events) } - if events[2].Kind != EventScalarValue || events[2].Value != "localhost" { + if events[2].Kind != EventScalarValue || events[2].ValueString() != "localhost" { t.Errorf("host event = %v", events[2]) } - if events[3].Kind != EventScalarValue || events[3].Value != "8080" { + if events[3].Kind != EventScalarValue || events[3].ValueString() != "8080" { t.Errorf("port event = %v", events[3]) } } @@ -560,7 +562,7 @@ func TestRoundTripTuple(t *testing.T) { if len(events) != 7 { t.Fatalf("expected 7 events, got %d: %v", len(events), events) } - if events[2].Value != "1" || events[3].Value != "0" || events[4].Value != "0" { + if events[2].ValueString() != "1" || events[3].ValueString() != "0" || events[4].ValueString() != "0" { t.Errorf("values: %q %q %q", events[2].Value, events[3].Value, events[4].Value) } } @@ -574,7 +576,7 @@ func TestRoundTripList(t *testing.T) { if len(events) != 7 { t.Fatalf("expected 7 events, got %d", len(events)) } - if events[2].Value != "alpha" || events[3].Value != "bravo" || events[4].Value != "charlie" { + if events[2].ValueString() != "alpha" || events[3].ValueString() != "bravo" || events[4].ValueString() != "charlie" { t.Errorf("values: %q %q %q", events[2].Value, events[3].Value, events[4].Value) } } @@ -599,10 +601,10 @@ func TestRoundTripMap(t *testing.T) { if len(events) != 6 { t.Fatalf("expected 6 events, got %d: %v", len(events), events) } - if events[2].Kind != EventScalarValue || 
events[2].Value != "x" { + if events[2].Kind != EventScalarValue || events[2].ValueString() != "x" { t.Errorf("key event = %v", events[2]) } - if events[3].Kind != EventScalarValue || events[3].Value != "10" { + if events[3].Kind != EventScalarValue || events[3].ValueString() != "10" { t.Errorf("value event = %v", events[3]) } } @@ -648,6 +650,7 @@ func TestCompactVsPrettyStruct(t *testing.T) { if err != nil { t.Fatalf("Decode(%q): %v", input, err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } if len(events) != 6 { @@ -679,6 +682,7 @@ func TestCompactVsPrettyList(t *testing.T) { if err != nil { t.Fatalf("Decode(%q): %v", input, err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } // AssignStart + CompositeStart + 3 values + CompositeEnd + AssignEnd @@ -710,6 +714,7 @@ func TestCompactVsPrettyTuple(t *testing.T) { if err != nil { t.Fatalf("Decode(%q): %v", input, err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } if len(events) != 6 { @@ -740,6 +745,7 @@ func TestCompactVsPrettyMap(t *testing.T) { if err != nil { t.Fatalf("Decode(%q): %v", input, err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } if len(events) != 6 { @@ -795,29 +801,29 @@ func TestEncodeStrTypeMismatch(t *testing.T) { func TestRoundTripStrWithTab(t *testing.T) { events := roundTrip(t, "s", scalarType(TypeStr), "hello\tworld") - if events[1].Value != "hello\tworld" { - t.Errorf("got value %q, want %q", events[1].Value, "hello\tworld") + if events[1].ValueString() != "hello\tworld" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "hello\tworld") } } func TestRoundTripStrWithBackslash(t *testing.T) { events := roundTrip(t, "s", scalarType(TypeStr), `path\to\file`) - if events[1].Value != `path\to\file` { + if events[1].ValueString() != `path\to\file` { t.Errorf("got value %q, want %q", events[1].Value, `path\to\file`) } } func TestRoundTripStrWithQuotes(t *testing.T) { events := roundTrip(t, "s", 
scalarType(TypeStr), "it's fine") - if events[1].Value != "it's fine" { - t.Errorf("got value %q, want %q", events[1].Value, "it's fine") + if events[1].ValueString() != "it's fine" { + t.Errorf("got value %q, want %q", events[1].ValueString(), "it's fine") } } func TestRoundTripMultiLineStr(t *testing.T) { val := "line one\nline two" events := roundTrip(t, "s", scalarType(TypeStr), val) - if events[1].Value != val { + if events[1].ValueString() != val { t.Errorf("got value %q, want %q", events[1].Value, val) } } @@ -854,6 +860,7 @@ func TestRoundTripNestedPretty(t *testing.T) { if err != nil { t.Fatalf("Decode: %v", err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } @@ -861,10 +868,10 @@ func TestRoundTripNestedPretty(t *testing.T) { if len(events) != 9 { t.Fatalf("expected 9 events, got %d: %v", len(events), events) } - if events[2].Value != "test" { + if events[2].ValueString() != "test" { t.Errorf("name value = %q", events[2].Value) } - if events[4].Value != "10" || events[5].Value != "20" { + if events[4].ValueString() != "10" || events[5].ValueString() != "20" { t.Errorf("list values = %q, %q", events[4].Value, events[5].Value) } } diff --git a/encoding/errors.go b/encoding/errors.go index 2778dc9..590a2ea 100644 --- a/encoding/errors.go +++ b/encoding/errors.go @@ -10,15 +10,13 @@ type ErrorCode int const ( ErrUnexpectedEOF ErrorCode = 1 // unexpected end of input - _ ErrorCode = 2 // reserved (formerly duplicate_name; removed per spec §6.1) - ErrTypeMismatch ErrorCode = 3 // type mismatch - ErrNilNonNullable ErrorCode = 4 // nil on non-nullable type - ErrSyntax ErrorCode = 5 // syntax error (catch-all) + ErrTypeMismatch ErrorCode = 2 // type mismatch + ErrNilNonNullable ErrorCode = 3 // nil on non-nullable type + ErrSyntax ErrorCode = 4 // syntax error (catch-all) ) var errorCodeNames = [...]string{ ErrUnexpectedEOF: "unexpected_eof", - 2: "", ErrTypeMismatch: "type_mismatch", ErrNilNonNullable: "nil_non_nullable", ErrSyntax: "syntax", 
@@ -74,3 +72,47 @@ func (e *ParseError) Unwrap() error {
 	}
 	return e.Wrapped
 }
+
+// DeserializeError wraps a parse or conversion error with deserialization context.
+type DeserializeError struct {
+	Pos      Pos    // source position in the PAKT data
+	Property string // which unit property (e.g., "config")
+	Field    string // which field within a composite (e.g., "port"), or empty
+	Message  string // human-readable description
+	Err      error  // wrapped underlying error
+}
+
+// Error implements the [error] interface. The message is prefixed with
+// whichever context is set, in one of these shapes:
+//
+//	property.field (line:col): message
+//	property (line:col): message
+//	(line:col): message
+//	message
+//
+// The position is omitted when Pos is zero. A Field without a Property is
+// reported as "field", with no leading dot.
+func (e *DeserializeError) Error() string {
+	// Join whichever of Property and Field are set into "property.field".
+	ctx := e.Property
+	if e.Field != "" {
+		if ctx != "" {
+			ctx += "."
+		}
+		ctx += e.Field
+	}
+	// A zero Pos means no position was recorded, so it is left out.
+	if e.Pos.Line != 0 || e.Pos.Col != 0 {
+		if ctx != "" {
+			ctx += " "
+		}
+		ctx += fmt.Sprintf("(%d:%d)", e.Pos.Line, e.Pos.Col)
+	}
+	if ctx == "" {
+		return e.Message
+	}
+	return ctx + ": " + e.Message
+}
+
+// Unwrap returns the underlying error.
+func (e *DeserializeError) Unwrap() error { return e.Err } diff --git a/encoding/errors_test.go b/encoding/errors_test.go new file mode 100644 index 0000000..b7fc490 --- /dev/null +++ b/encoding/errors_test.go @@ -0,0 +1,140 @@ +package encoding + +import ( + "errors" + "testing" +) + +func TestDeserializeErrorFormatting(t *testing.T) { + tests := []struct { + name string + err DeserializeError + want string + }{ + { + name: "with property and field", + err: DeserializeError{ + Pos: Pos{Line: 5, Col: 10}, + Property: "config", + Field: "port", + Message: "invalid value", + }, + want: "config.port (5:10): invalid value", + }, + { + name: "with property no field", + err: DeserializeError{ + Pos: Pos{Line: 3, Col: 1}, + Property: "server", + Message: "type mismatch", + }, + want: "server (3:1): type mismatch", + }, + { + name: "no property no field", + err: DeserializeError{ + Pos: Pos{Line: 1, Col: 1}, + Message: "unexpected event", + }, + want: "(1:1): unexpected event", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.err.Error() + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestDeserializeErrorUnwrap(t *testing.T) { + inner := errors.New("root cause") + err := &DeserializeError{ + Pos: Pos{Line: 1, Col: 1}, + Message: "wrap", + Err: inner, + } + if !errors.Is(err, inner) { + t.Error("expected Unwrap to return inner error") + } + + // nil Err + err2 := &DeserializeError{Message: "no inner"} + if err2.Unwrap() != nil { + t.Error("expected nil Unwrap when Err is nil") + } +} + +func TestErrorCodeError(t *testing.T) { + tests := []struct { + code ErrorCode + want string + }{ + {ErrUnexpectedEOF, "unexpected_eof"}, + {ErrTypeMismatch, "type_mismatch"}, + {ErrNilNonNullable, "nil_non_nullable"}, + {ErrSyntax, "syntax"}, + {ErrorCode(99), "error_99"}, // unknown code + } + for _, tt := range tests { + t.Run(tt.want, func(t *testing.T) { + got := tt.code.Error() + if got != tt.want { + 
t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} + +func TestNewParseError(t *testing.T) { + pe := NewParseError(Pos{Line: 2, Col: 5}, "something broke") + if pe.Pos.Line != 2 || pe.Pos.Col != 5 { + t.Errorf("wrong position: %+v", pe.Pos) + } + if pe.Message != "something broke" { + t.Errorf("wrong message: %q", pe.Message) + } + want := "2:5: something broke" + if pe.Error() != want { + t.Errorf("got %q, want %q", pe.Error(), want) + } + if pe.Unwrap() != nil { + t.Error("expected nil Unwrap for uncategorized error") + } +} + +func TestParseErrorWrap(t *testing.T) { + pe := Wrap(Pos{Line: 10, Col: 3}, "nil not allowed", ErrNilNonNullable) + if pe.Wrapped != ErrNilNonNullable { + t.Errorf("wrong wrapped code: %v", pe.Wrapped) + } + if !errors.Is(pe, ErrNilNonNullable) { + t.Error("expected errors.Is to match ErrNilNonNullable") + } + if pe.Code() != int(ErrNilNonNullable) { + t.Errorf("wrong Code(): %d", pe.Code()) + } +} + +func TestParseErrorWrapf(t *testing.T) { + pe := Wrapf(Pos{Line: 1, Col: 1}, ErrSyntax, "bad token %q", "@@") + if pe.Message != `bad token "@@"` { + t.Errorf("wrong message: %q", pe.Message) + } + if !errors.Is(pe, ErrSyntax) { + t.Error("expected errors.Is to match ErrSyntax") + } +} + +func TestParseErrorErrorf(t *testing.T) { + pe := Errorf(Pos{Line: 7, Col: 12}, "unexpected %s", "token") + if pe.Message != "unexpected token" { + t.Errorf("wrong message: %q", pe.Message) + } + want := "7:12: unexpected token" + if pe.Error() != want { + t.Errorf("got %q, want %q", pe.Error(), want) + } +} diff --git a/encoding/event.go b/encoding/event.go index 29bc422..1d85a24 100644 --- a/encoding/event.go +++ b/encoding/event.go @@ -105,8 +105,9 @@ type Event struct { Kind EventKind `json:"kind"` // category of event Pos Pos `json:"pos"` // source position Name string `json:"name,omitempty"` // assignment or field name (empty for positional values) + Type *Type `json:"type,omitempty"` // declared PAKT type (populated on statement start events) 
ScalarType TypeKind `json:"scalarType,omitempty"` // scalar type kind (zero for structural events) - Value string `json:"value,omitempty"` // literal value text (empty for structural events) + Value []byte `json:"value,omitempty"` // literal value bytes (borrow: valid until next Decode call) Err error `json:"-"` // non-nil only when Kind == EventError; handled by custom MarshalJSON } @@ -115,21 +116,74 @@ type Event struct { // EVENT\tLINE:COL\tNAME\tSCALAR_TYPE\tVALUE func (e Event) String() string { return fmt.Sprintf("%s\t%d:%d\t%s\t%s\t%s", - e.Kind, e.Pos.Line, e.Pos.Col, e.Name, e.ScalarType, e.Value) + e.Kind, e.Pos.Line, e.Pos.Col, e.Name, e.ScalarType, string(e.Value)) +} + +// ValueString returns the Value as a string. This allocates. +// Prefer working with Value as []byte when possible. +func (e Event) ValueString() string { return string(e.Value) } + +// IsNilValue reports whether this scalar event represents a nil value. +func (e Event) IsNilValue() bool { + return e.Kind == EventScalarValue && len(e.Value) == 3 && + e.Value[0] == 'n' && e.Value[1] == 'i' && e.Value[2] == 'l' } // MarshalJSON produces a JSON object for the Event. -// When Err is non-nil, an "error" field is included with the error message. +// Value is encoded as a string (not base64). When Err is non-nil, +// an "error" field is included with the error message. 
func (e Event) MarshalJSON() ([]byte, error) { - type eventAlias Event // prevent infinite recursion a := struct { - eventAlias - Error string `json:"error,omitempty"` + Kind EventKind `json:"kind"` + Pos Pos `json:"pos"` + Name string `json:"name,omitempty"` + Type *Type `json:"type,omitempty"` + ScalarType TypeKind `json:"scalarType,omitempty"` + Value string `json:"value,omitempty"` + Error string `json:"error,omitempty"` }{ - eventAlias: eventAlias(e), + Kind: e.Kind, + Pos: e.Pos, + Name: e.Name, + Type: e.Type, + ScalarType: e.ScalarType, + Value: string(e.Value), } if e.Err != nil { a.Error = e.Err.Error() } return json.Marshal(a) } + +// UnmarshalJSON reads an Event from JSON, decoding Value from a string +// (not base64) to match [Event.MarshalJSON]. +func (e *Event) UnmarshalJSON(data []byte) error { + var raw struct { + Kind EventKind `json:"kind"` + Pos Pos `json:"pos"` + Name string `json:"name"` + Type *Type `json:"type"` + ScalarType TypeKind `json:"scalarType"` + Value string `json:"value"` + Error string `json:"error"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return err + } + e.Kind = raw.Kind + e.Pos = raw.Pos + e.Name = raw.Name + e.Type = raw.Type + e.ScalarType = raw.ScalarType + if raw.Value != "" { + e.Value = []byte(raw.Value) + } else { + e.Value = nil + } + if raw.Error != "" { + e.Err = fmt.Errorf("%s", raw.Error) + } else { + e.Err = nil + } + return nil +} diff --git a/encoding/event_test.go b/encoding/event_test.go index b67bde0..7174889 100644 --- a/encoding/event_test.go +++ b/encoding/event_test.go @@ -68,7 +68,7 @@ func TestEventMarshalScalar(t *testing.T) { Pos: Pos{Line: 1, Col: 16}, Name: "greeting", ScalarType: TypeStr, - Value: "'hello world'", + Value: []byte("'hello world'"), } data, err := json.Marshal(e) @@ -170,7 +170,7 @@ func TestEventRoundTrip(t *testing.T) { Pos: Pos{Line: 7, Col: 3}, Name: "count", ScalarType: TypeInt, - Value: "42", + Value: []byte("42"), } data, err := json.Marshal(orig) @@ -196,7 
+196,7 @@ func TestEventRoundTrip(t *testing.T) { if got.ScalarType != orig.ScalarType { t.Errorf("ScalarType: got %q, want %q", got.ScalarType, orig.ScalarType) } - if got.Value != orig.Value { + if got.ValueString() != orig.ValueString() { t.Errorf("Value: got %q, want %q", got.Value, orig.Value) } } diff --git a/encoding/fuzz_test.go b/encoding/fuzz_test.go new file mode 100644 index 0000000..cdbe4a3 --- /dev/null +++ b/encoding/fuzz_test.go @@ -0,0 +1,139 @@ +package encoding + +import ( + "bytes" + "strings" + "testing" +) + +// FuzzDecode exercises the full decoder with arbitrary input. +// Catches panics, infinite loops, and OOM on malformed PAKT. +func FuzzDecode(f *testing.F) { + // Seed corpus from valid PAKT patterns + f.Add([]byte("name:str = 'hello'\n")) + f.Add([]byte("count:int = 42\n")) + f.Add([]byte("rate:float = 3.14e0\n")) + f.Add([]byte("ok:bool = true\n")) + f.Add([]byte("id:uuid = 550e8400-e29b-41d4-a716-446655440000\n")) + f.Add([]byte("d:date = 2026-06-01\n")) + f.Add([]byte("t:ts = 2026-06-01T14:30:00Z\n")) + f.Add([]byte("b:bin = x'48656C6C6F'\n")) + f.Add([]byte("s:{x:str, y:int} = {'a', 1}\n")) + f.Add([]byte("t:(int, str) = (1, 'x')\n")) + f.Add([]byte("l:[int] = [1, 2, 3]\n")) + f.Add([]byte("m: = <'a' ; 1>\n")) + f.Add([]byte("p:[int] <<\n1\n2\n3\n")) + f.Add([]byte("n:str? = nil\n")) + f.Add([]byte("a:|x, y, z| = |x\n")) + f.Add([]byte("# comment\nname:str = 'hello'\n")) + f.Add([]byte("")) + f.Add([]byte("\x00")) + f.Add([]byte("name:str = 'hello'\x00")) + + f.Fuzz(func(t *testing.T, data []byte) { + dec := NewDecoder(bytes.NewReader(data)) + defer dec.Close() + for i := 0; i < 10000; i++ { + _, err := dec.Decode() + if err != nil { + return + } + } + }) +} + +// FuzzUnmarshalNew exercises the full deserialization pipeline. +// Catches reflection panics, type confusion, and event stream corruption. 
+func FuzzUnmarshalNew(f *testing.F) { + type Target struct { + Name string `pakt:"name"` + Count int64 `pakt:"count"` + Rate float64 `pakt:"rate"` + Active bool `pakt:"active"` + Label *string `pakt:"label"` + } + + f.Add([]byte("name:str = 'test'\ncount:int = 1\nrate:float = 1e0\nactive:bool = true\n")) + f.Add([]byte("name:str = 'x'\n")) + f.Add([]byte("label:str? = nil\n")) + f.Add([]byte("")) + f.Add([]byte("unknown:int = 42\n")) + + f.Fuzz(func(t *testing.T, data []byte) { + var target Target + _ = UnmarshalNewInto(data, &target) + }) +} + +// FuzzReadString exercises string parsing with escape processing. +// Catches panics on malformed escapes, unterminated strings, null bytes. +func FuzzReadString(f *testing.F) { + f.Add("'hello'") + f.Add("'hello\\nworld'") + f.Add("'\\u0041'") + f.Add("'''\\nmulti\\nline\\n'''") + f.Add("r'raw string'") + f.Add("r'''\\nraw multi\\n'''") + f.Add("'escape \\' inside'") + f.Add("'") + f.Add("''") + f.Add("'\\") + f.Add("'\\u'") + f.Add("'\\u00'") + + f.Fuzz(func(t *testing.T, input string) { + r := newReader(strings.NewReader(input)) + defer r.release() + _, _ = r.readString() + }) +} + +// FuzzParseIntLiteral exercises integer literal parsing. +// Catches overflow, invalid prefix combinations, underscore edge cases. +func FuzzParseIntLiteral(f *testing.F) { + f.Add("0") + f.Add("42") + f.Add("-7") + f.Add("+3") + f.Add("1_000_000") + f.Add("0xFF") + f.Add("0b1010") + f.Add("0o777") + f.Add("9223372036854775807") // MaxInt64 + f.Add("-9223372036854775808") // MinInt64 + f.Add("9223372036854775808") // overflow + f.Add("0x") + f.Add("0b") + f.Add("") + f.Add("_") + f.Add("0xGG") + + f.Fuzz(func(t *testing.T, input string) { + _, _ = parseIntLiteral(input) + }) +} + +// FuzzParseType exercises the recursive descent type annotation parser. +// Catches stack overflow on deeply nested types, malformed syntax. 
+func FuzzParseType(f *testing.F) { + f.Add("str") + f.Add("int") + f.Add("str?") + f.Add("{x:str, y:int}") + f.Add("(int, str)") + f.Add("[int]") + f.Add("") + f.Add("|a, b, c|") + f.Add("{a:{b:{c:str}}}") + f.Add("[[[[int]]]]") + f.Add("") + f.Add("???") + f.Add("{") + f.Add("{{{{{{{{{{{{{{{{{{{{") + + f.Fuzz(func(t *testing.T, input string) { + r := newReader(strings.NewReader(input)) + defer r.release() + _, _ = r.readType() + }) +} diff --git a/encoding/integration_test.go b/encoding/integration_test.go index 07e3f71..b0b8d67 100644 --- a/encoding/integration_test.go +++ b/encoding/integration_test.go @@ -5,6 +5,7 @@ import ( "io" "os" "path/filepath" + "slices" "strings" "testing" ) @@ -17,7 +18,7 @@ import ( // failing the test on any unexpected error. func fileDecodeAll(t *testing.T, path string) []Event { t.Helper() - f, err := os.Open(path) + f, err := os.Open(path) //nolint:gosec // test fixture path if err != nil { t.Fatalf("open %s: %v", path, err) } @@ -33,6 +34,7 @@ func fileDecodeAll(t *testing.T, path string) []Event { if err != nil { t.Fatalf("Decode(%s): %v", filepath.Base(path), err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } return events @@ -42,7 +44,7 @@ func fileDecodeAll(t *testing.T, path string) []Event { // if the document parses without error. 
func fileDecodeExpectError(t *testing.T, path string) error { t.Helper() - f, err := os.Open(path) + f, err := os.Open(path) //nolint:gosec // test fixture path if err != nil { t.Fatalf("open %s: %v", path, err) } @@ -158,8 +160,8 @@ func TestIntegrationValidScalars(t *testing.T) { for i := 0; i < len(events); i += 3 { name := events[i].Name if want, ok := spotChecks[name]; ok { - if events[i+1].Value != want { - t.Errorf("%s: value = %q, want %q", name, events[i+1].Value, want) + if events[i+1].ValueString() != want { + t.Errorf("%s: value = %q, want %q", name, events[i+1].ValueString(), want) } } } @@ -179,7 +181,7 @@ func TestIntegrationValidStrings(t *testing.T) { vals := make(map[string]string) for i := 0; i < len(events); i++ { if events[i].Kind == EventScalarValue { - vals[events[i].Name] = events[i].Value + vals[events[i].Name] = events[i].ValueString() } } @@ -294,7 +296,7 @@ func TestIntegrationValidNullable(t *testing.T) { scalarTypes := make(map[string]TypeKind) for _, ev := range events { if ev.Kind == EventScalarValue && ev.Name != "" { - vals[ev.Name] = ev.Value + vals[ev.Name] = ev.ValueString() scalarTypes[ev.Name] = ev.ScalarType } } @@ -354,8 +356,8 @@ func TestIntegrationValidAtoms(t *testing.T) { for i := 0; i < len(events); i += 3 { name := events[i].Name if want, ok := expectedValues[name]; ok { - if events[i+1].Value != want { - t.Errorf("%s: value = %q, want %q", name, events[i+1].Value, want) + if events[i+1].ValueString() != want { + t.Errorf("%s: value = %q, want %q", name, events[i+1].ValueString(), want) } } } @@ -423,8 +425,8 @@ func TestIntegrationValidFull(t *testing.T) { if events[deployIdx+2].Kind != EventScalarValue || events[deployIdx+2].Name != "level" { t.Errorf("deploy[2]: got %s name=%q, want ScalarValue name=level", events[deployIdx+2].Kind, events[deployIdx+2].Name) } - if events[deployIdx+2].Value != "prod" { - t.Errorf("deploy level: value = %q, want %q", events[deployIdx+2].Value, "prod") + if 
events[deployIdx+2].ValueString() != "prod" { + t.Errorf("deploy level: value = %q, want %q", events[deployIdx+2].ValueString(), "prod") } if events[deployIdx+3].Kind != EventScalarValue || events[deployIdx+3].Name != "release" { t.Errorf("deploy[3]: got %s name=%q, want ScalarValue name=release", events[deployIdx+3].Kind, events[deployIdx+3].Name) @@ -454,8 +456,8 @@ func TestIntegrationValidFull(t *testing.T) { featureValues := []string{"dark-mode", "notifications", "audit-log"} for j, want := range featureValues { ev := events[featIdx+2+j] - if ev.Kind != EventScalarValue || ev.Value != want { - t.Errorf("features[%d]: got %s value=%q, want ScalarValue value=%q", j, ev.Kind, ev.Value, want) + if ev.Kind != EventScalarValue || ev.ValueString() != want { + t.Errorf("features[%d]: got %s value=%q, want ScalarValue value=%q", j, ev.Kind, ev.ValueString(), want) } } @@ -484,8 +486,8 @@ func TestIntegrationValidFull(t *testing.T) { // Verify nullable nil: rollback-version should have nil value for _, ev := range events { if ev.Kind == EventScalarValue && ev.Name == "rollback-version" { - if ev.Value != "nil" { - t.Errorf("rollback-version: value = %q, want %q", ev.Value, "nil") + if ev.ValueString() != "nil" { + t.Errorf("rollback-version: value = %q, want %q", ev.ValueString(), "nil") } break } @@ -662,13 +664,14 @@ func TestDuplicateRootNamesPreserved(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) } // Both statements preserved: AssignStart, ScalarValue, AssignEnd × 2 if len(events) != 6 { t.Fatalf("expected 6 events for two duplicate statements, got %d: %v", len(events), events) } - if events[1].Value != "a" || events[4].Value != "b" { + if events[1].ValueString() != "a" || events[4].ValueString() != "b" { t.Fatalf("duplicate names not preserved in order: %v", events) } } @@ -678,7 +681,7 @@ func TestDuplicateMapKeysFixtureParses(t *testing.T) { if len(events) != 10 { 
t.Fatalf("expected 10 events, got %d: %v", len(events), events) } - if events[2].Value != "alice" || events[3].Value != "1" || events[6].Value != "alice" || events[7].Value != "3" { + if events[2].ValueString() != "alice" || events[3].ValueString() != "1" || events[6].ValueString() != "alice" || events[7].ValueString() != "3" { t.Fatalf("unexpected duplicate-key event sequence: %v", events) } } @@ -709,16 +712,6 @@ func TestSentinelErrUnexpectedEOF(t *testing.T) { } } -func TestSentinelErrDuplicateNameInSpec(t *testing.T) { - _, err := ParseSpec(strings.NewReader("name:str\nname:int")) - if err == nil { - t.Fatal("expected error for duplicate name in spec") - } - if !errors.Is(err, ErrSyntax) { - t.Fatalf("expected errors.Is(err, ErrSyntax), got: %v", err) - } -} - func TestDuplicateMapKeysUnit(t *testing.T) { typ := mapType(scalarType(TypeStr), scalarType(TypeInt)) events, err := decodeValue("< 'a' ; 1, 'a' ; 2 >", typ) @@ -728,7 +721,7 @@ func TestDuplicateMapKeysUnit(t *testing.T) { if len(events) != 6 { t.Fatalf("expected 6 events, got %d: %v", len(events), events) } - if events[1].Value != "a" || events[2].Value != "1" || events[3].Value != "a" || events[4].Value != "2" { + if events[1].ValueString() != "a" || events[2].ValueString() != "1" || events[3].ValueString() != "a" || events[4].ValueString() != "2" { t.Fatalf("unexpected duplicate-key event sequence: %v", events) } } @@ -767,7 +760,7 @@ func TestNulByteTerminatesUnitAtTopLevel(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events (one statement before NUL), got %d: %v", len(events), events) } - if events[1].Value != "Alice" { + if events[1].ValueString() != "Alice" { t.Errorf("expected value 'Alice', got %q", events[1].Value) } } @@ -811,8 +804,8 @@ func TestNulByteTerminatesPack(t *testing.T) { } } -func TestNulByteMoreReturnsFalse(t *testing.T) { - // More() should return false when NUL terminates the unit. 
+func TestNulByteTerminatesUnit(t *testing.T) { + // After NUL, the decoder should return EOF. input := "name:str = 'Alice'\x00" d := NewDecoder(strings.NewReader(input)) defer d.Close() @@ -829,7 +822,9 @@ func TestNulByteMoreReturnsFalse(t *testing.T) { break } } - if d.More() { - t.Fatal("More() should return false after NUL terminator") + // Next Decode should return EOF (NUL terminated the unit). + _, err := d.Decode() + if err != io.EOF { + t.Fatalf("expected io.EOF after NUL terminator, got: %v", err) } } diff --git a/encoding/marshal.go b/encoding/marshal.go index 73928cb..c29b8d1 100644 --- a/encoding/marshal.go +++ b/encoding/marshal.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding" "fmt" + "math" "reflect" "time" ) @@ -94,7 +95,11 @@ func prepareValue(typ Type, v reflect.Value) (any, error) { return v.Int(), nil case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - return int64(v.Uint()), nil + u := v.Uint() + if u > math.MaxInt64 { + return nil, fmt.Errorf("pakt: uint value %d overflows int64", u) + } + return int64(u), nil //nolint:gosec // overflow checked above case reflect.Float32, reflect.Float64: return v.Float(), nil @@ -124,7 +129,7 @@ func prepareStruct(typ Type, v reflect.Value) (map[string]any, error) { return nil, fmt.Errorf("pakt: expected struct type, got %s", typ.String()) } - fields, err := StructFields(v.Type()) + fields, err := ReflectStructFields(v.Type()) if err != nil { return nil, err } diff --git a/encoding/navigation.go b/encoding/navigation.go new file mode 100644 index 0000000..85a09fc --- /dev/null +++ b/encoding/navigation.go @@ -0,0 +1,232 @@ +package encoding + +import ( + "io" + "iter" + "reflect" +) + +// FieldEntry represents a named field within a struct value. +type FieldEntry struct { + Name string +} + +// MapEntry represents a key-value pair from a PAKT map value. +// K is not constrained to comparable — iteration doesn't require hashing. 
+type MapEntry[K, V any] struct { + Key K + Value V +} + +// TupleEntry represents one element in a heterogeneous tuple value. +type TupleEntry struct { + Index int +} + +// StructFields returns an iterator over the fields of a struct value +// in the current statement. Each [FieldEntry] provides the field name. +// After each yield, the caller reads the field's value via [ReadValue] +// or skips it via [UnitReader.Skip]. +// +// Errors stop iteration; call [UnitReader.Err] after the loop. +func StructFields(sr *UnitReader) iter.Seq[FieldEntry] { + return func(yield func(FieldEntry) bool) { + for { + // If the previous field's value wasn't consumed by the caller, + // the pending event is still set — drain it before reading the next field. + if sr.pending != nil { + ev := *sr.pending + sr.pending = nil + if err := skipValueEvent(sr, ev); err != nil { + sr.setErr(err) + return + } + } + + ev, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + return + } + + if ev.Kind == EventStructEnd { + return + } + + entry := FieldEntry{ + Name: ev.Name, + } + + // Push the event back so the caller's ReadValue/ReadAs + // picks it up as the field's value. + sr.pushBack(ev) + + if !yield(entry) { + // Caller broke — drain pending + skip rest of struct. + if sr.pending != nil { + pev := *sr.pending + sr.pending = nil + skipValueEvent(sr, pev) //nolint:errcheck + } + skipComposite(sr, EventStructStart) //nolint:errcheck + return + } + } + } +} + +// ListElements returns an iterator over elements of a list value in the +// current statement. Each element is deserialized into type T. +// +// Errors stop iteration; call [UnitReader.Err] after the loop. 
+func ListElements[T any](sr *UnitReader) iter.Seq[T] { + return func(yield func(T) bool) { + for { + ev, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + return + } + + if ev.Kind == EventListEnd { + return + } + + var val T + target := reflect.ValueOf(&val).Elem() + if ev.Kind == EventScalarValue && ev.IsNilValue() { + if err := setNil(target); err != nil { + sr.setErr(err) + return + } + } else { + target = allocPtr(target) + if err := handleValueEvent(sr, ev, target); err != nil { + sr.setErr(err) + return + } + } + + if !yield(val) { + skipComposite(sr, EventListStart) //nolint:errcheck + return + } + } + } +} + +// MapEntries returns an iterator over key-value pairs of a map value in the +// current statement. K is not constrained to comparable — iteration doesn't +// require hashing. +// +// Errors stop iteration; call [UnitReader.Err] after the loop. +func MapEntries[K, V any](sr *UnitReader) iter.Seq[MapEntry[K, V]] { + return func(yield func(MapEntry[K, V]) bool) { + for { + // Read key + keyEv, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + return + } + + if keyEv.Kind == EventMapEnd { + return + } + + var key K + keyTarget := reflect.ValueOf(&key).Elem() + keyTarget = allocPtr(keyTarget) + if err := handleValueEvent(sr, keyEv, keyTarget); err != nil { + sr.setErr(err) + return + } + + // Read value + valEv, err := sr.nextEvent() + if err != nil { + sr.setErr(err) + return + } + + var val V + valTarget := reflect.ValueOf(&val).Elem() + if valEv.Kind == EventScalarValue && valEv.IsNilValue() { + if err := setNil(valTarget); err != nil { + sr.setErr(err) + return + } + } else { + valTarget = allocPtr(valTarget) + if err := handleValueEvent(sr, valEv, valTarget); err != nil { + sr.setErr(err) + return + } + } + + if !yield(MapEntry[K, V]{Key: key, Value: val}) { + skipComposite(sr, EventMapStart) //nolint:errcheck + return + } + } + } +} + +// TupleElements returns an iterator over the 
elements of a tuple value +// in the current statement. Each [TupleEntry] provides the element index. +// After each yield, the caller reads the element's value via [ReadValue] +// or skips it via [UnitReader.Skip]. +// +// Errors stop iteration; call [UnitReader.Err] after the loop. +func TupleElements(sr *UnitReader) iter.Seq[TupleEntry] { + return func(yield func(TupleEntry) bool) { + idx := 0 + for { + // Drain unconsumed previous element. + if sr.pending != nil { + ev := *sr.pending + sr.pending = nil + if err := skipValueEvent(sr, ev); err != nil { + sr.setErr(err) + return + } + } + + ev, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + return + } + + if ev.Kind == EventTupleEnd { + return + } + + entry := TupleEntry{ + Index: idx, + } + + sr.pushBack(ev) + + if !yield(entry) { + if sr.pending != nil { + pev := *sr.pending + sr.pending = nil + skipValueEvent(sr, pev) //nolint:errcheck + } + skipComposite(sr, EventTupleStart) //nolint:errcheck + return + } + + idx++ + } + } +} diff --git a/encoding/navigation_test.go b/encoding/navigation_test.go new file mode 100644 index 0000000..3564eb6 --- /dev/null +++ b/encoding/navigation_test.go @@ -0,0 +1,234 @@ +package encoding + +import ( + "strings" + "testing" +) + +func TestListElements(t *testing.T) { + input := "tags:[str] = ['alpha', 'beta', 'gamma']\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var items []string + for stmt := range sr.Properties() { + _ = stmt + // Consume the ListStart event first + ev, err := sr.nextEvent() + if err != nil { + t.Fatal(err) + } + if ev.Kind != EventListStart { + t.Fatalf("expected ListStart, got %s", ev.Kind) + } + for item := range ListElements[string](sr) { + items = append(items, item) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(items) != 3 || items[0] != "alpha" || items[1] != "beta" || items[2] != "gamma" { + t.Errorf("expected [alpha, beta, gamma], got %v", items) + } +} + 
+func TestMapEntries(t *testing.T) { + input := "scores: = <'alice' ; 100, 'bob' ; 200>\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + result := make(map[string]int64) + for stmt := range sr.Properties() { + _ = stmt + ev, err := sr.nextEvent() + if err != nil { + t.Fatal(err) + } + if ev.Kind != EventMapStart { + t.Fatalf("expected MapStart, got %s", ev.Kind) + } + for entry := range MapEntries[string, int64](sr) { + result[entry.Key] = entry.Value + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if result["alice"] != 100 || result["bob"] != 200 { + t.Errorf("unexpected: %v", result) + } +} + +func TestListElementsEarlyBreak(t *testing.T) { + input := "nums:[int] = [1, 2, 3, 4, 5]\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var first int64 + var name string + for stmt := range sr.Properties() { + switch stmt.Name { + case "nums": + ev, _ := sr.nextEvent() // ListStart + _ = ev + for item := range ListElements[int64](sr) { + first = item + break // early break — should drain remaining + } + case "name": + var err error + name, err = ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if first != 1 { + t.Errorf("expected first=1, got %d", first) + } + if name != "after" { + t.Errorf("expected name='after', got %q", name) + } +} + +func TestStructFields(t *testing.T) { + input := "cfg:{host:str, port:int} = {'localhost', 8080}\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + // Consume the StructStart event + ev, err := sr.nextEvent() + if err != nil { + t.Fatal(err) + } + if ev.Kind != EventStructStart { + t.Fatalf("expected StructStart, got %s", ev.Kind) + } + + var fieldNames []string + for field := range StructFields(sr) { + fieldNames = append(fieldNames, field.Name) + // StructFields identifies the field and leaves its value 
event pending on + // the UnitReader so callers can consume it with ReadValue or Skip. + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(fieldNames) != 2 || fieldNames[0] != "host" || fieldNames[1] != "port" { + t.Errorf("expected [host, port], got %v", fieldNames) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestStructFieldsEarlyBreak(t *testing.T) { + input := "cfg:{a:str, b:str, c:str} = {'one', 'two', 'three'}\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var firstName string + var afterName string + for stmt := range sr.Properties() { + switch stmt.Name { + case "cfg": + ev, _ := sr.nextEvent() // StructStart + _ = ev + for field := range StructFields(sr) { + firstName = field.Name + break // early break — should drain remaining struct + } + case "name": + var err error + afterName, err = ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if firstName != "a" { + t.Errorf("expected first field 'a', got %q", firstName) + } + if afterName != "after" { + t.Errorf("expected afterName='after', got %q", afterName) + } +} + +func TestTupleElements(t *testing.T) { + input := "point:(int, int, int) = (10, 20, 30)\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + // Consume the TupleStart event + ev, err := sr.nextEvent() + if err != nil { + t.Fatal(err) + } + if ev.Kind != EventTupleStart { + t.Fatalf("expected TupleStart, got %s", ev.Kind) + } + + var indices []int + for elem := range TupleElements(sr) { + indices = append(indices, elem.Index) + // TupleElements already consumed the element's event (scalar). + // For scalar elements, no further read is needed. 
+ } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(indices) != 3 || indices[0] != 0 || indices[1] != 1 || indices[2] != 2 { + t.Errorf("expected indices [0,1,2], got %v", indices) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestTupleElementsEarlyBreak(t *testing.T) { + input := "point:(int, int, int) = (10, 20, 30)\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var firstIdx int + var name string + for stmt := range sr.Properties() { + switch stmt.Name { + case "point": + ev, _ := sr.nextEvent() // TupleStart + _ = ev + for elem := range TupleElements(sr) { + firstIdx = elem.Index + break // early break — should drain remaining tuple + } + case "name": + var err error + name, err = ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if firstIdx != 0 { + t.Errorf("expected first index 0, got %d", firstIdx) + } + if name != "after" { + t.Errorf("expected name='after', got %q", name) + } +} diff --git a/encoding/options.go b/encoding/options.go new file mode 100644 index 0000000..d4c7da7 --- /dev/null +++ b/encoding/options.go @@ -0,0 +1,94 @@ +package encoding + +// Option configures deserialization behavior for UnitReader and UnmarshalNew. +type Option func(*options) + +type options struct { + unknownFields FieldPolicy + missingFields MissingPolicy + duplicates DuplicatePolicy + converters *converterRegistry +} + +func defaultOptions() *options { + return &options{ + unknownFields: SkipUnknown, + missingFields: ZeroMissing, + duplicates: LastWins, + } +} + +func buildOptions(opts []Option) *options { + o := defaultOptions() + for _, fn := range opts { + fn(o) + } + return o +} + +// FieldPolicy controls behavior when PAKT data contains fields not present +// in the target struct. +type FieldPolicy int + +const ( + // SkipUnknown silently skips unknown fields (default). 
+ SkipUnknown FieldPolicy = iota + // ErrorUnknown returns an error on unknown fields. + ErrorUnknown +) + +// MissingPolicy controls behavior when the target struct has fields not +// present in the PAKT data. +type MissingPolicy int + +const ( + // ZeroMissing leaves missing fields at their zero value (default). + ZeroMissing MissingPolicy = iota + // ErrorMissing returns an error for missing fields. + ErrorMissing +) + +// DuplicatePolicy controls behavior when PAKT data contains duplicate +// statement names or map keys. +type DuplicatePolicy int + +const ( + // LastWins overwrites with the last value encountered (default). + LastWins DuplicatePolicy = iota + // FirstWins keeps the first value and ignores subsequent duplicates. + FirstWins + // ErrorDupes returns an error on duplicate names or keys. + ErrorDupes + // Accumulate appends duplicate values to a collection (target must be a slice). + Accumulate +) + +// UnknownFields sets the policy for unknown fields in PAKT data. +func UnknownFields(policy FieldPolicy) Option { + return func(o *options) { o.unknownFields = policy } +} + +// MissingFields sets the policy for target fields missing from PAKT data. +func MissingFields(policy MissingPolicy) Option { + return func(o *options) { o.missingFields = policy } +} + +// Duplicates sets the policy for duplicate property names or map keys. +func Duplicates(policy DuplicatePolicy) Option { + return func(o *options) { o.duplicates = policy } +} + +// converterRegistry holds registered ValueConverters keyed by target type +// and named converters for field-level overrides. 
+type converterRegistry struct { + byType map[any]any // reflect.Type → ValueConverter (type-erased) +} + +func (o *options) ensureConverters() *converterRegistry { + if o.converters == nil { + o.converters = &converterRegistry{ + byType: make(map[any]any), + } + } + return o.converters +} diff --git a/encoding/pack_iter.go b/encoding/pack_iter.go new file mode 100644 index 0000000..32fd67f --- /dev/null +++ b/encoding/pack_iter.go @@ -0,0 +1,108 @@ +package encoding + +import ( + "io" + "iter" + "reflect" +) + +// PackItems returns an iterator over the elements of a pack statement. +// Each element is deserialized into type T. +// +// On error, iteration stops. Call [UnitReader.Err] after the loop. +// +// If the caller breaks out of the loop early, the iterator drains the +// remaining pack elements (without deserializing them) so the reader is +// positioned at the next statement. +func PackItems[T any](sr *UnitReader) iter.Seq[T] { + return func(yield func(T) bool) { + if sr.current == nil || !sr.inPack { + sr.setErr(&DeserializeError{Message: "PackItems called outside a pack statement"}) + return + } + + for { + ev, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + // EOF or pack-end: nextEvent cleared sr.current + return + } + + // Deserialize the element. + var val T + target := reflect.ValueOf(&val).Elem() + target = allocPtr(target) + if err := handleValueEvent(sr, ev, target); err != nil { + sr.setErr(err) + // Drain remaining pack events. + sr.drainCurrent() + return + } + + if !yield(val) { + // Caller broke out of loop — drain remaining pack events. + sr.drainCurrent() + return + } + } + } +} + +// PackItemsInto returns an iterator that reuses a caller-provided buffer. +// On each iteration, the buffer is populated with the next element. +// The yielded pointer aliases the buffer — do not retain across iterations. +// +// Early break drains remaining pack elements. 
+func PackItemsInto[T any](sr *UnitReader, buf *T) iter.Seq[*T] { + return func(yield func(*T) bool) { + if buf == nil { + sr.setErr(&DeserializeError{Message: "PackItemsInto requires a non-nil buffer"}) + return + } + if sr.current == nil || !sr.inPack { + sr.setErr(&DeserializeError{Message: "PackItemsInto called outside a pack statement"}) + return + } + + for { + ev, err := sr.nextEvent() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + // EOF or pack-end: nextEvent cleared sr.current + return + } + + // Zero the buffer and populate. + *buf = *new(T) + target := reflect.ValueOf(buf).Elem() + target = allocPtr(target) + if err := handleValueEvent(sr, ev, target); err != nil { + sr.setErr(err) + sr.drainCurrent() + return + } + + if !yield(buf) { + sr.drainCurrent() + return + } + } + } +} + +// drainCurrent reads and discards events until the current statement ends. +// It uses nextEvent to properly track nesting depth. +func (sr *UnitReader) drainCurrent() { + for { + _, err := sr.nextEvent() + if err != nil { + // io.EOF means statement ended; other errors are also terminal. 
+ return + } + } +} diff --git a/encoding/pack_iter_test.go b/encoding/pack_iter_test.go new file mode 100644 index 0000000..6738d0b --- /dev/null +++ b/encoding/pack_iter_test.go @@ -0,0 +1,150 @@ +package encoding + +import ( + "strings" + "testing" +) + +func TestPackItemsBasic(t *testing.T) { + sr := NewUnitReader(strings.NewReader("items:[int] <<\n10\n20\n30\n")) + defer sr.Close() + + var items []int64 + for stmt := range sr.Properties() { + if stmt.Name == "items" && stmt.IsPack { + for item := range PackItems[int64](sr) { + items = append(items, item) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(items) != 3 || items[0] != 10 || items[1] != 20 || items[2] != 30 { + t.Errorf("expected [10, 20, 30], got %v", items) + } +} + +func TestPackItemsStruct(t *testing.T) { + type Entry struct { + Name string `pakt:"name"` + Size int64 `pakt:"size"` + } + + input := "files:[{name:str, size:int}] <<\n{'readme.md', 100}\n{'main.go', 500}\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var entries []Entry + for stmt := range sr.Properties() { + if stmt.IsPack { + for entry := range PackItems[Entry](sr) { + entries = append(entries, entry) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + if entries[0].Name != "readme.md" || entries[0].Size != 100 { + t.Errorf("entry 0: %+v", entries[0]) + } + if entries[1].Name != "main.go" || entries[1].Size != 500 { + t.Errorf("entry 1: %+v", entries[1]) + } +} + +func TestPackItemsEarlyBreak(t *testing.T) { + input := "nums:[int] <<\n1\n2\n3\n4\n5\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var firstTwo []int64 + var afterName string + for stmt := range sr.Properties() { + switch stmt.Name { + case "nums": + count := 0 + for item := range PackItems[int64](sr) { + firstTwo = append(firstTwo, item) + count++ + if count >= 2 { + break 
+ } + } + case "name": + var err error + afterName, err = ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + + if len(firstTwo) != 2 || firstTwo[0] != 1 || firstTwo[1] != 2 { + t.Errorf("expected [1, 2], got %v", firstTwo) + } + if afterName != "after" { + t.Errorf("expected 'after', got %q", afterName) + } +} + +func TestPackItemsIntoReuse(t *testing.T) { + sr := NewUnitReader(strings.NewReader("items:[str] <<\n'a'\n'b'\n'c'\n")) + defer sr.Close() + + var collected []string + var buf string + for stmt := range sr.Properties() { + if stmt.IsPack { + for p := range PackItemsInto[string](sr, &buf) { + collected = append(collected, *p) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if len(collected) != 3 || collected[0] != "a" || collected[1] != "b" || collected[2] != "c" { + t.Errorf("expected [a, b, c], got %v", collected) + } +} + +func TestPackItemsEmpty(t *testing.T) { + // Empty pack followed by another statement + input := "items:[int] <<\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var packCount int + var afterName string + for stmt := range sr.Properties() { + switch stmt.Name { + case "items": + for range PackItems[int64](sr) { + packCount++ + } + case "name": + var err error + afterName, err = ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if packCount != 0 { + t.Errorf("expected 0 pack items, got %d", packCount) + } + if afterName != "after" { + t.Errorf("expected 'after', got %q", afterName) + } +} diff --git a/encoding/pack_test.go b/encoding/pack_test.go index 9ec5345..9e750f5 100644 --- a/encoding/pack_test.go +++ b/encoding/pack_test.go @@ -5,6 +5,23 @@ import ( "testing" ) +type withList struct { + Tags []string `pakt:"tags"` +} + +type innerStruct struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` +} + +type 
nestedListOfStructs struct { + Servers []innerStruct `pakt:"servers"` +} + +type withMap struct { + Headers map[string]string `pakt:"headers"` +} + func TestDecodeListPack(t *testing.T) { events := decodeAll(t, "ports:[int] << 80, 443, 8080") if len(events) != 5 { @@ -13,13 +30,13 @@ func TestDecodeListPack(t *testing.T) { if events[0].Kind != EventListPackStart || events[0].Name != "ports" { t.Fatalf("event[0] = %v", events[0]) } - if events[1].Name != "[0]" || events[1].Value != "80" { + if events[1].Name != "[0]" || events[1].ValueString() != "80" { t.Fatalf("event[1] = %v", events[1]) } - if events[2].Name != "[1]" || events[2].Value != "443" { + if events[2].Name != "[1]" || events[2].ValueString() != "443" { t.Fatalf("event[2] = %v", events[2]) } - if events[3].Name != "[2]" || events[3].Value != "8080" { + if events[3].Name != "[2]" || events[3].ValueString() != "8080" { t.Fatalf("event[3] = %v", events[3]) } if events[4].Kind != EventListPackEnd || events[4].Name != "ports" { @@ -36,7 +53,7 @@ func TestDecodeListPackStopsAtNextStatement(t *testing.T) { if events[0].Kind != EventListPackStart || events[0].Name != "states" { t.Fatalf("event[0] = %v", events[0]) } - if events[1].Kind != EventScalarValue || events[1].Value != "dev" { + if events[1].Kind != EventScalarValue || events[1].ValueString() != "dev" { t.Fatalf("event[1] = %v", events[1]) } if events[2].Kind != EventListPackEnd || events[2].Name != "states" { @@ -56,10 +73,10 @@ func TestDecodeMapPack(t *testing.T) { if events[0].Kind != EventMapPackStart || events[0].Name != "headers" { t.Fatalf("event[0] = %v", events[0]) } - if events[1].Kind != EventScalarValue || events[1].Value != "a" { + if events[1].Kind != EventScalarValue || events[1].ValueString() != "a" { t.Fatalf("event[1] = %v", events[1]) } - if events[2].Kind != EventScalarValue || events[2].Value != "1" { + if events[2].Kind != EventScalarValue || events[2].ValueString() != "1" { t.Fatalf("event[2] = %v", events[2]) } if events[5].Kind 
!= EventMapPackEnd || events[5].Name != "headers" { @@ -73,33 +90,15 @@ func TestDecodeMapPackDuplicateKeysPreserved(t *testing.T) { if len(events) != 6 { t.Fatalf("expected 6 events, got %d: %v", len(events), events) } - if events[1].Value != "a" || events[2].Value != "1" || events[3].Value != "a" || events[4].Value != "2" { + if events[1].ValueString() != "a" || events[2].ValueString() != "1" || events[3].ValueString() != "a" || events[4].ValueString() != "2" { t.Fatalf("unexpected duplicate-key event sequence: %v", events) } } -func TestProjectionMatchesPack(t *testing.T) { - doc := "drop:int = 1\nports:[int] << 80, 443\nname:str = 'svc'" - spec := "ports:[int]\nname:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 7 { - t.Fatalf("expected 7 events, got %d: %v", len(events), events) - } - if events[0].Kind != EventListPackStart || events[0].Name != "ports" { - t.Fatalf("event[0] = %v", events[0]) - } - if events[3].Kind != EventListPackEnd || events[3].Name != "ports" { - t.Fatalf("event[3] = %v", events[3]) - } - if events[4].Kind != EventAssignStart || events[4].Name != "name" { - t.Fatalf("event[4] = %v", events[4]) - } -} - func TestUnmarshalListPack(t *testing.T) { data := []byte("tags:[str] << 'alpha', 'beta', 'gamma'") - var v withList - if err := Unmarshal(data, &v); err != nil { + v, err := UnmarshalNew[withList](data) + if err != nil { t.Fatal(err) } want := []string{"alpha", "beta", "gamma"} @@ -110,8 +109,8 @@ func TestUnmarshalListPack(t *testing.T) { func TestUnmarshalStructListPack(t *testing.T) { data := []byte("servers:[{host:str, port:int}] << { 'a', 80 }, { 'b', 443 }") - var v nestedListOfStructs - if err := Unmarshal(data, &v); err != nil { + v, err := UnmarshalNew[nestedListOfStructs](data) + if err != nil { t.Fatal(err) } want := []innerStruct{ @@ -125,8 +124,8 @@ func TestUnmarshalStructListPack(t *testing.T) { func TestUnmarshalMapPackLastWins(t *testing.T) { data := []byte("headers: << 'Accept' ; 'json', 'Accept' ; 
'text/html'") - var v withMap - if err := Unmarshal(data, &v); err != nil { + v, err := UnmarshalNew[withMap](data) + if err != nil { t.Fatal(err) } if got := v.Headers["Accept"]; got != "text/html" { @@ -136,8 +135,8 @@ func TestUnmarshalMapPackLastWins(t *testing.T) { func TestUnmarshalDelimitedMapDuplicateKeysLastWins(t *testing.T) { data := []byte("headers: = <'Accept' ; 'json', 'Accept' ; 'text/html'>") - var v withMap - if err := Unmarshal(data, &v); err != nil { + v, err := UnmarshalNew[withMap](data) + if err != nil { t.Fatal(err) } if got := v.Headers["Accept"]; got != "text/html" { diff --git a/encoding/read_value.go b/encoding/read_value.go new file mode 100644 index 0000000..2185a44 --- /dev/null +++ b/encoding/read_value.go @@ -0,0 +1,498 @@ +package encoding + +import ( + "encoding/base64" + "encoding/hex" + "fmt" + "io" + "reflect" + "strconv" + "strings" + "unsafe" +) + +// ReadValue reads the current statement's value (or current pack element) +// and deserializes it into a new value of type T. +// +// For assign statements: reads the single value. +// For pack statements: reads the next element. Call within [PackItems] loop. +func ReadValue[T any](sr *UnitReader) (T, error) { + var zero T + target := reflect.New(reflect.TypeOf(&zero).Elem()).Elem() + if err := readValueReflect(sr, target); err != nil { + return zero, err + } + return target.Interface().(T), nil +} + +// ReadValueInto reads the current value into an existing target. +// This enables buffer reuse in hot pack-processing loops. +func ReadValueInto[T any](sr *UnitReader, target *T) error { + if target == nil { + return &DeserializeError{Message: "ReadValueInto requires a non-nil pointer"} + } + rv := reflect.ValueOf(target).Elem() + return readValueReflect(sr, rv) +} + +// readValueReflect is the core event-consuming value reader. +// It reads events from the UnitReader's decoder and populates target. 
+func readValueReflect(sr *UnitReader, target reflect.Value) error { + ev, err := sr.nextEvent() + if err != nil { + return err + } + + // Handle nil before pointer allocation. + if ev.Kind == EventScalarValue && ev.IsNilValue() { + return setNil(target) + } + + // Check for registered converter before default path. + if sr.opts != nil && sr.opts.converters != nil { + baseType := target.Type() + for baseType.Kind() == reflect.Pointer { + baseType = baseType.Elem() + } + if conv, ok := sr.opts.converters.byType[baseType]; ok { + vr := &ValueReader{sr: sr, event: ev} + return invokeConverter(conv, vr, ev, target) + } + } + + // Allocate through pointer indirections. + target = allocPtr(target) + + switch ev.Kind { + case EventScalarValue: + return setScalarFromEvent(ev, target) + + case EventStructStart: + return readStructFromEvents(sr, ev, target) + + case EventTupleStart: + return readTupleFromEvents(sr, ev, target) + + case EventListStart: + return readListFromEvents(sr, ev, target) + + case EventMapStart: + return readMapFromEvents(sr, ev, target) + + default: + return &DeserializeError{ + Pos: ev.Pos, + Message: fmt.Sprintf("unexpected event %s while reading value", ev.Kind), + } + } +} + +// invokeConverter calls a type-erased ValueConverter using reflection. +func invokeConverter(conv any, vr *ValueReader, ev Event, target reflect.Value) error { + // The converter implements ValueConverter[T] which has FromPakt(*ValueReader, Type) (T, error). + // We call it via reflection since the type is erased at registration time. + convVal := reflect.ValueOf(conv) + var paktType Type + if ev.Type != nil { + paktType = *ev.Type + } + results := convVal.MethodByName("FromPakt").Call([]reflect.Value{ + reflect.ValueOf(vr), + reflect.ValueOf(paktType), + }) + if !results[1].IsNil() { + return results[1].Interface().(error) + } + // Set the result. 
+ result := results[0] + target = allocPtr(target) + target.Set(result) + return nil +} + +// setScalarFromEvent maps a ScalarValue event to a Go reflect.Value. +func setScalarFromEvent(ev Event, target reflect.Value) error { + // Handle nil + if ev.IsNilValue() { + return setNil(target) + } + + switch ev.ScalarType { + case TypeStr, TypeAtom, TypeUUID: + // String-like types: the target retains the value, so we must allocate. + return setString(target, string(ev.Value)) + + case TypeInt: + // Zero-copy string view — parsed immediately, not retained. + return setInt(target, unsafeString(ev.Value)) + + case TypeFloat: + return setFloat(target, unsafeString(ev.Value)) + + case TypeDec: + return setDec(target, unsafeString(ev.Value)) + + case TypeBool: + return setBool(target, unsafeString(ev.Value)) + + case TypeDate, TypeTs: + return setTemporalString(target, unsafeString(ev.Value), target.Kind()) + + case TypeBin: + return setBinFromEvent(target, unsafeString(ev.Value)) + + case TypeNone: + return setNil(target) + + default: + return fmt.Errorf("unsupported scalar type: %s", ev.ScalarType) + } +} + +// unsafeString returns a zero-copy string view of a byte slice. +// The caller must not retain the string beyond the lifetime of the byte slice. +func unsafeString(b []byte) string { + if len(b) == 0 { + return "" + } + return unsafe.String(unsafe.SliceData(b), len(b)) //nolint:gosec // audited: borrowed view consumed immediately +} + +// setFloat parses a PAKT float literal into a Go float target. +func setFloat(target reflect.Value, raw string) error { + switch target.Kind() { + case reflect.Float32, reflect.Float64: + f, err := parseFloatLiteral(raw) + if err != nil { + return err + } + target.SetFloat(f) + return nil + case reflect.String: + target.SetString(strings.Clone(raw)) + return nil + default: + return fmt.Errorf("cannot set float into %s", target.Type()) + } +} + +// parseFloatLiteral parses a PAKT float literal, stripping underscores. 
+func parseFloatLiteral(raw string) (float64, error) { + s := raw + for i := 0; i < len(s); i++ { + if s[i] == '_' { + s = removeUnderscores(s) + break + } + } + f, err := parseFloat64(s) + if err != nil { + return 0, fmt.Errorf("invalid float literal %q: %w", raw, err) + } + return f, nil +} + +func removeUnderscores(s string) string { + buf := make([]byte, 0, len(s)) + for i := 0; i < len(s); i++ { + if s[i] != '_' { + buf = append(buf, s[i]) + } + } + return string(buf) +} + +func parseFloat64(s string) (float64, error) { + return strconv.ParseFloat(s, 64) +} + +// setBool sets a boolean value from a string. +func setBool(target reflect.Value, raw string) error { + switch target.Kind() { + case reflect.Bool: + switch raw { + case "true": + target.SetBool(true) + case "false": + target.SetBool(false) + default: + return fmt.Errorf("invalid bool value: %q", raw) + } + return nil + case reflect.String: + target.SetString(strings.Clone(raw)) + return nil + default: + return fmt.Errorf("cannot set bool into %s", target.Type()) + } +} + +// setBinFromEvent handles bin values from the event stream. +// The event Value contains the raw decoded content (hex or base64 prefix stripped). +func setBinFromEvent(target reflect.Value, raw string) error { + // The decoder already strips the x'' or b'' wrapper but the value + // may still be hex-encoded or base64-encoded based on format. + // Try hex first (the event stream delivers the inner content). 
+ data, err := hex.DecodeString(raw) + if err != nil { + // Try base64 + data, err = base64.StdEncoding.DecodeString(raw) + if err != nil { + // Treat as raw bytes + data = []byte(raw) + } + } + + switch target.Kind() { + case reflect.Slice: + if target.Type().Elem().Kind() == reflect.Uint8 { + target.SetBytes(data) + return nil + } + case reflect.String: + target.SetString(string(data)) + return nil + default: + // fall through to error + } + return fmt.Errorf("cannot set bin into %s", target.Type()) +} + +// readStructFromEvents reads struct events into a Go struct or map. +func readStructFromEvents(sr *UnitReader, startEv Event, target reflect.Value) error { + if target.Kind() == reflect.Map { + return readStructIntoMapFromEvents(sr, target) + } + + if target.Kind() != reflect.Struct { + return fmt.Errorf("cannot unmarshal struct into %s", target.Type()) + } + + info, err := cachedStructFields(target.Type()) + if err != nil { + return err + } + + for { + ev, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return &DeserializeError{Pos: startEv.Pos, Message: "unterminated struct"} + } + return err + } + + if ev.Kind == EventStructEnd { + return nil + } + + // ev should be a value event for the next positional field. + // The field name comes from ev.Name (set by the state machine). + fieldName := ev.Name + fi, ok := info.fieldMap[fieldName] + if ok { + fieldTarget := target.Field(fi.Index) + fieldTarget = allocPtr(fieldTarget) + if err := handleValueEvent(sr, ev, fieldTarget); err != nil { + return fmt.Errorf("field %q: %w", fieldName, err) + } + } else { + // Unknown field — skip its value + if err := skipValueEvent(sr, ev); err != nil { + return err + } + } + } +} + +// readStructIntoMapFromEvents reads struct events into a Go map[string]T. +// Struct field names are always strings, so the map key type must be string. +// For general maps with non-string keys, use readMapFromEvents. 
+func readStructIntoMapFromEvents(sr *UnitReader, target reflect.Value) error { + if target.IsNil() { + target.Set(reflect.MakeMap(target.Type())) + } + valType := target.Type().Elem() + + for { + ev, err := sr.nextEvent() + if err != nil { + return err + } + if ev.Kind == EventStructEnd { + return nil + } + + val := reflect.New(valType).Elem() + if err := handleValueEvent(sr, ev, val); err != nil { + return fmt.Errorf("map key %q: %w", ev.Name, err) + } + target.SetMapIndex(reflect.ValueOf(ev.Name), val) + } +} + +// readTupleFromEvents reads tuple events into a Go slice. +func readTupleFromEvents(sr *UnitReader, startEv Event, target reflect.Value) error { + if target.Kind() != reflect.Slice { + return fmt.Errorf("cannot unmarshal tuple into %s", target.Type()) + } + + elemType := target.Type().Elem() + target.Set(reflect.MakeSlice(target.Type(), 0, 4)) + + for { + ev, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return &DeserializeError{Pos: startEv.Pos, Message: "unterminated tuple"} + } + return err + } + if ev.Kind == EventTupleEnd { + return nil + } + + target.Grow(1) + target.SetLen(target.Len() + 1) + elem := target.Index(target.Len() - 1) + if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { + elem.Set(reflect.New(elemType).Elem()) + } + if err := handleValueEvent(sr, ev, elem); err != nil { + return err + } + } +} + +// readListFromEvents reads list events into a Go slice. 
+func readListFromEvents(sr *UnitReader, startEv Event, target reflect.Value) error { + if target.Kind() != reflect.Slice { + return fmt.Errorf("cannot unmarshal list into %s", target.Type()) + } + + elemType := target.Type().Elem() + target.Set(reflect.MakeSlice(target.Type(), 0, 8)) + + for { + ev, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return &DeserializeError{Pos: startEv.Pos, Message: "unterminated list"} + } + return err + } + if ev.Kind == EventListEnd { + return nil + } + + target.Grow(1) + target.SetLen(target.Len() + 1) + elem := target.Index(target.Len() - 1) + if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { + elem.Set(reflect.New(elemType).Elem()) + } + if err := handleValueEvent(sr, ev, elem); err != nil { + return err + } + } +} + +// readMapFromEvents reads map events into a Go map. +func readMapFromEvents(sr *UnitReader, startEv Event, target reflect.Value) error { + if target.Kind() != reflect.Map { + return fmt.Errorf("cannot unmarshal map into %s", target.Type()) + } + + if target.IsNil() { + target.Set(reflect.MakeMap(target.Type())) + } + + keyType := target.Type().Key() + valType := target.Type().Elem() + + // Map events alternate: key (ScalarValue) → value → key → value → MapEnd + for { + // Read key + keyEv, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return &DeserializeError{Pos: startEv.Pos, Message: "unterminated map"} + } + return err + } + if keyEv.Kind == EventMapEnd { + return nil + } + + key := reflect.New(keyType).Elem() + if err := handleValueEvent(sr, keyEv, key); err != nil { + return fmt.Errorf("map key: %w", err) + } + + // Read value + valEv, err := sr.nextEvent() + if err != nil { + return fmt.Errorf("map value: %w", err) + } + + val := reflect.New(valType).Elem() + if err := handleValueEvent(sr, valEv, val); err != nil { + return fmt.Errorf("map value: %w", err) + } + + target.SetMapIndex(key, val) + } +} + +// 
handleValueEvent processes a single value event (which may be a scalar +// or the start of a composite), writing the result into target. +func handleValueEvent(sr *UnitReader, ev Event, target reflect.Value) error { + target = allocPtr(target) + + switch ev.Kind { + case EventScalarValue: + return setScalarFromEvent(ev, target) + case EventStructStart: + return readStructFromEvents(sr, ev, target) + case EventTupleStart: + return readTupleFromEvents(sr, ev, target) + case EventListStart: + return readListFromEvents(sr, ev, target) + case EventMapStart: + return readMapFromEvents(sr, ev, target) + default: + return &DeserializeError{ + Pos: ev.Pos, + Message: fmt.Sprintf("unexpected event %s in value position", ev.Kind), + } + } +} + +// skipValueEvent skips a value event and any nested events it contains. +func skipValueEvent(sr *UnitReader, ev Event) error { + switch { + case ev.Kind == EventScalarValue: + return nil // scalar — nothing more to consume + case ev.Kind.IsCompositeStart(): + return skipComposite(sr, ev.Kind) + default: + return nil + } +} + +// skipComposite reads and discards events until the matching end event. 
+func skipComposite(sr *UnitReader, startKind EventKind) error { + depth := 1 + for depth > 0 { + ev, err := sr.nextEvent() + if err != nil { + return err + } + if ev.Kind.IsCompositeStart() { + depth++ + } else if ev.Kind.IsCompositeEnd() { + depth-- + } + } + return nil +} diff --git a/encoding/read_value_test.go b/encoding/read_value_test.go new file mode 100644 index 0000000..25e105e --- /dev/null +++ b/encoding/read_value_test.go @@ -0,0 +1,557 @@ +package encoding + +import ( + "strings" + "testing" + "time" +) + +func TestReadValueString(t *testing.T) { + sr := NewUnitReader(strings.NewReader("name:str = 'hello'\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + if stmt.Name != "name" { + t.Fatalf("expected 'name', got %q", stmt.Name) + } + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "hello" { + t.Errorf("expected 'hello', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueInt(t *testing.T) { + sr := NewUnitReader(strings.NewReader("port:int = 8080\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + if stmt.Name != "port" { + t.Fatalf("expected 'port', got %q", stmt.Name) + } + val, err := ReadValue[int64](sr) + if err != nil { + t.Fatal(err) + } + if val != 8080 { + t.Errorf("expected 8080, got %d", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueBool(t *testing.T) { + sr := NewUnitReader(strings.NewReader("debug:bool = true\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[bool](sr) + if err != nil { + t.Fatal(err) + } + if val != true { + t.Errorf("expected true, got %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueFloat(t *testing.T) { + sr := NewUnitReader(strings.NewReader("rate:float = 3.14e0\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := 
ReadValue[float64](sr) + if err != nil { + t.Fatal(err) + } + if val != 3.14 { + t.Errorf("expected 3.14, got %f", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueStruct(t *testing.T) { + type Server struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + } + + sr := NewUnitReader(strings.NewReader( + "server:{host:str, port:int} = {'localhost', 8080}\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + if stmt.Name != "server" { + t.Fatalf("expected 'server', got %q", stmt.Name) + } + val, err := ReadValue[Server](sr) + if err != nil { + t.Fatal(err) + } + if val.Host != "localhost" || val.Port != 8080 { + t.Errorf("expected {localhost, 8080}, got %+v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueList(t *testing.T) { + sr := NewUnitReader(strings.NewReader( + "tags:[str] = ['alpha', 'beta', 'gamma']\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[[]string](sr) + if err != nil { + t.Fatal(err) + } + if len(val) != 3 || val[0] != "alpha" || val[1] != "beta" || val[2] != "gamma" { + t.Errorf("expected [alpha, beta, gamma], got %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueMap(t *testing.T) { + sr := NewUnitReader(strings.NewReader( + "headers: = <'Content-Type' ; 'text/html', 'Accept' ; '*/*'>\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[map[string]string](sr) + if err != nil { + t.Fatal(err) + } + if len(val) != 2 { + t.Errorf("expected 2 entries, got %d", len(val)) + } + if val["Content-Type"] != "text/html" { + t.Errorf("expected 'text/html', got %q", val["Content-Type"]) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueMultipleStatements(t *testing.T) { + input := "name:str = 'svc'\nport:int = 9090\ndebug:bool = false\n" + sr := 
NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var name string + var port int64 + var debug bool + + for stmt := range sr.Properties() { + var err error + switch stmt.Name { + case "name": + name, err = ReadValue[string](sr) + case "port": + port, err = ReadValue[int64](sr) + case "debug": + debug, err = ReadValue[bool](sr) + } + if err != nil { + t.Fatal(err) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + + if name != "svc" || port != 9090 || debug != false { + t.Errorf("got name=%q port=%d debug=%v", name, port, debug) + } +} + +func TestReadValueTimestamp(t *testing.T) { + sr := NewUnitReader(strings.NewReader( + "created:ts = 2026-06-01T14:30:00Z\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[time.Time](sr) + if err != nil { + t.Fatal(err) + } + if val.Year() != 2026 || val.Month() != 6 || val.Day() != 1 { + t.Errorf("unexpected time: %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueNullable(t *testing.T) { + sr := NewUnitReader(strings.NewReader( + "label:str? 
= nil\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[*string](sr) + if err != nil { + t.Fatal(err) + } + if val != nil { + t.Errorf("expected nil, got %q", *val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueInto(t *testing.T) { + sr := NewUnitReader(strings.NewReader("name:str = 'hello'\n")) + defer sr.Close() + + for range sr.Properties() { + var val string + err := ReadValueInto(sr, &val) + if err != nil { + t.Fatal(err) + } + if val != "hello" { + t.Errorf("expected 'hello', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueIntoReuse(t *testing.T) { + input := "a:int = 1\nb:int = 2\nc:int = 3\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var val int64 + var sum int64 + for range sr.Properties() { + err := ReadValueInto(sr, &val) + if err != nil { + t.Fatal(err) + } + sum += val + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if sum != 6 { + t.Errorf("expected sum=6, got %d", sum) + } +} + +func TestReadValueTuple(t *testing.T) { + input := "point:(int, int, int) = (10, 20, 30)\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[[]int64](sr) + if err != nil { + t.Fatal(err) + } + if len(val) != 3 { + t.Fatalf("expected 3 elements, got %d", len(val)) + } + if val[0] != 10 || val[1] != 20 || val[2] != 30 { + t.Errorf("expected [10,20,30], got %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueStructIntoMap(t *testing.T) { + input := "cfg:{host:str, mode:str} = {'localhost', 'debug'}\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[map[string]string](sr) + if err != nil { + t.Fatal(err) + } + if val["host"] != "localhost" { + t.Errorf("expected host=localhost, got %q", val["host"]) + } + if 
val["mode"] != "debug" { + t.Errorf("expected mode=debug, got %q", val["mode"]) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueBin(t *testing.T) { + input := "data:bin = x'48454c4c4f'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[[]byte](sr) + if err != nil { + t.Fatal(err) + } + if string(val) != "HELLO" { + t.Errorf("expected 'HELLO', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueDec(t *testing.T) { + input := "price:dec = 19.99\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[float64](sr) + if err != nil { + t.Fatal(err) + } + if val != 19.99 { + t.Errorf("expected 19.99, got %f", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueDecIntoString(t *testing.T) { + input := "price:dec = 99.999\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "99.999" { + t.Errorf("expected '99.999', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueSkipUnknownField(t *testing.T) { + type Small struct { + Name string `pakt:"name"` + } + input := "data:{name:str, extra:int, bonus:{a:str}} = {'hello', 42, {'nested'}}\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + val, err := ReadValue[Small](sr) + if err != nil { + t.Fatal(err) + } + if val.Name != "hello" { + t.Errorf("expected 'hello', got %q", val.Name) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueNestedStruct(t *testing.T) { + type Inner struct { + X int64 `pakt:"x"` + Y int64 `pakt:"y"` + } + type Outer struct { + Name string `pakt:"name"` + Point Inner 
`pakt:"point"` + } + + sr := NewUnitReader(strings.NewReader( + "data:{name:str, point:{x:int, y:int}} = {'origin', {0, 0}}\n")) + defer sr.Close() + + for stmt := range sr.Properties() { + _ = stmt + val, err := ReadValue[Outer](sr) + if err != nil { + t.Fatal(err) + } + if val.Name != "origin" || val.Point.X != 0 || val.Point.Y != 0 { + t.Errorf("unexpected: %+v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueFloatWithUnderscores(t *testing.T) { + sr := NewUnitReader(strings.NewReader("rate:float = 1_000.5e1\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[float64](sr) + if err != nil { + t.Fatal(err) + } + if val != 10005.0 { + t.Errorf("expected 10005.0, got %f", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueBinBase64(t *testing.T) { + sr := NewUnitReader(strings.NewReader("data:bin = b'SGVsbG8='\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[[]byte](sr) + if err != nil { + t.Fatal(err) + } + if string(val) != "Hello" { + t.Errorf("expected 'Hello', got %q", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestEventString(t *testing.T) { + ev := Event{ + Kind: EventScalarValue, + Pos: Pos{Line: 1, Col: 5}, + Name: "port", + ScalarType: TypeInt, + Value: []byte("8080"), + } + s := ev.String() + if !strings.Contains(s, "ScalarValue") || !strings.Contains(s, "8080") { + t.Errorf("unexpected Event.String(): %q", s) + } +} + +func TestReadValueTupleIntoSlice(t *testing.T) { + sr := NewUnitReader(strings.NewReader("v:(int, int, int) = (1, 2, 3)\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[[]int64](sr) + if err != nil { + t.Fatal(err) + } + if len(val) != 3 || val[0] != 1 || val[1] != 2 || val[2] != 3 { + t.Errorf("expected [1, 2, 3], got %v", val) + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } +} + +func TestReadValueFloatIntoString(t 
*testing.T) { + sr := NewUnitReader(strings.NewReader("v:float = 1e2\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "1e2" { + t.Errorf("expected '1e2', got %q", val) + } + } +} + +func TestReadValueBoolIntoString(t *testing.T) { + sr := NewUnitReader(strings.NewReader("v:bool = true\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "true" { + t.Errorf("expected 'true', got %q", val) + } + } +} + +func TestReadValueIntIntoString(t *testing.T) { + sr := NewUnitReader(strings.NewReader("v:int = 42\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "42" { + t.Errorf("expected '42', got %q", val) + } + } +} + +func TestReadValueDateIntoString(t *testing.T) { + sr := NewUnitReader(strings.NewReader("v:date = 2026-06-01\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "2026-06-01" { + t.Errorf("expected '2026-06-01', got %q", val) + } + } +} + +func TestReadValueBinIntoString(t *testing.T) { + sr := NewUnitReader(strings.NewReader("v:bin = x'48656C6C6F'\n")) + defer sr.Close() + for range sr.Properties() { + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + if val != "Hello" { + t.Errorf("expected 'Hello', got %q", val) + } + } +} diff --git a/encoding/reader.go b/encoding/reader.go index 4418bc4..a91aba4 100644 --- a/encoding/reader.go +++ b/encoding/reader.go @@ -2,8 +2,6 @@ package encoding import ( "bufio" - "encoding/base64" - "encoding/hex" "fmt" "io" "slices" @@ -27,7 +25,41 @@ type reader struct { pos Pos lastPos Pos hitNUL bool // true after consuming a NUL byte (end-of-unit per spec §10.1) - sb strings.Builder // reusable builder to avoid per-read allocations + sb strings.Builder // 
reusable builder for identifiers + valBuf []byte // reusable buffer for scalar values (borrow semantics) +} + +// byteAppender is the interface for writing bytes during scalar parsing. +// Both strings.Builder (for idents) and the valBuf adapter (for scalar +// values) satisfy this interface. +type byteAppender interface { + WriteByte(c byte) error + WriteRune(r rune) (int, error) +} + +// valBufAdapter adapts *reader's valBuf as a byteAppender. +type valBufAdapter struct { + r *reader +} + +func (a valBufAdapter) WriteByte(c byte) error { + a.r.valBuf = append(a.r.valBuf, c) + return nil +} + +func (a valBufAdapter) WriteRune(ch rune) (int, error) { + if ch < utf8.RuneSelf { + a.r.valBuf = append(a.r.valBuf, byte(ch)) //nolint:gosec // ch < utf8.RuneSelf (128), fits in byte + return 1, nil + } + var buf [4]byte + n := utf8.EncodeRune(buf[:], ch) + a.r.valBuf = append(a.r.valBuf, buf[:n]...) + return n, nil +} + +func (r *reader) valBufAppender() valBufAdapter { + return valBufAdapter{r: r} } func newReader(r io.Reader) *reader { @@ -43,15 +75,6 @@ func newReader(r io.Reader) *reader { return rd } -func newReaderFromBytes(data []byte) *reader { - rd := &reader{ - src: newBytesSource(data), - pos: Pos{Line: 1, Col: 1}, - } - rd.skipBOM() - return rd -} - // release returns the pooled bufio.Reader. func (r *reader) release() { if r.bufSrc != nil { @@ -62,6 +85,17 @@ func (r *reader) release() { r.src = nil } +// resetValBuf resets the value buffer for reuse. +func (r *reader) resetValBuf() { + r.valBuf = r.valBuf[:0] +} + +// valBufBytes returns the current value buffer content. +// The returned slice is valid until the next resetValBuf call. 
+func (r *reader) valBufBytes() []byte { + return r.valBuf +} + // --------------------------------------------------------------------------- // Byte-level operations // --------------------------------------------------------------------------- @@ -262,7 +296,8 @@ func (r *reader) readIdent() (string, error) { break } } - return r.sb.String(), nil + return r.sb.String(), nil //nolint:nilerr // EOF on peek means ident ended at EOF + } // --------------------------------------------------------------------------- @@ -374,7 +409,7 @@ func (r *reader) readUnicodeEscape(n int) (rune, error) { return 0, r.errorf("invalid hex digit in %s escape: found %q", prefix, prefix+digits.String()) } digits.WriteByte(b) - val = val*16 + rune(d) + val = val*16 + rune(d) //nolint:gosec // d is 0-15 from hexVal } if val == 0 { return 0, r.errorf("null byte (U+0000) not permitted in strings") @@ -469,60 +504,6 @@ func (r *reader) consumeMultiLineString(quote byte, raw bool, out *strings.Build } } -// readBin reads a binary literal and returns its canonical lower-case hex value. 
-func (r *reader) readBin() (string, error) { - prefix, err := r.readByte() - if err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected binary literal, got EOF") - } - if prefix != 'x' && prefix != 'b' { - r.unreadByte() - return "", r.errorf("expected binary literal, got %q", rune(prefix)) - } - if err := r.expectByte('\''); err != nil { - return "", err - } - - r.sb.Reset() - for { - b, err := r.readByte() - if err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "unterminated binary literal") - } - if b == '\'' { - break - } - if b == '\n' { - return "", r.errorf("newline in binary literal") - } - if b == 0 { - return "", r.errorf("null byte in binary literal") - } - r.sb.WriteByte(b) - } - - lit := r.sb.String() - switch prefix { - case 'x': - if len(lit)%2 != 0 { - return "", r.errorf("hex binary literal must contain an even number of digits") - } - data, err := hex.DecodeString(lit) - if err != nil { - return "", r.errorf("invalid hex binary literal") - } - return hex.EncodeToString(data), nil - case 'b': - data, err := base64.StdEncoding.Strict().DecodeString(lit) - if err != nil { - return "", r.errorf("invalid base64 binary literal") - } - return hex.EncodeToString(data), nil - default: - return "", r.errorf("unknown binary literal prefix %q", rune(prefix)) - } -} - // readRawLine reads bytes until a newline (or EOF) without escape processing. // If bytes were read before EOF, the partial line is returned without error. func (r *reader) readRawLine() (string, error) { @@ -631,7 +612,7 @@ func parseHexDigits(s string) (rune, bool) { if d < 0 { return 0, false } - val = val*16 + rune(d) + val = val*16 + rune(d) //nolint:gosec // d is 0-15 from hexVal } return val, true } @@ -641,7 +622,7 @@ func parseHexDigits(s string) (rune, bool) { // --------------------------------------------------------------------------- // readDigitSep reads DIGIT_SEP = DIGIT (DIGIT | '_')*. 
-func (r *reader) readDigitSep(sb *strings.Builder) error { +func (r *reader) readDigitSep(sb byteAppender) error { b, err := r.readByte() if err != nil { return r.wrapf(ErrUnexpectedEOF, "expected digit, got EOF") @@ -650,24 +631,25 @@ func (r *reader) readDigitSep(sb *strings.Builder) error { r.unreadByte() return r.errorf("expected digit, got %q", rune(b)) } - sb.WriteByte(b) + sb.WriteByte(b) //nolint:errcheck for { b, err = r.peekByte() if err != nil { break } if isDigit(b) || b == '_' { - r.readByte() //nolint:errcheck - sb.WriteByte(b) + r.readByte() //nolint:errcheck + sb.WriteByte(b) //nolint:errcheck } else { break } } - return nil + return nil //nolint:nilerr // EOF on peek means digits ended at EOF + } // readExactDigits reads exactly n decimal digits. -func (r *reader) readExactDigits(sb *strings.Builder, n int) error { +func (r *reader) readExactDigits(sb byteAppender, n int) error { for range n { b, err := r.readByte() if err != nil { @@ -677,13 +659,13 @@ func (r *reader) readExactDigits(sb *strings.Builder, n int) error { r.unreadByte() return r.errorf("expected digit, got %q", rune(b)) } - sb.WriteByte(b) + sb.WriteByte(b) //nolint:errcheck } return nil } // readExactHex reads exactly n hex digits. -func (r *reader) readExactHex(sb *strings.Builder, n int) error { +func (r *reader) readExactHex(sb byteAppender, n int) error { for range n { b, err := r.readByte() if err != nil { @@ -693,14 +675,14 @@ func (r *reader) readExactHex(sb *strings.Builder, n int) error { r.unreadByte() return r.errorf("expected hex digit, got %q", rune(b)) } - sb.WriteByte(b) + sb.WriteByte(b) //nolint:errcheck } return nil } // readPrefixedDigits reads digits for 0x/0b/0o literals. // check validates whether a byte is a valid digit for the given base. 
-func (r *reader) readPrefixedDigits(sb *strings.Builder, check func(byte) bool) error { +func (r *reader) readPrefixedDigits(sb byteAppender, check func(byte) bool) error { b, err := r.readByte() if err != nil { return r.wrapf(ErrUnexpectedEOF, "expected digit after base prefix, got EOF") @@ -709,205 +691,21 @@ func (r *reader) readPrefixedDigits(sb *strings.Builder, check func(byte) bool) r.unreadByte() return r.errorf("expected digit after base prefix, got %q", rune(b)) } - sb.WriteByte(b) + sb.WriteByte(b) //nolint:errcheck for { b, err = r.peekByte() if err != nil { break } if check(b) || b == '_' { - r.readByte() //nolint:errcheck - sb.WriteByte(b) + r.readByte() //nolint:errcheck + sb.WriteByte(b) //nolint:errcheck } else { break } } - return nil -} - -// --------------------------------------------------------------------------- -// Integer reading -// --------------------------------------------------------------------------- - -// readInt reads INT = ['-'] DIGIT_SEP | ['-'] '0x' HEX_SEP | etc. -func (r *reader) readInt() (string, error) { - r.sb.Reset() - - // Optional negative sign. - if b, err := r.peekByte(); err == nil && b == '-' { - r.readByte() //nolint:errcheck - r.sb.WriteByte('-') - } - - // Peek at first digit. - first, err := r.peekByte() - if err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected digit in integer, got EOF") - } - if !isDigit(first) { - return "", r.errorf("expected digit in integer, got %q", rune(first)) - } - - if first == '0' { - r.readByte() //nolint:errcheck - r.sb.WriteByte('0') - // Check for base prefix. 
- if b, err := r.peekByte(); err == nil { - switch b { - case 'x': - r.readByte() //nolint:errcheck - r.sb.WriteByte('x') - if err := r.readPrefixedDigits(&r.sb, isHex); err != nil { - return "", err - } - return r.sb.String(), nil - case 'b': - r.readByte() //nolint:errcheck - r.sb.WriteByte('b') - if err := r.readPrefixedDigits(&r.sb, isBin); err != nil { - return "", err - } - return r.sb.String(), nil - case 'o': - r.readByte() //nolint:errcheck - r.sb.WriteByte('o') - if err := r.readPrefixedDigits(&r.sb, isOct); err != nil { - return "", err - } - return r.sb.String(), nil - } - } - // Plain decimal that starts with 0. Continue reading digits. - for { - b, err := r.peekByte() - if err != nil { - break - } - if isDigit(b) || b == '_' { - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - } else { - break - } - } - return r.sb.String(), nil - } - - // Regular decimal DIGIT_SEP. - if err := r.readDigitSep(&r.sb); err != nil { - return "", err - } - return r.sb.String(), nil -} - -// --------------------------------------------------------------------------- -// Decimal reading -// --------------------------------------------------------------------------- - -// readDec reads DEC = ['-'] DIGIT_SEP? '.' DIGIT_SEP. -func (r *reader) readDec() (string, error) { - r.sb.Reset() - - if b, err := r.peekByte(); err == nil && b == '-' { - r.readByte() //nolint:errcheck - r.sb.WriteByte('-') - } - // Leading digits are optional: .5 is valid - if b, err := r.peekByte(); err == nil && b != '.' { - if err := r.readDigitSep(&r.sb); err != nil { - return "", err - } - } - if err := r.expectByte('.'); err != nil { - return "", err - } - r.sb.WriteByte('.') - if err := r.readDigitSep(&r.sb); err != nil { - return "", err - } - return r.sb.String(), nil -} - -// --------------------------------------------------------------------------- -// Float reading -// --------------------------------------------------------------------------- - -// readFloat reads FLOAT = ['-'] DIGIT_SEP? 
('.' DIGIT_SEP)? ('e'|'E') [+-]? DIGIT+. -func (r *reader) readFloat() (string, error) { - r.sb.Reset() - - if b, err := r.peekByte(); err == nil && b == '-' { - r.readByte() //nolint:errcheck - r.sb.WriteByte('-') - } - // Leading digits are optional when followed by '.' or exponent. - if b, err := r.peekByte(); err == nil && b != '.' && b != 'e' && b != 'E' { - if err := r.readDigitSep(&r.sb); err != nil { - return "", err - } - } - - // Optional '.' DIGIT_SEP. - if b, err := r.peekByte(); err == nil && b == '.' { - r.readByte() //nolint:errcheck - r.sb.WriteByte('.') - if err := r.readDigitSep(&r.sb); err != nil { - return "", err - } - } - - // Mandatory exponent. - b, err := r.peekByte() - if err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected exponent ('e' or 'E') in float, got EOF") - } - if b != 'e' && b != 'E' { - return "", r.errorf("expected exponent ('e' or 'E') in float, got %q", rune(b)) - } - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - - // Optional sign. - if b, err := r.peekByte(); err == nil && (b == '+' || b == '-') { - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - } - - // DIGIT+ (no underscores in exponent per spec). - count := 0 - for { - b, err := r.peekByte() - if err != nil || !isDigit(b) { - break - } - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - count++ - } - if count == 0 { - if b, err := r.peekByte(); err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected digits in float exponent, got EOF") - } else { - return "", r.errorf("expected digits in float exponent, got %q", rune(b)) - } - } - return r.sb.String(), nil -} + return nil //nolint:nilerr // EOF on peek means digits ended at EOF -// --------------------------------------------------------------------------- -// Keyword reading -// --------------------------------------------------------------------------- - -// readBool reads "true" or "false". 
-func (r *reader) readBool() (string, error) { - id, err := r.readIdent() - if err != nil { - return "", err - } - if id != "true" && id != "false" { - return "", r.errorf("expected 'true' or 'false', got %q", id) - } - return id, nil } // readNil reads the keyword "nil". @@ -922,147 +720,6 @@ func (r *reader) readNil() error { return nil } -// --------------------------------------------------------------------------- -// Temporal reading -// --------------------------------------------------------------------------- - -// readDate reads DATE = DIGIT{4}-DIGIT{2}-DIGIT{2}. -func (r *reader) readDate() (string, error) { - r.sb.Reset() - if err := r.readExactDigits(&r.sb, 4); err != nil { - return "", err - } - if err := r.expectByte('-'); err != nil { - return "", err - } - r.sb.WriteByte('-') - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - if err := r.expectByte('-'); err != nil { - return "", err - } - r.sb.WriteByte('-') - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - return r.sb.String(), nil -} - -// readTimePart reads the time portion: DIGIT{2}:DIGIT{2}:DIGIT{2}(.DIGIT+)? TZ. -func (r *reader) readTimePart() (string, error) { - r.sb.Reset() - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - if err := r.expectByte(':'); err != nil { - return "", err - } - r.sb.WriteByte(':') - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - if err := r.expectByte(':'); err != nil { - return "", err - } - r.sb.WriteByte(':') - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - - // Optional fractional seconds. - if b, err := r.peekByte(); err == nil && b == '.' 
{ - r.readByte() //nolint:errcheck - r.sb.WriteByte('.') - count := 0 - for { - b, err := r.peekByte() - if err != nil || !isDigit(b) { - break - } - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - count++ - } - if count == 0 { - if b, err := r.peekByte(); err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected digits after '.' in time, got EOF") - } else { - return "", r.errorf("expected digits after '.' in time, got %q", rune(b)) - } - } - } - - // Timezone. - b, err := r.peekByte() - if err != nil { - return "", r.wrapf(ErrUnexpectedEOF, "expected timezone in time, got EOF") - } - switch b { - case 'Z': - r.readByte() //nolint:errcheck - r.sb.WriteByte('Z') - case '+', '-': - r.readByte() //nolint:errcheck - r.sb.WriteByte(b) - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - if err := r.expectByte(':'); err != nil { - return "", err - } - r.sb.WriteByte(':') - if err := r.readExactDigits(&r.sb, 2); err != nil { - return "", err - } - default: - return "", r.errorf("expected timezone (Z or ±HH:MM) in time, got %q", rune(b)) - } - return r.sb.String(), nil -} - -// readTs reads TS = DATE 'T' TIME. -func (r *reader) readTs() (string, error) { - date, err := r.readDate() - if err != nil { - return "", err - } - if err := r.expectByte('T'); err != nil { - return "", err - } - t, err := r.readTimePart() - if err != nil { - return "", err - } - r.sb.Reset() - r.sb.WriteString(date) - r.sb.WriteByte('T') - r.sb.WriteString(t) - return r.sb.String(), nil -} - -// --------------------------------------------------------------------------- -// UUID reading -// --------------------------------------------------------------------------- - -// readUUID reads UUID = HEX{8}-HEX{4}-HEX{4}-HEX{4}-HEX{12}. 
-func (r *reader) readUUID() (string, error) { - r.sb.Reset() - segments := [5]int{8, 4, 4, 4, 12} - for i, n := range segments { - if i > 0 { - if err := r.expectByte('-'); err != nil { - return "", err - } - r.sb.WriteByte('-') - } - if err := r.readExactHex(&r.sb, n); err != nil { - return "", err - } - } - return r.sb.String(), nil -} - // --------------------------------------------------------------------------- // Atom reading // --------------------------------------------------------------------------- diff --git a/encoding/reader_reflect.go b/encoding/reader_reflect.go deleted file mode 100644 index 57a35d8..0000000 --- a/encoding/reader_reflect.go +++ /dev/null @@ -1,249 +0,0 @@ -package encoding - -import ( - "encoding/hex" - "fmt" - "math" - "reflect" - "strconv" - "time" -) - -// readIntInto parses a PAKT integer literal directly into target without -// allocating an intermediate string. Falls back to string path for hex/bin/oct -// or underscore-containing literals. -func (r *reader) readIntInto(target reflect.Value) error { - // Peek ahead to decide: fast decimal path or fallback. - // We need to check for negative sign and base prefixes without consuming. - p, _ := r.src.Peek(3) - offset := 0 - if len(p) > 0 && p[0] == '-' { - offset = 1 - } - // If it starts with 0 followed by a base prefix, use fallback. - if offset < len(p) && p[offset] == '0' && offset+1 < len(p) { - next := p[offset+1] - if next == 'x' || next == 'X' || next == 'b' || next == 'B' || next == 'o' || next == 'O' { - val, err := r.readInt() - if err != nil { - return err - } - return setInt(target, val) - } - } - - // Fast path: decimal integer, accumulate value directly. 
- neg := false - if b, err := r.peekByte(); err == nil && b == '-' { - r.readByte() //nolint:errcheck - neg = true - } - - first, err := r.peekByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected digit in integer, got EOF") - } - if !isDigit(first) { - return r.errorf("expected digit in integer, got %q", rune(first)) - } - - var val uint64 - for { - b, err := r.peekByte() - if err != nil { - break - } - if isDigit(b) { - r.readByte() //nolint:errcheck - val = val*10 + uint64(b-'0') - } else if b == '_' { - r.readByte() //nolint:errcheck - // skip underscores - } else { - break - } - } - - if neg { - if val > math.MaxInt64+1 { - return r.errorf("integer literal overflows int64") - } - return setIntDirect(target, -int64(val)) - } - if val > math.MaxInt64 { - return r.errorf("integer literal overflows int64") - } - return setIntDirect(target, int64(val)) -} - -// setIntDirect sets a reflect.Value from an already-parsed int64. -func setIntDirect(target reflect.Value, n int64) error { - target = allocPtr(target) - switch target.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - if target.OverflowInt(n) { - return fmt.Errorf("value %d overflows %s", n, target.Type()) - } - target.SetInt(n) - return nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - if n < 0 { - return fmt.Errorf("cannot set negative value %d into %s", n, target.Type()) - } - u := uint64(n) - if target.OverflowUint(u) { - return fmt.Errorf("value %d overflows %s", n, target.Type()) - } - target.SetUint(u) - return nil - case reflect.Float32, reflect.Float64: - target.SetFloat(float64(n)) - return nil - case reflect.String: - target.SetString(strconv.FormatInt(n, 10)) - return nil - default: - return fmt.Errorf("cannot set int into %s", target.Type()) - } -} - -// readBoolInto parses a PAKT bool directly into target. 
-func (r *reader) readBoolInto(target reflect.Value) error { - id, err := r.readIdent() - if err != nil { - return err - } - if id != "true" && id != "false" { - return r.errorf("expected 'true' or 'false', got %q", id) - } - target = allocPtr(target) - if target.Kind() != reflect.Bool { - return fmt.Errorf("cannot set bool into %s", target.Type()) - } - target.SetBool(id == "true") - return nil -} - -// readFloatInto parses a PAKT float literal directly into target. -func (r *reader) readFloatInto(target reflect.Value) error { - val, err := r.readFloat() - if err != nil { - return err - } - target = allocPtr(target) - f, ferr := strconv.ParseFloat(val, 64) - if ferr != nil { - return fmt.Errorf("invalid float literal %q: %w", val, ferr) - } - switch target.Kind() { - case reflect.Float32, reflect.Float64: - target.SetFloat(f) - return nil - case reflect.String: - target.SetString(val) - return nil - default: - return fmt.Errorf("cannot set float into %s", target.Type()) - } -} - -// readDecInto parses a PAKT decimal literal directly into target. -func (r *reader) readDecInto(target reflect.Value) error { - val, err := r.readDec() - if err != nil { - return err - } - return setDec(target, val) -} - -// readStringInto reads a PAKT string directly into target. -func (r *reader) readStringInto(target reflect.Value) error { - val, err := r.readString() - if err != nil { - return err - } - return setString(allocPtr(target), val) -} - -// readTsInto reads a PAKT timestamp directly into target. -func (r *reader) readTsInto(target reflect.Value) error { - val, err := r.readTs() - if err != nil { - return err - } - return setTemporalString(allocPtr(target), val, allocPtr(target).Kind()) -} - -// readDateInto reads a PAKT date directly into target. 
-func (r *reader) readDateInto(target reflect.Value) error { - val, err := r.readDate() - if err != nil { - return err - } - return setTemporalString(allocPtr(target), val, allocPtr(target).Kind()) -} - -// readUUIDInto reads a PAKT UUID directly into target. -func (r *reader) readUUIDInto(target reflect.Value) error { - val, err := r.readUUID() - if err != nil { - return err - } - return setString(allocPtr(target), val) -} - -// readBinInto reads a PAKT bin literal directly into target. -func (r *reader) readBinInto(target reflect.Value) error { - val, err := r.readBin() - if err != nil { - return err - } - return setBin(allocPtr(target), val) -} - -// readScalarInto dispatches to the appropriate read*Into method. -func (r *reader) readScalarInto(kind TypeKind, target reflect.Value) error { - switch kind { - case TypeStr: - return r.readStringInto(target) - case TypeInt: - return r.readIntInto(target) - case TypeDec: - return r.readDecInto(target) - case TypeFloat: - return r.readFloatInto(target) - case TypeBool: - return r.readBoolInto(target) - case TypeUUID: - return r.readUUIDInto(target) - case TypeDate: - return r.readDateInto(target) - case TypeTs: - return r.readTsInto(target) - case TypeBin: - return r.readBinInto(target) - default: - return r.errorf("unknown scalar type kind %d", int(kind)) - } -} - -// readNilInto sets target to its zero value. -func (r *reader) readNilInto(target reflect.Value) error { - if err := r.readNil(); err != nil { - return err - } - return setNil(target) -} - -// readAtomInto reads an atom value directly into target. -func (r *reader) readAtomInto(allowed []string, target reflect.Value) error { - val, err := r.readAtom(allowed) - if err != nil { - return err - } - return setString(allocPtr(target), val) -} - -// Ensure time-related imports are available. 
-var _ = time.RFC3339 -var _ = hex.DecodeString diff --git a/encoding/reader_scalar_buf.go b/encoding/reader_scalar_buf.go new file mode 100644 index 0000000..4242126 --- /dev/null +++ b/encoding/reader_scalar_buf.go @@ -0,0 +1,349 @@ +package encoding + +import ( + "encoding/base64" + "encoding/hex" +) + +// readIntTo reads an integer literal into w (zero-copy variant of readInt). +func (r *reader) readIntTo(w byteAppender) error { + if b, err := r.peekByte(); err == nil && b == '-' { + r.readByte() //nolint:errcheck + w.WriteByte('-') //nolint:errcheck + } + + first, err := r.peekByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "expected digit in integer, got EOF") + } + if !isDigit(first) { + return r.errorf("expected digit in integer, got %q", rune(first)) + } + + if first == '0' { + r.readByte() //nolint:errcheck + w.WriteByte('0') //nolint:errcheck + if b, err := r.peekByte(); err == nil { + switch b { + case 'x': + r.readByte() //nolint:errcheck + w.WriteByte('x') //nolint:errcheck + return r.readPrefixedDigits(w, isHex) + case 'b': + r.readByte() //nolint:errcheck + w.WriteByte('b') //nolint:errcheck + return r.readPrefixedDigits(w, isBin) + case 'o': + r.readByte() //nolint:errcheck + w.WriteByte('o') //nolint:errcheck + return r.readPrefixedDigits(w, isOct) + } + } + for { + b, err := r.peekByte() + if err != nil { + break + } + if isDigit(b) || b == '_' { + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + } else { + break + } + } + return nil //nolint:nilerr // EOF on peek means int ended at EOF + + } + + return r.readDigitSep(w) +} + +// readDecTo reads a decimal literal into w. +func (r *reader) readDecTo(w byteAppender) error { + if b, err := r.peekByte(); err == nil && b == '-' { + r.readByte() //nolint:errcheck + w.WriteByte('-') //nolint:errcheck + } + if b, err := r.peekByte(); err == nil && b != '.' 
{ + if err := r.readDigitSep(w); err != nil { + return err + } + } + if err := r.expectByte('.'); err != nil { + return err + } + w.WriteByte('.') //nolint:errcheck + return r.readDigitSep(w) +} + +// readFloatTo reads a float literal into w. +func (r *reader) readFloatTo(w byteAppender) error { + if b, err := r.peekByte(); err == nil && b == '-' { + r.readByte() //nolint:errcheck + w.WriteByte('-') //nolint:errcheck + } + if b, err := r.peekByte(); err == nil && b != '.' && b != 'e' && b != 'E' { + if err := r.readDigitSep(w); err != nil { + return err + } + } + + if b, err := r.peekByte(); err == nil && b == '.' { + r.readByte() //nolint:errcheck + w.WriteByte('.') //nolint:errcheck + if err := r.readDigitSep(w); err != nil { + return err + } + } + + b, err := r.peekByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "expected exponent ('e' or 'E') in float, got EOF") + } + if b != 'e' && b != 'E' { + return r.errorf("expected exponent ('e' or 'E') in float, got %q", rune(b)) + } + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + + if b, err := r.peekByte(); err == nil && (b == '+' || b == '-') { + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + } + + b, err = r.readByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "expected digit in float exponent, got EOF") + } + if !isDigit(b) { + r.unreadByte() + return r.errorf("expected digit in float exponent, got %q", rune(b)) + } + w.WriteByte(b) //nolint:errcheck + for { + b, err = r.peekByte() + if err != nil { + break + } + if isDigit(b) { + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + } else { + break + } + } + return nil //nolint:nilerr // EOF on peek means float exponent ended at EOF + +} + +// readBoolTo reads a boolean keyword into w. 
+func (r *reader) readBoolTo(w byteAppender) error { + id, err := r.readIdent() + if err != nil { + return err + } + if id != "true" && id != "false" { + return r.errorf("expected 'true' or 'false', got %q", id) + } + for i := range len(id) { + w.WriteByte(id[i]) //nolint:errcheck + } + return nil +} + +// readDateTo reads DATE = DIGIT{4}-DIGIT{2}-DIGIT{2} into w. +func (r *reader) readDateTo(w byteAppender) error { + if err := r.readExactDigits(w, 4); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + if err := r.readExactDigits(w, 2); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + return r.readExactDigits(w, 2) +} + +// readTsTo reads a timestamp into w. +func (r *reader) readTsTo(w byteAppender) error { + if err := r.readDateTo(w); err != nil { + return err + } + if err := r.expectByte('T'); err != nil { + return err + } + w.WriteByte('T') //nolint:errcheck + if err := r.readExactDigits(w, 2); err != nil { + return err + } + if err := r.expectByte(':'); err != nil { + return err + } + w.WriteByte(':') //nolint:errcheck + if err := r.readExactDigits(w, 2); err != nil { + return err + } + if err := r.expectByte(':'); err != nil { + return err + } + w.WriteByte(':') //nolint:errcheck + if err := r.readExactDigits(w, 2); err != nil { + return err + } + // Optional fractional seconds. + if b, err := r.peekByte(); err == nil && b == '.' { + r.readByte() //nolint:errcheck + w.WriteByte('.') //nolint:errcheck + for { + b, err := r.peekByte() + if err != nil || !isDigit(b) { + break + } + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + } + } + // Timezone. 
+ b, err := r.peekByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "expected timezone in timestamp") + } + if b == 'Z' { + r.readByte() //nolint:errcheck + w.WriteByte('Z') //nolint:errcheck + return nil + } + if b == '+' || b == '-' { + r.readByte() //nolint:errcheck + w.WriteByte(b) //nolint:errcheck + if err := r.readExactDigits(w, 2); err != nil { + return err + } + if err := r.expectByte(':'); err != nil { + return err + } + w.WriteByte(':') //nolint:errcheck + return r.readExactDigits(w, 2) + } + return r.errorf("expected timezone ('Z' or '+'/'-'), got %q", rune(b)) +} + +// readUUIDTo reads UUID into w. +func (r *reader) readUUIDTo(w byteAppender) error { + if err := r.readExactHex(w, 8); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + if err := r.readExactHex(w, 4); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + if err := r.readExactHex(w, 4); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + if err := r.readExactHex(w, 4); err != nil { + return err + } + if err := r.expectByte('-'); err != nil { + return err + } + w.WriteByte('-') //nolint:errcheck + return r.readExactHex(w, 12) +} + +// readStringTo reads a quoted string value into w. +// Strings require escape processing so this delegates to readString +// and copies the result. Future optimization: scan the peek buffer +// and avoid the intermediate string for escape-free strings. +func (r *reader) readStringTo(w byteAppender) error { + val, err := r.readString() + if err != nil { + return err + } + for i := range len(val) { + w.WriteByte(val[i]) //nolint:errcheck + } + return nil +} + +// readBinTo reads a binary literal directly into w. +// No escape processing needed — bin literals contain only hex/base64 chars. 
+func (r *reader) readBinTo(w byteAppender) error { + prefix, err := r.readByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "expected binary literal, got EOF") + } + if prefix != 'x' && prefix != 'b' { + r.unreadByte() + return r.errorf("expected binary literal, got %q", rune(prefix)) + } + if err := r.expectByte('\''); err != nil { + return err + } + + // Scan the raw content between quotes into a temporary slice. + // We need the raw content to validate hex/base64 before writing + // the normalized hex output to w. + r.sb.Reset() + for { + ch, err := r.readByte() + if err != nil { + return r.wrapf(ErrUnexpectedEOF, "unterminated binary literal") + } + if ch == '\'' { + break + } + if ch == '\n' { + return r.errorf("newline in binary literal") + } + if ch == 0 { + return r.errorf("null byte in binary literal") + } + r.sb.WriteByte(ch) + } + + lit := r.sb.String() + switch prefix { + case 'x': + if len(lit)%2 != 0 { + return r.errorf("hex binary literal must contain an even number of digits") + } + data, derr := hex.DecodeString(lit) + if derr != nil { + return r.errorf("invalid hex binary literal") + } + encoded := hex.EncodeToString(data) + for i := range len(encoded) { + w.WriteByte(encoded[i]) //nolint:errcheck + } + return nil + case 'b': + data, derr := base64.StdEncoding.Strict().DecodeString(lit) + if derr != nil { + return r.errorf("invalid base64 binary literal") + } + encoded := hex.EncodeToString(data) + for i := range len(encoded) { + w.WriteByte(encoded[i]) //nolint:errcheck + } + return nil + default: + return r.errorf("unknown binary literal prefix %q", rune(prefix)) + } +} diff --git a/encoding/reader_state.go b/encoding/reader_state.go index ac7c73c..d7cab2f 100644 --- a/encoding/reader_state.go +++ b/encoding/reader_state.go @@ -85,11 +85,12 @@ type statementHeader struct { } type stateMachine struct { - r *reader - stack []frame - state parserState - valType Type - valName string + r *reader + stack []frame + state parserState + 
valType Type + valName string + stmtType Type // full type annotation of the current top-level statement } func newStateMachine(r *reader) *stateMachine { @@ -99,6 +100,7 @@ func newStateMachine(r *reader) *stateMachine { sm.state = stateTop sm.valType = Type{} sm.valName = "" + sm.stmtType = Type{} return sm } @@ -224,6 +226,7 @@ func (sm *stateMachine) beginPack(h statementHeader) { } func (sm *stateMachine) beginStatement(h statementHeader) { + sm.stmtType = h.typ if h.pack { sm.beginPack(h) return @@ -231,30 +234,6 @@ func (sm *stateMachine) beginStatement(h statementHeader) { sm.beginAssignment(h) } -func (sm *stateMachine) primeNextMatchedStatement(spec *Spec) (string, error) { - for { - h, err := sm.readStatementHeader() - if err != nil { - return "", err - } - - specType, ok := spec.Fields[h.name] - if !ok { - if err := sm.r.skipStatementBody(h); err != nil { - return "", err - } - continue - } - - if specType.String() != h.typ.String() { - return "", Wrapf(h.pos, ErrTypeMismatch, "spec field %q expected type %s, got %s", h.name, specType.String(), h.typ.String()) - } - - sm.beginStatement(h) - return h.name, nil - } -} - // packTerminated checks whether the pack has ended (EOF, NUL, or next // top-level statement). With the '|' prefix on atom values and reserved // keywords for booleans/nil, a bare identifier always starts a new statement. 
@@ -364,7 +343,7 @@ func (sm *stateMachine) beginMapKeyValue(keyType Type, after parserState) (Event Pos: pos, Name: fr.keyStr, ScalarType: scalarTypeKind(keyType), - Value: fr.keyStr, + Value: []byte(fr.keyStr), }, true, nil case !keyType.Nullable && sm.r.peekNil(): @@ -375,12 +354,12 @@ func (sm *stateMachine) beginMapKeyValue(keyType Type, after parserState) (Event if err != nil { return Event{}, false, err } - fr.keyStr = val + fr.keyStr = string(val) sm.state = after return Event{ Kind: EventScalarValue, Pos: pos, - Name: val, + Name: fr.keyStr, ScalarType: *keyType.Scalar, Value: val, }, true, nil @@ -398,7 +377,7 @@ func (sm *stateMachine) beginMapKeyValue(keyType Type, after parserState) (Event Pos: pos, Name: val, ScalarType: TypeAtom, - Value: val, + Value: []byte(val), }, true, nil } @@ -438,6 +417,7 @@ func (sm *stateMachine) step() (Event, error) { Kind: EventAssignStart, Pos: fr.pos, Name: fr.name, + Type: &sm.stmtType, }, nil case statePackStart: @@ -456,6 +436,7 @@ func (sm *stateMachine) step() (Event, error) { Kind: kind, Pos: fr.pos, Name: fr.name, + Type: &sm.stmtType, }, nil case stateValue: @@ -476,7 +457,7 @@ func (sm *stateMachine) step() (Event, error) { Pos: pos, Name: name, ScalarType: scalarTypeKind(typ), - Value: "nil", + Value: []byte("nil"), }, nil } } else if sm.r.peekNil() { @@ -510,7 +491,7 @@ func (sm *stateMachine) step() (Event, error) { Pos: pos, Name: name, ScalarType: TypeAtom, - Value: val, + Value: []byte(val), }, nil case typ.Struct != nil: diff --git a/encoding/reader_test.go b/encoding/reader_test.go index ba1f1e1..a96b5b8 100644 --- a/encoding/reader_test.go +++ b/encoding/reader_test.go @@ -14,6 +14,16 @@ func mkReader(s string) *reader { return newReader(strings.NewReader(s)) } +// readScalar is a test helper that reads a scalar of the given kind +// via readScalarDirect and returns the result as a string. 
+func readScalar(r *reader, kind TypeKind) (string, error) { + b, _, err := r.readScalarDirect(kind) + if err != nil { + return "", err + } + return string(b), nil +} + // --------------------------------------------------------------------------- // BOM handling // --------------------------------------------------------------------------- @@ -381,7 +391,7 @@ func TestReadRawStringMultiLine(t *testing.T) { func TestReadBinHex(t *testing.T) { r := mkReader(`x'48656C6C6F'`) - got, err := r.readBin() + got, err := readScalar(r, TypeBin) if err != nil { t.Fatal(err) } @@ -392,7 +402,7 @@ func TestReadBinHex(t *testing.T) { func TestReadBinBase64(t *testing.T) { r := mkReader(`b'SGVsbG8='`) - got, err := r.readBin() + got, err := readScalar(r, TypeBin) if err != nil { t.Fatal(err) } @@ -429,7 +439,7 @@ func TestReadInt(t *testing.T) { } for _, tc := range tests { r := mkReader(tc.input) - got, err := r.readInt() + got, err := readScalar(r, TypeInt) if err != nil { t.Errorf("readInt(%q): %v", tc.input, err) continue @@ -442,7 +452,7 @@ func TestReadInt(t *testing.T) { func TestReadIntBad(t *testing.T) { r := mkReader("abc") - _, err := r.readInt() + _, err := readScalar(r, TypeInt) if err == nil { t.Fatal("expected error for non-integer") } @@ -467,7 +477,7 @@ func TestReadDec(t *testing.T) { } for _, tc := range tests { r := mkReader(tc.input) - got, err := r.readDec() + got, err := readScalar(r, TypeDec) if err != nil { t.Errorf("readDec(%q): %v", tc.input, err) continue @@ -497,7 +507,7 @@ func TestReadFloat(t *testing.T) { } for _, tc := range tests { r := mkReader(tc.input) - got, err := r.readFloat() + got, err := readScalar(r, TypeFloat) if err != nil { t.Errorf("readFloat(%q): %v", tc.input, err) continue @@ -510,7 +520,7 @@ func TestReadFloat(t *testing.T) { func TestReadFloatMissingExponent(t *testing.T) { r := mkReader("3.14") - _, err := r.readFloat() + _, err := readScalar(r, TypeFloat) if err == nil { t.Fatal("expected error when exponent is missing") } @@ 
-523,7 +533,7 @@ func TestReadFloatMissingExponent(t *testing.T) { func TestReadBool(t *testing.T) { for _, kw := range []string{"true", "false"} { r := mkReader(kw) - got, err := r.readBool() + got, err := readScalar(r, TypeBool) if err != nil { t.Errorf("readBool(%q): %v", kw, err) continue @@ -536,7 +546,7 @@ func TestReadBool(t *testing.T) { func TestReadBoolBad(t *testing.T) { r := mkReader("maybe") - _, err := r.readBool() + _, err := readScalar(r, TypeBool) if err == nil { t.Fatal("expected error for non-bool keyword") } @@ -562,7 +572,7 @@ func TestReadNilBad(t *testing.T) { func TestReadDate(t *testing.T) { r := mkReader("2026-06-01") - got, err := r.readDate() + got, err := readScalar(r, TypeDate) if err != nil { t.Fatal(err) } @@ -571,42 +581,42 @@ func TestReadDate(t *testing.T) { } } -func TestReadTimePartZ(t *testing.T) { - r := mkReader("14:30:00Z") - got, err := r.readTimePart() +func TestReadTsZ(t *testing.T) { + r := mkReader("2026-06-01T14:30:00Z") + got, err := readScalar(r, TypeTs) if err != nil { t.Fatal(err) } - if got != "14:30:00Z" { + if got != "2026-06-01T14:30:00Z" { t.Fatalf("got %q", got) } } -func TestReadTimePartOffset(t *testing.T) { - r := mkReader("14:30:00-04:00") - got, err := r.readTimePart() +func TestReadTsWithOffset(t *testing.T) { + r := mkReader("2026-06-01T14:30:00-04:00") + got, err := readScalar(r, TypeTs) if err != nil { t.Fatal(err) } - if got != "14:30:00-04:00" { + if got != "2026-06-01T14:30:00-04:00" { t.Fatalf("got %q", got) } } -func TestReadTimePartFractional(t *testing.T) { - r := mkReader("14:30:00.123Z") - got, err := r.readTimePart() +func TestReadTsFractional(t *testing.T) { + r := mkReader("2026-06-01T14:30:00.123Z") + got, err := readScalar(r, TypeTs) if err != nil { t.Fatal(err) } - if got != "14:30:00.123Z" { + if got != "2026-06-01T14:30:00.123Z" { t.Fatalf("got %q", got) } } func TestReadTs(t *testing.T) { r := mkReader("2026-06-01T14:30:00Z") - got, err := r.readTs() + got, err := readScalar(r, 
TypeTs) if err != nil { t.Fatal(err) } @@ -617,7 +627,7 @@ func TestReadTs(t *testing.T) { func TestReadTsOffset(t *testing.T) { r := mkReader("2026-06-01T14:30:00.500+05:30") - got, err := r.readTs() + got, err := readScalar(r, TypeTs) if err != nil { t.Fatal(err) } @@ -632,7 +642,7 @@ func TestReadTsOffset(t *testing.T) { func TestReadUUID(t *testing.T) { r := mkReader("550e8400-e29b-41d4-a716-446655440000") - got, err := r.readUUID() + got, err := readScalar(r, TypeUUID) if err != nil { t.Fatal(err) } @@ -643,7 +653,7 @@ func TestReadUUID(t *testing.T) { func TestReadUUIDBad(t *testing.T) { r := mkReader("550e8400-e29b-41d4-a716-44665544000") // too short - _, err := r.readUUID() + _, err := readScalar(r, TypeUUID) if err == nil { t.Fatal("expected error for short UUID") } diff --git a/encoding/reader_value_helpers.go b/encoding/reader_value_helpers.go index 485a8e5..03c632c 100644 --- a/encoding/reader_value_helpers.go +++ b/encoding/reader_value_helpers.go @@ -34,7 +34,8 @@ func (r *reader) readSep() (bool, error) { r.skipInsignificant(false) // skip WS and comments, but not newlines b, err := r.peekByte() if err != nil { - return false, nil // EOF is not an error for SEP + return false, nil //nolint:nilerr // EOF is not an error for SEP + } if b == ',' { r.readByte() //nolint:errcheck @@ -53,35 +54,54 @@ func (r *reader) readSep() (bool, error) { // Scalar value helpers // --------------------------------------------------------------------------- -// readScalarDirect reads a scalar value and returns it without emitting an event. -func (r *reader) readScalarDirect(kind TypeKind) (string, Pos, error) { +// readScalarDirect reads a scalar value into the reader's value buffer. +// The returned slice is borrowed — valid only until the next readScalarDirect call. 
+func (r *reader) readScalarDirect(kind TypeKind) ([]byte, Pos, error) { pos := r.pos - var val string - var err error + r.resetValBuf() + w := r.valBufAppender() switch kind { case TypeStr: - val, err = r.readString() + if err := r.readStringTo(w); err != nil { + return nil, pos, err + } case TypeInt: - val, err = r.readInt() + if err := r.readIntTo(w); err != nil { + return nil, pos, err + } case TypeDec: - val, err = r.readDec() + if err := r.readDecTo(w); err != nil { + return nil, pos, err + } case TypeFloat: - val, err = r.readFloat() + if err := r.readFloatTo(w); err != nil { + return nil, pos, err + } case TypeBool: - val, err = r.readBool() + if err := r.readBoolTo(w); err != nil { + return nil, pos, err + } case TypeUUID: - val, err = r.readUUID() + if err := r.readUUIDTo(w); err != nil { + return nil, pos, err + } case TypeDate: - val, err = r.readDate() + if err := r.readDateTo(w); err != nil { + return nil, pos, err + } case TypeTs: - val, err = r.readTs() + if err := r.readTsTo(w); err != nil { + return nil, pos, err + } case TypeBin: - val, err = r.readBin() + if err := r.readBinTo(w); err != nil { + return nil, pos, err + } default: - return "", pos, r.errorf("unknown scalar type kind %d", int(kind)) + return nil, pos, r.errorf("unknown scalar type kind %d", int(kind)) } - return val, pos, err + return r.valBufBytes(), pos, nil } // peekNil checks whether the next non-WS content is the keyword "nil" followed diff --git a/encoding/reader_value_test.go b/encoding/reader_value_test.go index 6a7db71..756903e 100644 --- a/encoding/reader_value_test.go +++ b/encoding/reader_value_test.go @@ -1,6 +1,7 @@ package encoding import ( + "slices" "strings" "testing" ) @@ -55,6 +56,7 @@ func decodeValue(input string, typ Type) ([]Event, error) { if err != nil { return events, err } + ev.Value = slices.Clone(ev.Value) events = append(events, ev) if sm.atTop() { return events, nil @@ -87,7 +89,7 @@ func expectEvents(t *testing.T, events []Event, expected []Event) { 
if exp.Name != "" && ev.Name != exp.Name { t.Errorf("event[%d]: name=%q, want %q", i, ev.Name, exp.Name) } - if exp.Value != "" && ev.Value != exp.Value { + if exp.ValueString() != "" && ev.ValueString() != exp.ValueString() { t.Errorf("event[%d]: value=%q, want %q", i, ev.Value, exp.Value) } if exp.ScalarType != TypeNone && ev.ScalarType != exp.ScalarType { @@ -128,7 +130,7 @@ func TestReadScalarValues(t *testing.T) { if events[0].Kind != EventScalarValue { t.Fatalf("expected ScalarValue, got %s", events[0].Kind) } - if events[0].Value != tc.value { + if events[0].ValueString() != tc.value { t.Fatalf("value=%q, want %q", events[0].Value, tc.value) } if events[0].ScalarType != tc.kind { @@ -162,8 +164,8 @@ func TestReadNilValue(t *testing.T) { if events[0].Kind != EventScalarValue { t.Fatalf("expected ScalarValue, got %s", events[0].Kind) } - if events[0].Value != "nil" { - t.Fatalf("value=%q, want %q", events[0].Value, "nil") + if events[0].ValueString() != "nil" { + t.Fatalf("value=%q, want %q", events[0].ValueString(), "nil") } }) } @@ -200,7 +202,7 @@ func TestReadAtomValues(t *testing.T) { if events[0].Kind != EventScalarValue { t.Fatalf("expected ScalarValue, got %s", events[0].Kind) } - if events[0].Value != tc.value { + if events[0].ValueString() != tc.value { t.Fatalf("value=%q, want %q", events[0].Value, tc.value) } }) @@ -226,8 +228,8 @@ func TestReadStructInline(t *testing.T) { events := readValueEvents(t, "{ 'localhost', 8080 }", typ) expectEvents(t, events, []Event{ {Kind: EventStructStart}, - {Kind: EventScalarValue, Name: "host", Value: "localhost", ScalarType: TypeStr}, - {Kind: EventScalarValue, Name: "port", Value: "8080", ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "host", Value: []byte("localhost"), ScalarType: TypeStr}, + {Kind: EventScalarValue, Name: "port", Value: []byte("8080"), ScalarType: TypeInt}, {Kind: EventStructEnd}, }) } @@ -241,8 +243,8 @@ func TestReadStructBlock(t *testing.T) { events := readValueEvents(t, input, typ) 
expectEvents(t, events, []Event{ {Kind: EventStructStart}, - {Kind: EventScalarValue, Name: "level", Value: "platform", ScalarType: TypeStr}, - {Kind: EventScalarValue, Name: "release", Value: "26", ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "level", Value: []byte("platform"), ScalarType: TypeStr}, + {Kind: EventScalarValue, Name: "release", Value: []byte("26"), ScalarType: TypeInt}, {Kind: EventStructEnd}, }) } @@ -253,8 +255,8 @@ func TestReadStructSingleField(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "solo" { - t.Fatalf("value=%q, want %q", events[1].Value, "solo") + if events[1].ValueString() != "solo" { + t.Fatalf("value=%q, want %q", events[1].ValueString(), "solo") } } @@ -304,9 +306,9 @@ func TestReadTupleInline(t *testing.T) { events := readValueEvents(t, "(3, 45, 5678)", typ) expectEvents(t, events, []Event{ {Kind: EventTupleStart}, - {Kind: EventScalarValue, Name: "[0]", Value: "3", ScalarType: TypeInt}, - {Kind: EventScalarValue, Name: "[1]", Value: "45", ScalarType: TypeInt}, - {Kind: EventScalarValue, Name: "[2]", Value: "5678", ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[0]", Value: []byte("3"), ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[1]", Value: []byte("45"), ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[2]", Value: []byte("5678"), ScalarType: TypeInt}, {Kind: EventTupleEnd}, }) } @@ -318,7 +320,7 @@ func TestReadTupleBlock(t *testing.T) { if len(events) != 4 { t.Fatalf("expected 4 events, got %d", len(events)) } - if events[1].Value != "42" || events[2].Value != "hello" { + if events[1].ValueString() != "42" || events[2].ValueString() != "hello" { t.Fatalf("unexpected values: %v, %v", events[1], events[2]) } } @@ -329,8 +331,8 @@ func TestReadTupleSingleElement(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "true" { - t.Fatalf("value=%q, want %q", 
events[1].Value, "true") + if events[1].ValueString() != "true" { + t.Fatalf("value=%q, want %q", events[1].ValueString(), "true") } } @@ -359,9 +361,9 @@ func TestReadListInline(t *testing.T) { events := readValueEvents(t, "[1, 2, 3]", typ) expectEvents(t, events, []Event{ {Kind: EventListStart}, - {Kind: EventScalarValue, Name: "[0]", Value: "1", ScalarType: TypeInt}, - {Kind: EventScalarValue, Name: "[1]", Value: "2", ScalarType: TypeInt}, - {Kind: EventScalarValue, Name: "[2]", Value: "3", ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[0]", Value: []byte("1"), ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[1]", Value: []byte("2"), ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "[2]", Value: []byte("3"), ScalarType: TypeInt}, {Kind: EventListEnd}, }) } @@ -392,8 +394,8 @@ func TestReadListSingleElement(t *testing.T) { if len(events) != 3 { t.Fatalf("expected 3 events, got %d", len(events)) } - if events[1].Value != "true" { - t.Fatalf("value=%q, want %q", events[1].Value, "true") + if events[1].ValueString() != "true" { + t.Fatalf("value=%q, want %q", events[1].ValueString(), "true") } } @@ -419,16 +421,16 @@ func TestReadMapInline(t *testing.T) { t.Fatalf("event[0]: expected MapStart, got %s", events[0].Kind) } // Key events - if events[1].Kind != EventScalarValue || events[1].Value != "host" { + if events[1].Kind != EventScalarValue || events[1].ValueString() != "host" { t.Fatalf("event[1]: %v", events[1]) } - if events[2].Kind != EventScalarValue || events[2].Value != "8080" { + if events[2].Kind != EventScalarValue || events[2].ValueString() != "8080" { t.Fatalf("event[2]: %v", events[2]) } - if events[3].Kind != EventScalarValue || events[3].Value != "port" { + if events[3].Kind != EventScalarValue || events[3].ValueString() != "port" { t.Fatalf("event[3]: %v", events[3]) } - if events[4].Kind != EventScalarValue || events[4].Value != "9090" { + if events[4].Kind != EventScalarValue || events[4].ValueString() != "9090" { 
t.Fatalf("event[4]: %v", events[4]) } if events[5].Kind != EventMapEnd { @@ -466,10 +468,10 @@ func TestReadMapDuplicateKeysPreserveEntries(t *testing.T) { events := readValueEvents(t, "< 'a' ; 1, 'a' ; 2 >", typ) expectEvents(t, events, []Event{ {Kind: EventMapStart}, - {Kind: EventScalarValue, Name: "a", Value: "a", ScalarType: TypeStr}, - {Kind: EventScalarValue, Name: "a", Value: "1", ScalarType: TypeInt}, - {Kind: EventScalarValue, Name: "a", Value: "a", ScalarType: TypeStr}, - {Kind: EventScalarValue, Name: "a", Value: "2", ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "a", Value: []byte("a"), ScalarType: TypeStr}, + {Kind: EventScalarValue, Name: "a", Value: []byte("1"), ScalarType: TypeInt}, + {Kind: EventScalarValue, Name: "a", Value: []byte("a"), ScalarType: TypeStr}, + {Kind: EventScalarValue, Name: "a", Value: []byte("2"), ScalarType: TypeInt}, {Kind: EventMapEnd}, }) } @@ -519,7 +521,7 @@ func TestReadNestedStructWithList(t *testing.T) { if events[0].Kind != EventStructStart { t.Fatalf("event[0] kind=%s", events[0].Kind) } - if events[1].Name != "name" || events[1].Value != "alice" { + if events[1].Name != "name" || events[1].ValueString() != "alice" { t.Fatalf("event[1] = %v", events[1]) } if events[2].Kind != EventListStart && events[2].Name != "scores" { diff --git a/encoding/spec.go b/encoding/spec.go deleted file mode 100644 index 367537c..0000000 --- a/encoding/spec.go +++ /dev/null @@ -1,486 +0,0 @@ -package encoding - -import ( - "io" -) - -// Spec represents a parsed .spec.pakt file — a map of expected field names to -// their types. A spec enables projection: only matching fields are fully parsed -// while unmatched fields are skipped. -type Spec struct { - Fields map[string]Type -} - -// ParseSpec reads a .spec.pakt document from r and returns a [Spec]. -// The format is `(IDENT COLON type)*` — like assignments but without `= value`. -// Duplicate field names cause an error. 
-func ParseSpec(r io.Reader) (*Spec, error) { - rd := newReader(r) - fields := make(map[string]Type) - - for { - rd.skipInsignificant(true) - - if _, err := rd.peekByte(); err != nil { - break // EOF - } - - identPos := rd.pos - name, err := rd.readIdent() - if err != nil { - return nil, err - } - - if _, dup := fields[name]; dup { - return nil, Wrapf(identPos, ErrSyntax, "duplicate field %q in spec", name) - } - - typ, err := rd.readTypeAnnot() - if err != nil { - return nil, err - } - - fields[name] = typ - } - - return &Spec{Fields: fields}, nil -} - -// --------------------------------------------------------------------------- -// skipValue — fast skip past any value form without allocating or emitting -// --------------------------------------------------------------------------- - -func (r *reader) skipValue() error { - r.skipWS() - b, err := r.peekByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected value, got EOF") - } - - switch { - case r.peekRawStringStart(): - return r.skipString() - case r.peekBinLiteralStart(): - return r.skipBinLiteral() - case b == '\'' || b == '"': - return r.skipString() - case b == '{': - return r.skipComposite('{', '}') - case b == '(': - return r.skipComposite('(', ')') - case b == '[': - return r.skipComposite('[', ']') - case b == '<': - return r.skipComposite('<', '>') - case b == '|': - return r.skipAtom() - case b == '.': - return r.skipNumberLike() - case b == 't', b == 'f', b == 'n': - return r.skipKeywordOrAtom() - case isDigit(b) || b == '-': - return r.skipNumberLike() - case isAlpha(b) || b == '_': - return r.skipKeywordOrAtom() - default: - return r.errorf("unexpected byte %q at start of value", rune(b)) - } -} - -// skipString skips a single-line or triple-quoted string, including raw forms. 
-func (r *reader) skipString() error { - raw := false - start, err := r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected string, got EOF") - } - quote := start - if start == 'r' { - raw = true - quote, err = r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected quote after raw string prefix, got EOF") - } - } - if quote != '\'' && quote != '"' { - if raw { - return r.errorf("expected quote after raw string prefix, got %q", rune(quote)) - } - r.unreadByte() - return r.errorf("expected string, got %q", rune(quote)) - } - - // Check for triple-quote. - if p, perr := r.src.Peek(2); perr == nil && p[0] == quote && p[1] == quote { - r.readByte() //nolint:errcheck - r.readByte() //nolint:errcheck - return r.skipTripleQuotedString(quote, raw) - } - - // Single-line string: skip until matching unescaped quote. - for { - b, err := r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "unterminated string") - } - if !raw && b == '\\' { - // Skip the escaped character. - if _, err := r.readByte(); err != nil { - return r.wrapf(ErrUnexpectedEOF, "unterminated escape in string") - } - continue - } - if b == quote { - return nil - } - if b == '\n' { - return r.errorf("newline in single-line string") - } - if b == 0 { - return r.errorf("null byte in string") - } - } -} - -// skipTripleQuotedString skips past the closing triple-quote delimiter. -func (r *reader) skipTripleQuotedString(quote byte, raw bool) error { - return r.consumeMultiLineString(quote, raw, nil) -} - -func (r *reader) skipBinLiteral() error { - _, err := r.readBin() - return err -} - -// skipComposite skips a balanced-delimiter composite value. It handles nested -// composites and strings containing delimiter characters. 
-func (r *reader) skipComposite(open, close byte) error { - if _, err := r.readByte(); err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected %q, got EOF", rune(open)) - } - depth := 1 - for depth > 0 { - b, err := r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "unterminated composite value (expected %q)", rune(close)) - } - - switch b { - case open: - depth++ - case close: - depth-- - case '\'', '"': - // Must skip string content to avoid false delimiter matches. - r.unreadByte() - if err := r.skipString(); err != nil { - return err - } - case 'r': - if p, err := r.src.Peek(1); err == nil && (p[0] == '\'' || p[0] == '"') { - r.unreadByte() - if err := r.skipString(); err != nil { - return err - } - } - case 'x', 'b': - if p, err := r.src.Peek(1); err == nil && p[0] == '\'' { - r.unreadByte() - if err := r.skipBinLiteral(); err != nil { - return err - } - } - case '#': - // Skip comment to avoid false matches in comment text. - r.skipToNewline() - // Also handle other composite delimiters inside the value. - case '{': - if open != '{' { - if err := r.skipCompositeInner('{', '}'); err != nil { - return err - } - } - case '(': - if open != '(' { - if err := r.skipCompositeInner('(', ')'); err != nil { - return err - } - } - case '[': - if open != '[' { - if err := r.skipCompositeInner('[', ']'); err != nil { - return err - } - } - case '<': - if open != '<' { - if err := r.skipCompositeInner('<', '>'); err != nil { - return err - } - } - } - } - return nil -} - -// skipCompositeInner skips a nested composite that uses different delimiters -// than the outer composite being skipped. 
-func (r *reader) skipCompositeInner(open, close byte) error { - depth := 1 - for depth > 0 { - b, err := r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "unterminated nested composite value") - } - switch b { - case open: - depth++ - case close: - depth-- - case '\'', '"': - r.unreadByte() - if err := r.skipString(); err != nil { - return err - } - case 'r': - if p, err := r.src.Peek(1); err == nil && (p[0] == '\'' || p[0] == '"') { - r.unreadByte() - if err := r.skipString(); err != nil { - return err - } - } - case 'x', 'b': - if p, err := r.src.Peek(1); err == nil && p[0] == '\'' { - r.unreadByte() - if err := r.skipBinLiteral(); err != nil { - return err - } - } - case '#': - r.skipToNewline() - case '{': - if open != '{' { - if err := r.skipCompositeInner('{', '}'); err != nil { - return err - } - } - case '(': - if open != '(' { - if err := r.skipCompositeInner('(', ')'); err != nil { - return err - } - } - case '[': - if open != '[' { - if err := r.skipCompositeInner('[', ']'); err != nil { - return err - } - } - case '<': - if open != '<' { - if err := r.skipCompositeInner('<', '>'); err != nil { - return err - } - } - } - } - return nil -} - -// skipToNewline consumes bytes until a newline or EOF. -func (r *reader) skipToNewline() { - for { - b, err := r.readByte() - if err != nil || b == '\n' { - return - } - } -} - -// skipKeywordOrAtom skips a keyword (true, false, nil) or bare atom identifier. -func (r *reader) skipKeywordOrAtom() error { - // Read until non-identifier char. - b, err := r.readByte() - if err != nil { - return r.wrapf(ErrUnexpectedEOF, "expected identifier, got EOF") - } - if !isAlpha(b) && b != '_' { - r.unreadByte() - return r.errorf("expected identifier, got %q", rune(b)) - } - for { - b, err = r.peekByte() - if err != nil { - return nil - } - if isAlpha(b) || isDigit(b) || b == '_' || b == '-' { - r.readByte() //nolint:errcheck - } else { - return nil - } - } -} - -// skipAtom skips a '|'-prefixed atom value. 
-func (r *reader) skipAtom() error { - if _, err := r.readByte(); err != nil { // consume '|' - return err - } - return r.skipKeywordOrAtom() -} - -// skipNumberLike skips a number, date, ts, or UUID literal. -// Reads until whitespace, newline, comma, closing delimiter, comment, or EOF. -func (r *reader) skipNumberLike() error { - count := 0 - for { - b, err := r.peekByte() - if err != nil { - if count == 0 { - return r.wrapf(ErrUnexpectedEOF, "expected value, got EOF") - } - return nil - } - if b == ' ' || b == '\t' || b == '\n' || b == '\r' || - b == ',' || b == '}' || b == ')' || b == ']' || b == '>' || b == '#' { - return nil - } - r.readByte() //nolint:errcheck - count++ - } -} - -func (r *reader) skipStatementBody(h statementHeader) error { - if h.pack { - return r.skipPackBody(h.typ) - } - return r.skipValue() -} - -func (r *reader) skipPackBody(typ Type) error { - switch { - case typ.List != nil: - return r.skipListPackBody() - case typ.Map != nil: - return r.skipMapPackBody() - default: - return r.errorf("pack type must be list or map, got %s", typ.String()) - } -} - -func (r *reader) skipListPackBody() error { - for { - r.skipInsignificant(true) - b, err := r.peekByte() - if err != nil { - return nil - } - if !r.canStartValueInPack(b) { - return nil - } - - if err := r.skipValue(); err != nil { - return err - } - - sep, err := r.readSep() - if err != nil { - return err - } - if sep { - continue - } - - r.skipInsignificant(true) - b, err = r.peekByte() - if err != nil { - return nil - } - if !r.canStartValueInPack(b) { - return nil - } - return r.errorf("expected separator between pack items") - } -} - -func (r *reader) skipMapPackBody() error { - for { - r.skipInsignificant(true) - b, err := r.peekByte() - if err != nil { - return nil - } - if !r.canStartValueInPack(b) { - return nil - } - - if err := r.skipValue(); err != nil { - return err - } - - r.skipWS() - if err := r.expectByte(';'); err != nil { - return err - } - r.skipWS() - - if err := 
r.skipValue(); err != nil { - return err - } - - sep, err := r.readSep() - if err != nil { - return err - } - if sep { - continue - } - - r.skipInsignificant(true) - b, err = r.peekByte() - if err != nil { - return nil - } - if !r.canStartValueInPack(b) { - return nil - } - return r.errorf("expected separator between pack map entries") - } -} - -// --------------------------------------------------------------------------- -// Decoder integration -// --------------------------------------------------------------------------- - -func (d *Decoder) decodeWithSpec() (Event, error) { - if d.done { - return Event{}, io.EOF - } - if d.sm == nil { - d.sm = newStateMachine(d.r) - } - - for { - if !d.sm.atTop() { - ev, err := d.sm.step() - if err != nil { - d.done = true - d.r.release() - return Event{}, err - } - return ev, nil - } - - _, err := d.sm.primeNextMatchedStatement(d.spec) - if err != nil { - if err == io.EOF { - d.done = true - d.r.release() - return Event{}, io.EOF - } - d.done = true - d.r.release() - return Event{}, err - } - } -} diff --git a/encoding/spec_test.go b/encoding/spec_test.go deleted file mode 100644 index 77e2933..0000000 --- a/encoding/spec_test.go +++ /dev/null @@ -1,1189 +0,0 @@ -package encoding - -import ( - "io" - "os" - "strings" - "testing" -) - -// --------------------------------------------------------------------------- -// ParseSpec tests -// --------------------------------------------------------------------------- - -func TestParseSpecSimple(t *testing.T) { - spec, err := ParseSpec(strings.NewReader("name:str\ncount:int")) - if err != nil { - t.Fatalf("ParseSpec: %v", err) - } - if len(spec.Fields) != 2 { - t.Fatalf("expected 2 fields, got %d", len(spec.Fields)) - } - if spec.Fields["name"].Scalar == nil || *spec.Fields["name"].Scalar != TypeStr { - t.Fatalf("expected name:str, got %v", spec.Fields["name"]) - } - if spec.Fields["count"].Scalar == nil || *spec.Fields["count"].Scalar != TypeInt { - t.Fatalf("expected count:int, got 
%v", spec.Fields["count"]) - } -} - -func TestParseSpecCompositeTypes(t *testing.T) { - spec, err := ParseSpec(strings.NewReader( - "deploy:{level:|dev, staging, prod|, date:date}")) - if err != nil { - t.Fatalf("ParseSpec: %v", err) - } - if len(spec.Fields) != 1 { - t.Fatalf("expected 1 field, got %d", len(spec.Fields)) - } - dt := spec.Fields["deploy"] - if dt.Struct == nil { - t.Fatalf("expected struct type for deploy") - } - if len(dt.Struct.Fields) != 2 { - t.Fatalf("expected 2 struct fields, got %d", len(dt.Struct.Fields)) - } - if dt.Struct.Fields[0].Name != "level" || dt.Struct.Fields[0].Type.AtomSet == nil { - t.Fatalf("expected field level:|dev, staging, prod|, got %v", dt.Struct.Fields[0]) - } - if dt.Struct.Fields[1].Name != "date" || dt.Struct.Fields[1].Type.Scalar == nil { - t.Fatalf("expected field date:date, got %v", dt.Struct.Fields[1]) - } -} - -func TestParseSpecAllTypeForms(t *testing.T) { - input := `name:str -count:int -ratio:dec -rate:float -active:bool -id:uuid -created:date -started:ts -updated:ts -level:|dev, staging, prod| -config:{host:str, port:int} -version:(int, int, int) -tags:[str] -meta: -nickname:str? 
-` - spec, err := ParseSpec(strings.NewReader(input)) - if err != nil { - t.Fatalf("ParseSpec: %v", err) - } - if len(spec.Fields) != 15 { - t.Fatalf("expected 15 fields, got %d", len(spec.Fields)) - } - // Spot-check a few - if spec.Fields["version"].Tuple == nil { - t.Fatal("expected tuple type for version") - } - if spec.Fields["tags"].List == nil { - t.Fatal("expected list type for tags") - } - if spec.Fields["meta"].Map == nil { - t.Fatal("expected map type for meta") - } - if !spec.Fields["nickname"].Nullable { - t.Fatal("expected nickname to be nullable") - } -} - -func TestParseSpecDuplicateNameError(t *testing.T) { - _, err := ParseSpec(strings.NewReader("name:str\nname:int")) - if err == nil { - t.Fatal("expected error for duplicate name") - } - if !strings.Contains(err.Error(), "duplicate") { - t.Fatalf("expected duplicate error, got: %v", err) - } -} - -func TestParseSpecEmpty(t *testing.T) { - spec, err := ParseSpec(strings.NewReader("")) - if err != nil { - t.Fatalf("ParseSpec: %v", err) - } - if len(spec.Fields) != 0 { - t.Fatalf("expected 0 fields, got %d", len(spec.Fields)) - } -} - -func TestParseSpecWithComments(t *testing.T) { - input := `# This is a spec file -name:str -# counts things -count:int -` - spec, err := ParseSpec(strings.NewReader(input)) - if err != nil { - t.Fatalf("ParseSpec: %v", err) - } - if len(spec.Fields) != 2 { - t.Fatalf("expected 2 fields, got %d", len(spec.Fields)) - } -} - -// --------------------------------------------------------------------------- -// Projection tests (via Decoder) -// --------------------------------------------------------------------------- - -// decodeAllWithSpec is a test helper that creates a decoder with a spec and -// collects all events. 
-func decodeAllWithSpec(t *testing.T, doc, specDoc string) []Event { - t.Helper() - d := NewDecoder(strings.NewReader(doc)) - if err := d.SetSpec(strings.NewReader(specDoc)); err != nil { - t.Fatalf("SetSpec: %v", err) - } - var events []Event - for { - ev, err := d.Decode() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Decode(): %v", err) - } - events = append(events, ev) - } - return events -} - -func decodeExpectErrorWithSpec(t *testing.T, doc, specDoc string) error { - t.Helper() - d := NewDecoder(strings.NewReader(doc)) - if err := d.SetSpec(strings.NewReader(specDoc)); err != nil { - return err - } - for { - _, err := d.Decode() - if err == io.EOF { - t.Fatal("expected error but got EOF") - } - if err != nil { - return err - } - } -} - -func TestProjectionAllFieldsMatch(t *testing.T) { - doc := "name:str = 'hello'\ncount:int = 42" - spec := "name:str\ncount:int" - events := decodeAllWithSpec(t, doc, spec) - // 3 events per assignment (start, value, end) = 6 - if len(events) != 6 { - t.Fatalf("expected 6 events, got %d: %v", len(events), events) - } - if events[0].Kind != EventAssignStart || events[0].Name != "name" { - t.Fatalf("event[0] = %v", events[0]) - } - if events[1].Kind != EventScalarValue || events[1].Value != "hello" { - t.Fatalf("event[1] = %v", events[1]) - } - if events[3].Kind != EventAssignStart || events[3].Name != "count" { - t.Fatalf("event[3] = %v", events[3]) - } - if events[4].Kind != EventScalarValue || events[4].Value != "42" { - t.Fatalf("event[4] = %v", events[4]) - } -} - -func TestProjectionSubsetFields(t *testing.T) { - doc := "name:str = 'hello'\ncount:int = 42\nactive:bool = true" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - // Only count field emitted: 3 events - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[0].Kind != EventAssignStart || events[0].Name != "count" { - t.Fatalf("event[0] = %v", events[0]) - } - if events[1].Kind != 
EventScalarValue || events[1].Value != "42" { - t.Fatalf("event[1] = %v", events[1]) - } -} - -func TestProjectionMissingFieldNoError(t *testing.T) { - // Spec is advisory — missing fields are not an error. - // Callers use pointer struct fields to detect absent values. - doc := "name:str = 'hello'" - spec := "name:str\ncount:int" - events := decodeAllWithSpec(t, doc, spec) - // Only name field emitted: 3 events (AssignStart, ScalarValue, AssignEnd) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[0].Kind != EventAssignStart || events[0].Name != "name" { - t.Fatalf("event[0] = %v", events[0]) - } -} - -func TestProjectionSkipComplexComposite(t *testing.T) { - doc := `name:str = 'hello' -config:{host:str, port:int, tags:[str]} = { - 'localhost' - 8080 - ['a', 'b', 'c'] -} -count:int = 99` - spec := "name:str\ncount:int" - events := decodeAllWithSpec(t, doc, spec) - // name: 3 events, count: 3 events = 6 - if len(events) != 6 { - t.Fatalf("expected 6 events, got %d: %v", len(events), events) - } - if events[1].Value != "hello" { - t.Fatalf("expected 'hello', got %q", events[1].Value) - } - if events[4].Value != "99" { - t.Fatalf("expected '99', got %q", events[4].Value) - } -} - -func TestProjectionSkipStringWithDelimiters(t *testing.T) { - doc := `greeting:str = 'hello { world }' -count:int = 5` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "5" { - t.Fatalf("expected '5', got %q", events[1].Value) - } -} - -func TestProjectionSkipMultiLineString(t *testing.T) { - doc := "msg:str = '''\n hello\n world\n '''\ncount:int = 7" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "7" { - t.Fatalf("expected '7', got %q", events[1].Value) - } -} - -func 
TestProjectionSkipNestedComposites(t *testing.T) { - doc := `simple:int = 1 -nested:{items:[], count:int} = { - < - 'alpha' ; { 10, 20 } - 'beta' = { 30, 40 } - > - 2 -} -wanted:str = 'found'` - spec := "simple:int\nwanted:str" - events := decodeAllWithSpec(t, doc, spec) - // simple: 3, wanted: 3 ; 6 - if len(events) != 6 { - t.Fatalf("expected 6 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } - if events[4].Value != "found" { - t.Fatalf("expected 'found', got %q", events[4].Value) - } -} - -func TestProjectionSkipAtomValue(t *testing.T) { - doc := "level:|dev, staging, prod| = |prod\ncount:int = 3" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "3" { - t.Fatalf("expected '3', got %q", events[1].Value) - } -} - -func TestProjectionSkipBoolAndNil(t *testing.T) { - doc := "active:bool = true\nmaybe:str? 
= nil\ncount:int = 10" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "10" { - t.Fatalf("expected '10', got %q", events[1].Value) - } -} - -func TestProjectionSkipUUID(t *testing.T) { - doc := "id:uuid = 550e8400-e29b-41d4-a716-446655440000\nname:str = 'test'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "test" { - t.Fatalf("expected 'test', got %q", events[1].Value) - } -} - -func TestProjectionSkipTsValues(t *testing.T) { - doc := "started:ts = 2026-06-01T14:30:00Z\ncount:int = 1" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipNegativeNumber(t *testing.T) { - doc := "offset:int = -42\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "ok" { - t.Fatalf("expected 'ok', got %q", events[1].Value) - } -} - -func TestProjectionSkipMapValue(t *testing.T) { - doc := `meta: = < - 'owner' ; 'team' - 'region' ; 'us-east' -> -count:int = 5` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "5" { - t.Fatalf("expected '5', got %q", events[1].Value) - } -} - -func TestProjectionSkipTupleValue(t *testing.T) { - doc := "version:(int, int, int) = (2, 14, 0)\nname:str = 'app'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", 
len(events), events) - } - if events[1].Value != "app" { - t.Fatalf("expected 'app', got %q", events[1].Value) - } -} - -func TestProjectionSkipListValue(t *testing.T) { - doc := `features:[str] = ['dark-mode', 'notifications'] -count:int = 2` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "2" { - t.Fatalf("expected '2', got %q", events[1].Value) - } -} - -func TestProjectionSkipStringWithEscapedQuotes(t *testing.T) { - doc := `msg:str = 'it\'s a \"test\"' -count:int = 1` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipStringWithEscapedBackslash(t *testing.T) { - doc := `path:str = 'C:\\' -count:int = 1` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionEmptySpec(t *testing.T) { - doc := "name:str = 'hello'\ncount:int = 42" - spec := "" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 0 { - t.Fatalf("expected 0 events with empty spec, got %d: %v", len(events), events) - } -} - -func TestProjectionWithComments(t *testing.T) { - doc := `# header comment -name:str = 'hello' # inline -count:int = 42` - spec := "name:str\ncount:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 6 { - t.Fatalf("expected 6 events, got %d: %v", len(events), events) - } -} - -func TestProjectionSkipCompositeWithStringContainingDelimiters(t *testing.T) { - doc := `config:{msg:str, level:int} = { 'hello } world { foo', 5 } -count:int = 3` - spec := "count:int" - events := 
decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "3" { - t.Fatalf("expected '3', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// Integration test with test data files -// --------------------------------------------------------------------------- - -func TestProjectionIntegrationWithTestData(t *testing.T) { - specFile, err := os.Open("../testdata/valid/spec-example.spec.pakt") - if err != nil { - t.Skipf("skipping integration test: %v", err) - } - defer func() { _ = specFile.Close() }() - - docFile, err := os.Open("../testdata/valid/full.pakt") - if err != nil { - t.Fatalf("cannot open full.pakt: %v", err) - } - defer func() { _ = docFile.Close() }() - - d := NewDecoder(docFile) - if err := d.SetSpec(specFile); err != nil { - t.Fatalf("SetSpec: %v", err) - } - - var events []Event - for { - ev, err := d.Decode() - if err == io.EOF { - break - } - if err != nil { - t.Fatalf("Decode(): %v", err) - } - events = append(events, ev) - } - - // The spec requests deploy and version. - // Verify that we got events for both. - foundDeploy := false - foundVersion := false - for _, ev := range events { - if ev.Kind == EventAssignStart && ev.Name == "deploy" { - foundDeploy = true - } - if ev.Kind == EventAssignStart && ev.Name == "version" { - foundVersion = true - } - } - if !foundDeploy { - t.Fatal("expected deploy assignment in projected output") - } - if !foundVersion { - t.Fatal("expected version assignment in projected output") - } - - // Verify no other top-level assignments are present. 
- for _, ev := range events { - if ev.Kind == EventAssignStart { - if ev.Name != "deploy" && ev.Name != "version" { - t.Fatalf("unexpected assignment %q in projected output", ev.Name) - } - } - } -} - -// --------------------------------------------------------------------------- -// skipValue edge-case tests -// --------------------------------------------------------------------------- - -func TestSkipValueHexInt(t *testing.T) { - doc := "val:int = 0xFF\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d", len(events)) - } - if events[1].Value != "ok" { - t.Fatalf("expected 'ok', got %q", events[1].Value) - } -} - -func TestSkipValueDecimal(t *testing.T) { - doc := "ratio:dec = 3.14\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d", len(events)) - } -} - -func TestSkipValueFloat(t *testing.T) { - doc := "rate:float = 1.5e10\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d", len(events)) - } -} - -func TestSkipValueDate(t *testing.T) { - doc := "d:date = 2026-01-15\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d", len(events)) - } -} - -func TestSkipValueTs(t *testing.T) { - doc := "t:ts = 2026-01-15T14:30:00Z\nname:str = 'ok'" - spec := "name:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d", len(events)) - } -} - -func TestProjectionSkipDoubleQuotedString(t *testing.T) { - doc := "msg:str = \"hello world\"\ncount:int = 1" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - 
t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleDoubleQuotedString(t *testing.T) { - doc := "msg:str = \"\"\"\n hello\n world\n \"\"\"\ncount:int = 7" - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "7" { - t.Fatalf("expected '7', got %q", events[1].Value) - } -} - -func TestProjectionSkipEmptyComposites(t *testing.T) { - doc := `items:[str] = [] -meta: = <> -count:int = 1` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipWithInlineComments(t *testing.T) { - doc := `name:str = 'hello' # skip this -count:int = 42 # and this` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "42" { - t.Fatalf("expected '42', got %q", events[1].Value) - } -} - -func TestProjectionSkipBlockCompositeWithComments(t *testing.T) { - doc := `config:{host:str, port:int} = { - # the host - 'localhost' - # the port - 8080 -} -count:int = 1` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } -} - -// --------------------------------------------------------------------------- -// skipCompositeInner — deeply nested composites -// --------------------------------------------------------------------------- - -func TestProjectionSkipTupleWithAllInnerTypes(t *testing.T) { - // Tuple containing struct, list, map — exercises - // skipComposite('(', ')') hitting '{', '[', '<' and comments. 
- doc := `data:(int, {x:int, y:int}, [int], ) = ( - 1 - # comment inside tuple - { 10, 20 } - [1, 2] - <'a' ; 5> -) -wanted:int = 99` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "99" { - t.Fatalf("expected '99', got %q", events[1].Value) - } -} - -func TestProjectionSkipListWithAllInnerTypes(t *testing.T) { - // List containing struct with tuple and map inside — exercises - // skipComposite('[', ']') hitting '{' → skipCompositeInner, - // then '(' and '<' within inner. - doc := `data:[{a:int, b:(int, int), c:}] = [ - # comment inside list - { - 1 - (2, 3) - <'k' ; 4> - } -] -wanted:int = 88` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "88" { - t.Fatalf("expected '88', got %q", events[1].Value) - } -} - -func TestProjectionSkipMapWithAllInnerTypes(t *testing.T) { - // Map containing struct values with tuple and list — exercises - // skipComposite('<', '>') hitting '{' → skipCompositeInner, - // then '(' and '[' within inner. - doc := `data: = < - # comment inside map - 'key' ; { - 1 - (2, 3) - [4, 5] - } -> -wanted:int = 77` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "77" { - t.Fatalf("expected '77', got %q", events[1].Value) - } -} - -func TestProjectionSkipDeeplyNestedFiveLevels(t *testing.T) { - // 5 levels: struct → list → map → struct → tuple - // Exercises skipCompositeInner recursively with all delimiter types. 
- doc := `deep:{items:[]} = { - [ - < - 'alpha' ; { (10, 20) } - 'beta' = { (30, 40) } - > - ] -} -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipMixedCompositesAtSameLevel(t *testing.T) { - // Struct containing a list, map, and tuple at the same level. - doc := `server:{ports:[int], labels:, version:(int, int, int)} = { - [8080, 8443] - <'env' ; 'prod'> - (1, 2, 3) -} -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipInnerCompositeWithStringsAndComments(t *testing.T) { - // Struct → list → map → struct with strings containing delimiters - // and comments inside skipCompositeInner paths. - doc := `deep:{items:[]} = { - [ - < - 'key with {brackets} and [more] and (parens) and ' ; { - # comment with > and ) and ] delimiters - (10, 20) - } - > - ] -} -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// skipTripleQuotedString edge cases -// --------------------------------------------------------------------------- - -func TestProjectionSkipTripleQuotedWithEmbeddedQuote(t *testing.T) { - // Triple-quoted string containing the quote character inside. 
- doc := "msg:str = '''\nit's a test\n'''\nwanted:int = 1" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleQuotedWithEscapedBackslash(t *testing.T) { - // Triple-quoted with backslash-escaped backslash before the closing quotes. - doc := "msg:str = '''\nline\\\\\n'''\nwanted:int = 2" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "2" { - t.Fatalf("expected '2', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleQuotedWithEscapedQuoteBeforeClose(t *testing.T) { - // Backslash-quote inside triple-quoted — the \' should not start closing. - doc := "msg:str = '''\ndon\\'t stop\n'''\nwanted:int = 3" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "3" { - t.Fatalf("expected '3', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleDoubleQuotedWithEmbeddedQuotes(t *testing.T) { - // Triple double-quoted string containing a double quote inside. 
- doc := "msg:str = \"\"\"\nhello \"world\"\n\"\"\"\nwanted:int = 4" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "4" { - t.Fatalf("expected '4', got %q", events[1].Value) - } -} - -func TestProjectionSkipEmptyTripleQuotedString(t *testing.T) { - doc := "msg:str = '''\n'''\nwanted:int = 5" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "5" { - t.Fatalf("expected '5', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleQuotedWithTwoConsecutiveQuotesThenOther(t *testing.T) { - // Two consecutive quotes that don't form a closing triple. - doc := "msg:str = '''\nab''cd\n'''\nwanted:int = 6" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "6" { - t.Fatalf("expected '6', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// skipComposite with strings containing delimiters -// --------------------------------------------------------------------------- - -func TestProjectionSkipCompositeWithAllDelimitersInString(t *testing.T) { - // String value containing all delimiter characters. - doc := `greeting:str = 'hello {world} [foo] (bar) ' -count:int = 10` - spec := "count:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "10" { - t.Fatalf("expected '10', got %q", events[1].Value) - } -} - -func TestProjectionSkipMultiLineStringInSkippedStruct(t *testing.T) { - // Triple-quoted string inside a struct that is being skipped. 
- doc := "config:{msg:str, n:int} = {\n '''\n hello\n world\n '''\n 5\n}\nwanted:int = 9" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "9" { - t.Fatalf("expected '9', got %q", events[1].Value) - } -} - -func TestProjectionSkipMapWithEqualsInStringValues(t *testing.T) { - // Map where string values contain '=' signs. - doc := `env: = < - 'PATH' ; '/usr/bin=/usr/local/bin' - 'OPTS' ; '--key=value --flag=true' -> -wanted:int = 5` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "5" { - t.Fatalf("expected '5', got %q", events[1].Value) - } -} - -func TestProjectionSkipNestedCompositeWithDelimiterStrings(t *testing.T) { - // Inside a list (inner composite), strings with all delimiter chars. - doc := `data:{items:[str]} = { - ['hello } world { and [more] (stuff) '] -} -wanted:int = 7` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "7" { - t.Fatalf("expected '7', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// skipValue for all scalar skip paths -// --------------------------------------------------------------------------- - -func TestProjectionSkipFalseValue(t *testing.T) { - // Specifically skip 'false' to cover the b == 'f' branch in skipValue. 
- doc := "flag:bool = false\nwanted:int = 11" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "11" { - t.Fatalf("expected '11', got %q", events[1].Value) - } -} - -func TestProjectionSkipAllScalarTypes(t *testing.T) { - // Skip every scalar type in one document to exercise all skipValue paths. - doc := `flag-t:bool = true -flag-f:bool = false -nothing:str? = nil -neg:int = -42 -id:uuid = 550e8400-e29b-41d4-a716-446655440000 -d:date = 2026-01-15 -t:ts = 2026-01-15T14:30:00Z -dt:ts = 2026-06-01T14:30:00Z -level:|dev, staging, prod| = |staging -wanted:int = 100` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "100" { - t.Fatalf("expected '100', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// Projection with complex documents -// --------------------------------------------------------------------------- - -func TestProjectionFirstFieldSkippedSecondCaptured(t *testing.T) { - // The first field is skipped (deeply nested), second is captured. - doc := `complex:{items:[]} = { - [ - < - 'key' ; { 42 } - > - ] -} -wanted:str = 'captured'` - spec := "wanted:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[0].Kind != EventAssignStart || events[0].Name != "wanted" { - t.Fatalf("expected AssignStart for 'wanted', got %v", events[0]) - } - if events[1].Value != "captured" { - t.Fatalf("expected 'captured', got %q", events[1].Value) - } -} - -func TestProjectionComplexDocWithNestedDelimiterStrings(t *testing.T) { - // Skipped field has deeply nested composites with strings containing - // all delimiter types; the second field is captured. 
- doc := `config:{servers:[]} = { - [ - < - 'prod' ; { - 'server {prod} on port [443] via (tls) at ' - 443 - } - 'staging' ; { - 'server {staging} on port [8443]' - 8443 - } - > - ] -} -result:str = 'ok'` - spec := "result:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "ok" { - t.Fatalf("expected 'ok', got %q", events[1].Value) - } -} - -func TestProjectionSkipMultipleComplexFieldsCaptureMiddle(t *testing.T) { - // First and last fields are skipped; only the middle field is captured. - doc := `before:{items:[int]} = { - [1, 2, 3] -} -wanted:str = 'middle' -after: = < - 'a' ; (1, 2) - 'b' ; (3, 4) ->` - spec := "wanted:str" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "middle" { - t.Fatalf("expected 'middle', got %q", events[1].Value) - } -} - -func TestProjectionSkipTripleQuotedInsideNestedComposite(t *testing.T) { - // Triple-quoted string inside a nested composite being skipped. - doc := "data:{items:[str]} = {\n [\n '''\n multi-line with 'quotes' inside\n '''\n ]\n}\nwanted:int = 42" - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "42" { - t.Fatalf("expected '42', got %q", events[1].Value) - } -} - -func TestProjectionSkipCommentsWithDelimitersInNestedComposite(t *testing.T) { - // Comments containing delimiter chars inside nested composites. 
- doc := `data:{items:[{n:int}]} = { - [ - { - # comment: } ] > ) won't close anything - 42 - } - ] -} -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// Nested same-type delimiters (covers depth++ in skipComposite/Inner) -// --------------------------------------------------------------------------- - -func TestProjectionSkipNestedSameTypeList(t *testing.T) { - // List of lists — skipComposite('[', ']') sees inner '[' → depth++. - doc := `data:[[int]] = [[1, 2], [3, 4]] -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -func TestProjectionSkipNestedSameTypeInInnerComposite(t *testing.T) { - // Struct containing list of lists — skipCompositeInner('[',']') - // sees another '[' → depth++ inside skipCompositeInner. 
- doc := `data:{matrix:[[int]]} = { - [[1, 2], [3, 4]] -} -wanted:int = 1` - spec := "wanted:int" - events := decodeAllWithSpec(t, doc, spec) - if len(events) != 3 { - t.Fatalf("expected 3 events, got %d: %v", len(events), events) - } - if events[1].Value != "1" { - t.Fatalf("expected '1', got %q", events[1].Value) - } -} - -// --------------------------------------------------------------------------- -// Error paths for skip functions (unterminated values) -// --------------------------------------------------------------------------- - -func TestProjectionSkipUnterminatedComposite(t *testing.T) { - doc := "data:[int] = [1, 2" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated composite") - } -} - -func TestProjectionSkipUnterminatedInnerComposite(t *testing.T) { - // Struct containing an unterminated list — triggers error return - // from skipCompositeInner propagated through skipComposite. - doc := "data:{items:[int]} = { [1, 2" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated inner composite") - } -} - -func TestProjectionSkipUnterminatedDeeplyNestedInner(t *testing.T) { - // Struct → list → struct (unterminated) — triggers error return - // from skipCompositeInner recursive call. 
- doc := "data:{items:[{n:int}]} = { [{ 42" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated deeply nested composite") - } -} - -func TestProjectionSkipUnterminatedString(t *testing.T) { - doc := "data:str = 'unterminated" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated string") - } -} - -func TestProjectionSkipUnterminatedStringEscape(t *testing.T) { - doc := "data:str = 'test\\" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated escape in string") - } -} - -func TestProjectionSkipUnterminatedTripleQuoted(t *testing.T) { - doc := "data:str = '''unterminated content" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated triple-quoted string") - } -} - -func TestProjectionSkipUnterminatedTripleQuotedEscape(t *testing.T) { - doc := "data:str = '''content\\" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated escape in triple-quoted string") - } -} - -func TestProjectionSkipUnterminatedStringInComposite(t *testing.T) { - // String error inside skipComposite — covers return err from skipString. - doc := "data:{msg:str} = { 'unterminated" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated string in composite") - } -} - -func TestProjectionSkipUnterminatedStringInInnerComposite(t *testing.T) { - // String error inside skipCompositeInner — covers return err from - // skipString within the inner composite path. 
- doc := "data:{items:[str]} = { ['unterminated" - spec := "wanted:int" - err := decodeExpectErrorWithSpec(t, doc, spec) - if err == nil { - t.Fatal("expected error for unterminated string in inner composite") - } -} diff --git a/encoding/tags.go b/encoding/tags.go index b831cfe..3515cff 100644 --- a/encoding/tags.go +++ b/encoding/tags.go @@ -146,15 +146,15 @@ func typeOfReflect(t reflect.Type, seen map[reflect.Type]bool) (Type, error) { } } -// StructFields returns the PAKT field mapping for a Go struct type. +// ReflectStructFields returns the PAKT field mapping for a Go struct type. // t must be a struct type (or pointer to struct); otherwise an error is returned. // Results are cached per type for subsequent calls. -func StructFields(t reflect.Type) ([]FieldInfo, error) { +func ReflectStructFields(t reflect.Type) ([]FieldInfo, error) { for t.Kind() == reflect.Pointer { t = t.Elem() } if t.Kind() != reflect.Struct { - return nil, fmt.Errorf("pakt: StructFields requires struct type, got %s", t.Kind()) + return nil, fmt.Errorf("pakt: ReflectStructFields requires struct type, got %s", t.Kind()) } info, err := cachedStructFields(t) if err != nil { diff --git a/encoding/tags_test.go b/encoding/tags_test.go index cffa2d7..50ebfd0 100644 --- a/encoding/tags_test.go +++ b/encoding/tags_test.go @@ -366,7 +366,7 @@ func TestStructFields_OmitEmpty(t *testing.T) { Name string `pakt:",omitempty"` Value int `pakt:"val,omitempty"` } - fields, err := StructFields(reflect.TypeOf(WithOmit{})) + fields, err := ReflectStructFields(reflect.TypeOf(WithOmit{})) if err != nil { t.Fatalf("StructFields: %v", err) } @@ -391,9 +391,9 @@ func TestStructFields_PointerToStruct(t *testing.T) { type S struct { X int } - fields, err := StructFields(reflect.TypeOf(&S{})) + fields, err := ReflectStructFields(reflect.TypeOf(&S{})) if err != nil { - t.Fatalf("StructFields(*S): %v", err) + t.Fatalf("ReflectStructFields(*S): %v", err) } if len(fields) != 1 { t.Fatalf("expected 1 field, got %d", 
len(fields)) @@ -404,9 +404,9 @@ func TestStructFields_PointerToStruct(t *testing.T) { } func TestStructFields_NonStruct(t *testing.T) { - _, err := StructFields(reflect.TypeOf("hello")) + _, err := ReflectStructFields(reflect.TypeOf("hello")) if err == nil { - t.Error("StructFields(string) should return error") + t.Error("ReflectStructFields(string) should return error") } } @@ -456,7 +456,7 @@ func TestStructFields_Index(t *testing.T) { B int C bool } - fields, err := StructFields(reflect.TypeOf(S{})) + fields, err := ReflectStructFields(reflect.TypeOf(S{})) if err != nil { t.Fatalf("StructFields: %v", err) } diff --git a/encoding/unit_reader.go b/encoding/unit_reader.go new file mode 100644 index 0000000..b4c5b3c --- /dev/null +++ b/encoding/unit_reader.go @@ -0,0 +1,225 @@ +package encoding + +import ( + "bytes" + "io" + "iter" +) + +// Property describes the header of one top-level PAKT property. +// A Property is valid only until the [UnitReader.Properties] iterator advances +// to the next property or [UnitReader.Close] is called. +type Property struct { + Name string // property name (e.g., "server", "events") + Type Type // declared PAKT type annotation; zero value when the start event carries no type + Pos Pos // source position of the property + IsPack bool // true if the property is a pack statement (<<) +} + +// UnitReader reads PAKT statements one at a time from a stream. +// It is the primary deserialization interface, wrapping a [Decoder] and +// providing statement-level navigation with iterator-based pack streaming. +type UnitReader struct { + dec *Decoder // underlying event decoder, created by NewUnitReader + opts *options // options resolved by buildOptions in NewUnitReader + err error // first error encountered during iteration + current *Event // most recently yielded statement-start event, or nil (skipCurrent clears it) + depth int // nesting depth within current statement (0 = at statement level) + inPack bool // set by Properties to whether the current statement began with a pack-start event + pending *Event // one-event pushback for navigation helpers +} + +// NewUnitReader creates a UnitReader from any [io.Reader]. 
+func NewUnitReader(r io.Reader, opts ...Option) *UnitReader { + return &UnitReader{ + dec: NewDecoder(r), + opts: buildOptions(opts), + } +} + +// NewUnitReaderFromBytes creates a UnitReader from a byte slice. +func NewUnitReaderFromBytes(data []byte, opts ...Option) *UnitReader { + return NewUnitReader(bytes.NewReader(data), opts...) +} + +// Close releases all resources held by the UnitReader. +// It is safe to call Close multiple times. +func (sr *UnitReader) Close() { + if sr.dec != nil { + sr.dec.Close() + } +} + +// Err returns the first error encountered during iteration, or nil if +// iteration completed successfully or hasn't started. +func (sr *UnitReader) Err() error { + return sr.err +} + +// Properties returns an iterator over the top-level properties in the PAKT unit. +// Each [Property] is valid only for the current iteration step. +// +// On error, iteration stops. Call [UnitReader.Err] after the loop to +// check for errors. +// +// Within each iteration step, the caller should read the property's value +// using [ReadValue], [PackItems], or [UnitReader.Skip]. +// If the caller does not consume the property's value, Properties +// automatically skips to the next property. +func (sr *UnitReader) Properties() iter.Seq[Property] { + return func(yield func(Property) bool) { + for { + // If there's an unconsumed statement from the previous iteration, + // skip its remaining events. + if sr.current != nil { + if err := sr.skipCurrent(); err != nil { + sr.setErr(err) + return + } + } + + ev, err := sr.dec.Decode() + if err != nil { + if err != io.EOF { + sr.setErr(err) + } + return + } + + // We expect statement-start events at the top level. + switch ev.Kind { + case EventAssignStart, EventListPackStart, EventMapPackStart: + // Good — this is a statement header. 
+ default: + sr.setErr(&DeserializeError{ + Pos: ev.Pos, + Message: "expected statement start event, got " + ev.Kind.String(), + }) + return + } + + sr.current = &ev + sr.depth = 0 + sr.inPack = ev.Kind.IsPackStart() + + var typ Type + if ev.Type != nil { + typ = *ev.Type + } + + stmt := Property{ + Name: ev.Name, + Type: typ, + Pos: ev.Pos, + IsPack: sr.inPack, + } + + if !yield(stmt) { + return + } + } + } +} + +// Skip advances past the current statement or pack element without +// deserializing. Use for unknown or unwanted statements. +func (sr *UnitReader) Skip() error { + return sr.skipCurrent() +} + +// skipCurrent consumes all remaining events for the current statement. +func (sr *UnitReader) skipCurrent() error { + if sr.current == nil { + return nil + } + + endKind := sr.endKindForCurrent() + + for { + ev, err := sr.dec.Decode() + if err != nil { + if err == io.EOF { + sr.current = nil + return nil + } + sr.current = nil + return err + } + + if ev.Kind == endKind && sr.depth == 0 { + sr.current = nil + return nil + } + + // Track nesting depth for composite values within the statement. + if ev.Kind.IsCompositeStart() || ev.Kind.IsPackStart() { + sr.depth++ + } else if ev.Kind.IsCompositeEnd() || ev.Kind.IsPackEnd() { + sr.depth-- + } + } +} + +// endKindForCurrent returns the EventKind that terminates the current statement. +func (sr *UnitReader) endKindForCurrent() EventKind { + if sr.current == nil { + return EventError + } + switch sr.current.Kind { + case EventAssignStart: + return EventAssignEnd + case EventListPackStart: + return EventListPackEnd + case EventMapPackStart: + return EventMapPackEnd + default: + return EventError + } +} + +// setErr records the first error. +func (sr *UnitReader) setErr(err error) { + if sr.err == nil { + sr.err = err + } +} + +// pushBack stores an event for the next nextEvent() call. 
+func (sr *UnitReader) pushBack(ev Event) { + sr.pending = &ev +} + +// nextEvent reads the next event from the decoder, tracking nesting depth. +// It returns io.EOF when the current statement/pack is exhausted. +// If a pending event was pushed back, it is returned first. +func (sr *UnitReader) nextEvent() (Event, error) { + var ev Event + var err error + + if sr.pending != nil { + ev = *sr.pending + sr.pending = nil + } else { + ev, err = sr.dec.Decode() + if err != nil { + return Event{}, err + } + } + + endKind := sr.endKindForCurrent() + + // Check for end of current statement. + if ev.Kind == endKind && sr.depth == 0 { + sr.current = nil + return Event{}, io.EOF + } + + // Track nesting depth for composite values within the statement. + if ev.Kind.IsCompositeStart() || ev.Kind.IsPackStart() { + sr.depth++ + } else if ev.Kind.IsCompositeEnd() || ev.Kind.IsPackEnd() { + sr.depth-- + } + + return ev, nil +} diff --git a/encoding/unit_reader_test.go b/encoding/unit_reader_test.go new file mode 100644 index 0000000..f224501 --- /dev/null +++ b/encoding/unit_reader_test.go @@ -0,0 +1,204 @@ +package encoding + +import ( + "strings" + "testing" +) + +func TestUnitReaderBasic(t *testing.T) { + input := "name:str = 'hello'\nport:int = 8080\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var names []string + for stmt := range sr.Properties() { + names = append(names, stmt.Name) + if stmt.IsPack { + t.Errorf("unexpected pack statement: %s", stmt.Name) + } + // Skip the value (we're just testing navigation) + } + if err := sr.Err(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(names) != 2 { + t.Fatalf("expected 2 statements, got %d", len(names)) + } + if names[0] != "name" || names[1] != "port" { + t.Errorf("expected [name, port], got %v", names) + } +} + +func TestUnitReaderPack(t *testing.T) { + input := "items:[int] <<\n1\n2\n3\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var found bool + 
for stmt := range sr.Properties() { + if stmt.Name == "items" { + found = true + if !stmt.IsPack { + t.Error("expected pack statement") + } + if stmt.Type.List == nil { + t.Error("expected list type") + } + } + } + if err := sr.Err(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !found { + t.Error("expected to find 'items' statement") + } +} + +func TestUnitReaderSkip(t *testing.T) { + input := "a:str = 'first'\nb:{x:int, y:int} = {1, 2}\nc:str = 'third'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var names []string + for stmt := range sr.Properties() { + names = append(names, stmt.Name) + // All properties are auto-skipped by Properties() iterator + } + if err := sr.Err(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(names) != 3 { + t.Fatalf("expected 3 statements, got %d: %v", len(names), names) + } + if names[0] != "a" || names[1] != "b" || names[2] != "c" { + t.Errorf("expected [a, b, c], got %v", names) + } +} + +func TestUnitReaderEmpty(t *testing.T) { + sr := NewUnitReader(strings.NewReader("")) + defer sr.Close() + + count := 0 + for range sr.Properties() { + count++ + } + if err := sr.Err(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if count != 0 { + t.Errorf("expected 0 statements, got %d", count) + } +} + +func TestUnitReaderMixed(t *testing.T) { + input := "name:str = 'svc'\nevents:[str] <<\n'a'\n'b'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var stmts []Property + for stmt := range sr.Properties() { + stmts = append(stmts, stmt) + } + if err := sr.Err(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(stmts) != 2 { + t.Fatalf("expected 2 statements, got %d", len(stmts)) + } + if stmts[0].Name != "name" || stmts[0].IsPack { + t.Errorf("stmt 0: expected assign 'name', got %+v", stmts[0]) + } + if stmts[1].Name != "events" || !stmts[1].IsPack { + t.Errorf("stmt 1: expected pack 'events', got %+v", stmts[1]) + } +} + 
+func TestUnitReaderExplicitSkip(t *testing.T) { + input := "a:{x:int, y:int} = {1, 2}\nb:str = 'hello'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var bVal string + for stmt := range sr.Properties() { + switch stmt.Name { + case "a": + if err := sr.Skip(); err != nil { + t.Fatal(err) + } + case "b": + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + bVal = val + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if bVal != "hello" { + t.Errorf("expected 'hello', got %q", bVal) + } +} + +func TestUnitReaderErrPropagation(t *testing.T) { + // Malformed input should surface via Err() + input := "name:str = 'unterminated\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + for range sr.Properties() { + _, _ = ReadValue[string](sr) + } + // We expect an error from the malformed string + if err := sr.Err(); err == nil { + // The parser may or may not error depending on the exact parse rules. + // Accept both outcomes but verify Err() is callable. 
+ t.Log("no error from unterminated string (parser may accept)") + } +} + +func TestUnitReaderSkipPackStatement(t *testing.T) { + input := "items:[int] <<\n1\n2\n3\nname:str = 'after'\n" + sr := NewUnitReader(strings.NewReader(input)) + defer sr.Close() + + var name string + for stmt := range sr.Properties() { + switch stmt.Name { + case "items": + // Explicitly skip the pack + if err := sr.Skip(); err != nil { + t.Fatal(err) + } + case "name": + val, err := ReadValue[string](sr) + if err != nil { + t.Fatal(err) + } + name = val + } + } + if err := sr.Err(); err != nil { + t.Fatal(err) + } + if name != "after" { + t.Errorf("expected 'after', got %q", name) + } +} + +func TestUnitReaderErrOnMalformedInput(t *testing.T) { + // Trigger setErr path with malformed PAKT + sr := NewUnitReader(strings.NewReader("name:str\n")) + defer sr.Close() + for range sr.Properties() { + // malformed — no = or << + } + if err := sr.Err(); err == nil { + t.Error("expected error for malformed input") + } +} diff --git a/encoding/unmarshal.go b/encoding/unmarshal.go index 09de6ab..99a71f2 100644 --- a/encoding/unmarshal.go +++ b/encoding/unmarshal.go @@ -1,9 +1,7 @@ package encoding import ( - "encoding/hex" "fmt" - "io" "math" "reflect" "strconv" @@ -11,86 +9,6 @@ import ( "time" ) -// Unmarshal parses PAKT data and stores the result in the value pointed to by v. -// v must be a pointer to a struct. Each top-level PAKT statement is matched -// to struct fields by name (using pakt struct tags or lowercase field names). -// -// Unmarshal uses an optimized path that reads directly from the input byte slice -// without buffering, and populates struct fields via a visitor-driven parser that -// bypasses Event creation. For incremental use cases, prefer [Decoder.UnmarshalNext]. 
-func Unmarshal(data []byte, v any) error { - if v == nil { - return fmt.Errorf("pakt: Unmarshal requires a non-nil pointer") - } - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Pointer { - return fmt.Errorf("pakt: Unmarshal requires a pointer, got %s", rv.Type()) - } - if rv.IsNil() { - return fmt.Errorf("pakt: Unmarshal requires a non-nil pointer") - } - rv = rv.Elem() - if rv.Kind() != reflect.Struct { - return fmt.Errorf("pakt: Unmarshal requires a pointer to a struct, got pointer to %s", rv.Type()) - } - - info, err := cachedStructFields(rv.Type()) - if err != nil { - return err - } - - rd := newReaderFromBytes(data) - sm := newStateMachine(rd) - defer func() { - sm.release() - rd.release() - }() - - for { - rd.skipInsignificant(true) - if _, err := rd.peekByte(); err != nil { - if err == io.EOF { - return nil - } - return err - } - - h, err := sm.readStatementHeader() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - - fi, ok := info.fieldMap[h.name] - if !ok { - if err := rd.skipStatementBody(h); err != nil { - return err - } - continue - } - - target := rv.Field(fi.Index) - if h.pack { - var serr error - if h.typ.List != nil { - serr = sm.unmarshalPackList(h.typ.List, target) - } else { - serr = sm.unmarshalPackMap(h.typ.Map, target) - } - if serr != nil { - return fmt.Errorf("pakt: field %q: %w", h.name, serr) - } - } else { - rd.skipWS() - if err := sm.unmarshalValue(h.typ, target); err != nil { - return fmt.Errorf("pakt: field %q: %w", h.name, err) - } - } - } -} - // setNil sets a value to its zero value, or nil for pointers/maps/slices. 
func setNil(target reflect.Value) error { if target.Kind() == reflect.Pointer || target.Kind() == reflect.Map || @@ -125,22 +43,6 @@ func setString(target reflect.Value, val string) error { return fmt.Errorf("cannot set string into %s", target.Type()) } -func setBin(target reflect.Value, raw string) error { - data, err := hex.DecodeString(raw) - if err != nil { - return fmt.Errorf("invalid bin value %q: %w", raw, err) - } - if target.Kind() == reflect.Slice && target.Type().Elem().Kind() == reflect.Uint8 { - target.SetBytes(data) - return nil - } - if target.Kind() == reflect.String { - target.SetString(string(data)) - return nil - } - return fmt.Errorf("cannot set bin into %s", target.Type()) -} - func setInt(target reflect.Value, raw string) error { n, err := parseIntLiteral(raw) if err != nil { @@ -168,7 +70,9 @@ func setInt(target reflect.Value, raw string) error { target.SetFloat(float64(n)) return nil case reflect.String: - target.SetString(raw) + // Clone to ensure the string is independently allocated + // (raw may be an unsafe view of borrowed bytes). 
+ target.SetString(strings.Clone(raw)) return nil default: return fmt.Errorf("cannot set int into %s", target.Type()) @@ -214,7 +118,7 @@ func parseIntLiteral(raw string) (int64, error) { if val > math.MaxInt64+1 { return 0, fmt.Errorf("int literal %q overflows int64", raw) } - return -int64(val), nil + return -int64(val), nil //nolint:gosec // overflow checked: val <= MaxInt64+1 } if val > math.MaxInt64 { return 0, fmt.Errorf("int literal %q overflows int64", raw) @@ -225,7 +129,7 @@ func parseIntLiteral(raw string) (int64, error) { func setDec(target reflect.Value, raw string) error { switch target.Kind() { case reflect.String: - target.SetString(raw) + target.SetString(strings.Clone(raw)) return nil case reflect.Float32, reflect.Float64: s := raw @@ -255,7 +159,7 @@ func setTemporalString(target reflect.Value, raw string, kind reflect.Kind) erro } if kind == reflect.String { - target.SetString(raw) + target.SetString(strings.Clone(raw)) return nil } diff --git a/encoding/unmarshal_new.go b/encoding/unmarshal_new.go new file mode 100644 index 0000000..29437c2 --- /dev/null +++ b/encoding/unmarshal_new.go @@ -0,0 +1,204 @@ +package encoding + +import ( + "fmt" + "io" + "maps" + "reflect" + "slices" +) + +// UnmarshalNew deserializes a complete PAKT unit from bytes into a struct of type T. +// This is convenience sugar over [UnitReader]. +// +// T must be a struct type. Each top-level PAKT property is matched to struct +// fields by name (using pakt struct tags or lowercase field names). +func UnmarshalNew[T any](data []byte, opts ...Option) (T, error) { + var result T + if err := UnmarshalNewInto(data, &result, opts...); err != nil { + return result, err + } + return result, nil +} + +// UnmarshalNewFrom deserializes a complete PAKT unit from a reader into a struct of type T. 
+func UnmarshalNewFrom[T any](r io.Reader, opts ...Option) (T, error) { + var result T + rv := reflect.ValueOf(&result).Elem() + if rv.Kind() != reflect.Struct { + return result, fmt.Errorf("pakt: UnmarshalNewFrom requires a struct type, got %s", rv.Type()) + } + + sr := NewUnitReader(r, opts...) + defer sr.Close() + + if err := unmarshalIntoStruct(sr, rv); err != nil { + return result, err + } + return result, nil +} + +// UnmarshalNewInto deserializes a complete PAKT unit from bytes into an existing value. +// Useful when reusing buffers or populating embedded structs. +func UnmarshalNewInto[T any](data []byte, target *T, opts ...Option) error { + if target == nil { + return fmt.Errorf("pakt: UnmarshalNewInto requires a non-nil pointer") + } + rv := reflect.ValueOf(target).Elem() + if rv.Kind() != reflect.Struct { + return fmt.Errorf("pakt: UnmarshalNewInto requires a pointer to a struct, got pointer to %s", rv.Type()) + } + + sr := NewUnitReaderFromBytes(data, opts...) + defer sr.Close() + + return unmarshalIntoStruct(sr, rv) +} + +// unmarshalIntoStruct iterates properties and maps them to struct fields. +func unmarshalIntoStruct(sr *UnitReader, rv reflect.Value) error { + info, err := cachedStructFields(rv.Type()) + if err != nil { + return err + } + + seen := make(map[string]bool) + + for stmt := range sr.Properties() { + fi, ok := info.fieldMap[stmt.Name] + if !ok { + // Apply unknown field policy. + if sr.opts.unknownFields == ErrorUnknown { + return &DeserializeError{ + Pos: stmt.Pos, + Property: stmt.Name, + Message: fmt.Sprintf("unknown property %q", stmt.Name), + } + } + continue // auto-skipped by Properties iterator + } + + // Handle duplicates. 
+ if seen[stmt.Name] { + switch sr.opts.duplicates { + case ErrorDupes: + return &DeserializeError{ + Pos: stmt.Pos, + Property: stmt.Name, + Message: fmt.Sprintf("duplicate property %q", stmt.Name), + } + case FirstWins: + continue // skip, auto-skipped by iterator + case LastWins: + // fall through — overwrite + case Accumulate: + return &DeserializeError{ + Pos: stmt.Pos, + Property: stmt.Name, + Message: "Accumulate duplicate policy is not yet implemented", + } + } + } + seen[stmt.Name] = true + + target := rv.Field(fi.Index) + if stmt.IsPack { + // For pack properties, collect all elements into the target. + if err := unmarshalPackIntoTarget(sr, stmt, target); err != nil { + return err + } + } else { + if err := readValueReflect(sr, target); err != nil { + return fmt.Errorf("pakt: field %q: %w", stmt.Name, err) + } + } + } + + if err := sr.Err(); err != nil { + return err + } + + // Check missing fields. + if sr.opts.missingFields == ErrorMissing { + for _, name := range slices.Sorted(maps.Keys(info.fieldMap)) { + if !seen[name] { + return &DeserializeError{ + Field: name, + Message: fmt.Sprintf("missing property for field %q", name), + } + } + } + } + + return nil +} + +// unmarshalPackIntoTarget reads all pack elements into a slice or map field. 
+func unmarshalPackIntoTarget(sr *UnitReader, stmt Property, target reflect.Value) error { + target = allocPtr(target) + + switch target.Kind() { + case reflect.Slice: + elemType := target.Type().Elem() + target.Set(reflect.MakeSlice(target.Type(), 0, 64)) + + for { + ev, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + + target.Grow(1) + target.SetLen(target.Len() + 1) + elem := target.Index(target.Len() - 1) + if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { + elem.Set(reflect.New(elemType).Elem()) + } + elem = allocPtr(elem) + if err := handleValueEvent(sr, ev, elem); err != nil { + return fmt.Errorf("pakt: field %q: %w", stmt.Name, err) + } + } + + case reflect.Map: + if target.IsNil() { + target.Set(reflect.MakeMap(target.Type())) + } + keyType := target.Type().Key() + valType := target.Type().Elem() + + for { + // Read key + keyEv, err := sr.nextEvent() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + + key := reflect.New(keyType).Elem() + if err := handleValueEvent(sr, keyEv, key); err != nil { + return fmt.Errorf("pakt: field %q key: %w", stmt.Name, err) + } + + // Read value + valEv, err := sr.nextEvent() + if err != nil { + return fmt.Errorf("pakt: field %q value: %w", stmt.Name, err) + } + val := reflect.New(valType).Elem() + if err := handleValueEvent(sr, valEv, val); err != nil { + return fmt.Errorf("pakt: field %q value: %w", stmt.Name, err) + } + + target.SetMapIndex(key, val) + } + + default: + return fmt.Errorf("pakt: field %q: cannot unmarshal pack into %s (need slice or map)", stmt.Name, target.Type()) + } +} diff --git a/encoding/unmarshal_new_test.go b/encoding/unmarshal_new_test.go new file mode 100644 index 0000000..647a3bf --- /dev/null +++ b/encoding/unmarshal_new_test.go @@ -0,0 +1,251 @@ +package encoding + +import ( + "strings" + "testing" + "time" +) + +func TestUnmarshalNewBasic(t *testing.T) { + type 
Config struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + Debug bool `pakt:"debug"` + } + + data := []byte("host:str = 'localhost'\nport:int = 8080\ndebug:bool = true\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Host != "localhost" || cfg.Port != 8080 || cfg.Debug != true { + t.Errorf("unexpected: %+v", cfg) + } +} + +func TestUnmarshalNewNested(t *testing.T) { + type Server struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + } + type Config struct { + Name string `pakt:"name"` + Server Server `pakt:"server"` + } + + data := []byte("name:str = 'myapp'\nserver:{host:str, port:int} = {'example.com', 443}\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Name != "myapp" || cfg.Server.Host != "example.com" || cfg.Server.Port != 443 { + t.Errorf("unexpected: %+v", cfg) + } +} + +func TestUnmarshalNewList(t *testing.T) { + type Config struct { + Tags []string `pakt:"tags"` + } + + data := []byte("tags:[str] = ['alpha', 'beta']\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if len(cfg.Tags) != 2 || cfg.Tags[0] != "alpha" || cfg.Tags[1] != "beta" { + t.Errorf("unexpected: %+v", cfg) + } +} + +func TestUnmarshalNewMap(t *testing.T) { + type Config struct { + Headers map[string]string `pakt:"headers"` + } + + data := []byte("headers: = <'X-Foo' ; 'bar'>\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Headers["X-Foo"] != "bar" { + t.Errorf("unexpected: %+v", cfg) + } +} + +func TestUnmarshalNewNullable(t *testing.T) { + type Config struct { + Label *string `pakt:"label"` + Count *int64 `pakt:"count"` + } + + data := []byte("label:str? = nil\ncount:int? 
= 42\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Label != nil { + t.Errorf("expected nil label, got %q", *cfg.Label) + } + if cfg.Count == nil || *cfg.Count != 42 { + t.Errorf("expected count=42, got %v", cfg.Count) + } +} + +func TestUnmarshalNewTimestamp(t *testing.T) { + type Config struct { + Created time.Time `pakt:"created"` + } + + data := []byte("created:ts = 2026-06-01T14:30:00Z\n") + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Created.Year() != 2026 || cfg.Created.Month() != 6 { + t.Errorf("unexpected: %v", cfg.Created) + } +} + +func TestUnmarshalNewUnknownFields(t *testing.T) { + type Config struct { + Name string `pakt:"name"` + } + + data := []byte("name:str = 'svc'\nextra:int = 42\n") + + // Default: skip unknown + cfg, err := UnmarshalNew[Config](data) + if err != nil { + t.Fatal(err) + } + if cfg.Name != "svc" { + t.Errorf("unexpected: %+v", cfg) + } + + // Strict: error on unknown + _, err = UnmarshalNew[Config](data, UnknownFields(ErrorUnknown)) + if err == nil { + t.Error("expected error for unknown field 'extra'") + } +} + +func TestUnmarshalNewPack(t *testing.T) { + type Entry struct { + Name string `pakt:"name"` + Size int64 `pakt:"size"` + } + type Doc struct { + Files []Entry `pakt:"files"` + } + + data := []byte("files:[{name:str, size:int}] <<\n{'readme.md', 100}\n{'main.go', 500}\n") + doc, err := UnmarshalNew[Doc](data) + if err != nil { + t.Fatal(err) + } + if len(doc.Files) != 2 { + t.Fatalf("expected 2 files, got %d", len(doc.Files)) + } + if doc.Files[0].Name != "readme.md" || doc.Files[0].Size != 100 { + t.Errorf("file 0: %+v", doc.Files[0]) + } +} + +func TestUnmarshalNewDuplicateError(t *testing.T) { + type Config struct { + Name string `pakt:"name"` + } + + data := []byte("name:str = 'first'\nname:str = 'second'\n") + _, err := UnmarshalNew[Config](data, Duplicates(ErrorDupes)) + if err == nil { + t.Error("expected error for duplicate 
'name'") + } +} + +func TestUnmarshalNewFrom(t *testing.T) { + type Config struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + } + + r := strings.NewReader("host:str = 'example.com'\nport:int = 443\n") + cfg, err := UnmarshalNewFrom[Config](r) + if err != nil { + t.Fatal(err) + } + if cfg.Host != "example.com" || cfg.Port != 443 { + t.Errorf("unexpected: %+v", cfg) + } +} + +func TestUnmarshalNewMissingFieldsError(t *testing.T) { + type Config struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + } + + data := []byte("host:str = 'localhost'\n") // missing 'port' + _, err := UnmarshalNew[Config](data, MissingFields(ErrorMissing)) + if err == nil { + t.Error("expected error for missing field 'port'") + } +} + +func TestUnmarshalNewMissingFieldsZero(t *testing.T) { + type Config struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` + } + + data := []byte("host:str = 'localhost'\n") // missing 'port' + cfg, err := UnmarshalNew[Config](data, MissingFields(ZeroMissing)) + if err != nil { + t.Fatal(err) + } + if cfg.Host != "localhost" { + t.Errorf("unexpected host: %q", cfg.Host) + } + if cfg.Port != 0 { + t.Errorf("expected port=0, got %d", cfg.Port) + } +} + +func TestUnmarshalNewDuplicateFirstWins(t *testing.T) { + type Config struct { + Name string `pakt:"name"` + } + + data := []byte("name:str = 'first'\nname:str = 'second'\n") + cfg, err := UnmarshalNew[Config](data, Duplicates(FirstWins)) + if err != nil { + t.Fatal(err) + } + if cfg.Name != "first" { + t.Errorf("expected 'first' (FirstWins), got %q", cfg.Name) + } +} + +func TestUnmarshalNewDuplicateLastWins(t *testing.T) { + type Config struct { + Name string `pakt:"name"` + } + + data := []byte("name:str = 'first'\nname:str = 'second'\n") + cfg, err := UnmarshalNew[Config](data, Duplicates(LastWins)) + if err != nil { + t.Fatal(err) + } + if cfg.Name != "second" { + t.Errorf("expected 'second' (LastWins), got %q", cfg.Name) + } +} + +func TestUnmarshalNewFromNonStruct(t 
*testing.T) { + r := strings.NewReader("x:int = 1\n") + _, err := UnmarshalNewFrom[int](r) + if err == nil { + t.Error("expected error for non-struct type") + } +} diff --git a/encoding/unmarshal_next_test.go b/encoding/unmarshal_next_test.go deleted file mode 100644 index 593e345..0000000 --- a/encoding/unmarshal_next_test.go +++ /dev/null @@ -1,407 +0,0 @@ -package encoding - -import ( - "bytes" - "io" - "strings" - "testing" -) - -func TestUnmarshalNextBasicAssignment(t *testing.T) { - doc := "name:str = 'hello'\ncount:int = 42\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Name string `pakt:"name"` - Count int `pakt:"count"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Name != "hello" { - t.Errorf("Name = %q, want %q", d.Name, "hello") - } - if d.Count != 42 { - t.Errorf("Count = %d, want %d", d.Count, 42) - } -} - -func TestUnmarshalNextPackList(t *testing.T) { - doc := "items:[int] <<\n1\n2\n3\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Items []int `pakt:"items"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if len(d.Items) != 3 { - t.Fatalf("Items length = %d, want 3", len(d.Items)) - } - if d.Items[0] != 1 || d.Items[1] != 2 || d.Items[2] != 3 { - t.Errorf("Items = %v, want [1, 2, 3]", d.Items) - } -} - -func TestUnmarshalNextPackStruct(t *testing.T) { - doc := `root:str = '/data' -entries:[{name:str, size:int}] << - {'file1.txt', 100} - {'file2.txt', 200} -` - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Entry struct { - Name string `pakt:"name"` - Size int `pakt:"size"` - } - type Doc struct { - Root string `pakt:"root"` - Entries []Entry `pakt:"entries"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Root != "/data" { - t.Errorf("Root 
= %q, want %q", d.Root, "/data") - } - if len(d.Entries) != 2 { - t.Fatalf("Entries length = %d, want 2", len(d.Entries)) - } - if d.Entries[0].Name != "file1.txt" || d.Entries[0].Size != 100 { - t.Errorf("Entries[0] = %+v", d.Entries[0]) - } -} - -func TestUnmarshalNextPackElementByElement(t *testing.T) { - doc := "items:[int] <<\n10\n20\n30\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - var items []int - for dec.More() { - var item int - if err := dec.UnmarshalNext(&item); err != nil { - t.Fatal(err) - } - items = append(items, int(item)) - } - - if len(items) != 3 { - t.Fatalf("items length = %d, want 3", len(items)) - } - if items[0] != 10 || items[1] != 20 || items[2] != 30 { - t.Errorf("items = %v, want [10, 20, 30]", items) - } -} - -func TestUnmarshalNextPackElementThenAssign(t *testing.T) { - doc := "nums:[int] <<\n1\n2\nname:str = 'after'\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - var nums []int - for dec.More() { - var n int - if err := dec.UnmarshalNext(&n); err != nil { - if err == io.EOF { - break - } - t.Fatal(err) - } - nums = append(nums, int(n)) - // After reading pack elements, check if more pack elements - // or if the next statement has started. - if !dec.More() { - break - } - } - - if len(nums) != 2 { - t.Fatalf("nums = %v, want [1, 2]", nums) - } - - // Now read the assignment after the pack. 
- type Doc struct { - Name string `pakt:"name"` - } - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - if d.Name != "after" { - t.Errorf("Name = %q, want %q", d.Name, "after") - } -} - -func TestUnmarshalNextSkipsUnknownFields(t *testing.T) { - doc := "extra:str = 'skip me'\nname:str = 'keep'\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Name string `pakt:"name"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Name != "keep" { - t.Errorf("Name = %q, want %q", d.Name, "keep") - } -} - -func TestUnmarshalNextEOF(t *testing.T) { - dec := NewDecoder(strings.NewReader("")) - defer dec.Close() - - if dec.More() { - t.Error("More() = true on empty input") - } - - type Doc struct{} - var d Doc - err := dec.UnmarshalNext(&d) - if err != io.EOF { - t.Errorf("expected io.EOF, got %v", err) - } -} - -func TestUnmarshalNextNilPointerError(t *testing.T) { - dec := NewDecoder(strings.NewReader("x:int = 1\n")) - defer dec.Close() - - err := dec.UnmarshalNext(nil) - if err == nil { - t.Error("expected error for nil argument") - } -} - -func TestUnmarshalNextWithSpec(t *testing.T) { - doc := "name:str = 'hello'\nextra:int = 99\ncount:int = 42\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - spec := "name:str\ncount:int" - if err := dec.SetSpec(strings.NewReader(spec)); err != nil { - t.Fatal(err) - } - - type Doc struct { - Name string `pakt:"name"` - Count int `pakt:"count"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Name != "hello" { - t.Errorf("Name = %q, want %q", d.Name, "hello") - } - if d.Count != 42 { - t.Errorf("Count = %d, want %d", d.Count, 42) - } -} - -func TestUnmarshalNextList(t *testing.T) { - doc := "tags:[str] = ['alpha', 'beta', 'gamma']\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - 
- type Doc struct { - Tags []string `pakt:"tags"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if len(d.Tags) != 3 || d.Tags[0] != "alpha" { - t.Errorf("Tags = %v", d.Tags) - } -} - -func TestUnmarshalNextMap(t *testing.T) { - doc := "data: = <'a' ; 1, 'b' ; 2>\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Data map[string]int `pakt:"data"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if len(d.Data) != 2 || d.Data["a"] != 1 || d.Data["b"] != 2 { - t.Errorf("Data = %v", d.Data) - } -} - -func TestUnmarshalNextBoolAndFloat(t *testing.T) { - doc := "active:bool = true\nrate:float = 3.14e0\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Active bool `pakt:"active"` - Rate float64 `pakt:"rate"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if !d.Active { - t.Error("Active = false, want true") - } - if d.Rate < 3.13 || d.Rate > 3.15 { - t.Errorf("Rate = %f, want ~3.14", d.Rate) - } -} - -func TestUnmarshalNextNullable(t *testing.T) { - doc := "name:str? 
= nil\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Name *string `pakt:"name"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Name != nil { - t.Errorf("Name = %v, want nil", d.Name) - } -} - -func TestDecoderCloseIdempotent(t *testing.T) { - dec := NewDecoder(bytes.NewReader(nil)) - dec.Close() - dec.Close() // second close should not panic -} - -func TestUnmarshalNextNestedStruct(t *testing.T) { - doc := "config:{host:str, port:int} = {'localhost', 8080}\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Config struct { - Host string `pakt:"host"` - Port int `pakt:"port"` - } - type Doc struct { - Config Config `pakt:"config"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - if d.Config.Host != "localhost" || d.Config.Port != 8080 { - t.Errorf("Config = %+v", d.Config) - } -} - -func TestUnmarshalNextStructIntoMap(t *testing.T) { - doc := "meta:{author:str, version:int} = {'alice', 3}\n" - - type Doc struct { - Meta map[string]string `pakt:"meta"` - } - - var d Doc - if err := Unmarshal([]byte(doc), &d); err != nil { - t.Fatal(err) - } - - if d.Meta["author"] != "alice" { - t.Errorf("Meta[author] = %q, want %q", d.Meta["author"], "alice") - } -} - -func TestUnmarshalNextTuple(t *testing.T) { - doc := "pair:(str, int) = ('hello', 42)\n" - - type Doc struct { - Pair []string `pakt:"pair"` - } - - var d Doc - if err := Unmarshal([]byte(doc), &d); err != nil { - t.Fatal(err) - } - - if len(d.Pair) != 2 || d.Pair[0] != "hello" { - t.Errorf("Pair = %v", d.Pair) - } -} - -func TestUnmarshalNextTs(t *testing.T) { - doc := "ts:ts = 2026-01-15T10:30:00Z\n" - dec := NewDecoder(strings.NewReader(doc)) - defer dec.Close() - - type Doc struct { - Ts string `pakt:"ts"` - } - - var d Doc - for dec.More() { - if err := dec.UnmarshalNext(&d); err != nil { - t.Fatal(err) - } - } - - 
if d.Ts != "2026-01-15T10:30:00Z" { - t.Errorf("Ts = %q", d.Ts) - } -} diff --git a/encoding/unmarshal_test.go b/encoding/unmarshal_test.go deleted file mode 100644 index c46c7f6..0000000 --- a/encoding/unmarshal_test.go +++ /dev/null @@ -1,642 +0,0 @@ -package encoding - -import ( - "bytes" - "reflect" - "testing" - "time" -) - -// --------------------------------------------------------------------------- -// Test structs -// --------------------------------------------------------------------------- - -type simpleScalar struct { - Host string `pakt:"host"` -} - -type multiField struct { - Host string `pakt:"host"` - Port int64 `pakt:"port"` -} - -type allScalars struct { - Name string `pakt:"name"` - Age int64 `pakt:"age"` - Price string `pakt:"price"` - Rate float64 `pakt:"rate"` - Active bool `pakt:"active"` - ID string `pakt:"id"` - Born string `pakt:"born"` - Created string `pakt:"created"` -} - -type withNestedStruct struct { - Server struct { - Host string `pakt:"host"` - Port int64 `pakt:"port"` - } `pakt:"server"` -} - -type withList struct { - Tags []string `pakt:"tags"` -} - -type withMap struct { - Headers map[string]string `pakt:"headers"` -} - -type withBytes struct { - Data []byte `pakt:"data"` -} - -type withPointer struct { - Name *string `pakt:"name"` - Age *int64 `pakt:"age"` -} - -type withTimeFields struct { - Created time.Time `pakt:"created"` -} - -type innerStruct struct { - Host string `pakt:"host"` - Port int64 `pakt:"port"` -} - -type outerWithInner struct { - Server innerStruct `pakt:"server"` -} - -type withIntList struct { - Ports []int64 `pakt:"ports"` -} - -type nestedListOfStructs struct { - Servers []innerStruct `pakt:"servers"` -} - -// --------------------------------------------------------------------------- -// Test: Simple scalar -// --------------------------------------------------------------------------- - -func TestUnmarshalSimpleScalar(t *testing.T) { - data := []byte(`host:str = 'localhost'`) - var v simpleScalar - 
if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Host != "localhost" { - t.Errorf("got Host=%q, want %q", v.Host, "localhost") - } -} - -// --------------------------------------------------------------------------- -// Test: Multiple assignments -// --------------------------------------------------------------------------- - -func TestUnmarshalMultipleAssignments(t *testing.T) { - data := []byte("host:str = 'example.com'\nport:int = 8080") - var v multiField - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Host != "example.com" { - t.Errorf("got Host=%q, want %q", v.Host, "example.com") - } - if v.Port != 8080 { - t.Errorf("got Port=%d, want %d", v.Port, 8080) - } -} - -// --------------------------------------------------------------------------- -// Test: All scalar types -// --------------------------------------------------------------------------- - -func TestUnmarshalAllScalarTypes(t *testing.T) { - data := []byte(`name:str = 'Alice' -age:int = 30 -price:dec = 19.99 -rate:float = 1.5e+2 -active:bool = true -id:uuid = 550e8400-e29b-41d4-a716-446655440000 -born:date = 2000-01-15 -created:ts = 2024-06-01T12:00:00Z`) - - var v allScalars - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - - checks := []struct { - name string - got any - want any - }{ - {"Name", v.Name, "Alice"}, - {"Age", v.Age, int64(30)}, - {"Price", v.Price, "19.99"}, - {"Rate", v.Rate, 150.0}, - {"Active", v.Active, true}, - {"ID", v.ID, "550e8400-e29b-41d4-a716-446655440000"}, - {"Born", v.Born, "2000-01-15"}, - {"Created", v.Created, "2024-06-01T12:00:00Z"}, - } - for _, c := range checks { - if !reflect.DeepEqual(c.got, c.want) { - t.Errorf("%s: got %v (%T), want %v (%T)", c.name, c.got, c.got, c.want, c.want) - } - } -} - -// --------------------------------------------------------------------------- -// Test: Struct value → nested Go struct -// --------------------------------------------------------------------------- - -func 
TestUnmarshalStructValue(t *testing.T) { - data := []byte("server:{host:str, port:int} = {'localhost', 8080}") - var v withNestedStruct - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Server.Host != "localhost" { - t.Errorf("got Host=%q, want %q", v.Server.Host, "localhost") - } - if v.Server.Port != 8080 { - t.Errorf("got Port=%d, want %d", v.Server.Port, 8080) - } -} - -func TestUnmarshalNamedStructField(t *testing.T) { - data := []byte("server:{host:str, port:int} = {'example.com', 443}") - var v outerWithInner - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Server.Host != "example.com" { - t.Errorf("got Host=%q, want %q", v.Server.Host, "example.com") - } - if v.Server.Port != 443 { - t.Errorf("got Port=%d, want %d", v.Server.Port, 443) - } -} - -// --------------------------------------------------------------------------- -// Test: List value → slice -// --------------------------------------------------------------------------- - -func TestUnmarshalListValue(t *testing.T) { - data := []byte("tags:[str] = ['alpha', 'beta', 'gamma']") - var v withList - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - want := []string{"alpha", "beta", "gamma"} - if !reflect.DeepEqual(v.Tags, want) { - t.Errorf("got Tags=%v, want %v", v.Tags, want) - } -} - -func TestUnmarshalIntList(t *testing.T) { - data := []byte("ports:[int] = [80, 443, 8080]") - var v withIntList - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - want := []int64{80, 443, 8080} - if !reflect.DeepEqual(v.Ports, want) { - t.Errorf("got Ports=%v, want %v", v.Ports, want) - } -} - -// --------------------------------------------------------------------------- -// Test: Map value → Go map -// --------------------------------------------------------------------------- - -func TestUnmarshalMapValue(t *testing.T) { - data := []byte("headers: = <'Content-Type' ; 'application/json', 'Accept' ; 'text/html'>") - var v withMap - if err := 
Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Headers["Content-Type"] != "application/json" { - t.Errorf("got Content-Type=%q", v.Headers["Content-Type"]) - } - if v.Headers["Accept"] != "text/html" { - t.Errorf("got Accept=%q", v.Headers["Accept"]) - } -} - -func TestUnmarshalBinValue(t *testing.T) { - data := []byte("data:bin = b'SGVsbG8='") - var v withBytes - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if !reflect.DeepEqual(v.Data, []byte("Hello")) { - t.Fatalf("got %v, want %v", v.Data, []byte("Hello")) - } -} - -// --------------------------------------------------------------------------- -// Test: Nullable/pointer -// --------------------------------------------------------------------------- - -func TestUnmarshalPointerNonNil(t *testing.T) { - data := []byte("name:str? = 'hello'\nage:int? = 42") - var v withPointer - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Name == nil || *v.Name != "hello" { - t.Errorf("got Name=%v, want 'hello'", v.Name) - } - if v.Age == nil || *v.Age != 42 { - t.Errorf("got Age=%v, want 42", v.Age) - } -} - -func TestUnmarshalPointerNil(t *testing.T) { - data := []byte("name:str? = nil\nage:int? 
= nil") - var v withPointer - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Name != nil { - t.Errorf("got Name=%v, want nil", v.Name) - } - if v.Age != nil { - t.Errorf("got Age=%v, want nil", v.Age) - } -} - -// --------------------------------------------------------------------------- -// Test: Unknown fields → ignored -// --------------------------------------------------------------------------- - -func TestUnmarshalUnknownFields(t *testing.T) { - data := []byte("host:str = 'x'\nunknown_field:int = 99") - var v simpleScalar - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Host != "x" { - t.Errorf("got Host=%q, want %q", v.Host, "x") - } -} - -// --------------------------------------------------------------------------- -// Test: Missing fields → zero value -// --------------------------------------------------------------------------- - -func TestUnmarshalMissingFields(t *testing.T) { - data := []byte("host:str = 'only-host'") - var v multiField - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Host != "only-host" { - t.Errorf("got Host=%q, want %q", v.Host, "only-host") - } - if v.Port != 0 { - t.Errorf("got Port=%d, want 0", v.Port) - } -} - -// --------------------------------------------------------------------------- -// Test: Nested composites — struct with list of structs -// --------------------------------------------------------------------------- - -func TestUnmarshalNestedComposites(t *testing.T) { - data := []byte("servers:[{host:str, port:int}] = [{'web1', 80}, {'web2', 443}]") - var v nestedListOfStructs - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if len(v.Servers) != 2 { - t.Fatalf("got %d servers, want 2", len(v.Servers)) - } - if v.Servers[0].Host != "web1" || v.Servers[0].Port != 80 { - t.Errorf("servers[0] = %+v", v.Servers[0]) - } - if v.Servers[1].Host != "web2" || v.Servers[1].Port != 443 { - t.Errorf("servers[1] = %+v", v.Servers[1]) - } -} - 
-// --------------------------------------------------------------------------- -// Test: time.Time parsing -// --------------------------------------------------------------------------- - -func TestUnmarshalTimeTime(t *testing.T) { - data := []byte("created:ts = 2024-06-01T12:00:00Z") - var v withTimeFields - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - want := time.Date(2024, 6, 1, 12, 0, 0, 0, time.UTC) - if !v.Created.Equal(want) { - t.Errorf("got Created=%v, want %v", v.Created, want) - } -} - -// --------------------------------------------------------------------------- -// Test: Round-trip (Encode → Unmarshal) -// --------------------------------------------------------------------------- - -func TestUnmarshalRoundTrip(t *testing.T) { - type Config struct { - Host string `pakt:"host"` - Port int64 `pakt:"port"` - Debug bool `pakt:"debug"` - Rate float64 `pakt:"rate"` - } - - original := Config{ - Host: "example.com", - Port: 8080, - Debug: true, - Rate: 9.5e+1, - } - - // Encode each field. - var buf bytes.Buffer - enc := NewEncoder(&buf) - - fields, err := StructFields(reflect.TypeOf(original)) - if err != nil { - t.Fatal(err) - } - rv := reflect.ValueOf(original) - for _, fi := range fields { - if err := enc.Encode(fi.Name, fi.Type, rv.Field(fi.Index).Interface()); err != nil { - t.Fatalf("encode %s: %v", fi.Name, err) - } - } - - // Unmarshal back. 
- var decoded Config - if err := Unmarshal(buf.Bytes(), &decoded); err != nil { - t.Fatalf("unmarshal: %v\npakt data:\n%s", err, buf.String()) - } - - if decoded.Host != original.Host { - t.Errorf("Host: got %q, want %q", decoded.Host, original.Host) - } - if decoded.Port != original.Port { - t.Errorf("Port: got %d, want %d", decoded.Port, original.Port) - } - if decoded.Debug != original.Debug { - t.Errorf("Debug: got %v, want %v", decoded.Debug, original.Debug) - } - if decoded.Rate != original.Rate { - t.Errorf("Rate: got %v, want %v", decoded.Rate, original.Rate) - } -} - -func TestUnmarshalRoundTripList(t *testing.T) { - type Doc struct { - Tags []string `pakt:"tags"` - } - - original := Doc{Tags: []string{"a", "b", "c"}} - - var buf bytes.Buffer - enc := NewEncoder(&buf) - fields, err := StructFields(reflect.TypeOf(original)) - if err != nil { - t.Fatal(err) - } - rv := reflect.ValueOf(original) - for _, fi := range fields { - if err := enc.Encode(fi.Name, fi.Type, rv.Field(fi.Index).Interface()); err != nil { - t.Fatal(err) - } - } - - var decoded Doc - if err := Unmarshal(buf.Bytes(), &decoded); err != nil { - t.Fatalf("unmarshal: %v\npakt:\n%s", err, buf.String()) - } - if !reflect.DeepEqual(decoded.Tags, original.Tags) { - t.Errorf("Tags: got %v, want %v", decoded.Tags, original.Tags) - } -} - -func TestUnmarshalRoundTripStruct(t *testing.T) { - type Inner struct { - X int64 `pakt:"x"` - Y string `pakt:"y"` - } - type Doc struct { - Data Inner `pakt:"data"` - } - - original := Doc{Data: Inner{X: 42, Y: "hello"}} - - var buf bytes.Buffer - enc := NewEncoder(&buf) - fields, err := StructFields(reflect.TypeOf(original)) - if err != nil { - t.Fatal(err) - } - rv := reflect.ValueOf(original) - for _, fi := range fields { - if err := enc.Encode(fi.Name, fi.Type, rv.Field(fi.Index).Interface()); err != nil { - t.Fatal(err) - } - } - - var decoded Doc - if err := Unmarshal(buf.Bytes(), &decoded); err != nil { - t.Fatalf("unmarshal: %v\npakt:\n%s", err, 
buf.String()) - } - if decoded.Data != original.Data { - t.Errorf("Data: got %+v, want %+v", decoded.Data, original.Data) - } -} - -// --------------------------------------------------------------------------- -// Test: Error cases -// --------------------------------------------------------------------------- - -func TestUnmarshalErrors(t *testing.T) { - t.Run("non-pointer", func(t *testing.T) { - var v simpleScalar - err := Unmarshal([]byte("host:str = 'x'"), v) - if err == nil { - t.Fatal("expected error for non-pointer") - } - }) - - t.Run("pointer-to-non-struct", func(t *testing.T) { - var s string - err := Unmarshal([]byte("host:str = 'x'"), &s) - if err == nil { - t.Fatal("expected error for pointer-to-string") - } - }) - - t.Run("nil-pointer", func(t *testing.T) { - err := Unmarshal([]byte("host:str = 'x'"), nil) - if err == nil { - t.Fatal("expected error for nil pointer") - } - }) - - t.Run("type-mismatch-bool-into-string", func(t *testing.T) { - type S struct { - Active bool `pakt:"active"` - } - // Valid PAKT but Active is bool, receiving str value — this would actually - // be a parse error from the decoder since the type annotation says str but - // the value is 'hello'. Let's try bool into int. - data := []byte("active:bool = true") - var v S - err := Unmarshal(data, &v) - // This should succeed since the types match. 
- if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !v.Active { - t.Error("expected Active=true") - } - }) - - t.Run("invalid-pakt", func(t *testing.T) { - type S struct { - Host string `pakt:"host"` - } - err := Unmarshal([]byte("this is not valid pakt"), &S{}) - if err == nil { - t.Fatal("expected error for invalid PAKT") - } - }) -} - -// --------------------------------------------------------------------------- -// Test: Int formats (hex, binary, octal, underscore) -// --------------------------------------------------------------------------- - -func TestUnmarshalIntFormats(t *testing.T) { - type S struct { - Val int64 `pakt:"val"` - } - - tests := []struct { - pakt string - want int64 - }{ - {"val:int = 42", 42}, - {"val:int = -10", -10}, - {"val:int = 0xFF", 255}, - {"val:int = 0b1010", 10}, - {"val:int = 0o77", 63}, - {"val:int = 1_000", 1000}, - } - - for _, tc := range tests { - var v S - if err := Unmarshal([]byte(tc.pakt), &v); err != nil { - t.Errorf("Unmarshal(%q): %v", tc.pakt, err) - continue - } - if v.Val != tc.want { - t.Errorf("Unmarshal(%q): got %d, want %d", tc.pakt, v.Val, tc.want) - } - } -} - -// --------------------------------------------------------------------------- -// Test: Empty list and map -// --------------------------------------------------------------------------- - -func TestUnmarshalEmptyList(t *testing.T) { - data := []byte("tags:[str] = []") - var v withList - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Tags == nil || len(v.Tags) != 0 { - t.Errorf("got Tags=%v, want empty slice", v.Tags) - } -} - -func TestUnmarshalEmptyMap(t *testing.T) { - data := []byte("headers: = <>") - var v withMap - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Headers == nil || len(v.Headers) != 0 { - t.Errorf("got Headers=%v, want empty map", v.Headers) - } -} - -// --------------------------------------------------------------------------- -// Test: Dec into float64 -// 
--------------------------------------------------------------------------- - -func TestUnmarshalDecIntoFloat(t *testing.T) { - type S struct { - Price float64 `pakt:"price"` - } - data := []byte("price:dec = 19.99") - var v S - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Price != 19.99 { - t.Errorf("got Price=%v, want 19.99", v.Price) - } -} - -// --------------------------------------------------------------------------- -// Test: Lowercase field name fallback (no pakt tag) -// --------------------------------------------------------------------------- - -func TestUnmarshalLowercaseFieldName(t *testing.T) { - type S struct { - Hostname string - } - data := []byte("hostname:str = 'myhost'") - var v S - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Hostname != "myhost" { - t.Errorf("got Hostname=%q, want %q", v.Hostname, "myhost") - } -} - -// --------------------------------------------------------------------------- -// Test: Int into uint -// --------------------------------------------------------------------------- - -func TestUnmarshalIntIntoUint(t *testing.T) { - type S struct { - Port uint16 `pakt:"port"` - } - data := []byte("port:int = 8080") - var v S - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Port != 8080 { - t.Errorf("got Port=%d, want 8080", v.Port) - } -} - -func TestUnmarshalLeadingDotDecimal(t *testing.T) { - type S struct { - Price string `pakt:"price"` - } - data := []byte("price:dec = .99") - var v S - if err := Unmarshal(data, &v); err != nil { - t.Fatal(err) - } - if v.Price != ".99" { - t.Errorf("got Price=%q, want %q", v.Price, ".99") - } -} diff --git a/encoding/unmarshal_visitor.go b/encoding/unmarshal_visitor.go deleted file mode 100644 index 2df3dbd..0000000 --- a/encoding/unmarshal_visitor.go +++ /dev/null @@ -1,599 +0,0 @@ -package encoding - -import ( - "fmt" - "io" - "reflect" -) - -// unmarshalValue reads the next value from the reader using the given 
type -// information and writes it directly into target, bypassing Event creation. -func (sm *stateMachine) unmarshalValue(typ Type, target reflect.Value) error { - sm.r.skipWS() - - // Handle nullable types. - if typ.Nullable { - if sm.r.peekNil() { - return sm.r.readNilInto(target) - } - } else if sm.r.peekNil() { - return sm.r.wrapf(ErrNilNonNullable, "nil value for non-nullable type %s", typ.String()) - } - - switch { - case typ.Scalar != nil: - return sm.r.readScalarInto(*typ.Scalar, target) - - case typ.AtomSet != nil: - return sm.r.readAtomInto(typ.AtomSet.Members, target) - - case typ.Struct != nil: - return sm.unmarshalStruct(typ.Struct, target) - - case typ.Tuple != nil: - return sm.unmarshalTuple(typ.Tuple, target) - - case typ.List != nil: - return sm.unmarshalList(typ.List, target) - - case typ.Map != nil: - return sm.unmarshalMap(typ.Map, target) - - default: - return sm.r.errorf("unknown type: no type variant set") - } -} - -// unmarshalStruct reads { value, value, ... } into target using positional -// field matching from the StructType definition. 
-func (sm *stateMachine) unmarshalStruct(st *StructType, target reflect.Value) error { - sm.r.skipWS() - if err := sm.r.expectByte('{'); err != nil { - return err - } - - target = allocPtr(target) - - if target.Kind() == reflect.Map { - return sm.unmarshalStructIntoMap(st, target) - } - - if target.Kind() != reflect.Struct { - return fmt.Errorf("cannot unmarshal struct into %s", target.Type()) - } - - info, err := cachedStructFields(target.Type()) - if err != nil { - return err - } - - for i, field := range st.Fields { - if i == 0 { - sm.r.skipInsignificant(true) - } - - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated struct value") - } - if b == '}' { - return sm.r.errorf("too few values in struct: expected %d fields, got %d", - len(st.Fields), i) - } - - fi, ok := info.fieldMap[field.Name] - if ok { - if err := sm.unmarshalValue(field.Type, target.Field(fi.Index)); err != nil { - return fmt.Errorf("field %q: %w", field.Name, err) - } - } else { - // Skip unknown field — read and discard value. - if _, _, err := sm.skipTypedValue(field.Type); err != nil { - return err - } - } - - if i < len(st.Fields)-1 { - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipWS() - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated struct value") - } - if b == '}' { - return sm.r.errorf("too few values in struct: expected %d fields, got %d", - len(st.Fields), i+1) - } - return sm.r.errorf("expected separator between struct fields") - } - } - } - - // Consume optional trailing separator and closing brace. - sm.r.readSep() //nolint:errcheck - sm.r.skipInsignificant(true) - return sm.r.expectByte('}') -} - -// unmarshalStructIntoMap reads a PAKT struct into a Go map[string]T. 
-func (sm *stateMachine) unmarshalStructIntoMap(st *StructType, target reflect.Value) error { - if target.IsNil() { - target.Set(reflect.MakeMap(target.Type())) - } - valType := target.Type().Elem() - - for i, field := range st.Fields { - if i == 0 { - sm.r.skipInsignificant(true) - } - - val := reflect.New(valType).Elem() - if err := sm.unmarshalValue(field.Type, val); err != nil { - return fmt.Errorf("map key %q: %w", field.Name, err) - } - target.SetMapIndex(reflect.ValueOf(field.Name), val) - - if i < len(st.Fields)-1 { - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - return sm.r.errorf("expected separator between struct fields") - } - } - } - - sm.r.readSep() //nolint:errcheck - sm.r.skipInsignificant(true) - return sm.r.expectByte('}') -} - -// unmarshalTuple reads ( value, value, ... ) into target. -func (sm *stateMachine) unmarshalTuple(tt *TupleType, target reflect.Value) error { - sm.r.skipWS() - if err := sm.r.expectByte('('); err != nil { - return err - } - - target = allocPtr(target) - if target.Kind() != reflect.Slice { - return fmt.Errorf("cannot unmarshal tuple into %s", target.Type()) - } - - elemType := target.Type().Elem() - target.Set(reflect.MakeSlice(target.Type(), 0, len(tt.Elements))) - - for i, elemTyp := range tt.Elements { - if i == 0 { - sm.r.skipInsignificant(true) - } - - target.Grow(1) - target.SetLen(target.Len() + 1) - elem := target.Index(target.Len() - 1) - if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { - elem.Set(reflect.New(elemType).Elem()) - } - - if err := sm.unmarshalValue(elemTyp, elem); err != nil { - return err - } - - if i < len(tt.Elements)-1 { - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - return sm.r.errorf("expected separator between tuple elements") - } - } - } - - sm.r.readSep() //nolint:errcheck - sm.r.skipInsignificant(true) - return sm.r.expectByte(')') -} - -// unmarshalList reads [ value, 
value, ... ] into target. -func (sm *stateMachine) unmarshalList(lt *ListType, target reflect.Value) error { - sm.r.skipWS() - if err := sm.r.expectByte('['); err != nil { - return err - } - - target = allocPtr(target) - if target.Kind() != reflect.Slice { - return fmt.Errorf("cannot unmarshal list into %s", target.Type()) - } - - elemType := target.Type().Elem() - target.Set(reflect.MakeSlice(target.Type(), 0, 8)) - - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated list value") - } - if b == ']' { - sm.r.readByte() //nolint:errcheck - return nil - } - - for { - target.Grow(1) - target.SetLen(target.Len() + 1) - elem := target.Index(target.Len() - 1) - if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { - elem.Set(reflect.New(elemType).Elem()) - } - - if err := sm.unmarshalValue(lt.Element, elem); err != nil { - return err - } - - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipWS() - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated list value") - } - if b != ']' { - return sm.r.errorf("expected ',' or ']' in list, got %q", rune(b)) - } - sm.r.readByte() //nolint:errcheck - return nil - } - - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == ']' { - sm.r.readByte() //nolint:errcheck - return nil - } - } -} - -// unmarshalMap reads < key ; value, ... > into target. 
-func (sm *stateMachine) unmarshalMap(mt *MapType, target reflect.Value) error { - sm.r.skipWS() - if err := sm.r.expectByte('<'); err != nil { - return err - } - - target = allocPtr(target) - if target.Kind() != reflect.Map { - return fmt.Errorf("cannot unmarshal map into %s", target.Type()) - } - - if target.IsNil() { - target.Set(reflect.MakeMap(target.Type())) - } - - keyType := target.Type().Key() - valType := target.Type().Elem() - - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated map value") - } - if b == '>' { - sm.r.readByte() //nolint:errcheck - return nil - } - - for { - key := reflect.New(keyType).Elem() - if err := sm.unmarshalValue(mt.Key, key); err != nil { - return fmt.Errorf("map key: %w", err) - } - - sm.r.skipWS() - if err := sm.r.expectByte(';'); err != nil { - return err - } - sm.r.skipWS() - - val := reflect.New(valType).Elem() - if err := sm.unmarshalValue(mt.Value, val); err != nil { - return fmt.Errorf("map value: %w", err) - } - - target.SetMapIndex(key, val) - - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipWS() - b, err := sm.r.peekByte() - if err != nil { - return sm.r.wrapf(ErrUnexpectedEOF, "unterminated map value") - } - if b != '>' { - return sm.r.errorf("expected ',' or '>' in map, got %q", rune(b)) - } - sm.r.readByte() //nolint:errcheck - return nil - } - - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == '>' { - sm.r.readByte() //nolint:errcheck - return nil - } - } -} - -// unmarshalPackList reads pack list elements (<<) into target. 
-func (sm *stateMachine) unmarshalPackList(lt *ListType, target reflect.Value) error { - target = allocPtr(target) - if target.Kind() != reflect.Slice { - return fmt.Errorf("cannot unmarshal list pack into %s", target.Type()) - } - - elemType := target.Type().Elem() - target.Set(reflect.MakeSlice(target.Type(), 0, 64)) - - for { - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - if !sm.r.canStartValueInPack(b) { - return nil - } - - target.Grow(1) - target.SetLen(target.Len() + 1) - elem := target.Index(target.Len() - 1) - if elemType.Kind() == reflect.Ptr || elemType.Kind() == reflect.Map || elemType.Kind() == reflect.Slice { - elem.Set(reflect.New(elemType).Elem()) - } - - if err := sm.unmarshalValue(lt.Element, elem); err != nil { - return err - } - - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - if !sm.r.canStartValueInPack(b) { - return nil - } - return sm.r.errorf("expected separator between pack items") - } - } -} - -// unmarshalPackMap reads pack map entries (<<) into target. 
-func (sm *stateMachine) unmarshalPackMap(mt *MapType, target reflect.Value) error { - target = allocPtr(target) - if target.Kind() != reflect.Map { - return fmt.Errorf("cannot unmarshal map pack into %s", target.Type()) - } - - if target.IsNil() { - target.Set(reflect.MakeMap(target.Type())) - } - - keyType := target.Type().Key() - valType := target.Type().Elem() - - for { - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - if !sm.r.canStartValueInPack(b) { - return nil - } - - key := reflect.New(keyType).Elem() - if err := sm.unmarshalValue(mt.Key, key); err != nil { - return fmt.Errorf("pack map key: %w", err) - } - - sm.r.skipWS() - if err := sm.r.expectByte(';'); err != nil { - return err - } - sm.r.skipWS() - - val := reflect.New(valType).Elem() - if err := sm.unmarshalValue(mt.Value, val); err != nil { - return fmt.Errorf("pack map value: %w", err) - } - - target.SetMapIndex(key, val) - - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipInsignificant(true) - b, err := sm.r.peekByte() - if err != nil { - if err == io.EOF { - return nil - } - return err - } - if !sm.r.canStartValueInPack(b) { - return nil - } - return sm.r.errorf("expected separator between pack map entries") - } - } -} - -// skipTypedValue reads and discards a value of the given type. 
-func (sm *stateMachine) skipTypedValue(typ Type) (string, Pos, error) { - sm.r.skipWS() - - if typ.Nullable && sm.r.peekNil() { - pos := sm.r.pos - if err := sm.r.readNil(); err != nil { - return "", pos, err - } - return "nil", pos, nil - } - - switch { - case typ.Scalar != nil: - return sm.r.readScalarDirect(*typ.Scalar) - case typ.AtomSet != nil: - pos := sm.r.pos - val, err := sm.r.readAtom(typ.AtomSet.Members) - return val, pos, err - case typ.Struct != nil: - return "", sm.r.pos, sm.skipStruct(typ.Struct) - case typ.Tuple != nil: - return "", sm.r.pos, sm.skipTuple(typ.Tuple) - case typ.List != nil: - return "", sm.r.pos, sm.skipList(typ.List) - case typ.Map != nil: - return "", sm.r.pos, sm.skipMap(typ.Map) - default: - return "", sm.r.pos, sm.r.errorf("unknown type in skip") - } -} - -func (sm *stateMachine) skipStruct(st *StructType) error { - sm.r.skipWS() - if err := sm.r.expectByte('{'); err != nil { - return err - } - for i, field := range st.Fields { - if i == 0 { - sm.r.skipInsignificant(true) - } - if _, _, err := sm.skipTypedValue(field.Type); err != nil { - return err - } - if i < len(st.Fields)-1 { - sm.r.readSep() //nolint:errcheck - } - } - sm.r.readSep() //nolint:errcheck - sm.r.skipInsignificant(true) - return sm.r.expectByte('}') -} - -func (sm *stateMachine) skipTuple(tt *TupleType) error { - sm.r.skipWS() - if err := sm.r.expectByte('('); err != nil { - return err - } - for i, elemTyp := range tt.Elements { - if i == 0 { - sm.r.skipInsignificant(true) - } - if _, _, err := sm.skipTypedValue(elemTyp); err != nil { - return err - } - if i < len(tt.Elements)-1 { - sm.r.readSep() //nolint:errcheck - } - } - sm.r.readSep() //nolint:errcheck - sm.r.skipInsignificant(true) - return sm.r.expectByte(')') -} - -func (sm *stateMachine) skipList(lt *ListType) error { - sm.r.skipWS() - if err := sm.r.expectByte('['); err != nil { - return err - } - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == ']' { - sm.r.readByte() 
//nolint:errcheck - return nil - } - for { - if _, _, err := sm.skipTypedValue(lt.Element); err != nil { - return err - } - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipWS() - return sm.r.expectByte(']') - } - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == ']' { - sm.r.readByte() //nolint:errcheck - return nil - } - } -} - -func (sm *stateMachine) skipMap(mt *MapType) error { - sm.r.skipWS() - if err := sm.r.expectByte('<'); err != nil { - return err - } - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == '>' { - sm.r.readByte() //nolint:errcheck - return nil - } - for { - if _, _, err := sm.skipTypedValue(mt.Key); err != nil { - return err - } - sm.r.skipWS() - if err := sm.r.expectByte(';'); err != nil { - return err - } - sm.r.skipWS() - if _, _, err := sm.skipTypedValue(mt.Value); err != nil { - return err - } - sep, err := sm.r.readSep() - if err != nil { - return err - } - if !sep { - sm.r.skipWS() - return sm.r.expectByte('>') - } - sm.r.skipInsignificant(true) - if b, err := sm.r.peekByte(); err == nil && b == '>' { - sm.r.readByte() //nolint:errcheck - return nil - } - } -} diff --git a/site/content/docs/install.md b/site/content/docs/install.md index 2c7532c..c063d54 100644 --- a/site/content/docs/install.md +++ b/site/content/docs/install.md @@ -20,34 +20,108 @@ Add the encoding package to your Go project: go get github.com/trippwill/pakt/encoding ``` -### Usage +### Streaming (recommended) + +Process PAKT data one property at a time with constant memory: ```go package main import ( - "os" "fmt" - "io" + "os" + "github.com/trippwill/pakt/encoding" ) +type Config struct { + Host string `pakt:"host"` + Port int64 `pakt:"port"` +} + +type LogEvent struct { + Timestamp string `pakt:"ts"` + Level string `pakt:"level"` + Message string `pakt:"msg"` +} + func main() { - f, _ := os.Open("data.pakt") + f, err := os.Open("data.pakt") + if err != nil { + 
fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } defer f.Close() - dec := encoding.NewDecoder(f) - for { - evt, err := dec.Decode() - if err == io.EOF { - break + ur := encoding.NewUnitReader(f) + defer ur.Close() + + for prop := range ur.Properties() { + switch prop.Name { + case "config": + cfg, err := encoding.ReadValue[Config](ur) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + fmt.Printf("Server: %s:%d\n", cfg.Host, cfg.Port) + + case "events": + // Stream pack elements one at a time + for event := range encoding.PackItems[LogEvent](ur) { + fmt.Printf("[%s] %s: %s\n", event.Timestamp, event.Level, event.Message) + } } - if err != nil { - fmt.Fprintln(os.Stderr, err) - return - } - fmt.Println(evt) } + if err := ur.Err(); err != nil { + fmt.Fprintln(os.Stderr, err) + } +} +``` + +### Quick unmarshal + +Deserialize an entire PAKT unit into a struct: + +```go +type AppConfig struct { + Name string `pakt:"name"` + Port int64 `pakt:"port"` + Debug bool `pakt:"debug"` + Tags []string `pakt:"tags"` +} + +cfg, err := encoding.UnmarshalNew[AppConfig](data) +``` + +### Event-level decode + +For custom processing, use the low-level event decoder: + +```go +import ( + "fmt" + "io" + "os" + + "github.com/trippwill/pakt/encoding" +) + +// ... + +dec := encoding.NewDecoder(f) +defer dec.Close() + +for { + evt, err := dec.Decode() + if err == io.EOF { + break + } + if err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + fmt.Println(evt) } ``` diff --git a/site/layouts/index.html b/site/layouts/index.html index 69a7d6f..3b572b2 100644 --- a/site/layouts/index.html +++ b/site/layouts/index.html @@ -12,10 +12,10 @@

PAactive:bool = true server:{host:str, port:int} = { 'localhost', 8080 } -# Pack events — no delimiters needed -events:[{ts:ts, level:str}] << - { '2026-06-01T14:30:00Z', 'info' } - { '2026-06-01T14:31:00Z', 'warn' } +# Stream events with << — one per line +events:[{ts:ts, level:|info, warn, error|}] << + { 2026-06-01T14:30:00Z, |info } + { 2026-06-01T14:31:00Z, |warn }
Read the Docs @@ -124,7 +124,7 @@

Parse

3

Consume

-

Unmarshal into typed structs, process events one by one, or pack values into streaming output. Your data, your way.

+

Iterate properties with UnitReader, stream pack elements with PackItems[T], or unmarshal an entire unit with UnmarshalNew[T].

@@ -145,11 +145,11 @@

Pick your ecosystem

Go

-

Streaming state-machine decoder, encoder, reflection-based marshal/unmarshal. Zero-allocation hot path.

+

Streaming UnitReader with iter.Seq iterators, generic ReadValue[T], pack streaming, custom converters.

go install github.com/trippwill/pakt@latest
- Streaming decoder - Marshal/Unmarshal + UnitReader + PackItems + UnmarshalNew[T] CLI tool
Get started → diff --git a/spec/pakt-v0.md b/spec/pakt-v0.md index 8b9cd65..c0987a8 100644 --- a/spec/pakt-v0.md +++ b/spec/pakt-v0.md @@ -553,15 +553,14 @@ Each error MUST include: ### 11.2 Normative Error Categories -Codes 1–99 are reserved for the spec. Implementations MUST support at least the active categories below (those with an identifier) and MUST allow callers to distinguish them programmatically (via sentinel errors, error codes, typed exceptions, or equivalent). Reserved slots are not active categories and impose no implementation requirement. +Codes 1–99 are reserved for the spec. Implementations MUST support at least the categories below and MUST allow callers to distinguish them programmatically (via sentinel errors, error codes, typed exceptions, or equivalent). | Code | Identifier | Condition | |------|-----------|-----------| | 1 | `unexpected_eof` | Input ends before a syntactic construct is complete | -| 2 | *(reserved)* | *(formerly `duplicate_name`; removed — see §6.1)* | -| 3 | `type_mismatch` | A value does not conform to its declared type | -| 4 | `nil_non_nullable` | `nil` appears where the type is not nullable | -| 5 | `syntax` | Any lexical or grammatical error not covered by a more specific category | +| 2 | `type_mismatch` | A value does not conform to its declared type | +| 3 | `nil_non_nullable` | `nil` appears where the type is not nullable | +| 4 | `syntax` | Any lexical or grammatical error not covered by a more specific category | ### 11.3 Extensibility