diff --git a/go.mod b/go.mod index 5cb84fd3..ac2cb5dd 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.24.0 require ( github.com/google/go-cmp v0.7.0 github.com/ladzaretti/migrate v0.1.7 - github.com/pelletier/go-toml/v2 v2.3.1 + github.com/pelletier/go-toml/v2 v2.4.0 github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 golang.org/x/crypto v0.47.0 diff --git a/go.sum b/go.sum index f765870e..4fbc4180 100644 --- a/go.sum +++ b/go.sum @@ -93,8 +93,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= -github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc= -github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pelletier/go-toml/v2 v2.4.0 h1:Mwu0mAkUKbittDs3/ADDWXqMmq3EOK2VHiuCkV00Row= +github.com/pelletier/go-toml/v2 v2.4.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml b/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml index 3e19ea71..3208f721 100644 --- a/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml +++ b/vendor/github.com/pelletier/go-toml/v2/.goreleaser.yaml @@ -74,39 +74,43 @@ universal_binaries: name_template: jsontoml archives: - id: jsontoml - format: tar.xz - builds: + formats: + - tar.xz + ids: - jsontoml files: - none* name_template: "{{ .Binary }}_{{.Version}}_{{ .Os }}_{{ .Arch }}" - id: tomljson - format: tar.xz - builds: + formats: + - tar.xz + ids: - tomljson files: - none* name_template: "{{ .Binary }}_{{.Version}}_{{ .Os }}_{{ .Arch }}" - id: tomll - format: tar.xz - builds: + formats: + - tar.xz + ids: - tomll files: - none* name_template: "{{ .Binary }}_{{.Version}}_{{ .Os }}_{{ .Arch }}" -dockers: +dockers_v2: - id: tools - goos: linux - goarch: amd64 ids: - jsontoml - tomljson - tomll - image_templates: - - "ghcr.io/pelletier/go-toml:latest" - - "ghcr.io/pelletier/go-toml:{{ .Tag }}" - - "ghcr.io/pelletier/go-toml:v{{ .Major }}" - skip_push: false + images: + - "ghcr.io/pelletier/go-toml" + tags: + - "latest" + - "{{ .Tag }}" + - "v{{ .Major }}" + platforms: + - linux/amd64 checksum: name_template: 'sha256sums.txt' snapshot: diff --git a/vendor/github.com/pelletier/go-toml/v2/AGENTS.md b/vendor/github.com/pelletier/go-toml/v2/AGENTS.md index dafe44d7..f495afd8 100644 --- a/vendor/github.com/pelletier/go-toml/v2/AGENTS.md +++ b/vendor/github.com/pelletier/go-toml/v2/AGENTS.md @@ -53,6 +53,14 @@ go-toml is a TOML library for Go. The goal is to provide an easy-to-use and effi - Commit messages must explain **why** the change is needed - Keep messages clear and informative even if details are in the PR description +### Capabilities + +go-toml tracks system-level capabilities using [capslock](https://github.com/google/capslock). The baseline is in `capability_baseline.txt` and CI enforces that it does not grow. + +- **Do not introduce new capabilities.** PRs that increase the capability set (e.g., adding network access, subprocess execution, syscalls) are unlikely to be accepted. +- If a change causes the capabilities check to fail, do not update the baseline to make it pass. Instead, rethink the approach to avoid requiring new capabilities. +- To check locally: `./caps.sh check` (requires `capslock` installed via `go install github.com/google/capslock/cmd/capslock@latest`) + ## Pull Request Checklist Before submitting: @@ -61,4 +69,5 @@ Before submitting: 2. No backward-incompatible changes (unless discussed) 3. Relevant documentation added/updated 4. No performance regression (verify with benchmarks) -5. Title is clear and understandable for changelog +5. Capabilities are not increasing (`./caps.sh check`) +6. Title is clear and understandable for changelog diff --git a/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md b/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md index 28b88ec3..65a3ff49 100644 --- a/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md +++ b/vendor/github.com/pelletier/go-toml/v2/CONTRIBUTING.md @@ -180,6 +180,25 @@ description. Pull requests that lower performance will receive more scrutiny. [benchstat]: https://pkg.go.dev/golang.org/x/perf/cmd/benchstat +### Capabilities + +We use [capslock](https://github.com/google/capslock) to track what +system-level capabilities (file access, network, syscalls, etc.) each package +requires. The current baseline is in `capability_baseline.txt`. CI will fail if +a change introduces a new capability. + +**Pull requests that increase the set of capabilities are unlikely to be +accepted.** go-toml is a parsing library and should not need network access, +subprocess execution, or other capabilities beyond what it already uses. + +If you believe a new capability is genuinely needed, discuss it in an issue +first. To update the baseline after approval: + +```bash +go install github.com/google/capslock/cmd/capslock@latest +./caps.sh generate +``` + ### Style Try to look around and follow the same format and structure as the rest of the diff --git a/vendor/github.com/pelletier/go-toml/v2/Dockerfile b/vendor/github.com/pelletier/go-toml/v2/Dockerfile index b9e93323..ebd9baba 100644 --- a/vendor/github.com/pelletier/go-toml/v2/Dockerfile +++ b/vendor/github.com/pelletier/go-toml/v2/Dockerfile @@ -1,5 +1,6 @@ FROM scratch ENV PATH "$PATH:/bin" -COPY tomll /bin/tomll -COPY tomljson /bin/tomljson -COPY jsontoml /bin/jsontoml +ARG TARGETPLATFORM +COPY $TARGETPLATFORM/tomll /bin/tomll +COPY $TARGETPLATFORM/tomljson /bin/tomljson +COPY $TARGETPLATFORM/jsontoml /bin/jsontoml diff --git a/vendor/github.com/pelletier/go-toml/v2/README.md b/vendor/github.com/pelletier/go-toml/v2/README.md index 14e65644..067ba3fd 100644 --- a/vendor/github.com/pelletier/go-toml/v2/README.md +++ b/vendor/github.com/pelletier/go-toml/v2/README.md @@ -2,7 +2,7 @@ Go library for the [TOML](https://toml.io/en/) format. -This library supports [TOML v1.0.0](https://toml.io/en/v1.0.0). +This library supports [TOML v1.1.0](https://toml.io/en/v1.1.0). [🐞 Bug Reports](https://github.com/pelletier/go-toml/issues) @@ -28,6 +28,11 @@ import "github.com/pelletier/go-toml/v2" As much as possible, this library is designed to behave similarly as the standard library's `encoding/json`. +When encoding structs, fields tagged with `omitempty` are omitted if they are +empty. For `time.Time`, the zero value is considered empty, so timestamps such +as `created_at` or `updated_at` are not written unless you remove `omitempty` +from the struct tag or use a pointer type (`*time.Time`). + ### Performance While go-toml favors usability, it is written with performance in mind. Most @@ -65,7 +70,7 @@ this use-case, go-toml provides [`LocalDate`][tld], [`LocalTime`][tlt], and making them convenient yet unambiguous structures for their respective TOML representation. -[ldt]: https://toml.io/en/v1.0.0#local-date-time +[ldt]: https://toml.io/en/v1.1.0#local-date-time [tld]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#LocalDate [tlt]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#LocalTime [tldt]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#LocalDateTime @@ -237,12 +242,12 @@ Execution time speedup compared to other Go TOML libraries: Benchmarkgo-toml v1BurntSushi/toml - Marshal/HugoFrontMatter-22.1x2.0x - Marshal/ReferenceFile/map-22.0x2.0x - Marshal/ReferenceFile/struct-22.3x2.5x - Unmarshal/HugoFrontMatter-23.3x2.8x - Unmarshal/ReferenceFile/map-22.9x3.0x - Unmarshal/ReferenceFile/struct-24.8x5.0x + Marshal/HugoFrontMatter-22.3x2.4x + Marshal/ReferenceFile/map-22.2x2.6x + Marshal/ReferenceFile/struct-24.9x5.0x + Unmarshal/HugoFrontMatter-27.8x5.9x + Unmarshal/ReferenceFile/map-26.8x6.4x + Unmarshal/ReferenceFile/struct-26.8x6.3x
See more @@ -255,17 +260,17 @@ provided for completeness.

Benchmarkgo-toml v1BurntSushi/toml - Marshal/SimpleDocument/map-22.0x2.9x - Marshal/SimpleDocument/struct-22.5x3.6x - Unmarshal/SimpleDocument/map-24.2x3.4x - Unmarshal/SimpleDocument/struct-25.9x4.4x - UnmarshalDataset/example-23.2x2.9x - UnmarshalDataset/code-22.4x2.8x - UnmarshalDataset/twitter-22.7x2.5x - UnmarshalDataset/citm_catalog-22.3x2.3x - UnmarshalDataset/canada-21.9x1.5x - UnmarshalDataset/config-25.4x3.0x - geomean2.9x2.8x + Marshal/SimpleDocument/map-22.1x3.1x + Marshal/SimpleDocument/struct-23.4x4.8x + Unmarshal/SimpleDocument/map-210.1x7.0x + Unmarshal/SimpleDocument/struct-212.4x8.0x + UnmarshalDataset/example-28.2x6.9x + UnmarshalDataset/code-27.5x8.3x + UnmarshalDataset/twitter-29.0x7.6x + UnmarshalDataset/citm_catalog-25.0x4.5x + UnmarshalDataset/canada-26.4x4.7x + UnmarshalDataset/config-210.2x6.1x + geomean5.8x5.3x

This table can be generated with ./ci.sh benchmark -a -html.

@@ -309,304 +314,6 @@ Multiple versions are available on [ghcr.io][docker]. [docker]: https://github.com/pelletier/go-toml/pkgs/container/go-toml -## Migrating from v1 - -This section describes the differences between v1 and v2, with some pointers on -how to get the original behavior when possible. - -### Decoding / Unmarshal - -#### Automatic field name guessing - -When unmarshaling to a struct, if a key in the TOML document does not exactly -match the name of a struct field or any of the `toml`-tagged field, v1 tries -multiple variations of the key ([code][v1-keys]). - -V2 instead does a case-insensitive matching, like `encoding/json`. - -This could impact you if you are relying on casing to differentiate two fields, -and one of them is a not using the `toml` struct tag. The recommended solution -is to be specific about tag names for those fields using the `toml` struct tag. - -[v1-keys]: https://github.com/pelletier/go-toml/blob/a2e52561804c6cd9392ebf0048ca64fe4af67a43/marshal.go#L775-L781 - -#### Ignore preexisting value in interface - -When decoding into a non-nil `interface{}`, go-toml v1 uses the type of the -element in the interface to decode the object. For example: - -```go -type inner struct { - B interface{} -} -type doc struct { - A interface{} -} - -d := doc{ - A: inner{ - B: "Before", - }, -} - -data := ` -[A] -B = "After" -` - -toml.Unmarshal([]byte(data), &d) -fmt.Printf("toml v1: %#v\n", d) - -// toml v1: main.doc{A:main.inner{B:"After"}} -``` - -In this case, field `A` is of type `interface{}`, containing a `inner` struct. -V1 sees that type and uses it when decoding the object. - -When decoding an object into an `interface{}`, V2 instead disregards whatever -value the `interface{}` may contain and replaces it with a -`map[string]interface{}`. With the same data structure as above, here is what -the result looks like: - -```go -toml.Unmarshal([]byte(data), &d) -fmt.Printf("toml v2: %#v\n", d) - -// toml v2: main.doc{A:map[string]interface {}{"B":"After"}} -``` - -This is to match `encoding/json`'s behavior. There is no way to make the v2 -decoder behave like v1. - -#### Values out of array bounds ignored - -When decoding into an array, v1 returns an error when the number of elements -contained in the doc is superior to the capacity of the array. For example: - -```go -type doc struct { - A [2]string -} -d := doc{} -err := toml.Unmarshal([]byte(`A = ["one", "two", "many"]`), &d) -fmt.Println(err) - -// (1, 1): unmarshal: TOML array length (3) exceeds destination array length (2) -``` - -In the same situation, v2 ignores the last value: - -```go -err := toml.Unmarshal([]byte(`A = ["one", "two", "many"]`), &d) -fmt.Println("err:", err, "d:", d) -// err: d: {[one two]} -``` - -This is to match `encoding/json`'s behavior. There is no way to make the v2 -decoder behave like v1. - -#### Support for `toml.Unmarshaler` has been dropped - -This method was not widely used, poorly defined, and added a lot of complexity. -A similar effect can be achieved by implementing the `encoding.TextUnmarshaler` -interface and use strings. - -#### Support for `default` struct tag has been dropped - -This feature adds complexity and a poorly defined API for an effect that can be -accomplished outside of the library. - -It does not seem like other format parsers in Go support that feature (the -project referenced in the original ticket #202 has not been updated since 2017). -Given that go-toml v2 should not touch values not in the document, the same -effect can be achieved by pre-filling the struct with defaults (libraries like -[go-defaults][go-defaults] can help). Also, string representation is not well -defined for all types: it creates issues like #278. - -The recommended replacement is pre-filling the struct before unmarshaling. - -[go-defaults]: https://github.com/mcuadros/go-defaults - -#### `toml.Tree` replacement - -This structure was the initial attempt at providing a document model for -go-toml. It allows manipulating the structure of any document, encoding and -decoding from their TOML representation. While a more robust feature was -initially planned in go-toml v2, this has been ultimately [removed from -scope][nodoc] of this library, with no plan to add it back at the moment. The -closest equivalent at the moment would be to unmarshal into an `interface{}` and -use type assertions and/or reflection to manipulate the arbitrary -structure. However this would fall short of providing all of the TOML features -such as adding comments and be specific about whitespace. - - -#### `toml.Position` are not retrievable anymore - -The API for retrieving the position (line, column) of a specific TOML element do -not exist anymore. This was done to minimize the amount of concepts introduced -by the library (query path), and avoid the performance hit related to storing -positions in the absence of a document model, for a feature that seemed to have -little use. Errors however have gained more detailed position -information. Position retrieval seems better fitted for a document model, which -has been [removed from the scope][nodoc] of go-toml v2 at the moment. - -### Encoding / Marshal - -#### Default struct fields order - -V1 emits struct fields order alphabetically by default. V2 struct fields are -emitted in order they are defined. For example: - -```go -type S struct { - B string - A string -} - -data := S{ - B: "B", - A: "A", -} - -b, _ := tomlv1.Marshal(data) -fmt.Println("v1:\n" + string(b)) - -b, _ = tomlv2.Marshal(data) -fmt.Println("v2:\n" + string(b)) - -// Output: -// v1: -// A = "A" -// B = "B" - -// v2: -// B = 'B' -// A = 'A' -``` - -There is no way to make v2 encoder behave like v1. A workaround could be to -manually sort the fields alphabetically in the struct definition, or generate -struct types using `reflect.StructOf`. - -#### No indentation by default - -V1 automatically indents content of tables by default. V2 does not. However the -same behavior can be obtained using [`Encoder.SetIndentTables`][sit]. For example: - -```go -data := map[string]interface{}{ - "table": map[string]string{ - "key": "value", - }, -} - -b, _ := tomlv1.Marshal(data) -fmt.Println("v1:\n" + string(b)) - -b, _ = tomlv2.Marshal(data) -fmt.Println("v2:\n" + string(b)) - -buf := bytes.Buffer{} -enc := tomlv2.NewEncoder(&buf) -enc.SetIndentTables(true) -enc.Encode(data) -fmt.Println("v2 Encoder:\n" + string(buf.Bytes())) - -// Output: -// v1: -// -// [table] -// key = "value" -// -// v2: -// [table] -// key = 'value' -// -// -// v2 Encoder: -// [table] -// key = 'value' -``` - -[sit]: https://pkg.go.dev/github.com/pelletier/go-toml/v2#Encoder.SetIndentTables - -#### Keys and strings are single quoted - -V1 always uses double quotes (`"`) around strings and keys that cannot be -represented bare (unquoted). V2 uses single quotes instead by default (`'`), -unless a character cannot be represented, then falls back to double quotes. As a -result of this change, `Encoder.QuoteMapKeys` has been removed, as it is not -useful anymore. - -There is no way to make v2 encoder behave like v1. - -#### `TextMarshaler` emits as a string, not TOML - -Types that implement [`encoding.TextMarshaler`][tm] can emit arbitrary TOML in -v1. The encoder would append the result to the output directly. In v2 the result -is wrapped in a string. As a result, this interface cannot be implemented by the -root object. - -There is no way to make v2 encoder behave like v1. - -[tm]: https://golang.org/pkg/encoding/#TextMarshaler - -#### `Encoder.CompactComments` has been removed - -Emitting compact comments is now the default behavior of go-toml. This option -is not necessary anymore. - -#### Struct tags have been merged - -V1 used to provide multiple struct tags: `comment`, `commented`, `multiline`, -`toml`, and `omitempty`. To behave more like the standard library, v2 has merged -`toml`, `multiline`, `commented`, and `omitempty`. For example: - -```go -type doc struct { - // v1 - F string `toml:"field" multiline:"true" omitempty:"true" commented:"true"` - // v2 - F string `toml:"field,multiline,omitempty,commented"` -} -``` - -Has a result, the `Encoder.SetTag*` methods have been removed, as there is just -one tag now. - -#### `Encoder.ArraysWithOneElementPerLine` has been renamed - -The new name is `Encoder.SetArraysMultiline`. The behavior should be the same. - -#### `Encoder.Indentation` has been renamed - -The new name is `Encoder.SetIndentSymbol`. The behavior should be the same. - - -#### Embedded structs behave like stdlib - -V1 defaults to merging embedded struct fields into the embedding struct. This -behavior was unexpected because it does not follow the standard library. To -avoid breaking backward compatibility, the `Encoder.PromoteAnonymous` method was -added to make the encoder behave correctly. Given backward compatibility is not -a problem anymore, v2 does the right thing by default: it follows the behavior -of `encoding/json`. `Encoder.PromoteAnonymous` has been removed. - -[nodoc]: https://github.com/pelletier/go-toml/discussions/506#discussioncomment-1526038 - -### `query` - -go-toml v1 provided the [`go-toml/query`][query] package. It allowed to run -JSONPath-style queries on TOML files. This feature is not available in v2. For a -replacement, check out [dasel][dasel]. - -This package has been removed because it was essentially not supported anymore -(last commit May 2020), increased the complexity of the code base, and more -complete solutions exist out there. - -[query]: https://github.com/pelletier/go-toml/tree/f99d6bbca119636aeafcf351ee52b3d202782627/query -[dasel]: https://github.com/TomWright/dasel - ## Versioning Expect for parts explicitly marked otherwise, go-toml follows [Semantic diff --git a/vendor/github.com/pelletier/go-toml/v2/capability_baseline.txt b/vendor/github.com/pelletier/go-toml/v2/capability_baseline.txt new file mode 100644 index 00000000..c556456c --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/capability_baseline.txt @@ -0,0 +1 @@ +github.com/pelletier/go-toml/v2: CAPABILITY_REFLECT, CAPABILITY_UNANALYZED, CAPABILITY_UNSAFE_POINTER diff --git a/vendor/github.com/pelletier/go-toml/v2/caps.sh b/vendor/github.com/pelletier/go-toml/v2/caps.sh new file mode 100644 index 00000000..31fec2b9 --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/caps.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# +# Generates or checks the capability baseline for go-toml. +# +# Usage: +# ./caps.sh generate # regenerate capability_baseline.txt +# ./caps.sh check # check that capabilities haven't grown +# +# Requires: go, capslock (go install github.com/google/capslock/cmd/capslock@latest) + +set -euo pipefail + +BASELINE="capability_baseline.txt" +CAPSLOCK="${CAPSLOCK:-capslock}" + +# Capabilities that must never appear in any package. +FORBIDDEN_CAPS=( + CAPABILITY_NETWORK + CAPABILITY_CGO + CAPABILITY_EXEC +) + +capslock_to_baseline() { + "$CAPSLOCK" -packages=. -output=package -granularity=package \ + | jq -r 'to_entries | sort_by(.key) | .[] | .key + ": " + (.value | sort | join(", "))' +} + +generate() { + capslock_to_baseline > "$BASELINE" + echo "Wrote $BASELINE" +} + +check() { + if [ ! -f "$BASELINE" ]; then + echo "ERROR: $BASELINE not found. Run '$0 generate' first." + exit 1 + fi + + current=$(mktemp) + trap 'rm -f "$current"' EXIT + + capslock_to_baseline > "$current" + + failed=0 + + # Check for forbidden capabilities in current output. + for cap in "${FORBIDDEN_CAPS[@]}"; do + if grep -q "$cap" "$current"; then + echo "FORBIDDEN capability found: $cap" + grep "$cap" "$current" + failed=1 + fi + done + + # Extract all unique capability names from baseline and current. + baseline_caps=$(grep -oE 'CAPABILITY_[A-Z_]+' "$BASELINE" | sort -u) + current_caps=$(grep -oE 'CAPABILITY_[A-Z_]+' "$current" | sort -u) + + # Check for new capability names not in the baseline. + new_caps=$(comm -13 <(echo "$baseline_caps") <(echo "$current_caps")) + if [ -n "$new_caps" ]; then + echo "NEW capabilities detected (not in baseline):" + echo "$new_caps" + failed=1 + fi + + # Check for new per-package capabilities (a package gained a capability it didn't have before). + while IFS=': ' read -r pkg caps; do + baseline_pkg_caps=$(grep "^${pkg}:" "$BASELINE" 2>/dev/null | sed 's/^[^:]*: //' || true) + if [ -z "$baseline_pkg_caps" ]; then + echo "NEW package with capabilities: $pkg: $caps" + failed=1 + continue + fi + # Check each capability in current for this package + for cap in $(echo "$caps" | tr ', ' '\n' | grep -v '^$'); do + if ! echo "$baseline_pkg_caps" | grep -q "$cap"; then + echo "NEW capability for $pkg: $cap" + failed=1 + fi + done + done < "$current" + + if [ "$failed" -eq 1 ]; then + echo "" + echo "FAILED: capabilities have grown." + echo "If this is intentional, run '$0 generate' and commit the updated $BASELINE." + exit 1 + fi + + echo "OK: no new capabilities detected." +} + +case "${1:-}" in + generate) generate ;; + check) check ;; + *) + echo "Usage: $0 {generate|check}" + exit 1 + ;; +esac diff --git a/vendor/github.com/pelletier/go-toml/v2/ci.sh b/vendor/github.com/pelletier/go-toml/v2/ci.sh index 30c23d1a..6b9723bf 100644 --- a/vendor/github.com/pelletier/go-toml/v2/ci.sh +++ b/vendor/github.com/pelletier/go-toml/v2/ci.sh @@ -146,13 +146,17 @@ bench() { pushd "$dir" + tags="" if [ "${replace}" != "" ]; then find ./benchmark/ -iname '*.go' -exec sed -i -E "s|github.com/pelletier/go-toml/v2\"|${replace}\"|g" {} \; go get "${replace}" + # The realworld benchmarks use v2-only API and cannot compile against + # the other libraries; exclude them from cross-library comparisons. + tags="-tags cross_library_benchmark" fi export GOMAXPROCS=2 - go test '-bench=^Benchmark(Un)?[mM]arshal' -count=10 -run=Nothing ./... | tee "${out}" + go test ${tags} '-bench=^Benchmark(Un)?[mM]arshal' -count=10 -run=Nothing ./... | tee "${out}" popd if [ "${branch}" != "HEAD" ]; then diff --git a/vendor/github.com/pelletier/go-toml/v2/decode.go b/vendor/github.com/pelletier/go-toml/v2/decode.go index f3f14eff..46de2fb4 100644 --- a/vendor/github.com/pelletier/go-toml/v2/decode.go +++ b/vendor/github.com/pelletier/go-toml/v2/decode.go @@ -1,6 +1,7 @@ package toml import ( + "bytes" "fmt" "math" "strconv" @@ -22,191 +23,340 @@ func parseInteger(b []byte) (int64, error) { panic(fmt.Errorf("invalid base '%c', should have been checked by scanIntOrFloat", b[1])) } } - return parseIntDec(b) } -func parseLocalDate(b []byte) (LocalDate, error) { - // full-date = date-fullyear "-" date-month "-" date-mday - // date-fullyear = 4DIGIT - // date-month = 2DIGIT ; 01-12 - // date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year - var date LocalDate - - if len(b) != 10 || b[4] != '-' || b[7] != '-' { - return date, unstable.NewParserError(b, "dates are expected to have the format YYYY-MM-DD") +func parseIntHex(b []byte) (int64, error) { + var v uint64 + for _, c := range b[2:] { + if c == '_' { + continue + } + var d byte + switch { + case c >= '0' && c <= '9': + d = c - '0' + case c >= 'a' && c <= 'f': + d = c - 'a' + 10 + case c >= 'A' && c <= 'F': + d = c - 'A' + 10 + } + if v > math.MaxInt64>>4 { + return 0, unstable.NewParserError(b, "hexadecimal number is too large to fit in a 64-bit signed integer") + } + v = v<<4 | uint64(d) } + return int64(v), nil +} - var err error - - date.Year, err = parseDecimalDigits(b[0:4]) - if err != nil { - return LocalDate{}, err +func parseIntOct(b []byte) (int64, error) { + var v uint64 + for _, c := range b[2:] { + if c == '_' { + continue + } + if v > math.MaxInt64>>3 { + return 0, unstable.NewParserError(b, "octal number is too large to fit in a 64-bit signed integer") + } + v = v<<3 | uint64(c-'0') } + return int64(v), nil +} - date.Month, err = parseDecimalDigits(b[5:7]) - if err != nil { - return LocalDate{}, err +func parseIntBin(b []byte) (int64, error) { + var v uint64 + for _, c := range b[2:] { + if c == '_' { + continue + } + if v > math.MaxInt64>>1 { + return 0, unstable.NewParserError(b, "binary number is too large to fit in a 64-bit signed integer") + } + v = v<<1 | uint64(c-'0') } + return int64(v), nil +} - date.Day, err = parseDecimalDigits(b[8:10]) - if err != nil { - return LocalDate{}, err +func parseIntDec(b []byte) (int64, error) { + i := 0 + neg := false + switch b[0] { + case '-': + neg = true + i++ + case '+': + i++ } - if !isValidDate(date.Year, date.Month, date.Day) { - return LocalDate{}, unstable.NewParserError(b, "impossible date") + var limit uint64 = math.MaxInt64 + if neg { + limit = math.MaxInt64 + 1 } - return date, nil + var v uint64 + for ; i < len(b); i++ { + c := b[i] + if c == '_' { + continue + } + if v > limit/10 { + return 0, unstable.NewParserError(b, "decimal number is too large to fit in a 64-bit signed integer") + } + v = v*10 + uint64(c-'0') + if v > limit { + return 0, unstable.NewParserError(b, "decimal number is too large to fit in a 64-bit signed integer") + } + } + if neg { + return -int64(v), nil //nolint:gosec // v <= MaxInt64+1, the conversion wraps to the intended negative value + } + return int64(v), nil //nolint:gosec // v <= MaxInt64 } -func parseDecimalDigits(b []byte) (int, error) { - v := 0 - - for i, c := range b { - if c < '0' || c > '9' { - return 0, unstable.NewParserError(b[i:i+1], "expected digit (0-9)") +func parseFloat(b []byte) (float64, error) { + i := 0 + if len(b) > 0 && (b[0] == '+' || b[0] == '-') { + i = 1 + } + if len(b) == i+3 { + switch b[i] { + case 'i': + // inf + if b[0] == '-' { + return math.Inf(-1), nil + } + return math.Inf(1), nil + case 'n': + // nan + return math.NaN(), nil } - v *= 10 - v += int(c - '0') } - return v, nil -} - -func parseDateTime(b []byte) (time.Time, error) { - // offset-date-time = full-date time-delim full-time - // full-time = partial-time time-offset - // time-offset = "Z" / time-numoffset - // time-numoffset = ( "+" / "-" ) time-hour ":" time-minute + // Fast path: a plain decimal whose significand fits in 53 bits and whose + // base-10 exponent is within [-22, 22] is parsed exactly with a single + // rounding (Clinger's method) straight from the bytes, with no string + // allocation and no full strconv parse. This is the common shape for + // numeric data (e.g. coordinate lists). Anything outside those bounds, or + // with underscores, falls through to strconv, which is the reference. + if f, ok := fastParseFloat(b); ok { + return f, nil + } + + // strconv.ParseFloat is the reference implementation for parsing + // floating point numbers. The position of underscores has already been + // validated by the parser; strip them so that they do not interfere with + // Go's own underscore rules. + cleaned := b + if bytes.IndexByte(b, '_') >= 0 { + cleaned = make([]byte, 0, len(b)) + for _, c := range b { + if c != '_' { + cleaned = append(cleaned, c) + } + } + } - dt, b, err := parseLocalDateTime(b) + f, err := strconv.ParseFloat(string(cleaned), 64) if err != nil { - return time.Time{}, err + return 0, unstable.NewParserError(b, "unable to parse float: %s", err) } + return f, nil +} - var zone *time.Location +// float64pow10 holds the powers of ten that are exactly representable as a +// float64 (10^0 .. 10^22). +var float64pow10 = [...]float64{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, +} - if len(b) == 0 { - // parser should have checked that when assigning the date time node - panic("date time should have a timezone") +// fastParseFloat parses b as a float64 using Clinger's exact method and reports +// whether it applied. It accepts only plain decimal numbers (optional sign, +// digits, one optional '.', optional 'e'/'E' exponent) whose significand fits +// in 53 bits and whose effective base-10 exponent is within [-22, 22]; under +// those conditions float64(significand) * 10^exp (or / 10^-exp) is the exact, +// correctly-rounded result, identical to strconv.ParseFloat. It returns +// ok=false (deferring to strconv) for underscores, hexadecimal floats, large +// significands or exponents, and any other shape. +func fastParseFloat(b []byte) (float64, bool) { + i := 0 + neg := false + if i < len(b) && (b[i] == '+' || b[i] == '-') { + neg = b[i] == '-' + i++ } - if b[0] == 'Z' || b[0] == 'z' { - b = b[1:] - zone = time.UTC - } else { - const dateTimeByteLen = 6 - if len(b) != dateTimeByteLen { - return time.Time{}, unstable.NewParserError(b, "invalid date-time timezone") - } - var direction int - switch b[0] { - case '-': - direction = -1 - case '+': - direction = +1 + var mantissa uint64 + digits := 0 + fracDigits := 0 + sawDot := false + sawDigit := false + for ; i < len(b); i++ { + c := b[i] + switch { + case c >= '0' && c <= '9': + if digits >= 19 { + // Too many significant digits to accumulate without risking a + // uint64 overflow (and well past the 53-bit exact range). + return 0, false + } + mantissa = mantissa*10 + uint64(c-'0') + digits++ + if sawDot { + fracDigits++ + } + sawDigit = true + case c == '.': + if sawDot { + return 0, false + } + sawDot = true default: - return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset character") - } - - if b[3] != ':' { - return time.Time{}, unstable.NewParserError(b[3:4], "expected a : separator") + goto exponent } - - hours, err := parseDecimalDigits(b[1:3]) - if err != nil { - return time.Time{}, err - } - if hours > 23 { - return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset hours") - } - - minutes, err := parseDecimalDigits(b[4:6]) - if err != nil { - return time.Time{}, err + } +exponent: + if !sawDigit { + return 0, false + } + exp := -fracDigits + if i < len(b) && (b[i] == 'e' || b[i] == 'E') { + i++ + esign := 1 + if i < len(b) && (b[i] == '+' || b[i] == '-') { + if b[i] == '-' { + esign = -1 + } + i++ } - if minutes > 59 { - return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset minutes") + if i >= len(b) { + return 0, false } - - seconds := direction * (hours*3600 + minutes*60) - if seconds == 0 { - zone = time.UTC - } else { - zone = time.FixedZone("", seconds) + eval := 0 + for ; i < len(b); i++ { + c := b[i] + if c < '0' || c > '9' { + return 0, false + } + eval = eval*10 + int(c-'0') + if eval > 1000 { + return 0, false + } } - b = b[dateTimeByteLen:] + exp += esign * eval } - - if len(b) > 0 { - return time.Time{}, unstable.NewParserError(b, "extra bytes at the end of the timezone") + if i != len(b) { + // Trailing bytes (an underscore, a hexadecimal marker, ...). + return 0, false + } + if mantissa > 1<<53 { + return 0, false } - t := time.Date( - dt.Year, - time.Month(dt.Month), - dt.Day, - dt.Hour, - dt.Minute, - dt.Second, - dt.Nanosecond, - zone) + f := float64(mantissa) + switch { + case exp == 0: + case exp > 0 && exp <= 22: + f *= float64pow10[exp] + case exp < 0 && exp >= -22: + f /= float64pow10[-exp] + default: + return 0, false + } + if neg { + f = -f + } + return f, true +} - return t, nil +func isDecimalDigit(c byte) bool { + return c >= '0' && c <= '9' } -func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) { - var dt LocalDateTime +// parseLocalDate parses a date of the exact form YYYY-MM-DD and validates +// its components. +func parseLocalDate(b []byte) (LocalDate, error) { + var date LocalDate - const localDateTimeByteMinLen = 11 - if len(b) < localDateTimeByteMinLen { - return dt, nil, unstable.NewParserError(b, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM:SS[.NNNNNNNNN]") + if len(b) != 10 || b[4] != '-' || b[7] != '-' { + return date, unstable.NewParserError(b, "dates are expected to have the format YYYY-MM-DD") } - date, err := parseLocalDate(b[:10]) + var err error + date.Year, err = parseDecimalDigits(b[0:4]) if err != nil { - return dt, nil, err + return date, err + } + date.Month, err = parseDecimalDigits(b[5:7]) + if err != nil { + return date, err + } + date.Day, err = parseDecimalDigits(b[8:10]) + if err != nil { + return date, err } - dt.LocalDate = date - sep := b[10] - if sep != 'T' && sep != ' ' && sep != 't' { - return dt, nil, unstable.NewParserError(b[10:11], "datetime separator is expected to be T or a space") + if date.Month < 1 || date.Month > 12 { + return date, unstable.NewParserError(b[5:7], "impossible date") + } + maxDay := daysIn(date.Month, date.Year) + if date.Day < 1 || date.Day > maxDay { + return date, unstable.NewParserError(b[8:10], "impossible date") } - t, rest, err := parseLocalTime(b[11:]) - if err != nil { - return dt, nil, err + return date, nil +} + +func daysIn(month int, year int) int { + switch month { + case 2: + if isLeapYear(year) { + return 29 + } + return 28 + case 4, 6, 9, 11: + return 30 + default: + return 31 } - dt.LocalTime = t +} - return dt, rest, nil +func isLeapYear(year int) bool { + return year%4 == 0 && (year%100 != 0 || year%400 == 0) +} + +// parseDecimalDigits parses a sequence of digits as a decimal number. +func parseDecimalDigits(b []byte) (int, error) { + v := 0 + for i, c := range b { + if !isDecimalDigit(c) { + return 0, unstable.NewParserError(b[i:i+1], "expected digit (0-9)") + } + v = v*10 + int(c-'0') + } + return v, nil } -// parseLocalTime is a bit different because it also returns the remaining -// []byte that is didn't need. This is to allow parseDateTime to parse those -// remaining bytes as a timezone. +// parseLocalTime parses a time of the form HH:MM with optional seconds and an +// optional fractional part (TOML v1.1.0). It returns the remaining bytes after +// the time. func parseLocalTime(b []byte) (LocalTime, []byte, error) { var ( nspow = [10]int{0, 1e8, 1e7, 1e6, 1e5, 1e4, 1e3, 1e2, 1e1, 1e0} t LocalTime ) - // check if b matches to have expected format HH:MM:SS[.NNNNNN] - const localTimeByteLen = 8 - if len(b) < localTimeByteLen { - return t, nil, unstable.NewParserError(b, "times are expected to have the format HH:MM:SS[.NNNNNN]") + // check if b matches to have expected format HH:MM[:SS[.NNNNNN]] + const localTimeByteMinLen = 5 + if len(b) < localTimeByteMinLen { + return t, nil, unstable.NewParserError(b, "times are expected to have the format HH:MM[:SS[.NNNNNN]]") } var err error - t.Hour, err = parseDecimalDigits(b[0:2]) if err != nil { return t, nil, err } - if t.Hour > 23 { return t, nil, unstable.NewParserError(b[0:2], "hour cannot be greater 23") } @@ -221,54 +371,53 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { if t.Minute > 59 { return t, nil, unstable.NewParserError(b[3:5], "minutes cannot be greater 59") } - if b[5] != ':' { - return t, nil, unstable.NewParserError(b[5:6], "expecting colon between minutes and seconds") - } - t.Second, err = parseDecimalDigits(b[6:8]) - if err != nil { - return t, nil, err - } + b = b[5:] - if t.Second > 59 { - return t, nil, unstable.NewParserError(b[6:8], "seconds cannot be greater than 59") - } + // Seconds are optional (TOML v1.1.0). Fractional seconds may only appear + // when seconds are present: + // partial-time = time-hour ":" time-minute [ ":" time-second [ time-secfrac ] ] + secondsPresent := false + + if len(b) >= 1 && b[0] == ':' { + if len(b) < 3 { + return t, nil, unstable.NewParserError(b, "incomplete seconds") + } + + t.Second, err = parseDecimalDigits(b[1:3]) + if err != nil { + return t, nil, err + } + + if t.Second > 59 { + return t, nil, unstable.NewParserError(b[1:3], "seconds cannot be greater than 59") + } - b = b[8:] + b = b[3:] + secondsPresent = true + } - if len(b) >= 1 && b[0] == '.' { + if secondsPresent && len(b) >= 1 && b[0] == '.' { frac := 0 precision := 0 digits := 0 for i, c := range b[1:] { - if !isDigit(c) { + if !isDecimalDigit(c) { if i == 0 { return t, nil, unstable.NewParserError(b[0:1], "need at least one digit after fraction point") } break } digits++ - - const maxFracPrecision = 9 - if i >= maxFracPrecision { - // go-toml allows decoding fractional seconds - // beyond the supported precision of 9 - // digits. It truncates the fractional component - // to the supported precision and ignores the - // remaining digits. - // - // https://github.com/pelletier/go-toml/discussions/707 - continue + if i < 9 { + frac = frac*10 + int(c-'0') + precision++ } - - frac *= 10 - frac += int(c - '0') - precision++ } - if precision == 0 { - return t, nil, unstable.NewParserError(b[:1], "nanoseconds need at least one digit") + if digits == 0 { + return t, nil, unstable.NewParserError(b[0:1], "need at least one digit after fraction point") } t.Nanosecond = frac * nspow[precision] @@ -279,271 +428,111 @@ func parseLocalTime(b []byte) (LocalTime, []byte, error) { return t, b, nil } -func parseFloat(b []byte) (float64, error) { - if len(b) == 4 && (b[0] == '+' || b[0] == '-') && b[1] == 'n' && b[2] == 'a' && b[3] == 'n' { - return math.NaN(), nil - } - - cleaned, err := checkAndRemoveUnderscoresFloats(b) - if err != nil { - return 0, err - } - - if cleaned[0] == '.' { - return 0, unstable.NewParserError(b, "float cannot start with a dot") - } - - if cleaned[len(cleaned)-1] == '.' { - return 0, unstable.NewParserError(b, "float cannot end with a dot") - } - - dotAlreadySeen := false - for i, c := range cleaned { - if c == '.' { - if dotAlreadySeen { - return 0, unstable.NewParserError(b[i:i+1], "float can have at most one decimal point") - } - if !isDigit(cleaned[i-1]) { - return 0, unstable.NewParserError(b[i-1:i+1], "float decimal point must be preceded by a digit") - } - if !isDigit(cleaned[i+1]) { - return 0, unstable.NewParserError(b[i:i+2], "float decimal point must be followed by a digit") - } - dotAlreadySeen = true - } - } - - start := 0 - if cleaned[0] == '+' || cleaned[0] == '-' { - start = 1 - } - if cleaned[start] == '0' && len(cleaned) > start+1 && isDigit(cleaned[start+1]) { - return 0, unstable.NewParserError(b, "float integer part cannot have leading zeroes") - } - - f, err := strconv.ParseFloat(string(cleaned), 64) - if err != nil { - return 0, unstable.NewParserError(b, "unable to parse float: %w", err) - } - - return f, nil -} - -func parseIntHex(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) - if err != nil { - return 0, err - } - - i, err := strconv.ParseInt(string(cleaned), 16, 64) - if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse hexadecimal number: %w", err) - } - - return i, nil -} +// parseLocalDateTime parses a local date time of the form +// YYYY-MM-DD(T| )HH:MM:SS[.NNNNNN]. It returns the remaining bytes after the +// date-time. +func parseLocalDateTime(b []byte) (LocalDateTime, []byte, error) { + var dt LocalDateTime -func parseIntOct(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) - if err != nil { - return 0, err + const localDateTimeByteMinLen = 11 + if len(b) < localDateTimeByteMinLen { + return dt, nil, unstable.NewParserError(b, "local datetimes are expected to have the format YYYY-MM-DDTHH:MM[:SS[.NNNNNNNNN]]") } - i, err := strconv.ParseInt(string(cleaned), 8, 64) + date, err := parseLocalDate(b[:10]) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse octal number: %w", err) + return dt, nil, err } + dt.LocalDate = date - return i, nil -} - -func parseIntBin(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b[2:]) - if err != nil { - return 0, err + sep := b[10] + if sep != 'T' && sep != ' ' && sep != 't' { + return dt, nil, unstable.NewParserError(b[10:11], "datetime separator is expected to be T or a space") } - i, err := strconv.ParseInt(string(cleaned), 2, 64) + t, rest, err := parseLocalTime(b[11:]) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse binary number: %w", err) + return dt, nil, err } + dt.LocalTime = t - return i, nil -} - -func isSign(b byte) bool { - return b == '+' || b == '-' + return dt, rest, nil } -func parseIntDec(b []byte) (int64, error) { - cleaned, err := checkAndRemoveUnderscoresIntegers(b) - if err != nil { - return 0, err - } - - startIdx := 0 - - if isSign(cleaned[0]) { - startIdx++ - } - - if len(cleaned) > startIdx+1 && cleaned[startIdx] == '0' { - return 0, unstable.NewParserError(b, "leading zero not allowed on decimal number") - } - - i, err := strconv.ParseInt(string(cleaned), 10, 64) +// parseDateTime parses a date-time with a timezone offset (Z or +/-HH:MM). +func parseDateTime(b []byte) (time.Time, error) { + dt, b, err := parseLocalDateTime(b) if err != nil { - return 0, unstable.NewParserError(b, "couldn't parse decimal number: %w", err) - } - - return i, nil -} - -func checkAndRemoveUnderscoresIntegers(b []byte) ([]byte, error) { - start := 0 - if b[start] == '+' || b[start] == '-' { - start++ - } - - if len(b) == start { - return b, nil + return time.Time{}, err } - if b[start] == '_' { - return nil, unstable.NewParserError(b[start:start+1], "number cannot start with underscore") - } + var zone *time.Location - if b[len(b)-1] == '_' { - return nil, unstable.NewParserError(b[len(b)-1:], "number cannot end with underscore") + if len(b) == 0 { + // parser should have checked that there is a timezone + return time.Time{}, unstable.NewParserError(b, "date-time is missing timezone") } - // fast path - i := 0 - for ; i < len(b); i++ { - if b[i] == '_' { - break + if b[0] == 'Z' || b[0] == 'z' { + b = b[1:] + zone = time.UTC + } else { + const dateTimeByteLen = 6 + if len(b) != dateTimeByteLen { + return time.Time{}, unstable.NewParserError(b, "invalid date-time timezone") } - } - if i == len(b) { - return b, nil - } - - before := false - cleaned := make([]byte, i, len(b)) - copy(cleaned, b) - - for i++; i < len(b); i++ { - c := b[i] - if c == '_' { - if !before { - return nil, unstable.NewParserError(b[i-1:i+1], "number must have at least one digit between underscores") - } - before = false - } else { - before = true - cleaned = append(cleaned, c) + var direction int + switch b[0] { + case '-': + direction = -1 + case '+': + direction = +1 + default: + return time.Time{}, unstable.NewParserError(b[:1], "invalid timezone offset character") } - } - - return cleaned, nil -} - -func checkAndRemoveUnderscoresFloats(b []byte) ([]byte, error) { - if b[0] == '_' { - return nil, unstable.NewParserError(b[0:1], "number cannot start with underscore") - } - if b[len(b)-1] == '_' { - return nil, unstable.NewParserError(b[len(b)-1:], "number cannot end with underscore") - } - - // fast path - i := 0 - for ; i < len(b); i++ { - if b[i] == '_' { - break + if b[3] != ':' { + return time.Time{}, unstable.NewParserError(b[3:4], "expected a : separator") } - } - if i == len(b) { - return b, nil - } - before := false - cleaned := make([]byte, 0, len(b)) + hours, err := parseDecimalDigits(b[1:3]) + if err != nil { + return time.Time{}, err + } + if hours > 23 { + return time.Time{}, unstable.NewParserError(b[1:3], "invalid timezone offset hours") + } - for i := 0; i < len(b); i++ { - c := b[i] + minutes, err := parseDecimalDigits(b[4:6]) + if err != nil { + return time.Time{}, err + } + if minutes > 59 { + return time.Time{}, unstable.NewParserError(b[4:6], "invalid timezone offset minutes") + } - switch c { - case '_': - if !before { - return nil, unstable.NewParserError(b[i-1:i+1], "number must have at least one digit between underscores") - } - if i < len(b)-1 && (b[i+1] == 'e' || b[i+1] == 'E') { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore before exponent") - } - before = false - case '+', '-': - // signed exponents - cleaned = append(cleaned, c) - before = false - case 'e', 'E': - if i < len(b)-1 && b[i+1] == '_' { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore after exponent") - } - cleaned = append(cleaned, c) - case '.': - if i < len(b)-1 && b[i+1] == '_' { - return nil, unstable.NewParserError(b[i+1:i+2], "cannot have underscore after decimal point") - } - if i > 0 && b[i-1] == '_' { - return nil, unstable.NewParserError(b[i-1:i], "cannot have underscore before decimal point") - } - cleaned = append(cleaned, c) - default: - before = true - cleaned = append(cleaned, c) + seconds := direction * (hours*3600 + minutes*60) + if seconds == 0 { + zone = time.UTC + } else { + zone = time.FixedZone("", seconds) } + b = b[dateTimeByteLen:] } - return cleaned, nil -} - -// isValidDate checks if a provided date is a date that exists. -func isValidDate(year int, month int, day int) bool { - return month > 0 && month < 13 && day > 0 && day <= daysIn(month, year) -} - -// daysBefore[m] counts the number of days in a non-leap year -// before month m begins. There is an entry for m=12, counting -// the number of days before January of next year (365). -var daysBefore = [...]int32{ - 0, - 31, - 31 + 28, - 31 + 28 + 31, - 31 + 28 + 31 + 30, - 31 + 28 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31, -} - -func daysIn(m int, year int) int { - if m == 2 && isLeap(year) { - return 29 + if len(b) > 0 { + return time.Time{}, unstable.NewParserError(b, "extra bytes at the end of the timezone") } - return int(daysBefore[m] - daysBefore[m-1]) -} -func isLeap(year int) bool { - return year%4 == 0 && (year%100 != 0 || year%400 == 0) -} + t := time.Date( + dt.Year, + time.Month(dt.Month), + dt.Day, + dt.Hour, + dt.Minute, + dt.Second, + dt.Nanosecond, + zone) -func isDigit(r byte) bool { - return r >= '0' && r <= '9' + return t, nil } diff --git a/vendor/github.com/pelletier/go-toml/v2/decode_fused.go b/vendor/github.com/pelletier/go-toml/v2/decode_fused.go new file mode 100644 index 00000000..9de2d652 --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/decode_fused.go @@ -0,0 +1,362 @@ +package toml + +import ( + "errors" + "reflect" + "strings" + + "github.com/pelletier/go-toml/v2/internal/parserbridge" + "github.com/pelletier/go-toml/v2/unstable" +) + +// unmarshalFused decodes a whole document into a native map[string]interface{} +// tree with no reflection on the document structure, and without building an +// AST for table headers and scalar key-values. Only container values (arrays +// and inline tables) are parsed into the parser arena, so that the seen-tracker +// can validate them and decodeAny can presize the resulting slices and maps — +// the AST is what makes that cheap O(1) presizing possible. +// +// It is used when the target is a fully generic value (interface{} or +// map[string]interface{}) and the unmarshaler interface is disabled. The +// seen-tracker validates the document (duplicate keys, type consistency), so +// the builder creates and merges containers without revalidating. Strict mode +// never applies to a generic target (a map has no "unknown fields"), and +// captures never apply (a generic value implements no Unmarshaler). +func (d *decoder) unmarshalFused(root reflect.Value, data []byte) error { + var m map[string]interface{} + if !root.IsNil() { + // Decode into (merge with) an existing generic map when present. + if em, ok := root.Interface().(map[string]interface{}); ok { + m = em + } + } + if m == nil { + m = map[string]interface{}{} + } + + if err := d.fusedDocument(m, data); err != nil { + return d.wrapFusedError(data, err) + } + + if root.CanSet() { + root.Set(reflect.ValueOf(m)) + } + return nil +} + +// fusedDocument runs the top-level expression loop, mirroring +// Parser.NextExpression but storing values directly into native maps. +func (d *decoder) fusedDocument(m map[string]interface{}, b []byte) error { + cur := m + for { + b = fusedSkipWS(b) + if len(b) == 0 { + return nil + } + switch b[0] { + case '\n': + b = b[1:] + case '\r': + if len(b) > 1 && b[1] == '\n' { + b = b[2:] + continue + } + return unstable.NewParserError(b[:1], "expected newline but got %#U", b[0]) + case '#': + _, rest, err := parserbridge.ScanComment(b) + if err != nil { + return err + } + rest, err = fusedConsumeEOL(rest) + if err != nil { + return err + } + b = rest + case '[': + rest, err := d.fusedTable(b, m, &cur) + if err != nil { + return err + } + b = rest + default: + rest, err := d.fusedKeyVal(b, cur) + if err != nil { + return err + } + b = rest + } + } +} + +// fusedTable handles a [table] or [[array table]] header. b starts at '['. It +// updates *cur to the table the following key-values belong to. +func (d *decoder) fusedTable(b []byte, root map[string]interface{}, cur *map[string]interface{}) ([]byte, error) { + arrayTable := len(b) > 1 && b[1] == '[' + + var start []byte + if arrayTable { + start = fusedSkipWS(b[2:]) + } else { + start = fusedSkipWS(b[1:]) + } + + var err error + var rawKey []byte + d.keyParts, rawKey, b, err = parserbridge.ScanKey(&d.p, start, d.keyParts[:0]) + if err != nil { + return nil, err + } + + if arrayTable { + if len(b) < 2 || b[0] != ']' || b[1] != ']' { + return nil, unstable.NewParserError(fusedHL1(b), "expected ']]' to close array table name") + } + b = b[2:] + } else { + if len(b) == 0 || b[0] != ']' { + return nil, unstable.NewParserError(fusedHL1(b), "expected ']' to close table name") + } + b = b[1:] + } + + // The whole expression (including its line termination) is parsed before + // it is validated, to keep error precedence identical to the AST path. + b, err = d.fusedFinishLine(b) + if err != nil { + return nil, err + } + + if arrayTable { + first, err := d.seen.CheckArrayTable(d.keyParts) + if err != nil { + return nil, d.fusedSeenError(rawKey, d.keyParts, err) + } + *cur = d.anyArrayTableParts(root, d.keyParts, first) + } else { + if _, err := d.seen.CheckTable(d.keyParts); err != nil { + return nil, d.fusedSeenError(rawKey, d.keyParts, err) + } + *cur = d.anyTableParts(root, d.keyParts) + } + return b, nil +} + +// fusedKeyVal handles a `key = value` expression relative to the current table +// cur. b starts at the first character of the key. +func (d *decoder) fusedKeyVal(b []byte, cur map[string]interface{}) ([]byte, error) { + var err error + var rawKey []byte + d.keyParts, rawKey, b, err = parserbridge.ScanKey(&d.p, b, d.keyParts[:0]) + if err != nil { + return nil, err + } + if len(b) == 0 || b[0] != '=' { + return nil, unstable.NewParserError(fusedHL1(b), "expected '=' after key") + } + b = fusedSkipWS(b[1:]) + if len(b) == 0 { + return nil, unstable.NewParserError(b, "expected value, not end of input") + } + + if c := b[0]; c == '[' || c == '{' { + // Container value: build its AST so the seen-tracker can validate it + // and decodeAny can presize the resulting slices and maps. + nodeAny, rest, err := parserbridge.ParseValue(&d.p, b) + if err != nil { + return nil, err + } + node := nodeAny.(*unstable.Node) + rest, err = d.fusedFinishLine(rest) + if err != nil { + return nil, err + } + leafID, err := d.seen.CheckKeyValue(d.keyParts) + if err != nil { + return nil, d.fusedSeenError(rawKey, d.keyParts, err) + } + if err := d.seen.CheckValueUnder(leafID, node); err != nil { + return nil, d.fusedSeenError(rawKey, d.keyParts, err) + } + av, err := d.decodeAny(node) + if err != nil { + return nil, err + } + d.setFusedLeaf(cur, d.keyParts, av) + return rest, nil + } + + // Scalar value: scan it without building a node, then validate and convert + // it natively. + k, _, value, rest, err := parserbridge.ScanScalar(&d.p, b) + if err != nil { + return nil, err + } + kind := unstable.Kind(k) + rest, err = d.fusedFinishLine(rest) + if err != nil { + return nil, err + } + if _, err := d.seen.CheckKeyValue(d.keyParts); err != nil { + return nil, d.fusedSeenError(rawKey, d.keyParts, err) + } + av, err := d.fusedScalar(kind, value) + if err != nil { + return nil, err + } + d.setFusedLeaf(cur, d.keyParts, av) + return rest, nil +} + +// fusedSeenError turns a bare error returned by a SeenTracker parts-method +// into a ParserError carrying the position (the raw key span) and key path of +// the offending expression, so that it is reported as a DecodeError with +// context. It mirrors decoder.wrapSeenError for the fused (AST-less) path. +func (d *decoder) fusedSeenError(rawKey []byte, parts [][]byte, err error) error { + key := make(Key, len(parts)) + for i, p := range parts { + key[i] = string(p) + } + return &unstable.ParserError{ + Highlight: rawKey, + Message: strings.TrimPrefix(err.Error(), "toml: "), + Key: key, + } +} + +// fusedScalar converts a scanned scalar value into the native Go value used +// for generic targets. It mirrors the scalar cases of decodeAny. +func (d *decoder) fusedScalar(kind unstable.Kind, value []byte) (interface{}, error) { + switch kind { + case unstable.String: + return string(value), nil + case unstable.Integer: + i, err := parseInteger(value) + return i, err + case unstable.Float: + f, err := parseFloat(value) + return f, err + case unstable.Bool: + return value[0] == 't', nil + case unstable.DateTime: + t, err := parseDateTime(value) + return t, err + case unstable.LocalDateTime: + dt, rest, err := parseLocalDateTime(value) + if err != nil { + return nil, err + } + if len(rest) > 0 { + return nil, unstable.NewParserError(rest, "extra characters at the end of a local date time") + } + return dt, nil + case unstable.LocalDate: + date, err := parseLocalDate(value) + return date, err + case unstable.LocalTime: + t, rest, err := parseLocalTime(value) + if err != nil { + return nil, err + } + if len(rest) > 0 { + return nil, unstable.NewParserError(rest, "extra characters at the end of a local time") + } + return t, nil + default: + return nil, unstable.NewParserError(value, "unsupported value kind %s", kind) + } +} + +// anyTableParts navigates a [table] header (given its key parts) to the map it +// designates, creating intermediate tables as needed. +func (d *decoder) anyTableParts(m map[string]interface{}, parts [][]byte) map[string]interface{} { + cur := m + for _, p := range parts { + cur = d.anyChildTable(cur, d.intern(p)) + } + return cur +} + +// anyArrayTableParts navigates a [[array table]] header (given its key parts), +// appends a fresh element to the designated array, and returns it. first is +// true the first time this header is seen, in which case any pre-existing array +// (from a reused target) is reset. +func (d *decoder) anyArrayTableParts(m map[string]interface{}, parts [][]byte, first bool) map[string]interface{} { + cur := m + name := d.intern(parts[0]) + for i := 1; i < len(parts); i++ { + cur = d.anyChildTable(cur, name) + name = d.intern(parts[i]) + } + s, _ := cur[name].([]interface{}) + if first { + s = s[:0] + } + elem := map[string]interface{}{} + cur[name] = append(s, elem) + return elem +} + +// setFusedLeaf assigns av at the (possibly dotted) key parts within cur, +// creating intermediate maps as needed. +func (d *decoder) setFusedLeaf(cur map[string]interface{}, parts [][]byte, av interface{}) { + for i := 0; i < len(parts)-1; i++ { + cur = d.anyChildTable(cur, d.intern(parts[i])) + } + cur[d.intern(parts[len(parts)-1])] = av +} + +// wrapFusedError gives document context to errors produced by the fused +// decoder. +func (d *decoder) wrapFusedError(data []byte, err error) error { + var perr *unstable.ParserError + if errors.As(err, &perr) && len(perr.Highlight) == 0 { + // Mirror NextExpression: give end-of-input errors a usable position by + // extending the empty highlight to the last byte of the document. + if offset := cap(data) - cap(perr.Highlight); offset > 0 && offset == len(data) { + perr.Highlight = data[offset-1 : offset] + } + } + return d.wrapError(data, err) +} + +func fusedSkipWS(b []byte) []byte { + for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { + b = b[1:] + } + return b +} + +func fusedConsumeEOL(b []byte) ([]byte, error) { + if len(b) == 0 { + return b, nil + } + switch b[0] { + case '\n': + return b[1:], nil + case '\r': + if len(b) > 1 && b[1] == '\n' { + return b[2:], nil + } + } + return nil, unstable.NewParserError(b[:1], "expected newline but got %#U", b[0]) +} + +// fusedFinishLine consumes `ws [comment] (newline|eof)` after an expression. +func (d *decoder) fusedFinishLine(b []byte) ([]byte, error) { + b = fusedSkipWS(b) + if len(b) > 0 && b[0] == '#' { + _, rest, err := parserbridge.ScanComment(b) + if err != nil { + return nil, err + } + b = rest + } + return fusedConsumeEOL(b) +} + +func fusedHL1(b []byte) []byte { + if len(b) > 0 { + return b[:1] + } + return b +} diff --git a/vendor/github.com/pelletier/go-toml/v2/errors.go b/vendor/github.com/pelletier/go-toml/v2/errors.go index d68835df..8e10b128 100644 --- a/vendor/github.com/pelletier/go-toml/v2/errors.go +++ b/vendor/github.com/pelletier/go-toml/v2/errors.go @@ -1,8 +1,7 @@ package toml import ( - "fmt" - "reflect" + "errors" "strconv" "strings" @@ -47,7 +46,6 @@ func (s *StrictMissingError) String() string { if i > 0 { buf.WriteString("\n---\n") } - buf.WriteString(e.String()) } @@ -73,7 +71,8 @@ func (e *DecodeError) Error() string { return "toml: " + e.message } -// String returns the human-readable contextualized error. This string is multi-line. +// String returns the human-readable contextualized error. This string is +// multi-line. func (e *DecodeError) String() string { return e.human } @@ -84,200 +83,151 @@ func (e *DecodeError) Position() (row int, column int) { return e.line, e.column } -// Key that was being processed when the error occurred. The key is present only -// if this DecodeError is part of a StrictMissingError. +// Key that was being processed when the error occurred. func (e *DecodeError) Key() Key { return e.key } -// wrapDecodeError creates a DecodeError referencing a highlighted -// range of bytes from document. -// -// highlight needs to be a sub-slice of document, or this function panics. -// -// The function copies all bytes used in DecodeError, so that document and -// highlight can be freely deallocated. -// -//nolint:funlen +// wrapDecodeError creates a DecodeError from a ParserError. The highlight of +// the ParserError needs to be a subslice of the document. func wrapDecodeError(document []byte, de *unstable.ParserError) *DecodeError { - offset := subsliceOffset(document, de.Highlight) - - errMessage := de.Error() - errLine, errColumn := positionAtEnd(document[:offset]) - before, after := linesOfContext(document, de.Highlight, offset, 3) - - var buf strings.Builder + if de == nil { + return nil + } + return newDecodeError(document, de.Highlight, de.Key, de.Message) +} - maxLine := errLine + len(after) - 1 - lineColumnWidth := len(strconv.Itoa(maxLine)) +// newDecodeError creates a DecodeError pointing at the given highlight, which +// needs to be a subslice of the document. +func newDecodeError(document []byte, highlight []byte, key Key, message string) *DecodeError { + offset := subsliceOffset(document, highlight) - // Write the lines of context strictly before the error. - for i := len(before) - 1; i > 0; i-- { - line := errLine - i - buf.WriteString(formatLineNumber(line, lineColumnWidth)) - buf.WriteString("|") + errLineIdx, errColumn := positionAt(document, offset) - if len(before[i]) > 0 { - buf.WriteString(" ") - buf.Write(before[i]) - } + human := buildHumanContext(document, errLineIdx, errColumn, len(highlight), message) - buf.WriteRune('\n') + return &DecodeError{ + message: message, + line: errLineIdx + 1, + column: errColumn, + key: key, + human: human, } +} - // Write the document line that contains the error. - - buf.WriteString(formatLineNumber(errLine, lineColumnWidth)) - buf.WriteString("| ") - - if len(before) > 0 { - buf.Write(before[0]) +// subsliceOffset returns the offset of the subslice b within the document. +func subsliceOffset(document, b []byte) int { + // Highlights are subslices of the document, which means they share the + // same backing array, and their capacity counts the bytes between their + // start and the end of the backing array. + offset := cap(document) - cap(b) + if offset < 0 || offset+len(b) > len(document) { + panic(errors.New("highlight is not a subslice of the document")) } + return offset +} - buf.Write(de.Highlight) - - if len(after) > 0 { - buf.Write(after[0]) +// positionAt returns the 0-indexed line and the 1-indexed column of the given +// offset in the document. +func positionAt(document []byte, offset int) (lineIdx int, column int) { + lineStart := 0 + for i := 0; i < offset; i++ { + if document[i] == '\n' { + lineIdx++ + lineStart = i + 1 + } } + return lineIdx, offset - lineStart + 1 +} - buf.WriteRune('\n') - - // Write the line with the error message itself (so it does not have a line - // number). - - buf.WriteString(strings.Repeat(" ", lineColumnWidth)) - buf.WriteString("| ") - - if len(before) > 0 { - buf.WriteString(strings.Repeat(" ", len(before[0]))) +// docLines splits the document into lines, removing the trailing newline +// characters. +func docLines(document []byte) []string { + s := string(document) + lines := strings.Split(s, "\n") + for i, l := range lines { + lines[i] = strings.TrimSuffix(l, "\r") } + return lines +} - buf.WriteString(strings.Repeat("~", len(de.Highlight))) +// buildHumanContext renders the human-readable multi-line context of an +// error: a window of up to 3 lines before and after the error line, with +// the error position underlined. +func buildHumanContext(document []byte, errLineIdx, errColumn, highlightLen int, message string) string { + lines := docLines(document) - if len(errMessage) > 0 { - buf.WriteString(" ") - buf.WriteString(errMessage) + const window = 3 + firstIdx := errLineIdx - window + if firstIdx < 0 { + firstIdx = 0 } - - // Write the lines of context strictly after the error. - - for i := 1; i < len(after); i++ { - buf.WriteRune('\n') - line := errLine + i - buf.WriteString(formatLineNumber(line, lineColumnWidth)) - buf.WriteString("|") - - if len(after[i]) > 0 { - buf.WriteString(" ") - buf.Write(after[i]) - } + lastIdx := errLineIdx + window + if lastIdx > len(lines)-1 { + lastIdx = len(lines) - 1 } - - return &DecodeError{ - message: errMessage, - line: errLine, - column: errColumn, - key: de.Key, - human: buf.String(), + // Empty lines at the edges of the window are dropped, unless the error + // is about that very position. + for firstIdx < errLineIdx && lines[firstIdx] == "" { + firstIdx++ + } + for lastIdx > errLineIdx && lines[lastIdx] == "" { + lastIdx-- } -} - -func formatLineNumber(line int, width int) string { - format := "%" + strconv.Itoa(width) + "d" - return fmt.Sprintf(format, line) -} + // Width of the column of line numbers. + width := len(strconv.Itoa(lastIdx + 1)) -func linesOfContext(document []byte, highlight []byte, offset int, linesAround int) ([][]byte, [][]byte) { - return beforeLines(document, offset, linesAround), afterLines(document, highlight, offset, linesAround) -} + var buf strings.Builder -func beforeLines(document []byte, offset int, linesAround int) [][]byte { - var beforeLines [][]byte - - // Walk the document backward from the highlight to find previous lines - // of context. - rest := document[:offset] -backward: - for o := len(rest) - 1; o >= 0 && len(beforeLines) <= linesAround && len(rest) > 0; { - switch { - case rest[o] == '\n': - // handle individual lines - beforeLines = append(beforeLines, rest[o+1:]) - rest = rest[:o] - o = len(rest) - 1 - case o == 0: - // add the first line only if it's non-empty - beforeLines = append(beforeLines, rest) - - break backward - default: - o-- + writeLine := func(idx int) { + number := strconv.Itoa(idx + 1) + for i := len(number); i < width; i++ { + buf.WriteByte(' ') } - } - - return beforeLines -} - -func afterLines(document []byte, highlight []byte, offset int, linesAround int) [][]byte { - var afterLines [][]byte - - // Walk the document forward from the highlight to find the following - // lines of context. - rest := document[offset+len(highlight):] -forward: - for o := 0; o < len(rest) && len(afterLines) <= linesAround; { - switch { - case rest[o] == '\n': - // handle individual lines - afterLines = append(afterLines, rest[:o]) - rest = rest[o+1:] - o = 0 - - case o == len(rest)-1: - // add last line only if it's non-empty - afterLines = append(afterLines, rest) - - break forward - default: - o++ + buf.WriteString(number) + buf.WriteByte('|') + if len(lines[idx]) > 0 { + buf.WriteByte(' ') + buf.WriteString(lines[idx]) } + buf.WriteByte('\n') } - return afterLines -} - -func positionAtEnd(b []byte) (row int, column int) { - row = 1 - column = 1 + for idx := firstIdx; idx <= errLineIdx; idx++ { + writeLine(idx) + } - for _, c := range b { - if c == '\n' { - row++ - column = 1 - } else { - column++ + // Underline the error. + for i := 0; i < width; i++ { + buf.WriteByte(' ') + } + buf.WriteString("| ") + for i := 1; i < errColumn; i++ { + buf.WriteByte(' ') + } + // The highlight cannot extend past the end of its line. + tildes := highlightLen + if errLineIdx < len(lines) { + if avail := len(lines[errLineIdx]) - errColumn + 1; tildes > avail { + tildes = avail } } - - return row, column -} - -// subsliceOffset returns the byte offset of subslice within data. -// subslice must share the same backing array as data. -func subsliceOffset(data []byte, subslice []byte) int { - if len(subslice) == 0 { - return 0 + if tildes < 1 { + tildes = 1 } + for i := 0; i < tildes; i++ { + buf.WriteByte('~') + } + if message != "" { + buf.WriteByte(' ') + buf.WriteString(message) + } + buf.WriteByte('\n') - // Use reflect to get the data pointers of both slices. - // This is safe because we're only reading the pointer values for comparison. - dataPtr := reflect.ValueOf(data).Pointer() - subPtr := reflect.ValueOf(subslice).Pointer() - - offset := int(subPtr - dataPtr) - if offset < 0 || offset > len(data) { - panic("subslice is not within data") + for idx := errLineIdx + 1; idx <= lastIdx; idx++ { + writeLine(idx) } - return offset + + return strings.TrimSuffix(buf.String(), "\n") } diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/characters/ascii.go b/vendor/github.com/pelletier/go-toml/v2/internal/characters/ascii.go deleted file mode 100644 index 50a6d170..00000000 --- a/vendor/github.com/pelletier/go-toml/v2/internal/characters/ascii.go +++ /dev/null @@ -1,42 +0,0 @@ -package characters - -var invalidASCIITable = [256]bool{ - 0x00: true, - 0x01: true, - 0x02: true, - 0x03: true, - 0x04: true, - 0x05: true, - 0x06: true, - 0x07: true, - 0x08: true, - // 0x09 TAB - // 0x0A LF - 0x0B: true, - 0x0C: true, - // 0x0D CR - 0x0E: true, - 0x0F: true, - 0x10: true, - 0x11: true, - 0x12: true, - 0x13: true, - 0x14: true, - 0x15: true, - 0x16: true, - 0x17: true, - 0x18: true, - 0x19: true, - 0x1A: true, - 0x1B: true, - 0x1C: true, - 0x1D: true, - 0x1E: true, - 0x1F: true, - // 0x20 - 0x7E Printable ASCII characters - 0x7F: true, -} - -func InvalidASCII(b byte) bool { - return invalidASCIITable[b] -} diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/characters/utf8.go b/vendor/github.com/pelletier/go-toml/v2/internal/characters/utf8.go deleted file mode 100644 index 7c5cb55e..00000000 --- a/vendor/github.com/pelletier/go-toml/v2/internal/characters/utf8.go +++ /dev/null @@ -1,175 +0,0 @@ -// Package characters provides functions for working with string encodings. -package characters - -import ( - "unicode/utf8" -) - -// Utf8TomlValidAlreadyEscaped verifies that a given string is only made of -// valid UTF-8 characters allowed by the TOML spec: -// -// Any Unicode character may be used except those that must be escaped: -// quotation mark, backslash, and the control characters other than tab (U+0000 -// to U+0008, U+000A to U+001F, U+007F). -// -// It is a copy of the Go 1.17 utf8.Valid implementation, tweaked to exit early -// when a character is not allowed. -// -// The returned slice is empty if the string is valid, or contains the bytes -// of the invalid character. -// -// quotation mark => already checked -// backslash => already checked -// 0-0x8 => invalid -// 0x9 => tab, ok -// 0xA - 0x1F => invalid -// 0x7F => invalid -func Utf8TomlValidAlreadyEscaped(p []byte) []byte { - // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. - for len(p) >= 8 { - // Combining two 32 bit loads allows the same code to be used - // for 32 and 64 bit platforms. - // The compiler can generate a 32bit load for first32 and second32 - // on many platforms. See test/codegen/memcombine.go. - first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 - second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24 - if (first32|second32)&0x80808080 != 0 { - // Found a non ASCII byte (>= RuneSelf). - break - } - - for i, b := range p[:8] { - if InvalidASCII(b) { - return p[i : i+1] - } - } - - p = p[8:] - } - n := len(p) - for i := 0; i < n; { - pi := p[i] - if pi < utf8.RuneSelf { - if InvalidASCII(pi) { - return p[i : i+1] - } - i++ - continue - } - x := first[pi] - if x == xx { - // Illegal starter byte. - return p[i : i+1] - } - size := int(x & 7) - if i+size > n { - // Short or invalid. - return p[i:n] - } - accept := acceptRanges[x>>4] - if c := p[i+1]; c < accept.lo || accept.hi < c { - return p[i : i+2] - } else if size == 2 { //revive:disable:empty-block - } else if c := p[i+2]; c < locb || hicb < c { - return p[i : i+3] - } else if size == 3 { //revive:disable:empty-block - } else if c := p[i+3]; c < locb || hicb < c { - return p[i : i+4] - } - i += size - } - return nil -} - -// Utf8ValidNext returns the size of the next rune if valid, 0 otherwise. -func Utf8ValidNext(p []byte) int { - c := p[0] - - if c < utf8.RuneSelf { - if InvalidASCII(c) { - return 0 - } - return 1 - } - - x := first[c] - if x == xx { - // Illegal starter byte. - return 0 - } - size := int(x & 7) - if size > len(p) { - // Short or invalid. - return 0 - } - accept := acceptRanges[x>>4] - if c := p[1]; c < accept.lo || accept.hi < c { - return 0 - } else if size == 2 { //nolint:revive - } else if c := p[2]; c < locb || hicb < c { - return 0 - } else if size == 3 { //nolint:revive - } else if c := p[3]; c < locb || hicb < c { - return 0 - } - - return size -} - -// acceptRange gives the range of valid values for the second byte in a UTF-8 -// sequence. -type acceptRange struct { - lo uint8 // lowest value for second byte. - hi uint8 // highest value for second byte. -} - -// acceptRanges has size 16 to avoid bounds checks in the code that uses it. -var acceptRanges = [16]acceptRange{ - 0: {locb, hicb}, - 1: {0xA0, hicb}, - 2: {locb, 0x9F}, - 3: {0x90, hicb}, - 4: {locb, 0x8F}, -} - -// first is information about the first byte in a UTF-8 sequence. -var first = [256]uint8{ - // 1 2 3 4 5 6 7 8 9 A B C D E F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F - as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F - // 1 2 3 4 5 6 7 8 9 A B C D E F - xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F - xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F - xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF - xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF - xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF - s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF - s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF - s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF -} - -const ( - // The default lowest and highest continuation byte. - locb = 0b10000000 - hicb = 0b10111111 - - // These names of these constants are chosen to give nice alignment in the - // table below. The first nibble is an index into acceptRanges or F for - // special one-byte cases. The second nibble is the Rune length or the - // Status for the special one-byte case. - xx = 0xF1 // invalid: size 1 - as = 0xF0 // ASCII: size 1 - s1 = 0x02 // accept 0, size 2 - s2 = 0x13 // accept 1, size 3 - s3 = 0x03 // accept 0, size 3 - s4 = 0x23 // accept 2, size 3 - s5 = 0x34 // accept 3, size 4 - s6 = 0x04 // accept 0, size 4 - s7 = 0x44 // accept 4, size 4 -) diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/parserbridge/parserbridge.go b/vendor/github.com/pelletier/go-toml/v2/internal/parserbridge/parserbridge.go new file mode 100644 index 00000000..703aceea --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/internal/parserbridge/parserbridge.go @@ -0,0 +1,37 @@ +// Package parserbridge exposes the unstable parser's non-AST scanners to the +// root toml package without making them part of the unstable public API. +// +// The fused generic-decode fast path needs to scan keys, scalars and comments +// (and parse container values into the arena) without going through the +// AST-pushing NextExpression/Expression methods. Those scanners depend on +// Parser internals (the string-unescape scratch buffer and the node arena), so +// they have to live in the unstable package; but they are an implementation +// detail of the decoder, not something we want to commit to in the public API. +// +// The unstable package populates these variables in its init; the toml package +// reads them. The parser is passed as an any (it is always an *unstable.Parser) +// and the scalar kind is an int (it is always an unstable.Kind) so that this +// package imports neither unstable nor toml, avoiding an import cycle. Passing +// a pointer through an interface does not allocate, so the fused path keeps its +// allocation profile. +package parserbridge + +var ( + // ScanScalar scans a single scalar value (string, integer, float, bool or + // date/time) without building an AST node. kind is an unstable.Kind. + ScanScalar func(p any, b []byte) (kind int, raw, value, rest []byte, err error) + + // ScanKey scans a (possibly dotted) key without building AST nodes, + // appending each decoded part to dst. + ScanKey func(p any, b []byte, dst [][]byte) (parts [][]byte, raw, rest []byte, err error) + + // ScanComment scans a comment starting at '#', returning the comment bytes + // (including '#', excluding the line ending) and the rest of the input. It + // needs no parser state. + ScanComment func(b []byte) (comment, rest []byte, err error) + + // ParseValue parses a single value (including arrays and inline tables) into + // the parser arena, returning the root *unstable.Node and the rest of the + // input. + ParseValue func(p any, b []byte) (node any, rest []byte, err error) +) diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/key.go b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/key.go index 6344fd04..661b11cf 100644 --- a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/key.go +++ b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/key.go @@ -14,7 +14,8 @@ func (t *KeyTracker) UpdateTable(node *unstable.Node) { t.Push(node) } -// UpdateArrayTable sets the state of the tracker with the AST array table node. +// UpdateArrayTable sets the state of the tracker with the AST array table +// node. func (t *KeyTracker) UpdateArrayTable(node *unstable.Node) { t.reset() t.Push(node) diff --git a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go index 20623580..e88cd941 100644 --- a/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go +++ b/vendor/github.com/pelletier/go-toml/v2/internal/tracker/seen.go @@ -3,7 +3,6 @@ package tracker import ( "bytes" "fmt" - "sync" "github.com/pelletier/go-toml/v2/unstable" ) @@ -12,9 +11,21 @@ type keyKind uint8 const ( invalidKind keyKind = iota + // valueKind is a regular value (scalar, array, or inline table). It + // cannot be extended. valueKind + // kvTableKind is a table created implicitly by a dotted key. It can only + // be extended by other dotted keys. + kvTableKind + // tableKind is a table created by a [header]. The explicit flag tells + // whether the table was created by its own header (true) or as an + // intermediate step of a longer key (false). tableKind + // arrayTableKind is an array of tables created by [[header]]. arrayTableKind + // anonymousKind is an entry that cannot be looked up by name. It serves + // as the parent of the content of inline tables stored inside arrays. + anonymousKind ) func (k keyKind) String() string { @@ -23,22 +34,36 @@ func (k keyKind) String() string { return "invalid" case valueKind: return "value" + case kvTableKind: + return "kv-table" case tableKind: return "table" case arrayTableKind: - return "array table" + return "array-table" + case anonymousKind: + return "anonymous" } panic("missing keyKind string mapping") } +// entry represents a node that has been seen in the document. Its size has a +// direct impact on the performance of unmarshaling documents: keep it as +// small as possible. +type entry struct { + parent int32 + kind keyKind + explicit bool + name []byte +} + // SeenTracker tracks which keys have been seen with which TOML type to flag // duplicates and mismatches according to the spec. // -// Each node in the visited tree is represented by an entry. Each entry has an -// identifier, which is provided by a counter. Entries are stored in the array -// entries. As new nodes are discovered (referenced for the first time in the -// TOML document), entries are created and appended to the array. An entry -// points to its parent using its id. +// Each node in the visited tree is represented by an entry. Each entry has +// an identifier, which is provided by a counter. Entries are stored in the +// array entries. As new nodes are discovered (referenced for the first time +// in the TOML document), entries are created and appended to the array. An +// entry points to its parent using its id. // // To find whether a given key (sequence of []byte) has already been visited, // the entries are linearly searched, looking for one with the right name and @@ -53,307 +78,373 @@ func (k keyKind) String() string { // invariant above, the deletion process needs to keep the order of entries. // This results in more copies in that case. type SeenTracker struct { - entries []entry - currentIdx int -} + entries []entry + currentTable int32 -var pool = sync.Pool{ - New: func() interface{} { - return &SeenTracker{} - }, + // scratch buffers for clear() + removedBuf []bool + remapBuf []int32 } -func (s *SeenTracker) reset() { - // Always contains a root element at index 0. - s.currentIdx = 0 - if len(s.entries) == 0 { - s.entries = make([]entry, 1, 2) - } else { - s.entries = s.entries[:1] - } - s.entries[0].child = -1 - s.entries[0].next = -1 +// Reset brings the tracker to its initial state, with just a root table, so +// that it can be reused across documents. +func (s *SeenTracker) Reset() { + s.reset() } -type entry struct { - // Use -1 to indicate no child or no sibling. - child int - next int - - name []byte - kind keyKind - explicit bool - kv bool +// reset brings the tracker to its initial state, with just a root table. +func (s *SeenTracker) reset() { + s.entries = append(s.entries[:0], entry{ + parent: -1, + kind: tableKind, + }) + s.currentTable = 0 } -// Find the index of the child of parentIdx with key k. Returns -1 if -// it does not exist. -func (s *SeenTracker) find(parentIdx int, k []byte) int { - for i := s.entries[parentIdx].child; i >= 0; i = s.entries[i].next { - if bytes.Equal(s.entries[i].name, k) { - return i +// find returns the id of the entry with the given parent and name, or -1. +// Anonymous entries are never returned. +func (s *SeenTracker) find(parent int32, name []byte) int32 { + // Children always appear after their parent. + for i := int(parent) + 1; i < len(s.entries); i++ { + e := &s.entries[i] + if e.parent == parent && e.kind != anonymousKind && bytes.Equal(e.name, name) { + return int32(i) //nolint:gosec // entry counts are bounded by document size } } return -1 } -// Remove all descendants of node at position idx. -func (s *SeenTracker) clear(idx int) { - if idx >= len(s.entries) { - return - } - - for i := s.entries[idx].child; i >= 0; { - next := s.entries[i].next - n := s.entries[0].next - s.entries[0].next = i - s.entries[i].next = n - s.entries[i].name = nil - s.clear(i) - i = next - } - - s.entries[idx].child = -1 -} - -func (s *SeenTracker) create(parentIdx int, name []byte, kind keyKind, explicit bool, kv bool) int { - e := entry{ - child: -1, - next: s.entries[parentIdx].child, - - name: name, +// create appends a new entry and returns its id. +func (s *SeenTracker) create(parent int32, name []byte, kind keyKind, explicit bool) int32 { + id := int32(len(s.entries)) //nolint:gosec // entry counts are bounded by document size + s.entries = append(s.entries, entry{ + parent: parent, kind: kind, explicit: explicit, - kv: kv, + name: name, + }) + return id +} + +// clear removes all the descendants of the entry with the given id, keeping +// the order of the remaining entries. +func (s *SeenTracker) clear(id int32) { + // Compute which entries are removed. Given that children always appear + // after their parent, a single forward pass is enough. + if cap(s.removedBuf) < len(s.entries) { + s.removedBuf = make([]bool, len(s.entries)) + s.remapBuf = make([]int32, len(s.entries)) } - var idx int - if s.entries[0].next >= 0 { - idx = s.entries[0].next - s.entries[0].next = s.entries[idx].next - s.entries[idx] = e - } else { - idx = len(s.entries) - s.entries = append(s.entries, e) + removed := s.removedBuf[:len(s.entries)] + remap := s.remapBuf[:len(s.entries)] + for i := range removed { + removed[i] = false } - s.entries[parentIdx].child = idx - - return idx -} - -func (s *SeenTracker) setExplicitFlag(parentIdx int) { - for i := s.entries[parentIdx].child; i >= 0; i = s.entries[i].next { - if s.entries[i].kv { - s.entries[i].explicit = true - s.entries[i].kv = false + n := int32(0) + for i := 0; i < len(s.entries); i++ { + parent := s.entries[i].parent + if parent >= 0 && (parent == id && s.entries[i].kind != invalidKind || removed[parent]) { + removed[i] = true + continue } - s.setExplicitFlag(i) + remap[i] = n + if int32(i) != n { //nolint:gosec // entry counts are bounded by document size + e := s.entries[i] + e.parent = remap[e.parent] + s.entries[n] = e + } + n++ } + s.entries = s.entries[:n] } // CheckExpression takes a top-level node and checks that it does not contain // keys that have been seen in previous calls, and validates that types are -// consistent. It returns true if it is the first time this node's key is seen. -// Useful to clear array tables on first use. +// consistent. It returns true if it is the first time this node's key is +// seen. Useful to clear array tables on first use. func (s *SeenTracker) CheckExpression(node *unstable.Node) (bool, error) { - if s.entries == nil { + if len(s.entries) == 0 { s.reset() } switch node.Kind { case unstable.KeyValue: - return s.checkKeyValue(node) + return false, s.checkKeyValue(s.currentTable, node) case unstable.Table: return s.checkTable(node) case unstable.ArrayTable: return s.checkArrayTable(node) default: - panic(fmt.Errorf("this should not be a top level node type: %s", node.Kind)) + return false, fmt.Errorf("toml: unexpected expression kind %s", node.Kind) } } -func (s *SeenTracker) checkTable(node *unstable.Node) (bool, error) { - if s.currentIdx >= 0 { - s.setExplicitFlag(s.currentIdx) - } - - it := node.Key() - - parentIdx := 0 - - // This code is duplicated in checkArrayTable. This is because factoring - // it in a function requires to copy the iterator, or allocate it to the - // heap, which is not cheap. - for it.Next() { - if it.IsLast() { - break +// CheckTable validates a [table] header given the decoded parts of its key. +// It mirrors checkTable but is driven directly from the key parts instead of +// an AST, for callers that decode without building one. It returns whether the +// table is seen for the first time. +func (s *SeenTracker) CheckTable(parts [][]byte) (bool, error) { + parent := int32(0) + for k := 0; k < len(parts); k++ { + name := parts[k] + if k == len(parts)-1 { + // Final part of the key. + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, true) + s.currentTable = i + return true, nil + } + e := &s.entries[i] + switch e.kind { + case tableKind: + if e.explicit { + return false, fmt.Errorf("toml: table %s already exists", name) + } + e.explicit = true + s.currentTable = i + return false, nil + case kvTableKind: + return false, fmt.Errorf("toml: table %s already exists as defined by a dotted key", name) + case arrayTableKind: + return false, fmt.Errorf("toml: table %s already exists as an array of tables", name) + default: + return false, fmt.Errorf("toml: key %s should be a table, not a %s", name, e.kind) + } } - k := it.Node().Data + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, false) + } else { + switch s.entries[i].kind { + case tableKind, arrayTableKind, kvTableKind: + // Tables created by dotted keys can receive new sub-tables, + // but cannot be redefined (handled by the last-part case). + default: + return false, fmt.Errorf("toml: key %s already exists as a value", name) + } + } + parent = i + } + panic("unreachable: table expression without key") +} - idx := s.find(parentIdx, k) +// CheckArrayTable validates a [[array table]] header given the decoded parts +// of its key. It mirrors checkArrayTable but is driven directly from the key +// parts. It returns whether the array table is seen for the first time. +func (s *SeenTracker) CheckArrayTable(parts [][]byte) (bool, error) { + parent := int32(0) + for k := 0; k < len(parts); k++ { + name := parts[k] + if k == len(parts)-1 { + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, arrayTableKind, true) + s.currentTable = i + return true, nil + } + if s.entries[i].kind != arrayTableKind { + return false, fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", name, s.entries[i].kind) + } + // Make the descendants of this array table re-discoverable for + // the new element. + s.clear(i) + s.currentTable = i + return false, nil + } - if idx < 0 { - idx = s.create(parentIdx, k, tableKind, false, false) + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, false) } else { - entry := s.entries[idx] - if entry.kind == valueKind { - return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) + switch s.entries[i].kind { + case tableKind, arrayTableKind, kvTableKind: + // Tables created by dotted keys can receive new sub-tables, + // but cannot be redefined (handled by the last-part case). + default: + return false, fmt.Errorf("toml: key %s already exists as a value", name) } } - parentIdx = idx + parent = i } + panic("unreachable: array table expression without key") +} - k := it.Node().Data - idx := s.find(parentIdx, k) - - first := false - if idx >= 0 { - kind := s.entries[idx].kind - if kind != tableKind { - return false, fmt.Errorf("toml: key %s should be a table, not a %s", string(k), kind) +// CheckKeyValue validates the (possibly dotted) key of a key-value under the +// current table, WITHOUT validating its value. It returns the id of the leaf +// entry, so the caller can validate a container value with CheckValueUnder. +func (s *SeenTracker) CheckKeyValue(parts [][]byte) (int32, error) { + parent := s.currentTable + for k := 0; k < len(parts); k++ { + name := parts[k] + if k == len(parts)-1 { + if i := s.find(parent, name); i >= 0 { + return -1, fmt.Errorf("toml: key %s is already defined", name) + } + return s.create(parent, name, valueKind, false), nil } - if s.entries[idx].explicit { - return false, fmt.Errorf("toml: table %s already exists", string(k)) + + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, kvTableKind, false) + } else if s.entries[i].kind != kvTableKind { + return -1, fmt.Errorf("toml: key %s is already defined", name) } - s.entries[idx].explicit = true - } else { - idx = s.create(parentIdx, k, tableKind, true, false) - first = true + parent = i } + panic("unreachable: key-value expression without key") +} - s.currentIdx = idx - - return first, nil +// CheckValueUnder validates the content of a value stored under the given +// entry (typically the leaf returned by CheckKeyValue): inline tables cannot +// contain duplicate keys, including in the inline tables and arrays they +// contain. +func (s *SeenTracker) CheckValueUnder(parent int32, value *unstable.Node) error { + return s.checkValue(parent, value) } -func (s *SeenTracker) checkArrayTable(node *unstable.Node) (bool, error) { - if s.currentIdx >= 0 { - s.setExplicitFlag(s.currentIdx) - } +func (s *SeenTracker) checkTable(node *unstable.Node) (bool, error) { + parent := int32(0) it := node.Key() - - parentIdx := 0 - + // Handle the intermediate parts of the key. for it.Next() { + part := it.Node() + name := part.Data if it.IsLast() { - break + // Final part of the key. + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, true) + s.currentTable = i + return true, nil + } + e := &s.entries[i] + switch e.kind { + case tableKind: + if e.explicit { + return false, fmt.Errorf("toml: table %s already exists", name) + } + e.explicit = true + s.currentTable = i + return false, nil + case kvTableKind: + return false, fmt.Errorf("toml: table %s already exists as defined by a dotted key", name) + case arrayTableKind: + return false, fmt.Errorf("toml: table %s already exists as an array of tables", name) + default: + return false, fmt.Errorf("toml: key %s should be a table, not a %s", name, e.kind) + } } - k := it.Node().Data - - idx := s.find(parentIdx, k) - - if idx < 0 { - idx = s.create(parentIdx, k, tableKind, false, false) + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, false) } else { - entry := s.entries[idx] - if entry.kind == valueKind { - return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) + switch s.entries[i].kind { + case tableKind, arrayTableKind, kvTableKind: + // Tables created by dotted keys can receive new sub-tables, + // but cannot be redefined (handled by the last-part case). + default: + return false, fmt.Errorf("toml: key %s already exists as a value", name) } } - - parentIdx = idx + parent = i } - - k := it.Node().Data - idx := s.find(parentIdx, k) - - firstTime := idx < 0 - if firstTime { - idx = s.create(parentIdx, k, arrayTableKind, true, false) - } else { - kind := s.entries[idx].kind - if kind != arrayTableKind { - return false, fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", kind, string(k)) - } - s.clear(idx) - } - - s.currentIdx = idx - - return firstTime, nil + panic("unreachable: table expression without key") } -func (s *SeenTracker) checkKeyValue(node *unstable.Node) (bool, error) { - parentIdx := s.currentIdx - it := node.Key() +func (s *SeenTracker) checkArrayTable(node *unstable.Node) (bool, error) { + parent := int32(0) + it := node.Key() for it.Next() { - k := it.Node().Data - - idx := s.find(parentIdx, k) + part := it.Node() + name := part.Data + if it.IsLast() { + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, arrayTableKind, true) + s.currentTable = i + return true, nil + } + if s.entries[i].kind != arrayTableKind { + return false, fmt.Errorf("toml: key %s already exists as a %s, but should be an array table", name, s.entries[i].kind) + } + // Make the descendants of this array table re-discoverable for + // the new element. + s.clear(i) + // Note: clear cannot move i because i comes before all its + // descendants. + s.currentTable = i + return false, nil + } - if idx < 0 { - idx = s.create(parentIdx, k, tableKind, false, true) + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, tableKind, false) } else { - entry := s.entries[idx] - switch { - case it.IsLast(): - return false, fmt.Errorf("toml: key %s is already defined", string(k)) - case entry.kind != tableKind: - return false, fmt.Errorf("toml: expected %s to be a table, not a %s", string(k), entry.kind) - case entry.explicit: - return false, fmt.Errorf("toml: cannot redefine table %s that has already been explicitly defined", string(k)) + switch s.entries[i].kind { + case tableKind, arrayTableKind, kvTableKind: + // Tables created by dotted keys can receive new sub-tables, + // but cannot be redefined (handled by the last-part case). + default: + return false, fmt.Errorf("toml: key %s already exists as a value", name) } } - - parentIdx = idx - } - - s.entries[parentIdx].kind = valueKind - - value := node.Value() - - switch value.Kind { - case unstable.InlineTable: - return s.checkInlineTable(value) - case unstable.Array: - return s.checkArray(value) - default: - return false, nil + parent = i } + panic("unreachable: array table expression without key") } -func (s *SeenTracker) checkArray(node *unstable.Node) (first bool, err error) { - it := node.Children() +func (s *SeenTracker) checkKeyValue(parent int32, node *unstable.Node) error { + it := node.Key() for it.Next() { - n := it.Node() - switch n.Kind { //nolint:exhaustive - case unstable.InlineTable: - first, err = s.checkInlineTable(n) - if err != nil { - return false, err - } - case unstable.Array: - first, err = s.checkArray(n) - if err != nil { - return false, err + part := it.Node() + name := part.Data + if it.IsLast() { + if i := s.find(parent, name); i >= 0 { + return fmt.Errorf("toml: key %s is already defined", name) } + id := s.create(parent, name, valueKind, false) + return s.checkValue(id, node.Value()) } + + i := s.find(parent, name) + if i < 0 { + i = s.create(parent, name, kvTableKind, false) + } else if s.entries[i].kind != kvTableKind { + return fmt.Errorf("toml: key %s is already defined", name) + } + parent = i } - return first, nil + panic("unreachable: key-value expression without key") } -func (s *SeenTracker) checkInlineTable(node *unstable.Node) (first bool, err error) { - s = pool.Get().(*SeenTracker) - s.reset() - - it := node.Children() - for it.Next() { - n := it.Node() - first, err = s.checkKeyValue(n) - if err != nil { - return false, err +// checkValue verifies the content of a value: inline tables cannot contain +// duplicate keys, including in the inline tables and arrays they contain. +func (s *SeenTracker) checkValue(id int32, value *unstable.Node) error { + switch value.Kind { + case unstable.InlineTable: + it := value.Children() + for it.Next() { + if err := s.checkKeyValue(id, it.Node()); err != nil { + return err + } + } + case unstable.Array: + it := value.Children() + for it.Next() { + elem := it.Node() + if elem.Kind == unstable.InlineTable || elem.Kind == unstable.Array { + elemID := s.create(id, nil, anonymousKind, false) + if err := s.checkValue(elemID, elem); err != nil { + return err + } + } } + default: } - - // As inline tables are self-contained, the tracker does not - // need to retain the details of what they contain. The - // keyValue element that creates the inline table is kept to - // mark the presence of the inline table and prevent - // redefinition of its keys: check* functions cannot walk into - // a value. - pool.Put(s) - return first, nil + return nil } diff --git a/vendor/github.com/pelletier/go-toml/v2/localtime.go b/vendor/github.com/pelletier/go-toml/v2/localtime.go index 502ef2f2..eb8c20e6 100644 --- a/vendor/github.com/pelletier/go-toml/v2/localtime.go +++ b/vendor/github.com/pelletier/go-toml/v2/localtime.go @@ -62,7 +62,7 @@ func (d LocalTime) String() string { } else if d.Nanosecond > 0 { // Nanoseconds are specified, but precision is not provided. Use the // minimum. - s += strings.Trim(fmt.Sprintf(".%09d", d.Nanosecond), "0") + s += strings.TrimRight(fmt.Sprintf(".%09d", d.Nanosecond), "0") } return s @@ -77,7 +77,7 @@ func (d LocalTime) MarshalText() ([]byte, error) { func (d *LocalTime) UnmarshalText(b []byte) error { res, left, err := parseLocalTime(b) if err == nil && len(left) != 0 { - err = unstable.NewParserError(left, "extra characters") + err = unstable.NewParserError(left, "extra characters at the end of a local time") } if err != nil { return err @@ -111,12 +111,11 @@ func (d LocalDateTime) MarshalText() ([]byte, error) { func (d *LocalDateTime) UnmarshalText(data []byte) error { res, left, err := parseLocalDateTime(data) if err == nil && len(left) != 0 { - err = unstable.NewParserError(left, "extra characters") + err = unstable.NewParserError(left, "extra characters at the end of a local date time") } if err != nil { return err } - *d = res return nil } diff --git a/vendor/github.com/pelletier/go-toml/v2/marshaler.go b/vendor/github.com/pelletier/go-toml/v2/marshaler.go index ca462d40..8b8d7723 100644 --- a/vendor/github.com/pelletier/go-toml/v2/marshaler.go +++ b/vendor/github.com/pelletier/go-toml/v2/marshaler.go @@ -12,25 +12,33 @@ import ( "slices" "strconv" "strings" + "sync" "time" - "unicode" - - "github.com/pelletier/go-toml/v2/internal/characters" + "unicode/utf8" ) // Marshal serializes a Go value as a TOML document. // // It is a shortcut for Encoder.Encode() with the default options. func Marshal(v interface{}) ([]byte, error) { - var buf bytes.Buffer - enc := NewEncoder(&buf) + enc := Encoder{indentSymbol: " "} + + e := encoderStatePool.Get().(*encoderState) + e.Encoder = &enc + e.buf = e.buf[:0] + e.keyStack = e.keyStack[:0] + e.lastWasHeader = false - err := enc.Encode(v) + err := e.encodeRoot(v) if err != nil { + encoderStatePool.Put(e) return nil, err } - return buf.Bytes(), nil + out := make([]byte, len(e.buf)) + copy(out, e.buf) + encoderStatePool.Put(e) + return out, nil } // Encoder writes a TOML document to an output stream. @@ -56,8 +64,8 @@ func NewEncoder(w io.Writer) *Encoder { // SetTablesInline forces the encoder to emit all tables inline. // -// This behavior can be controlled on an individual struct field basis with the -// inline tag: +// This behavior can be controlled on an individual struct field basis with +// the inline tag: // // MyField `toml:",inline"` func (enc *Encoder) SetTablesInline(inline bool) *Encoder { @@ -65,10 +73,11 @@ func (enc *Encoder) SetTablesInline(inline bool) *Encoder { return enc } -// SetArraysMultiline forces the encoder to emit all arrays with one element per -// line. +// SetArraysMultiline forces the encoder to emit all arrays with one element +// per line. // -// This behavior can be controlled on an individual struct field basis with the multiline tag: +// This behavior can be controlled on an individual struct field basis with +// the multiline tag: // // MyField `multiline:"true"` func (enc *Encoder) SetArraysMultiline(multiline bool) *Encoder { @@ -119,9 +128,9 @@ func (enc *Encoder) SetMarshalJSONNumbers(indent bool) *Encoder { // // Intermediate tables are always printed. // -// By default, strings are encoded as literal string, unless they contain either -// a newline character or a single quote. In that case they are emitted as -// quoted strings. +// By default, strings are encoded as literal string, unless they contain +// either a newline character or a single quote. In that case they are emitted +// as quoted strings. // // Unsigned integers larger than math.MaxInt64 cannot be encoded. Doing so // results in an error. This rule exists because the TOML specification only @@ -130,8 +139,8 @@ func (enc *Encoder) SetMarshalJSONNumbers(indent bool) *Encoder { // readable (at best) by other implementations. To encode such numbers, a // solution is a custom type that implements encoding.TextMarshaler. // -// When encoding structs, fields are encoded in order of definition, with their -// exact name. +// When encoding structs, fields are encoded in order of definition, with +// their exact name. // // Tables and array tables are separated by empty lines. However, consecutive // subtables definitions are not. For example: @@ -151,1052 +160,1061 @@ func (enc *Encoder) SetMarshalJSONNumbers(indent bool) *Encoder { // The encoding of each public struct field can be customized by the format // string in the "toml" key of the struct field's tag. This follows // encoding/json's convention. The format string starts with the name of the -// field, optionally followed by a comma-separated list of options. The name may -// be empty in order to provide options without overriding the default name. +// field, optionally followed by a comma-separated list of options. The name +// may be empty in order to provide options without overriding the default +// name. // // The "multiline" option emits strings as quoted multi-line TOML strings. It // has no effect on fields that would not be encoded as strings. // -// The "inline" option turns fields that would be emitted as tables into inline -// tables instead. It has no effect on other fields. +// The "inline" option turns fields that would be emitted as tables into +// inline tables instead. It has no effect on other fields. // // The "omitempty" option prevents empty values or groups from being emitted. // // The "omitzero" option prevents zero values or groups from being emitted. // -// The "commented" option prefixes the value and all its children with a comment -// symbol. +// The "commented" option prefixes the value and all its children with a +// comment symbol. // -// In addition to the "toml" tag struct tag, a "comment" tag can be used to emit -// a TOML comment before the value being annotated. Comments are ignored inside -// inline tables. For array tables, the comment is only present before the first -// element of the array. +// In addition to the "toml" tag struct tag, a "comment" tag can be used to +// emit a TOML comment before the value being annotated. Comments are ignored +// inside inline tables. For array tables, the comment is only present before +// the first element of the array. func (enc *Encoder) Encode(v interface{}) error { - var ( - b []byte - ctx encoderCtx - ) - - ctx.inline = enc.tablesInline - - if v == nil { - return errors.New("toml: cannot encode a nil interface") - } + e := encoderStatePool.Get().(*encoderState) + e.Encoder = enc + e.buf = e.buf[:0] + e.keyStack = e.keyStack[:0] + e.lastWasHeader = false - b, err := enc.encode(b, ctx, reflect.ValueOf(v)) + err := e.encodeRoot(v) if err != nil { + encoderStatePool.Put(e) return err } - _, err = enc.w.Write(b) + _, err = enc.w.Write(e.buf) + encoderStatePool.Put(e) if err != nil { return fmt.Errorf("toml: cannot write: %w", err) } - return nil } -type valueOptions struct { - multiline bool - omitempty bool - omitzero bool - commented bool - comment string +var encoderStatePool = sync.Pool{ + New: func() interface{} { return &encoderState{} }, } -type encoderCtx struct { - // Current top-level key. - parentKey []string +type encoderState struct { + *Encoder - // Key that should be used for a KV. - key string - // Extra flag to account for the empty string - hasKey bool + buf []byte - // Set to true to indicate that the encoder is inside a KV, so that all - // tables need to be inlined. - insideKv bool + // keyStack is the dotted key of the table being encoded, shared by the + // whole encode as a stack. + keyStack []string - // Set to true to skip the first table header in an array table. - skipTableHeader bool + // entriesPool recycles entry slices across tables of the same encode. + entriesPool [][]entry - // Should the next table be encoded as inline - inline bool + // lastWasHeader is true when the last line written was a table header, + // used to avoid empty lines between consecutive table definitions. + lastWasHeader bool - // Indentation level - indent int + // stringKeyBuf is a reusable buffer to read string map keys without + // allocating one per map. + stringKeyBuf reflect.Value +} - // Prefix the current value with a comment. +// valueOptions are the encoding options attached to one entry of a table. +type valueOptions struct { + multiline bool + inline bool + omitempty bool + omitzero bool commented bool + comment string +} - // Options coming from struct tags +// entry is a deferred key-value of a table being encoded. +type entry struct { + key string + value reflect.Value options valueOptions } -func (ctx *encoderCtx) shiftKey() { - if ctx.hasKey { - ctx.parentKey = append(ctx.parentKey, ctx.key) - ctx.clearKey() +func (e *encoderState) encodeRoot(v interface{}) error { + if v == nil { + return errors.New("toml: cannot encode a nil interface") } -} - -func (ctx *encoderCtx) setKey(k string) { - ctx.key = k - ctx.hasKey = true -} -func (ctx *encoderCtx) clearKey() { - ctx.key = "" - ctx.hasKey = false -} + rv := reflect.ValueOf(v) + rv, ok := resolve(rv) + if !ok { + return errors.New("toml: cannot encode a nil pointer") + } -func (ctx *encoderCtx) isRoot() bool { - return len(ctx.parentKey) == 0 && !ctx.hasKey + switch rv.Kind() { + case reflect.Map, reflect.Struct: + if isValueKind(rv) { + return fmt.Errorf("toml: cannot encode a %s as a document root", rv.Type()) + } + return e.encodeTable(rv, false, 0) + default: + return fmt.Errorf("toml: cannot encode a %s as a document root", rv.Type()) + } } -func (enc *Encoder) encode(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - i := v.Interface() - - switch x := i.(type) { - case time.Time: - if x.Nanosecond() > 0 { - return x.AppendFormat(b, time.RFC3339Nano), nil - } - return x.AppendFormat(b, time.RFC3339), nil - case LocalTime: - return append(b, x.String()...), nil - case LocalDate: - return append(b, x.String()...), nil - case LocalDateTime: - return append(b, x.String()...), nil - case json.Number: - if enc.marshalJSONNumbers { - if x == "" { /// Useful zero value. - return append(b, "0"...), nil - } else if v, err := x.Int64(); err == nil { - return enc.encode(b, ctx, reflect.ValueOf(v)) - } else if f, err := x.Float64(); err == nil { - return enc.encode(b, ctx, reflect.ValueOf(f)) +// resolve unwraps pointers and interfaces until a concrete value is found. +// Returns false if it resolves to nil. +func resolve(v reflect.Value) (reflect.Value, bool) { + for { + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() { + return v, false + } + v = v.Elem() + case reflect.Interface: + if v.IsNil() { + return v, false } - return nil, fmt.Errorf("toml: unable to convert %q to int64 or float64", x) + v = v.Elem() + default: + return v, true } } +} - hasTextMarshaler := v.Type().Implements(textMarshalerType) - if hasTextMarshaler || (v.CanAddr() && reflect.PointerTo(v.Type()).Implements(textMarshalerType)) { - if !hasTextMarshaler { - v = v.Addr() - } - - if ctx.isRoot() { - return nil, fmt.Errorf("toml: type %s implementing the TextMarshaler interface cannot be a root element", v.Type()) - } - - text, err := v.Interface().(encoding.TextMarshaler).MarshalText() - if err != nil { - return nil, err - } +// typeEncProps caches the per-type facts used on every value encode. +type typeEncProps struct { + // 0: not a TextMarshaler, 1: the type implements it, 2: its pointer does + text uint8 + // encoded as a TOML value (as opposed to a table) + isValue bool +} - b = enc.encodeString(b, string(text), ctx.options) +var typeEncPropsCache sync.Map // reflect.Type -> typeEncProps - return b, nil +func encPropsForType(t reflect.Type) typeEncProps { + if p, ok := typeEncPropsCache.Load(t); ok { + return p.(typeEncProps) } - - switch v.Kind() { - // containers - case reflect.Map: - return enc.encodeMap(b, ctx, v) - case reflect.Struct: - return enc.encodeStruct(b, ctx, v) - case reflect.Slice, reflect.Array: - return enc.encodeSlice(b, ctx, v) - case reflect.Interface: - if v.IsNil() { - return nil, errors.New("toml: encoding a nil interface is not supported") - } - - return enc.encode(b, ctx, v.Elem()) - case reflect.Ptr: - if v.IsNil() { - return enc.encode(b, ctx, reflect.Zero(v.Type().Elem())) - } - - return enc.encode(b, ctx, v.Elem()) - - // values - case reflect.String: - b = enc.encodeString(b, v.String(), ctx.options) - case reflect.Float32: - f := v.Float() - - switch { - case math.IsNaN(f): - b = append(b, "nan"...) - case f > math.MaxFloat32: - b = append(b, "inf"...) - case f < -math.MaxFloat32: - b = append(b, "-inf"...) - case math.Trunc(f) == f: - b = strconv.AppendFloat(b, f, 'f', 1, 32) - default: - b = strconv.AppendFloat(b, f, 'f', -1, 32) - } - case reflect.Float64: - f := v.Float() - switch { - case math.IsNaN(f): - b = append(b, "nan"...) - case f > math.MaxFloat64: - b = append(b, "inf"...) - case f < -math.MaxFloat64: - b = append(b, "-inf"...) - case math.Trunc(f) == f: - b = strconv.AppendFloat(b, f, 'f', 1, 64) - default: - b = strconv.AppendFloat(b, f, 'f', -1, 64) - } - case reflect.Bool: - if v.Bool() { - b = append(b, "true"...) + var p typeEncProps + switch { + case t.Implements(textMarshalerType): + p.text = 1 + case reflect.PtrTo(t).Implements(textMarshalerType): + p.text = 2 + } + switch t { + case timeType, localDateType, localTimeType, localDateTimeType: + p.isValue = true + default: + if p.text != 0 { + p.isValue = true } else { - b = append(b, "false"...) - } - case reflect.Uint64, reflect.Uint32, reflect.Uint16, reflect.Uint8, reflect.Uint: - x := v.Uint() - if x > uint64(math.MaxInt64) { - return nil, fmt.Errorf("toml: not encoding uint (%d) greater than max int64 (%d)", x, int64(math.MaxInt64)) + switch t.Kind() { + case reflect.Map, reflect.Struct: + p.isValue = false + default: + p.isValue = true + } } - b = strconv.AppendUint(b, x, 10) - case reflect.Int64, reflect.Int32, reflect.Int16, reflect.Int8, reflect.Int: - b = strconv.AppendInt(b, v.Int(), 10) - default: - return nil, fmt.Errorf("toml: cannot encode value of type %s", v.Kind()) } + typeEncPropsCache.Store(t, p) + return p +} - return b, nil +// isValueKind returns true when the resolved value is encoded as a TOML +// value (as opposed to a table). +func isValueKind(v reflect.Value) bool { + return encPropsForType(v.Type()).isValue } -func isNil(v reflect.Value) bool { - switch v.Kind() { - case reflect.Ptr, reflect.Interface, reflect.Map: - return v.IsNil() - default: +// isTableLike returns true when the value should be encoded as a table (or +// an array of tables for slices). +func (e *encoderState) isTableLike(v reflect.Value) bool { + v, ok := resolve(v) + if !ok { + // Unresolvable values (interface-held nil pointers) are encoded as + // the zero value of their element type by the value path. return false } + return !isValueKind(v) } -func shouldOmitEmpty(options valueOptions, v reflect.Value) bool { - return options.omitempty && isEmptyValue(v) -} - -func shouldOmitZero(options valueOptions, v reflect.Value) bool { - if !options.omitzero { +// isArrayOfTables returns true when the value is a non-empty slice or array +// containing only table-like values. +func (e *encoderState) isArrayOfTables(v reflect.Value) bool { + v, ok := resolve(v) + if !ok { return false } - - // Check if the type implements isZeroer interface (has a custom IsZero method). - if v.Type().Implements(isZeroerType) { - return v.Interface().(isZeroer).IsZero() + if v.Kind() != reflect.Slice && v.Kind() != reflect.Array { + return false } - - // Check if pointer type implements isZeroer. - if reflect.PointerTo(v.Type()).Implements(isZeroerType) { - if v.CanAddr() { - return v.Addr().Interface().(isZeroer).IsZero() + if v.Len() == 0 { + return false + } + for i := 0; i < v.Len(); i++ { + elem, ok := resolve(v.Index(i)) + if !ok || isValueKind(elem) { + return false } - // Create a temporary addressable copy to call the pointer receiver method. - pv := reflect.New(v.Type()) - pv.Elem().Set(v) - return pv.Interface().(isZeroer).IsZero() } - - // Fall back to reflect's IsZero for types without custom IsZero method. - return v.IsZero() + return true } -func (enc *Encoder) encodeKv(b []byte, ctx encoderCtx, options valueOptions, v reflect.Value) ([]byte, error) { - var err error - - if !ctx.inline { - b = enc.encodeComment(ctx.indent, options.comment, b) - b = enc.commented(ctx.commented, b) - b = enc.indent(ctx.indent, b) - } - - b = enc.encodeKey(b, ctx.key) - b = append(b, " = "...) - - // create a copy of the context because the value of a KV shouldn't - // modify the global context. - subctx := ctx - subctx.insideKv = true - subctx.shiftKey() - subctx.options = options - - b, err = enc.encode(b, subctx, v) +// encodeTable writes the content of a table at the given key path. +func (e *encoderState) encodeTable(v reflect.Value, commented bool, indent int) error { + entries, err := e.collectEntries(v) if err != nil { - return nil, err - } - - return b, nil -} - -func (enc *Encoder) commented(commented bool, b []byte) []byte { - if commented { - return append(b, "# "...) + return err } - return b -} -func isEmptyValue(v reflect.Value) bool { - switch v.Kind() { - case reflect.Struct: - return isEmptyStruct(v) - case reflect.Array, reflect.Map, reflect.Slice, reflect.String: - return v.Len() == 0 - case reflect.Bool: - return !v.Bool() - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return v.Int() == 0 - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return v.Uint() == 0 - case reflect.Float32, reflect.Float64: - return v.Float() == 0 - case reflect.Interface, reflect.Ptr: - return v.IsNil() - default: - return false + // First pass: emit all key-values; tables are handled by the second + // pass. + for i := range entries { + ent := &entries[i] + if e.entryIsTable(ent) { + continue + } + err := e.encodeKeyValue(*ent, commented, indent) + if err != nil { + return err + } } -} - -func isEmptyStruct(v reflect.Value) bool { - // TODO: merge with walkStruct and cache. - typ := v.Type() - for i := 0; i < typ.NumField(); i++ { - fieldType := typ.Field(i) - // only consider exported fields - if fieldType.PkgPath != "" { + // Second pass: emit the sub-tables, extending the shared key stack. + for i := range entries { + ent := entries[i] + if !e.entryIsTable(&ent) { continue } + entCommented := commented || ent.options.commented + e.keyStack = append(e.keyStack, ent.key) - tag := fieldType.Tag.Get("toml") - - // special field name to skip field - if tag == "-" { + if e.isArrayOfTables(ent.value) { + err := e.encodeArrayTable(ent, entCommented, indent) + if err != nil { + return err + } + e.keyStack = e.keyStack[:len(e.keyStack)-1] continue } - f := v.Field(i) + // The value is resolvable: entryIsTable already resolved it. + tv, _ := resolve(ent.value) - if !isEmptyValue(f) { - return false + e.writeTableHeader(ent.options.comment, entCommented, false, indent) + + err := e.encodeTable(tv, entCommented, indent+1) + if err != nil { + return err } + e.keyStack = e.keyStack[:len(e.keyStack)-1] } - return true + e.putEntries(entries) + return nil } -const literalQuote = '\'' +// entryIsTable reports whether the entry is emitted as a (sub-)table rather +// than a key-value. +func (e *encoderState) entryIsTable(ent *entry) bool { + return !e.tablesInline && !ent.options.inline && (e.isTableLike(ent.value) || e.isArrayOfTables(ent.value)) +} -func (enc *Encoder) encodeString(b []byte, v string, options valueOptions) []byte { - if needsQuoting(v) { - return enc.encodeQuotedString(options.multiline, b, v) +// getEntries returns a reusable entry slice. +func (e *encoderState) getEntries() []entry { + if n := len(e.entriesPool); n > 0 { + s := e.entriesPool[n-1] + e.entriesPool = e.entriesPool[:n-1] + return s[:0] } + return nil +} - return enc.encodeLiteralString(b, v) +// putEntries returns an entry slice to the pool. +func (e *encoderState) putEntries(s []entry) { + if cap(s) > 0 { + e.entriesPool = append(e.entriesPool, s) + } } -func needsQuoting(v string) bool { - // TODO: vectorize - for _, b := range []byte(v) { - if b == '\'' || b == '\r' || b == '\n' || characters.InvalidASCII(b) { - return true +// encodeArrayTable writes all the elements of an array of tables. +func (e *encoderState) encodeArrayTable(ent entry, commented bool, indent int) error { + v, _ := resolve(ent.value) + comment := ent.options.comment + for i := 0; i < v.Len(); i++ { + // Elements are resolvable: isArrayOfTables already resolved them. + elem, _ := resolve(v.Index(i)) + + e.writeTableHeader(comment, commented, true, indent) + // The comment is only present before the first element. + comment = "" + + err := e.encodeTable(elem, commented, indent+1) + if err != nil { + return err } } - return false + return nil } -// caller should have checked that the string does not contain new lines or ' . -func (enc *Encoder) encodeLiteralString(b []byte, v string) []byte { - b = append(b, literalQuote) - b = append(b, v...) - b = append(b, literalQuote) +// writeTableHeader emits a [table] or [[array table]] header line, preceded +// by an empty line and comments as needed. +func (e *encoderState) writeTableHeader(comment string, commented bool, array bool, indent int) { + key := e.keyStack + if len(e.buf) > 0 && !e.lastWasHeader { + e.buf = append(e.buf, '\n') + } - return b -} + headerIndent := indent -func (enc *Encoder) encodeQuotedString(multiline bool, b []byte, v string) []byte { - stringQuote := `"` + e.writeComment(comment, headerIndent) - if multiline { - stringQuote = `"""` + e.writeIndent(headerIndent) + if commented { + e.buf = append(e.buf, "# "...) } - - b = append(b, stringQuote...) - if multiline { - b = append(b, '\n') + e.buf = append(e.buf, '[') + if array { + e.buf = append(e.buf, '[') } - - const ( - hextable = "0123456789ABCDEF" - // U+0000 to U+0008, U+000A to U+001F, U+007F - nul = 0x0 - bs = 0x8 - lf = 0xa - us = 0x1f - del = 0x7f - ) - - bv := []byte(v) - for i := 0; i < len(bv); i++ { - r := bv[i] - switch r { - case '\\': - b = append(b, `\\`...) - case '"': - if multiline { - // Quotation marks do not need to be quoted in multiline strings unless - // it contains 3 consecutive. If 3+ quotes appear, quote all of them - // because it's visually better - if i+2 > len(bv) || bv[i+1] != '"' || bv[i+2] != '"' { - b = append(b, r) - } else { - b = append(b, `\"\"\"`...) - i += 2 - } - } else { - b = append(b, `\"`...) - } - case '\b': - b = append(b, `\b`...) - case '\f': - b = append(b, `\f`...) - case '\n': - if multiline { - b = append(b, r) - } else { - b = append(b, `\n`...) - } - case '\r': - b = append(b, `\r`...) - case '\t': - b = append(b, `\t`...) - default: - switch { - case r >= nul && r <= bs, r >= lf && r <= us, r == del: - b = append(b, `\u00`...) - b = append(b, hextable[r>>4]) - b = append(b, hextable[r&0x0f]) - default: - b = append(b, r) - } + for i, part := range key { + if i > 0 { + e.buf = append(e.buf, '.') } + e.buf = e.appendKey(e.buf, part) } - - b = append(b, stringQuote...) - - return b + e.buf = append(e.buf, ']') + if array { + e.buf = append(e.buf, ']') + } + e.buf = append(e.buf, '\n') + e.lastWasHeader = true } -// caller should have checked that the string is in A-Z / a-z / 0-9 / - / _ . -func (enc *Encoder) encodeUnquotedKey(b []byte, v string) []byte { - return append(b, v...) +func (e *encoderState) writeIndent(indent int) { + if !e.indentTables { + return + } + for i := 0; i < indent; i++ { + e.buf = append(e.buf, e.indentSymbol...) + } } -func (enc *Encoder) encodeTableHeader(ctx encoderCtx, b []byte) []byte { - if len(ctx.parentKey) == 0 { - return b +// writeComment emits the comment lines attached to an entry. +func (e *encoderState) writeComment(comment string, indent int) { + if comment == "" { + return } + for _, line := range strings.Split(comment, "\n") { + e.writeIndent(indent) + e.buf = append(e.buf, "# "...) + e.buf = append(e.buf, line...) + e.buf = append(e.buf, '\n') + } +} - b = enc.encodeComment(ctx.indent, ctx.options.comment, b) - - b = enc.commented(ctx.commented, b) - - b = enc.indent(ctx.indent, b) - - b = append(b, '[') +// encodeKeyValue writes one `key = value` line of a table. +func (e *encoderState) encodeKeyValue(ent entry, commented bool, indent int) error { + commented = commented || ent.options.commented - b = enc.encodeKey(b, ctx.parentKey[0]) + e.writeComment(ent.options.comment, indent) - for _, k := range ctx.parentKey[1:] { - b = append(b, '.') - b = enc.encodeKey(b, k) + e.writeIndent(indent) + if commented { + e.buf = append(e.buf, "# "...) } + e.buf = e.appendKey(e.buf, ent.key) + e.buf = append(e.buf, " = "...) - b = append(b, "]\n"...) + var err error + e.buf, err = e.appendValue(e.buf, ent.value, ent.options, indent) + if err != nil { + return err + } + e.buf = append(e.buf, '\n') + e.lastWasHeader = false + return nil +} - return b +// collectEntries builds the ordered list of the entries of a table, +// applying tags and omission rules. +func (e *encoderState) collectEntries(v reflect.Value) ([]entry, error) { + switch v.Kind() { + case reflect.Map: + return e.collectMapEntries(v) + case reflect.Struct: + entries := e.getEntries() + e.collectStructEntries(&entries, v) + return entries, nil + default: + return nil, fmt.Errorf("toml: cannot encode a %s as a table", v.Type()) + } } -func (enc *Encoder) encodeKey(b []byte, k string) []byte { - needsQuotation := false - cannotUseLiteral := false +func (e *encoderState) collectMapEntries(v reflect.Value) ([]entry, error) { + entries := e.getEntries() - if len(k) == 0 { - return append(b, "''"...) + // Keys are converted to strings right away: read them into a reusable + // buffer to avoid one allocation per key. + var kbuf reflect.Value + if v.Type().Key() == stringType { + if !e.stringKeyBuf.IsValid() { + e.stringKeyBuf = reflect.New(stringType).Elem() + } + kbuf = e.stringKeyBuf + } else { + kbuf = reflect.New(v.Type().Key()).Elem() } - for _, c := range k { - if (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' { + iter := v.MapRange() + for iter.Next() { + kbuf.SetIterKey(iter) + key, err := mapKeyString(kbuf) + if err != nil { + return nil, err + } + value := iter.Value() + if value.Kind() == reflect.Interface && value.IsNil() { + // nil interface values are skipped continue } - - if c == literalQuote { - cannotUseLiteral = true + if value.Kind() == reflect.Ptr && value.IsNil() { + // nil pointers in maps are encoded as their zero value + value = reflect.New(value.Type().Elem()).Elem() } - - needsQuotation = true + entries = append(entries, entry{key: key, value: value}) } - if needsQuotation && needsQuoting(k) { - cannotUseLiteral = true + if len(entries) > 1 { + // slices.SortFunc avoids boxing the slice into a sort.Interface (an + // allocation that sort.Sort incurs for every table). + slices.SortFunc(entries, func(a, b entry) int { + return strings.Compare(a.key, b.key) + }) } - switch { - case cannotUseLiteral: - return enc.encodeQuotedString(false, b, k) - case needsQuotation: - return enc.encodeLiteralString(b, k) - default: - return enc.encodeUnquotedKey(b, k) - } + return entries, nil } -func (enc *Encoder) keyToString(k reflect.Value) (string, error) { - keyType := k.Type() - if keyType.Implements(textMarshalerType) { - keyB, err := k.Interface().(encoding.TextMarshaler).MarshalText() +// mapKeyString converts a map key to its string representation. +func mapKeyString(k reflect.Value) (string, error) { + kr, ok := resolve(k) + if !ok { + return "", errors.New("toml: cannot encode a nil map key") + } + if kr.Type().Implements(textMarshalerType) { + b, err := kr.Interface().(encoding.TextMarshaler).MarshalText() if err != nil { - return "", fmt.Errorf("toml: error marshalling key %v from text: %w", k, err) + return "", fmt.Errorf("toml: cannot marshal map key: %w", err) } - return string(keyB), nil + return string(b), nil } - - switch keyType.Kind() { + if kr.CanAddr() && reflect.PtrTo(kr.Type()).Implements(textMarshalerType) { + b, err := kr.Addr().Interface().(encoding.TextMarshaler).MarshalText() + if err != nil { + return "", fmt.Errorf("toml: cannot marshal map key: %w", err) + } + return string(b), nil + } + switch kr.Kind() { case reflect.String: - return k.String(), nil - + return kr.String(), nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return strconv.FormatInt(k.Int(), 10), nil - - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - return strconv.FormatUint(k.Uint(), 10), nil - + return strconv.FormatInt(kr.Int(), 10), nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return strconv.FormatUint(kr.Uint(), 10), nil case reflect.Float32: - return strconv.FormatFloat(k.Float(), 'f', -1, 32), nil - + return strconv.FormatFloat(kr.Float(), 'f', -1, 32), nil case reflect.Float64: - return strconv.FormatFloat(k.Float(), 'f', -1, 64), nil - + return strconv.FormatFloat(kr.Float(), 'f', -1, 64), nil default: - return "", fmt.Errorf("toml: type %s is not supported as a map key", keyType.Kind()) + return "", fmt.Errorf("toml: cannot encode a map with key type %s", k.Type()) } } -func (enc *Encoder) encodeMap(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - var ( - t table - emptyValueOptions valueOptions - ) - - iter := v.MapRange() - for iter.Next() { - v := iter.Value() - - // Handle nil values: convert nil pointers to zero value, - // skip nil interfaces and nil maps. - switch v.Kind() { - case reflect.Ptr: - if v.IsNil() { - v = reflect.Zero(v.Type().Elem()) - } - case reflect.Interface, reflect.Map: - if v.IsNil() { - continue - } - default: - } - - k, err := enc.keyToString(iter.Key()) - if err != nil { - return nil, err - } - - if willConvertToTableOrArrayTable(ctx, v) { - t.pushTable(k, v, emptyValueOptions) - } else { - t.pushKV(k, v, emptyValueOptions) - } - } - - sortEntriesByKey(t.kvs) - sortEntriesByKey(t.tables) - - return enc.encodeTable(b, ctx, t) -} - -func sortEntriesByKey(e []entry) { - slices.SortFunc(e, func(a, b entry) int { - return strings.Compare(a.Key, b.Key) - }) +// encPlanField is the static encoding information of one field of a struct. +type encPlanField struct { + name string + index []int + depth int + options valueOptions } -type entry struct { - Key string - Value reflect.Value - Options valueOptions +// encPlan caches the per-type information needed to encode a struct: +// flattened fields with parsed tags, in order of definition, with shadowed +// duplicates already removed. +type encPlan struct { + fields []encPlanField } -type table struct { - kvs []entry - tables []entry -} +var encPlans sync.Map // reflect.Type -> *encPlan -func (t *table) pushKV(k string, v reflect.Value, options valueOptions) { - for _, e := range t.kvs { - if e.Key == k { - return - } +func encPlanForType(t reflect.Type) *encPlan { + if plan, ok := encPlans.Load(t); ok { + return plan.(*encPlan) } - - t.kvs = append(t.kvs, entry{Key: k, Value: v, Options: options}) + plan := &encPlan{} + visited := map[reflect.Type]bool{} + buildEncPlan(plan, t, nil, 0, visited) + dedupEncPlan(plan) + encPlans.Store(t, plan) + return plan } -func (t *table) pushTable(k string, v reflect.Value, options valueOptions) { - for _, e := range t.tables { - if e.Key == k { - return - } +func buildEncPlan(plan *encPlan, t reflect.Type, prefix []int, depth int, visited map[reflect.Type]bool) { + if visited[t] { + return } - t.tables = append(t.tables, entry{Key: k, Value: v, Options: options}) -} + visited[t] = true + defer delete(visited, t) -func walkStruct(ctx encoderCtx, t *table, v reflect.Value) { - // TODO: cache this - typ := v.Type() - for i := 0; i < typ.NumField(); i++ { - fieldType := typ.Field(i) + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) - // only consider exported fields - if fieldType.PkgPath != "" { - continue - } - - tag := fieldType.Tag.Get("toml") - - // special field name to skip field + tag, tagged := f.Tag.Lookup("toml") if tag == "-" { continue } - k, opts := parseTag(tag) - if !isValidName(k) { - k = "" - } - - f := v.Field(i) - - if k == "" { - if fieldType.Anonymous { - if fieldType.Type.Kind() == reflect.Struct { - walkStruct(ctx, t, f) - } else if fieldType.Type.Kind() == reflect.Ptr && !f.IsNil() && f.Elem().Kind() == reflect.Struct { - walkStruct(ctx, t, f.Elem()) + name := f.Name + var opts valueOptions + if tagged { + parts := strings.Split(tag, ",") + if parts[0] != "" { + name = parts[0] + } + for _, opt := range parts[1:] { + switch opt { + case "multiline": + opts.multiline = true + case "inline": + opts.inline = true + case "omitempty": + opts.omitempty = true + case "omitzero": + opts.omitzero = true + case "commented": + opts.commented = true } - continue } - k = fieldType.Name } - - if isNil(f) { - continue + // Standalone boolean tags, e.g. multiline:"true". + const tagTrue = "true" + if f.Tag.Get("multiline") == tagTrue { + opts.multiline = true } - - options := valueOptions{ - multiline: opts.multiline, - omitempty: opts.omitempty, - omitzero: opts.omitzero, - commented: opts.commented, - comment: fieldType.Tag.Get("comment"), + if f.Tag.Get("inline") == tagTrue { + opts.inline = true } - - if opts.inline || !willConvertToTableOrArrayTable(ctx, f) { - t.pushKV(k, f, options) - } else { - t.pushTable(k, f, options) + if f.Tag.Get("commented") == tagTrue { + opts.commented = true } - } -} - -func (enc *Encoder) encodeStruct(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - var t table + opts.comment = f.Tag.Get("comment") - walkStruct(ctx, &t, v) + index := make([]int, 0, len(prefix)+1) + index = append(index, prefix...) + index = append(index, i) - return enc.encodeTable(b, ctx, t) -} - -func (enc *Encoder) encodeComment(indent int, comment string, b []byte) []byte { - for len(comment) > 0 { - var line string - idx := strings.IndexByte(comment, '\n') - if idx >= 0 { - line = comment[:idx] - comment = comment[idx+1:] - } else { - line = comment - comment = "" + if f.Anonymous { + ft := f.Type + if ft.Kind() == reflect.Ptr { + ft = ft.Elem() + } + if ft.Kind() == reflect.Struct && (!tagged || tagName(tag) == "") { + buildEncPlan(plan, ft, index, depth+1, visited) + continue + } + if f.PkgPath != "" && ft.Kind() != reflect.Interface { + continue + } + } else if f.PkgPath != "" { + // unexported + continue } - b = enc.indent(indent, b) - b = append(b, "# "...) - b = append(b, line...) - b = append(b, '\n') - } - return b -} -func isValidName(s string) bool { - if s == "" { - return false - } - for _, c := range s { - switch { - case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c): - // Backslash and quote chars are reserved, but - // otherwise any punctuation chars are allowed - // in a tag name. - case !unicode.IsLetter(c) && !unicode.IsDigit(c): - return false - } + plan.fields = append(plan.fields, encPlanField{ + name: name, + index: index, + depth: depth, + options: opts, + }) } - return true -} - -type tagOptions struct { - multiline bool - inline bool - omitempty bool - omitzero bool - commented bool } -func parseTag(tag string) (string, tagOptions) { - opts := tagOptions{} - - idx := strings.Index(tag, ",") - if idx == -1 { - return tag, opts - } - - raw := tag[idx+1:] - tag = tag[:idx] - for raw != "" { - var o string - i := strings.Index(raw, ",") - if i >= 0 { - o, raw = raw[:i], raw[i+1:] +// dedupEncPlan removes the fields shadowed by another one with the same +// name (the shallowest wins), keeping the order of first appearance. +func dedupEncPlan(plan *encPlan) { + byName := make(map[string]int, len(plan.fields)) + drop := false + for i := range plan.fields { + f := &plan.fields[i] + j, seen := byName[f.name] + if !seen { + byName[f.name] = i + continue + } + drop = true + // Shallowest wins; on equal depth, the first in order wins. + if f.depth < plan.fields[j].depth { + plan.fields[j].name = "" + byName[f.name] = i } else { - o, raw = raw, "" + f.name = "" } - switch o { - case "multiline": - opts.multiline = true - case "inline": - opts.inline = true - case "omitempty": - opts.omitempty = true - case "omitzero": - opts.omitzero = true - case "commented": - opts.commented = true + } + if !drop { + return + } + out := plan.fields[:0] + for _, f := range plan.fields { + if f.name != "" { + out = append(out, f) } } - - return tag, opts + plan.fields = out } -func (enc *Encoder) encodeTable(b []byte, ctx encoderCtx, t table) ([]byte, error) { - var err error - - ctx.shiftKey() +// collectStructEntries appends the entries of a struct, flattening embedded +// structs in place. +func (e *encoderState) collectStructEntries(entries *[]entry, v reflect.Value) { + plan := encPlanForType(v.Type()) - if ctx.insideKv || (ctx.inline && !ctx.isRoot()) { - return enc.encodeTableInline(b, ctx, t) - } - - if !ctx.skipTableHeader { - b = enc.encodeTableHeader(ctx, b) - - if enc.indentTables && len(ctx.parentKey) > 0 { - ctx.indent++ + for i := range plan.fields { + f := &plan.fields[i] + fv, ok := fieldByIndexSkipNil(v, f.index) + if !ok { + // nil embedded pointer on the way: skipped + continue } - } - ctx.skipTableHeader = false - hasNonEmptyKV := false - for _, kv := range t.kvs { - if shouldOmitEmpty(kv.Options, kv.Value) { + // Anonymous interface fields that are nil are skipped. + if fv.Kind() == reflect.Interface && fv.IsNil() { continue } - if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) { + // nil values in struct fields are skipped + if (fv.Kind() == reflect.Ptr || fv.Kind() == reflect.Map) && fv.IsNil() { continue } - hasNonEmptyKV = true - - ctx.setKey(kv.Key) - ctx2 := ctx - ctx2.commented = kv.Options.commented || ctx2.commented - - b, err = enc.encodeKv(b, ctx2, kv.Options, kv.Value) - if err != nil { - return nil, err - } - - b = append(b, '\n') - } - first := true - for _, table := range t.tables { - if shouldOmitEmpty(table.Options, table.Value) { + if f.options.omitempty && isEmptyValue(fv) { continue } - if table.Options.omitzero && shouldOmitZero(table.Options, table.Value) { + if f.options.omitzero && isZeroValue(fv) { continue } - if first { - first = false - if hasNonEmptyKV { - b = append(b, '\n') - } - } else { - b = append(b, "\n"...) - } - ctx.setKey(table.Key) - - ctx.options = table.Options - ctx2 := ctx - ctx2.commented = ctx2.commented || ctx.options.commented - - b, err = enc.encode(b, ctx2, table.Value) - if err != nil { - return nil, err - } + *entries = append(*entries, entry{key: f.name, value: fv, options: f.options}) } - - return b, nil } -func (enc *Encoder) encodeTableInline(b []byte, ctx encoderCtx, t table) ([]byte, error) { - var err error - - b = append(b, '{') - - first := true - for _, kv := range t.kvs { - if shouldOmitEmpty(kv.Options, kv.Value) { - continue - } - if kv.Options.omitzero && shouldOmitZero(kv.Options, kv.Value) { - continue - } - - if first { - first = false - } else { - b = append(b, `, `...) - } - - ctx.setKey(kv.Key) - - b, err = enc.encodeKv(b, ctx, kv.Options, kv.Value) - if err != nil { - return nil, err +// fieldByIndexSkipNil returns the field at the given index path, reporting +// false if a nil embedded pointer is found on the way. +func fieldByIndexSkipNil(v reflect.Value, index []int) (reflect.Value, bool) { + for i, x := range index { + if i > 0 { + for v.Kind() == reflect.Ptr { + if v.IsNil() { + return v, false + } + v = v.Elem() + } } + v = v.Field(x) } + return v, true +} - if len(t.tables) > 0 { - panic("inline table cannot contain nested tables, only key-values") +func tagName(tag string) string { + if idx := strings.IndexByte(tag, ','); idx >= 0 { + return tag[:idx] } - - b = append(b, "}"...) - - return b, nil + return tag } -func willConvertToTable(ctx encoderCtx, v reflect.Value) bool { - if !v.IsValid() { +// isEmptyValue implements the omitempty rules. +func isEmptyValue(v reflect.Value) bool { + switch v.Kind() { + case reflect.String: + return v.Len() == 0 + case reflect.Bool: + return !v.Bool() + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return v.Int() == 0 + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + return v.Uint() == 0 + case reflect.Float32, reflect.Float64: + return v.Float() == 0 + case reflect.Map, reflect.Slice, reflect.Array: + return v.Len() == 0 + case reflect.Ptr, reflect.Interface: + return v.IsNil() + case reflect.Struct: + return v.IsZero() + default: return false } - t := v.Type() - if t == timeType || t.Implements(textMarshalerType) { - return false +} + +// isZeroValue implements the omitzero rules: the type's own IsZero() when +// implemented, the reflect zero value otherwise. +func isZeroValue(v reflect.Value) bool { + if v.Type().Implements(isZeroerType) { + return v.Interface().(isZeroer).IsZero() } - if v.Kind() != reflect.Ptr && v.CanAddr() && reflect.PointerTo(t).Implements(textMarshalerType) { - return false + if v.CanAddr() && reflect.PtrTo(v.Type()).Implements(isZeroerType) { + return v.Addr().Interface().(isZeroer).IsZero() } - - switch t.Kind() { - case reflect.Map, reflect.Struct: - return !ctx.inline - case reflect.Interface: - return willConvertToTable(ctx, v.Elem()) - case reflect.Ptr: - if v.IsNil() { - return false - } - - return willConvertToTable(ctx, v.Elem()) - default: - return false + if !v.CanAddr() && reflect.PtrTo(v.Type()).Implements(isZeroerType) { + tmp := reflect.New(v.Type()) + tmp.Elem().Set(v) + return tmp.Interface().(isZeroer).IsZero() } + return v.IsZero() } -func willConvertToTableOrArrayTable(ctx encoderCtx, v reflect.Value) bool { - if ctx.insideKv { - return false +// appendKey emits a key, quoted only if necessary. +func (e *encoderState) appendKey(b []byte, key string) []byte { + if isBareKey(key) { + return append(b, key...) } - t := v.Type() + return e.appendString(b, key) +} - if t.Kind() == reflect.Interface { - return willConvertToTableOrArrayTable(ctx, v.Elem()) +func isBareKey(key string) bool { + if len(key) == 0 { + return false } - - if t.Kind() == reflect.Slice || t.Kind() == reflect.Array { - if v.Len() == 0 { - // An empty slice should be a kv = []. + for _, c := range []byte(key) { + if !isUnquotedKeyByte(c) { return false } - - for i := 0; i < v.Len(); i++ { - t := willConvertToTable(ctx, v.Index(i)) - - if !t { - return false - } - } - - return true } + return true +} - return willConvertToTable(ctx, v) +func isUnquotedKeyByte(c byte) bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' } -func (enc *Encoder) encodeSlice(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - if v.Len() == 0 { - b = append(b, "[]"...) +// appendValue emits a TOML value. +func (e *encoderState) appendValue(b []byte, v reflect.Value, opts valueOptions, indent int) ([]byte, error) { + t := v.Type() - return b, nil + // Special types take precedence over their kind. + switch t { + case timeType: + return v.Interface().(time.Time).AppendFormat(b, "2006-01-02T15:04:05.999999999Z07:00"), nil + case localDateType: + return append(b, v.Interface().(LocalDate).String()...), nil + case localTimeType: + return append(b, v.Interface().(LocalTime).String()...), nil + case localDateTimeType: + return append(b, v.Interface().(LocalDateTime).String()...), nil + case jsonNumberType: + if e.marshalJSONNumbers { + return appendJSONNumber(b, v.Interface().(json.Number)) + } } - if willConvertToTableOrArrayTable(ctx, v) { - return enc.encodeSliceAsArrayTable(b, ctx, v) + switch encPropsForType(t).text { + case 1: + if t.Kind() != reflect.String { + return e.appendTextMarshaler(b, v.Interface().(encoding.TextMarshaler)) + } + case 2: + if v.CanAddr() { + return e.appendTextMarshaler(b, v.Addr().Interface().(encoding.TextMarshaler)) + } + tmp := reflect.New(t) + tmp.Elem().Set(v) + return e.appendTextMarshaler(b, tmp.Interface().(encoding.TextMarshaler)) } - return enc.encodeSliceAsArray(b, ctx, v) + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() { + // nil pointers are encoded as the zero value of their element + // type. + return e.appendValue(b, reflect.Zero(t.Elem()), opts, indent) + } + return e.appendValue(b, v.Elem(), opts, indent) + case reflect.Interface: + if v.IsNil() { + return nil, errors.New("toml: cannot encode a nil interface") + } + return e.appendValue(b, v.Elem(), opts, indent) + case reflect.String: + if opts.multiline { + return e.appendMultilineString(b, v.String()), nil + } + return e.appendString(b, v.String()), nil + case reflect.Bool: + if v.Bool() { + return append(b, "true"...), nil + } + return append(b, "false"...), nil + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return strconv.AppendInt(b, v.Int(), 10), nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + u := v.Uint() + if u > math.MaxInt64 { + return nil, fmt.Errorf("toml: cannot encode an unsigned integer above math.MaxInt64: %d", u) + } + return strconv.AppendUint(b, u, 10), nil + case reflect.Float32: + return appendFloat(b, v.Float(), 32), nil + case reflect.Float64: + return appendFloat(b, v.Float(), 64), nil + case reflect.Slice, reflect.Array: + return e.appendArray(b, v, opts, indent) + case reflect.Map: + return e.appendInlineTable(b, v, indent) + case reflect.Struct: + return e.appendInlineTable(b, v, indent) + default: + return nil, fmt.Errorf("toml: cannot encode value of type %s", v.Type()) + } } -// caller should have checked that v is a slice that only contains values that -// encode into tables. -func (enc *Encoder) encodeSliceAsArrayTable(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - ctx.shiftKey() - - scratch := make([]byte, 0, 64) +var jsonNumberType = reflect.TypeOf(json.Number("")) - scratch = enc.commented(ctx.commented, scratch) - - if enc.indentTables { - scratch = enc.indent(ctx.indent, scratch) +func appendJSONNumber(b []byte, n json.Number) ([]byte, error) { + if n == "" { + return append(b, '0'), nil } - - scratch = append(scratch, "[["...) - - for i, k := range ctx.parentKey { - if i > 0 { - scratch = append(scratch, '.') - } - - scratch = enc.encodeKey(scratch, k) + if i, err := n.Int64(); err == nil { + return strconv.AppendInt(b, i, 10), nil } - - scratch = append(scratch, "]]\n"...) - ctx.skipTableHeader = true - - b = enc.encodeComment(ctx.indent, ctx.options.comment, b) - - if enc.indentTables { - ctx.indent++ + f, err := n.Float64() + if err != nil { + return nil, fmt.Errorf("toml: cannot encode json.Number %q: %w", string(n), err) } + return appendFloat(b, f, 64), nil +} - for i := 0; i < v.Len(); i++ { - if i != 0 { - b = append(b, "\n"...) - } - - b = append(b, scratch...) - - var err error - b, err = enc.encode(b, ctx, v.Index(i)) - if err != nil { - return nil, err - } +func appendFloat(b []byte, f float64, bitSize int) []byte { + switch { + case math.IsNaN(f): + return append(b, "nan"...) + case math.IsInf(f, 1): + return append(b, "inf"...) + case math.IsInf(f, -1): + return append(b, "-inf"...) } + start := len(b) + b = strconv.AppendFloat(b, f, 'f', -1, bitSize) + // TOML floats must have a fractional part or an exponent. + if !bytes.ContainsAny(b[start:], ".eE") { + b = append(b, ".0"...) + } + return b +} - return b, nil +func (e *encoderState) appendTextMarshaler(b []byte, m encoding.TextMarshaler) ([]byte, error) { + text, err := m.MarshalText() + if err != nil { + return nil, fmt.Errorf("toml: error calling MarshalText: %w", err) + } + return e.appendString(b, string(text)), nil } -func (enc *Encoder) encodeSliceAsArray(b []byte, ctx encoderCtx, v reflect.Value) ([]byte, error) { - multiline := ctx.options.multiline || enc.arraysMultiline - separator := ", " +// appendArray encodes a slice or array value. +func (e *encoderState) appendArray(b []byte, v reflect.Value, opts valueOptions, indent int) ([]byte, error) { + multiline := opts.multiline || e.arraysMultiline b = append(b, '[') - - subCtx := ctx - subCtx.options = valueOptions{} - - if multiline { - separator = ",\n" - + if multiline && v.Len() > 0 { + for i := 0; i < v.Len(); i++ { + if i > 0 { + b = append(b, ',') + } + b = append(b, '\n') + for j := 0; j <= indent; j++ { + b = append(b, e.indentSymbol...) + } + var err error + b, err = e.appendValue(b, v.Index(i), valueOptions{}, indent+1) + if err != nil { + return nil, err + } + } b = append(b, '\n') - - subCtx.indent++ + for j := 0; j < indent; j++ { + b = append(b, e.indentSymbol...) + } + } else { + for i := 0; i < v.Len(); i++ { + if i > 0 { + b = append(b, ", "...) + } + var err error + b, err = e.appendValue(b, v.Index(i), valueOptions{}, indent) + if err != nil { + return nil, err + } + } } + return append(b, ']'), nil +} - var err error - first := true - - for i := 0; i < v.Len(); i++ { - if first { - first = false - } else { - b = append(b, separator...) - } +// appendInlineTable encodes a map or a struct as an inline table. +func (e *encoderState) appendInlineTable(b []byte, v reflect.Value, indent int) ([]byte, error) { + entries, err := e.collectEntries(v) + if err != nil { + return nil, err + } - if multiline { - b = enc.indent(subCtx.indent, b) + b = append(b, '{') + for i, ent := range entries { + if i > 0 { + b = append(b, ", "...) } - - b, err = enc.encode(b, subCtx, v.Index(i)) + b = e.appendKey(b, ent.key) + b = append(b, " = "...) + // multiline strings are not allowed inside inline tables: they + // would break the single-line requirement. + opts := ent.options + opts.multiline = false + b, err = e.appendValue(b, ent.value, opts, indent) if err != nil { return nil, err } } + e.putEntries(entries) + return append(b, '}'), nil +} - if multiline { - b = append(b, '\n') - b = enc.indent(ctx.indent, b) +// appendString encodes a string, using a literal string when possible and a +// basic string otherwise. +func (e *encoderState) appendString(b []byte, s string) []byte { + if canBeLiteral(s) { + b = append(b, '\'') + b = append(b, s...) + return append(b, '\'') } + return appendBasicString(b, s) +} - b = append(b, ']') +// canBeLiteral returns true when the string can be represented as a TOML +// literal string: no control characters, no single quote, no newline. +func canBeLiteral(s string) bool { + for i := 0; i < len(s); i++ { + c := s[i] + if c == '\'' || c == 0x7f || c < 0x20 { + return false + } + } + return utf8.ValidString(s) +} - return b, nil +// appendBasicString encodes a string as a TOML basic (double-quoted) string. +func appendBasicString(b []byte, s string) []byte { + b = append(b, '"') + for i := 0; i < len(s); { + c := s[i] + switch { + case c == '"': + b = append(b, '\\', '"') + i++ + case c == '\\': + b = append(b, '\\', '\\') + i++ + case c == '\b': + b = append(b, '\\', 'b') + i++ + case c == '\f': + b = append(b, '\\', 'f') + i++ + case c == '\n': + b = append(b, '\\', 'n') + i++ + case c == '\r': + b = append(b, '\\', 'r') + i++ + case c == '\t': + b = append(b, '\\', 't') + i++ + case c < 0x20 || c == 0x7f: + b = append(b, fmt.Sprintf("\\u%04X", c)...) + i++ + default: + r, size := utf8.DecodeRuneInString(s[i:]) + if r == utf8.RuneError && size == 1 { + // Replace invalid bytes by the replacement character. + b = append(b, fmt.Sprintf("\\u%04X", c)...) + i++ + continue + } + b = append(b, s[i:i+size]...) + i += size + } + } + return append(b, '"') } -func (enc *Encoder) indent(level int, b []byte) []byte { - for i := 0; i < level; i++ { - b = append(b, enc.indentSymbol...) +// appendMultilineString encodes a string as a TOML multi-line basic string. +func appendMultilineString(b []byte, s string) []byte { + b = append(b, `"""`...) + b = append(b, '\n') + for i := 0; i < len(s); { + c := s[i] + switch { + case c == '"': + // Runs of three or more quotes must be escaped. + j := i + for j < len(s) && s[j] == '"' { + j++ + } + if j-i >= 3 { + for ; i < j; i++ { + b = append(b, '\\', '"') + } + } else { + b = append(b, s[i:j]...) + i = j + } + case c == '\\': + b = append(b, '\\', '\\') + i++ + case c == '\n': + b = append(b, '\n') + i++ + case c == '\b': + b = append(b, '\\', 'b') + i++ + case c == '\f': + b = append(b, '\\', 'f') + i++ + case c == '\r': + b = append(b, '\\', 'r') + i++ + case c == '\t': + b = append(b, '\t') + i++ + case c < 0x20 || c == 0x7f: + b = append(b, fmt.Sprintf("\\u%04X", c)...) + i++ + default: + r, size := utf8.DecodeRuneInString(s[i:]) + if r == utf8.RuneError && size == 1 { + b = append(b, fmt.Sprintf("\\u%04X", c)...) + i++ + continue + } + b = append(b, s[i:i+size]...) + i += size + } } + return append(b, `"""`...) +} - return b +func (e *encoderState) appendMultilineString(b []byte, s string) []byte { + return appendMultilineString(b, s) } diff --git a/vendor/github.com/pelletier/go-toml/v2/strict.go b/vendor/github.com/pelletier/go-toml/v2/strict.go index e9a4be2c..b58571df 100644 --- a/vendor/github.com/pelletier/go-toml/v2/strict.go +++ b/vendor/github.com/pelletier/go-toml/v2/strict.go @@ -11,69 +11,63 @@ type strict struct { // Tracks the current key being processed. key tracker.KeyTracker - missing []unstable.ParserError - - // Reference to the document for computing key ranges. - doc []byte -} - -func (s *strict) EnterTable(node *unstable.Node) { - if !s.Enabled { - return - } - - s.key.UpdateTable(node) + missing []decodeError } -func (s *strict) EnterArrayTable(node *unstable.Node) { - if !s.Enabled { - return - } - - s.key.UpdateArrayTable(node) +// decodeError is the information needed to materialize a DecodeError once the +// whole document is available. +type decodeError struct { + highlight unstable.Range + key Key + message string } -func (s *strict) EnterKeyValue(node *unstable.Node) { - if !s.Enabled { - return - } - - s.key.Push(node) +// Reset clears the state of the tracker so it can be reused for another +// document. +func (s *strict) Reset() { + s.key = tracker.KeyTracker{} + s.missing = s.missing[:0] } -func (s *strict) ExitKeyValue(node *unstable.Node) { +// EnterTable is called when a new table or array table expression starts +// being processed. +func (s *strict) EnterTable(node *unstable.Node) { if !s.Enabled { return } - - s.key.Pop(node) + s.key.UpdateTable(node) } +// MissingTable is called when a table is present in the document but has no +// corresponding field in the target. func (s *strict) MissingTable(node *unstable.Node) { if !s.Enabled { return } - - s.missing = append(s.missing, unstable.ParserError{ - Highlight: s.keyLocation(node), - Message: "missing table", - Key: s.key.Key(), + s.missing = append(s.missing, decodeError{ + highlight: keyLocation(node), + key: s.key.Key(), + message: "missing table", }) } +// MissingField is called when a key-value is present in the document but has +// no corresponding field in the target. func (s *strict) MissingField(node *unstable.Node) { if !s.Enabled { return } - - s.missing = append(s.missing, unstable.ParserError{ - Highlight: s.keyLocation(node), - Message: "unknown field", - Key: s.key.Key(), + s.key.Push(node) + s.missing = append(s.missing, decodeError{ + highlight: keyLocation(node), + key: s.key.Key(), + message: "unknown field", }) + s.key.Pop(node) } -func (s *strict) Error(doc []byte) error { +// Error returns the cumulated StrictMissingError for the document, or nil. +func (s *strict) Error(document []byte) error { if !s.Enabled || len(s.missing) == 0 { return nil } @@ -83,14 +77,16 @@ func (s *strict) Error(doc []byte) error { } for _, derr := range s.missing { - derr := derr - err.Errors = append(err.Errors, *wrapDecodeError(doc, &derr)) + highlight := document[derr.highlight.Offset : derr.highlight.Offset+derr.highlight.Length] + err.Errors = append(err.Errors, *newDecodeError(document, highlight, derr.key, derr.message)) } return err } -func (s *strict) keyLocation(node *unstable.Node) []byte { +// keyLocation returns the range of the document covering all the parts of +// the key of the given node. +func keyLocation(node *unstable.Node) unstable.Range { k := node.Key() hasOne := k.Next() @@ -98,17 +94,15 @@ func (s *strict) keyLocation(node *unstable.Node) []byte { panic("should not be called with empty key") } - // Get the range from the first key to the last key. - firstRaw := k.Node().Raw - lastRaw := firstRaw + start := k.Node().Raw + end := start for k.Next() { - lastRaw = k.Node().Raw + end = k.Node().Raw } - // Compute the slice from the document using the ranges. - start := firstRaw.Offset - end := lastRaw.Offset + lastRaw.Length - - return s.doc[start:end] + return unstable.Range{ + Offset: start.Offset, + Length: end.Offset + end.Length - start.Offset, + } } diff --git a/vendor/github.com/pelletier/go-toml/v2/types.go b/vendor/github.com/pelletier/go-toml/v2/types.go index 6d12fe58..420f1982 100644 --- a/vendor/github.com/pelletier/go-toml/v2/types.go +++ b/vendor/github.com/pelletier/go-toml/v2/types.go @@ -6,17 +6,18 @@ import ( "time" ) -// isZeroer is used to check if a type has a custom IsZero method. -// This allows custom types to define their own zero-value semantics. +// isZeroer is used to check whether a value is the zero value for its type, +// as defined by the type itself. type isZeroer interface { IsZero() bool } +var isZeroerType = reflect.TypeOf(new(isZeroer)).Elem() + var ( - timeType = reflect.TypeOf((*time.Time)(nil)).Elem() - textMarshalerType = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem() - textUnmarshalerType = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem() - isZeroerType = reflect.TypeOf((*isZeroer)(nil)).Elem() + timeType = reflect.TypeOf(time.Time{}) + textMarshalerType = reflect.TypeOf(new(encoding.TextMarshaler)).Elem() + textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem() mapStringInterfaceType = reflect.TypeOf(map[string]interface{}(nil)) sliceInterfaceType = reflect.TypeOf([]interface{}(nil)) stringType = reflect.TypeOf("") diff --git a/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go b/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go index e7db8128..d497669e 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go +++ b/vendor/github.com/pelletier/go-toml/v2/unmarshaler.go @@ -9,20 +9,70 @@ import ( "reflect" "strconv" "strings" - "sync/atomic" + "sync" "time" "github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/unstable" ) +// decoderPool recycles decoders (and their internal buffers: parser arena, +// seen-tracker entries, scratch buffers) across calls to Unmarshal and +// Decode. +var decoderPool = sync.Pool{ + New: func() interface{} { return &decoder{} }, +} + +func getDecoder(strictMode, unmarshalerInterface bool) *decoder { + d := decoderPool.Get().(*decoder) + d.reset() + d.strict.Enabled = strictMode + d.unmarshalerInterface = unmarshalerInterface + return d +} + +func putDecoder(d *decoder) { + decoderPool.Put(d) +} + +// reset clears the per-document state of the decoder, keeping the allocated +// buffers for reuse. +func (d *decoder) reset() { + d.seen.Reset() + d.tableKey = d.tableKey[:0] + d.skipUntilTable = false + d.path = d.path[:0] + d.captures = d.captures[:0] + d.captureIdx = -1 + d.segIdx = d.segIdx[:0] + // Reuse the array-table counter slots across documents instead of + // deleting them: a zeroed slot is indistinguishable from an absent one, + // and keeping it alive means setArrayCount does not have to allocate a new + // *int every time the same path reappears. A safety valve bounds the table + // for adversarial inputs that introduce unboundedly many distinct paths. + if len(d.arrayCounts) > 1<<14 { + d.arrayCounts = nil + } else { + for _, p := range d.arrayCounts { + *p = 0 + } + } + d.tableTarget = reflect.Value{} + d.tableTargetValid = false + d.tableFlush = d.tableFlush[:0] + d.tableParentSlot = slotWriter{} + d.keyParts = d.keyParts[:0] + d.strict.Reset() +} + // Unmarshal deserializes a TOML document into a Go value. // // It is a shortcut for Decoder.Decode() with the default options. func Unmarshal(data []byte, v interface{}) error { - d := decoder{} - d.p.Reset(data) - return d.FromParser(v) + d := getDecoder(false, false) + err := d.unmarshal(data, v) + putDecoder(d) + return err } // Decoder reads and decode a TOML document from an input stream. @@ -124,1335 +174,2161 @@ func (d *Decoder) Decode(v interface{}) error { return fmt.Errorf("toml: %w", err) } - dec := decoder{ - strict: strict{ - Enabled: d.strict, - doc: b, - }, - unmarshalerInterface: d.unmarshalerInterface, + dec := getDecoder(d.strict, d.unmarshalerInterface) + err = dec.unmarshal(b, v) + putDecoder(dec) + return err +} + +// pathPart is one part of the key path leading to a value. Parts that come +// from the current table header only carry a name; parts that come from the +// key of the current key-value expression also carry the AST node, and their +// name is materialized lazily to avoid allocations. +type pathPart struct { + name string + node *unstable.Node +} + +// bytes returns the raw bytes of the key part. +func (p *pathPart) bytes() []byte { + if p.node != nil { + return p.node.Data + } + return []byte(p.name) +} + +// str returns the key part as a string, possibly allocating. +func (p *pathPart) str() string { + if p.node != nil { + return string(p.node.Data) } - dec.p.Reset(b) + return p.name +} - return dec.FromParser(v) +// rawCapture accumulates the raw bytes fed to a type implementing +// unstable.Unmarshaler for a table target. The target is identified by the +// parts of its key and the array-table indexes in effect when the capture +// was created, so that it can be located again once the whole document has +// been processed (the address of the target may change as slices grow). +type rawCapture struct { + names []string + // indexes[i] is the index to use when reaching a slice or array right + // before consuming names[i]. indexes[len(names)] is the index of the + // element when the target is an element of an array table. -1 when not + // relevant. + indexes []int + buf []byte } type decoder struct { - // Which parser instance in use for this decoding session. p unstable.Parser - // Flag indicating that the current expression is stashed. - // If set to true, calling nextExpr will not actually pull a new expression - // but turn off the flag instead. - stashedExpr bool - - // Skip expressions until a table is found. This is set to true when a - // table could not be created (missing field in map), so all KV expressions - // need to be skipped. - skipUntilTable bool - - // Flag indicating that the current array/slice table should be cleared because - // it is the first encounter of an array table. - clearArrayTable bool + // strict mode + strict strict - // Tracks position in Go arrays. - // This is used when decoding [[array tables]] into Go arrays. Given array - // tables are separate TOML expression, we need to keep track of where we - // are at in the Go array, as we can't just introspect its size. - arrayIndexes map[reflect.Value]int + // toggles unmarshaler interface + unmarshalerInterface bool - // Tracks keys that have been seen, with which type. + // tracks the duplicate and type consistency of the keys seen tracker.SeenTracker - // Strict mode - strict strict - - // Flag that enables/disables unmarshaler interface. - unmarshalerInterface bool + // path of the current table header, as copied strings + tableKey []string - // Current context for the error. - errorContext *errorContext -} + // true when the expressions under the current table header cannot be + // stored anywhere and should be skipped + skipUntilTable bool -type errorContext struct { - Struct reflect.Type - Field []int + // scratch buffer for the key path of the current expression + path []pathPart + + // raw captures for the unmarshaler interface, in order of first + // appearance. captureIdx is the index of the capture the current table + // belongs to, or -1. + captures []rawCapture + captureIdx int + + // segIdx[i] records the array element index used when traversing a + // slice or array right before consuming the i-th part of the current + // table key. Reset for each table expression. + segIdx []int + + // arrayCounts tracks the number of elements appended to fixed-size + // arrays used as array tables, keyed by the NUL-joined key parts. + // Values are pointer slots so that updating an existing path does not + // allocate a new key string. + arrayCounts map[string]*int + + // Cached target of the current table, so that key-values do not need to + // walk the document structure from the root for every expression. + // tableFlush holds the write-backs to perform when leaving the table + // (for targets reached through map values, which are copies). + // tableParentSlot stores a replacement of the target itself (e.g. a nil + // map that was allocated) into its parent. + tableTarget reflect.Value + tableTargetValid bool + tableFlush []flushOp + tableParentSlot slotWriter + + // strKey is a reusable string value used as map key, so that map + // operations with string keys do not need to allocate a boxed key for + // every access. It must be refreshed with stringMapKey immediately + // before each use: any recursive call may overwrite it. + strKey reflect.Value + + // interned de-duplicates key strings: documents repeat the same keys + // over and over, and the table survives pooling, so repeated decodes + // of similar documents stop allocating key strings altogether. + interned map[string]string + + // pathScratch is the buffer used by joinPath. + pathScratch []byte + + // keyParts is the reusable buffer holding the decoded parts of the key of + // the current expression in the fused generic decode path. + keyParts [][]byte } -func (d *decoder) typeMismatchError(toml string, target reflect.Type) error { - return fmt.Errorf("toml: %s", d.typeMismatchString(toml, target)) +// slotWriter remembers how to store a value at some location of the target +// structure. Implemented as a struct instead of a closure to avoid +// allocations. +type slotWriter struct { + kind uint8 // 0: none, 1: slot.Set, 2: m.SetMapIndex(k, ...), 3: m.SetMapIndex(string key ks, ...) + slot reflect.Value + m reflect.Value + k reflect.Value + ks string } -func (d *decoder) typeMismatchString(toml string, target reflect.Type) string { - if d.errorContext != nil && d.errorContext.Struct != nil { - ctx := d.errorContext - f := ctx.Struct.FieldByIndex(ctx.Field) - return fmt.Sprintf("cannot decode TOML %s into struct field %s.%s of type %s", toml, ctx.Struct, f.Name, f.Type) +func (d *decoder) storeSlot(s *slotWriter, nv reflect.Value) { + switch s.kind { + case 1: + if s.slot.CanSet() { + s.slot.Set(nv) + } + case 2: + s.m.SetMapIndex(s.k, nv) + case 3: + s.m.SetMapIndex(d.stringMapKey(s.ks), nv) } - return fmt.Sprintf("cannot decode TOML %s into a Go value of type %s", toml, target) } -func (d *decoder) expr() *unstable.Node { - return d.p.Expression() +// flushOp stores val using w when the table is flushed. +type flushOp struct { + w slotWriter + val reflect.Value } -func (d *decoder) nextExpr() bool { - if d.stashedExpr { - d.stashedExpr = false - return true +// flushTable performs the pending write-backs of the cached table target, in +// reverse order so that inner copies land before their parents are stored. +func (d *decoder) flushTable() { + for i := len(d.tableFlush) - 1; i >= 0; i-- { + d.storeSlot(&d.tableFlush[i].w, d.tableFlush[i].val) } - return d.p.NextExpression() + d.tableFlush = d.tableFlush[:0] + d.tableTargetValid = false + d.tableParentSlot = slotWriter{} + d.tableTarget = reflect.Value{} } -func (d *decoder) stashExpr() { - d.stashedExpr = true +// intern returns the string corresponding to the given bytes, reusing a +// previous allocation when the same key has been seen before. +func (d *decoder) intern(b []byte) string { + if s, ok := d.interned[string(b)]; ok { // does not allocate + return s + } + if d.interned == nil { + d.interned = make(map[string]string, 64) + } else if len(d.interned) >= 1<<14 { + // Safety valve for adversarial inputs: do not let the table grow + // without bounds. + for k := range d.interned { + delete(d.interned, k) + } + } + s := string(b) + d.interned[s] = s + return s } -func (d *decoder) arrayIndex(shouldAppend bool, v reflect.Value) int { - if d.arrayIndexes == nil { - d.arrayIndexes = make(map[reflect.Value]int, 1) +// partString returns the name of a path part, interning it when it comes +// from the document. +func (d *decoder) partString(p *pathPart) string { + if p.node != nil { + return d.intern(p.node.Data) } + return p.name +} - idx, ok := d.arrayIndexes[v] - - if !ok { - d.arrayIndexes[v] = 0 - } else if shouldAppend { - idx++ - d.arrayIndexes[v] = idx +// stringMapKey returns a reflect.Value holding the given string, reusing the +// same allocation every time. The result must be used (the map operation +// performed) before any recursive call, which may overwrite the buffer. +func (d *decoder) stringMapKey(s string) reflect.Value { + if !d.strKey.IsValid() { + d.strKey = reflect.New(stringType).Elem() } - - return idx + d.strKey.SetString(s) + return d.strKey } -func (d *decoder) FromParser(v interface{}) error { - r := reflect.ValueOf(v) - if r.Kind() != reflect.Ptr { - return fmt.Errorf("toml: decoding can only be performed into a pointer, not %s", r.Kind()) +// joinPath builds the NUL-joined representation of a key path in the +// decoder's scratch buffer. The result is only valid until the next call. +func (d *decoder) joinPath(parts []string) []byte { + d.pathScratch = d.pathScratch[:0] + for i, p := range parts { + if i > 0 { + d.pathScratch = append(d.pathScratch, 0) + } + d.pathScratch = append(d.pathScratch, p...) } + return d.pathScratch +} - if r.IsNil() { - return errors.New("toml: decoding pointer target cannot be nil") +// arrayCount returns the number of elements appended so far to the array +// table at the given path. +func (d *decoder) arrayCount(key []byte) int { + if d.arrayCounts == nil { + return 0 } - - r = r.Elem() - if r.Kind() == reflect.Interface && r.IsNil() { - newMap := map[string]interface{}{} - r.Set(reflect.ValueOf(newMap)) + if p := d.arrayCounts[string(key)]; p != nil { // does not allocate + return *p } + return 0 +} - err := d.fromParser(r) - if err == nil { - return d.strict.Error(d.p.Data()) +func (d *decoder) setArrayCount(key []byte, n int) { + if d.arrayCounts == nil { + d.arrayCounts = map[string]*int{} } - - var e *unstable.ParserError - if errors.As(err, &e) { - return wrapDecodeError(d.p.Data(), e) + if p := d.arrayCounts[string(key)]; p != nil { // does not allocate + *p = n + return } - - return err + v := n + d.arrayCounts[string(key)] = &v } -func (d *decoder) fromParser(root reflect.Value) error { - for d.nextExpr() { - err := d.handleRootExpression(d.expr(), root) - if err != nil { - return err +// resetChildArrayCounts forgets the counts of all the array tables under +// the given path, so that a new element starts fresh. +func (d *decoder) resetChildArrayCounts(key []byte) { + if len(d.arrayCounts) == 0 { + return + } + for k, p := range d.arrayCounts { + // Prefix match without building the prefix string: same bytes as + // key, followed by the NUL separator. + if len(k) > len(key) && k[len(key)] == 0 && k[:len(key)] == string(key) { + // Zero instead of delete: the next element of the parent table + // will reuse the slot without allocating a new key. + *p = 0 } } - - return d.p.Error() } -/* -Rules for the unmarshal code: - -- The stack is used to keep track of which values need to be set where. -- handle* functions <=> switch on a given unstable.Kind. -- unmarshalX* functions need to unmarshal a node of kind X. -- An "object" is either a struct or a map. -*/ +func (d *decoder) typeMismatchError(toml string, target reflect.Type, highlight []byte) error { + return &typeMismatchError{ + toml: toml, + target: target, + highlight: highlight, + } +} -func (d *decoder) handleRootExpression(expr *unstable.Node, v reflect.Value) error { - var x reflect.Value - var err error - var first bool // used for to clear array tables on first use +type typeMismatchError struct { + toml string + target reflect.Type + highlight []byte + // key is the TOML key being processed when the mismatch occurred. It is + // populated lazily as the error propagates back up to the key-value + // handler (see contextualizeError). + key Key +} - if !d.skipUntilTable || expr.Kind != unstable.KeyValue { - first, err = d.seen.CheckExpression(expr) - if err != nil { - return err - } - } +func (e *typeMismatchError) Error() string { + return fmt.Sprintf("cannot decode TOML %s into %s", e.toml, e.target) +} - switch expr.Kind { - case unstable.KeyValue: - if d.skipUntilTable { - return nil +// contextualizeError attaches the TOML key currently being processed to errors +// raised while decoding a key-value expression, so that DecodeError.Key() +// reports the offending key (e.g. on type mismatch errors). The current key is +// reconstructed from d.path; when the table target is cached, d.path holds only +// the key-value parts, so the table key prefix is prepended. This only runs on +// the error path and adds no cost to successful decodes. +func (d *decoder) contextualizeError(err error, withTableKey bool) error { + var mm *typeMismatchError + if errors.As(err, &mm) { + if mm.key == nil { + mm.key = d.currentKey(withTableKey) } - x, err = d.handleKeyValue(expr, v) - case unstable.Table: - d.skipUntilTable = false - d.strict.EnterTable(expr) - x, err = d.handleTable(expr.Key(), v) - case unstable.ArrayTable: - d.skipUntilTable = false - d.strict.EnterArrayTable(expr) - d.clearArrayTable = first - x, err = d.handleArrayTable(expr.Key(), v) - default: - panic(fmt.Errorf("parser should not permit expression of kind %s at document root", expr.Kind)) + return err } - - if d.skipUntilTable { - if expr.Kind == unstable.Table || expr.Kind == unstable.ArrayTable { - d.strict.MissingTable(expr) + var perr *unstable.ParserError + if errors.As(err, &perr) { + if perr.Key == nil { + perr.Key = d.currentKey(withTableKey) } - } else if err == nil && x.IsValid() { - v.Set(x) } - return err } -func (d *decoder) handleArrayTable(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - if key.Next() { - return d.handleArrayTablePart(key, v) +// currentKey reconstructs the full TOML key being processed from the decoder's +// path. When withTableKey is true, d.path contains only the key-value parts +// (the table target is cached) and the table key is prepended. +func (d *decoder) currentKey(withTableKey bool) Key { + n := len(d.path) + if withTableKey { + n += len(d.tableKey) } - return d.handleKeyValues(v) -} - -func (d *decoder) handleArrayTableCollectionLast(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - switch v.Kind() { - case reflect.Interface: - elem := v.Elem() - if !elem.IsValid() { - elem = reflect.New(sliceInterfaceType).Elem() - elem.Set(reflect.MakeSlice(sliceInterfaceType, 0, 16)) - } else if elem.Kind() == reflect.Slice { - if elem.Type() != sliceInterfaceType { - elem = reflect.New(sliceInterfaceType).Elem() - elem.Set(reflect.MakeSlice(sliceInterfaceType, 0, 16)) - } else if !elem.CanSet() { - nelem := reflect.New(sliceInterfaceType).Elem() - nelem.Set(reflect.MakeSlice(sliceInterfaceType, elem.Len(), elem.Cap())) - reflect.Copy(nelem, elem) - elem = nelem - } - if d.clearArrayTable && elem.Len() > 0 { - elem.SetLen(0) - d.clearArrayTable = false - } - } - return d.handleArrayTableCollectionLast(key, elem) - case reflect.Ptr: - elem := v.Elem() - if !elem.IsValid() { - ptr := reflect.New(v.Type().Elem()) - v.Set(ptr) - elem = ptr.Elem() - } - - elem, err := d.handleArrayTableCollectionLast(key, elem) - if err != nil { - return reflect.Value{}, err - } - v.Elem().Set(elem) - - return v, nil - case reflect.Slice: - if d.clearArrayTable && v.Len() > 0 { - v.SetLen(0) - d.clearArrayTable = false - } - elemType := v.Type().Elem() - var elem reflect.Value - if elemType.Kind() == reflect.Interface { - elem = makeMapStringInterface() - } else { - elem = reflect.New(elemType).Elem() - } - elem2, err := d.handleArrayTable(key, elem) - if err != nil { - return reflect.Value{}, err - } - if elem2.IsValid() { - elem = elem2 - } - return reflect.Append(v, elem), nil - case reflect.Array: - idx := d.arrayIndex(true, v) - if idx >= v.Len() { - return v, fmt.Errorf("%w at position %d", d.typeMismatchError("array table", v.Type()), idx) - } - elem := v.Index(idx) - _, err := d.handleArrayTable(key, elem) - return v, err - default: - return reflect.Value{}, d.typeMismatchError("array table", v.Type()) + key := make(Key, 0, n) + if withTableKey { + key = append(key, d.tableKey...) } + for i := range d.path { + key = append(key, d.path[i].str()) + } + return key } -// When parsing an array table expression, each part of the key needs to be -// evaluated like a normal key, but if it returns a collection, it also needs to -// point to the last element of the collection. Unless it is the last part of -// the key, then it needs to create a new element at the end. -func (d *decoder) handleArrayTableCollection(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - if key.IsLast() { - return d.handleArrayTableCollectionLast(key, v) +func (d *decoder) unmarshal(data []byte, v interface{}) error { + r := reflect.ValueOf(v) + if r.Kind() != reflect.Ptr { + return fmt.Errorf("toml: decoding can only be performed into a pointer, not %s", r.Kind()) + } + if r.IsNil() { + return errors.New("toml: decoding pointer target cannot be nil") } - switch v.Kind() { - case reflect.Ptr: - elem := v.Elem() - if !elem.IsValid() { - ptr := reflect.New(v.Type().Elem()) - v.Set(ptr) - elem = ptr.Elem() - } - - elem, err := d.handleArrayTableCollection(key, elem) - if err != nil { - return reflect.Value{}, err - } - if elem.IsValid() { - v.Elem().Set(elem) - } - - return v, nil - case reflect.Slice: - // Create a new element when the slice is empty; otherwise operate on - // the last element. - var ( - elem reflect.Value - created bool - ) - if v.Len() == 0 { - created = true - elemType := v.Type().Elem() - if elemType.Kind() == reflect.Interface { - elem = makeMapStringInterface() - } else { - elem = reflect.New(elemType).Elem() - } - } else { - elem = v.Index(v.Len() - 1) - } + root := r.Elem() - x, err := d.handleArrayTable(key, elem) - if err != nil || d.skipUntilTable { - return reflect.Value{}, err - } - if x.IsValid() { - if created { - elem = x - } else { - elem.Set(x) - } - } + d.captureIdx = -1 + d.p.Reset(data) - if created { - return reflect.Append(v, elem), nil - } - return v, err - case reflect.Array: - idx := d.arrayIndex(false, v) - if idx >= v.Len() { - return v, fmt.Errorf("%w at position %d", d.typeMismatchError("array table", v.Type()), idx) + // Fully generic targets (interface{} or map[string]interface{}) are + // decoded straight into native Go maps and slices, with no reflection on + // the document structure at all. This covers the common "decode arbitrary + // TOML into a map" case, including every standard benchmark dataset. + if !d.unmarshalerInterface { + if k := root.Kind(); k == reflect.Interface || (k == reflect.Map && root.Type() == mapStringInterfaceType) { + return d.unmarshalFused(root, data) } - elem := v.Index(idx) - _, err := d.handleArrayTable(key, elem) - return v, err - default: - return d.handleArrayTable(key, v) } -} - -func (d *decoder) handleKeyPart(key unstable.Iterator, v reflect.Value, nextFn handlerFn, makeFn valueMakerFn) (reflect.Value, error) { - var rv reflect.Value - - // First, dispatch over v to make sure it is a valid object. - // There is no guarantee over what it could be. - switch v.Kind() { - case reflect.Ptr: - elem := v.Elem() - if !elem.IsValid() { - v.Set(reflect.New(v.Type().Elem())) - } - elem = v.Elem() - return d.handleKeyPart(key, elem, nextFn, makeFn) - case reflect.Map: - vt := v.Type() - // Create the key for the map element. Convert to key type. - mk, err := d.keyFromData(vt.Key(), key.Node().Data) + for d.p.NextExpression() { + err := d.handleRootExpression(d.p.Expression(), root) if err != nil { - return reflect.Value{}, err + return d.wrapError(data, err) } - - // If the map does not exist, create it. - if v.IsNil() { - vt := v.Type() - v = reflect.MakeMap(vt) - rv = v + } + if err := d.p.Error(); err != nil { + var perr *unstable.ParserError + if errors.As(err, &perr) { + return wrapDecodeError(data, perr) } + return err + } - mv := v.MapIndex(mk) - set := false - switch { - case !mv.IsValid(): - // If there is no value in the map, create a new one according to - // the map type. If the element type is interface, create either a - // map[string]interface{} or a []interface{} depending on whether - // this is the last part of the array table key. - - t := vt.Elem() - if t.Kind() == reflect.Interface { - mv = makeFn() - } else { - mv = reflect.New(t).Elem() - } - set = true - case mv.Kind() == reflect.Interface: - mv = mv.Elem() - if !mv.IsValid() { - mv = makeFn() - } - set = true - case !mv.CanAddr(): - vt := v.Type() - t := vt.Elem() - oldmv := mv - mv = reflect.New(t).Elem() - mv.Set(oldmv) - set = true - } + d.flushTable() - x, err := nextFn(key, mv) + // Deliver the accumulated raw documents to the unmarshaler-interface + // targets. + for i := range d.captures { + nv, err := d.resolveCapture(root, &d.captures[i], 0, false) if err != nil { - return reflect.Value{}, err - } - - if x.IsValid() { - mv = x - set = true - } - - if set { - v.SetMapIndex(mk, mv) - } - case reflect.Struct: - path, found := structFieldPath(v, string(key.Node().Data)) - if !found { - d.skipUntilTable = true - return reflect.Value{}, nil + return err } - - if d.errorContext == nil { - d.errorContext = new(errorContext) + if nv.IsValid() { + root.Set(nv) } - t := v.Type() - d.errorContext.Struct = t - d.errorContext.Field = path + } - f := fieldByIndex(v, path) - x, err := nextFn(key, f) - if err != nil || d.skipUntilTable { - return reflect.Value{}, err - } - if x.IsValid() { - f.Set(x) + // An empty document into a generic target still initializes it. + switch root.Kind() { + case reflect.Map: + if root.IsNil() { + root.Set(reflect.MakeMap(root.Type())) } - d.errorContext.Field = nil - d.errorContext.Struct = nil case reflect.Interface: - if v.Elem().IsValid() { - v = v.Elem() - } else { - v = makeMapStringInterface() - } - - x, err := d.handleKeyPart(key, v, nextFn, makeFn) - if err != nil { - return reflect.Value{}, err - } - if x.IsValid() { - v = x + if root.IsNil() { + root.Set(reflect.ValueOf(map[string]interface{}{})) } - rv = v default: - panic(fmt.Errorf("unhandled part: %s", v.Kind())) } - return rv, nil + return d.strict.Error(data) } -// HandleArrayTablePart navigates the Go structure v using the key v. It is -// only used for the prefix (non-last) parts of an array-table. When -// encountering a collection, it should go to the last element. -func (d *decoder) handleArrayTablePart(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - var makeFn valueMakerFn - if key.IsLast() { - makeFn = makeSliceInterface - } else { - makeFn = makeMapStringInterface +// setAnyKey assigns the value of a key-value into the native map m, following +// the (possibly dotted) key and creating intermediate maps as needed. +func (d *decoder) setAnyKey(m map[string]interface{}, key unstable.Iterator, value *unstable.Node) error { + cur := m + for key.Next() { + name := d.intern(key.Node().Data) + if key.IsLast() { + av, err := d.decodeAny(value) + if err != nil { + return err + } + cur[name] = av + return nil + } + cur = d.anyChildTable(cur, name) } - return d.handleKeyPart(key, v, d.handleArrayTableCollection, makeFn) + return nil } -// HandleTable returns a reference when it has checked the next expression but -// cannot handle it. -func (d *decoder) handleTable(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - if v.Kind() == reflect.Slice { - // For non-empty slices, work with the last element - if v.Len() > 0 { - elem := v.Index(v.Len() - 1) - x, err := d.handleTable(key, elem) - if err != nil { - return reflect.Value{}, err - } - if x.IsValid() { - elem.Set(x) - } - return reflect.Value{}, nil - } - // Empty slice - check if it implements Unmarshaler (e.g., RawMessage) - // and we're at the end of the key path - if d.unmarshalerInterface && !key.Next() { - if v.CanAddr() && v.Addr().CanInterface() { - if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { - return d.handleKeyValuesUnmarshaler(outi) - } +// anyChildTable returns the child table at name within cur, creating it if +// absent and descending into the current (last) element when an array table +// occupies the slot. A non-container in the slot cannot occur for a document +// the seen-tracker has accepted. +func (d *decoder) anyChildTable(cur map[string]interface{}, name string) map[string]interface{} { + switch v := cur[name].(type) { + case map[string]interface{}: + return v + case []interface{}: + if len(v) > 0 { + if last, ok := v[len(v)-1].(map[string]interface{}); ok { + return last } } - return reflect.Value{}, unstable.NewParserError(key.Node().Data, "cannot store a table in a slice") } - if key.Next() { - // Still scoping the key - return d.handleTablePart(key, v) + nm := map[string]interface{}{} + cur[name] = nm + return nm +} + +// wrapError gives document context to errors generated while processing an +// expression. +func (d *decoder) wrapError(data []byte, err error) error { + var perr *unstable.ParserError + if errors.As(err, &perr) { + return wrapDecodeError(data, perr) + } + var mm *typeMismatchError + if errors.As(err, &mm) { + return wrapDecodeError(data, &unstable.ParserError{ + Highlight: mm.highlight, + Message: mm.Error(), + Key: mm.key, + }) } - // Done scoping the key. - // Now handle all the key-value expressions in this table. - return d.handleKeyValues(v) + return err } -// Handle root expressions until the end of the document or the next -// non-key-value. -func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) { - // Check if target implements Unmarshaler before processing key-values. - // This allows types to handle entire tables themselves. - if d.unmarshalerInterface { - vv := v - for vv.Kind() == reflect.Ptr { - if vv.IsNil() { - vv.Set(reflect.New(vv.Type().Elem())) - } - vv = vv.Elem() - } - if vv.CanAddr() && vv.Addr().CanInterface() { - if outi, ok := vv.Addr().Interface().(unstable.Unmarshaler); ok { - // Collect all key-value expressions for this table - return d.handleKeyValuesUnmarshaler(outi) - } - } +// wrapSeenError turns an error returned by SeenTracker.CheckExpression into a +// ParserError carrying the position and key of the offending expression, so +// that redefinition and duplicate-key errors are reported as a DecodeError +// with context (see issue #668). +// +// The highlight spans the expression's key. Unlike Node.Raw, key nodes always +// carry a Raw range, so this works for tables and array tables too (whose own +// Raw range is not set by the parser). For a duplicate detected inside an +// inline table, node is the enclosing key-value expression, so the error +// points at that expression's key. +func (d *decoder) wrapSeenError(node *unstable.Node, err error) error { + if err == nil { + return nil } - var rv reflect.Value - for d.nextExpr() { - expr := d.expr() - if expr.Kind != unstable.KeyValue { - // Stash the expression so that fromParser can just loop and use - // the right handler. - // We could just recurse ourselves here, but at least this gives a - // chance to pop the stack a bit. - d.stashExpr() - break + var key Key + var start, end unstable.Range + it := node.Key() + for it.Next() { + n := it.Node() + key = append(key, string(n.Data)) + if len(key) == 1 { + start = n.Raw } + end = n.Raw + } - _, err := d.seen.CheckExpression(expr) - if err != nil { - return reflect.Value{}, err - } + var highlight []byte + if len(key) > 0 { + highlight = d.p.Raw(unstable.Range{ + Offset: start.Offset, + Length: end.Offset + end.Length - start.Offset, + }) + } - x, err := d.handleKeyValue(expr, v) - if err != nil { - return reflect.Value{}, err - } - if x.IsValid() { - v = x - rv = x - } + return &unstable.ParserError{ + Highlight: highlight, + Message: strings.TrimPrefix(err.Error(), "toml: "), + Key: key, } - return rv, nil } -// handleKeyValuesUnmarshaler collects all key-value expressions for a table -// and passes them to the Unmarshaler as raw TOML bytes. -func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) { - // Collect raw bytes from all key-value expressions for this table. - // We use the Raw field on each KeyValue expression to preserve the - // original formatting (whitespace, quoting style, etc.) from the document. - var buf []byte - - for d.nextExpr() { - expr := d.expr() - if expr.Kind != unstable.KeyValue { - d.stashExpr() - break - } +func (d *decoder) handleRootExpression(expr *unstable.Node, root reflect.Value) error { + first, err := d.seen.CheckExpression(expr) + if err != nil { + return d.wrapSeenError(expr, err) + } - _, err := d.seen.CheckExpression(expr) - if err != nil { - return reflect.Value{}, err + switch expr.Kind { + case unstable.KeyValue: + if d.skipUntilTable { + return nil } - - // Use the raw bytes from the original document to preserve formatting - if expr.Raw.Length > 0 { - raw := d.p.Raw(expr.Raw) - buf = append(buf, raw...) + if d.captureIdx >= 0 { + d.captureKeyValue(expr) + return nil } - buf = append(buf, '\n') + return d.handleKeyValueExpression(expr, root) + case unstable.Table: + d.flushTable() + d.skipUntilTable = false + d.captureIdx = -1 + d.strict.EnterTable(expr) + return d.handleTableExpression(expr, root, false, first) + case unstable.ArrayTable: + d.flushTable() + d.skipUntilTable = false + d.captureIdx = -1 + d.strict.EnterTable(expr) + return d.handleTableExpression(expr, root, true, first) + default: + return unstable.NewParserError(expr.Data, "unsupported expression kind %s", expr.Kind) } +} - if err := u.UnmarshalTOML(buf); err != nil { - return reflect.Value{}, err +// updateTableKey copies the parts of the key of a table expression into +// tableKey. +func (d *decoder) updateTableKey(expr *unstable.Node) { + d.tableKey = d.tableKey[:0] + it := expr.Key() + for it.Next() { + d.tableKey = append(d.tableKey, d.intern(it.Node().Data)) } - - return reflect.Value{}, nil } -type ( - handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error) - valueMakerFn func() reflect.Value -) +func (d *decoder) handleTableExpression(expr *unstable.Node, root reflect.Value, isArrayTable bool, first bool) error { + d.updateTableKey(expr) -func makeMapStringInterface() reflect.Value { - return reflect.MakeMap(mapStringInterfaceType) -} + // Check whether this table belongs to an exisiting raw capture (split + // tables, or children of a table assigned to an Unmarshaler). + if d.unmarshalerInterface { + if d.resumeCapture(expr) { + return nil + } + } -func makeSliceInterface() reflect.Value { - return reflect.MakeSlice(sliceInterfaceType, 0, 16) -} + // Reset the per-segment array indexes. + d.segIdx = d.segIdx[:0] + for i := 0; i <= len(d.tableKey); i++ { + d.segIdx = append(d.segIdx, -1) + } -func (d *decoder) handleTablePart(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { - return d.handleKeyPart(key, v, d.handleTable, makeMapStringInterface) + return d.walkTable(root, expr, isArrayTable, first) } -func (d *decoder) tryTextUnmarshaler(node *unstable.Node, v reflect.Value) (bool, error) { - // Special case for time, because we allow to unmarshal to it from - // different kind of AST nodes. - if v.Type() == timeType { - return false, nil +// newContainerElem returns a fresh element for a slice of the given element +// type. Plain interface elements start out as an empty table. +func newContainerElem(et reflect.Type) reflect.Value { + if et == interfaceType { + return reflect.ValueOf(map[string]interface{}{}) } + return reflect.New(et).Elem() +} - if v.CanAddr() && v.Addr().Type().Implements(textUnmarshalerType) { - err := v.Addr().Interface().(encoding.TextUnmarshaler).UnmarshalText(node.Data) - if err != nil { - return false, unstable.NewParserError(d.p.Raw(node.Raw), "%w", err) +// walkTable processes a [table] or [[array table]] header: it creates the +// intermediate containers, appends array-table elements, applies the strict +// policy, registers unmarshaler-interface captures, and caches the target +// container so that the key-values that follow are stored directly. +// +// Map values are not addressable: when one needs in-place mutations (struct +// or array values), a copy is made and registered to be stored back when the +// table changes (see flushTable). Maps and slices are references and are +// traversed without copies. +func (d *decoder) walkTable(root reflect.Value, expr *unstable.Node, isArrayTable bool, first bool) error { + v := root + pf := slotWriter{kind: 1, slot: root} + idx := 0 + +walk: + for { + // Dereference pointers in place. + for v.Kind() == reflect.Ptr { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + elem := v.Elem() + pf = slotWriter{kind: 1, slot: elem} + v = elem + } + + // Tables assigned to a type implementing the unmarshaler interface + // are captured as raw bytes, delivered once the document is read. + if d.unmarshalerInterface && hasUnmarshaler(v) { + d.startCapture(idx, expr) + return nil + } + + if idx >= len(d.tableKey) { + break walk } - return true, nil + name := d.tableKey[idx] + + switch v.Kind() { + case reflect.Interface: + if !v.IsNil() { + c := v.Elem() + if k := c.Kind(); k == reflect.Map || k == reflect.Slice { + // Reference types: mutations are visible through the + // existing interface value. + v = c + continue + } + } + // Anything else is replaced by a fresh generic map. + if !mapStringInterfaceType.AssignableTo(v.Type()) { + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot store a table in a %s", v.Type()) + } + fresh := reflect.ValueOf(map[string]interface{}{}) + d.storeSlot(&pf, fresh) + v = fresh + case reflect.Slice: + if v.Len() == 0 { + // Implicit creation of the first element: the array table + // that would create it has not been seen yet (issue 995). + if v.IsNil() { + v = reflect.MakeSlice(v.Type(), 0, 4) + } + v = reflect.Append(v, newContainerElem(v.Type().Elem())) + d.storeSlot(&pf, v) + } + n := v.Len() - 1 + d.segIdx[idx] = n + elem := v.Index(n) + pf = slotWriter{kind: 1, slot: elem} + v = elem + case reflect.Array: + key := d.joinPath(d.tableKey[:idx]) + cnt := d.arrayCount(key) + if cnt == 0 { + cnt = 1 + d.setArrayCount(key, 1) + } + if cnt > v.Len() { + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot reach element %d of array of size %d", cnt-1, v.Len()) + } + d.segIdx[idx] = cnt - 1 + elem := v.Index(cnt - 1) + pf = slotWriter{kind: 1, slot: elem} + v = elem + case reflect.Map: + if v.IsNil() { + nm := reflect.MakeMap(v.Type()) + d.storeSlot(&pf, nm) + v = nm + } + var key reflect.Value + var w slotWriter + if v.Type().Key() == stringType { + key = d.stringMapKey(name) + w = slotWriter{kind: 3, m: v, ks: name} + } else { + k, err := makeMapKey(v.Type().Key(), name) + if err != nil { + return err + } + key = k + w = slotWriter{kind: 2, m: v, k: k} + } + + elem := v.MapIndex(key) + + // The last part of an array table is finalized as a slice + // container: do not materialize a table for it. + if isArrayTable && idx == len(d.tableKey)-1 { + et := v.Type().Elem() + switch et.Kind() { + case reflect.Interface, reflect.Slice, reflect.Array: + if elem.IsValid() { + v = elem + } else { + v = reflect.Zero(et) + } + pf = w + idx++ + continue + default: + } + } + + if elem.IsValid() { + ce := elem + ceIface := false + if ce.Kind() == reflect.Interface { + ceIface = true + if !ce.IsNil() { + ce = ce.Elem() + } + } + switch ce.Kind() { + case reflect.Map, reflect.Slice: + pf = w + v = ce + case reflect.Ptr: + if ce.IsNil() { + np := reflect.New(ce.Type().Elem()) + d.storeSlot(&w, np) + ce = np + } + pf = w + v = ce + case reflect.Struct, reflect.Array: + if ceIface { + // Interface-held non-generic content is replaced. + fresh := reflect.ValueOf(map[string]interface{}{}) + d.storeSlot(&w, fresh) + pf = w + v = fresh + } else { + tmp := reflect.New(elem.Type()).Elem() + tmp.Set(elem) + d.tableFlush = append(d.tableFlush, flushOp{w: w, val: tmp}) + pf = slotWriter{kind: 1, slot: tmp} + v = tmp + } + default: + if !ceIface { + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot store a table in a %s", ce.Type()) + } + fresh := reflect.ValueOf(map[string]interface{}{}) + d.storeSlot(&w, fresh) + pf = w + v = fresh + } + } else { + et := v.Type().Elem() + switch et.Kind() { + case reflect.Interface: + if !mapStringInterfaceType.AssignableTo(et) { + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot store a table in a %s", et) + } + fresh := reflect.ValueOf(map[string]interface{}{}) + d.storeSlot(&w, fresh) + pf = w + v = fresh + case reflect.Map: + nm := reflect.MakeMap(et) + d.storeSlot(&w, nm) + pf = w + v = nm + case reflect.Ptr: + np := reflect.New(et.Elem()) + d.storeSlot(&w, np) + pf = w + v = np + case reflect.Struct, reflect.Array, reflect.Slice: + tmp := reflect.New(et).Elem() + d.tableFlush = append(d.tableFlush, flushOp{w: w, val: tmp}) + pf = slotWriter{kind: 1, slot: tmp} + v = tmp + default: + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot store a table in a %s", et) + } + } + idx++ + case reflect.Struct: + plan := planForType(v.Type()) + f, found := plan.lookup(name) + if !found { + d.strict.MissingTable(expr) + d.skipUntilTable = true + return nil + } + fv := fieldByIndexAlloc(v, f.index) + pf = slotWriter{kind: 1, slot: fv} + v = fv + idx++ + default: + return unstable.NewParserError(d.p.Raw(expr.Raw), "cannot store a table in a %s", v.Kind()) + } } - return false, nil -} + if isArrayTable { + akey := d.joinPath(d.tableKey) + d.resetChildArrayCounts(akey) -func (d *decoder) handleValue(value *unstable.Node, v reflect.Value) error { - for v.Kind() == reflect.Ptr { - v = initAndDereferencePointer(v) + // Unwrap an interface container. + if v.Kind() == reflect.Interface { + var slice []interface{} + if !v.IsNil() { + if s, ok := v.Elem().Interface().([]interface{}); ok { + slice = s + } + } + if first { + slice = slice[:0] + } + m := map[string]interface{}{} + slice = append(slice, m) + sv := reflect.ValueOf(slice) + d.storeSlot(&pf, sv) + d.setArrayCount(akey, len(slice)) + d.segIdx[len(d.tableKey)] = len(slice) - 1 + d.tableTarget = reflect.ValueOf(m) + d.tableParentSlot = slotWriter{kind: 1, slot: sv.Index(len(slice) - 1)} + d.tableTargetValid = true + return nil + } + + switch v.Kind() { + case reflect.Slice: + if v.IsNil() { + v = reflect.MakeSlice(v.Type(), 0, 4) + } else if first { + v = v.Slice(0, 0) + } + v = reflect.Append(v, newContainerElem(v.Type().Elem())) + d.storeSlot(&pf, v) + n := v.Len() - 1 + d.setArrayCount(akey, n+1) + d.segIdx[len(d.tableKey)] = n + elem := v.Index(n) + if d.unmarshalerInterface && hasUnmarshaler(elem) { + d.startCapture(len(d.tableKey), expr) + return nil + } + pf = slotWriter{kind: 1, slot: elem} + v = elem + case reflect.Array: + cnt := d.arrayCount(akey) + if first { + cnt = 0 + } + if cnt >= v.Len() { + return unstable.NewParserError(d.p.Raw(expr.Raw), "array of size %d is too small to store this array table", v.Len()) + } + v.Index(cnt).Set(reflect.Zero(v.Type().Elem())) + d.setArrayCount(akey, cnt+1) + d.segIdx[len(d.tableKey)] = cnt + elem := v.Index(cnt) + if d.unmarshalerInterface && hasUnmarshaler(elem) { + d.startCapture(len(d.tableKey), expr) + return nil + } + pf = slotWriter{kind: 1, slot: elem} + v = elem + default: + return fmt.Errorf("toml: cannot store an array table in a %s", v.Kind()) + } } - if d.unmarshalerInterface { - if v.CanAddr() && v.Addr().CanInterface() { - if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { - // Pass raw bytes from the original document - return outi.UnmarshalTOML(d.p.Raw(value.Raw)) + // Settle on the concrete container for the key-values that follow. + for { + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() { + if !v.CanSet() { + return nil + } + v.Set(reflect.New(v.Type().Elem())) + } + elem := v.Elem() + pf = slotWriter{kind: 1, slot: elem} + v = elem + continue + case reflect.Interface: + if !v.IsNil() { + c := v.Elem() + if c.Type() == mapStringInterfaceType || c.Type() == sliceInterfaceType { + v = c + continue + } + } + if !mapStringInterfaceType.AssignableTo(v.Type()) { + return fmt.Errorf("toml: cannot store a table in a %s", v.Type()) + } + fresh := reflect.ValueOf(map[string]interface{}{}) + d.storeSlot(&pf, fresh) + v = fresh + continue + case reflect.Slice: + if v.Len() == 0 { + if v.IsNil() { + v = reflect.MakeSlice(v.Type(), 0, 4) + } + v = reflect.Append(v, newContainerElem(v.Type().Elem())) + d.storeSlot(&pf, v) } + n := v.Len() - 1 + d.segIdx[len(d.tableKey)] = n + elem := v.Index(n) + pf = slotWriter{kind: 1, slot: elem} + v = elem + continue + case reflect.Map, reflect.Struct: + d.tableTarget = v + d.tableParentSlot = pf + d.tableTargetValid = true + return nil + default: + return fmt.Errorf("toml: cannot store a table in a %s", v.Kind()) } } +} - // Only try TextUnmarshaler for scalar types. For Array and InlineTable, - // fall through to struct/map unmarshaling to allow flexible unmarshaling - // where a type can implement UnmarshalText for string values but still - // be populated field-by-field from a table. See issue #974. - if value.Kind != unstable.Array && value.Kind != unstable.InlineTable { - ok, err := d.tryTextUnmarshaler(value, v) - if ok || err != nil { - return err +// resumeCapture looks for an existing capture this table expression belongs +// to. It returns true if the expression was consumed. +func (d *decoder) resumeCapture(expr *unstable.Node) bool { + // Iterate in reverse, so that tables attach to the latest element of + // array tables. + for i := len(d.captures) - 1; i >= 0; i-- { + c := &d.captures[i] + if len(d.tableKey) < len(c.names) { + continue } + if expr.Kind == unstable.ArrayTable && len(d.tableKey) == len(c.names) { + // A new element of an array table is not part of the capture of + // the previous element. + continue + } + match := true + for j, p := range c.names { + if d.tableKey[j] != p { + match = false + break + } + } + if !match { + continue + } + d.captureIdx = i + if len(d.tableKey) > len(c.names) { + d.appendCaptureHeader(c, expr, len(c.names)) + } + return true } + return false +} - switch value.Kind { - case unstable.String: - return d.unmarshalString(value, v) - case unstable.Integer: - return d.unmarshalInteger(value, v) - case unstable.Float: - return d.unmarshalFloat(value, v) - case unstable.Bool: - return d.unmarshalBool(value, v) - case unstable.DateTime: - return d.unmarshalDateTime(value, v) - case unstable.LocalDate: - return d.unmarshalLocalDate(value, v) - case unstable.LocalTime: - return d.unmarshalLocalTime(value, v) - case unstable.LocalDateTime: - return d.unmarshalLocalDateTime(value, v) - case unstable.InlineTable: - return d.unmarshalInlineTable(value, v) - case unstable.Array: - return d.unmarshalArray(value, v) - default: - panic(fmt.Errorf("handleValue not implemented for %s", value.Kind)) +// appendCaptureHeader writes the table header of expr in the capture buffer, +// adjusted to be relative to the capture root. +func (d *decoder) appendCaptureHeader(c *rawCapture, expr *unstable.Node, skip int) { + c.buf = append(c.buf, '[') + if expr.Kind == unstable.ArrayTable { + c.buf = append(c.buf, '[') + } + c.buf = append(c.buf, d.rawKeySuffix(expr, skip)...) + c.buf = append(c.buf, ']') + if expr.Kind == unstable.ArrayTable { + c.buf = append(c.buf, ']') + } + c.buf = append(c.buf, '\n') +} + +// rawKeySuffix returns the raw bytes of the key of the expression, skipping +// the first n parts. +func (d *decoder) rawKeySuffix(expr *unstable.Node, n int) []byte { + it := expr.Key() + idx := 0 + var start, end unstable.Range + for it.Next() { + if idx >= n { + r := it.Node().Raw + if start.Length == 0 && start.Offset == 0 && idx == n { + start = r + } + end = r + } + idx++ + } + return d.p.Data()[start.Offset : end.Offset+end.Length] +} + +// startCapture registers a new capture for the table at the given path +// (prefix of tableKey). +func (d *decoder) startCapture(pathLen int, expr *unstable.Node) { + names := make([]string, pathLen) + copy(names, d.tableKey[:pathLen]) + indexes := make([]int, pathLen+1) + copy(indexes, d.segIdx[:pathLen+1]) + d.captures = append(d.captures, rawCapture{ + names: names, + indexes: indexes, + }) + d.captureIdx = len(d.captures) - 1 + if pathLen < len(d.tableKey) { + d.appendCaptureHeader(&d.captures[d.captureIdx], expr, pathLen) } } -func (d *decoder) unmarshalArray(array *unstable.Node, v reflect.Value) error { +// resolveCapture walks back to the target of a capture and delivers the +// accumulated raw bytes to its UnmarshalTOML implementation. +func (d *decoder) resolveCapture(v reflect.Value, c *rawCapture, idx int, indexed bool) (reflect.Value, error) { + if v.Kind() == reflect.Ptr { + if v.Type().Implements(unmarshalerType) && idx == len(c.names) { + u, _ := unmarshalerOf(v) + return v, u.UnmarshalTOML(c.buf) + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + nv, err := d.resolveCapture(v.Elem(), c, idx, indexed) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + v.Elem().Set(nv) + } + return v, nil + } + + if !indexed && (v.Kind() == reflect.Slice || v.Kind() == reflect.Array) && c.indexes[idx] >= 0 { + i := c.indexes[idx] + if i >= v.Len() { + return reflect.Value{}, errors.New("toml: internal error: capture index out of range") + } + elem := v.Index(i) + nv, err := d.resolveCapture(elem, c, idx, true) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + elem.Set(nv) + } + return v, nil + } + + if idx == len(c.names) { + u, ok := unmarshalerOf(v) + if !ok { + return reflect.Value{}, errors.New("toml: internal error: capture target does not implement UnmarshalTOML") + } + return v, u.UnmarshalTOML(c.buf) + } + + name := c.names[idx] + switch v.Kind() { - case reflect.Slice: + case reflect.Struct: + plan := planForType(v.Type()) + f, found := plan.lookup(name) + if !found { + return v, nil + } + fv := fieldByIndexAlloc(v, f.index) + nv, err := d.resolveCapture(fv, c, idx+1, false) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() && fv.CanSet() { + fv.Set(nv) + } + return v, nil + case reflect.Map: + key, err := makeMapKey(v.Type().Key(), name) + if err != nil { + return reflect.Value{}, err + } if v.IsNil() { - v.Set(reflect.MakeSlice(v.Type(), 0, 16)) - } else { - v.SetLen(0) + v = reflect.MakeMap(v.Type()) } - case reflect.Array: - // arrays are always initialized - case reflect.Interface: - elem := v.Elem() - if !elem.IsValid() { - elem = reflect.New(sliceInterfaceType).Elem() - elem.Set(reflect.MakeSlice(sliceInterfaceType, 0, 16)) - } else if elem.Kind() == reflect.Slice { - if elem.Type() != sliceInterfaceType { - elem = reflect.New(sliceInterfaceType).Elem() - elem.Set(reflect.MakeSlice(sliceInterfaceType, 0, 16)) - } else if !elem.CanSet() { - nelem := reflect.New(sliceInterfaceType).Elem() - nelem.Set(reflect.MakeSlice(sliceInterfaceType, elem.Len(), elem.Cap())) - reflect.Copy(nelem, elem) - elem = nelem - } + elem := reflect.New(v.Type().Elem()).Elem() + if existing := v.MapIndex(key); existing.IsValid() { + elem.Set(existing) } - err := d.unmarshalArray(array, elem) + nv, err := d.resolveCapture(elem, c, idx+1, false) if err != nil { - return err + return reflect.Value{}, err } - v.Set(elem) - return nil + if nv.IsValid() { + v.SetMapIndex(key, nv) + } + return v, nil + case reflect.Interface: + elem := elemOrNewMap(v) + nv, err := d.resolveCapture(elem, c, idx, indexed) + if err != nil || !nv.IsValid() { + return reflect.Value{}, err + } + return nv, nil default: - // TODO: use newDecodeError, but first the parser needs to fill - // array.Data. - return d.typeMismatchError("array", v.Type()) + return reflect.Value{}, fmt.Errorf("toml: internal error: cannot resolve capture target through %s", v.Kind()) } +} - elemType := v.Type().Elem() - - it := array.Children() - idx := 0 - for it.Next() { - n := it.Node() +// captureKeyValue appends the raw bytes of a key-value expression to the +// current capture. +func (d *decoder) captureKeyValue(expr *unstable.Node) { + c := &d.captures[d.captureIdx] + c.buf = append(c.buf, d.p.Raw(expr.Raw)...) + c.buf = append(c.buf, '\n') +} - // TODO: optimize - if v.Kind() == reflect.Slice { - elem := reflect.New(elemType).Elem() +// hasUnmarshaler reports whether v can provide an unstable.Unmarshaler, +// without allocating anything. +func hasUnmarshaler(v reflect.Value) bool { + t := v.Type() + return t.Implements(unmarshalerType) || (v.CanAddr() && reflect.PtrTo(t).Implements(unmarshalerType)) +} - err := d.handleValue(n, elem) +// makeMapKey converts a TOML key into a value usable as the given map key +// type. +func makeMapKey(kt reflect.Type, name string) (reflect.Value, error) { + switch kt.Kind() { + case reflect.String: + return reflect.ValueOf(name).Convert(kt), nil + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + i, err := strconv.ParseInt(name, 10, 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: cannot parse map key %q as %s: %w", name, kt, err) + } + k := reflect.New(kt).Elem() + if k.OverflowInt(i) { + return reflect.Value{}, fmt.Errorf("toml: map key %q overflows %s", name, kt) + } + k.SetInt(i) + return k, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + u, err := strconv.ParseUint(name, 10, 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: cannot parse map key %q as %s: %w", name, kt, err) + } + k := reflect.New(kt).Elem() + if k.OverflowUint(u) { + return reflect.Value{}, fmt.Errorf("toml: map key %q overflows %s", name, kt) + } + k.SetUint(u) + return k, nil + case reflect.Float32, reflect.Float64: + f, err := strconv.ParseFloat(name, 64) + if err != nil { + return reflect.Value{}, fmt.Errorf("toml: cannot parse map key %q as %s: %w", name, kt, err) + } + k := reflect.New(kt).Elem() + k.SetFloat(f) + return k, nil + case reflect.Ptr: + if kt.Implements(textUnmarshalerType) { + k := reflect.New(kt.Elem()) + err := k.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(name)) if err != nil { - return err + return reflect.Value{}, fmt.Errorf("toml: error unmarshaling map key %q: %w", name, err) } - - v.Set(reflect.Append(v, elem)) - } else { // array - if idx >= v.Len() { - return nil - } - elem := v.Index(idx) - err := d.handleValue(n, elem) + return k, nil + } + default: + if reflect.PtrTo(kt).Implements(textUnmarshalerType) { + k := reflect.New(kt) + err := k.Interface().(encoding.TextUnmarshaler).UnmarshalText([]byte(name)) if err != nil { - return err + return reflect.Value{}, fmt.Errorf("toml: error unmarshaling map key %q: %w", name, err) } - idx++ + return k.Elem(), nil + } + } + return reflect.Value{}, fmt.Errorf("toml: cannot decode a key into a map with key type %s", kt) +} + +// elemOrNewMap unwraps an interface value to descend into it. Contents that +// can hold a table (generic maps and slices) are kept; anything else is +// replaced by a fresh map[string]interface{}. Maps and slices are reference +// types: they are returned directly, not copied. +func elemOrNewMap(v reflect.Value) reflect.Value { + if !v.IsNil() { + concrete := v.Elem() + t := concrete.Type() + if t == mapStringInterfaceType || t == sliceInterfaceType { + return concrete + } + } + return reflect.ValueOf(map[string]interface{}{}) +} + +// handleKeyValueExpression stores the value of a top-level key-value +// expression, relative to the current table. +func (d *decoder) handleKeyValueExpression(expr *unstable.Node, root reflect.Value) error { + d.path = d.path[:0] + + target := root + useCache := d.tableTargetValid && len(d.tableKey) > 0 + if useCache { + target = d.tableTarget + } else { + for _, name := range d.tableKey { + d.path = append(d.path, pathPart{name: name}) } } + it := expr.Key() + for it.Next() { + d.path = append(d.path, pathPart{node: it.Node()}) + } + + nv, err := d.descend(target, d.path, 0, expr, expr.Value()) + if err != nil { + return d.contextualizeError(err, useCache) + } + if !nv.IsValid() { + return nil + } + if useCache { + // The target may have been replaced (e.g. a nil map allocated): + // re-link it into its parent. + if nv.Kind() == reflect.Map && nv.Pointer() != d.tableTarget.Pointer() { + d.storeSlot(&d.tableParentSlot, nv) + d.tableTarget = nv + } + } else { + if root.CanSet() { + root.Set(nv) + } + } return nil } -func (d *decoder) unmarshalInlineTable(itable *unstable.Node, v reflect.Value) error { - // Make sure v is an initialized object. +// descend walks the given key path into v, and assigns the value at the +// end. It returns the value to store back at this level. An invalid value +// means nothing should be stored (e.g. unknown field). +func (d *decoder) descend(v reflect.Value, path []pathPart, idx int, expr *unstable.Node, value *unstable.Node) (reflect.Value, error) { + if idx == len(path) { + return d.assignValue(v, expr, value) + } + + if v.Kind() == reflect.Ptr { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + nv, err := d.descend(v.Elem(), path, idx, expr, value) + if err != nil || !nv.IsValid() { + return reflect.Value{}, err + } + v.Elem().Set(nv) + return v, nil + } + + // A target implementing the unmarshaler interface consumes the value, + // whatever the remaining parts of the key are. + if d.unmarshalerInterface { + if u, ok := unmarshalerOf(v); ok { + return v, u.UnmarshalTOML(d.rawValue(expr, value)) + } + } + + part := path[idx] + switch v.Kind() { case reflect.Map: + // Native fast path for the most common generic target: walk the + // remaining dotted-key path with plain Go map operations and decode + // the value directly, skipping the reflect.Value round-trips + // (stringMapKey, MapIndex, New, SetMapIndex) entirely. + if !d.unmarshalerInterface && v.Type() == mapStringInterfaceType { + return d.descendStrMap(v, path, idx, value) + } + var name string + var key reflect.Value + var err error + fastKey := v.Type().Key() == stringType + if fastKey { + name = d.partString(&part) + key = d.stringMapKey(name) + } else { + key, err = makeMapKey(v.Type().Key(), d.partString(&part)) + if err != nil { + return reflect.Value{}, err + } + } if v.IsNil() { - v.Set(reflect.MakeMap(v.Type())) + v = reflect.MakeMap(v.Type()) + } + elemType := v.Type().Elem() + existing := v.MapIndex(key) + var elem reflect.Value + switch { + case existing.IsValid(): + elem = reflect.New(elemType).Elem() + elem.Set(existing) + case idx+1 == len(path) && elemType.Kind() == reflect.Interface: + // Fast path: a fresh interface element does not need to be + // materialized, the assigned value is stored directly. + elem = reflect.Zero(elemType) + default: + elem = reflect.New(elemType).Elem() } + nv, err := d.descend(elem, path, idx+1, expr, value) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + if fastKey { + // The recursion may have overwritten the key buffer. + key = d.stringMapKey(name) + } + v.SetMapIndex(key, nv) + } + return v, nil case reflect.Struct: - // structs are always initialized. + plan := planForType(v.Type()) + f, found := plan.lookupBytes(part.bytes()) + if !found { + if part.node != nil { + d.strict.MissingField(expr) + } + return v, nil + } + fv := fieldByIndexAlloc(v, f.index) + var nv reflect.Value + var err error + if idx+1 == len(path) { + // Leaf field: assign directly. descend's first action for a + // fully-consumed path is exactly this call, so skipping the extra + // frame is equivalent and avoids a call per scalar field. + nv, err = d.assignValue(fv, expr, value) + } else { + nv, err = d.descend(fv, path, idx+1, expr, value) + } + if err != nil { + var mm *typeMismatchError + if errors.As(err, &mm) { + err = &unstable.ParserError{ + Highlight: mm.highlight, + Message: fmt.Sprintf("cannot decode TOML %s into struct field %s.%s of type %s", + mm.toml, v.Type(), f.fieldName, mm.target), + } + } + return reflect.Value{}, err + } + if nv.IsValid() && fv.CanSet() { + fv.Set(nv) + } + return v, nil case reflect.Interface: - elem := v.Elem() - if !elem.IsValid() { - elem = makeMapStringInterface() - v.Set(elem) + elem := elemOrNewMap(v) + nv, err := d.descend(elem, path, idx, expr, value) + if err != nil || !nv.IsValid() { + return reflect.Value{}, err + } + return nv, nil + case reflect.Slice: + if v.Len() == 0 { + if v.IsNil() { + v = reflect.MakeSlice(v.Type(), 0, 4) + } + v = reflect.Append(v, reflect.New(v.Type().Elem()).Elem()) + } + elem := v.Index(v.Len() - 1) + nv, err := d.descend(elem, path, idx, expr, value) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + elem.Set(nv) } - return d.unmarshalInlineTable(itable, elem) + return v, nil + case reflect.Array: + names := make([]string, idx) + for i := range names { + names[i] = path[i].str() + } + cnt := d.arrayCount(d.joinPath(names)) + if cnt == 0 { + cnt = 1 + } + elemIdx := cnt - 1 + if elemIdx >= v.Len() { + return reflect.Value{}, unstable.NewParserError(keyHighlight(d.p.Data(), part.node), + "cannot reach element %d of array of size %d", elemIdx, v.Len()) + } + elem := v.Index(elemIdx) + nv, err := d.descend(elem, path, idx, expr, value) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + elem.Set(nv) + } + return v, nil default: - return unstable.NewParserError(d.p.Raw(itable.Raw), "cannot store inline table in Go type %s", v.Kind()) + return reflect.Value{}, d.typeMismatchError("table", v.Type(), keyHighlight(d.p.Data(), part.node)) } +} - it := itable.Children() - for it.Next() { - n := it.Node() +// descendStrMap assigns into a native map[string]interface{} target, following +// the remaining dotted-key parts with plain Go map operations and decoding the +// value with decodeAny. It returns the map to store back at this level: a new +// map when v was nil, otherwise v unchanged, since maps are reference types and +// are mutated in place. +func (d *decoder) descendStrMap(v reflect.Value, path []pathPart, idx int, value *unstable.Node) (reflect.Value, error) { + var m map[string]interface{} + if v.IsNil() { + m = make(map[string]interface{}) + v = reflect.ValueOf(m) + } else { + m = v.Interface().(map[string]interface{}) + } - x, err := d.handleKeyValue(n, v) - if err != nil { - return err - } - if x.IsValid() { - v = x + // Walk intermediate parts, creating or reusing nested generic maps. A + // non-map value at an intermediate key can only occur in a document the + // seen-tracker has already rejected; replacing it mirrors the reflect + // path (elemOrNewMap). + for ; idx < len(path)-1; idx++ { + name := d.partString(&path[idx]) + child, _ := m[name].(map[string]interface{}) + if child == nil { + child = make(map[string]interface{}) + m[name] = child } + m = child } - return nil -} - -func (d *decoder) unmarshalDateTime(value *unstable.Node, v reflect.Value) error { - dt, err := parseDateTime(value.Data) + av, err := d.decodeAny(value) if err != nil { - return err + return reflect.Value{}, err } + m[d.partString(&path[idx])] = av + return v, nil +} - if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("datetime", v.Type())) +// keyHighlight returns a highlight for the given key part node, falling back +// to the start of the document. +func keyHighlight(doc []byte, node *unstable.Node) []byte { + if node == nil { + return doc[0:0] } - v.Set(reflect.ValueOf(dt)) - return nil + return doc[node.Raw.Offset : node.Raw.Offset+node.Raw.Length] } -func (d *decoder) unmarshalLocalDate(value *unstable.Node, v reflect.Value) error { - ld, err := parseLocalDate(value.Data) - if err != nil { - return err +// rawValue returns the raw bytes of the value of a key-value expression. +func (d *decoder) rawValue(expr *unstable.Node, value *unstable.Node) []byte { + if value.Kind != unstable.InlineTable && value.Kind != unstable.Array { + return d.p.Raw(value.Raw) + } + if expr == nil || expr.Kind != unstable.KeyValue { + // Inline container nested in another container: best effort. + return d.p.Raw(value.Raw) + } + // Reconstruct the span of the value: it starts after the equal sign + // following the last part of the key, and stops at the end of the + // expression. + var last unstable.Range + it := expr.Key() + for it.Next() { + last = it.Node().Raw + } + doc := d.p.Data() + i := int(last.Offset + last.Length) + for i < len(doc) && (doc[i] == ' ' || doc[i] == '\t') { + i++ } + i++ // equal sign + for i < len(doc) && (doc[i] == ' ' || doc[i] == '\t') { + i++ + } + end := int(expr.Raw.Offset + expr.Raw.Length) + return doc[i:end] +} - if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local date", v.Type())) +// unmarshalerOf returns the unstable.Unmarshaler implementation of v, if +// any. It allocates intermediate pointers as needed. +func unmarshalerOf(v reflect.Value) (unstable.Unmarshaler, bool) { + t := v.Type() + if t.Implements(unmarshalerType) { + if v.Kind() == reflect.Ptr && v.IsNil() { + v.Set(reflect.New(t.Elem())) + } + return v.Interface().(unstable.Unmarshaler), true } - if v.Type() == timeType { - v.Set(reflect.ValueOf(ld.AsTime(time.Local))) - return nil + if v.CanAddr() && reflect.PtrTo(t).Implements(unmarshalerType) { + return v.Addr().Interface().(unstable.Unmarshaler), true } - v.Set(reflect.ValueOf(ld)) - return nil + return nil, false } -func (d *decoder) unmarshalLocalTime(value *unstable.Node, v reflect.Value) error { - lt, rest, err := parseLocalTime(value.Data) - if err != nil { - return err +var unmarshalerType = reflect.TypeOf(new(unstable.Unmarshaler)).Elem() + +// assignValue stores the TOML value carried by the node into v. +func (d *decoder) assignValue(v reflect.Value, expr *unstable.Node, value *unstable.Node) (reflect.Value, error) { + if v.Kind() == reflect.Ptr { + if d.unmarshalerInterface { + if u, ok := unmarshalerOf(v); ok { + return v, u.UnmarshalTOML(d.rawValue(expr, value)) + } + } + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + nv, err := d.assignValue(v.Elem(), expr, value) + if err != nil || !nv.IsValid() { + return reflect.Value{}, err + } + v.Elem().Set(nv) + return v, nil } - if len(rest) > 0 { - return unstable.NewParserError(rest, "extra characters at the end of a local time") + if d.unmarshalerInterface { + if u, ok := unmarshalerOf(v); ok { + return v, u.UnmarshalTOML(d.rawValue(expr, value)) + } } - if v.Kind() != reflect.Interface { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local time", v.Type())) + switch value.Kind { + case unstable.String: + return d.assignString(v, value) + case unstable.Integer: + return d.assignInteger(v, value) + case unstable.Float: + return d.assignFloat(v, value) + case unstable.Bool: + return d.assignBool(v, value) + case unstable.DateTime: + return d.assignDateTime(v, value) + case unstable.LocalDateTime: + return d.assignLocalDateTime(v, value) + case unstable.LocalDate: + return d.assignLocalDate(v, value) + case unstable.LocalTime: + return d.assignLocalTime(v, value) + case unstable.Array: + return d.assignArray(v, expr, value) + case unstable.InlineTable: + return d.assignInlineTable(v, expr, value) + default: + return reflect.Value{}, unstable.NewParserError(value.Data, "unsupported value kind %s", value.Kind) } - v.Set(reflect.ValueOf(lt)) - return nil } -func (d *decoder) unmarshalLocalDateTime(value *unstable.Node, v reflect.Value) error { - ldt, rest, err := parseLocalDateTime(value.Data) - if err != nil { - return err +func (d *decoder) assignString(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + switch v.Kind() { + case reflect.String: + v.SetString(string(value.Data)) + return v, nil + case reflect.Interface: + return boxInto(v, reflect.ValueOf(string(value.Data))) + default: } - - if len(rest) > 0 { - return unstable.NewParserError(rest, "extra characters at the end of a local date time") + if v.CanAddr() && v.Addr().Type().Implements(textUnmarshalerType) { + err := v.Addr().Interface().(encoding.TextUnmarshaler).UnmarshalText(value.Data) + if err != nil { + return reflect.Value{}, unstable.NewParserError(d.p.Raw(value.Raw), "%s", err) + } + return v, nil } + return reflect.Value{}, d.typeMismatchError("string", v.Type(), d.p.Raw(value.Raw)) +} - if v.Kind() != reflect.Interface && v.Type() != timeType { - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("local datetime", v.Type())) +func (d *decoder) assignInteger(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + // Integer values targeting a float field are parsed as floats: they can + // represent (approximately) numbers beyond the int64 range. + if k := v.Kind(); k == reflect.Float32 || k == reflect.Float64 { + return d.assignFloat(v, value) } - if v.Type() == timeType { - v.Set(reflect.ValueOf(ldt.AsTime(time.Local))) - return nil + + i, err := parseInteger(value.Data) + if err != nil { + return reflect.Value{}, err } - v.Set(reflect.ValueOf(ldt)) - return nil -} - -func (d *decoder) unmarshalBool(value *unstable.Node, v reflect.Value) error { - b := value.Data[0] == 't' switch v.Kind() { - case reflect.Bool: - v.SetBool(b) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if v.OverflowInt(i) { + return reflect.Value{}, unstable.NewParserError(value.Data, "integer value %d cannot be stored in %s", i, v.Type()) + } + v.SetInt(i) + return v, nil + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + if i < 0 { + return reflect.Value{}, unstable.NewParserError(value.Data, "negative integer value %d cannot be stored in %s", i, v.Type()) + } + if v.OverflowUint(uint64(i)) { + return reflect.Value{}, unstable.NewParserError(value.Data, "integer value %d cannot be stored in %s", i, v.Type()) + } + v.SetUint(uint64(i)) + return v, nil case reflect.Interface: - v.Set(reflect.ValueOf(b)) + return boxInto(v, reflect.ValueOf(i)) default: - return unstable.NewParserError(value.Data, "cannot assign boolean to a %t", b) } + if ok, err := tryTextUnmarshaler(v, value.Data); ok { + return v, err + } + return reflect.Value{}, d.typeMismatchError("integer", v.Type(), d.p.Raw(value.Raw)) +} - return nil +// tryTextUnmarshaler attempts to deliver the raw text of a value to a target +// implementing encoding.TextUnmarshaler. +func tryTextUnmarshaler(v reflect.Value, text []byte) (bool, error) { + if v.CanAddr() && v.Addr().Type().Implements(textUnmarshalerType) { + return true, v.Addr().Interface().(encoding.TextUnmarshaler).UnmarshalText(text) + } + return false, nil } -func (d *decoder) unmarshalFloat(value *unstable.Node, v reflect.Value) error { +func (d *decoder) assignFloat(v reflect.Value, value *unstable.Node) (reflect.Value, error) { f, err := parseFloat(value.Data) if err != nil { - return err + return reflect.Value{}, err } switch v.Kind() { case reflect.Float64: v.SetFloat(f) + return v, nil case reflect.Float32: - if f > math.MaxFloat32 { - return unstable.NewParserError(value.Data, "number %f does not fit in a float32", f) + if !math.IsInf(f, 0) && math.Abs(f) > math.MaxFloat32 { + return reflect.Value{}, unstable.NewParserError(value.Data, "float value %f cannot be stored in float32", f) } v.SetFloat(f) + return v, nil case reflect.Interface: - v.Set(reflect.ValueOf(f)) + return boxInto(v, reflect.ValueOf(f)) default: - return unstable.NewParserError(value.Data, "float cannot be assigned to %s", v.Kind()) } - - return nil + if ok, err := tryTextUnmarshaler(v, value.Data); ok { + return v, err + } + return reflect.Value{}, d.typeMismatchError("float", v.Type(), d.p.Raw(value.Raw)) } -const ( - maxInt = int64(^uint(0) >> 1) - minInt = -maxInt - 1 -) - -// Maximum value of uint for decoding. Currently the decoder parses the integer -// into an int64. As a result, on architectures where uint is 64 bits, the -// effective maximum uint we can decode is the maximum of int64. On -// architectures where uint is 32 bits, the maximum value we can decode is -// lower: the maximum of uint32. I didn't find a way to figure out this value at -// compile time, so it is computed during initialization. -var maxUint int64 = math.MaxInt64 +func (d *decoder) assignBool(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + b := value.Data[0] == 't' -func init() { //nolint:gochecknoinits - m := uint64(^uint(0)) - // #nosec G115 - if m < uint64(maxUint) { - maxUint = int64(m) + switch v.Kind() { + case reflect.Bool: + v.SetBool(b) + return v, nil + case reflect.Interface: + return boxInto(v, reflect.ValueOf(b)) + default: } -} - -func (d *decoder) unmarshalInteger(value *unstable.Node, v reflect.Value) error { - kind := v.Kind() - if kind == reflect.Float32 || kind == reflect.Float64 { - return d.unmarshalFloat(value, v) + if ok, err := tryTextUnmarshaler(v, value.Data); ok { + return v, err } + return reflect.Value{}, d.typeMismatchError("boolean", v.Type(), d.p.Raw(value.Raw)) +} - i, err := parseInteger(value.Data) +func (d *decoder) assignDateTime(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + t, err := parseDateTime(value.Data) if err != nil { - return err + return reflect.Value{}, err } - var r reflect.Value - - switch kind { - case reflect.Int64: - v.SetInt(i) - return nil - case reflect.Int32: - if i < math.MinInt32 || i > math.MaxInt32 { - return fmt.Errorf("toml: number %d does not fit in an int32", i) - } - - r = reflect.ValueOf(int32(i)) - case reflect.Int16: - if i < math.MinInt16 || i > math.MaxInt16 { - return fmt.Errorf("toml: number %d does not fit in an int16", i) - } - - r = reflect.ValueOf(int16(i)) - case reflect.Int8: - if i < math.MinInt8 || i > math.MaxInt8 { - return fmt.Errorf("toml: number %d does not fit in an int8", i) - } - - r = reflect.ValueOf(int8(i)) - case reflect.Int: - if i < minInt || i > maxInt { - return fmt.Errorf("toml: number %d does not fit in an int", i) - } - - r = reflect.ValueOf(int(i)) - case reflect.Uint64: - if i < 0 { - return fmt.Errorf("toml: negative number %d does not fit in an uint64", i) - } - - r = reflect.ValueOf(uint64(i)) - case reflect.Uint32: - if i < 0 || i > math.MaxUint32 { - return fmt.Errorf("toml: negative number %d does not fit in an uint32", i) - } - - r = reflect.ValueOf(uint32(i)) - case reflect.Uint16: - if i < 0 || i > math.MaxUint16 { - return fmt.Errorf("toml: negative number %d does not fit in an uint16", i) - } - - r = reflect.ValueOf(uint16(i)) - case reflect.Uint8: - if i < 0 || i > math.MaxUint8 { - return fmt.Errorf("toml: negative number %d does not fit in an uint8", i) - } - - r = reflect.ValueOf(uint8(i)) - case reflect.Uint: - if i < 0 || i > maxUint { - return fmt.Errorf("toml: negative number %d does not fit in an uint", i) - } - - r = reflect.ValueOf(uint(i)) - case reflect.Interface: - r = reflect.ValueOf(i) - default: - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("integer", v.Type())) + if v.Type() == timeType { + v.Set(reflect.ValueOf(t)) + return v, nil } - - if !r.Type().AssignableTo(v.Type()) { - r = r.Convert(v.Type()) + if v.Kind() == reflect.Interface { + return boxInto(v, reflect.ValueOf(t)) } - - v.Set(r) - - return nil + return reflect.Value{}, d.typeMismatchError("datetime", v.Type(), d.p.Raw(value.Raw)) } -func (d *decoder) unmarshalString(value *unstable.Node, v reflect.Value) error { - switch v.Kind() { - case reflect.String: - v.SetString(string(value.Data)) - case reflect.Interface: - v.Set(reflect.ValueOf(string(value.Data))) - default: - return unstable.NewParserError(d.p.Raw(value.Raw), "%s", d.typeMismatchString("string", v.Type())) +func (d *decoder) assignLocalDateTime(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + dt, rest, err := parseLocalDateTime(value.Data) + if err != nil { + return reflect.Value{}, err + } + if len(rest) > 0 { + return reflect.Value{}, unstable.NewParserError(rest, "extra characters at the end of a local date time") } - return nil + switch v.Type() { + case localDateTimeType: + v.Set(reflect.ValueOf(dt)) + return v, nil + case timeType: + v.Set(reflect.ValueOf(dt.AsTime(time.Local))) + return v, nil + } + if v.Kind() == reflect.Interface { + return boxInto(v, reflect.ValueOf(dt)) + } + return reflect.Value{}, d.typeMismatchError("local datetime", v.Type(), d.p.Raw(value.Raw)) } -func (d *decoder) handleKeyValue(expr *unstable.Node, v reflect.Value) (reflect.Value, error) { - d.strict.EnterKeyValue(expr) - - v, err := d.handleKeyValueInner(expr.Key(), expr.Value(), v) - if d.skipUntilTable { - d.strict.MissingField(expr) - d.skipUntilTable = false +func (d *decoder) assignLocalDate(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + date, err := parseLocalDate(value.Data) + if err != nil { + return reflect.Value{}, err } - d.strict.ExitKeyValue(expr) - - return v, err -} - -func (d *decoder) handleKeyValueInner(key unstable.Iterator, value *unstable.Node, v reflect.Value) (reflect.Value, error) { - if key.Next() { - // Still scoping the key - return d.handleKeyValuePart(key, value, v) + switch v.Type() { + case localDateType: + v.Set(reflect.ValueOf(date)) + return v, nil + case timeType: + v.Set(reflect.ValueOf(date.AsTime(time.Local))) + return v, nil } - // Done scoping the key. - // v is whatever Go value we need to fill. - return reflect.Value{}, d.handleValue(value, v) + if v.Kind() == reflect.Interface { + return boxInto(v, reflect.ValueOf(date)) + } + return reflect.Value{}, d.typeMismatchError("local date", v.Type(), d.p.Raw(value.Raw)) } -func (d *decoder) keyFromData(keyType reflect.Type, data []byte) (reflect.Value, error) { - switch { - case stringType.AssignableTo(keyType): - return reflect.ValueOf(string(data)), nil +func (d *decoder) assignLocalTime(v reflect.Value, value *unstable.Node) (reflect.Value, error) { + t, rest, err := parseLocalTime(value.Data) + if err != nil { + return reflect.Value{}, err + } + if len(rest) > 0 { + return reflect.Value{}, unstable.NewParserError(rest, "extra characters at the end of a local time") + } - case stringType.ConvertibleTo(keyType): - return reflect.ValueOf(string(data)).Convert(keyType), nil + switch v.Type() { + case localTimeType: + v.Set(reflect.ValueOf(t)) + return v, nil + case timeType: + v.Set(reflect.ValueOf(time.Date(0, 1, 1, t.Hour, t.Minute, t.Second, t.Nanosecond, time.Local))) + return v, nil + } + if v.Kind() == reflect.Interface { + return boxInto(v, reflect.ValueOf(t)) + } + return reflect.Value{}, d.typeMismatchError("local time", v.Type(), d.p.Raw(value.Raw)) +} - case keyType.Implements(textUnmarshalerType): - mk := reflect.New(keyType.Elem()) - if err := mk.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { - return reflect.Value{}, fmt.Errorf("toml: error unmarshalling key type %s from text: %w", stringType, err) +func (d *decoder) assignArray(v reflect.Value, expr *unstable.Node, value *unstable.Node) (reflect.Value, error) { + // Count the elements to allocate the target in one go. + count := 0 + cit := value.Children() + for cit.Next() { + if cit.Node().Kind != unstable.Comment { + count++ } - return mk, nil + } - case reflect.PointerTo(keyType).Implements(textUnmarshalerType): - mk := reflect.New(keyType) - if err := mk.Interface().(encoding.TextUnmarshaler).UnmarshalText(data); err != nil { - return reflect.Value{}, fmt.Errorf("toml: error unmarshalling key type %s from text: %w", stringType, err) + switch v.Kind() { + case reflect.Slice: + // Allocate the backing array once at its final length and assign each + // element in place. This avoids a reflect.New allocation per element + // and the repeated growth checks of reflect.Append. + slice := reflect.MakeSlice(v.Type(), count, count) + i := 0 + it := value.Children() + for it.Next() { + n := it.Node() + if n.Kind == unstable.Comment { + continue + } + elem := slice.Index(i) + nv, err := d.assignValue(elem, nil, n) + if err != nil { + return reflect.Value{}, err + } + if nv.IsValid() { + elem.Set(nv) + } + i++ + } + return slice, nil + case reflect.Array: + it := value.Children() + i := 0 + for it.Next() { + n := it.Node() + if n.Kind == unstable.Comment { + continue + } + if i >= v.Len() { + // Extra elements are dropped when the target array is too + // small. + break + } + elem := v.Index(i) + nv, err := d.assignValue(elem, nil, n) + if err != nil { + return reflect.Value{}, err + } + elem.Set(nv) + i++ + } + return v, nil + case reflect.Interface: + // Build the []interface{} natively: each element is decoded straight + // into a Go value with no intermediate addressable reflect.Value and + // no reflect round-trip, and nested arrays recurse the same way. + slice := make([]interface{}, 0, count) + it := value.Children() + for it.Next() { + n := it.Node() + if n.Kind == unstable.Comment { + continue + } + ev, err := d.decodeAny(n) + if err != nil { + return reflect.Value{}, err + } + slice = append(slice, ev) } - return mk.Elem(), nil + return boxInto(v, reflect.ValueOf(slice)) + default: } + return reflect.Value{}, d.typeMismatchError("array", v.Type(), d.rawValue(expr, value)) +} - switch keyType.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - key, err := strconv.ParseInt(string(data), 10, 64) - if err != nil { - return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from integer: %w", stringType, err) +// decodeAny decodes a value node into a native Go value (the representation +// used for interface{} targets), without going through reflect. Scalars and +// arrays are handled directly; inline tables still defer to the reflect-based +// path so that their dotted-key merge semantics remain identical. +func (d *decoder) decodeAny(n *unstable.Node) (interface{}, error) { + switch n.Kind { + case unstable.String: + return string(n.Data), nil + case unstable.Integer: + i, err := parseInteger(n.Data) + return i, err + case unstable.Float: + f, err := parseFloat(n.Data) + return f, err + case unstable.Bool: + return n.Data[0] == 't', nil + case unstable.Array: + count := 0 + cit := n.Children() + for cit.Next() { + if cit.Node().Kind != unstable.Comment { + count++ + } } - return reflect.ValueOf(key).Convert(keyType), nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - key, err := strconv.ParseUint(string(data), 10, 64) - if err != nil { - return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from unsigned integer: %w", stringType, err) + slice := make([]interface{}, 0, count) + it := n.Children() + for it.Next() { + c := it.Node() + if c.Kind == unstable.Comment { + continue + } + ev, err := d.decodeAny(c) + if err != nil { + return nil, err + } + slice = append(slice, ev) } - return reflect.ValueOf(key).Convert(keyType), nil - - case reflect.Float32: - key, err := strconv.ParseFloat(string(data), 32) + return slice, nil + case unstable.InlineTable: + // Build the map natively: navigate each (possibly dotted) key with + // plain Go map operations and decode each value with decodeAny. The + // seen-tracker has already rejected duplicate or conflicting keys, so + // intermediate parts can be created/merged without revalidation. + count := 0 + cit := n.Children() + for cit.Next() { + count++ + } + m := make(map[string]interface{}, count) + it := n.Children() + for it.Next() { + kv := it.Node() + if err := d.setAnyKey(m, kv.Key(), kv.Value()); err != nil { + return nil, err + } + } + return m, nil + case unstable.DateTime: + t, err := parseDateTime(n.Data) + return t, err + case unstable.LocalDateTime: + dt, rest, err := parseLocalDateTime(n.Data) if err != nil { - return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from float: %w", stringType, err) + return nil, err } - return reflect.ValueOf(float32(key)), nil - - case reflect.Float64: - key, err := strconv.ParseFloat(string(data), 64) + if len(rest) > 0 { + return nil, unstable.NewParserError(rest, "extra characters at the end of a local date time") + } + return dt, nil + case unstable.LocalDate: + date, err := parseLocalDate(n.Data) + return date, err + case unstable.LocalTime: + t, rest, err := parseLocalTime(n.Data) if err != nil { - return reflect.Value{}, fmt.Errorf("toml: error parsing key of type %s from float: %w", stringType, err) + return nil, err } - return reflect.ValueOf(float64(key)), nil - + if len(rest) > 0 { + return nil, unstable.NewParserError(rest, "extra characters at the end of a local time") + } + return t, nil default: - return reflect.Value{}, fmt.Errorf("toml: cannot convert map key of type %s to expected type %s", stringType, keyType) + return nil, unstable.NewParserError(n.Data, "unsupported value kind %s", n.Kind) } } -func (d *decoder) handleKeyValuePart(key unstable.Iterator, value *unstable.Node, v reflect.Value) (reflect.Value, error) { - // contains the replacement for v - var rv reflect.Value - - // First, dispatch over v to make sure it is a valid object. - // There is no guarantee over what it could be. +func (d *decoder) assignInlineTable(v reflect.Value, expr *unstable.Node, value *unstable.Node) (reflect.Value, error) { switch v.Kind() { case reflect.Map: - vt := v.Type() - - mk, err := d.keyFromData(vt.Key(), key.Node().Data) + // Inline tables are self-contained: they fully replace the target. + v = reflect.MakeMap(v.Type()) + case reflect.Struct: + // fields are set in place + case reflect.Interface: + elem := reflect.ValueOf(map[string]interface{}{}) + nv, err := d.assignInlineTable(elem, expr, value) if err != nil { return reflect.Value{}, err } + return boxInto(v, nv) + default: + return reflect.Value{}, d.typeMismatchError("inline table", v.Type(), d.rawValue(expr, value)) + } - // If the map does not exist, create it. - if v.IsNil() { - v = reflect.MakeMap(vt) - rv = v - } - - mv := v.MapIndex(mk) - set := false - if !mv.IsValid() || key.IsLast() { - set = true - mv = reflect.New(v.Type().Elem()).Elem() - } - - nv, err := d.handleKeyValueInner(key, value, mv) + it := value.Children() + for it.Next() { + kv := it.Node() + // Build the path from the key of this key-value. Keys of inline + // tables rarely have more than a few parts. + var pathBuf [4]pathPart + path := pathBuf[:0] + kit := kv.Key() + for kit.Next() { + path = append(path, pathPart{node: kit.Node()}) + } + nv, err := d.descend(v, path, 0, kv, kv.Value()) if err != nil { return reflect.Value{}, err } if nv.IsValid() { - mv = nv - set = true - } - - if set { - v.SetMapIndex(mk, mv) - } - case reflect.Struct: - path, found := structFieldPath(v, string(key.Node().Data)) - if !found { - // If no matching struct field is found but the target implements the - // unstable.Unmarshaler interface (and it is enabled), delegate the - // decoding of this value to the custom unmarshaler. - if d.unmarshalerInterface { - if v.CanAddr() && v.Addr().CanInterface() { - if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { - // Pass raw bytes from the original document - return reflect.Value{}, outi.UnmarshalTOML(d.p.Raw(value.Raw)) - } - } - } - // Otherwise, keep previous behavior and skip until the next table. - d.skipUntilTable = true - break + v = nv } + } + return v, nil +} - if d.errorContext == nil { - d.errorContext = new(errorContext) - } - t := v.Type() - d.errorContext.Struct = t - d.errorContext.Field = path +// boxInto returns the value to store in place of the interface value v. The +// caller stores the result in the slot v was found in, which performs the +// interface conversion, so the concrete value can be returned as-is. +func boxInto(v reflect.Value, c reflect.Value) (reflect.Value, error) { + if !c.Type().AssignableTo(v.Type()) { + return reflect.Value{}, fmt.Errorf("toml: cannot store %s into %s", c.Type(), v.Type()) + } + return c, nil +} - f := fieldByIndex(v, path) +var ( + interfaceType = reflect.TypeOf(new(interface{})).Elem() + localDateType = reflect.TypeOf(LocalDate{}) + localTimeType = reflect.TypeOf(LocalTime{}) + localDateTimeType = reflect.TypeOf(LocalDateTime{}) +) - if !f.CanAddr() { - // If the field is not addressable, need to take a slower path and - // make a copy of the struct itself to a new location. - nvp := reflect.New(v.Type()) - nvp.Elem().Set(v) - v = nvp.Elem() - _, err := d.handleKeyValuePart(key, value, v) - if err != nil { - return reflect.Value{}, err - } - return nvp.Elem(), nil - } - x, err := d.handleKeyValueInner(key, value, f) - if err != nil { - return reflect.Value{}, err - } +// structPlan caches the mapping between TOML keys and the fields of a struct +// type. byFold, keyed by the lowercased name, resolves any key on its own when +// no two fields fold to the same name (the overwhelmingly common case, marked +// by hasCollision == false): TOML keys are usually lowercase and never match +// the exact (capitalized) Go field names, so the byName probe was always a +// wasted lookup. byName (the exact names) is only consulted, first, when +// fields do collide under folding, to preserve the exact-match-wins tiebreak. +type structPlan struct { + byName map[string]structField + byFold map[string]structField + hasCollision bool +} - if x.IsValid() { - f.Set(x) - } - d.errorContext.Struct = nil - d.errorContext.Field = nil - case reflect.Interface: - v = v.Elem() +type structField struct { + index []int + fieldName string +} - // Following encoding/json: decoding an object into an - // interface{}, it needs to always hold a - // map[string]interface{}. This is for the types to be - // consistent whether a previous value was set or not. - if !v.IsValid() || v.Type() != mapStringInterfaceType { - v = makeMapStringInterface() - } +// foldBufSize bounds the stack buffer used to lowercase keys without +// allocating. Keys longer than this (extremely rare) take the strings.ToLower +// fallback. +const foldBufSize = 68 - x, err := d.handleKeyValuePart(key, value, v) - if err != nil { - return reflect.Value{}, err - } - if x.IsValid() { - v = x - } - rv = v - case reflect.Ptr: - elem := v.Elem() - if !elem.IsValid() { - ptr := reflect.New(v.Type().Elem()) - v.Set(ptr) - rv = v - elem = ptr.Elem() +// lookup and lookupBytes keep the hot path to a single inlinable byFold lookup. +// byFold is indexed by both the exact field/tag names and their lowercased +// forms, so that lookup resolves the two common cases — a lowercase key, or a +// key matching the field's own casing — directly. byName is consulted first +// only for types whose fields collide under case-folding, to preserve the +// exact-match-wins tiebreak. The buffer-fold for other casings lives +// out-of-line so it does not bloat the hot path. +func (p *structPlan) lookup(name string) (structField, bool) { + if p.hasCollision { + if f, ok := p.byName[name]; ok { + return f, true } + } + if f, ok := p.byFold[name]; ok { + return f, true + } + return p.lookupFoldStr(name) +} - elem2, err := d.handleKeyValuePart(key, value, elem) - if err != nil { - return reflect.Value{}, err - } - if elem2.IsValid() { - elem = elem2 +func (p *structPlan) lookupBytes(name []byte) (structField, bool) { + if p.hasCollision { + if f, ok := p.byName[string(name)]; ok { // does not allocate + return f, true } - v.Elem().Set(elem) - default: - return reflect.Value{}, fmt.Errorf("unhandled kv part: %s", v.Kind()) } - - return rv, nil + if f, ok := p.byFold[string(name)]; ok { // does not allocate + return f, true + } + return p.lookupFold(name) } -func initAndDereferencePointer(v reflect.Value) reflect.Value { - var elem reflect.Value - if v.IsNil() { - ptr := reflect.New(v.Type().Elem()) - v.Set(ptr) +// lookupFold resolves keys whose casing matches neither the exact nor the +// lowercased index: it folds to lowercase (in a stack buffer for ASCII, so no +// allocation) and retries; only non-ASCII or oversized keys hit strings.ToLower. +func (p *structPlan) lookupFold(name []byte) (structField, bool) { + if len(name) <= foldBufSize { + // Fold into a stack buffer: len(name) <= cap(buf), so the append + // never reallocates and nothing escapes to the heap. + var buf [foldBufSize]byte + b := buf[:0] + ascii := true + for _, c := range name { + if c >= 0x80 { + ascii = false + break + } + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + b = append(b, c) + } + if ascii { + f, ok := p.byFold[string(b)] // does not allocate + return f, ok + } } - elem = v.Elem() - return elem + f, ok := p.byFold[strings.ToLower(string(name))] + return f, ok } -// Same as reflect.Value.FieldByIndex, but creates pointers if needed. -func fieldByIndex(v reflect.Value, path []int) reflect.Value { - for _, x := range path { - v = v.Field(x) - - if v.Kind() == reflect.Ptr { - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) +func (p *structPlan) lookupFoldStr(name string) (structField, bool) { + if len(name) <= foldBufSize { + // Fold into a stack buffer: len(name) <= cap(buf), so the append + // never reallocates and nothing escapes to the heap. + var buf [foldBufSize]byte + b := buf[:0] + ascii := true + for i := 0; i < len(name); i++ { + c := name[i] + if c >= 0x80 { + ascii = false + break } - v = v.Elem() + if c >= 'A' && c <= 'Z' { + c += 'a' - 'A' + } + b = append(b, c) + } + if ascii { + f, ok := p.byFold[string(b)] // does not allocate + return f, ok } } - return v + f, ok := p.byFold[strings.ToLower(name)] + return f, ok } -type fieldPathsMap = map[string][]int - -var globalFieldPathsCache atomic.Value // map[reflect.Type]fieldPathsMap +var structPlans sync.Map // reflect.Type -> *structPlan -func structFieldPath(v reflect.Value, name string) ([]int, bool) { - t := v.Type() - - cache, _ := globalFieldPathsCache.Load().(map[reflect.Type]fieldPathsMap) - fieldPaths, ok := cache[t] - - if !ok { - fieldPaths = map[string][]int{} - - forEachField(t, nil, func(name string, path []int) { - fieldPaths[name] = path - // extra copy for the case-insensitive match - fieldPaths[strings.ToLower(name)] = path - }) - - newCache := make(map[reflect.Type]fieldPathsMap, len(cache)+1) - newCache[t] = fieldPaths - for k, v := range cache { - newCache[k] = v - } - globalFieldPathsCache.Store(newCache) +func planForType(t reflect.Type) *structPlan { + if plan, ok := structPlans.Load(t); ok { + return plan.(*structPlan) } + plan := buildPlan(t) + structPlans.Store(t, plan) + return plan +} - path, ok := fieldPaths[name] - if !ok { - path, ok = fieldPaths[strings.ToLower(name)] +func buildPlan(t reflect.Type) *structPlan { + plan := &structPlan{ + byName: map[string]structField{}, + byFold: map[string]structField{}, } - return path, ok + addFields(plan, t, nil) + return plan } -func forEachField(t reflect.Type, path []int, do func(name string, path []int)) { - n := t.NumField() - for i := 0; i < n; i++ { +func addFields(plan *structPlan, t reflect.Type, prefix []int) { + var embedded []reflect.StructField + for i := 0; i < t.NumField(); i++ { f := t.Field(i) - - if !f.Anonymous && f.PkgPath != "" { - // only consider exported fields. - continue + tag, tagged := f.Tag.Lookup("toml") + name := f.Name + if tagged { + // A tag of exactly "-" drops the field. "-," names it "-". + if tag == "-" { + continue + } + parts := strings.SplitN(tag, ",", 2) + if parts[0] != "" { + name = parts[0] + } } - - fieldPath := make([]int, 0, len(path)+1) - fieldPath = append(fieldPath, path...) - fieldPath = append(fieldPath, i) - fieldPath = fieldPath[:len(fieldPath):len(fieldPath)] - - name := f.Tag.Get("toml") - if name == "-" { + if f.Anonymous { + ft := f.Type + if ft.Kind() == reflect.Ptr { + ft = ft.Elem() + } + if ft.Kind() != reflect.Struct { + // Embedded non-struct fields are not decoded into. + continue + } + if !tagged { + // Untagged embedded structs are flattened, even when their + // type is unexported: only their own exported fields are + // reachable. + embedded = append(embedded, f) + continue + } + // A tagged embedded struct acts as a regular named field. + } else if f.PkgPath != "" { + // unexported continue } - - if i := strings.IndexByte(name, ','); i >= 0 { - name = name[:i] + index := make([]int, 0, len(prefix)+1) + index = append(index, prefix...) + index = append(index, i) + sf := structField{index: index, fieldName: f.Name} + if _, ok := plan.byName[name]; !ok { + plan.byName[name] = sf } - - if f.Anonymous && name == "" { - t2 := f.Type - if t2.Kind() == reflect.Ptr { - t2 = t2.Elem() - } - - if t2.Kind() == reflect.Struct { - forEachField(t2, fieldPath, do) + lower := strings.ToLower(name) + if _, ok := plan.byFold[lower]; !ok { + plan.byFold[lower] = sf + } else { + // Two distinct fields fold to the same name: case-insensitive + // matching is ambiguous, so lookups must consult byName first to + // keep the exact-match-wins tiebreak deterministic. + plan.hasCollision = true + } + // Index the exact (cased) name as well, so a key written with the + // field's own casing resolves in a single byFold lookup. Only fields + // whose name is not already lowercase need this extra entry. Any name + // that would conflict here also collides under folding (handled + // above), so byName-first preserves the exact tiebreak in that case. + if name != lower { + if _, ok := plan.byFold[name]; !ok { + plan.byFold[name] = sf } - continue } - - if name == "" { - name = f.Name + } + // Embedded structs are flattened after the regular fields, so that + // shallower fields win. + for _, f := range embedded { + ft := f.Type + if ft.Kind() == reflect.Ptr { + ft = ft.Elem() } + index := make([]int, 0, len(prefix)+1) + index = append(index, prefix...) + idx := f.Index[0] + index = append(index, idx) + addFields(plan, ft, index) + } +} - do(name, fieldPath) +// fieldByIndexAlloc returns the field of v at the given index path, +// allocating intermediate embedded pointers as needed. +func fieldByIndexAlloc(v reflect.Value, index []int) reflect.Value { + // Fast path for non-embedded fields, which have a single-element index: + // no intermediate pointer dereferencing is possible. + if len(index) == 1 { + return v.Field(index[0]) + } + for i, x := range index { + if i > 0 { + for v.Kind() == reflect.Ptr { + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + v = v.Elem() + } + } + v = v.Field(x) } + return v } diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/ast.go b/vendor/github.com/pelletier/go-toml/v2/unstable/ast.go index 6b21592d..2cf3bdb6 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/ast.go +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/ast.go @@ -17,43 +17,30 @@ import ( // // do something with n // } type Iterator struct { - nodes *[]Node - idx int32 started bool + node *Node } -// Next moves the iterator forward and returns true if points to a -// node, false otherwise. +// Next moves the iterator forward and returns true if points to a node, false +// otherwise. func (c *Iterator) Next() bool { - if c.nodes == nil { - return false - } - nodes := *c.nodes if !c.started { c.started = true - } else { - idx := c.idx - if idx >= 0 && int(idx) < len(nodes) { - c.idx = nodes[idx].next - } + } else if c.node.Valid() { + c.node = c.node.Next() } - return c.idx >= 0 && int(c.idx) < len(nodes) + return c.node.Valid() } // IsLast returns true if the current node of the iterator is the last -// one. Subsequent calls to Next() will return false. +// one. Subsequent calls to Next() will return false. func (c *Iterator) IsLast() bool { - return c.nodes == nil || c.idx < 0 || (*c.nodes)[c.idx].next < 0 + return c.node.next == 0 } // Node returns a pointer to the node pointed at by the iterator. func (c *Iterator) Node() *Node { - if c.nodes == nil || c.idx < 0 { - return nil - } - n := &(*c.nodes)[c.idx] - n.nodes = c.nodes - return n + return c.node } // Node in a TOML expression AST. @@ -64,8 +51,8 @@ func (c *Iterator) Node() *Node { // - Array have one child per element in the array. // - InlineTable have one child per key-value in the table (each of kind // InlineTable). -// - KeyValue have at least two children. The first one is the value. The rest -// make a potentially dotted key. +// - KeyValue have at least two children. The first one is the value. The +// rest make a potentially dotted key. // - Table and ArrayTable's children represent a dotted key (same as // KeyValue, but without the first node being the value). // @@ -76,68 +63,56 @@ type Node struct { Raw Range // Raw bytes from the input. Data []byte // Node value (either allocated or referencing the input). - // Absolute indices into the backing nodes slice. -1 means none. - next int32 - child int32 - - // Reference to the backing nodes slice for navigation. - nodes *[]Node -} - -// Range of bytes in the document. -type Range struct { - Offset uint32 - Length uint32 + // References to other nodes, as 1-based indexes into the parser's arena. + // 0 means no node. + parser *Parser + next int32 + child int32 } // Next returns a pointer to the next node, or nil if there is no next node. func (n *Node) Next() *Node { - if n.next < 0 { + if n.next == 0 { return nil } - next := &(*n.nodes)[n.next] - next.nodes = n.nodes - return next + return &n.parser.nodes[n.next-1] } // Child returns a pointer to the first child node of this node. Other children -// can be accessed calling Next on the first child. Returns nil if this Node -// has no child. +// can be accessed calling Next on the first child. Returns nil if there is no +// child node. func (n *Node) Child() *Node { - if n.child < 0 { + if n.child == 0 { return nil } - child := &(*n.nodes)[n.child] - child.nodes = n.nodes - return child + return &n.parser.nodes[n.child-1] } // Valid returns true if the node's kind is set (not to Invalid). func (n *Node) Valid() bool { - return n != nil + return n != nil && n.Kind != Invalid } // Key returns the children nodes making the Key on a supported node. Panics -// otherwise. They are guaranteed to be all be of the Kind Key. A simple key +// otherwise. They are guaranteed to be all be of the Kind Key. A simple key // would return just one element. func (n *Node) Key() Iterator { switch n.Kind { case KeyValue: - child := n.child - if child < 0 { + value := n.Child() + if !value.Valid() { panic(errors.New("KeyValue should have at least two children")) } - valueNode := &(*n.nodes)[child] - return Iterator{nodes: n.nodes, idx: valueNode.next} + return Iterator{node: value.Next()} case Table, ArrayTable: - return Iterator{nodes: n.nodes, idx: n.child} + return Iterator{node: n.Child()} default: panic(fmt.Errorf("Key() is not supported on a %s", n.Kind)) } } // Value returns a pointer to the value node of a KeyValue. -// Guaranteed to be non-nil. Panics if not called on a KeyValue node, +// Guaranteed to be non-nil. Panics if not called on a KeyValue node, // or if the Children are malformed. func (n *Node) Value() *Node { return n.Child() @@ -145,5 +120,5 @@ func (n *Node) Value() *Node { // Children returns an iterator over a node's children. func (n *Node) Children() Iterator { - return Iterator{nodes: n.nodes, idx: n.child} + return Iterator{node: n.Child()} } diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/bridge.go b/vendor/github.com/pelletier/go-toml/v2/unstable/bridge.go new file mode 100644 index 00000000..5482bde0 --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/bridge.go @@ -0,0 +1,21 @@ +package unstable + +import "github.com/pelletier/go-toml/v2/internal/parserbridge" + +// Expose the non-AST scanners to the root toml package without committing to +// them in the public API. See internal/parserbridge for the rationale. +// +//nolint:gochecknoinits // load-time wiring of an internal bridge (see internal/parserbridge) +func init() { + parserbridge.ScanScalar = func(p any, b []byte) (kind int, raw, value, rest []byte, err error) { + k, raw, value, rest, err := p.(*Parser).scanScalar(b) + return int(k), raw, value, rest, err + } + parserbridge.ScanKey = func(p any, b []byte, dst [][]byte) (parts [][]byte, raw, rest []byte, err error) { + return p.(*Parser).scanKey(b, dst) + } + parserbridge.ScanComment = scanComment + parserbridge.ParseValue = func(p any, b []byte) (node any, rest []byte, err error) { + return p.(*Parser).parseValue(b) + } +} diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/builder.go b/vendor/github.com/pelletier/go-toml/v2/unstable/builder.go deleted file mode 100644 index e4354985..00000000 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/builder.go +++ /dev/null @@ -1,64 +0,0 @@ -package unstable - -// root contains a full AST. -// -// It is immutable once constructed with Builder. -type root struct { - nodes []Node -} - -func (r *root) at(idx reference) *Node { - return &r.nodes[idx] -} - -type reference int - -const invalidReference reference = -1 - -func (r reference) Valid() bool { - return r != invalidReference -} - -type builder struct { - tree root - lastIdx int -} - -func (b *builder) NodeAt(ref reference) *Node { - n := b.tree.at(ref) - n.nodes = &b.tree.nodes - return n -} - -func (b *builder) Reset() { - b.tree.nodes = b.tree.nodes[:0] - b.lastIdx = 0 -} - -func (b *builder) Push(n Node) reference { - b.lastIdx = len(b.tree.nodes) - n.next = -1 - n.child = -1 - b.tree.nodes = append(b.tree.nodes, n) - return reference(b.lastIdx) -} - -func (b *builder) PushAndChain(n Node) reference { - newIdx := len(b.tree.nodes) - n.next = -1 - n.child = -1 - b.tree.nodes = append(b.tree.nodes, n) - if b.lastIdx >= 0 { - b.tree.nodes[b.lastIdx].next = int32(newIdx) //nolint:gosec // TOML ASTs are small - } - b.lastIdx = newIdx - return reference(b.lastIdx) -} - -func (b *builder) AttachChild(parent reference, child reference) { - b.tree.nodes[parent].child = int32(child) //nolint:gosec // TOML ASTs are small -} - -func (b *builder) Chain(from reference, to reference) { - b.tree.nodes[from].next = int32(to) //nolint:gosec // TOML ASTs are small -} diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/kind.go b/vendor/github.com/pelletier/go-toml/v2/unstable/kind.go index f87a95a7..54b19f9d 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/kind.go +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/kind.go @@ -79,5 +79,5 @@ func (k Kind) String() string { case DateTime: return "DateTime" } - panic(fmt.Errorf("Kind.String() not implemented for '%d'", k)) + panic(fmt.Errorf("Kind.String() not implemented for kind %d", int(k))) } diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go index 15383012..91e22699 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/parser.go @@ -2,11 +2,10 @@ package unstable import ( "bytes" + "encoding/binary" + "errors" "fmt" - "reflect" - "unicode" - - "github.com/pelletier/go-toml/v2/internal/characters" + "unicode/utf8" ) // ParserError describes an error relative to the content of the document. @@ -43,21 +42,19 @@ func NewParserError(highlight []byte, format string, args ...interface{}) error // Don't forget to check Error() after you're done parsing. // // Each top-level expression needs to be fully processed before calling -// NextExpression() again. Otherwise, calls to various Node methods may panic if -// the parser has moved on the next expression. +// NextExpression() again. Otherwise, calls to various Node methods may panic +// if the parser has moved on the next expression. // // For performance reasons, go-toml doesn't make a copy of the input bytes to // the parser. Make sure to copy all the bytes you need to outlive the slice // given to the parser. type Parser struct { - data []byte - builder builder - ref reference - left []byte - err error - first bool - KeepComments bool + + data []byte + left []byte + nodes []Node + err error } // Data returns the slice provided to the last call to Reset. @@ -69,33 +66,18 @@ func (p *Parser) Data() []byte { // input. If the argument is not a subslice of the parser input, this function // panics. func (p *Parser) Range(b []byte) Range { - return Range{ - Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small - Length: uint32(len(b)), //nolint:gosec // TOML documents are small - } -} - -// rangeOfToken computes the Range of a token given the remaining bytes after the token. -// This is used when the token was extracted from the beginning of some position, -// and 'rest' is what remains after the token. -func (p *Parser) rangeOfToken(token, rest []byte) Range { - offset := len(p.data) - len(token) - len(rest) - return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small -} - -// subsliceOffset returns the byte offset of subslice b within p.data. -// b must share the same backing array as p.data. -func (p *Parser) subsliceOffset(b []byte) int { - if len(b) == 0 { - return len(p.data) + // b is a subslice of p.data if and only if they share the same backing + // array. In that case, because subslicing cannot extend capacity, the + // number of bytes between the start of b and the end of the backing array + // (its capacity) identifies the offset of b within data. + offset := cap(p.data) - cap(b) + if offset < 0 || offset+len(b) > len(p.data) { + panic(errors.New("not a slice of the data slice")) } - dataPtr := reflect.ValueOf(p.data).Pointer() - subPtr := reflect.ValueOf(b).Pointer() - offset := int(subPtr - dataPtr) - if offset < 0 || offset > len(p.data) { - panic("subslice is not within data") + return Range{ + Offset: uint32(offset), //nolint:gosec // TOML documents are small + Length: uint32(len(b)), //nolint:gosec // TOML documents are small } - return offset } // Raw returns the slice corresponding to the bytes in the given range. @@ -106,58 +88,10 @@ func (p *Parser) Raw(raw Range) []byte { // Reset brings the parser to its initial state for a given input. It wipes an // reuses internal storage to reduce allocation. func (p *Parser) Reset(b []byte) { - p.builder.Reset() - p.ref = invalidReference p.data = b p.left = b + p.nodes = p.nodes[:0] p.err = nil - p.first = true -} - -// NextExpression parses the next top-level expression. If an expression was -// successfully parsed, it returns true. If the parser is at the end of the -// document or an error occurred, it returns false. -// -// Retrieve the parsed expression with Expression(). -func (p *Parser) NextExpression() bool { - if len(p.left) == 0 || p.err != nil { - return false - } - - p.builder.Reset() - p.ref = invalidReference - - for { - if len(p.left) == 0 || p.err != nil { - return false - } - - if !p.first { - p.left, p.err = p.parseNewline(p.left) - } - - if len(p.left) == 0 || p.err != nil { - return false - } - - p.ref, p.left, p.err = p.parseExpression(p.left) - - if p.err != nil { - return false - } - - p.first = false - - if p.ref.Valid() { - return true - } - } -} - -// Expression returns a pointer to the node representing the last successfully -// parsed expression. -func (p *Parser) Expression() *Node { - return p.builder.NodeAt(p.ref) } // Error returns any error that has occurred during parsing. @@ -165,6 +99,12 @@ func (p *Parser) Error() error { return p.err } +// Range of bytes in the document. +type Range struct { + Offset uint32 + Length uint32 +} + // Position describes a position in the input. type Position struct { // Number of bytes from the beginning of the input. @@ -181,1095 +121,1538 @@ type Shape struct { End Position } +func (p *Parser) position(offset int) Position { + pos := Position{ + Offset: offset, + Line: 1, + Column: 1, + } + b := p.data[:offset] + for { + idx := bytes.IndexByte(b, '\n') + if idx < 0 { + break + } + pos.Line++ + b = b[idx+1:] + } + pos.Column = len(b) + 1 + return pos +} + // Shape returns the shape of the given range in the input. Will // panic if the range is not a subslice of the input. func (p *Parser) Shape(r Range) Shape { + raw := p.Raw(r) return Shape{ - Start: p.positionAt(int(r.Offset)), - End: p.positionAt(int(r.Offset + r.Length)), + Start: p.position(int(r.Offset)), + End: p.position(int(r.Offset) + len(raw)), } } -// positionAt returns the position at the given byte offset in the document. -func (p *Parser) positionAt(offset int) Position { - lead := p.data[:offset] - - return Position{ - Offset: offset, - Line: bytes.Count(lead, []byte{'\n'}) + 1, - Column: len(lead) - bytes.LastIndex(lead, []byte{'\n'}), +// Expression returns a pointer to the node representing the last successfully +// parsed expression. +func (p *Parser) Expression() *Node { + if len(p.nodes) == 0 { + return nil } + return &p.nodes[0] } -func (p *Parser) parseNewline(b []byte) ([]byte, error) { - if b[0] == '\n' { - return b[1:], nil +// push appends a node to the arena and returns its handle (1-based index). +func (p *Parser) push(n Node) int32 { + if len(p.nodes) == cap(p.nodes) { + // Grow by 2x: large expressions (huge arrays) would otherwise grow + // the arena in small steps, copying it repeatedly. + newCap := 2 * cap(p.nodes) + if newCap < 64 { + newCap = 64 + } + nodes := make([]Node, len(p.nodes), newCap) + copy(nodes, p.nodes) + p.nodes = nodes } + n.parser = p + p.nodes = append(p.nodes, n) + return int32(len(p.nodes)) //nolint:gosec // node counts are bounded by document size +} - if b[0] == '\r' { - _, rest, err := scanWindowsNewline(b) - return rest, err - } +// at returns a pointer to the node with the given handle. Only valid until +// the next call to push. +func (p *Parser) at(handle int32) *Node { + return &p.nodes[handle-1] +} - return nil, NewParserError(b[0:1], "expected newline but got %#U", b[0]) +// offsetOf returns the offset of b within the parser's data. b must be a +// subslice of p.data. +func (p *Parser) offsetOf(b []byte) int { + return cap(p.data) - cap(b) } -func (p *Parser) parseComment(b []byte) (reference, []byte, error) { - ref := invalidReference - data, rest, err := scanComment(b) - if p.KeepComments && err == nil { - ref = p.builder.Push(Node{ - Kind: Comment, - Raw: p.rangeOfToken(data, rest), - Data: data, - }) - } - return ref, rest, err +// rangeFrom returns the Range covering bytes from the start of `from` to the +// start of `to`. Both must be subslices of p.data. +func (p *Parser) rangeFrom(from, to []byte) Range { + start := p.offsetOf(from) + end := p.offsetOf(to) + return Range{ + Offset: uint32(start), //nolint:gosec // TOML documents are small + Length: uint32(end - start), //nolint:gosec // TOML documents are small + } } -func (p *Parser) parseExpression(b []byte) (reference, []byte, error) { - // expression = ws [ comment ] - // expression =/ ws keyval ws [ comment ] - // expression =/ ws table ws [ comment ] - ref := invalidReference +// NextExpression parses the next top-level expression. If an expression was +// successfully parsed, it returns true. If the parser is at the end of the +// document or an error occurred, it returns false. +// +// Retrieve the parsed expression with Expression(). +func (p *Parser) NextExpression() bool { + if p.err != nil { + return false + } - b = p.parseWhitespace(b) + p.nodes = p.nodes[:0] - if len(b) == 0 { - return ref, b, nil - } + for { + b := skipWhitespace(p.left) + if len(b) == 0 { + p.left = b + return false + } - if b[0] == '#' { - ref, rest, err := p.parseComment(b) - return ref, rest, err - } + var err error + switch b[0] { + case '\n': + p.left = b[1:] + continue + case '\r': + if len(b) > 1 && b[1] == '\n' { + p.left = b[2:] + continue + } + err = NewParserError(b[:1], "expected newline but got %#U", b[0]) + case '#': + var comment, rest []byte + comment, rest, err = scanComment(b) + if err == nil { + rest, err = consumeEOL(rest) + } + if err == nil { + if p.KeepComments { + p.push(Node{ + Kind: Comment, + Raw: p.Range(comment), + Data: comment, + }) + p.left = rest + return true + } + p.left = rest + continue + } + case '[': + var rest []byte + rest, err = p.parseExprTable(b) + if err == nil { + p.left = rest + return true + } + default: + var rest []byte + rest, err = p.parseExprKeyval(b) + if err == nil { + p.left = rest + return true + } + } - if b[0] == '\n' || b[0] == '\r' { - return ref, b, nil - } + // Errors at the end of the input have an empty highlight. Extend + // them to the last byte of the input so that they carry a usable + // position. + var perr *ParserError + if errors.As(err, &perr) && len(perr.Highlight) == 0 { + if offset := p.offsetOf(perr.Highlight); offset > 0 && offset == len(p.data) { + perr.Highlight = p.data[offset-1 : offset] + } + } - var err error - if b[0] == '[' { - ref, b, err = p.parseTable(b) - } else { - ref, b, err = p.parseKeyval(b) + p.err = err + return false } +} - if err != nil { - return ref, nil, err +// consumeEOL consumes a newline (LF or CRLF) or end of input. +func consumeEOL(b []byte) ([]byte, error) { + if len(b) == 0 { + return b, nil } + switch b[0] { + case '\n': + return b[1:], nil + case '\r': + if len(b) > 1 && b[1] == '\n' { + return b[2:], nil + } + } + return nil, NewParserError(b[:1], "expected newline but got %#U", b[0]) +} - b = p.parseWhitespace(b) - +// finishLine handles `ws [comment] (newline|eof)` after a top-level +// expression. If a comment is present and KeepComments is set, it is attached +// as the next sibling of the expression's root node. +func (p *Parser) finishLine(root int32, b []byte) ([]byte, error) { + b = skipWhitespace(b) if len(b) > 0 && b[0] == '#' { - cref, rest, err := p.parseComment(b) - if cref != invalidReference { - p.builder.Chain(ref, cref) + comment, rest, err := scanComment(b) + if err != nil { + return nil, err + } + if p.KeepComments { + h := p.push(Node{ + Kind: Comment, + Raw: p.Range(comment), + Data: comment, + }) + p.at(root).next = h } - return ref, rest, err + b = rest } - - return ref, b, nil + return consumeEOL(b) } -func (p *Parser) parseTable(b []byte) (reference, []byte, error) { - // table = std-table / array-table - if len(b) > 1 && b[1] == '[' { - return p.parseArrayTable(b) +// parseExprKeyval parses a top-level `key = value` expression, including its +// line termination. +func (p *Parser) parseExprKeyval(b []byte) ([]byte, error) { + root, rest, err := p.parseKeyval(b) + if err != nil { + return nil, err } - - return p.parseStdTable(b) + return p.finishLine(root, rest) } -func (p *Parser) parseArrayTable(b []byte) (reference, []byte, error) { - // array-table = array-table-open key array-table-close - // array-table-open = %x5B.5B ws ; [[ Double left square bracket - // array-table-close = ws %x5D.5D ; ]] Double right square bracket - ref := p.builder.Push(Node{ - Kind: ArrayTable, - }) - - b = b[2:] - b = p.parseWhitespace(b) - - k, b, err := p.parseKey(b) +// parseExprTable parses a `[table]` or `[[array table]]` expression, +// including its line termination. b starts at '['. +func (p *Parser) parseExprTable(b []byte) ([]byte, error) { + var root int32 + var err error + var rest []byte + if len(b) > 1 && b[1] == '[' { + root, rest, err = p.parseArrayTableHeader(b) + } else { + root, rest, err = p.parseTableHeader(b) + } if err != nil { - return ref, nil, err + return nil, err } + return p.finishLine(root, rest) +} - p.builder.AttachChild(ref, k) - b = p.parseWhitespace(b) +// parseTableHeader parses `[ ws key ws ]`. b starts at '['. +func (p *Parser) parseTableHeader(b []byte) (int32, []byte, error) { + root := p.push(Node{Kind: Table}) - b, err = expect(']', b) + first, b, err := p.parseKey(skipWhitespace(b[1:])) if err != nil { - return ref, nil, err + return 0, nil, err } + p.at(root).child = first - b, err = expect(']', b) - - return ref, b, err + if len(b) == 0 || b[0] != ']' { + return 0, nil, NewParserError(highlight1(b), "expected ']' to close table name") + } + return root, b[1:], nil } -func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) { - // std-table = std-table-open key std-table-close - // std-table-open = %x5B ws ; [ Left square bracket - // std-table-close = ws %x5D ; ] Right square bracket - ref := p.builder.Push(Node{ - Kind: Table, - }) +// parseArrayTableHeader parses `[[ ws key ws ]]`. b starts at '[['. +func (p *Parser) parseArrayTableHeader(b []byte) (int32, []byte, error) { + root := p.push(Node{Kind: ArrayTable}) - b = b[1:] - b = p.parseWhitespace(b) - - key, b, err := p.parseKey(b) + first, b, err := p.parseKey(skipWhitespace(b[2:])) if err != nil { - return ref, nil, err + return 0, nil, err } + p.at(root).child = first - p.builder.AttachChild(ref, key) - - b = p.parseWhitespace(b) - - b, err = expect(']', b) - - return ref, b, err + if len(b) < 2 || b[0] != ']' || b[1] != ']' { + return 0, nil, NewParserError(highlight1(b), "expected ']]' to close array table name") + } + return root, b[2:], nil } -func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { - // keyval = key keyval-sep val - // Track the start position for Raw range - startB := b +// parseKeyval parses `key keyval-sep val`. Returns the handle to the KeyValue +// node. +func (p *Parser) parseKeyval(b []byte) (int32, []byte, error) { + root := p.push(Node{Kind: KeyValue}) + start := b - ref := p.builder.Push(Node{ - Kind: KeyValue, - }) - - key, b, err := p.parseKey(b) + firstKey, b, err := p.parseKey(b) if err != nil { - return invalidReference, nil, err + return 0, nil, err } - // keyval-sep = ws %x3D ws ; = - - b = p.parseWhitespace(b) - - if len(b) == 0 { - return invalidReference, nil, NewParserError(startB[:len(startB)-len(b)], "expected = after a key, but the document ends there") + if len(b) == 0 || b[0] != '=' { + return 0, nil, NewParserError(highlight1(b), "expected '=' after key") } + b = skipWhitespace(b[1:]) - b, err = expect('=', b) + value, b, err := p.parseVal(b) if err != nil { - return invalidReference, nil, err + return 0, nil, err } - b = p.parseWhitespace(b) - - valRef, b, err := p.parseVal(b) - if err != nil { - return ref, b, err - } + p.at(root).child = value + p.at(value).next = firstKey + p.at(root).Raw = p.rangeFrom(start, b) + return root, b, nil +} - p.builder.Chain(valRef, key) - p.builder.AttachChild(ref, valRef) +// parseKey parses a potentially dotted key. It consumes the whitespace +// following the key, so that the caller can directly check for the next +// expected character ('=', ']', ...). Returns the handle of the first Key +// node; subsequent parts are chained via next. +func (p *Parser) parseKey(b []byte) (int32, []byte, error) { + var first, last int32 + for { + h, rest, err := p.parseSimpleKey(b) + if err != nil { + return 0, nil, err + } + if first == 0 { + first = h + } else { + p.at(last).next = h + } + last = h - // Set Raw to span the entire key-value expression. - // Access the node directly in the slice to avoid the write barrier - // that NodeAt's nodes-pointer setup would trigger. - p.builder.tree.nodes[ref].Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b) + b = skipWhitespace(rest) + if len(b) > 0 && b[0] == '.' { + b = skipWhitespace(b[1:]) + continue + } + return first, b, nil + } +} - return ref, b, err +func isUnquotedKeyChar(c byte) bool { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' } -//nolint:cyclop,funlen -func (p *Parser) parseVal(b []byte) (reference, []byte, error) { - // val = string / boolean / array / inline-table / date-time / float / integer - ref := invalidReference +// parseSimpleKey parses one key part: either a bare key or a quoted key. +func (p *Parser) parseSimpleKey(b []byte) (int32, []byte, error) { + raw, value, rest, err := p.scanSimpleKey(b) + if err != nil { + return 0, nil, err + } + h := p.push(Node{Kind: Key, Raw: p.Range(raw), Data: value}) + return h, rest, nil +} +// scanSimpleKey scans one key part (bare or quoted) without building an AST +// node. It returns the raw bytes, the decoded key value, and the rest of the +// input. +func (p *Parser) scanSimpleKey(b []byte) (raw, value, rest []byte, err error) { if len(b) == 0 { - return ref, nil, NewParserError(b, "expected value, not eof") + return nil, nil, nil, NewParserError(b, "expected key but reached end of input") } - var err error - c := b[0] - - switch c { + switch b[0] { + case '\'': + return p.parseLiteralString(b) case '"': - var raw []byte - var v []byte - if scanFollowsMultilineBasicStringDelimiter(b) { - raw, v, b, err = p.parseMultilineBasicString(b) - } else { - raw, v, b, err = p.parseBasicString(b) + return p.parseBasicString(b) + default: + i := 0 + for i < len(b) && isUnquotedKeyChar(b[i]) { + i++ } - - if err == nil { - ref = p.builder.Push(Node{ - Kind: String, - Raw: p.rangeOfToken(raw, b), - Data: v, - }) + if i == 0 { + return nil, nil, nil, NewParserError(b[:1], "invalid character at start of key: %#U", b[0]) } + return b[:i], b[:i], b[i:], nil + } +} - return ref, b, err - case '\'': - var raw []byte - var v []byte - if scanFollowsMultilineLiteralStringDelimiter(b) { - raw, v, b, err = p.parseMultilineLiteralString(b) +// parseVal parses a TOML value and returns the handle to its node. +func (p *Parser) parseVal(b []byte) (int32, []byte, error) { + if len(b) == 0 { + return 0, nil, NewParserError(b, "expected value, not end of input") + } + + c := b[0] + switch { + case c == '"': + var raw, value, rest []byte + var err error + if len(b) > 2 && b[1] == '"' && b[2] == '"' { + raw, value, rest, err = p.parseMultilineBasicString(b) } else { - raw, v, b, err = p.parseLiteralString(b) + raw, value, rest, err = p.parseBasicString(b) } - - if err == nil { - ref = p.builder.Push(Node{ - Kind: String, - Raw: p.rangeOfToken(raw, b), - Data: v, - }) + if err != nil { + return 0, nil, err } - - return ref, b, err - case 't': - if !scanFollowsTrue(b) { - return ref, nil, NewParserError(atmost(b, 4), "expected 'true'") + h := p.push(Node{Kind: String, Raw: p.Range(raw), Data: value}) + return h, rest, nil + case c == '\'': + var raw, value, rest []byte + var err error + if len(b) > 2 && b[1] == '\'' && b[2] == '\'' { + raw, value, rest, err = p.parseMultilineLiteralString(b) + } else { + raw, value, rest, err = p.parseLiteralString(b) } - - ref = p.builder.Push(Node{ - Kind: Bool, - Data: b[:4], - }) - - return ref, b[4:], nil - case 'f': - if !scanFollowsFalse(b) { - return ref, nil, NewParserError(atmost(b, 5), "expected 'false'") + if err != nil { + return 0, nil, err } - - ref = p.builder.Push(Node{ - Kind: Bool, - Data: b[:5], - }) - - return ref, b[5:], nil - case '[': + h := p.push(Node{Kind: String, Raw: p.Range(raw), Data: value}) + return h, rest, nil + case c == 't': + return p.parseKeyword(b, "true", Bool) + case c == 'f': + return p.parseKeyword(b, "false", Bool) + case c == 'i': + return p.parseKeyword(b, "inf", Float) + case c == 'n': + return p.parseKeyword(b, "nan", Float) + case c == '[': return p.parseValArray(b) - case '{': + case c == '{': return p.parseInlineTable(b) + case c == '+' || c == '-': + return p.parseIntOrFloat(b) + case c >= '0' && c <= '9': + if isDateTimeStart(b) { + return p.parseDateTime(b) + } + return p.parseIntOrFloat(b) default: - return p.parseIntOrFloatOrDateTime(b) - } -} - -func atmost(b []byte, n int) []byte { - if n >= len(b) { - return b + return 0, nil, NewParserError(b[:1], "unexpected character %#U at start of value", c) } - - return b[:n] } -func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) { - v, rest, err := scanLiteralString(b) - if err != nil { - return nil, nil, nil, err +// scanScalar scans a single scalar TOML value (string, integer, float, +// boolean, or date/time) without building any AST node. It returns the kind of +// the value, its raw bytes, its decoded value bytes (for strings: quotes +// removed and escapes resolved; identical to raw for the other kinds), and the +// rest of the input. Arrays and inline tables are not scalars and produce an +// error: use parseValue for those. +// +// It is exposed to the root toml package through internal/parserbridge for the +// fused generic-decode path; it is not part of the public API. +func (p *Parser) scanScalar(b []byte) (kind Kind, raw, value, rest []byte, err error) { + if len(b) == 0 { + return Invalid, nil, nil, nil, NewParserError(b, "expected value, not end of input") } - return v, v[1 : len(v)-1], rest, nil -} - -func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { - // inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close - // inline-table-open = %x7B ws ; { - // inline-table-close = ws %x7D ; } - // inline-table-sep = ws %x2C ws ; , Comma - // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] - parent := p.builder.Push(Node{ - Kind: InlineTable, - Raw: p.rangeOfToken(b[:1], b[1:]), - }) - - first := true - - var child reference - - b = b[1:] - - var err error - - for len(b) > 0 { - previousB := b - b = p.parseWhitespace(b) - - if len(b) == 0 { - return parent, nil, NewParserError(previousB[:1], "inline table is incomplete") + c := b[0] + switch { + case c == '"': + if len(b) > 2 && b[1] == '"' && b[2] == '"' { + raw, value, rest, err = p.parseMultilineBasicString(b) + } else { + raw, value, rest, err = p.parseBasicString(b) } - - if b[0] == '}' { - break + return String, raw, value, rest, err + case c == '\'': + if len(b) > 2 && b[1] == '\'' && b[2] == '\'' { + raw, value, rest, err = p.parseMultilineLiteralString(b) + } else { + raw, value, rest, err = p.parseLiteralString(b) } - - if !first { - b, err = expect(',', b) - if err != nil { - return parent, nil, err - } - b = p.parseWhitespace(b) + return String, raw, value, rest, err + case c == 't': + return scanKeyword(b, "true", Bool) + case c == 'f': + return scanKeyword(b, "false", Bool) + case c == 'i': + return scanKeyword(b, "inf", Float) + case c == 'n': + return scanKeyword(b, "nan", Float) + case c == '+' || c == '-': + return scanIntOrFloat(b) + case c >= '0' && c <= '9': + if isDateTimeStart(b) { + return scanDateTime(b) } + return scanIntOrFloat(b) + default: + return Invalid, nil, nil, nil, NewParserError(b[:1], "unexpected character %#U at start of value", c) + } +} - var kv reference - - kv, b, err = p.parseKeyval(b) +// scanKey scans a potentially dotted key without building AST nodes, +// appending the decoded value of each part to dst (pass dst[:0] to reuse a +// buffer). It consumes the whitespace following the key, so the caller can +// directly check for the next expected character ('=', ']', ...). It returns +// the parts, the raw bytes spanning the whole key (from the first part to the +// end of the last one, excluding trailing whitespace, usable as an error +// highlight), the rest of the input, and any error. +// +// It is exposed to the root toml package through internal/parserbridge for the +// fused generic-decode path; it is not part of the public API. +func (p *Parser) scanKey(b []byte, dst [][]byte) (parts [][]byte, raw, rest []byte, err error) { + parts = dst + start := b + for { + _, value, r, err := p.scanSimpleKey(b) if err != nil { - return parent, nil, err + return nil, nil, nil, err } + parts = append(parts, value) - if first { - p.builder.AttachChild(parent, kv) - } else { - p.builder.Chain(child, kv) + // r points just past the current part: the key spans from start to + // here, ignoring any whitespace that follows. + raw = start[:len(start)-len(r)] + + b = skipWhitespace(r) + if len(b) > 0 && b[0] == '.' { + b = skipWhitespace(b[1:]) + continue } - child = kv + return parts, raw, b, nil + } +} - first = false +// parseValue parses a single TOML value, which may be an array or inline table, +// into the parser's arena. It returns the root node of the value and the rest +// of the input. It resets the arena, so any node returned by a previous call to +// parseValue, Expression, or NextExpression is invalidated. +// +// It is exposed to the root toml package through internal/parserbridge for the +// fused generic-decode path; it is not part of the public API. +func (p *Parser) parseValue(b []byte) (*Node, []byte, error) { + p.nodes = p.nodes[:0] + h, rest, err := p.parseVal(b) + if err != nil { + return nil, nil, err } + return &p.nodes[h-1], rest, nil +} - rest, err := expect('}', b) +func (p *Parser) parseKeyword(b []byte, kw string, kind Kind) (int32, []byte, error) { + k, raw, _, rest, err := scanKeyword(b, kw, kind) + if err != nil { + return 0, nil, err + } + h := p.push(Node{Kind: k, Raw: p.Range(raw), Data: raw}) + return h, rest, nil +} - return parent, rest, err +// scanKeyword scans a keyword value (true, false, inf, nan) without building +// an AST node. raw and value are identical (the keyword bytes). +func scanKeyword(b []byte, kw string, kind Kind) (Kind, []byte, []byte, []byte, error) { + if len(b) < len(kw) || string(b[:len(kw)]) != kw { + n := len(kw) + if len(b) < n { + n = len(b) + } + return Invalid, nil, nil, nil, NewParserError(b[:n], "expected keyword %q", kw) + } + return kind, b[:len(kw)], b[:len(kw)], b[len(kw):], nil } -//nolint:funlen,cyclop -func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { - // array = array-open [ array-values ] ws-comment-newline array-close - // array-open = %x5B ; [ - // array-close = %x5D ; ] - // array-values = ws-comment-newline val ws-comment-newline array-sep array-values - // array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ] - // array-sep = %x2C ; , Comma - // ws-comment-newline = *( wschar / [ comment ] newline ) - arrayStart := b +// parseValArray parses an array value. b starts at '['. +func (p *Parser) parseValArray(b []byte) (int32, []byte, error) { + arr := p.push(Node{Kind: Array}) b = b[1:] - parent := p.builder.Push(Node{ - Kind: Array, - }) - - // First indicates whether the parser is looking for the first element - // (non-comment) of the array. - first := true - - lastChild := invalidReference - - addChild := func(valueRef reference) { - if lastChild == invalidReference { - p.builder.AttachChild(parent, valueRef) + var lastChild int32 + appendChild := func(h int32) { + if lastChild == 0 { + p.at(arr).child = h } else { - p.builder.Chain(lastChild, valueRef) + p.at(lastChild).next = h } - lastChild = valueRef + lastChild = h } - var err error - for len(b) > 0 { - var cref reference - cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) - if err != nil { - return parent, nil, err - } - - if cref != invalidReference { - addChild(cref) - } + // Comments inside the array are attached as follows: the first comment + // of a "run" (consecutive comments with no value in between) becomes a + // child of the array, interleaved with values; subsequent comments of the + // run are attached as children of the first one. + var runFirst, runLast int32 + // afterValue is true when a value has been parsed and a comma (or the + // closing bracket) is expected before the next one. + afterValue := false + for { + b = skipWhitespace(b) if len(b) == 0 { - return parent, nil, NewParserError(arrayStart[:1], "array is incomplete") + return 0, nil, NewParserError(b, "array is incomplete") } - if b[0] == ']' { - break - } - - if b[0] == ',' { - if first { - return parent, nil, NewParserError(b[0:1], "array cannot start with comma") - } + switch b[0] { + case ']': + return arr, b[1:], nil + case '\n': b = b[1:] - - cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) + continue + case '\r': + if len(b) > 1 && b[1] == '\n' { + b = b[2:] + continue + } + return 0, nil, NewParserError(b[:1], "expected newline but got %#U", b[0]) + case '#': + comment, rest, err := scanComment(b) if err != nil { - return parent, nil, err + return 0, nil, err } - if cref != invalidReference { - addChild(cref) + if p.KeepComments { + h := p.push(Node{Kind: Comment, Raw: p.Range(comment), Data: comment}) + switch { + case runFirst == 0: + appendChild(h) + runFirst = h + case runLast == runFirst: + p.at(runFirst).child = h + default: + p.at(runLast).next = h + } + runLast = h } - } else if !first { - return parent, nil, NewParserError(b[0:1], "array elements must be separated by commas") - } - - // TOML allows trailing commas in arrays. - if len(b) > 0 && b[0] == ']' { - break - } - - var valueRef reference - valueRef, b, err = p.parseVal(b) - if err != nil { - return parent, nil, err - } - - addChild(valueRef) - - cref, b, err = p.parseOptionalWhitespaceCommentNewline(b) - if err != nil { - return parent, nil, err - } - if cref != invalidReference { - addChild(cref) + b = rest + continue + case ',': + if !afterValue { + return 0, nil, NewParserError(b[:1], "expected value but got %#U", b[0]) + } + afterValue = false + b = b[1:] + continue + default: + if afterValue { + return 0, nil, NewParserError(b[:1], "expected ',' or ']' after array value") + } + h, rest, err := p.parseVal(b) + if err != nil { + return 0, nil, err + } + appendChild(h) + afterValue = true + runFirst, runLast = 0, 0 + b = rest + continue } - - first = false } - - rest, err := expect(']', b) - - return parent, rest, err } -func (p *Parser) parseOptionalWhitespaceCommentNewline(b []byte) (reference, []byte, error) { - rootCommentRef := invalidReference - latestCommentRef := invalidReference +// parseInlineTable parses an inline table value. b starts at '{'. +// +// Per TOML v1.1.0, inline tables may span multiple lines (whitespace, +// comments and newlines are allowed between elements) and may contain a +// trailing comma. +func (p *Parser) parseInlineTable(b []byte) (int32, []byte, error) { + tbl := p.push(Node{Kind: InlineTable, Raw: p.Range(b[:1])}) + b = b[1:] - addComment := func(ref reference) { - switch { - case rootCommentRef == invalidReference: - rootCommentRef = ref - case latestCommentRef == invalidReference: - p.builder.AttachChild(rootCommentRef, ref) - latestCommentRef = ref - default: - p.builder.Chain(latestCommentRef, ref) - latestCommentRef = ref + var lastChild int32 + appendChild := func(h int32) { + if lastChild == 0 { + p.at(tbl).child = h + } else { + p.at(lastChild).next = h } + lastChild = h } - for len(b) > 0 { - var err error - b = p.parseWhitespace(b) - - if len(b) > 0 && b[0] == '#' { - var ref reference - ref, b, err = p.parseComment(b) - if err != nil { - return invalidReference, nil, err - } - if ref != invalidReference { - addComment(ref) - } - } + // Comments are attached as in arrays: the first comment of a "run" + // (consecutive comments with no key-value in between) becomes a child of + // the table, interleaved with key-values; subsequent comments of the run + // hang off the first one. + var runFirst, runLast int32 + // afterValue is true when a key-value has been parsed and a comma (or the + // closing brace) is expected before the next one. + afterValue := false + for { + b = skipWhitespace(b) if len(b) == 0 { - break + return 0, nil, NewParserError(b, "inline table is incomplete") } - if b[0] == '\n' || b[0] == '\r' { - b, err = p.parseNewline(b) + switch b[0] { + case '}': + return tbl, b[1:], nil + case '\n': + b = b[1:] + continue + case '\r': + if len(b) > 1 && b[1] == '\n' { + b = b[2:] + continue + } + return 0, nil, NewParserError(b[:1], "expected newline but got %#U", b[0]) + case '#': + comment, rest, err := scanComment(b) if err != nil { - return invalidReference, nil, err + return 0, nil, err } - } else { - break + if p.KeepComments { + h := p.push(Node{Kind: Comment, Raw: p.Range(comment), Data: comment}) + switch { + case runFirst == 0: + appendChild(h) + runFirst = h + case runLast == runFirst: + p.at(runFirst).child = h + default: + p.at(runLast).next = h + } + runLast = h + } + b = rest + continue + case ',': + if !afterValue { + return 0, nil, NewParserError(b[:1], "unexpected comma in inline table") + } + afterValue = false + b = b[1:] + continue + default: + if afterValue { + return 0, nil, NewParserError(b[:1], "expected ',' or '}' after inline table key-value") + } + h, rest, err := p.parseKeyval(b) + if err != nil { + return 0, nil, err + } + appendChild(h) + afterValue = true + runFirst, runLast = 0, 0 + b = rest + continue } } - - return rootCommentRef, b, nil } -func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { - token, rest, err := scanMultilineLiteralString(b) - if err != nil { - return nil, nil, nil, err - } +func isDigit(c byte) bool { + return c >= '0' && c <= '9' +} - i := 3 +func isHexDigit(c byte) bool { + return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} - // skip the immediate new line - if token[i] == '\n' { - i++ - } else if token[i] == '\r' && token[i+1] == '\n' { - i += 2 +// isDateTimeStart reports whether b looks like the start of a date or time +// value instead of a number. Values starting with two digits followed by a +// colon are times; values starting with four digits followed by a dash are +// dates. +func isDateTimeStart(b []byte) bool { + if len(b) >= 3 && isDigit(b[1]) && b[2] == ':' { + return true } - - return token, token[i : len(token)-3], rest, err + if len(b) >= 5 && isDigit(b[1]) && isDigit(b[2]) && isDigit(b[3]) && b[4] == '-' { + return true + } + return false } -//nolint:funlen,gocognit,cyclop -func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) { - // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body - // ml-basic-string-delim - // ml-basic-string-delim = 3quotation-mark - // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] - // - // mlb-content = mlb-char / newline / mlb-escaped-nl - // mlb-char = mlb-unescaped / escaped - // mlb-quotes = 1*2quotation-mark - // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // mlb-escaped-nl = escape ws newline *( wschar / newline ) - token, escaped, rest, err := scanMultilineBasicString(b) +// expectDigits checks that the n first bytes of b are digits. +// parseDateTime parses date and/or time values. b starts with a digit. +// +// The parser is lenient: it scans the characters that can be part of a date +// and/or time value to delimit and classify the token, but leaves the +// validation of its contents to the document consumer. This keeps the value +// in one piece, so that errors about its content can point at the right +// place. +func (p *Parser) parseDateTime(b []byte) (int32, []byte, error) { + kind, raw, _, rest, err := scanDateTime(b) if err != nil { - return nil, nil, nil, err - } - - i := 3 - - // skip the immediate new line - if token[i] == '\n' { - i++ - } else if token[i] == '\r' && token[i+1] == '\n' { - i += 2 + return 0, nil, err } + h := p.push(Node{Kind: kind, Raw: p.Range(raw), Data: raw}) + return h, rest, nil +} - // fast path - startIdx := i - endIdx := len(token) - len(`"""`) - - if !escaped { - str := token[startIdx:endIdx] - highlight := characters.Utf8TomlValidAlreadyEscaped(str) - if len(highlight) == 0 { - return token, str, rest, nil +// scanDateTime classifies and delimits a date/time value without building an +// AST node. raw and value are identical (the token bytes). +func scanDateTime(b []byte) (Kind, []byte, []byte, []byte, error) { + // Greedily scan the characters that may compose a date/time value. A + // space is part of the value only when it serves as the delimiter + // between the date and the time, which is approximated by requiring a + // digit right after it. + i := 0 + delim := -1 + for i < len(b) { + c := b[i] + if isDigit(c) || c == ':' || c == '-' || c == '+' || c == '.' || c == 'Z' || c == 'z' { + i++ + continue + } + if c == 'T' || c == 't' || (c == ' ' && i+1 < len(b) && isDigit(b[i+1])) { + if delim < 0 { + delim = i + } + i++ + continue } - return nil, nil, nil, NewParserError(highlight, "invalid UTF-8") + break } + tok := b[:i] - var builder bytes.Buffer - - // The scanner ensures that the token starts and ends with quotes and that - // escapes are balanced. - for i < len(token)-3 { - c := token[i] - - //nolint:nestif - if c == '\\' { - // When the last non-whitespace character on a line is an unescaped \, - // it will be trimmed along with all whitespace (including newlines) up - // to the next non-whitespace character or closing delimiter. + var kind Kind + switch { + case tok[2] == ':': + kind = LocalTime + case delim < 0: + kind = LocalDate + case bytes.ContainsAny(tok[delim+1:], "Zz+-"): + kind = DateTime + default: + kind = LocalDateTime + } - isLastNonWhitespaceOnLine := false - j := 1 - findEOLLoop: - for ; j < len(token)-3-i; j++ { - switch token[i+j] { - case ' ', '\t': - continue - case '\r': - if token[i+j+1] == '\n' { - continue - } - case '\n': - isLastNonWhitespaceOnLine = true - } - break findEOLLoop - } - if isLastNonWhitespaceOnLine { - i += j - for ; i < len(token)-3; i++ { - c := token[i] - if c != '\n' && c != '\r' && c != ' ' && c != '\t' { - i-- - break - } - } - i++ - continue - } + return kind, tok, tok, b[i:], nil +} - // handle escaping +// scanDigitsWithUnderscores scans a run of digits potentially separated by +// underscores. b starts right after the first digit of the run. isInRange +// selects the kind of digits. Returns the index after the run. +func scanDigitsWithUnderscores(b []byte, i int, isInRange func(byte) bool) (int, error) { + for i < len(b) { + c := b[i] + if isInRange(c) { i++ - c = token[i] - - switch c { - case '"', '\\': - builder.WriteByte(c) - case 'b': - builder.WriteByte('\b') - case 'f': - builder.WriteByte('\f') - case 'n': - builder.WriteByte('\n') - case 'r': - builder.WriteByte('\r') - case 't': - builder.WriteByte('\t') - case 'e': - builder.WriteByte(0x1B) - case 'u': - x, err := hexToRune(atmost(token[i+1:], 4), 4) - if err != nil { - return nil, nil, nil, err - } - builder.WriteRune(x) - i += 4 - case 'U': - x, err := hexToRune(atmost(token[i+1:], 8), 8) - if err != nil { - return nil, nil, nil, err + continue + } + if c == '_' { + if i+1 >= len(b) || !isInRange(b[i+1]) { + end := i + 2 + if end > len(b) { + end = len(b) } - - builder.WriteRune(x) - i += 8 - default: - return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c) - } - i++ - } else { - size := characters.Utf8ValidNext(token[i:]) - if size == 0 { - return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c) + return 0, NewParserError(b[i:end], "number must have at least one digit between underscores") } - builder.Write(token[i : i+size]) - i += size + i += 2 + continue } + break } - - return token, builder.Bytes(), rest, nil + return i, nil } -func (p *Parser) parseKey(b []byte) (reference, []byte, error) { - // key = simple-key / dotted-key - // simple-key = quoted-key / unquoted-key - // - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - // quoted-key = basic-string / literal-string - // dotted-key = simple-key 1*( dot-sep simple-key ) - // - // dot-sep = ws %x2E ws ; . Period - raw, key, b, err := p.parseSimpleKey(b) +// parseIntOrFloat parses integer and float values, including the special +// values inf and nan with an optional sign. +func (p *Parser) parseIntOrFloat(b []byte) (int32, []byte, error) { + kind, raw, _, rest, err := scanIntOrFloat(b) if err != nil { - return invalidReference, nil, err + return 0, nil, err } + h := p.push(Node{Kind: kind, Raw: p.Range(raw), Data: raw}) + return h, rest, nil +} - ref := p.builder.Push(Node{ - Kind: Key, - Raw: p.rangeOfToken(raw, b), - Data: key, - }) - - for { - b = p.parseWhitespace(b) - if len(b) > 0 && b[0] == '.' { - b = p.parseWhitespace(b[1:]) - - raw, key, b, err = p.parseSimpleKey(b) - if err != nil { - return ref, nil, err - } +// scanIntOrFloat delimits and classifies an integer or float value (including +// the special floats inf and nan with an optional sign) without building an +// AST node. raw and value are identical (the token bytes). +func scanIntOrFloat(b []byte) (Kind, []byte, []byte, []byte, error) { + i := 0 + if b[i] == '+' || b[i] == '-' { + i++ + } + if i >= len(b) { + return Invalid, nil, nil, nil, NewParserError(b, "expected number after sign") + } - p.builder.PushAndChain(Node{ - Kind: Key, - Raw: p.rangeOfToken(raw, b), - Data: key, - }) - } else { - break + // special floats + if b[i] == 'i' || b[i] == 'n' { + kw := "inf" + if b[i] == 'n' { + kw = "nan" + } + if len(b) < i+3 || string(b[i:i+3]) != kw { + return Invalid, nil, nil, nil, NewParserError(b[i:i+1], "expected %q", kw) } + i += 3 + return Float, b[:i], b[:i], b[i:], nil } - return ref, b, nil -} - -func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { - if len(b) == 0 { - return nil, nil, nil, NewParserError(b, "expected key but found none") + if !isDigit(b[i]) { + return Invalid, nil, nil, nil, NewParserError(b[i:i+1], "expected digit but got %#U", b[i]) } - // simple-key = quoted-key / unquoted-key - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - // quoted-key = basic-string / literal-string - switch { - case b[0] == '\'': - return p.parseLiteralString(b) - case b[0] == '"': - return p.parseBasicString(b) - case isUnquotedKeyChar(b[0]): - key, rest = scanUnquotedKey(b) - return key, key, rest, nil - default: - return nil, nil, nil, NewParserError(b[0:1], "invalid character at start of key: %c", b[0]) + // radix prefixes + if b[i] == '0' && i+1 < len(b) && (b[i+1] == 'x' || b[i+1] == 'o' || b[i+1] == 'b') { + if i != 0 { + return Invalid, nil, nil, nil, NewParserError(b[:2], "sign is not allowed on numbers with a radix prefix") + } + var isInRange func(byte) bool + switch b[1] { + case 'x': + isInRange = isHexDigit + case 'o': + isInRange = func(c byte) bool { return c >= '0' && c <= '7' } + case 'b': + isInRange = func(c byte) bool { return c == '0' || c == '1' } + } + i = 2 + if i >= len(b) || !isInRange(b[i]) { + return Invalid, nil, nil, nil, NewParserError(b[:2], "radix prefix must be followed by at least one digit") + } + i++ + var err error + i, err = scanDigitsWithUnderscores(b, i, isInRange) + if err != nil { + return Invalid, nil, nil, nil, err + } + return Integer, b[:i], b[:i], b[i:], nil } -} -//nolint:funlen,cyclop -func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { - // basic-string = quotation-mark *basic-char quotation-mark - // quotation-mark = %x22 ; " - // basic-char = basic-unescaped / escaped - // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // escaped = escape escape-seq-char - // escape-seq-char = %x22 ; " quotation mark U+0022 - // escape-seq-char =/ %x5C ; \ reverse solidus U+005C - // escape-seq-char =/ %x62 ; b backspace U+0008 - // escape-seq-char =/ %x66 ; f form feed U+000C - // escape-seq-char =/ %x6E ; n line feed U+000A - // escape-seq-char =/ %x72 ; r carriage return U+000D - // escape-seq-char =/ %x74 ; t tab U+0009 - // escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX - // escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX - token, escaped, rest, err := scanBasicString(b) + // decimal integer part + leadingZero := b[i] == '0' + digitsStart := i + i++ + var err error + i, err = scanDigitsWithUnderscores(b, i, isDigit) if err != nil { - return nil, nil, nil, err + return Invalid, nil, nil, nil, err + } + if leadingZero && i > digitsStart+1 { + return Invalid, nil, nil, nil, NewParserError(b[digitsStart:digitsStart+2], "integers cannot have leading zeroes") } - startIdx := len(`"`) - endIdx := len(token) - len(`"`) + kind := Integer - // Fast path. If there is no escape sequence, the string should just be - // an UTF-8 encoded string, which is the same as Go. In that case, - // validate the string and return a direct reference to the buffer. - if !escaped { - str := token[startIdx:endIdx] - highlight := characters.Utf8TomlValidAlreadyEscaped(str) - if len(highlight) == 0 { - return token, str, rest, nil + // fractional part + if i < len(b) && b[i] == '.' { + i++ + if i >= len(b) || !isDigit(b[i]) { + return Invalid, nil, nil, nil, NewParserError(highlight1(b[i:]), "decimal point must be followed by a digit") + } + i++ + i, err = scanDigitsWithUnderscores(b, i, isDigit) + if err != nil { + return Invalid, nil, nil, nil, err } - return nil, nil, nil, NewParserError(highlight, "invalid UTF-8") + kind = Float } - i := startIdx - - var builder bytes.Buffer - - // The scanner ensures that the token starts and ends with quotes and that - // escapes are balanced. - for i < len(token)-1 { - c := token[i] - if c == '\\' { + // exponent + if i < len(b) && (b[i] == 'e' || b[i] == 'E') { + i++ + if i < len(b) && (b[i] == '+' || b[i] == '-') { i++ - c = token[i] - - switch c { - case '"', '\\': - builder.WriteByte(c) - case 'b': - builder.WriteByte('\b') - case 'f': - builder.WriteByte('\f') - case 'n': - builder.WriteByte('\n') - case 'r': - builder.WriteByte('\r') - case 't': - builder.WriteByte('\t') - case 'e': - builder.WriteByte(0x1B) - case 'u': - x, err := hexToRune(token[i+1:len(token)-1], 4) - if err != nil { - return nil, nil, nil, err - } - - builder.WriteRune(x) - i += 4 - case 'U': - x, err := hexToRune(token[i+1:len(token)-1], 8) - if err != nil { - return nil, nil, nil, err - } + } + if i >= len(b) || !isDigit(b[i]) { + return Invalid, nil, nil, nil, NewParserError(highlight1(b[i:]), "exponent must contain at least one digit") + } + i++ + i, err = scanDigitsWithUnderscores(b, i, isDigit) + if err != nil { + return Invalid, nil, nil, nil, err + } + kind = Float + } - builder.WriteRune(x) - i += 8 - default: - return nil, nil, nil, NewParserError(token[i:i+1], "invalid escaped character %#U", c) - } - i++ - } else { - size := characters.Utf8ValidNext(token[i:]) - if size == 0 { - return nil, nil, nil, NewParserError(token[i:i+1], "invalid character %#U", c) - } - builder.Write(token[i : i+size]) - i += size + // A letter right after the number means it was meant to be a string that + // was left unquoted (e.g. "20s"). Report that instead of the misleading + // "expected newline" raised later (issue #413). + if i < len(b) { + if c := b[i]; (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { + return Invalid, nil, nil, nil, NewParserError(b[i:i+1], "strings must be quoted") } } - return token, builder.Bytes(), rest, nil + return kind, b[:i], b[:i], b[i:], nil } -func hexToRune(b []byte, length int) (rune, error) { - if len(b) < length { - return -1, NewParserError(b, "unicode point needs %d character, not %d", length, len(b)) +// highlight1 returns a 1-byte highlight at the start of b, or b itself if it +// is empty. +func highlight1(b []byte) []byte { + if len(b) > 0 { + return b[:1] } - b = b[:length] + return b +} - var r uint32 - for i, c := range b { - var d uint32 - switch { - case '0' <= c && c <= '9': - d = uint32(c - '0') - case 'a' <= c && c <= 'f': - d = uint32(c - 'a' + 10) - case 'A' <= c && c <= 'F': - d = uint32(c - 'A' + 10) - default: - return -1, NewParserError(b[i:i+1], "non-hex character") - } - r = r*16 + d +func skipWhitespace(b []byte) []byte { + for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { + b = b[1:] } + return b +} - if r > unicode.MaxRune || 0xD800 <= r && r < 0xE000 { - return -1, NewParserError(b, "escape sequence is invalid Unicode code point") - } +// Word-at-a-time byte scanning helpers. These detect, within an 8-byte word, +// the presence of bytes that need special handling, so that runs of plain +// ASCII characters can be skipped 8 bytes at a time. +const ( + lsb = 0x0101010101010101 + msb = 0x8080808080808080 +) - return rune(r), nil +// hasByteBelow reports whether any byte of the word x is strictly below n. +// Only meaningful when combined with a check that no byte has its high bit +// set. +func hasByteBelow(x uint64, n uint64) uint64 { + return (x - n*lsb) & ^x & msb } -func (p *Parser) parseWhitespace(b []byte) []byte { - // ws = *wschar - // wschar = %x20 ; Space - // wschar =/ %x09 ; Horizontal tab - _, rest := scanWhitespace(b) - - return rest +// hasByteEqual reports whether any byte of the word x equals c. Only +// meaningful for bytes without their high bit set. +func hasByteEqual(x uint64, c uint64) uint64 { + y := x ^ (c * lsb) + return (y - lsb) & ^y & msb } -//nolint:cyclop -func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) { - switch b[0] { - case 'i': - if !scanFollowsInf(b) { - return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'inf'") +// scanComment parses a comment, starting at the '#' character. It returns the +// comment bytes (including '#', excluding the line ending) and the rest of +// the input. +func scanComment(b []byte) ([]byte, []byte, error) { + i := 1 + for i < len(b) { + // Fast path: skip 8 bytes at a time as long as they are all plain + // printable ASCII. + for i+8 <= len(b) { + x := binary.LittleEndian.Uint64(b[i:]) + if (x&msb)|hasByteBelow(x, 0x20)|hasByteEqual(x, 0x7f) != 0 { + break + } + i += 8 } - - return p.builder.Push(Node{ - Kind: Float, - Data: b[:3], - Raw: p.rangeOfToken(b[:3], b[3:]), - }), b[3:], nil - case 'n': - if !scanFollowsNan(b) { - return invalidReference, nil, NewParserError(atmost(b, 3), "expected 'nan'") + if i >= len(b) { + break } - return p.builder.Push(Node{ - Kind: Float, - Data: b[:3], - Raw: p.rangeOfToken(b[:3], b[3:]), - }), b[3:], nil - case '+', '-': - return p.scanIntOrFloat(b) - } - - if len(b) < 3 { - return p.scanIntOrFloat(b) - } - - s := 5 - if len(b) < s { - s = len(b) - } - - for idx, c := range b[:s] { - if isDigit(c) { + c := b[i] + if c >= 0x80 { + var ok bool + i, ok = scanUtf8Run(b, i) + if !ok { + return nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in comment") + } continue } + switch { + case c >= 0x20 && c < 0x7f: + i++ + case c == '\n': + return b[:i], b[i:], nil + case c == '\r': + if i+1 < len(b) && b[i+1] == '\n' { + return b[:i], b[i:], nil + } + return nil, nil, NewParserError(b[i:i+1], "carriage returns are not allowed in comments") + case c == '\t': + i++ + default: + return nil, nil, NewParserError(b[i:i+1], "control characters are not allowed in comments") + } + } + return b[:i], b[i:], nil +} - if idx == 2 && c == ':' || (idx == 4 && c == '-') { - return p.scanDateTime(b) +// parseLiteralString parses a single-line literal string, starting at the +// opening quote. Returns the raw bytes (with quotes), the string value +// (without quotes) and the rest of the input. +func (p *Parser) parseLiteralString(b []byte) ([]byte, []byte, []byte, error) { + i := 1 + for { + // Fast path over plain ASCII. + for i+8 <= len(b) { + x := binary.LittleEndian.Uint64(b[i:]) + if (x&msb)|hasByteBelow(x, 0x20)|hasByteEqual(x, '\'')|hasByteEqual(x, 0x7f) != 0 { + break + } + i += 8 + } + if i >= len(b) { + return nil, nil, nil, NewParserError(b[len(b):], "unterminated literal string") } - break + c := b[i] + switch { + case c == '\'': + return b[:i+1], b[1:i], b[i+1:], nil + case c >= 0x20 && c < 0x7f: + i++ + case c == '\t': + i++ + case c == '\n' || c == '\r': + return nil, nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "literal strings cannot have control characters") + default: + var ok bool + i, ok = scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in literal string") + } + } } - - return p.scanIntOrFloat(b) } -func (p *Parser) scanDateTime(b []byte) (reference, []byte, error) { - // scans for contiguous characters in [0-9T:Z.+-], and up to one space if - // followed by a digit. - hasDate := false - hasTime := false - hasTz := false - seenSpace := false +// parseMultilineLiteralString parses a multi-line literal string, starting at +// the opening triple quote. +func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, error) { + i := 3 + // trim the newline right after the opening delimiter + if i < len(b) && b[i] == '\n' { + i++ + } else if i+1 < len(b) && b[i] == '\r' && b[i+1] == '\n' { + i += 2 + } + contentStart := i - i := 0 -byteLoop: - for ; i < len(b); i++ { + for i < len(b) { c := b[i] - switch { - case isDigit(c): - case c == '-': - hasDate = true - const minOffsetOfTz = 8 - if i >= minOffsetOfTz { - hasTz = true + case c == '\'': + // count consecutive quotes + j := i + for j < len(b) && b[j] == '\'' { + j++ } - case c == 'T' || c == 't' || c == ':' || c == '.': - hasTime = true - case c == '+' || c == 'Z' || c == 'z': - hasTz = true - case c == ' ': - if !seenSpace && i+1 < len(b) && isDigit(b[i+1]) { - i += 2 - // Avoid reaching past the end of the document in case the time - // is malformed. See TestIssue585. - if i >= len(b) { - i-- + n := j - i + if n >= 3 { + if n > 5 { + return nil, nil, nil, NewParserError(b[i:j], "too many quotes at the end of a multiline literal string") } - seenSpace = true - hasTime = true - } else { - break byteLoop + // n-3 quotes belong to the content; the last 3 close the + // string. + contentEnd := i + n - 3 + return b[:j], b[contentStart:contentEnd], b[j:], nil + } + i = j + case c >= 0x20 && c < 0x7f: + i++ + case c == '\t' || c == '\n': + i++ + case c == '\r': + if i+1 < len(b) && b[i+1] == '\n' { + i += 2 + continue } + return nil, nil, nil, NewParserError(b[i:i+1], "carriage returns must be followed by a newline character") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "multiline literal strings cannot have control characters") default: - break byteLoop + var ok bool + i, ok = scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in multiline literal string") + } } } + return nil, nil, nil, NewParserError(b[len(b):], "multiline literal string not terminated by '''") +} - var kind Kind +// parseBasicString parses a single-line basic string, starting at the opening +// quote. The value is a subslice of the input if the string contains no +// escape sequence, or a new allocation otherwise. +func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { + i := 1 + // First pass: handle strings without escape sequences without allocating. + for { + for i+8 <= len(b) { + x := binary.LittleEndian.Uint64(b[i:]) + if (x&msb)|hasByteBelow(x, 0x20)|hasByteEqual(x, '"')|hasByteEqual(x, '\\')|hasByteEqual(x, 0x7f) != 0 { + break + } + i += 8 + } + if i >= len(b) { + return nil, nil, nil, NewParserError(b[len(b):], "unterminated basic string") + } - if hasTime { - if hasDate { - if hasTz { - kind = DateTime - } else { - kind = LocalDateTime + c := b[i] + switch { + case c == '"': + return b[:i+1], b[1:i], b[i+1:], nil + case c == '\\': + // switch to the escape-aware parser, copying what has been + // scanned so far + return p.parseBasicStringEscaped(b, i) + case c >= 0x20 && c < 0x7f: + i++ + case c == '\t': + i++ + case c == '\n' || c == '\r': + return nil, nil, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "basic strings cannot have control characters") + default: + var ok bool + i, ok = scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in basic string") } - } else { - kind = LocalTime } - } else { - kind = LocalDate } - - return p.builder.Push(Node{ - Kind: kind, - Data: b[:i], - }), b[i:], nil } -//nolint:funlen,gocognit,cyclop -func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { - i := 0 - - if len(b) > 2 && b[0] == '0' && b[1] != '.' && b[1] != 'e' && b[1] != 'E' { - var isValidRune validRuneFn - - switch b[1] { - case 'x': - isValidRune = isValidHexRune - case 'o': - isValidRune = isValidOctalRune - case 'b': - isValidRune = isValidBinaryRune +// findBasicStringEnd returns the index of the quote closing a basic string, +// or -1 if the string is not terminated. i is the index of the first +// character after the opening quote. It does not validate the content: it +// only skips over escape sequences so that escaped quotes do not terminate +// the string. +func findBasicStringEnd(b []byte, i int) int { + for i < len(b) { + switch b[i] { + case '"': + return i + case '\\': + i += 2 default: i++ } - - if isValidRune != nil { - i += 2 - for ; i < len(b); i++ { - if !isValidRune(b[i]) { - break - } - } - } - - return p.builder.Push(Node{ - Kind: Integer, - Data: b[:i], - Raw: p.rangeOfToken(b[:i], b[i:]), - }), b[i:], nil } + return -1 +} - isFloat := false +// parseBasicStringEscaped continues parsing a basic string that contains +// escape sequences. i is the index of the first backslash. +func (p *Parser) parseBasicStringEscaped(b []byte, i int) ([]byte, []byte, []byte, error) { + // Escape sequences only ever shrink, so the content length before + // unescaping is enough to never reallocate. + bufCap := len(b) - 1 + if end := findBasicStringEnd(b, i); end >= 0 { + bufCap = end - 1 + } + buf := make([]byte, i-1, bufCap) + copy(buf, b[1:i]) - for ; i < len(b); i++ { + for i < len(b) { c := b[i] - - if c >= '0' && c <= '9' || c == '+' || c == '-' || c == '_' { - continue - } - - if c == '.' || c == 'e' || c == 'E' { - isFloat = true - - continue - } - - if c == 'i' { - if scanFollowsInf(b[i:]) { - return p.builder.Push(Node{ - Kind: Float, - Data: b[:i+3], - Raw: p.rangeOfToken(b[:i+3], b[i+3:]), - }), b[i+3:], nil + switch { + case c == '"': + return b[:i+1], buf, b[i+1:], nil + case c == '\\': + i++ + if i >= len(b) { + return nil, nil, nil, NewParserError(b[i-1:], `need a character after \`) } - - return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'i' while scanning for a number") - } - - if c == 'n' { - if scanFollowsNan(b[i:]) { - return p.builder.Push(Node{ - Kind: Float, - Data: b[:i+3], - Raw: p.rangeOfToken(b[:i+3], b[i+3:]), - }), b[i+3:], nil + var err error + buf, i, err = unescape(buf, b, i) + if err != nil { + return nil, nil, nil, err } - - return invalidReference, nil, NewParserError(b[i:i+1], "unexpected character 'n' while scanning for a number") + case c >= 0x20 && c < 0x7f: + buf = append(buf, c) + i++ + case c == '\t': + buf = append(buf, c) + i++ + case c == '\n' || c == '\r': + return nil, nil, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "basic strings cannot have control characters") + default: + j, ok := scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in basic string") + } + buf = append(buf, b[i:j]...) + i = j } - - break } + return nil, nil, nil, NewParserError(b[len(b):], "unterminated basic string") +} - if i == 0 { - return invalidReference, b, NewParserError(b, "incomplete number") - } - - kind := Integer - - if isFloat { - kind = Float +// unescape processes one escape sequence. i is the index of the character +// right after the backslash. It returns the updated buffer and index. +func unescape(buf []byte, b []byte, i int) ([]byte, int, error) { + c := b[i] + switch c { + case '"': + return append(buf, '"'), i + 1, nil + case '\\': + return append(buf, '\\'), i + 1, nil + case 'b': + return append(buf, '\b'), i + 1, nil + case 'f': + return append(buf, '\f'), i + 1, nil + case 'n': + return append(buf, '\n'), i + 1, nil + case 'r': + return append(buf, '\r'), i + 1, nil + case 't': + return append(buf, '\t'), i + 1, nil + case 'e': + // TOML v1.1.0: \e is the escape character (U+001B). + return append(buf, 0x1B), i + 1, nil + case 'x': + // TOML v1.1.0: \xHH is a two-digit hexadecimal code point. + return unescapeUnicode(buf, b, i+1, 2) + case 'u': + return unescapeUnicode(buf, b, i+1, 4) + case 'U': + return unescapeUnicode(buf, b, i+1, 8) + default: + return nil, 0, NewParserError(b[i-1:i+1], "invalid escape character %#U", c) } - - return p.builder.Push(Node{ - Kind: kind, - Data: b[:i], - Raw: p.rangeOfToken(b[:i], b[i:]), - }), b[i:], nil } -func isDigit(r byte) bool { - return r >= '0' && r <= '9' +// unescapeUnicode handles \uXXXX and \UXXXXXXXX escape sequences. i is the +// index of the first hex digit. +func unescapeUnicode(buf []byte, b []byte, i int, n int) ([]byte, int, error) { + if i+n > len(b) { + return nil, 0, NewParserError(b[i-2:], "unicode escape sequence is too short") + } + var r uint32 + for k := 0; k < n; k++ { + c := b[i+k] + var d uint32 + switch { + case c >= '0' && c <= '9': + d = uint32(c - '0') + case c >= 'a' && c <= 'f': + d = uint32(c-'a') + 10 + case c >= 'A' && c <= 'F': + d = uint32(c-'A') + 10 + default: + return nil, 0, NewParserError(b[i+k:i+k+1], "invalid hexadecimal digit in unicode escape sequence") + } + r = r<<4 | d + } + if r > utf8.MaxRune || (r >= 0xD800 && r <= 0xDFFF) { + return nil, 0, NewParserError(b[i-2:i+n], "escape sequence is not a valid unicode code point") + } + return utf8.AppendRune(buf, rune(r)), i + n, nil } -type validRuneFn func(r byte) bool - -func isValidHexRune(r byte) bool { - return r >= 'a' && r <= 'f' || - r >= 'A' && r <= 'F' || - r >= '0' && r <= '9' || - r == '_' -} +// parseMultilineBasicString parses a multi-line basic string, starting at the +// opening triple quote. +func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) { + i := 3 + // trim the newline right after the opening delimiter + if i < len(b) && b[i] == '\n' { + i++ + } else if i+1 < len(b) && b[i] == '\r' && b[i+1] == '\n' { + i += 2 + } + contentStart := i -func isValidOctalRune(r byte) bool { - return r >= '0' && r <= '7' || r == '_' + // First pass without allocating, until an escape sequence is found. + for i < len(b) { + c := b[i] + switch { + case c == '"': + j := i + for j < len(b) && b[j] == '"' { + j++ + } + n := j - i + if n >= 3 { + if n > 5 { + return nil, nil, nil, NewParserError(b[i:j], "too many quotes at the end of a multiline basic string") + } + contentEnd := i + n - 3 + return b[:j], b[contentStart:contentEnd], b[j:], nil + } + i = j + case c == '\\': + return p.parseMultilineBasicStringEscaped(b, contentStart, i) + case c >= 0x20 && c < 0x7f: + i++ + case c == '\t' || c == '\n': + i++ + case c == '\r': + if i+1 < len(b) && b[i+1] == '\n' { + i += 2 + continue + } + return nil, nil, nil, NewParserError(b[i:i+1], "carriage returns must be followed by a newline character") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "multiline basic strings cannot have control characters") + default: + var ok bool + i, ok = scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in multiline basic string") + } + } + } + return nil, nil, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) } -func isValidBinaryRune(r byte) bool { - return r == '0' || r == '1' || r == '_' +// findMultilineBasicStringEnd returns the index of the first quote of the +// run of quotes closing a multi-line basic string, or -1 if the string is +// not terminated. It does not validate the content: it only skips over +// escape sequences so that escaped quotes do not terminate the string. +func findMultilineBasicStringEnd(b []byte, i int) int { + for { + j := bytes.IndexAny(b[i:], "\"\\") + if j < 0 { + return -1 + } + i += j + if b[i] == '\\' { + i += 2 + if i > len(b) { + return -1 + } + continue + } + j = i + for j < len(b) && b[j] == '"' { + j++ + } + if j-i >= 3 { + return i + } + i = j + } } -func expect(x byte, b []byte) ([]byte, error) { - if len(b) == 0 { - return nil, NewParserError(b, "expected character %c but the document ended here", x) +// parseMultilineBasicStringEscaped continues parsing a multi-line basic +// string that contains escape sequences. i is the index of the first +// backslash; content starts at contentStart. +func (p *Parser) parseMultilineBasicStringEscaped(b []byte, contentStart, i int) ([]byte, []byte, []byte, error) { + // Escape sequences only ever shrink, so the content length before + // unescaping is enough to never reallocate. The closing run of quotes + // can lend up to two quotes to the content. + bufCap := len(b) - contentStart + if end := findMultilineBasicStringEnd(b, i); end >= 0 { + bufCap = end + 2 - contentStart } + buf := make([]byte, i-contentStart, bufCap) + copy(buf, b[contentStart:i]) - if b[0] != x { - return nil, NewParserError(b[0:1], "expected character %c", x) + for i < len(b) { + c := b[i] + switch { + case c == '"': + j := i + for j < len(b) && b[j] == '"' { + j++ + } + n := j - i + if n >= 3 { + if n > 5 { + return nil, nil, nil, NewParserError(b[i:j], "too many quotes at the end of a multiline basic string") + } + buf = append(buf, b[i:i+n-3]...) + return b[:j], buf, b[j:], nil + } + buf = append(buf, b[i:j]...) + i = j + case c == '\\': + i++ + if i >= len(b) { + return nil, nil, nil, NewParserError(b[i-1:], `need a character after \`) + } + // Escaped newline: backslash, optional whitespace, newline, + // then all following whitespace and newlines are trimmed. + if b[i] == ' ' || b[i] == '\t' || b[i] == '\n' || b[i] == '\r' { + j := i + for j < len(b) && (b[j] == ' ' || b[j] == '\t') { + j++ + } + if j < len(b) && b[j] == '\r' { + j++ + } + if j >= len(b) || b[j] != '\n' { + return nil, nil, nil, NewParserError(b[i-1:i+1], "invalid escape character %#U", b[i]) + } + j++ + for j < len(b) && (b[j] == ' ' || b[j] == '\t' || b[j] == '\n' || b[j] == '\r') { + // note: a lone \r not followed by \n will be caught on + // the next iteration of the outer loop. + if b[j] == '\r' { + if j+1 >= len(b) || b[j+1] != '\n' { + break + } + j++ + } + j++ + } + i = j + continue + } + var err error + buf, i, err = unescape(buf, b, i) + if err != nil { + return nil, nil, nil, err + } + case c >= 0x20 && c < 0x7f: + buf = append(buf, c) + i++ + case c == '\t' || c == '\n': + buf = append(buf, c) + i++ + case c == '\r': + if i+1 < len(b) && b[i+1] == '\n' { + buf = append(buf, '\r', '\n') + i += 2 + continue + } + return nil, nil, nil, NewParserError(b[i:i+1], "carriage returns must be followed by a newline character") + case c < 0x80: + return nil, nil, nil, NewParserError(b[i:i+1], "multiline basic strings cannot have control characters") + default: + j, ok := scanUtf8Run(b, i) + if !ok { + return nil, nil, nil, NewParserError(b[i:i+1], "invalid UTF-8 character in multiline basic string") + } + buf = append(buf, b[i:j]...) + i = j + } } + return nil, nil, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) +} - return b[1:], nil +// scanUtf8Run consumes a run of valid non-ASCII UTF-8 runes starting at +// b[i]. It returns the index of the first byte after the run, and whether +// the run was entirely valid. Processing whole runs amortizes the cost of +// the call compared to validating rune by rune. +func scanUtf8Run(b []byte, i int) (int, bool) { + for i < len(b) { + c := b[i] + switch { + case c < 0x80: + return i, true + case c < 0xC2: + return i, false + case c < 0xE0: + if i+1 >= len(b) || b[i+1]&0xC0 != 0x80 { + return i, false + } + i += 2 + case c < 0xF0: + if i+2 >= len(b) || b[i+2]&0xC0 != 0x80 { + return i, false + } + b1 := b[i+1] + switch c { + case 0xE0: + if b1 < 0xA0 || b1 > 0xBF { + return i, false + } + case 0xED: + // exclude surrogates + if b1 < 0x80 || b1 > 0x9F { + return i, false + } + default: + if b1&0xC0 != 0x80 { + return i, false + } + } + i += 3 + case c < 0xF5: + if i+3 >= len(b) || b[i+2]&0xC0 != 0x80 || b[i+3]&0xC0 != 0x80 { + return i, false + } + b1 := b[i+1] + switch c { + case 0xF0: + if b1 < 0x90 || b1 > 0xBF { + return i, false + } + case 0xF4: + if b1 < 0x80 || b1 > 0x8F { + return i, false + } + default: + if b1&0xC0 != 0x80 { + return i, false + } + } + i += 4 + default: + return i, false + } + } + return i, true } diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/scanner.go b/vendor/github.com/pelletier/go-toml/v2/unstable/scanner.go deleted file mode 100644 index 0512181d..00000000 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/scanner.go +++ /dev/null @@ -1,270 +0,0 @@ -package unstable - -import "github.com/pelletier/go-toml/v2/internal/characters" - -func scanFollows(b []byte, pattern string) bool { - n := len(pattern) - - return len(b) >= n && string(b[:n]) == pattern -} - -func scanFollowsMultilineBasicStringDelimiter(b []byte) bool { - return scanFollows(b, `"""`) -} - -func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool { - return scanFollows(b, `'''`) -} - -func scanFollowsTrue(b []byte) bool { - return scanFollows(b, `true`) -} - -func scanFollowsFalse(b []byte) bool { - return scanFollows(b, `false`) -} - -func scanFollowsInf(b []byte) bool { - return scanFollows(b, `inf`) -} - -func scanFollowsNan(b []byte) bool { - return scanFollows(b, `nan`) -} - -func scanUnquotedKey(b []byte) ([]byte, []byte) { - // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ - for i := 0; i < len(b); i++ { - if !isUnquotedKeyChar(b[i]) { - return b[:i], b[i:] - } - } - - return b, b[len(b):] -} - -func isUnquotedKeyChar(r byte) bool { - return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_' -} - -func scanLiteralString(b []byte) ([]byte, []byte, error) { - // literal-string = apostrophe *literal-char apostrophe - // apostrophe = %x27 ; ' apostrophe - // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii - for i := 1; i < len(b); { - switch b[i] { - case '\'': - return b[:i+1], b[i+1:], nil - case '\n', '\r': - return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines") - } - size := characters.Utf8ValidNext(b[i:]) - if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character") - } - i += size - } - - return nil, nil, NewParserError(b[len(b):], "unterminated literal string") -} - -func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) { - // ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body - // ml-literal-string-delim - // ml-literal-string-delim = 3apostrophe - // ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] - // - // mll-content = mll-char / newline - // mll-char = %x09 / %x20-26 / %x28-7E / non-ascii - // mll-quotes = 1*2apostrophe - for i := 3; i < len(b); { - switch b[i] { - case '\'': - if scanFollowsMultilineLiteralStringDelimiter(b[i:]) { - i += 3 - - // At that point we found 3 apostrophe, and i is the - // index of the byte after the third one. The scanner - // needs to be eager, because there can be an extra 2 - // apostrophe that can be accepted at the end of the - // string. - - if i >= len(b) || b[i] != '\'' { - return b[:i], b[i:], nil - } - i++ - - if i >= len(b) || b[i] != '\'' { - return b[:i], b[i:], nil - } - i++ - - if i < len(b) && b[i] == '\'' { - return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string") - } - - return b[:i], b[i:], nil - } - case '\r': - if len(b) < i+2 { - return nil, nil, NewParserError(b[len(b):], `need a \n after \r`) - } - if b[i+1] != '\n' { - return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`) - } - i += 2 // skip the \n - continue - } - size := characters.Utf8ValidNext(b[i:]) - if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character") - } - i += size - } - - return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`) -} - -func scanWindowsNewline(b []byte) ([]byte, []byte, error) { - const lenCRLF = 2 - if len(b) < lenCRLF { - return nil, nil, NewParserError(b, "windows new line expected") - } - - if b[1] != '\n' { - return nil, nil, NewParserError(b, `windows new line should be \r\n`) - } - - return b[:lenCRLF], b[lenCRLF:], nil -} - -func scanWhitespace(b []byte) ([]byte, []byte) { - for i := 0; i < len(b); i++ { - switch b[i] { - case ' ', '\t': - continue - default: - return b[:i], b[i:] - } - } - - return b, b[len(b):] -} - -func scanComment(b []byte) ([]byte, []byte, error) { - // comment-start-symbol = %x23 ; # - // non-ascii = %x80-D7FF / %xE000-10FFFF - // non-eol = %x09 / %x20-7F / non-ascii - // - // comment = comment-start-symbol *non-eol - - for i := 1; i < len(b); { - if b[i] == '\n' { - return b[:i], b[i:], nil - } - if b[i] == '\r' { - if i+1 < len(b) && b[i+1] == '\n' { - return b[:i+1], b[i+1:], nil - } - return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") - } - size := characters.Utf8ValidNext(b[i:]) - if size == 0 { - return nil, nil, NewParserError(b[i:i+1], "invalid character in comment") - } - - i += size - } - - return b, b[len(b):], nil -} - -func scanBasicString(b []byte) ([]byte, bool, []byte, error) { - // basic-string = quotation-mark *basic-char quotation-mark - // quotation-mark = %x22 ; " - // basic-char = basic-unescaped / escaped - // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // escaped = escape escape-seq-char - escaped := false - i := 1 - - for ; i < len(b); i++ { - switch b[i] { - case '"': - return b[:i+1], escaped, b[i+1:], nil - case '\n', '\r': - return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines") - case '\\': - if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\") - } - escaped = true - i++ // skip the next character - } - } - - return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`) -} - -func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) { - // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body - // ml-basic-string-delim - // ml-basic-string-delim = 3quotation-mark - // ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] - // - // mlb-content = mlb-char / newline / mlb-escaped-nl - // mlb-char = mlb-unescaped / escaped - // mlb-quotes = 1*2quotation-mark - // mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii - // mlb-escaped-nl = escape ws newline *( wschar / newline ) - - escaped := false - i := 3 - - for ; i < len(b); i++ { - switch b[i] { - case '"': - if scanFollowsMultilineBasicStringDelimiter(b[i:]) { - i += 3 - - // At that point we found 3 apostrophe, and i is the - // index of the byte after the third one. The scanner - // needs to be eager, because there can be an extra 2 - // apostrophe that can be accepted at the end of the - // string. - - if i >= len(b) || b[i] != '"' { - return b[:i], escaped, b[i:], nil - } - i++ - - if i >= len(b) || b[i] != '"' { - return b[:i], escaped, b[i:], nil - } - i++ - - if i < len(b) && b[i] == '"' { - return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`) - } - - return b[:i], escaped, b[i:], nil - } - case '\\': - if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\") - } - escaped = true - i++ // skip the next character - case '\r': - if len(b) < i+2 { - return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`) - } - if b[i+1] != '\n' { - return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`) - } - i++ // skip the \n - } - } - - return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`) -} diff --git a/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go b/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go index 5a79da88..e417040a 100644 --- a/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go +++ b/vendor/github.com/pelletier/go-toml/v2/unstable/unmarshaler.go @@ -1,18 +1,18 @@ package unstable -// Unmarshaler is implemented by types that can unmarshal a TOML -// description of themselves. The input is a valid TOML document -// containing the relevant portion of the parsed document. +// Unmarshaler is implemented by types that can unmarshal a TOML description +// of themselves. The input is a valid TOML document containing the relevant +// portion of the parsed document. // -// For tables (including split tables defined in multiple places), -// the data contains the raw key-value bytes from the original document -// with adjusted table headers to be relative to the unmarshaling target. +// For tables (including split tables defined in multiple places), the data +// contains the raw key-value bytes from the original document with adjusted +// table headers to be relative to the unmarshaling target. type Unmarshaler interface { UnmarshalTOML(data []byte) error } -// RawMessage is a raw encoded TOML value. It implements Unmarshaler -// and can be used to delay TOML decoding or capture raw content. +// RawMessage is a raw encoded TOML value. It implements Unmarshaler and can +// be used to delay TOML decoding or capture raw content. // // Example usage: // @@ -27,6 +27,6 @@ type RawMessage []byte // UnmarshalTOML implements Unmarshaler. func (m *RawMessage) UnmarshalTOML(data []byte) error { - *m = append((*m)[0:0], data...) + *m = append((*m)[:0], data...) return nil } diff --git a/vendor/modules.txt b/vendor/modules.txt index 2f3a0107..a1e2d5d0 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -26,10 +26,10 @@ github.com/mattn/go-isatty # github.com/ncruces/go-strftime v1.0.0 ## explicit; go 1.17 github.com/ncruces/go-strftime -# github.com/pelletier/go-toml/v2 v2.3.1 +# github.com/pelletier/go-toml/v2 v2.4.0 ## explicit; go 1.21.0 github.com/pelletier/go-toml/v2 -github.com/pelletier/go-toml/v2/internal/characters +github.com/pelletier/go-toml/v2/internal/parserbridge github.com/pelletier/go-toml/v2/internal/tracker github.com/pelletier/go-toml/v2/unstable # github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec