Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions cmd/csvpp/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,10 @@ func runConvert(cmd *cobra.Command, _ []string) (retErr error) {

// Infer input format from output format for stdin
if inputFormat == "" && inputFile == "" {
// If output is CSVPP, input must be JSON or YAML (default to JSON)
// Otherwise, input is CSVPP
if outFormat == FormatCSVPP {
inputFormat = FormatJSON // Default to JSON when converting to CSVPP from stdin
} else {
inputFormat = FormatCSVPP
return fmt.Errorf("--from flag is required when reading from stdin and converting to csvpp (specify json or yaml)")
}
inputFormat = FormatCSVPP
}

// Open input
Expand Down
298 changes: 271 additions & 27 deletions cmd/csvpp/internal/converter/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package converter

import (
"bytes"
"encoding/json"
"fmt"
"io"
Expand All @@ -11,55 +12,285 @@ import (
"github.com/osamingo/go-csvpp"
)

// keyOrderInfo holds the ordered keys extracted from the first record.
//
// keys preserves the order in which the keys appeared in the source document
// (Go maps randomize iteration order, so this must be captured separately).
// nested maps a key to the key order of its value when that value is itself
// an object or an array of objects; keys with scalar values have no entry.
type keyOrderInfo struct {
	keys   []string
	nested map[string]*keyOrderInfo
}

// FromJSON reads JSON array and converts to CSVPP headers and records.
// The JSON must be an array of objects with consistent keys.
func FromJSON(r io.Reader) ([]*csvpp.ColumnHeader, [][]*csvpp.Field, error) {
var data []map[string]any
if err := json.NewDecoder(r).Decode(&data); err != nil {
data, err := io.ReadAll(r)
if err != nil {
return nil, nil, fmt.Errorf("failed to read input: %w", err)
}

var records []map[string]any
if err := json.Unmarshal(data, &records); err != nil {
return nil, nil, fmt.Errorf("failed to decode JSON: %w", err)
}

if len(data) == 0 {
if len(records) == 0 {
return nil, nil, nil
}

headers := inferHeaders(data)
records := convertRecords(headers, data)
order, err := extractJSONKeyOrder(data)
if err != nil {
return nil, nil, fmt.Errorf("failed to extract JSON key order: %w", err)
}

return headers, records, nil
headers := inferHeaders(records, order)
fields := convertRecords(headers, records)

return headers, fields, nil
}

// FromYAML reads YAML array and converts to CSVPP headers and records.
// The YAML must be an array of objects with consistent keys.
func FromYAML(r io.Reader) ([]*csvpp.ColumnHeader, [][]*csvpp.Field, error) {
var data []map[string]any
if err := yaml.NewDecoder(r).Decode(&data); err != nil {
data, err := io.ReadAll(r)
if err != nil {
return nil, nil, fmt.Errorf("failed to read input: %w", err)
}

var records []map[string]any
if err := yaml.Unmarshal(data, &records); err != nil {
return nil, nil, fmt.Errorf("failed to decode YAML: %w", err)
}

if len(data) == 0 {
if len(records) == 0 {
return nil, nil, nil
}

headers := inferHeaders(data)
records := convertRecords(headers, data)
order, err := extractYAMLKeyOrder(data)
if err != nil {
return nil, nil, fmt.Errorf("failed to extract YAML key order: %w", err)
}

headers := inferHeaders(records, order)
fields := convertRecords(headers, records)

return headers, fields, nil
}

// extractJSONKeyOrder extracts key order from the first record in a JSON array.
// An empty array produces an empty (but non-nil) keyOrderInfo.
func extractJSONKeyOrder(data []byte) (*keyOrderInfo, error) {
	var elements []json.RawMessage
	if err := json.Unmarshal(data, &elements); err != nil {
		return nil, err
	}
	if len(elements) > 0 {
		// Only the first element defines the key order.
		return readJSONObjectOrder(json.NewDecoder(bytes.NewReader(elements[0])))
	}
	return &keyOrderInfo{nested: make(map[string]*keyOrderInfo)}, nil
}

// readJSONObjectOrder reads one JSON object from dec and records the order in
// which its keys appear, recursing into values that carry nested key order.
func readJSONObjectOrder(dec *json.Decoder) (*keyOrderInfo, error) {
	tok, err := dec.Token()
	if err != nil {
		return nil, err
	}
	if open, ok := tok.(json.Delim); !ok || open != '{' {
		return nil, fmt.Errorf("expected '{', got %v", tok)
	}

	info := &keyOrderInfo{nested: make(map[string]*keyOrderInfo)}

	for dec.More() {
		tok, err = dec.Token()
		if err != nil {
			return nil, err
		}
		key, isString := tok.(string)
		if !isString {
			return nil, fmt.Errorf("expected string key, got %T", tok)
		}
		info.keys = append(info.keys, key)

		// Objects and arrays of objects yield nested order; scalars yield nil.
		nested, err := readJSONValueOrder(dec)
		if err != nil {
			return nil, err
		}
		if nested != nil {
			info.nested[key] = nested
		}
	}

	// consume closing '}'
	_, err = dec.Token()
	return info, err
}

// readJSONValueOrder reads one JSON value, extracting key order if it's an
// object or an array whose first element is an object. Scalar values and
// arrays of non-objects return a nil *keyOrderInfo with no error.
func readJSONValueOrder(dec *json.Decoder) (*keyOrderInfo, error) {
	t, err := dec.Token()
	if err != nil {
		return nil, err
	}

	d, ok := t.(json.Delim)
	if !ok {
		return nil, nil // scalar value
	}

	switch d {
	case '{':
		// Object value - extract component keys ('{' already consumed).
		return readJSONObjectKeys(dec)

	case '[':
		// Array value - check if first element is an object
		if !dec.More() {
			_, err = dec.Token() // ']'
			return nil, err
		}

		t, err = dec.Token()
		if err != nil {
			return nil, err
		}

		if d2, ok := t.(json.Delim); ok && d2 == '{' {
			// Array of objects - extract keys from the first object only.
			info, err := readJSONObjectKeys(dec)
			if err != nil {
				return nil, err
			}
			// Skip remaining array elements
			for dec.More() {
				if err := skipJSONValue(dec); err != nil {
					return nil, err
				}
			}
			_, err = dec.Token() // ']'
			return info, err
		}

		// First element is not an object - skip the rest
		if d2, ok := t.(json.Delim); ok {
			if err := skipJSONDelimContent(dec, d2); err != nil {
				return nil, err
			}
		}
		for dec.More() {
			if err := skipJSONValue(dec); err != nil {
				return nil, err
			}
		}
		_, err = dec.Token() // ']'
		return nil, err
	}

	// Malformed stream (closing delimiter where a value was expected).
	return nil, nil
}

// readJSONObjectKeys collects the keys of an object whose opening '{' has
// already been consumed, skipping each value, and consumes the closing '}'.
// Shared by the object-value and array-of-objects paths above.
func readJSONObjectKeys(dec *json.Decoder) (*keyOrderInfo, error) {
	info := &keyOrderInfo{nested: make(map[string]*keyOrderInfo)}
	for dec.More() {
		t, err := dec.Token()
		if err != nil {
			return nil, err
		}
		key, ok := t.(string)
		if !ok {
			return nil, fmt.Errorf("expected string key, got %T", t)
		}
		info.keys = append(info.keys, key)
		if err := skipJSONValue(dec); err != nil {
			return nil, err
		}
	}
	_, err := dec.Token() // '}'
	return info, err
}

// skipJSONValue consumes exactly one complete JSON value from dec,
// discarding its contents.
func skipJSONValue(dec *json.Decoder) error {
	tok, err := dec.Token()
	if err != nil {
		return err
	}
	if delim, isDelim := tok.(json.Delim); isDelim {
		// Composite value: discard everything up to its closing delimiter.
		return skipJSONDelimContent(dec, delim)
	}
	return nil // scalar: fully consumed already
}

// skipJSONDelimContent discards everything up to and including the delimiter
// that closes open ('{' or '['), assuming open itself was already consumed.
func skipJSONDelimContent(dec *json.Decoder, open json.Delim) error {
	isObject := open == '{'
	for dec.More() {
		// Object entries start with a key token; discard it before the value.
		if isObject {
			if _, err := dec.Token(); err != nil {
				return err
			}
		}
		if err := skipJSONValue(dec); err != nil {
			return err
		}
	}
	// Consume the matching closing delimiter.
	_, err := dec.Token()
	return err
}

// extractYAMLKeyOrder extracts key order from the first record in a YAML
// sequence. An empty sequence produces an empty (but non-nil) keyOrderInfo.
func extractYAMLKeyOrder(data []byte) (*keyOrderInfo, error) {
	// yaml.MapSlice preserves mapping key order, unlike map[string]any.
	var seq []yaml.MapSlice
	if err := yaml.Unmarshal(data, &seq); err != nil {
		return nil, err
	}
	if len(seq) > 0 {
		return buildYAMLKeyOrder(seq[0]), nil
	}
	return &keyOrderInfo{nested: make(map[string]*keyOrderInfo)}, nil
}

// buildYAMLKeyOrder builds keyOrderInfo from a yaml.MapSlice, recording keys
// in document order and recursing into mapping values and into the first
// element of sequences of mappings (mirroring the JSON path).
func buildYAMLKeyOrder(ms yaml.MapSlice) *keyOrderInfo {
	info := &keyOrderInfo{nested: make(map[string]*keyOrderInfo)}
	for _, item := range ms {
		// Keys may decode as non-strings (e.g. ints); normalize via %v.
		key := fmt.Sprintf("%v", item.Key)
		info.keys = append(info.keys, key)

		switch v := item.Value.(type) {
		case yaml.MapSlice:
			info.nested[key] = buildYAMLKeyOrder(v)
		case []any:
			// Only the first element defines component order.
			if len(v) > 0 {
				if ms2, ok := v[0].(yaml.MapSlice); ok {
					info.nested[key] = buildYAMLKeyOrder(ms2)
				}
			}
		}
	}
	return info
}

// inferHeaders infers CSVPP headers from JSON/YAML data structure.
// inferHeaders infers CSVPP headers from data using the provided key order.
//
// Header inference rules:
// - string → SimpleField
// - []string → ArrayField
// - map[string]any → StructuredField
// - []map[string]any → ArrayStructuredField
func inferHeaders(data []map[string]any) []*csvpp.ColumnHeader {
func inferHeaders(data []map[string]any, order *keyOrderInfo) []*csvpp.ColumnHeader {
if len(data) == 0 {
return nil
}

// Collect all unique keys from all records (first record defines order)
keyOrder := collectKeyOrder(data[0])
keyTypes := make(map[string]csvpp.FieldKind)
keyComponents := make(map[string][]*csvpp.ColumnHeader)

Expand All @@ -80,30 +311,41 @@ func inferHeaders(data []map[string]any) []*csvpp.ColumnHeader {
}
}

// Build headers maintaining key order
headers := make([]*csvpp.ColumnHeader, 0, len(keyOrder))
for _, key := range keyOrder {
// Build headers using preserved key order
headers := make([]*csvpp.ColumnHeader, 0, len(order.keys))
for _, key := range order.keys {
components := keyComponents[key]
if nestedOrder, ok := order.nested[key]; ok && len(components) > 0 {
components = reorderComponents(components, nestedOrder)
}

header := &csvpp.ColumnHeader{
Name: key,
Kind: keyTypes[key],
ArrayDelimiter: csvpp.DefaultArrayDelimiter,
ComponentDelimiter: csvpp.DefaultComponentDelimiter,
Components: keyComponents[key],
Components: components,
}
headers = append(headers, header)
}

return headers
}

// collectKeyOrder returns keys in iteration order (Go 1.12+ maps have random order).
// Uses first record to determine key order.
func collectKeyOrder(record map[string]any) []string {
keys := make([]string, 0, len(record))
for key := range record {
keys = append(keys, key)
// reorderComponents reorders component headers according to the key order info.
func reorderComponents(components []*csvpp.ColumnHeader, order *keyOrderInfo) []*csvpp.ColumnHeader {
compMap := make(map[string]*csvpp.ColumnHeader, len(components))
for _, c := range components {
compMap[c.Name] = c
}

ordered := make([]*csvpp.ColumnHeader, 0, len(order.keys))
for _, key := range order.keys {
if c, ok := compMap[key]; ok {
ordered = append(ordered, c)
}
}
return keys
return ordered
}

// inferFieldKind determines the FieldKind from a value.
Expand Down Expand Up @@ -134,6 +376,8 @@ func inferFieldKind(value any) (csvpp.FieldKind, []*csvpp.ColumnHeader) {
}

// inferComponentHeaders creates headers for structured field components.
// Note: The returned order may be non-deterministic (map iteration).
// Callers should use reorderComponents to fix the order.
func inferComponentHeaders(m map[string]any) []*csvpp.ColumnHeader {
headers := make([]*csvpp.ColumnHeader, 0, len(m))
for key, value := range m {
Expand Down
Loading
Loading