Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified bin/repl
Binary file not shown.
92 changes: 91 additions & 1 deletion dialect_compliance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ func TestDialectSpecificInArrayField(t *testing.T) {
expected map[Dialect]string
}

schema := NewSchema([]FieldSchema{
schema, _ := NewSchema([]FieldSchema{
{Name: "test.tags", Type: FieldTypeArray},
{Name: "test.scores", Type: FieldTypeArray},
})
Expand Down Expand Up @@ -289,6 +289,96 @@ func TestDialectSpecificInArrayField(t *testing.T) {
}
}

// TestDialectSpecificIdentifierQuoting verifies that path segments beginning
// with a digit are wrapped in the quote character of the target dialect, and
// that ordinary segments are emitted without quotes.
func TestDialectSpecificIdentifierQuoting(t *testing.T) {
	// perDialect expands the two quoting families into the full expectation
	// map: backtick for BigQuery, Spanner and ClickHouse; double quote for
	// PostgreSQL and DuckDB.
	perDialect := func(backtick, doubleQuote string) map[Dialect]string {
		return map[Dialect]string{
			DialectBigQuery:   backtick,
			DialectSpanner:    backtick,
			DialectPostgreSQL: doubleQuote,
			DialectDuckDB:     doubleQuote,
			DialectClickHouse: backtick,
		}
	}

	cases := []struct {
		name  string
		input string
		want  map[Dialect]string
	}{
		{
			name:  "numeric-leading segment in comparison",
			input: `{">=": [{"var": "data.user_transaction_history.24h.tx.sum"}, 50000]}`,
			want: perDialect(
				"WHERE data.user_transaction_history.`24h`.tx.sum >= 50000",
				`WHERE data.user_transaction_history."24h".tx.sum >= 50000`,
			),
		},
		{
			name:  "normal segments remain unquoted",
			input: `{">": [{"var": "user.amount"}, 100]}`,
			want: perDialect(
				"WHERE user.amount > 100",
				"WHERE user.amount > 100",
			),
		},
		{
			name:  "multiple numeric-leading segments",
			input: `{"==": [{"var": "stats.7d.10m.count"}, 0]}`,
			want: perDialect(
				"WHERE stats.`7d`.`10m`.count = 0",
				`WHERE stats."7d"."10m".count = 0`,
			),
		},
		{
			name:  "missing operator with numeric-leading segment",
			input: `{"missing": "data.history.24h.tx.count"}`,
			want: perDialect(
				"WHERE data.history.`24h`.tx.count IS NULL",
				`WHERE data.history."24h".tx.count IS NULL`,
			),
		},
		{
			name:  "var with default and numeric-leading segment",
			input: `{"==": [{"var": ["metrics.30d.total", 0]}, 0]}`,
			want: perDialect(
				"WHERE COALESCE(metrics.`30d`.total, 0) = 0",
				`WHERE COALESCE(metrics."30d".total, 0) = 0`,
			),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// One nested subtest per dialect so a failure names the dialect.
			for dialect, want := range tc.want {
				label := dialect.String()
				t.Run(label, func(t *testing.T) {
					transpiler, err := NewTranspiler(dialect)
					if err != nil {
						t.Fatalf("Failed to create transpiler for %s: %v", label, err)
					}

					got, err := transpiler.Transpile(tc.input)
					if err != nil {
						t.Errorf("[%s] Transpile() error = %v", label, err)
						return
					}
					if got != want {
						t.Errorf("[%s] Transpile() = %q, want %q", label, got, want)
					}
				})
			}
		})
	}
}

// TestDialectSpecificStringFunctions tests string position functions across dialects.
func TestDialectSpecificStringFunctions(t *testing.T) {
type testCase struct {
Expand Down
4 changes: 2 additions & 2 deletions docs/api-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,10 @@ Check if error has specific code.
### NewSchema

```go
func NewSchema(fields []FieldSchema) *Schema
func NewSchema(fields []FieldSchema) (*Schema, error)
```

Create a new schema from field definitions.
Create a new schema from field definitions. Returns an error if any field name contains quote characters (backtick, double quote, or single quote) — field names must be raw, unquoted identifiers.

### NewSchemaFromJSON

Expand Down
14 changes: 14 additions & 0 deletions docs/dialects.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@ All JSON Logic operators are supported across all dialects. The library generate
| **Array** | `in`, `map`, `filter`, `reduce`, `all`, `some`, `none`, `merge` | ✓ | ✓ | ✓ | ✓ | ✓ |
| **String** | `in`, `cat`, `substr` | ✓ | ✓ | ✓ | ✓ | ✓ |

## Identifier Quoting

Path segments that are not valid unquoted SQL identifiers (e.g. segments that start with a digit or contain characters other than letters, digits, and underscores) are automatically quoted using the dialect-appropriate character:

| Dialect | Quote Character | Example |
|---------|----------------|---------|
| BigQuery | Backtick (`` ` ``) | `` data.history.`24h`.tx.sum `` |
| Spanner | Backtick (`` ` ``) | `` data.history.`24h`.tx.sum `` |
| PostgreSQL | Double quote (`"`) | `data.history."24h".tx.sum` |
| DuckDB | Double quote (`"`) | `data.history."24h".tx.sum` |
| ClickHouse | Backtick (`` ` ``) | `` data.history.`24h`.tx.sum `` |

Segments that only contain letters, digits, and underscores (and don't start with a digit) remain unquoted.

## Dialect-Specific SQL Generation

Some operators generate different SQL based on the target dialect:
Expand Down
6 changes: 3 additions & 3 deletions docs/error-handling.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ _, err := transpiler.Transpile(`{"unknownOp": [1, 2]}`)
### Field Not in Schema

```go
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "known_field", Type: jsonlogic2sql.FieldTypeString},
})
transpiler.SetSchema(schema)
Expand All @@ -152,7 +152,7 @@ _, err := transpiler.Transpile(`{"==": [{"var": "unknown_field"}, "test"]}`)
### Invalid Enum Value

```go
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "status", Type: jsonlogic2sql.FieldTypeEnum, AllowedValues: []string{"active", "pending"}},
})
transpiler.SetSchema(schema)
Expand All @@ -164,7 +164,7 @@ _, err := transpiler.Transpile(`{"==": [{"var": "status"}, "invalid"]}`)
### Type Mismatch

```go
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "name", Type: jsonlogic2sql.FieldTypeString},
})
transpiler.SetSchema(schema)
Expand Down
7 changes: 4 additions & 3 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,14 @@ transpiler, _ := jsonlogic2sql.NewTranspiler(jsonlogic2sql.DialectClickHouse)

## Variable Naming

The transpiler preserves JSON Logic variable names as-is in the SQL output:
The transpiler preserves JSON Logic variable names in the SQL output, with automatic quoting for segments that are not valid unquoted SQL identifiers:

- Dot notation is preserved: `transaction.amount` → `transaction.amount`
- Nested variables: `user.account.age` → `user.account.age`
- Simple variables remain unchanged: `amount` → `amount`

This allows for proper JSON column access in databases that support it (like PostgreSQL with JSONB columns).
- Segments starting with a digit are quoted automatically:
- BigQuery/Spanner/ClickHouse: `data.history.24h.tx.sum` → `` data.history.`24h`.tx.sum ``
- PostgreSQL/DuckDB: `data.history.24h.tx.sum` → `data.history."24h".tx.sum`

## Next Steps

Expand Down
2 changes: 2 additions & 0 deletions docs/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ WHERE data[1]
WHERE COALESCE(status, 'pending')
```

> **Note:** Path segments that start with a digit (e.g. `24h`, `7d`) are automatically quoted using the dialect-appropriate character. See [Identifier Quoting](dialects.md#identifier-quoting) for details.

### Missing Field Check (Single)

```json
Expand Down
10 changes: 6 additions & 4 deletions docs/schema-validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (

func main() {
// Create a schema with field definitions
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "order.amount", Type: jsonlogic2sql.FieldTypeInteger},
{Name: "order.status", Type: jsonlogic2sql.FieldTypeString},
{Name: "user.verified", Type: jsonlogic2sql.FieldTypeBoolean},
Expand All @@ -39,6 +39,8 @@ func main() {
}
```

**Note:** Field names must be raw, unquoted identifiers. `NewSchema` returns an error if any field name contains quote characters (backtick, double quote, or single quote). The transpiler handles identifier quoting automatically based on the target dialect.

## Loading Schema from JSON

```go
Expand Down Expand Up @@ -90,7 +92,7 @@ When a schema is provided, operators perform strict type validation:
### Example

```go
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "amount", Type: jsonlogic2sql.FieldTypeInteger},
{Name: "tags", Type: jsonlogic2sql.FieldTypeArray},
{Name: "name", Type: jsonlogic2sql.FieldTypeString},
Expand Down Expand Up @@ -157,7 +159,7 @@ Enum fields allow you to define a fixed set of allowed values:

```go
// Define schema with enum field
schema := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
schema, _ := jsonlogic2sql.NewSchema([]jsonlogic2sql.FieldSchema{
{Name: "status", Type: jsonlogic2sql.FieldTypeEnum, AllowedValues: []string{"active", "pending", "cancelled"}},
{Name: "priority", Type: jsonlogic2sql.FieldTypeEnum, AllowedValues: []string{"low", "medium", "high"}},
})
Expand Down Expand Up @@ -191,7 +193,7 @@ _, err = transpiler.Transpile(`{"==": [{"var": "status"}, "invalid"]}`)

```go
// Schema creation
schema := jsonlogic2sql.NewSchema(fields []FieldSchema)
schema, err := jsonlogic2sql.NewSchema(fields []FieldSchema)
schema, err := jsonlogic2sql.NewSchemaFromJSON(data []byte)
schema, err := jsonlogic2sql.NewSchemaFromFile(filepath string)

Expand Down
48 changes: 47 additions & 1 deletion internal/dialect/dialect.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
// Package dialect provides SQL dialect definitions for the transpiler.
package dialect

import "fmt"
import (
"fmt"
"strings"
"unicode"
)

// Dialect represents a SQL dialect that the transpiler can target.
type Dialect int
Expand Down Expand Up @@ -62,3 +66,45 @@ func (d Dialect) Validate() error {
}
return nil
}

// NeedsQuoting reports whether an identifier segment must be quoted before it
// is emitted into SQL.
//
// A segment needs quoting when its first character is a digit, or when it
// contains any character other than letters, digits, and underscores (as
// classified by the unicode package). The empty segment is reported as not
// needing quoting.
func NeedsQuoting(segment string) bool {
	// Ranging over the string decodes whole runes, so the leading-digit check
	// inspects the first character rather than only its first byte. Converting
	// segment[0] directly misreads a multi-byte digit (for example U+0663) as a
	// Latin-1 letter and would leave such a segment unquoted.
	for i, r := range segment {
		if i == 0 && unicode.IsDigit(r) {
			return true
		}
		if r != '_' && !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return true
		}
	}
	return false
}

// ContainsQuoteCharacters reports whether segment includes a backtick, a double
// quote, or a single quote. Those characters are reserved for identifier
// quoting, so raw variable names must not carry them; quoting is applied by the
// transpiler itself.
func ContainsQuoteCharacters(segment string) bool {
	const quoteChars = "`\"'"
	return strings.IndexAny(segment, quoteChars) != -1
}

// QuoteIdentifierSegment returns segment enclosed in the identifier quote
// character of dialect d, doubling any occurrence of that quote character
// inside the segment so it is escaped.
//   - PostgreSQL / DuckDB: double quote (")
//   - every other dialect (BigQuery, Spanner, ClickHouse, ...): backtick (`)
func QuoteIdentifierSegment(segment string, d Dialect) string {
	// Backtick is the fallback for all dialects outside the double-quote family.
	quote := "`"
	if d == DialectPostgreSQL || d == DialectDuckDB {
		quote = `"`
	}
	return quote + strings.ReplaceAll(segment, quote, quote+quote) + quote
}
79 changes: 79 additions & 0 deletions internal/dialect/dialect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,82 @@ func TestDialect_Validate(t *testing.T) {
})
}
}

// TestNeedsQuoting checks NeedsQuoting against plain identifiers, segments that
// start with a digit, segments containing other characters, and the empty
// segment.
func TestNeedsQuoting(t *testing.T) {
	type quotingCase struct {
		segment string
		want    bool
	}

	cases := []quotingCase{
		// Plain identifiers stay unquoted.
		{segment: "name", want: false},
		{segment: "user_name", want: false},
		{segment: "_private", want: false},
		{segment: "tx", want: false},
		// A leading digit forces quoting.
		{segment: "24h", want: true},
		{segment: "7d", want: true},
		{segment: "10m", want: true},
		{segment: "120d", want: true},
		// So does any character outside letters, digits and underscore.
		{segment: "col-name", want: true},
		{segment: "has space", want: true},
		// The empty segment is reported as not needing quotes.
		{segment: "", want: false},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.segment, func(t *testing.T) {
			got := NeedsQuoting(tc.segment)
			if got != tc.want {
				t.Errorf("NeedsQuoting(%q) = %v, want %v", tc.segment, got, tc.want)
			}
		})
	}
}

// TestQuoteIdentifierSegment checks the quote character chosen for each dialect
// and that an embedded quote character is escaped by doubling it.
func TestQuoteIdentifierSegment(t *testing.T) {
	type quoteCase struct {
		name    string
		segment string
		dialect Dialect
		want    string
	}

	cases := []quoteCase{
		{name: "BigQuery backtick", segment: "24h", dialect: DialectBigQuery, want: "`24h`"},
		{name: "Spanner backtick", segment: "24h", dialect: DialectSpanner, want: "`24h`"},
		{name: "ClickHouse backtick", segment: "7d", dialect: DialectClickHouse, want: "`7d`"},
		{name: "PostgreSQL double quote", segment: "24h", dialect: DialectPostgreSQL, want: `"24h"`},
		{name: "DuckDB double quote", segment: "10m", dialect: DialectDuckDB, want: `"10m"`},
		{name: "BigQuery escapes embedded backtick", segment: "ab`cd", dialect: DialectBigQuery, want: "`ab``cd`"},
		{name: "PostgreSQL escapes embedded double quote", segment: `ab"cd`, dialect: DialectPostgreSQL, want: `"ab""cd"`},
		{name: "Unspecified uses backtick", segment: "24h", dialect: DialectUnspecified, want: "`24h`"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got := QuoteIdentifierSegment(tc.segment, tc.dialect)
			if got != tc.want {
				t.Errorf("QuoteIdentifierSegment(%q, %v) = %q, want %q", tc.segment, tc.dialect, got, tc.want)
			}
		})
	}
}

// TestContainsQuoteCharacters checks that backticks, double quotes and single
// quotes are detected anywhere in a segment, and that segments without them
// (including the empty segment) are reported as clean.
func TestContainsQuoteCharacters(t *testing.T) {
	type detectCase struct {
		segment string
		want    bool
	}

	cases := []detectCase{
		// No quote characters present.
		{segment: "normal", want: false},
		{segment: "24h", want: false},
		{segment: "_field", want: false},
		{segment: "", want: false},
		// Backtick and double quote, wrapping or embedded.
		{segment: "`quoted`", want: true},
		{segment: `"quoted"`, want: true},
		{segment: "has`tick", want: true},
		{segment: `has"quote`, want: true},
		// Single quote, wrapping or embedded.
		{segment: "'quoted'", want: true},
		{segment: "has'single", want: true},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.segment, func(t *testing.T) {
			got := ContainsQuoteCharacters(tc.segment)
			if got != tc.want {
				t.Errorf("ContainsQuoteCharacters(%q) = %v, want %v", tc.segment, got, tc.want)
			}
		})
	}
}
Loading