Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.0
3.0.1
29 changes: 29 additions & 0 deletions src/glassflow/etl/models/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@

class KafkaDataType(CaseInsensitiveStrEnum):
STRING = "string"
INT = "int"
INT8 = "int8"
INT16 = "int16"
INT32 = "int32"
INT64 = "int64"
UINT = "uint"
UINT8 = "uint8"
UINT16 = "uint16"
UINT32 = "uint32"
UINT64 = "uint64"
FLOAT = "float"
FLOAT32 = "float32"
FLOAT64 = "float64"
BOOL = "bool"
Expand All @@ -19,6 +26,10 @@ class ClickhouseDataType(CaseInsensitiveStrEnum):
INT16 = "Int16"
INT32 = "Int32"
INT64 = "Int64"
UINT8 = "UInt8"
UINT16 = "UInt16"
UINT32 = "UInt32"
UINT64 = "UInt64"
FLOAT32 = "Float32"
FLOAT64 = "Float64"
STRING = "String"
Expand Down Expand Up @@ -62,6 +73,12 @@ class ClickhouseDataType(CaseInsensitiveStrEnum):
ClickhouseDataType.LC_FIXEDSTRING,
ClickhouseDataType.LC_DATETIME,
],
KafkaDataType.INT: [
ClickhouseDataType.INT8,
ClickhouseDataType.INT16,
ClickhouseDataType.INT32,
ClickhouseDataType.INT64,
],
KafkaDataType.INT8: [ClickhouseDataType.INT8, ClickhouseDataType.LC_INT8],
KafkaDataType.INT16: [ClickhouseDataType.INT16, ClickhouseDataType.LC_INT16],
KafkaDataType.INT32: [ClickhouseDataType.INT32, ClickhouseDataType.LC_INT32],
Expand All @@ -72,6 +89,18 @@ class ClickhouseDataType(CaseInsensitiveStrEnum):
ClickhouseDataType.LC_INT64,
ClickhouseDataType.LC_DATETIME,
],
KafkaDataType.UINT: [
ClickhouseDataType.UINT8,
ClickhouseDataType.UINT16,
ClickhouseDataType.UINT32,
ClickhouseDataType.UINT64,
],
KafkaDataType.UINT8: [ClickhouseDataType.UINT8],
KafkaDataType.UINT16: [ClickhouseDataType.UINT16],
KafkaDataType.UINT32: [ClickhouseDataType.UINT32],
KafkaDataType.UINT64: [ClickhouseDataType.UINT64],
KafkaDataType.UINT8: [ClickhouseDataType.UINT8],
KafkaDataType.FLOAT: [ClickhouseDataType.FLOAT64],
KafkaDataType.FLOAT32: [ClickhouseDataType.FLOAT32, ClickhouseDataType.LC_FLOAT32],
KafkaDataType.FLOAT64: [
ClickhouseDataType.FLOAT64,
Expand Down
15 changes: 2 additions & 13 deletions src/glassflow/etl/models/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,9 @@ def validate_deduplication_fields(cls, values):

# Validate id_field_type is a valid type when enabled
id_field_type = values.get("id_field_type")
if id_field_type not in [
KafkaDataType.STRING,
KafkaDataType.INT32,
KafkaDataType.INT64,
]:
if id_field_type not in [KafkaDataType.STRING, KafkaDataType.INT]:
raise ValueError(
"id_field_type must be a string, int32, or int64 when "
"id_field_type must be a string or int when "
"deduplication is enabled"
)

Expand Down Expand Up @@ -121,13 +117,6 @@ def validate_deduplication_id_field(
"the event schema"
)

# Check if the field type matches the deduplication ID field type
if field.type.value != v.id_field_type.value:
raise ValueError(
f"Deduplication ID field type '{v.id_field_type.value}' does not match "
f"schema field type '{field.type.value}' for field '{v.id_field}'"
)

return v


Expand Down
20 changes: 0 additions & 20 deletions tests/test_models/test_topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,6 @@ def test_topic_config_deduplication_id_field_validation(self):
)
assert "does not exist in the event schema" in str(exc_info.value)

# Test with mismatched field type
with pytest.raises(ValueError) as exc_info:
models.TopicConfig(
name="test-topic",
consumer_group_initial_offset=models.ConsumerGroupOffset.EARLIEST,
schema=models.Schema(
type=models.SchemaType.JSON,
fields=[
models.SchemaField(name="id", type=models.KafkaDataType.INT64),
],
),
deduplication=models.DeduplicationConfig(
enabled=True,
id_field="id",
id_field_type=models.KafkaDataType.STRING,
time_window="1h",
),
)
assert "does not match schema field type" in str(exc_info.value)

# Test with disabled deduplication (should not validate)
config = models.TopicConfig(
name="test-topic",
Expand Down