From 99cd2f8ce3f9f936084a209c9f1d41f1b8be67bc Mon Sep 17 00:00:00 2001
From: Javier de la Torre
Date: Fri, 20 Mar 2026 05:40:02 +0100
Subject: [PATCH 1/2] feat: propagate GEOMETRY CRS through PostGIS in both directions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DuckDB 1.5 introduced CRS metadata on the GEOMETRY type (e.g.,
GEOMETRY('EPSG:4326')). This change makes that CRS survive a full round
trip through PostgreSQL/PostGIS via postgres_scanner.

Write side (DuckDB → PostGIS):
- WriteGeometry detects CRS on the LogicalType and writes EWKB instead
  of plain WKB so PostGIS receives the SRID:
    WKB:  [byte_order:1] [type:4] [payload...]
    EWKB: [byte_order:1] [type|0x20000000:4] [srid:4] [payload...]
  Falls back to plain WKB when no CRS is set or no SRID can be parsed
  from the CRS identifier, preserving the previous behavior.
  (Uses GeoType::HasCRS / GeoType::GetCRS — the earlier draft mistakenly
  referenced LogicalType::GetCRS, which does not exist on DuckDB 1.5.)

Read side (PostGIS → DuckDB):
- TypeToLogicalType decodes the SRID from PostGIS's column-level typmod
  for `geometry(TYPE, SRID)` columns and returns
  LogicalType::GEOMETRY("EPSG:N") instead of plain GEOMETRY(). The
  typmod layout is:
    bit 0     : has-M
    bit 1     : has-Z
    bits 2-7  : geometry type code (POINT=1, LINESTRING=2, ...)
    bits 8-28 : SRID (signed; positive values occupy bits 8-27, which
                is the range the decode mask 0x0FFFFF00 reads)
  Untyped `geometry` columns carry typmod -1 and fall back to plain
  GEOMETRY (no CRS), matching the previous behavior.

Before:
  DuckDB GEOMETRY('EPSG:4326') → PostGIS geometry(SRID=0)
  PostGIS geometry(POINT,4326) → DuckDB GEOMETRY (no CRS)

After:
  DuckDB GEOMETRY('EPSG:4326') → PostGIS geometry(SRID=4326)
  PostGIS geometry(POINT,4326) → DuckDB GEOMETRY('EPSG:4326')

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/include/postgres_binary_writer.hpp | 56 +++++++++++++++++++++++++-
 src/postgres_utils.cpp                 | 14 ++++++
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/include/postgres_binary_writer.hpp b/src/include/postgres_binary_writer.hpp
index fef2fafe0..6f423930a 100644
--- a/src/include/postgres_binary_writer.hpp
+++ b/src/include/postgres_binary_writer.hpp
@@ -10,6 +10,7 @@
 
 #include "duckdb.hpp"
 #include "duckdb/common/types/interval.hpp"
+#include "duckdb/common/types/geometry_crs.hpp"
 #include "duckdb/common/serializer/memory_stream.hpp"
 #include "postgres_conversion.hpp"
 
@@ -207,6 +208,59 @@ class PostgresBinaryWriter {
 		stream.WriteData(const_data_ptr_cast(str_data), str_size);
 	}
 
+	//! Write GEOMETRY to PostGIS. When the type carries a CRS whose identifier ends
+	//! in a positive numeric code (e.g. "EPSG:4326"), the value is sent as EWKB so
+	//! PostGIS receives the SRID:
+	//!   WKB:  [byte_order:1] [type:4] [payload...]
+	//!   EWKB: [byte_order:1] [type|0x20000000:4] [srid:4] [payload...]
+	//! In every other case (no CRS, no usable SRID, or a value too short to hold a
+	//! WKB header) the value is written unchanged via WriteRawBlob.
+	void WriteGeometry(string_t value, const LogicalType &type) {
+		// WKB header: byte order flag (1 byte) + geometry type word (4 bytes).
+		constexpr idx_t WKB_HEADER_SIZE = 5;
+		const auto wkb_size = value.GetSize();
+		if (!GeoType::HasCRS(type) || wkb_size < WKB_HEADER_SIZE) {
+			WriteRawBlob(value);
+			return;
+		}
+
+		// Extract SRID from CRS identifier (e.g., "EPSG:4326" → 4326)
+		auto &crs = GeoType::GetCRS(type);
+		auto &id = crs.GetIdentifier();
+		const auto colon = id.find(':');
+		int32_t srid = 0;
+		if (colon != string::npos) {
+			try {
+				srid = std::stoi(id.substr(colon + 1));
+			} catch (...) {
+				// Not a numeric code: srid stays 0 and we fall back below.
+			}
+		}
+		if (srid <= 0) {
+			WriteRawBlob(value);
+			return;
+		}
+
+		// Build the 9-byte EWKB header byte by byte, in the byte order the WKB
+		// itself declares (0 = big-endian, otherwise little-endian), so the output
+		// does not depend on the endianness of the host.
+		const auto wkb_data = const_data_ptr_cast(value.GetData());
+		const bool little_endian = wkb_data[0] != 0;
+		uint8_t header[WKB_HEADER_SIZE + 4];
+		memcpy(header, wkb_data, WKB_HEADER_SIZE);
+		// The 0x20000000 SRID flag lives in the most significant byte of the type word.
+		header[little_endian ? 4 : 1] |= 0x20;
+		const auto srid_bits = static_cast<uint32_t>(srid);
+		for (idx_t i = 0; i < 4; i++) {
+			const auto shift = 8 * (little_endian ? i : 3 - i);
+			header[WKB_HEADER_SIZE + i] = static_cast<uint8_t>(srid_bits >> shift);
+		}
+
+		WriteRawInteger<int32_t>(NumericCast<int32_t>(wkb_size + 4));
+		stream.WriteData(const_data_ptr_cast(header), sizeof(header));
+		stream.WriteData(wkb_data + WKB_HEADER_SIZE, wkb_size - WKB_HEADER_SIZE); // rest of payload
+	}
+
 	void WriteVarchar(string_t value) {
 		auto str_size = value.GetSize();
 		auto str_data = value.GetData();
@@ -354,7 +408,7 @@ class PostgresBinaryWriter {
 		}
 		case LogicalTypeId::GEOMETRY: {
 			auto data = FlatVector::GetData<string_t>(col)[r];
-			WriteRawBlob(data);
+			WriteGeometry(data, type);
 			break;
 		}
 		case LogicalTypeId::ENUM: {
diff --git a/src/postgres_utils.cpp b/src/postgres_utils.cpp
index 646696b62..afd8fac1e 100644
--- a/src/postgres_utils.cpp
+++ b/src/postgres_utils.cpp
@@ -147,6 +147,20 @@ LogicalType PostgresUtils::TypeToLogicalType(optional_ptr<PostgresTransaction> t
 		postgres_type.info = PostgresTypeAnnotation::JSONB;
 		return LogicalType::VARCHAR;
 	} else if (pgtypename == "geometry") {
+		// PostGIS encodes the column-level SRID in the type modifier of
+		// `geometry(TYPE, SRID)` columns. The bit layout is:
+		//   bit 0     : has-M
+		//   bit 1     : has-Z
+		//   bits 2-7  : geometry type code (POINT=1, LINESTRING=2, ...)
+		//   bits 8-28 : SRID (signed; positive values occupy bits 8-27)
+		// Untyped `geometry` columns carry typmod -1 and we fall back to
+		// plain GEOMETRY (no CRS), matching the previous behavior.
+		if (type_info.type_modifier > 0) {
+			int32_t srid = static_cast<int32_t>((static_cast<uint32_t>(type_info.type_modifier) & 0x0FFFFF00u) >> 8);
+			if (srid > 0) {
+				return LogicalType::GEOMETRY("EPSG:" + std::to_string(srid));
+			}
+		}
 		return LogicalType::GEOMETRY();
 	} else if (pgtypename == "date") {
 		return LogicalType::DATE;
From 640b9fac8fc241bf5851450557c80de44258e30d Mon Sep 17 00:00:00 2001
From: Javier de la Torre
Date: Sat, 25 Apr 2026 23:28:24 +0200
Subject: [PATCH 2/2] fix(postgres_utils): preserve GEOMETRY CRS through CREATE TABLE AS

The previous commit added the EWKB write path and typmod read decode,
but on CREATE TABLE AS the CRS was being stripped before reaching either
side, so the resulting PostGIS column landed as plain `geometry`
(typmod -1) and the read-back lost the SRID.

Two strip points in this file:

1. ToPostgresType (called by AddCastToPostgresTypes during CTAS
   planning) returned LogicalType::GEOMETRY() unconditionally, dropping
   CRS before the projection that feeds the new physical plan. Now
   returns the input unchanged for GEOMETRY so CRS survives the
   cast/projection.

2. TypeToString returned plain "GEOMETRY" for any GEOMETRY type and
   would have been short-circuited anyway by the HasAlias() early
   return (GEOMETRY('EPSG:N') carries an alias of "GEOMETRY"). The new
   branch runs *before* HasAlias, detects CRS via GeoType::HasCRS, and
   emits `geometry(Geometry, N)` so PostGIS records the SRID in
   pg_attribute.atttypmod and the existing read-side typmod decode
   picks it up.

Verified end-to-end through the streamability harness in
duckdb-warehouse-transfer: postgres -> oracle and postgres -> snowflake
now round-trip GEOMETRY('EPSG:3857') with byte-perfect WKB and CRS
preserved at the destination, with no plan-level materialization.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/postgres_utils.cpp | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/postgres_utils.cpp b/src/postgres_utils.cpp
index afd8fac1e..671fca1e9 100644
--- a/src/postgres_utils.cpp
+++ b/src/postgres_utils.cpp
@@ -2,6 +2,7 @@
 #include "storage/postgres_schema_entry.hpp"
 #include "storage/postgres_transaction.hpp"
 #include "postgres_type_oids.hpp"
+#include "duckdb/common/types/geometry_crs.hpp"
 
 namespace duckdb {
 
@@ -20,6 +21,26 @@ PGconn *PostgresUtils::PGConnect(const string &dsn, const string &attach_path) {
 }
 
 string PostgresUtils::TypeToString(const LogicalType &input) {
+	// Handle GEOMETRY('EPSG:N') first: the CRS-bearing GEOMETRY type can have
+	// an alias of "GEOMETRY", which would otherwise short-circuit the alias
+	// branch below and emit an untyped `geometry` column (typmod -1). When
+	// CRS is present, emit a typmod-bearing PostGIS column type so the SRID
+	// survives a CREATE TABLE round trip; otherwise fall through.
+	if (input.id() == LogicalTypeId::GEOMETRY && GeoType::HasCRS(input)) {
+		auto &crs = GeoType::GetCRS(input);
+		auto &id = crs.GetIdentifier();
+		auto colon = id.find(':');
+		if (colon != string::npos) {
+			try {
+				int32_t srid = std::stoi(id.substr(colon + 1));
+				if (srid > 0) {
+					return "geometry(Geometry, " + std::to_string(srid) + ")";
+				}
+			} catch (...) {
+				// fall through to plain GEOMETRY
+			}
+		}
+	}
 	if (input.HasAlias()) {
 		return input.GetAlias();
 	}
@@ -275,7 +296,11 @@ LogicalType PostgresUtils::ToPostgresType(const LogicalType &input) {
 	case LogicalTypeId::HUGEINT:
 		return LogicalType::DOUBLE;
 	case LogicalTypeId::GEOMETRY:
-		return LogicalType::GEOMETRY();
+		// Preserve CRS metadata so downstream TypeToString can emit a typmod-
+		// bearing PostGIS column type and the SRID survives a CREATE TABLE
+		// AS round trip. Returning a CRS-less GEOMETRY here strips the CRS
+		// before it ever reaches the schema-emission path.
+		return input;
 	default:
 		return LogicalType::VARCHAR;
 	}