Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,21 @@ std::string DataTypeTimeStampTzSerDe::to_olap_string(const Field& field) const {
return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
}

void DataTypeTimeStampTzSerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const {
const auto type = static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ);
const auto& data_ref = assert_cast<const ColumnTimeStampTz&>(src_column).get_data_at(row_num);
const auto sc = static_cast<uint8_t>(_scale);

const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(uint8_t) + data_ref.size;
chars.resize(new_size);
memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&sc),
sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(uint8_t), data_ref.data,
data_ref.size);
}

} // namespace doris
4 changes: 4 additions & 0 deletions be/src/core/data_type_serde/data_type_timestamptz_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ class DataTypeTimeStampTzSerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_
int64_t start, int64_t end, Arena& arena,
const FormatOptions& options) const override;

// Override needed: paired reader skips a scale byte; the inherited number-serde writer omits it.
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

std::string to_olap_string(const Field& field) const override;

protected:
Expand Down
83 changes: 83 additions & 0 deletions be/test/core/data_type_serde/data_type_serde_timestamptz_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,25 @@

#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/column/column_nullable.h"
#include "core/column/column_string.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
#include "core/data_type_serde/data_type_timestamptz_serde.h"
#include "core/field.h"
#include "testutil/test_util.h"
#include "util/slice.h"
#include "util/string_util.h"
namespace doris {
static std::string test_data_dir;

static auto serde_tz_0 = std::make_shared<DataTypeTimeStampTzSerDe>(0);
static auto serde_tz_3 = std::make_shared<DataTypeTimeStampTzSerDe>(3);
static auto serde_tz_6 = std::make_shared<DataTypeTimeStampTzSerDe>(6);

static ColumnTimeStampTz::MutablePtr column_tz_0;
static ColumnTimeStampTz::MutablePtr column_tz_3;
static ColumnTimeStampTz::MutablePtr column_tz_6;

class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
Expand All @@ -51,6 +56,7 @@ class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
test_data_dir = root_dir + "/be/test/data/vec/columns";

column_tz_0 = ColumnTimeStampTz::create();
column_tz_3 = ColumnTimeStampTz::create();
column_tz_6 = ColumnTimeStampTz::create();

load_columns_data();
Expand All @@ -70,6 +76,7 @@ class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
EXPECT_TRUE(!column->empty());
};
test_func(column_tz_0->get_ptr(), serde_tz_0, "TIMESTAMPTZ(0).csv");
test_func(column_tz_3->get_ptr(), serde_tz_3, "TIMESTAMPTZ(3).csv");
test_func(column_tz_6->get_ptr(), serde_tz_6, "TIMESTAMPTZ(6).csv");

std::cout << "loading test dataset done" << std::endl;
Expand Down Expand Up @@ -189,6 +196,82 @@ TEST_F(DataTypeTimeStampTzSerDeTest, serdes) {
}
};
test_func(*serde_tz_0, column_tz_0);
test_func(*serde_tz_3, column_tz_3);
test_func(*serde_tz_6, column_tz_6);
}

// Roundtrip via write_one_cell_to_binary + DataTypeSerDe::deserialize_binary_to_*
// (the variant sparse-column path). Regression for DORIS-25915: the writer
// previously inherited DataTypeNumberSerDe's default, which omitted the scale
// byte, while the reader expected a scale byte — a 1-byte layout mismatch that
// silently corrupted timestamptz values stored in variant sparse paths.
TEST_F(DataTypeTimeStampTzSerDeTest, binary_roundtrip) {
auto test_func = [&](const DataTypeTimeStampTzSerDe& serde,
const ColumnTimeStampTz::MutablePtr& source_column, uint32_t scale) {
const size_t row_count = source_column->size();
ASSERT_GT(row_count, 0);

// Per-row layout: [type:1][scale:1][value:8] = 10 bytes.
constexpr size_t kBytesPerRow =
sizeof(uint8_t) + sizeof(uint8_t) + sizeof(TimestampTzValue::underlying_value);

auto chars_col = ColumnString::create();
ColumnString::Chars& chars = chars_col->get_chars();
std::vector<size_t> row_offsets;
row_offsets.reserve(row_count + 1);
row_offsets.push_back(0);
for (size_t i = 0; i < row_count; ++i) {
serde.write_one_cell_to_binary(*source_column, chars, i);
row_offsets.push_back(chars.size());
}

// Layout: each row contributes exactly 10 bytes; first byte is the
// TIMESTAMPTZ FieldType; second is the scale.
ASSERT_EQ(chars.size(), row_count * kBytesPerRow);
for (size_t i = 0; i < row_count; ++i) {
ASSERT_EQ(row_offsets[i + 1] - row_offsets[i], kBytesPerRow);
EXPECT_EQ(chars[row_offsets[i]],
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ));
EXPECT_EQ(chars[row_offsets[i] + 1], static_cast<uint8_t>(scale));
}

// Roundtrip via deserialize_binary_to_column (column path).
{
auto inner_col = ColumnTimeStampTz::create();
auto null_map = ColumnUInt8::create();
auto nullable_col = ColumnNullable::create(std::move(inner_col), std::move(null_map));
for (size_t i = 0; i < row_count; ++i) {
const uint8_t* start = chars.data() + row_offsets[i];
const uint8_t* end =
DataTypeSerDe::deserialize_binary_to_column(start, *nullable_col);
EXPECT_EQ(end - start, kBytesPerRow);
}
const auto& read_col =
assert_cast<const ColumnTimeStampTz&>(nullable_col->get_nested_column());
for (size_t i = 0; i < row_count; ++i) {
EXPECT_EQ(read_col.get_element(i), source_column->get_element(i))
<< "row " << i << " mismatched after binary roundtrip";
}
}

// Roundtrip via deserialize_binary_to_field (field path); scale
// should propagate into FieldInfo.
{
for (size_t i = 0; i < row_count; ++i) {
const uint8_t* start = chars.data() + row_offsets[i];
Field f;
FieldInfo info;
const uint8_t* end = DataTypeSerDe::deserialize_binary_to_field(start, f, info);
EXPECT_EQ(end - start, kBytesPerRow);
EXPECT_EQ(info.scale, static_cast<int>(scale));
EXPECT_EQ(f.get<TYPE_TIMESTAMPTZ>(), source_column->get_element(i))
<< "row " << i << " mismatched after field roundtrip";
}
}
};

test_func(*serde_tz_0, column_tz_0, 0);
test_func(*serde_tz_3, column_tz_3, 3);
test_func(*serde_tz_6, column_tz_6, 6);
}
} // namespace doris
16 changes: 16 additions & 0 deletions be/test/data/vec/columns/TIMESTAMPTZ(3).csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
NULL
0000-01-01 00:00:00 +00:00
0000-01-01 00:00:00.000 +00:00
0000-01-01 00:00:00.001 +00:00
0000-01-01 00:00:00.123 +00:00
0000-01-01 00:00:00.999 +00:00
2023-08-08 20:20:20 +00:00
2023-08-08 20:20:20.000 +00:00
2023-08-08 20:20:20.001 +00:00
2023-08-08 20:20:20.123 +00:00
2023-08-08 20:20:20.999 +00:00
9999-12-31 23:59:59 +08:00
9999-12-31 23:59:59.000 +08:00
9999-12-31 23:59:59.001 +08:00
9999-12-31 23:59:59.123 +08:00
9999-12-31 23:59:59.999 +08:00
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_all --
1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00 2003-12-21 07:29:00.000000+08:00

-- !select_cast_ts0 --
1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00

Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Reproduces DORIS-25915:
// When VARIANT has typed paths that exceed variant_max_subcolumns_count and
// variant_enable_typed_paths_to_sparse=true, timestamptz values that fall to
// the sparse path are read back as only the timezone suffix (e.g. "+00:00")
// instead of the full timestamp.
suite("test_variant_timestamptz_sparse", "p0"){
sql " set time_zone = '+08:00' "

def tableName = "test_variant_timestamptz_sparse_repro"
sql "DROP TABLE IF EXISTS ${tableName}"

sql """
CREATE TABLE ${tableName} (
pk int,
var VARIANT<
'c1':bigint,
'c2':bigint,
'd1':date,
'd2':date,
'dt0':datetime(0),
'dt0n':datetime(0),
'dt3':datetime(3),
'dt3n':datetime(3),
'dt6':datetime(6),
'dt6n':datetime(6),
'ts0':timestamptz(0),
'ts0n':timestamptz(0),
'ts3':timestamptz(3),
'ts3n':timestamptz(3),
'ts6':timestamptz(6),
'ts6n':timestamptz(6),
'v1':string,
'v2':string,
properties("variant_max_subcolumns_count" = "2","variant_enable_typed_paths_to_sparse" = "true")
> NULL,
INDEX c1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c1"),
INDEX c2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c2"),
INDEX d1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d1"),
INDEX d2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d2"),
INDEX dt0 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0"),
INDEX dt0n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0n"),
INDEX dt3 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3"),
INDEX dt3n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3n"),
INDEX dt6 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6"),
INDEX dt6n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6n"),
INDEX v1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v1"),
INDEX v2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v2")
) ENGINE=OLAP
DUPLICATE KEY(pk)
DISTRIBUTED BY HASH(pk) BUCKETS 1
PROPERTIES("replication_num" = "1");
"""

sql """
INSERT INTO ${tableName} VALUES
(1, '{"c1":1699516324,"c2":1690122421,"d1":"2024-02-25","d2":"2024-11-27","dt0":"2002-12-25 12:42:19","dt0n":"2015-02-05 02:36:13","dt3":"2015-02-22 02:09:01","dt3n":"2015-09-16 02:55:07","dt6":"2001-09-19 09:53:52","dt6n":"2003-12-21 02:29:00","ts0":"2003-12-21 02:29:00 +00:00","ts0n":"2003-12-21 02:29:00 +00:00","ts3":"2003-12-21 02:29:00 +00:00","ts3n":"2003-12-21 02:29:00 -05:00","ts6":"2003-12-21 02:29:00 -05:00","ts6n":"2003-12-21 02:29:00 +03:00","v1":"2024-12-10 10:16:19","v2":"2023-12-31 23:59:59"}');
"""

sql "SYNC"

qt_select_all """
SELECT
pk,
CAST(var['ts0'] AS string) AS str_ts0,
CAST(var['ts0n'] AS string) AS str_ts0n,
CAST(var['ts3'] AS string) AS str_ts3,
CAST(var['ts3n'] AS string) AS str_ts3n,
CAST(var['ts6'] AS string) AS str_ts6,
CAST(var['ts6n'] AS string) AS str_ts6n
FROM ${tableName}
ORDER BY pk;
"""

qt_select_cast_ts0 """
SELECT
pk,
CAST(var['ts0'] AS timestamptz(0)) AS cast_ts0,
CAST(var['ts3'] AS timestamptz(3)) AS cast_ts3,
CAST(var['ts6'] AS timestamptz(6)) AS cast_ts6
FROM ${tableName}
ORDER BY pk;
"""

// Explicit assertion that reproduces the bug.
// When the bug is present, str_ts0 == "+00:00" (only the timezone), and this
// assertion fails. When the fix lands, the value contains the full timestamp
// "2003-12-21 ..." and the assertion passes.
def rows = sql """
SELECT
CAST(var['ts0'] AS string) AS str_ts0,
CAST(var['ts3n'] AS string) AS str_ts3n,
CAST(var['ts6'] AS string) AS str_ts6
FROM ${tableName}
WHERE pk = 1;
"""
logger.info("DORIS-25915 repro rows: ${rows}")

def str_ts0 = rows[0][0].toString()
def str_ts3n = rows[0][1].toString()
def str_ts6 = rows[0][2].toString()

assertTrue(str_ts0.contains("2003-12-21"),
"DORIS-25915 bug present: var['ts0'] in sparse path returned '${str_ts0}', expected to contain '2003-12-21'")
assertTrue(str_ts3n.contains("2003-12-21"),
"DORIS-25915 bug present: var['ts3n'] in sparse path returned '${str_ts3n}', expected to contain '2003-12-21'")
assertTrue(str_ts6.contains("2003-12-21"),
"DORIS-25915 bug present: var['ts6'] in sparse path returned '${str_ts6}', expected to contain '2003-12-21'")

sql "DROP TABLE IF EXISTS ${tableName}"
}
Loading