diff --git a/pkg/ddl/create_table.go b/pkg/ddl/create_table.go index 431264809a438..0a3d691493b0b 100644 --- a/pkg/ddl/create_table.go +++ b/pkg/ddl/create_table.go @@ -786,6 +786,14 @@ func BuildSessionTemporaryTableInfo(ctx *metabuild.Context, store kv.Storage, is // BuildTableInfoWithStmt builds model.TableInfo from a SQL statement without validity check func BuildTableInfoWithStmt(ctx *metabuild.Context, s *ast.CreateTableStmt, dbCharset, dbCollate string, placementPolicyRef *model.PolicyRefInfo) (*model.TableInfo, error) { + // Apply tidb_enable_descending_index gate up front, before any constraint + // processing. See ApplyDescGateToIndexParts for the rationale: baking the + // decision in at submission time prevents a `SET GLOBAL` between statement + // submission and DDL owner replay from silently flipping the persisted + // schema. + for _, c := range s.Constraints { + ApplyDescGateToIndexParts(c.Keys) + } colDefs := s.Cols tableCharset, tableCollate, err := GetCharsetAndCollateInTableOption(0, s.Options, ctx.GetDefaultCollationForUTF8MB4()) if err != nil { @@ -1337,6 +1345,18 @@ func BuildTableInfo( isSingleIntPK := isSingleIntPKFromTableInfo(constr, tbInfo) if ShouldBuildClusteredIndex(ctx.GetClusteredIndexDefMode(), constr.Option, isSingleIntPK) { + // Reject DESC on the columns of a clustered PRIMARY KEY. + // For PKIsHandle (single-int PK) the column becomes the + // row's int handle directly and BuildIndexInfo is never + // invoked, so this guard catches that case. For + // IsCommonHandle the same check fires again inside + // BuildIndexInfo for defence-in-depth. See pingcap/tidb#2519. 
+ for _, key := range constr.Keys { + if key.Desc { + return nil, errors.Errorf( + "DESC is not supported on the columns of a clustered PRIMARY KEY; either drop the DESC keyword or declare the primary key as NONCLUSTERED") + } + } if isSingleIntPK { tbInfo.PKIsHandle = true } else { diff --git a/pkg/ddl/db_integration_test.go b/pkg/ddl/db_integration_test.go index 73eaf6b283dc5..0dd96b9f56beb 100644 --- a/pkg/ddl/db_integration_test.go +++ b/pkg/ddl/db_integration_test.go @@ -3124,6 +3124,406 @@ func TestIssue52680(t *testing.T) { tk.MustQuery("select * from issue52680").Check(testkit.Rows("1", "2", "3")) } +func TestDescendingIndexMetadataPlumbing(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + + // With the feature gate off (the default), DESC is silently ignored at + // DDL time — preserving historical MySQL 5.7 behavior. + tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk.MustExec("create table t_desc_off (a int, b int, index idx_off (a, b desc))") + is := domain.GetDomain(tk.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_desc_off")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.False(t, idx.Columns[0].Desc) + require.False(t, idx.Columns[1].Desc, "DESC must be dropped when the feature gate is off") + require.Equal(t, uint8(0), idx.Version) + tk.MustQuery("select collation from information_schema.statistics where index_name='idx_off' order by seq_in_index"). + Check(testkit.Rows("A", "A")) + + // With the feature gate on, DESC is persisted and reflected in SHOW CREATE + // TABLE and information_schema.STATISTICS. IndexInfo.Version bumps to 1 so + // an older TiDB reading this schema will refuse to serve queries. 
+	tk.MustExec("set @@global.tidb_enable_descending_index = on")
+	defer tk.MustExec("set @@global.tidb_enable_descending_index = default")
+	tk.MustExec("create table t_desc_on (a int, b int, index idx_on (a, b desc))")
+	is = domain.GetDomain(tk.Session()).InfoSchema()
+	tbl, err = is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_desc_on"))
+	require.NoError(t, err)
+	idx = tbl.Meta().Indices[0]
+	require.False(t, idx.Columns[0].Desc)
+	require.True(t, idx.Columns[1].Desc)
+	require.Equal(t, uint8(1), idx.Version)
+
+	tk.MustQuery("select collation from information_schema.statistics where index_name='idx_on' order by seq_in_index").
+		Check(testkit.Rows("A", "D"))
+
+	showCreate := tk.MustQuery("show create table t_desc_on").Rows()[0][1].(string)
+	require.Contains(t, showCreate, "`b` DESC", "SHOW CREATE TABLE must preserve DESC in the index definition")
+	require.NotContains(t, showCreate, "`a` DESC", "ascending columns must render without an explicit direction")
+}
+
+// explainHasString reports whether any line of an EXPLAIN rowset contains the given substring.
+func explainHasString(rows [][]any, substr string) bool {
+	for _, row := range rows {
+		for _, col := range row {
+			if s, ok := col.(string); ok && strings.Contains(s, substr) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// TestDescendingIndexScanDirection verifies that the planner picks the right
+// scan direction once an index column is stored descending. It does NOT verify
+// result correctness — that depends on the physical-encoding work in a later
+// phase. The assertion here is purely on plan shape.
+func TestDescendingIndexScanDirection(t *testing.T) {
+	store := testkit.CreateMockStore(t)
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("set @@global.tidb_enable_descending_index = on")
+	defer tk.MustExec("set @@global.tidb_enable_descending_index = default")
+
+	// Descending-stored single-column index.
A forward scan on this index + // (physically) yields rows in the declared order; the planner should + // therefore reach the required direction by flipping scan.Desc rather + // than mirroring the ORDER BY direction unconditionally. + tk.MustExec("drop table if exists t_desc") + tk.MustExec("create table t_desc (a int, b int, index idx_b_desc (b desc))") + + // ORDER BY b DESC on a DESC-stored index: a FORWARD scan satisfies the + // property, so the IndexRangeScan must NOT be marked as a reverse scan. + rows := tk.MustQuery("explain format='brief' select b from t_desc use index(idx_b_desc) order by b desc").Rows() + require.True(t, explainHasString(rows, "keep order:true"), + "DESC idx + ORDER BY DESC: expected keep order:true") + require.False(t, explainHasString(rows, "keep order:true, desc"), + "DESC idx + ORDER BY DESC: expected forward scan (no ', desc'), got reverse") + + // ORDER BY b ASC on a DESC-stored index: a REVERSE scan satisfies the + // property. The scan must be marked as desc. + rows = tk.MustQuery("explain format='brief' select b from t_desc use index(idx_b_desc) order by b asc").Rows() + require.True(t, explainHasString(rows, "keep order:true, desc"), + "DESC idx + ORDER BY ASC: expected reverse scan (', desc')") + + // Mixed-direction composite (a ASC, b DESC) — this is the MySQL 8.0 + // flagship use case. Forward scan satisfies "ORDER BY a ASC, b DESC" + // directly; reverse scan satisfies "ORDER BY a DESC, b ASC"; uniform + // "ORDER BY a, b" is unsatisfiable by either direction and must fall + // back to a Sort. + tk.MustExec("drop table if exists t_mixed") + tk.MustExec("create table t_mixed (a int, b int, index idx_mixed (a, b desc))") + + // Matching the index direction-for-direction: forward scan, no Sort. 
+ rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a asc, b desc").Rows() + require.False(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a ASC, b DESC must use the index without a Sort") + require.True(t, explainHasString(rows, "keep order:true"), + "forward scan should keep order") + require.False(t, explainHasString(rows, "keep order:true, desc"), + "forward scan must not be marked desc") + + // Inverted direction-for-direction: reverse scan, still no Sort. + rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a desc, b asc").Rows() + require.False(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a DESC, b ASC must use the index in reverse without a Sort") + require.True(t, explainHasString(rows, "keep order:true, desc"), + "reverse scan should be marked desc") + + // Mixed mismatch: forward gives (a ASC, b DESC), reverse gives (a DESC, + // b ASC); neither matches "ORDER BY a, b" uniformly ASC, so a Sort is + // required. + rows = tk.MustQuery("explain format='brief' select a, b from t_mixed use index(idx_mixed) order by a, b").Rows() + require.True(t, explainHasString(rows, "Sort"), + "INDEX(a, b DESC) ORDER BY a ASC, b ASC is unsatisfiable; expected a Sort") + + // With the feature gate off, DESC is dropped at DDL time, so the same + // index behaves like an ascending one — ORDER BY DESC should drive a + // reverse scan, preserving historical behavior. 
+ tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk.MustExec("drop table if exists t_off") + tk.MustExec("create table t_off (a int, b int, index idx_off (b desc))") + rows = tk.MustQuery("explain format='brief' select b from t_off use index(idx_off) order by b desc").Rows() + require.True(t, explainHasString(rows, "keep order:true, desc"), + "feature gate off: DESC must be dropped and reverse scan must remain the path for ORDER BY DESC") +} + +// TestDescendingIndexSysvarIsCreateTimeOnly nails down the gate's semantic: +// tidb_enable_descending_index controls whether *new* CREATE INDEX statements +// persist DESC, but turning it off later must not invalidate or rewrite any +// existing DESC index. The persisted IndexColumn.Desc + IndexInfo.Version +// remain in metadata, the planner keeps using the index, and INSERT/SELECT +// continue to round-trip correctly. +func TestDescendingIndexSysvarIsCreateTimeOnly(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + // Create the DESC index while the gate is ON. + tk2.MustExec("drop table if exists t_sysvar_flip") + tk2.MustExec("create table t_sysvar_flip (a int, b int, index idx_b (b desc))") + tk2.MustExec("insert into t_sysvar_flip values (1, 5), (2, 20), (3, 12)") + + // Flip the gate OFF in a fresh session and reload the schema. + tk.MustExec("set @@global.tidb_enable_descending_index = off") + tk3 := testkit.NewTestKit(t, store) + tk3.MustExec("use test") + + // Existing DESC index metadata must survive the flip. 
+ is := domain.GetDomain(tk3.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_sysvar_flip")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.True(t, idx.Columns[0].Desc, "existing DESC index must remain DESC after sysvar flip") + require.Equal(t, uint8(1), idx.Version) + + // Reads against the DESC index continue to work. + tk3.MustQuery("select b from t_sysvar_flip use index(idx_b) order by b desc"). + Check(testkit.Rows("20", "12", "5")) + + // Inserts continue to be encoded correctly (the table's existing index + // has Desc=true, so GenIndexKey will go through the DESC path even + // though the gate is off). + tk3.MustExec("insert into t_sysvar_flip values (4, 30)") + tk3.MustQuery("select b from t_sysvar_flip use index(idx_b) order by b desc"). + Check(testkit.Rows("30", "20", "12", "5")) + + // A *new* index built while the gate is off must drop DESC silently + // (preserving MySQL 5.7 compatibility for migration scripts). + tk3.MustExec("alter table t_sysvar_flip add index idx_a (a desc)") + is = domain.GetDomain(tk3.Session()).InfoSchema() + tbl, err = is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_sysvar_flip")) + require.NoError(t, err) + var newIdx *model.IndexInfo + for _, idx := range tbl.Meta().Indices { + if idx.Name.L == "idx_a" { + newIdx = idx + break + } + } + // Assert presence outside the loop — without this, an upstream + // regression that fails to create idx_a (or renames it) would make + // the assertions vacuously skip and the test would falsely pass. 
+ require.NotNil(t, newIdx, "expected idx_a to be present after ALTER TABLE") + require.False(t, newIdx.Columns[0].Desc, "new index built with gate off must drop DESC") + require.Equal(t, uint8(0), newIdx.Version, "new ASC-only index must stay at Version=0") +} + +// TestDescOnClusteredPrimaryKeyIsRejected verifies the DDL guard that +// refuses DESC on the columns of a clustered PRIMARY KEY. The PK columns +// determine the row's physical key bytes; allowing DESC there would +// require coordinated changes across many code paths that currently +// assume the row key is ordered ascending. The DDL guard protects us +// until that work is done. +func TestDescOnClusteredPrimaryKeyIsRejected(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + tk2.MustExec("set @@tidb_enable_clustered_index = ON") + + // Sanity: sysvar is actually on for this session before exercising DDL. + tk2.MustQuery("select @@global.tidb_enable_descending_index").Check(testkit.Rows("1")) + + // Multi-column PRIMARY KEY ... CLUSTERED with DESC must error. + tk2.MustExec("drop table if exists t_clustered_desc") + err := tk2.ExecToErr("create table t_clustered_desc (a int, b int, c int, primary key (a, b desc) clustered)") + require.Error(t, err) + require.Contains(t, err.Error(), "DESC is not supported on the columns of a clustered PRIMARY KEY") + + // Single-int PRIMARY KEY with DESC also rejected (PKIsHandle path). 
+ err = tk2.ExecToErr("create table t_pk_handle_desc (a int, b int, primary key (a desc) clustered)") + require.Error(t, err) + require.Contains(t, err.Error(), "DESC is not supported on the columns of a clustered PRIMARY KEY") + + // NONCLUSTERED primary key with DESC is fine — at this layer it's + // just a unique secondary index. + tk2.MustExec("drop table if exists t_nonclustered_desc") + tk2.MustExec("create table t_nonclustered_desc (a int, b int, primary key (a desc) nonclustered)") + + // Regular DESC index on a clustered table is fine — only the PK + // columns themselves are restricted. + tk2.MustExec("drop table if exists t_clustered_secondary_desc") + tk2.MustExec("create table t_clustered_secondary_desc (a varchar(10), b int, primary key (a) clustered, index idx_b (b desc))") +} + +// TestExpressionIndexDescPersists verifies that DESC on an expression index +// part is preserved end-to-end. The parser already accepts +// `INDEX((expr) DESC)`; this test asserts the rest of the pipeline (DDL, +// SHOW CREATE TABLE, INFORMATION_SCHEMA) reflects the order correctly. 
+func TestExpressionIndexDescPersists(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_expr_desc") + tk2.MustExec("create table t_expr_desc (a int, b int, key idx_expr ((a + b) desc))") + + is := domain.GetDomain(tk2.Session()).InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_expr_desc")) + require.NoError(t, err) + idx := tbl.Meta().Indices[0] + require.Equal(t, "idx_expr", idx.Name.L) + require.True(t, idx.Columns[0].Desc, "expression index column must be marked DESC") + require.Equal(t, uint8(1), idx.Version, "DESC expression index must bump IndexInfo.Version") + + // SHOW CREATE TABLE must round-trip the DESC. + showCreate := tk2.MustQuery("show create table t_expr_desc").Rows()[0][1].(string) + require.Contains(t, showCreate, "DESC", "SHOW CREATE TABLE must preserve DESC on the expression part") + + // information_schema reports collation 'D' for the expression part. + tk2.MustQuery("select collation from information_schema.statistics where table_name='t_expr_desc' and index_name='idx_expr'"). + Check(testkit.Rows("D")) +} + +// TestMixedDirectionCompositeIndex covers the MySQL 8.0 flagship use case: +// a composite index with mixed ASC/DESC columns must satisfy "ORDER BY" +// requests that match its direction vector (forward) or are the bitwise +// complement of it (reverse), with row data coming back correctly ordered. 
+func TestMixedDirectionCompositeIndex(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_mixed_e2e") + tk2.MustExec("create table t_mixed_e2e (a int, b int, index idx_ab (a, b desc))") + // Two values of a, several b values per a, and one DISTINCT b across + // the two groups so the join key isn't trivial. + tk2.MustExec("insert into t_mixed_e2e values (1, 10), (1, 5), (1, 15), (2, 20), (2, 8), (2, 12)") + + // Forward scan satisfies ORDER BY a ASC, b DESC: rows must come back + // (a ascending, b descending within each a). + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a asc, b desc").Check(testkit.Rows( + "1 15", "1 10", "1 5", + "2 20", "2 12", "2 8", + )) + + // Reverse scan satisfies ORDER BY a DESC, b ASC. + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a desc, b asc").Check(testkit.Rows( + "2 8", "2 12", "2 20", + "1 5", "1 10", "1 15", + )) + + // "ORDER BY a, b" is unsatisfiable by either direction; results must + // still be correct (planner falls back to a Sort) but we don't assert + // scan direction here, just row order. + tk2.MustQuery("select a, b from t_mixed_e2e use index(idx_ab) order by a, b").Check(testkit.Rows( + "1 5", "1 10", "1 15", + "2 8", "2 12", "2 20", + )) +} + +// TestDescendingIndexEndToEnd verifies INSERT + SELECT through a DESC index +// end-to-end: writes go through the complement-encoded path, the mutation +// consistency check tolerates them, and the unistore coprocessor emulation +// decodes the returned keys correctly via auto-detecting DESC flag bytes. 
+func TestDescendingIndexEndToEnd(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("set @@global.tidb_enable_descending_index = on") + defer tk.MustExec("set @@global.tidb_enable_descending_index = default") + tk2 := testkit.NewTestKit(t, store) + tk2.MustExec("use test") + + tk2.MustExec("drop table if exists t_desc_e2e") + tk2.MustExec("create table t_desc_e2e (a int, b int, index idx_b (b desc))") + tk2.MustExec("insert into t_desc_e2e values (1, 10), (2, 20), (3, 5), (4, 30), (5, 15)") + + // Sanity: the index really is stored as DESC. + tk2.MustQuery("select collation from information_schema.statistics where table_name='t_desc_e2e' and index_name='idx_b'"). + Check(testkit.Rows("D")) + + // Point lookup on DESC column must find the row. + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 20").Check(testkit.Rows("2")) + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 5").Check(testkit.Rows("3")) + tk2.MustQuery("select a from t_desc_e2e use index(idx_b) where b = 999").Check(testkit.Rows()) + + // Full ordered scan of the DESC index returns rows in b-descending order + // via a forward keyspace scan (no reverse-scan flag on the IndexReader). + tk2.MustQuery("select b from t_desc_e2e use index(idx_b) order by b desc").Check(testkit.Rows("30", "20", "15", "10", "5")) + // The ORDER BY ASC variant exercises the reverse-scan path on the DESC + // index and must also return the correct order. + tk2.MustQuery("select b from t_desc_e2e use index(idx_b) order by b asc").Check(testkit.Rows("5", "10", "15", "20", "30")) +} + +// TestUnservableIndexRejectsQueries exercises the IsServable fence: if a +// table contains an index whose metadata Version is newer than this binary +// understands, every plan-building entry point (SELECT, INSERT) must surface +// a clear error rather than silently produce wrong rows. 
+func TestUnservableIndexRejectsQueries(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t_unservable") + tk.MustExec("create table t_unservable (a int, b int, key idx_b (b))") + + // Forge a forward-version index by mutating the in-memory schema. We + // can't create one through DDL because the current TiDB binary won't + // emit Version > maxKnownIndexVersion — the point of the fence is to + // guard a downlevel binary reading what some future binary wrote. + is := dom.InfoSchema() + tbl, err := is.TableByName(context.Background(), ast.NewCIStr("test"), ast.NewCIStr("t_unservable")) + require.NoError(t, err) + tblInfo := tbl.Meta() + require.Len(t, tblInfo.Indices, 1) + tblInfo.Indices[0].Version = 99 + + // SELECT planning must reject the table because the planner's + // getPossibleAccessPaths walks tableInfo.Indices. + _, err = tk.Exec("select * from t_unservable where b = 1") + require.Error(t, err) + require.Contains(t, err.Error(), "metadata version 99") + require.Contains(t, err.Error(), "DROP INDEX") + + // INSERT VALUES doesn't enumerate access paths but still lands on the + // same fence via checkAllIndicesServable in buildInsert. + _, err = tk.Exec("insert into t_unservable values (1, 2)") + require.Error(t, err) + require.Contains(t, err.Error(), "metadata version 99") + + // Writable non-public states must also be fenced. tables.IsIndexWritable + // returns true for StateWriteOnly and StateWriteReorganization, so the + // mutation paths in pkg/table/tables/tables.go would write to an index + // in those states. checkAllIndicesServable must reject them too, + // otherwise an older TiDB binary running DML against a table whose + // new-format index is mid-DDL would silently corrupt the index. 
+ for _, st := range []model.SchemaState{model.StateWriteOnly, model.StateWriteReorganization} { + tblInfo.Indices[0].State = st + _, err = tk.Exec("insert into t_unservable values (3, 4)") + require.Errorf(t, err, "INSERT must fail when index is in %s with unservable version", st) + require.Contains(t, err.Error(), "metadata version 99") + } + + // And the inverse: StateDeleteOnly / StateDeleteReorganization are NOT + // writable, so the fence intentionally lets them through. Restore to + // public afterwards so the table-cleanup teardown is well-formed. + for _, st := range []model.SchemaState{model.StateDeleteOnly, model.StateDeleteReorganization} { + tblInfo.Indices[0].State = st + _, err = tk.Exec("insert into t_unservable values (5, 6)") + require.NoErrorf(t, err, "INSERT must pass when index is in %s (not writable, fence intentionally skips)", st) + } + tblInfo.Indices[0].State = model.StatePublic +} + func TestCreateIndexWithChangeMaxIndexLength(t *testing.T) { store := testkit.CreateMockStore(t) originCfg := config.GetGlobalConfig() diff --git a/pkg/ddl/desc_index_version_check_test.go b/pkg/ddl/desc_index_version_check_test.go new file mode 100644 index 0000000000000..121a8e1f32ae0 --- /dev/null +++ b/pkg/ddl/desc_index_version_check_test.go @@ -0,0 +1,153 @@ +// Copyright 2026 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package ddl
+
+import (
+	"testing"
+
+	"github.com/pingcap/kvproto/pkg/metapb"
+	"github.com/pingcap/tidb/pkg/ddl/placement"
+	"github.com/stretchr/testify/require"
+)
+
+// storeAt builds a synthetic *metapb.Store for the version-check tests.
+// Pass an empty version to simulate a store that hasn't reported one yet.
+func storeAt(id uint64, version string, isTiFlash bool) *metapb.Store {
+	s := &metapb.Store{Id: id, Version: version, Address: "mock"}
+	if isTiFlash {
+		s.Labels = []*metapb.StoreLabel{{Key: placement.EngineLabelKey, Value: placement.EngineLabelTiFlash}}
+	}
+	return s
+}
+
+func TestCheckStoresMeetDescIndexMinVersion(t *testing.T) {
+	const minVer = "9.0.0"
+	const failClosed = false // production semantics
+
+	t.Run("all TiKV stores are new enough", func(t *testing.T) {
+		stores := []*metapb.Store{
+			storeAt(1, "9.0.0", false),
+			storeAt(2, "9.1.0", false),
+			storeAt(3, "v9.0.0", false), // leading-v normalisation
+		}
+		require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed))
+	})
+
+	t.Run("an old TiKV store fails the gate", func(t *testing.T) {
+		stores := []*metapb.Store{
+			storeAt(1, "9.0.0", false),
+			storeAt(2, "8.5.0", false),
+		}
+		err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)
+		require.Error(t, err)
+		require.Contains(t, err.Error(), "store 2")
+		require.Contains(t, err.Error(), "8.5.0")
+		require.Contains(t, err.Error(), minVer)
+		require.Contains(t, err.Error(), "upgrade TiKV")
+	})
+
+	t.Run("TiFlash stores are excluded from the check", func(t *testing.T) {
+		// A TiFlash store on an old version must not block the gate; the
+		// check is for TiKV only because TiKV alone runs the coprocessor
+		// path that needs the new decoder.
+ stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "1.0.0", true), // ancient TiFlash, ignored + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("tombstone stores are skipped", func(t *testing.T) { + old := storeAt(2, "8.0.0", false) + old.State = metapb.StoreState_Tombstone + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + old, + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("empty version fails closed in production", func(t *testing.T) { + // A store that hasn't reported a clean semver yet cannot prove it + // can decode descending-order keys. The gate must reject the DDL + // rather than fall through and risk silent corruption. The operator + // can retry once PD has reconciled. + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed) + require.Error(t, err) + require.Contains(t, err.Error(), "store 2") + require.Contains(t, err.Error(), "has not reported a version") + }) + + t.Run("empty version is tolerated in tests", func(t *testing.T) { + // In `intest` builds the mock store fixture reports empty versions + // for every replica, so the gate must let DDL through to allow + // integration tests to run. + stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "", false), + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, true)) + }) + + t.Run("garbage version always fails closed", func(t *testing.T) { + // Unparsable version strings are unambiguously a bug or a + // misconfigured store; never tolerate them, even in tests. 
+ stores := []*metapb.Store{ + storeAt(1, "9.0.0", false), + storeAt(2, "not-a-semver", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, true) + require.Error(t, err) + require.Contains(t, err.Error(), "store 2") + require.Contains(t, err.Error(), "unparsable") + }) + + t.Run("malformed minVersion is reported", func(t *testing.T) { + err := checkStoresMeetDescIndexMinVersion(nil, "definitely-not-semver", failClosed) + require.Error(t, err) + }) + + t.Run("pre-release at the floor base version is accepted", func(t *testing.T) { + // Strict semver treats `9.0.0-beta.2` < `9.0.0`, but a nightly / + // release-candidate cluster cut from the branch that contains the + // feature does carry the new decoder. Reject only when the base + // (Major.Minor.Patch) is below the floor; pre-release tags at the + // floor base pass. tiup playground reports versions like + // `9.0.0-beta.2` so this is what the e2e runner relies on. + stores := []*metapb.Store{ + storeAt(1, "9.0.0-beta.2", false), + storeAt(2, "9.0.0-rc.1", false), + } + require.NoError(t, checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed)) + }) + + t.Run("pre-release below the floor base still fails", func(t *testing.T) { + // `8.5.0-beta.2`'s base (8.5.0) is below the floor (9.0.0), so the + // pre-release relaxation must NOT let it through. This guards + // against accidentally degrading the check to "any pre-release + // passes". 
+ stores := []*metapb.Store{ + storeAt(1, "8.5.0-beta.2", false), + } + err := checkStoresMeetDescIndexMinVersion(stores, minVer, failClosed) + require.Error(t, err) + require.Contains(t, err.Error(), "store 1") + require.Contains(t, err.Error(), "8.5.0-beta.2") + }) +} diff --git a/pkg/ddl/executor.go b/pkg/ddl/executor.go index 32cdacbba4f01..f1e43870c5ecd 100644 --- a/pkg/ddl/executor.go +++ b/pkg/ddl/executor.go @@ -29,12 +29,15 @@ import ( "time" "unicode/utf8" + "github.com/coreos/go-semver/semver" "github.com/pingcap/errors" "github.com/pingcap/failpoint" + "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/pingcap/tidb/pkg/ddl/label" "github.com/pingcap/tidb/pkg/ddl/logutil" + "github.com/pingcap/tidb/pkg/ddl/placement" "github.com/pingcap/tidb/pkg/ddl/resourcegroup" sess "github.com/pingcap/tidb/pkg/ddl/session" ddlutil "github.com/pingcap/tidb/pkg/ddl/util" @@ -78,10 +81,26 @@ import ( "github.com/pingcap/tidb/pkg/util/traceevent" "github.com/pingcap/tidb/pkg/util/tracing" "github.com/tikv/client-go/v2/oracle" + "github.com/tikv/client-go/v2/tikv" pdhttp "github.com/tikv/pd/client/http" "go.uber.org/zap" ) +// MinTiKVVersionForDescIndex is the lowest TiKV semver that contains the +// DESC-aware coprocessor decoder added for pingcap/tidb#2519. The CREATE +// INDEX path refuses to persist a column with Desc=true unless every TiKV +// store reports at least this version, so the cluster can never end up in +// a state where TiDB writes complemented index bytes that TiKV mis-decodes. +// +// TODO(release): bump this in lockstep with the TiKV release that contains +// the matching Rust changes (split_datum / decode_one_with_desc auto-detect +// of complemented flag bytes). 
The placeholder below intentionally sits +// well past the most recent stable release so the gate fails closed in any +// pre-release deployment — operators who flip tidb_enable_descending_index +// on a cluster that hasn't been upgraded will see a clear "upgrade TiKV" +// error rather than silently corrupting their indexes. +const MinTiKVVersionForDescIndex = "9.0.0" + const ( expressionIndexPrefix = "_V$" tableNotExist = -1 @@ -1123,6 +1142,20 @@ func (e *executor) createTableWithInfoJob( } } + // Run the descending-index TiKV-version gate at the shared chokepoint + // for CREATE TABLE so CreateTableWithInfo and BatchCreateTableWithInfo + // are covered alongside CreateTable. Placed AFTER the OnExistIgnore + // short-circuit so a no-op `CREATE TABLE IF NOT EXISTS` against an + // existing table never makes a PD round-trip. See pingcap/tidb#2519. + for _, idx := range tbInfo.Indices { + if idx.HasDescColumn() { + if err = checkTiKVSupportsDescIndex(ctx); err != nil { + return nil, errors.Trace(err) + } + break + } + } + if err := checkTableInfoValidExtra(ctx.GetSessionVars().StmtCtx.ErrCtx(), ctx.GetStore(), dbName, tbInfo); err != nil { return nil, err } @@ -4684,6 +4717,11 @@ func (e *executor) CreatePrimaryKey(ctx sessionctx.Context, ti ast.Ident, indexN return infoschema.ErrMultiplePriKey } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. See ApplyDescGateToIndexParts for the + // rationale (avoid races between SET GLOBAL and DDL owner replay). + ApplyDescGateToIndexParts(indexPartSpecifications) + // Primary keys cannot include expression index parts. A primary key requires the generated column to be stored, // but expression index parts are implemented as virtual generated columns, not stored generated columns. 
for _, idxPart := range indexPartSpecifications { @@ -4703,6 +4741,11 @@ func (e *executor) CreatePrimaryKey(ctx sessionctx.Context, ti ast.Ident, indexN if err != nil { return errors.Trace(err) } + if hasDescIndexColumn(indexColumns) { + if err := checkTiKVSupportsDescIndex(ctx); err != nil { + return err + } + } if _, err = CheckPKOnGeneratedColumn(tblInfo, indexPartSpecifications); err != nil { return err } @@ -4839,6 +4882,14 @@ func (e *executor) createColumnarIndex(ctx sessionctx.Context, ti ast.Ident, ind return errors.Trace(err) } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. With the gate off, Desc is cleared here so + // the columnar-DESC reject in buildIndexColumns never fires — that + // preserves the historical "DESC silently dropped" behaviour for + // columnar indexes when the feature is disabled. With the gate on, + // columnar DESC stays Desc=true and gets a clear error downstream. + ApplyDescGateToIndexParts(indexPartSpecifications) + var columnarIndexType model.ColumnarIndexType switch indexOption.Tp { case ast.IndexTypeInverted: @@ -4943,6 +4994,123 @@ func (e *executor) CreateIndex(ctx sessionctx.Context, stmt *ast.CreateIndexStmt stmt.IndexPartSpecifications, stmt.IndexOption, stmt.IfNotExists) } +// hasDescIndexColumn reports whether any built IndexColumn carries Desc=true. +func hasDescIndexColumn(cols []*model.IndexColumn) bool { + for _, c := range cols { + if c.Desc { + return true + } + } + return false +} + +// checkTiKVSupportsDescIndex returns an error if any non-TiFlash store in +// the cluster runs a TiKV version below MinTiKVVersionForDescIndex. Called +// when CREATE INDEX would persist a column with Desc=true. +// +// Two test-only escape hatches keep the check from blocking development: +// +// 1. Stores that never satisfy `tikv.Storage` — i.e. 
backends like the +// pure-Go mock used by some unit tests — short-circuit at the type +// assertion below; the unistore coprocessor used by those backends +// auto-detects DESC flag bytes, so the version gate is unnecessary. +// +// 2. MockStore-backed integration tests (`intest` builds) DO satisfy +// `tikv.Storage` and reach the PD-lookup path, but the mock fixture +// never fills in the `Version` string on its stores. The +// `tolerateMissingVersion=intest.InTest` flag passed to +// checkStoresMeetDescIndexMinVersion is what lets those tests +// proceed; production callers pass `false` and fail closed on a +// missing version. Do NOT remove the intest branch — it is not +// dead code, it covers MockStore. +func checkTiKVSupportsDescIndex(ctx sessionctx.Context) error { + tikvStore, ok := ctx.GetStore().(tikv.Storage) + if !ok { + return nil + } + pdCli := tikvStore.GetRegionCache().PDClient() + if pdCli == nil { + return nil + } + // Bound the PD round-trip so an unreachable PD doesn't hang DDL + // indefinitely. 5s matches the pattern used elsewhere for short PD + // metadata lookups in this package. + pdCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + stores, err := pdCli.GetAllStores(pdCtx) + if err != nil { + return errors.Annotate(err, "checking TiKV cluster version for descending index") + } + // In production we fail closed on missing versions. The mock store + // fixture used in `intest` builds reports empty versions for every + // replica; tolerate that case so integration tests can exercise + // descending-index DDL without spinning up a real PD. + return checkStoresMeetDescIndexMinVersion(stores, MinTiKVVersionForDescIndex, intest.InTest) +} + +// checkStoresMeetDescIndexMinVersion is the pure version-comparison core of +// checkTiKVSupportsDescIndex, split out so the caller can pass a mocked +// store list in unit tests without spinning up a PD client. 
+// +// tolerateMissingVersion controls how to treat stores that report an empty +// version string. The production caller passes false (fail closed); the +// in-process test caller passes true so the mock-store fixture, which never +// fills in a version, doesn't block descending-index DDL. +// Garbage (unparsable) version strings always fail closed because they are +// unambiguously a misconfiguration, never a fixture artifact. +func checkStoresMeetDescIndexMinVersion(stores []*metapb.Store, minVersion string, tolerateMissingVersion bool) error { + required, err := semver.NewVersion(minVersion) + if err != nil { + return errors.Trace(err) + } + for _, s := range stores { + if s.GetState() == metapb.StoreState_Tombstone { + continue + } + if storeIsTiFlash(s) { + continue + } + if s.Version == "" { + if tolerateMissingVersion { + continue + } + return errors.Errorf( + "cannot create descending-order index: TiKV store %d at %s has not reported a version yet; retry after the cluster has reported its store versions", + s.Id, s.Address) + } + actual, parseErr := semver.NewVersion(strings.TrimPrefix(s.Version, "v")) + if parseErr != nil { + return errors.Annotatef(parseErr, + "cannot create descending-order index: TiKV store %d at %s reports an unparsable version %q", + s.Id, s.Address, s.Version) + } + // Compare on Major.Minor.Patch only — strict semver treats pre-release + // labels as lower than the corresponding stable, so a `9.0.0-beta.2` + // nightly that DOES contain the feature would otherwise be rejected + // by a `9.0.0` floor. Operators routinely run pre-release/RC clusters + // for testing; once the feature lands on a release branch the + // nightly/beta tag for that branch is a stronger guarantee than the + // strict-semver lexicographic ordering, not a weaker one. 
+ base := *actual + base.PreRelease = "" + if base.LessThan(*required) { + return errors.Errorf( + "cannot create descending-order index: TiKV store %d at %s reports version %s, which is below the minimum %s required by this feature; upgrade TiKV before retrying", + s.Id, s.Address, s.Version, minVersion) + } + } + return nil +} + +func storeIsTiFlash(store *metapb.Store) bool { + for _, label := range store.Labels { + if label.Key == placement.EngineLabelKey && label.Value == placement.EngineLabelTiFlash { + return true + } + } + return false +} + // addHypoIndexIntoCtx adds this index as a hypo-index into this ctx. func (*executor) addHypoIndexIntoCtx(ctx sessionctx.Context, schemaName, tableName ast.CIStr, indexInfo *model.IndexInfo) error { sctx := ctx.GetSessionVars() @@ -4984,6 +5152,10 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast return errors.Trace(dbterror.ErrOptOnCacheTable.GenWithStackByArgs("Create Index")) } + // Apply tidb_enable_descending_index gate at the SQL frontend, before + // the DDL job is enqueued. See ApplyDescGateToIndexParts. + ApplyDescGateToIndexParts(indexPartSpecifications) + metaBuildCtx := NewMetaBuildContextWithSctx(ctx) indexName, hiddenCols, err := checkIndexNameAndColumns(metaBuildCtx, t, indexName, indexPartSpecifications, model.ColumnarIndexTypeNA, ifNotExists) if err != nil { @@ -5009,6 +5181,20 @@ func (e *executor) createIndex(ctx sessionctx.Context, ti ast.Ident, keyType ast return errors.Trace(err) } + // Skip the cluster-version preflight for hypothetical indexes + // (CREATE HYPO INDEX). Hypo indexes are planner-only — they never + // produce KV mutations and never reach a TiKV coprocessor — so an + // older or unhealthy TiKV cluster has no bearing on whether the + // index can be defined. Without this guard, `CREATE HYPO INDEX + // ... DESC` would either fail on an out-of-date cluster or block + // for the PD round-trip even though it is a no-op on storage. 
+ isHypo := indexOption != nil && indexOption.Tp == ast.IndexTypeHypo + if hasDescIndexColumn(indexColumns) && !isHypo { + if err := checkTiKVSupportsDescIndex(ctx); err != nil { + return err + } + } + if err = checkCreateGlobalIndex(ctx.GetSessionVars().StmtCtx.ErrCtx(), tblInfo, indexName.O, indexColumns, unique, indexOption != nil && indexOption.Global); err != nil { return err } diff --git a/pkg/ddl/index.go b/pkg/ddl/index.go index d030e41017b77..3100ee1f26cba 100644 --- a/pkg/ddl/index.go +++ b/pkg/ddl/index.go @@ -110,6 +110,29 @@ var DefaultCumulativeTimeout = 1 * time.Minute // exported for testing. var DefaultAnalyzeCheckInterval = 10 * time.Second +// ApplyDescGateToIndexParts implements the create-time-only semantics of the +// `tidb_enable_descending_index` system variable (pingcap/tidb#2519). +// +// It is called by SQL-frontend DDL paths (CreateTable, CreateIndex, +// CreatePrimaryKey, ...) on the AST IndexPartSpecification slice BEFORE the +// DDL job is enqueued. When the gate is off, every Desc flag is cleared so +// the rest of the DDL pipeline — including the DDL owner replaying the job — +// sees an unconditional ASC layout. When the gate is on, Desc is left intact. +// +// Re-reading the global atomic from buildIndexColumns at job-execution time +// would be wrong: a `SET GLOBAL tidb_enable_descending_index` between +// statement submission and DDL owner replay would silently flip the persisted +// schema. Baking the decision in here at submission time makes the result +// independent of any later toggling. +func ApplyDescGateToIndexParts(parts []*ast.IndexPartSpecification) { + if vardef.EnableDescendingIndex.Load() { + return + } + for _, ip := range parts { + ip.Desc = false + } +} + func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, indexPartSpecifications []*ast.IndexPartSpecification, columnarIndexType model.ColumnarIndexType) ([]*model.IndexColumn, bool, error) { // Build offsets. 
idxParts := make([]*model.IndexColumn, 0, len(indexPartSpecifications)) @@ -132,6 +155,16 @@ func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, inde if columnarIndexType == model.ColumnarIndexTypeFulltext && !types.IsString(col.FieldType.GetType()) { return nil, false, dbterror.ErrUnsupportedAddColumnarIndex.FastGenByArgs(fmt.Sprintf("only support string type, but this is type: %s", col.FieldType.String())) } + // Columnar indexes (vector / inverted / fulltext) cannot support + // descending order regardless of the gate. ApplyDescGateToIndexParts + // has already cleared Desc when the gate is off, so reaching this + // branch means the user explicitly asked for DESC on a columnar + // index with the gate ON — surface the limitation instead of + // silently dropping the keyword. Fulltext repeats this check in + // buildFullTextInfoWithCheck for defence in depth. + if ip.Desc && columnarIndexType != model.ColumnarIndexTypeNA { + return nil, false, dbterror.ErrUnsupportedAddColumnarIndex.FastGenByArgs(fmt.Sprintf("%s does not support DESC order", columnarIndexType.SQLName())) + } // return error in strict sql mode if columnarIndexType == model.ColumnarIndexTypeNA { @@ -175,10 +208,18 @@ func buildIndexColumns(ctx *metabuild.Context, columns []*model.ColumnInfo, inde ctx.AppendWarning(dbterror.ErrTooLongKey.FastGenByArgs(sumLength, maxIndexLength)) } + // ip.Desc has already been gated at the SQL frontend by + // ApplyDescGateToIndexParts: when tidb_enable_descending_index is OFF, + // every part comes in with Desc=false, preserving historical MySQL 5.7 + // behaviour for legacy migration scripts. We can therefore copy the + // flag through unconditionally — re-reading the global atomic here + // would race with `SET GLOBAL` between job submission and DDL owner + // replay. 
idxParts = append(idxParts, &model.IndexColumn{ Name: col.Name, Offset: col.Offset, Length: indexColLen, + Desc: ip.Desc, }) } @@ -431,6 +472,24 @@ func BuildIndexInfo( if err != nil { return nil, errors.Trace(err) } + // Reject DESC on a clustered PRIMARY KEY. For PKIsHandle the PK column is + // the row's int handle itself (encoded ascending and never going through + // the DESC-aware index encoder), and for IsCommonHandle the PK columns + // determine the row-key byte order, which the common-handle range builder + // does not yet encode with per-column DESC awareness. Allowing DESC here + // would silently miscompare row keys, so error out at DDL time. A + // NONCLUSTERED primary key is encoded just like any unique secondary + // index and supports DESC normally. + if isPrimary && idxInfo.HasDescColumn() && (tblInfo.IsCommonHandle || tblInfo.PKIsHandle) { + return nil, errors.Errorf( + "DESC is not supported on the columns of a clustered PRIMARY KEY; either drop the DESC keyword or declare the primary key as NONCLUSTERED") + } + // Bump the index-metadata version when any descending column is present so + // that an older TiDB reading this schema refuses to serve queries against + // the index rather than returning wrong results — see IndexInfo.IsServable. + if idxInfo.HasDescColumn() { + idxInfo.Version = 1 + } if indexOption != nil { idxInfo.Comment = indexOption.Comment diff --git a/pkg/distsql/request_builder.go b/pkg/distsql/request_builder.go index 97447aeea7fef..b2e2bc9f36e95 100644 --- a/pkg/distsql/request_builder.go +++ b/pkg/distsql/request_builder.go @@ -15,6 +15,7 @@ package distsql import ( + "bytes" "fmt" "math" "sort" @@ -701,15 +702,31 @@ func PartitionHandlesToKVRanges(handles []kv.Handle) ([]kv.KeyRange, []int) { return krs, hints } -// IndexRangesToKVRanges converts index ranges to "KeyRange". +// IndexRangesToKVRanges converts index ranges to "KeyRange". For indexes with +// any descending-order column, use IndexRangesToKVRangesWithDesc. 
func IndexRangesToKVRanges(dctx *distsqlctx.DistSQLContext, tid, idxID int64, ranges []*ranger.Range) (*kv.KeyRanges, error) { - return IndexRangesToKVRangesWithInterruptSignal(dctx, tid, idxID, ranges, nil, nil) + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, nil, ranges, nil, nil) +} + +// IndexRangesToKVRangesWithDesc is like IndexRangesToKVRanges but threads a +// per-column desc flag slice down to the byte encoder. Columns marked +// descending are complemented and, if the resulting byte low/high pair is +// inverted, automatically swapped. See EncodeIndexKeyWithDesc. +func IndexRangesToKVRangesWithDesc(dctx *distsqlctx.DistSQLContext, tid, idxID int64, desc []bool, ranges []*ranger.Range) (*kv.KeyRanges, error) { + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, desc, ranges, nil, nil) } // IndexRangesToKVRangesWithInterruptSignal converts index ranges to "KeyRange". // The process can be interrupted by set `interruptSignal` to true. func IndexRangesToKVRangesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tid, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { - keyRanges, err := indexRangesToKVRangesForTablesWithInterruptSignal(dctx, []int64{tid}, idxID, ranges, memTracker, interruptSignal) + return IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tid, idxID, nil, ranges, memTracker, interruptSignal) +} + +// IndexRangesToKVRangesWithDescAndInterruptSignal is the full-featured entry +// point that takes both per-column desc flags and an interrupt signal. The +// other IndexRangesToKVRanges* helpers delegate here with sensible defaults. 
+func IndexRangesToKVRangesWithDescAndInterruptSignal(dctx *distsqlctx.DistSQLContext, tid, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { + keyRanges, err := indexRangesToKVRangesForTablesWithInterruptSignal(dctx, []int64{tid}, idxID, desc, ranges, memTracker, interruptSignal) if err != nil { return nil, err } @@ -719,13 +736,19 @@ func IndexRangesToKVRangesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, t // IndexRangesToKVRangesForTables converts indexes ranges to "KeyRange". func IndexRangesToKVRangesForTables(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range) (*kv.KeyRanges, error) { - return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, ranges, nil, nil) + return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, nil, ranges, nil, nil) } -// IndexRangesToKVRangesForTablesWithInterruptSignal converts indexes ranges to "KeyRange". -// The process can be interrupted by set `interruptSignal` to true. -func indexRangesToKVRangesForTablesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { - return indexRangesToKVWithoutSplit(dctx, tids, idxID, ranges, memTracker, interruptSignal) +// IndexRangesToKVRangesForTablesWithDesc is the desc-aware variant of +// IndexRangesToKVRangesForTables. +func IndexRangesToKVRangesForTablesWithDesc(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range) (*kv.KeyRanges, error) { + return indexRangesToKVRangesForTablesWithInterruptSignal(dctx, tids, idxID, desc, ranges, nil, nil) +} + +// indexRangesToKVRangesForTablesWithInterruptSignal converts index ranges to +// "KeyRange". The process can be interrupted by setting `interruptSignal` to true. 
+func indexRangesToKVRangesForTablesWithInterruptSignal(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { + return indexRangesToKVWithoutSplit(dctx, tids, idxID, desc, ranges, memTracker, interruptSignal) } // CommonHandleRangesToKVRanges converts common handle ranges to "KeyRange". @@ -782,7 +805,7 @@ func VerifyTxnScope(txnScope string, physicalTableID int64, is infoschema.MetaOn return true } -func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { +func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, idxID int64, desc []bool, ranges []*ranger.Range, memTracker *memory.Tracker, interruptSignal *atomic.Value) (*kv.KeyRanges, error) { krs := make([][]kv.KeyRange, len(tids)) for i := range krs { krs[i] = make([]kv.KeyRange, 0, len(ranges)) @@ -796,7 +819,7 @@ func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, // encodeIndexKey and EncodeIndexSeekKey is time-consuming, thus we need to // check the interrupt signal periodically. for i, ran := range ranges { - low, high, err := EncodeIndexKey(dctx, ran) + low, high, err := EncodeIndexKeyWithDesc(dctx, ran, desc) if err != nil { return nil, err } @@ -827,8 +850,30 @@ func indexRangesToKVWithoutSplit(dctx *distsqlctx.DistSQLContext, tids []int64, return kv.NewPartitionedKeyRanges(krs), nil } -// EncodeIndexKey gets encoded keys containing low and high +// EncodeIndexKey gets encoded keys containing low and high. +// For indexes that contain descending-order columns, use EncodeIndexKeyWithDesc. 
 func EncodeIndexKey(dctx *distsqlctx.DistSQLContext, ran *ranger.Range) (low, high []byte, err error) {
+	return EncodeIndexKeyWithDesc(dctx, ran, nil)
+}
+
+// EncodeIndexKeyWithDesc encodes a range's bounds into the byte low/high pair
+// fed to TiKV, honoring per-column descending-order flags (pingcap/tidb#2519).
+//
+// When desc is nil or all-false the result is byte-identical to EncodeIndexKey:
+// no allocations beyond what the ascending path already needs, no semantic
+// change. When any column is descending, its memcomparable bytes are
+// bitwise-complemented so forward byte-order iteration yields the declared
+// direction.
+//
+// Descending encoding reverses comparison order, so a semantic range like
+// `col BETWEEN low AND high` can encode to byte low ≥ byte high. If that
+// happens we swap the two byte strings and swap the LowExclude/HighExclude
+// flags, restoring a well-ordered [byteLow, byteHigh] pair before the usual
+// PrefixNext adjustments run. This one-shot swap is correct for single-
+// column DESC ranges and for composite ranges whose differentiating column
+// is DESC; point ranges (byteLow == byteHigh) skip the swap and behave
+// identically to the ascending path.
+func EncodeIndexKeyWithDesc(dctx *distsqlctx.DistSQLContext, ran *ranger.Range, desc []bool) (low, high []byte, err error) {
 	tz := time.UTC
 	errCtx := errctx.StrictNoWarningContext
 	if dctx != nil {
@@ -836,26 +881,68 @@ func EncodeIndexKey(dctx *distsqlctx.DistSQLContext, ran *ranger.Range) (low, hi
 		errCtx = dctx.ErrCtx
 	}
 
-	low, err = codec.EncodeKey(tz, nil, ran.LowVal...)
+	hasDesc := false
+	for _, d := range desc {
+		if d {
+			hasDesc = true
+			break
+		}
+	}
+
+	if !hasDesc {
+		low, err = codec.EncodeKey(tz, nil, ran.LowVal...)
+	} else {
+		low, err = codec.EncodeKeyWithDesc(tz, nil, desc, ran.LowVal...)
+ } err = errCtx.HandleError(err) if err != nil { return nil, nil, err } - if ran.LowExclude { - low = kv.Key(low).PrefixNext() + if !hasDesc { + high, err = codec.EncodeKey(tz, nil, ran.HighVal...) + } else { + high, err = codec.EncodeKeyWithDesc(tz, nil, desc, ran.HighVal...) } - high, err = codec.EncodeKey(tz, nil, ran.HighVal...) err = errCtx.HandleError(err) if err != nil { return nil, nil, err } - if !ran.HighExclude { + lowExclude, highExclude := ran.LowExclude, ran.HighExclude + if hasDesc && bytes.Compare(low, high) > 0 { + // The semantic low encoded to a larger byte than the semantic high + // because descending columns flip byte ordering. Swap so the caller + // always sees a well-ordered [low, high] pair, and swap the exclude + // flags so PrefixNext gets applied to the correct side. + low, high = high, low + lowExclude, highExclude = highExclude, lowExclude + } + + if lowExclude { + low = kv.Key(low).PrefixNext() + } + if !highExclude { high = kv.Key(high).PrefixNext() } return low, high, nil } +// indexDescFlags returns a per-column descending-order slice derived from +// idxInfo, or nil when the index has no DESC columns. Returning nil lets +// downstream encoders stay on the ascending fast path. Used at distsql +// callsites that have an *IndexInfo in hand but were originally written +// against the legacy idxID-only range helpers. +func indexDescFlags(idxInfo *model.IndexInfo) []bool { + if idxInfo == nil || !idxInfo.HasDescColumn() { + return nil + } + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + return desc +} + // BuildTableRanges returns the key ranges encompassing the entire table, // and its partitions if exists. 
func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { @@ -871,7 +958,7 @@ func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) { if idx.State != model.StatePublic || !idx.Global { continue } - idxRanges, err := IndexRangesToKVRanges(nil, tbl.ID, idx.ID, ranger.FullRange()) + idxRanges, err := IndexRangesToKVRangesWithDesc(nil, tbl.ID, idx.ID, indexDescFlags(idx), ranger.FullRange()) if err != nil { return nil, err } @@ -908,7 +995,7 @@ func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) { continue } ranges = ranger.FullRange() - idxRanges, err := IndexRangesToKVRanges(nil, tblID, index.ID, ranges) + idxRanges, err := IndexRangesToKVRangesWithDesc(nil, tblID, index.ID, indexDescFlags(index), ranges) if err != nil { return nil, errors.Trace(err) } diff --git a/pkg/executor/admin.go b/pkg/executor/admin.go index ed9814a08e264..88ae4353f9305 100644 --- a/pkg/executor/admin.go +++ b/pkg/executor/admin.go @@ -818,7 +818,7 @@ func (e *CleanupIndexExec) buildIndexScan(ctx context.Context, txn kv.Transactio } var builder distsql.RequestBuilder ranges := ranger.FullRange() - keyRanges, err := distsql.IndexRangesToKVRanges(e.Ctx().GetDistSQLCtx(), e.physicalID, e.index.Meta().ID, ranges) + keyRanges, err := distsql.IndexRangesToKVRangesWithDesc(e.Ctx().GetDistSQLCtx(), e.physicalID, e.index.Meta().ID, indexColDescFlags(e.index.Meta()), ranges) if err != nil { return nil, err } diff --git a/pkg/executor/builder.go b/pkg/executor/builder.go index 134d56b366a71..1cce8eaa36e1f 100644 --- a/pkg/executor/builder.go +++ b/pkg/executor/builder.go @@ -4134,7 +4134,8 @@ func (builder *dataReaderBuilder) buildPartitionedTableReaderKVRangesForIndexJoi } if canLocateByKey { for pid, contents := range contentsByPID { - tmp, err := buildKvRangesForIndexJoin(dctx, rctx, pid, -1, contents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) + // Common handle PK is always ascending (no per-column DESC), so desc=nil. 
+ tmp, err := buildKvRangesForIndexJoin(dctx, rctx, pid, -1, nil, contents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) if err != nil { return nil, err } @@ -4142,7 +4143,7 @@ func (builder *dataReaderBuilder) buildPartitionedTableReaderKVRangesForIndexJoi } } else { for _, p := range usedPartitionList { - tmp, err := buildKvRangesForIndexJoin(dctx, rctx, p.GetPhysicalID(), -1, lookUpContents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) + tmp, err := buildKvRangesForIndexJoin(dctx, rctx, p.GetPhysicalID(), -1, nil, lookUpContents, indexRanges, keyOff2IdxOff, cwc, kvRangeMemTracker, interruptSignal) if err != nil { return nil, err } @@ -5111,7 +5112,7 @@ func (builder *dataReaderBuilder) buildTableReaderForIndexJoin(ctx context.Conte tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { if v.IsCommonHandle { - kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), -1, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) + kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), -1, nil, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) if err != nil { return nil, err } @@ -5285,7 +5286,7 @@ func (builder *dataReaderBuilder) buildIndexReaderForIndexJoin(ctx context.Conte } tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { - kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, e.physicalTableID, e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memoryTracker, interruptSignal) + kvRanges, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, e.physicalTableID, e.index.ID, indexColDescFlags(e.index), lookUpContents, indexRanges, keyOff2IdxOff, cwc, memoryTracker, interruptSignal) if err != nil { return nil, err } @@ -5341,7 +5342,7 @@ func 
(builder *dataReaderBuilder) buildIndexLookUpReaderForIndexJoin(ctx context tbInfo := e.table.Meta() if tbInfo.GetPartitionInfo() == nil || !builder.ctx.GetSessionVars().StmtCtx.UseDynamicPartitionPrune() { - kvRange, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) + kvRange, err := buildKvRangesForIndexJoin(e.dctx, e.rctx, getPhysicalTableID(e.table), e.index.ID, indexColDescFlags(e.index), lookUpContents, indexRanges, keyOff2IdxOff, cwc, memTracker, interruptSignal) if err != nil { return nil, err } @@ -5473,7 +5474,9 @@ func buildRangesForIndexJoin(rctx *rangerctx.RangerContext, lookUpContents []*jo } // buildKvRangesForIndexJoin builds kv ranges for index join when the inner plan is index scan plan. -func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx.RangerContext, tableID, indexID int64, lookUpContents []*join.IndexJoinLookUpContent, +// desc carries the per-column descending flags of the index (nil or all-false for +// ascending-only indexes, which preserves the original encoder fast path). +func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx.RangerContext, tableID, indexID int64, desc []bool, lookUpContents []*join.IndexJoinLookUpContent, ranges []*ranger.Range, keyOff2IdxOff []int, cwc *physicalop.ColWithCmpFuncManager, memTracker *memory.Tracker, interruptSignal *atomic.Value, ) (_ []kv.KeyRange, err error) { kvRanges := make([]kv.KeyRange, 0, len(ranges)*len(lookUpContents)) @@ -5496,7 +5499,7 @@ func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx. 
if indexID == -1 { tmpKvRanges, err = distsql.CommonHandleRangesToKVRanges(dctx, []int64{tableID}, ranges) } else { - tmpKvRanges, err = distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, tableID, indexID, ranges, memTracker, interruptSignal) + tmpKvRanges, err = distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tableID, indexID, desc, ranges, memTracker, interruptSignal) } if err != nil { return nil, err @@ -5544,10 +5547,24 @@ func buildKvRangesForIndexJoin(dctx *distsqlctx.DistSQLContext, pctx *rangerctx. tmpKeyRanges, err := distsql.CommonHandleRangesToKVRanges(dctx, []int64{tableID}, tmpDatumRanges) return tmpKeyRanges.FirstPartitionRange(), err } - tmpKeyRanges, err := distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, tableID, indexID, tmpDatumRanges, memTracker, interruptSignal) + tmpKeyRanges, err := distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, tableID, indexID, desc, tmpDatumRanges, memTracker, interruptSignal) return tmpKeyRanges.FirstPartitionRange(), err } +// indexColDescFlags returns a per-column descending-order slice derived from +// idxInfo, or nil when the index has no DESC columns. Returning nil lets +// downstream encoders stay on the ascending fast path. 
+func indexColDescFlags(idxInfo *model.IndexInfo) []bool { + if idxInfo == nil || !idxInfo.HasDescColumn() { + return nil + } + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + return desc +} + func (b *executorBuilder) buildWindow(v *physicalop.PhysicalWindow) exec.Executor { childExec := b.build(v.Children()[0]) if b.err != nil { diff --git a/pkg/executor/distsql.go b/pkg/executor/distsql.go index b6329e59379c3..48af60e42ca05 100644 --- a/pkg/executor/distsql.go +++ b/pkg/executor/distsql.go @@ -336,7 +336,7 @@ func (e *IndexReaderExecutor) buildKVRangesForIndexReader() ([]kv.KeyRange, erro results := make([]kv.KeyRange, 0, len(groupedRanges)) for _, ranges := range groupedRanges { - kvRanges, err := buildKeyRanges(e.dctx, ranges, e.partRangeMap, tableIDs, e.index.ID, nil) + kvRanges, err := buildKeyRanges(e.dctx, ranges, e.partRangeMap, tableIDs, e.index.ID, indexColDescFlags(e.index), nil) if err != nil { return nil, err } @@ -619,6 +619,7 @@ func buildKeyRanges(dctx *distsqlctx.DistSQLContext, rangeOverrideForPartitionID map[int64][]*ranger.Range, physicalIDs []int64, indexID int64, + desc []bool, memTracker *memory.Tracker, ) ([][]kv.KeyRange, error) { results := make([][]kv.KeyRange, 0, len(physicalIDs)) @@ -633,7 +634,7 @@ func buildKeyRanges(dctx *distsqlctx.DistSQLContext, } results = append(results, rRanges.FirstPartitionRange()) } else { - singleRanges, err := distsql.IndexRangesToKVRangesWithInterruptSignal(dctx, physicalID, indexID, ranges, memTracker, nil) + singleRanges, err := distsql.IndexRangesToKVRangesWithDescAndInterruptSignal(dctx, physicalID, indexID, desc, ranges, memTracker, nil) if err != nil { return nil, err } @@ -661,7 +662,7 @@ func (e *IndexLookUpExecutor) buildTableKeyRanges() (err error) { kvRanges := make([][]kv.KeyRange, 0, len(groupedRanges)) physicalTblIDsForPartitionKVRanges := make([]int64, 0, len(tableIDs)*len(groupedRanges)) for _, ranges := range groupedRanges { - 
kvRange, err := buildKeyRanges(e.dctx, ranges, e.partitionRangeMap, tableIDs, e.index.ID, e.memTracker) + kvRange, err := buildKeyRanges(e.dctx, ranges, e.partitionRangeMap, tableIDs, e.index.ID, indexColDescFlags(e.index), e.memTracker) if err != nil { return err } diff --git a/pkg/executor/executor_pkg_test.go b/pkg/executor/executor_pkg_test.go index ef72b8fde2220..9e8c898058e1c 100644 --- a/pkg/executor/executor_pkg_test.go +++ b/pkg/executor/executor_pkg_test.go @@ -64,7 +64,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwc(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, nil, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, nil, nil) require.NoError(t, err) // Check the kvRanges is in order. for i, kvRange := range kvRanges { @@ -94,7 +94,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwcAndWithMemoryTracker(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() memTracker := memory.NewTracker(memory.LabelForIndexWorker, -1) - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) require.NoError(t, err) // Check the kvRanges is in order. 
for i, kvRange := range kvRanges { @@ -116,7 +116,7 @@ func TestBuildKvRangesForIndexJoinWithoutCwcAndWithMemoryTracker(t *testing.T) { keyOff2IdxOff := []int{1, 3} ctx := mock.NewContext() memTracker := memory.NewTracker(memory.LabelForIndexWorker, -1) - kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) + kvRanges, err := buildKvRangesForIndexJoin(ctx.GetDistSQLCtx(), ctx.GetRangerCtx(), 0, 0, nil, joinKeyRows, indexRanges, keyOff2IdxOff, nil, memTracker, nil) require.NoError(t, err) // Check the kvRanges is in order. for i, kvRange := range kvRanges { diff --git a/pkg/executor/index_merge_reader.go b/pkg/executor/index_merge_reader.go index 908400d36dbd3..abfa3815a4d6b 100644 --- a/pkg/executor/index_merge_reader.go +++ b/pkg/executor/index_merge_reader.go @@ -191,7 +191,10 @@ func (e *IndexMergeReaderExecutor) Open(_ context.Context) (err error) { } for i, idx := range e.indexes { if idx != nil && idx.Global { - keyRange, _ := distsql.IndexRangesToKVRanges(e.ctx.GetDistSQLCtx(), e.table.Meta().ID, idx.ID, e.ranges[i]) + keyRange, err := distsql.IndexRangesToKVRangesWithDesc(e.ctx.GetDistSQLCtx(), e.table.Meta().ID, idx.ID, indexColDescFlags(idx), e.ranges[i]) + if err != nil { + return err + } e.partitionKeyRanges[i] = [][]kv.KeyRange{keyRange.FirstPartitionRange()} } else { for _, pKeyRanges := range tmpPartitionKeyRanges { @@ -253,7 +256,7 @@ func (e *IndexMergeReaderExecutor) buildKeyRangesForTable(tbl table.Table) (rang ranges = append(ranges, keyRanges) continue } - keyRange, err := distsql.IndexRangesToKVRanges(dctx, getPhysicalTableID(tbl), e.indexes[i].ID, e.ranges[i]) + keyRange, err := distsql.IndexRangesToKVRangesWithDesc(dctx, getPhysicalTableID(tbl), e.indexes[i].ID, indexColDescFlags(e.indexes[i]), e.ranges[i]) if err != nil { return nil, err } diff --git a/pkg/executor/infoschema_reader.go b/pkg/executor/infoschema_reader.go index 
8b50ab852272e..6bb48e013265d 100644 --- a/pkg/executor/infoschema_reader.go +++ b/pkg/executor/infoschema_reader.go @@ -547,6 +547,11 @@ func (e *memtableRetriever) setDataForStatisticsInTable( subPart = key.Length } + collation := "A" + if key.Desc { + collation = "D" + } + record := types.MakeDatums( infoschema.CatalogVal, // TABLE_CATALOG schema.O, // TABLE_SCHEMA @@ -556,7 +561,7 @@ func (e *memtableRetriever) setDataForStatisticsInTable( index.Name.O, // INDEX_NAME i+1, // SEQ_IN_INDEX colName, // COLUMN_NAME - "A", // COLLATION + collation, // COLLATION 0, // CARDINALITY subPart, // SUB_PART nil, // PACKED diff --git a/pkg/executor/show.go b/pkg/executor/show.go index 790bb2f953892..1b0691b49789d 100644 --- a/pkg/executor/show.go +++ b/pkg/executor/show.go @@ -885,13 +885,17 @@ func (e *ShowExec) fetchShowIndex() error { ndv = colStats.NDV } + collation := "A" + if col.Desc { + collation = "D" + } e.appendRow([]any{ tb.Meta().Name.O, // Table nonUniq, // Non_unique idx.Meta().Name.O, // Key_name i + 1, // Seq_in_index colName, // Column_name - "A", // Collation + collation, // Collation ndv, // Cardinality subPart, // Sub_part nil, // Packed @@ -1250,6 +1254,9 @@ func constructResultOfShowCreateTable(ctx sessionctx.Context, dbName *ast.CIStr, colInfo = fmt.Sprintf("%s(%s)", colInfo, strconv.Itoa(c.Length)) } } + if c.Desc { + colInfo += " DESC" + } cols = append(cols, colInfo) } if idxInfo.VectorInfo != nil { diff --git a/pkg/meta/model/index.go b/pkg/meta/model/index.go index 82e3a618291d3..bd646a0b00be5 100644 --- a/pkg/meta/model/index.go +++ b/pkg/meta/model/index.go @@ -19,6 +19,7 @@ import ( "strings" "sync/atomic" + "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config/kerneltype" "github.com/pingcap/tidb/pkg/parser" "github.com/pingcap/tidb/pkg/parser/ast" @@ -284,6 +285,51 @@ type IndexInfo struct { // 2=v2 with partition ID in key only (TODO). 
GlobalIndexVersion uint8 `json:"global_index_version,omitempty"` RegionSplitPolicy *RegionSplitPolicy `json:"region_split_policy,omitempty"` // RegionSplitPolicy is the persistent split policy. + // Version is a monotonic feature fence for the index metadata format. + // 0 = legacy; all features representable in older TiDB versions. + // 1 = uses per-column descending order (any Columns[i].Desc == true). + // An older TiDB reading an index with Version greater than maxKnownIndexVersion + // must refuse to serve queries against it to avoid returning wrong results. + Version uint8 `json:"version,omitempty"` +} + +// maxKnownIndexVersion is the highest IndexInfo.Version this TiDB binary understands. +// Bump this when adding index-level features that older binaries cannot safely ignore. +const maxKnownIndexVersion uint8 = 1 + +// IsServable reports whether this TiDB binary can safely serve queries against +// the index. It returns false if the index metadata uses a newer format version +// than this binary knows about — the caller should surface a clear error rather +// than risk returning wrong rows. +func (index *IndexInfo) IsServable() bool { + return index.Version <= maxKnownIndexVersion +} + +// UnservableErr returns the error to surface when an unservable index is +// encountered during query planning or DDL. The message names the index and +// the version mismatch so an operator can tell whether to upgrade TiDB or +// drop the index before downgrading. 
+func (index *IndexInfo) UnservableErr(tableName string) error { + return errors.Errorf( + "index `%s`%s uses metadata version %d, which is newer than this TiDB binary supports (max %d); upgrade TiDB or DROP INDEX to roll back", + index.Name.O, formatTableQualifier(tableName), index.Version, maxKnownIndexVersion) +} + +func formatTableQualifier(tableName string) string { + if tableName == "" { + return "" + } + return " on table `" + tableName + "`" +} + +// HasDescColumn reports whether any column of the index is stored in descending order. +func (index *IndexInfo) HasDescColumn() bool { + for _, col := range index.Columns { + if col.Desc { + return true + } + } + return false } // Hash64 implement HashEquals interface. @@ -498,6 +544,10 @@ type IndexColumn struct { Length int `json:"length"` // Whether this index column use changing type UseChangingType bool `json:"using_changing_type,omitempty"` + // Desc indicates that this index column is stored in descending order. + // False (the zero value) means ascending order; this preserves backward + // compatibility with schemas written before descending indexes were supported. + Desc bool `json:"desc,omitempty"` } // Clone clones IndexColumn. diff --git a/pkg/meta/model/index_test.go b/pkg/meta/model/index_test.go index 546975050a640..e340392d257cf 100644 --- a/pkg/meta/model/index_test.go +++ b/pkg/meta/model/index_test.go @@ -15,6 +15,7 @@ package model import ( + "encoding/json" "fmt" "testing" @@ -76,3 +77,68 @@ func TestGlobalIndexV1SupportedForNextGen(t *testing.T) { require.True(t, GetGlobalIndexV1Supported()) } } + +func TestIndexColumnDescRoundTrip(t *testing.T) { + // Ascending is the zero value and must be omitted from JSON for backward + // compatibility with schemas written before descending indexes were supported. 
+ ascCol := IndexColumn{Name: ast.NewCIStr("a"), Offset: 0, Length: -1} + ascJSON, err := json.Marshal(ascCol) + require.NoError(t, err) + require.NotContains(t, string(ascJSON), "desc") + + descCol := IndexColumn{Name: ast.NewCIStr("b"), Offset: 1, Length: -1, Desc: true} + descJSON, err := json.Marshal(descCol) + require.NoError(t, err) + require.Contains(t, string(descJSON), `"desc":true`) + + // Unknown fields from a newer TiDB must decode cleanly on an older binary. + var decoded IndexColumn + require.NoError(t, json.Unmarshal(descJSON, &decoded)) + require.True(t, decoded.Desc) + + // Clone preserves Desc. + require.True(t, descCol.Clone().Desc) +} + +func TestIndexInfoHasDescColumn(t *testing.T) { + c0 := newColumnForTest(0, 0) + c1 := newColumnForTest(1, 1) + + idx := newIndexForTest(10, c0, c1) + require.False(t, idx.HasDescColumn()) + + idx.Columns[1].Desc = true + require.True(t, idx.HasDescColumn()) +} + +func TestIndexInfoIsServable(t *testing.T) { + idx := newIndexForTest(1, newColumnForTest(0, 0)) + + // Version 0 (legacy) and 1 (introduces Desc) are both serviceable today. + require.True(t, idx.IsServable()) + idx.Version = 1 + require.True(t, idx.IsServable()) + + // A newer-than-known version must be rejected so old binaries refuse to + // serve queries against indexes whose semantics they do not understand. + idx.Version = 255 + require.False(t, idx.IsServable()) +} + +func TestIndexInfoUnservableErr(t *testing.T) { + idx := newIndexForTest(7, newColumnForTest(0, 0)) + idx.Version = 99 + + err := idx.UnservableErr("orders") + require.Error(t, err) + // The message must name the index, the table, and the version mismatch + // so an operator can decide whether to upgrade or DROP INDEX. + msg := err.Error() + require.Contains(t, msg, idx.Name.O) + require.Contains(t, msg, "orders") + require.Contains(t, msg, "99") + require.Contains(t, msg, "DROP INDEX") + + // Empty table name is allowed (callers without context just leave it off). 
+ require.NotPanics(t, func() { _ = idx.UnservableErr("") }) +} diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go index 9083001989667..4510ee1948d63 100644 --- a/pkg/planner/core/find_best_task.go +++ b/pkg/planner/core/find_best_task.go @@ -1080,10 +1080,12 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper } // Match SortItems physical property. - all, _ := prop.AllSameOrder() - // When the prop is empty or `all` is false, `matchProperty` is better to be `PropNotMatched` because - // it needs not to keep order for index scan. - if prop.IsSortItemEmpty() || !all { + // We no longer reject mixed-direction sort items up-front (pingcap/tidb#2519): + // the per-item loop below decides whether a forward or reverse scan can + // satisfy the request, and rejects the path only when the per-column + // mismatch pattern is itself mixed (some columns naturally satisfied, + // others requiring a flipped scan — unsatisfiable by any single scan). + if prop.IsSortItemEmpty() { return property.PropNotMatched } @@ -1153,11 +1155,40 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper matchResult := property.PropMatched groupByColIdxs := make([]int, 0) colIdx := 0 + // matchedScanDescSet/matchedScanDesc record whether a forward or a reverse + // cop scan satisfies the property. For each (sortItem, idxCol) pair the + // "natural" direction is forward iff sortItem.Desc == idxCol.Desc; + // otherwise the scan would have to be reversed for that single column. + // All matched pairs must agree — if some need forward and others reverse, + // no single scan direction can satisfy the property and we reject the + // path. This is the unlock for mixed-direction composites such as + // INDEX(a ASC, b DESC) satisfying ORDER BY a ASC, b DESC with a forward + // scan, or INDEX(a, b) (both ASC) satisfying ORDER BY a DESC, b DESC + // with a reverse scan. See pingcap/tidb#2519. 
+ var matchedScanDesc bool + matchedScanDescSet := false for _, sortItem := range prop.SortItems { found := false for ; colIdx < len(idxCols); colIdx++ { // Case 1: this sort item is satisfied by the index column, go to match the next sort item. if idxColLens[colIdx] == types.UnspecifiedLength && sortItem.Col.EqualColumn(idxCols[colIdx]) { + // Index columns at colIdx >= len(path.Index.Columns) are the + // appended CommonHandle PK suffix. DDL rejects DESC on the + // columns of a clustered PRIMARY KEY (see BuildIndexInfo in + // pkg/ddl/index.go), so those suffix columns are guaranteed + // to be ascending and the default below is safe. If that + // guard is ever loosened, this code must be updated to read + // per-column direction from the primary index metadata. + thisIdxDesc := false + if path.Index != nil && colIdx < len(path.Index.Columns) { + thisIdxDesc = path.Index.Columns[colIdx].Desc + } + thisScanDesc := sortItem.Desc != thisIdxDesc + if matchedScanDescSet && thisScanDesc != matchedScanDesc { + return property.PropNotMatched + } + matchedScanDesc = thisScanDesc + matchedScanDescSet = true found = true colIdx++ break @@ -1220,9 +1251,13 @@ func matchProperty(ds *logicalop.DataSource, path *util.AccessPath, prop *proper if len(groups) > 0 { path.GroupedRanges = groups path.GroupByColIdxs = groupByColIdxs + path.MatchedScanDesc = matchedScanDesc return property.PropMatchedNeedMergeSort } } + if matchResult == property.PropMatched { + path.MatchedScanDesc = matchedScanDesc + } return matchResult } @@ -2978,7 +3013,10 @@ func convertToBatchPointGet(ds *logicalop.DataSource, prop *property.PhysicalPro } if !prop.IsSortItemEmpty() { batchPointGetPlan.KeepOrder = true - batchPointGetPlan.Desc = prop.SortItems[0].Desc + // matchProperty already encoded "do we need to flip the byte + // scan?" into MatchedScanDesc, accounting for any descending + // columns in the index. Just consume that decision. 
+ batchPointGetPlan.Desc = candidate.path.MatchedScanDesc } if candidate.path.IsSingleScan { batchPointGetPlan.SetAccessCols(candidate.path.IdxCols) diff --git a/pkg/planner/core/operator/physicalop/physical_index_scan.go b/pkg/planner/core/operator/physicalop/physical_index_scan.go index c24d4a58b8021..111c4e0d59382 100644 --- a/pkg/planner/core/operator/physicalop/physical_index_scan.go +++ b/pkg/planner/core/operator/physicalop/physical_index_scan.go @@ -695,7 +695,18 @@ func GetOriginalPhysicalIndexScan(ds *logicalop.DataSource, prop *property.Physi } // Index scan should maintain order (true for both normal sorting via SortItems and partial order via PartialOrderInfo) if prop.NeedKeepOrder() { - is.Desc = prop.GetSortDescForKeepOrder() + // matchProperty already determined whether the byte scan must flow + // forward or in reverse to satisfy this property, including the + // per-column XOR with descending storage directions. Consume that + // decision directly. PartialOrderInfo cases (TopN with prefix + // ordering) still rely on the uniform-direction reverse-scan path + // because matchPartialOrderProperty does not yet record per-column + // directions; fall back to the legacy formula there. + if path != nil && len(prop.SortItems) > 0 { + is.Desc = path.MatchedScanDesc + } else { + is.Desc = prop.GetSortDescForKeepOrder() + } is.KeepOrder = true } return is diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index af50a5d15fb31..159c971b9ccbd 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -1271,6 +1271,34 @@ func checkAutoForceIndexLookUpPushDown(ctx base.PlanContext, tblInfo *model.Tabl return checkIndexLookUpPushDownSupported(ctx, tblInfo, index, true) } +// checkAllIndicesServable returns an error if tableInfo carries any +// writable index whose metadata format is newer than this TiDB binary +// understands. 
Used by DML builders (INSERT / REPLACE / UPDATE / DELETE +// / LOAD DATA / IMPORT INTO) to fail fast before reaching table-level +// mutation paths that would silently mis-maintain the index. +// +// "Writable" here matches `tables.IsIndexWritable`, i.e. every state +// except StateDeleteOnly / StateDeleteReorganization. Restricting to +// StatePublic is wrong: StateWriteOnly and StateWriteReorganization +// indexes (mid-DDL) are also written by `addRowIndices` and +// `rebuildUpdateRecordIndices` in pkg/table/tables/tables.go via the +// same IsIndexWritable predicate. An older TiDB binary running DML +// against a table whose new DESC index is in WriteOnly would otherwise +// pass the planner gate and corrupt the index in the mutation path. +func checkAllIndicesServable(tableInfo *model.TableInfo) error { + for _, idx := range tableInfo.Indices { + // Mirror tables.IsIndexWritable on the bare IndexInfo, so the + // fence stays in lockstep with the actual write predicate. + if idx.State == model.StateDeleteOnly || idx.State == model.StateDeleteReorganization { + continue + } + if !idx.IsServable() { + return idx.UnservableErr(tableInfo.Name.O) + } + } + return nil +} + func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, indexHints []*ast.IndexHint, tbl table.Table, dbName, tblName ast.CIStr, check bool, hasFlagPartitionProcessor bool) ([]*util.AccessPath, error) { tblInfo := tbl.Meta() publicPaths := make([]*util.AccessPath, 0, len(tblInfo.Indices)+2) @@ -1346,6 +1374,18 @@ func getPossibleAccessPaths(ctx base.PlanContext, tableHints *hint.PlanHints, in continue } } + // Refuse to plan against an index whose metadata format is newer + // than this binary understands (pingcap/tidb#2519). Silently + // skipping would risk wrong results — surface a clear error so + // the operator upgrades TiDB or drops the index. 
+ // + // This check fires AFTER latest-index reconciliation so a snapshot + // copy of an index that the latest schema has dropped or moved out + // of StatePublic gets skipped before we error on its (now + // irrelevant) format version. + if !index.IsServable() { + return nil, index.UnservableErr(tblName.O) + } if index.InvertedInfo != nil { invertedIndexes[index.Name.L] = struct{}{} continue @@ -4072,6 +4112,14 @@ func (b *PlanBuilder) buildInsert(ctx context.Context, insert *ast.InsertStmt) ( } return nil, err } + // Refuse INSERT/REPLACE if any of the table's indexes uses a metadata + // version newer than this TiDB binary understands — maintaining such an + // index would risk wrong rows or mismatched encoding (pingcap/tidb#2519). + // SELECT/UPDATE/DELETE plans are guarded by getPossibleAccessPaths, but + // INSERT VALUES never enumerates access paths, so check explicitly here. + if err := checkAllIndicesServable(tableInfo); err != nil { + return nil, err + } // Build Schema with DBName otherwise ColumnRef with DBName cannot match any Column in Schema. schema, names, err := expression.TableInfo2SchemaAndNames(b.ctx.GetExprCtx(), tn.Schema, tableInfo) if err != nil { @@ -4483,6 +4531,14 @@ func (b *PlanBuilder) buildLoadData(ctx context.Context, ld *ast.LoadDataStmt) ( options = append(options, &loadDataOpt) } tnW := b.resolveCtx.GetTableName(ld.Table) + // Same servability fence as INSERT (pingcap/tidb#2519): LOAD DATA writes + // every secondary index of the target table, so an unservable index + // would be silently mis-maintained without this guard. 
+ if tnW != nil && tnW.TableInfo != nil { + if err := checkAllIndicesServable(tnW.TableInfo); err != nil { + return nil, err + } + } p := LoadData{ FileLocRef: ld.FileLocRef, OnDuplicate: ld.OnDuplicate, @@ -4784,6 +4840,15 @@ func (b *PlanBuilder) buildImportInto(ctx context.Context, ld *ast.ImportIntoStm db := b.ctx.GetSessionVars().CurrentDB return nil, infoschema.ErrTableNotExists.FastGenByArgs(db, tableInfo.Name.O) } + // Same servability fence as INSERT (pingcap/tidb#2519): IMPORT INTO + // writes every secondary index of the target table. Run it against + // the same latest-schema snapshot the rest of buildImportInto uses + // — `tnW.TableInfo` can be stale here, so checking it would let an + // older TiDB miss a writable newer-format index that exists only in + // the latest schema. + if err := checkAllIndicesServable(tableInPlan.Meta()); err != nil { + return nil, err + } schema, names, err := expression.TableInfo2SchemaAndNames(b.ctx.GetExprCtx(), ast.NewCIStr(""), tableInfo) if err != nil { return nil, err diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go index 9a315ff65ac85..8ff55ad2f1f9d 100644 --- a/pkg/planner/util/path.go +++ b/pkg/planner/util/path.go @@ -132,6 +132,19 @@ type AccessPath struct { // IsIntHandlePath indicates whether this path is table path. IsIntHandlePath bool IsCommonHandlePath bool + + // MatchedScanDesc records whether the cop scan must be reversed in order + // to satisfy the PhysicalProperty matched by matchProperty. False means a + // forward scan over the encoded keyspace already produces rows in the + // requested order; true means the consumer should set IndexScan.Desc=true + // so TiKV streams keys in reverse byte order. Meaningful only when + // matchProperty returns PropMatched or PropMatchedNeedMergeSort. 
+ // + // Computing this in matchProperty (per (sortItem, idxCol) pair) lets us + // support indexes whose columns mix ASC and DESC storage directions + // (pingcap/tidb#2519): so long as the per-column mismatch pattern is + // uniform, the scan direction is well-defined. + MatchedScanDesc bool // Forced means this path is generated by `use/force index()`. Forced bool ForceKeepOrder bool @@ -193,6 +206,7 @@ func (path *AccessPath) Clone() *AccessPath { MinAccessCondsForDNFCond: path.MinAccessCondsForDNFCond, IsIntHandlePath: path.IsIntHandlePath, IsCommonHandlePath: path.IsCommonHandlePath, + MatchedScanDesc: path.MatchedScanDesc, Forced: path.Forced, ForceKeepOrder: path.ForceKeepOrder, ForceNoKeepOrder: path.ForceNoKeepOrder, diff --git a/pkg/planner/util/path_test.go b/pkg/planner/util/path_test.go index 8187a9d61fdc1..dfa47a8d330fd 100644 --- a/pkg/planner/util/path_test.go +++ b/pkg/planner/util/path_test.go @@ -121,3 +121,21 @@ func TestOnlyPointRange(t *testing.T) { indexPath.Ranges = []*ranger.Range{&onePointRange} require.False(t, indexPath.OnlyPointRange(tc)) } + +func TestAccessPathCloneCopiesMatchedScanDesc(t *testing.T) { + // Regression for pingcap/tidb#2519: AccessPath.Clone() must propagate + // MatchedScanDesc, otherwise plan rebuilds and index-merge alternative + // cloning silently reset the field to false and a path that needed a + // reverse cop scan ends up doing a forward scan instead, producing + // rows in the wrong order. + original := &util.AccessPath{ + IsCommonHandlePath: true, + MatchedScanDesc: true, + } + cloned := original.Clone() + require.True(t, cloned.MatchedScanDesc, "MatchedScanDesc must survive Clone()") + + // Mutating the clone must not leak back. 
+ cloned.MatchedScanDesc = false + require.True(t, original.MatchedScanDesc) +} diff --git a/pkg/sessionctx/vardef/tidb_vars.go b/pkg/sessionctx/vardef/tidb_vars.go index 7f1a62f5e63c3..aed1cebadd2fc 100644 --- a/pkg/sessionctx/vardef/tidb_vars.go +++ b/pkg/sessionctx/vardef/tidb_vars.go @@ -992,6 +992,14 @@ const ( // TODO(crazycs520): remove this after foreign key GA. TiDBEnableForeignKey = "tidb_enable_foreign_key" + // TiDBEnableDescendingIndex controls whether CREATE INDEX ... (col DESC) is + // honored. When off (the default), the DESC keyword is silently ignored to + // preserve historical behavior and allow migration scripts written for MySQL + // 5.7 to continue working. When on, the descending order is persisted into + // IndexColumn and (once supporting TiKV is available) used for physical + // encoding. See pingcap/tidb#2519. + TiDBEnableDescendingIndex = "tidb_enable_descending_index" + // TiDBForeignKeyCheckInSharedLock indicates whether to use shared lock for foreign key check. TiDBForeignKeyCheckInSharedLock = "tidb_foreign_key_check_in_shared_lock" @@ -1580,6 +1588,7 @@ const ( DefTiDBEnableCollectExecutionInfo = true DefTiDBAllowAutoRandExplicitInsert = false DefTiDBEnableClusteredIndex = ClusteredIndexDefModeOn + DefTiDBEnableDescendingIndex = false DefTiDBRedactLog = Off DefTiDBRestrictedReadOnly = false DefTiDBSuperReadOnly = false @@ -1885,8 +1894,11 @@ var ( // DDLDiskQuota is the temporary variable for set disk quota for lightning DDLDiskQuota = atomic.NewUint64(DefTiDBDDLDiskQuota) // EnableForeignKey indicates whether to enable foreign key feature. - EnableForeignKey = atomic.NewBool(true) - EnableRCReadCheckTS = atomic.NewBool(false) + EnableForeignKey = atomic.NewBool(true) + // EnableDescendingIndex gates persisting the DESC flag on index columns at + // CREATE INDEX / CREATE TABLE time. See TiDBEnableDescendingIndex. 
+ EnableDescendingIndex = atomic.NewBool(DefTiDBEnableDescendingIndex) + EnableRCReadCheckTS = atomic.NewBool(false) // EnableRowLevelChecksum indicates whether to append checksum to row values. EnableRowLevelChecksum = atomic.NewBool(DefTiDBEnableRowLevelChecksum) LowResolutionTSOUpdateInterval = atomic.NewUint32(DefTiDBLowResolutionTSOUpdateInterval) diff --git a/pkg/sessionctx/variable/sysvar.go b/pkg/sessionctx/variable/sysvar.go index 7ef27374078b5..43970319de196 100644 --- a/pkg/sessionctx/variable/sysvar.go +++ b/pkg/sessionctx/variable/sysvar.go @@ -2002,6 +2002,12 @@ var defaultSysVars = []*SysVar{ }, GetGlobal: func(_ context.Context, s *SessionVars) (string, error) { return BoolToOnOff(vardef.EnableForeignKey.Load()), nil }}, + {Scope: vardef.ScopeGlobal, Name: vardef.TiDBEnableDescendingIndex, Value: BoolToOnOff(vardef.DefTiDBEnableDescendingIndex), Type: vardef.TypeBool, SetGlobal: func(_ context.Context, s *SessionVars, val string) error { + vardef.EnableDescendingIndex.Store(TiDBOptOn(val)) + return nil + }, GetGlobal: func(_ context.Context, s *SessionVars) (string, error) { + return BoolToOnOff(vardef.EnableDescendingIndex.Load()), nil + }}, {Scope: vardef.ScopeGlobal | vardef.ScopeSession, Name: vardef.CollationDatabase, Value: mysql.DefaultCollationName, skipInit: true, Validation: func(vars *SessionVars, normalizedValue string, originalValue string, scope vardef.ScopeFlag) (string, error) { return checkCollation(vars, normalizedValue, originalValue, scope) }, SetSession: func(s *SessionVars, val string) error { diff --git a/pkg/table/tables/mutation_checker.go b/pkg/table/tables/mutation_checker.go index b4a65422b5623..58df654bc826f 100644 --- a/pkg/table/tables/mutation_checker.go +++ b/pkg/table/tables/mutation_checker.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/pkg/table" "github.com/pingcap/tidb/pkg/tablecodec" "github.com/pingcap/tidb/pkg/types" + "github.com/pingcap/tidb/pkg/util/codec" "github.com/pingcap/tidb/pkg/util/collate" 
"github.com/pingcap/tidb/pkg/util/dbterror" "github.com/pingcap/tidb/pkg/util/logutil" @@ -272,6 +273,26 @@ func checkIndexKeys( loc := tc.Location() for i, v := range decodedIndexValues { fieldType := t.Columns[indexInfo.Columns[i].Offset].FieldType.ArrayType() + // DecodeIndexKV returns column bytes whose origin depends on the + // index encoding version: for some columns the bytes were lifted + // from the key (and therefore bitwise-complemented when the + // column is DESC), for others they came from the index value + // (the restored-data path used for _bin collations and clustered + // v1 indexes), where the bytes are never complemented regardless + // of the column direction. + // + // Auto-detect rather than blindly trust IndexColumn.Desc: only + // un-complement when the leading flag byte is recognisably a + // DESC marker. ASC bytes (whether from key or value) decode + // directly. Restored bytes for a DESC column appear as ASC here + // and are correctly left untouched. + if indexInfo.Columns[i].Desc && len(v) > 0 && codec.IsDescFlag(v[0]) { + cp := make([]byte, len(v)) + for j := range v { + cp[j] = v[j] ^ 0xFF + } + v = cp + } datum, err := tablecodec.DecodeColumnValue(v, fieldType, loc) if err != nil { return errors.Trace(err) @@ -351,7 +372,10 @@ func collectTableMutationsFromBufferStage(t *TableCommon, memBuffer kv.MemBuffer } } } else { - _, m.indexID, _, err = tablecodec.DecodeIndexKey(m.key) + // Only the index ID is consumed downstream. Avoid DecodeIndexKey, + // which also parses every column value as a string and therefore + // fails on descending-complemented bytes. 
+ _, m.indexID, _, err = tablecodec.DecodeKeyHead(m.key) if err != nil { err = errors.Trace(err) } diff --git a/pkg/tablecodec/tablecodec.go b/pkg/tablecodec/tablecodec.go index d6ef6d97e21fd..796ed409e41fc 100644 --- a/pkg/tablecodec/tablecodec.go +++ b/pkg/tablecodec/tablecodec.go @@ -1247,7 +1247,18 @@ func GenIndexKey(loc *time.Location, tblInfo *model.TableInfo, idxInfo *model.In key = GetIndexKeyBuf(buf, RecordRowKeyLen+len(indexedValues)*9+9) key = appendTableIndexPrefix(key, phyTblID) key = codec.EncodeInt(key, idxInfo.ID) - key, err = codec.EncodeKey(loc, key, indexedValues...) + // For all-ascending indexes (the overwhelmingly common case) the fast + // path through EncodeKey produces byte-identical output to the DESC-aware + // encoder, so stay on it to avoid building a []bool per insert. + if idxInfo.HasDescColumn() { + desc := make([]bool, len(idxInfo.Columns)) + for i, c := range idxInfo.Columns { + desc[i] = c.Desc + } + key, err = codec.EncodeKeyWithDesc(loc, key, desc, indexedValues...) + } else { + key, err = codec.EncodeKey(loc, key, indexedValues...) + } if err != nil { return nil, false, err } diff --git a/pkg/tablecodec/tablecodec_test.go b/pkg/tablecodec/tablecodec_test.go index b63438a2fae48..58f243826eb85 100644 --- a/pkg/tablecodec/tablecodec_test.go +++ b/pkg/tablecodec/tablecodec_test.go @@ -375,6 +375,89 @@ func TestCutKeyNew(t *testing.T) { require.Equal(t, types.NewIntDatum(100), handleVal) } +func TestGenIndexKeyWithDescColumns(t *testing.T) { + tblInfo := &model.TableInfo{ + ID: 1, + Name: ast.NewCIStr("t"), + Columns: []*model.ColumnInfo{ + {ID: 1, Name: ast.NewCIStr("a"), Offset: 0, FieldType: *types.NewFieldType(mysql.TypeLonglong)}, + {ID: 2, Name: ast.NewCIStr("b"), Offset: 1, FieldType: *types.NewFieldType(mysql.TypeLonglong)}, + }, + } + // Ascending-only index: the DESC-aware code path must produce byte-for- + // byte identical output to the ordinary path so existing indexes read by + // older TiDB binaries keep working. 
+ ascIdx := &model.IndexInfo{ + ID: 10, + Name: ast.NewCIStr("idx_asc"), + Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength}, + }, + } + // Mixed composite: a ASC, b DESC. The second column's bytes must be + // bitwise-complemented relative to the ascending encoding so a forward + // RocksDB iterator yields (a ASC, b DESC) row order. + mixedIdx := &model.IndexInfo{ + ID: 11, + Name: ast.NewCIStr("idx_mixed"), + Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength, Desc: false}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength, Desc: true}, + }, + Version: 1, + } + + loc := time.UTC + vals := []types.Datum{types.NewIntDatum(5), types.NewIntDatum(7)} + + ascKey, _, err := GenIndexKey(loc, tblInfo, ascIdx, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + + // Recreate an all-ascending key using EncodeKeyWithDesc(nil) to assert + // the fast path and the DESC-aware path agree bit-for-bit. + explicitAsc, _, err := GenIndexKey(loc, tblInfo, &model.IndexInfo{ + ID: ascIdx.ID, Name: ascIdx.Name, Columns: []*model.IndexColumn{ + {Name: ast.NewCIStr("a"), Offset: 0, Length: types.UnspecifiedLength, Desc: false}, + {Name: ast.NewCIStr("b"), Offset: 1, Length: types.UnspecifiedLength, Desc: false}, + }, + }, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + require.Equal(t, ascKey, explicitAsc, "all-ASC desc flags must match EncodeKey's output") + + mixedKey, _, err := GenIndexKey(loc, tblInfo, mixedIdx, tblInfo.ID, vals, kv.IntHandle(1), nil) + require.NoError(t, err) + + // The two keys share the table/index prefix plus column a's encoding, + // and diverge on column b: the mixed key has b's bytes complemented. 
+ require.NotEqual(t, ascKey, mixedKey, "mixed-direction index must produce different bytes") + + // Re-encode the column region ourselves and compare against the + // trailing portion of each generated key (the prefix is identical + // because we supplied the same tblInfo/idxID). + ascCols, err := codec.EncodeKey(loc, nil, vals...) + require.NoError(t, err) + mixedCols, err := codec.EncodeKeyWithDesc(loc, nil, []bool{false, true}, vals...) + require.NoError(t, err) + // Locate each column region inside the generated keys: the prefix length is + // identical for both because we supplied the same tblInfo/idxID. + require.True(t, len(ascKey) >= len(ascCols)) + require.True(t, len(mixedKey) >= len(mixedCols)) + require.Equal(t, ascCols, ascKey[len(ascKey)-len(ascCols)-9:len(ascKey)-9], + "ASC index column bytes must match EncodeKey output (9 bytes trailing handle suffix)") + require.Equal(t, mixedCols, mixedKey[len(mixedKey)-len(mixedCols)-9:len(mixedKey)-9], + "mixed index column bytes must match EncodeKeyWithDesc output") + + // Decoding round-trip: cut past the column bytes using DESC-aware cutter. + colRegion := mixedKey[len(mixedKey)-len(mixedCols)-9 : len(mixedKey)-9] + rem, d0, err := codec.DecodeOneWithDesc(colRegion, false) + require.NoError(t, err) + require.Equal(t, int64(5), d0.GetInt64()) + _, d1, err := codec.DecodeOneWithDesc(rem, true) + require.NoError(t, err) + require.Equal(t, int64(7), d1.GetInt64()) +} + func TestCutKey(t *testing.T) { colIDs := []int64{1, 2, 3} values := []types.Datum{types.NewIntDatum(1), types.NewBytesDatum([]byte("abc")), types.NewFloat64Datum(5.5)} diff --git a/pkg/util/codec/codec.go b/pkg/util/codec/codec.go index 6974393011437..296eeb3e7f7f3 100644 --- a/pkg/util/codec/codec.go +++ b/pkg/util/codec/codec.go @@ -57,6 +57,26 @@ const ( // IntHandleFlag is only used to encode int handle key. 
 const IntHandleFlag = intFlag
 
+// IsDescFlag reports whether b is a leading flag byte from an EncodeKeyWithDesc
+// descending-column encoding (the bitwise complement of one of the ascending
+// flag bytes supported by peek/DecodeOne). Callers that received column bytes
+// from a heterogeneous source (e.g. tablecodec.DecodeIndexKV mixes key-derived
+// and value-derived bytes) can use this to decide whether the bytes need to be
+// un-complemented before being interpreted as a regular memcomparable datum.
+// See pingcap/tidb#2519.
+//
+// NOTE(review): ^floatFlag is 0xFA, which coincides with maxFlag, so this
+// returns true for a genuine range-sentinel byte as well. Per the DecodeOne
+// documentation, maxFlag only appears in range sentinels and never on stored
+// rows — callers must not feed sentinel bytes here.
+func IsDescFlag(b byte) bool {
+	switch b {
+	case ^NilFlag, ^bytesFlag, ^compactBytesFlag, ^intFlag, ^uintFlag,
+		^floatFlag, ^decimalFlag, ^durationFlag:
+		return true
+	}
+	return false
+}
+
+// isDescFlag is the unexported alias kept for in-package call sites that
+// predate IsDescFlag.
+func isDescFlag(b byte) bool { return IsDescFlag(b) }
+
 const (
 	sizeUint64 = unsafe.Sizeof(uint64(0))
 	sizeUint8  = unsafe.Sizeof(uint8(0))
@@ -307,6 +327,103 @@ func EncodeKey(loc *time.Location, b []byte, v ...types.Datum) ([]byte, error) {
 	return encode(loc, b, v, true)
 }
 
+// EncodeKeyWithDesc is like EncodeKey, but bitwise-complements the encoded
+// bytes of every datum whose corresponding entry in desc is true. Complementing
+// a memcomparable-encoded datum reverses its byte comparison order, so a
+// forward iterator over keys built this way yields descending order for those
+// columns. Columns whose desc entry is false (or whose index is past len(desc))
+// are encoded identically to EncodeKey, preserving mixed-direction composite
+// indexes like INDEX(a ASC, b DESC). See pingcap/tidb#2519.
+//
+// Returns an error if any DESC column has a Kind whose underlying encoding is
+// not memcomparable in the sense that "complement of encoded bytes" preserves
+// reverse order — currently KindMysqlJSON and KindVectorFloat32. Those types
+// aren't usable as ordinary index columns anyway (json/vector indexes go
+// through their own pipeline), but the explicit rejection prevents a
+// caller from accidentally producing keys whose comparison semantics would
+// be undefined.
+func EncodeKeyWithDesc(loc *time.Location, b []byte, desc []bool, v ...types.Datum) ([]byte, error) {
+	for i := range v {
+		isDesc := i < len(desc) && desc[i]
+		if isDesc {
+			switch v[i].Kind() {
+			case types.KindMysqlJSON, types.KindVectorFloat32:
+				return b, errors.Errorf(
+					"descending-order encoding is not supported for column kind %v; this kind has no memcomparable form whose bitwise complement preserves reverse order",
+					v[i].Kind())
+			}
+		}
+		// Encode one datum at a time so we know exactly which byte range to
+		// complement for a DESC column.
+		start := len(b)
+		var err error
+		b, err = encode(loc, b, v[i:i+1], true)
+		if err != nil {
+			return b, err
+		}
+		if isDesc {
+			for j := start; j < len(b); j++ {
+				b[j] ^= 0xFF
+			}
+		}
+	}
+	return b, nil
+}
+
+// DecodeOneWithDesc decodes a single datum from a byte slice produced by
+// EncodeKeyWithDesc. When desc is false it is equivalent to DecodeOne; when
+// true it inverts the encoded bytes and dispatches through the existing
+// decoder, so all types supported by DecodeOne work unchanged.
+//
+// This allocates a scratch buffer to hold the complemented prefix. Callers on
+// hot paths that decode many consecutive descending columns should consider a
+// streaming variant; none of the current callsites are hot enough to justify
+// the added complexity.
+func DecodeOneWithDesc(b []byte, desc bool) (remain []byte, d types.Datum, err error) {
+	if !desc {
+		return DecodeOne(b)
+	}
+	if len(b) < 1 {
+		return nil, d, errors.New("invalid encoded key")
+	}
+	// peek() inspects the flag byte and computes the encoded length, so we
+	// complement b into a scratch buffer first. A full copy keeps the logic
+	// simple — DecodeOne handles every type uniformly on the scratch copy.
+	cp := make([]byte, len(b))
+	for i := range b {
+		cp[i] = b[i] ^ 0xFF
+	}
+	rem, d, err := DecodeOne(cp)
+	if err != nil {
+		return nil, d, errors.Trace(err)
+	}
+	// Map the scratch copy's consumption back onto the caller's slice.
+	consumed := len(b) - len(rem)
+	return b[consumed:], d, nil
+}
+
+// CutOneWithDesc cuts the first encoded datum from b, where the column may be
+// stored with descending-complemented bytes (desc=true). It is equivalent to
+// CutOne when desc is false. Used by index-key decoders that only need to
+// advance past a column without materialising its value.
+func CutOneWithDesc(b []byte, desc bool) (data []byte, remain []byte, err error) {
+	if !desc {
+		return CutOne(b)
+	}
+	if len(b) < 1 {
+		return nil, nil, errors.New("invalid encoded key")
+	}
+	// peek() in complement space gives the encoded length. NOTE(review): an
+	// earlier comment here promised complementing only "the minimum slice
+	// needed", but the code below complements all of b before peeking. The
+	// full copy mirrors DecodeOneWithDesc and keeps the logic simple; shrink
+	// the scratch to the peeked length only if this shows up in a profile.
+	cp := make([]byte, len(b))
+	for i := range b {
+		cp[i] = b[i] ^ 0xFF
+	}
+	l, err := peek(cp)
+	if err != nil {
+		return nil, nil, errors.Trace(err)
+	}
+	return b[:l], b[l:], nil
+}
+
 // EncodeValue appends the encoded values to byte slice b, returning the appended
 // slice. It does not guarantee the order for comparison.
 func EncodeValue(loc *time.Location, b []byte, v ...types.Datum) ([]byte, error) {
@@ -1511,6 +1628,34 @@ func peek(b []byte) (length int, err error) {
 		l, err = types.PeekBytesAsJSON(b)
 	case vectorFloat32Flag:
 		l, err = types.PeekBytesAsVectorFloat32(b)
+	// Descending-order flags (pingcap/tidb#2519). EncodeKeyWithDesc writes
+	// bitwise-complemented bytes, so the leading flag becomes ^original. We
+	// compute the length of the column exactly as for the ascending variant
+	// because complement preserves byte count. Variable-length types need to
+	// walk group markers in complement space — we do so by complementing a
+	// scratch copy of the body.
+	case ^NilFlag:
+		// NULL in descending encoding has no body bytes.
+	case ^intFlag, ^uintFlag, ^floatFlag, ^durationFlag:
+		// Fixed 8-byte body for each of these; complement preserves length.
+		l = 8
+	case ^bytesFlag:
+		tmp := make([]byte, len(b))
+		for i := range b {
+			tmp[i] = b[i] ^ 0xFF
+		}
+		l, err = peekBytes(tmp)
+	case ^compactBytesFlag:
+		tmp := make([]byte, len(b))
+		for i := range b {
+			tmp[i] = b[i] ^ 0xFF
+		}
+		l, err = peekCompactBytes(tmp)
+	case ^decimalFlag:
+		tmp := make([]byte, len(b))
+		for i := range b {
+			tmp[i] = b[i] ^ 0xFF
+		}
+		l, err = types.DecimalPeak(tmp)
+	// NOTE(review): no ^varintFlag/^uvarintFlag cases — EncodeKeyWithDesc
+	// always goes through the comparable encoder (encode(..., true)), which
+	// emits intFlag/uintFlag. Confirm no caller feeds value-encoded
+	// (varint) bytes through the descending path.
 	default:
 		return 0, errors.Errorf("invalid encoded key flag %v", flag)
 	}
@@ -1584,6 +1729,10 @@ type Decoder struct {
 	// buf is only used for DecodeBytes to avoid the cost of makeslice.
 	buf []byte
+	// descBuf is a reusable scratch buffer for the descending-column
+	// auto-detect path so a per-row index scan does not allocate a fresh
+	// slice for every DESC column it decodes (pingcap/tidb#2519).
+	descBuf []byte
 }
 
 // NewDecoder creates a Decoder.
@@ -1595,10 +1744,45 @@ func NewDecoder(chk *chunk.Chunk, timezone *time.Location) *Decoder {
 }
 
 // DecodeOne decodes one value to chunk and returns the remained bytes.
+//
+// Descending-order columns (pingcap/tidb#2519) are written as bitwise-
+// complemented memcomparable bytes. Their leading flag byte is the bit-
+// complement of the ascending flag (e.g. intFlag=0x03 becomes 0xFC, NilFlag
+// =0x00 becomes 0xFF). When the input begins with one of those complemented
+// flags we transparently invert just the bytes this column consumes and fall
+// through to the ascending switch — callers need no schema awareness. The
+// 0xFA collision between ^floatFlag and maxFlag is not observable on stored
+// rows because maxFlag only appears in range sentinels, which never reach
+// this decoder.
 func (decoder *Decoder) DecodeOne(b []byte, colIdx int, ft *types.FieldType) (remain []byte, err error) {
 	if len(b) < 1 {
 		return nil, errors.New("invalid encoded key")
 	}
+	if isDescFlag(b[0]) {
+		// peek handles complemented flags directly, so no scratch copy is
+		// needed just to learn the column's encoded length.
+		length, err := peek(b)
+		if err != nil {
+			return nil, errors.Trace(err)
+		}
+		// Reuse decoder.descBuf instead of allocating a fresh scratch slice
+		// per DESC column. This is on the per-row decode hot path; for a
+		// composite DESC index every row would otherwise drop a few-hundred-
+		// byte allocation. The buffer's contents are not referenced after
+		// the recursive DecodeOne call returns, so reuse is safe even for
+		// variable-length types whose decoded form (Bytes) gets copied into
+		// the chunk inside the ASC branch.
+		if cap(decoder.descBuf) < length {
+			decoder.descBuf = make([]byte, length)
+		} else {
+			decoder.descBuf = decoder.descBuf[:length]
+		}
+		for i := range length {
+			decoder.descBuf[i] = b[i] ^ 0xFF
+		}
+		if _, err := decoder.DecodeOne(decoder.descBuf, colIdx, ft); err != nil {
+			return nil, errors.Trace(err)
+		}
+		return b[length:], nil
+	}
 	chk := decoder.chk
 	flag := b[0]
 	b = b[1:]
diff --git a/pkg/util/codec/codec_test.go b/pkg/util/codec/codec_test.go
index 88f5c6d677ecf..23b47b7dce69e 100644
--- a/pkg/util/codec/codec_test.go
+++ b/pkg/util/codec/codec_test.go
@@ -113,6 +113,220 @@ func estimateValuesSize(typeCtx types.Context, vals []types.Datum) (int, error)
 	return size, nil
 }
 
+func TestEncodeKeyWithDescRoundTrip(t *testing.T) {
+	// Every datum kind we care about for an index key. Passing nil for desc
+	// or all-false must be identical to EncodeKey so the encoding is a strict
+	// superset of the ascending path.
+	typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local)
+	vals := types.MakeDatums(int64(-5), uint64(42), float64(3.14), []byte("hello"), "world", nil)
+
+	asc, err := EncodeKey(typeCtx.Location(), nil, vals...)
+	require.NoError(t, err)
+	same, err := EncodeKeyWithDesc(typeCtx.Location(), nil, nil, vals...)
+ require.NoError(t, err) + require.Equal(t, asc, same, "nil desc slice must match EncodeKey byte-for-byte") + + // Every column descending: each column's encoded prefix is bitwise- + // complemented compared to the ascending form. We decode one datum at a + // time with DecodeOneWithDesc and check we recover the original. + desc := []bool{true, true, true, true, true, true} + allDesc, err := EncodeKeyWithDesc(typeCtx.Location(), nil, desc, vals...) + require.NoError(t, err) + remain := allDesc + for i, v := range vals { + var got types.Datum + remain, got, err = DecodeOneWithDesc(remain, true) + require.NoError(t, err, "column %d", i) + switch v.Kind() { + case types.KindInt64: + require.Equal(t, types.KindInt64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetInt64(), got.GetInt64(), "column %d", i) + case types.KindUint64: + require.Equal(t, types.KindUint64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetUint64(), got.GetUint64(), "column %d", i) + case types.KindFloat64: + require.Equal(t, types.KindFloat64, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetFloat64(), got.GetFloat64(), "column %d", i) + case types.KindBytes, types.KindString: + // Strings are encoded via bytesFlag and decoded as KindBytes — + // this mirrors the ascending DecodeOne behaviour. + require.Equal(t, types.KindBytes, got.Kind(), "column %d kind", i) + require.Equal(t, v.GetBytes(), got.GetBytes(), "column %d", i) + case types.KindNull: + require.Equal(t, types.KindNull, got.Kind(), "column %d kind", i) + } + } + require.Empty(t, remain, "every byte should have been consumed") + + // Mixed composite: (a ASC, b DESC, c ASC). The ASC columns' bytes must + // match EncodeKey's output and only the DESC column's bytes should be + // complemented. + mixedVals := types.MakeDatums(int64(7), int64(9), int64(11)) + mixed, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false, true, false}, mixedVals...) 
+ require.NoError(t, err) + // First column bytes: same as EncodeKey of just int64(7). + prefix, err := EncodeKey(typeCtx.Location(), nil, mixedVals[0]) + require.NoError(t, err) + require.Equal(t, prefix, mixed[:len(prefix)]) + // Decoding round-trips with per-column desc flags. + rem := mixed + rem, d0, err := DecodeOneWithDesc(rem, false) + require.NoError(t, err) + require.Equal(t, int64(7), d0.GetInt64()) + rem, d1, err := DecodeOneWithDesc(rem, true) + require.NoError(t, err) + require.Equal(t, int64(9), d1.GetInt64()) + rem, d2, err := DecodeOneWithDesc(rem, false) + require.NoError(t, err) + require.Equal(t, int64(11), d2.GetInt64()) + require.Empty(t, rem) +} + +func TestEncodeKeyWithDescOrdering(t *testing.T) { + // The core property: for a DESC column, user value a < b must produce + // encoded(a) > encoded(b) in byte order. A forward scan over the encoded + // keyspace then yields descending user order. + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + enc := func(v int64, desc bool) []byte { + b, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{desc}, types.NewIntDatum(v)) + require.NoError(t, err) + return b + } + // Ascending sanity: a < b → encoded(a) < encoded(b). + require.Less(t, bytes.Compare(enc(-100, false), enc(-1, false)), 0) + require.Less(t, bytes.Compare(enc(-1, false), enc(0, false)), 0) + require.Less(t, bytes.Compare(enc(0, false), enc(1, false)), 0) + require.Less(t, bytes.Compare(enc(1, false), enc(1<<30, false)), 0) + + // Descending: byte order inverts. + require.Greater(t, bytes.Compare(enc(-100, true), enc(-1, true)), 0) + require.Greater(t, bytes.Compare(enc(-1, true), enc(0, true)), 0) + require.Greater(t, bytes.Compare(enc(0, true), enc(1, true)), 0) + require.Greater(t, bytes.Compare(enc(1, true), enc(1<<30, true)), 0) + + // NULL on a DESC column must sort AFTER real values (MySQL 8.0's + // "NULLS LAST" default for ORDER BY ... DESC). 
Encoded as NilFlag=0x00 + // complemented to 0xFF, it's the largest possible first byte, so a + // forward scan reaches it last. + nullDesc, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, types.NewDatum(nil)) + require.NoError(t, err) + require.Greater(t, bytes.Compare(nullDesc, enc(1<<62, true)), 0, + "NULL must sort after any real value on a DESC column") +} + +func TestEncodeKeyWithDescRangeSentinels(t *testing.T) { + // Range builders place MinNotNullDatum and MaxValueDatum at infinity-side + // boundaries. When such a sentinel hits a DESC column its encoded bytes + // are bitwise-complemented along with the rest, and the relative byte + // order between sentinels, real values, and NULL must still produce a + // well-formed scan range. + // + // The semantics we lock down here: + // * NULL on a DESC column sorts LAST in byte order (MySQL "DESC NULLS + // LAST" default), so a forward scan reaches it after every real value. + // * MaxValueDatum on a DESC column sorts FIRST (it's the user-visible + // +∞, so "largest first" comes first in DESC order). + // * MinNotNullDatum on a DESC column sorts BETWEEN real values and NULL + // (it's the user-visible "smallest non-null", so it comes just before + // NULL in DESC order). + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + + encDesc := func(d types.Datum) []byte { + out, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, d) + require.NoError(t, err) + return out + } + + nullBytes := encDesc(types.NewDatum(nil)) + maxBytes := encDesc(types.MaxValueDatum()) + minBytes := encDesc(types.MinNotNullDatum()) + smallVal := encDesc(types.NewIntDatum(1)) + largeVal := encDesc(types.NewIntDatum(1 << 30)) + + // MaxValue ≤ every real value (in user terms +∞ ≥ x; in DESC byte order + // that flips to "comes first"). 
+ require.Less(t, bytes.Compare(maxBytes, smallVal), 0, + "DESC MaxValueDatum must sort before any real value") + require.Less(t, bytes.Compare(maxBytes, largeVal), 0) + + // Real values still sort largest→smallest among themselves. + require.Less(t, bytes.Compare(largeVal, smallVal), 0, + "DESC: larger user value -> smaller byte encoding") + + // MinNotNull ≥ every real value (it represents -∞ on the non-null side). + require.Greater(t, bytes.Compare(minBytes, smallVal), 0, + "DESC MinNotNullDatum must sort after every real value") + require.Greater(t, bytes.Compare(minBytes, largeVal), 0) + + // NULL sorts strictly after MinNotNull. + require.Greater(t, bytes.Compare(nullBytes, minBytes), 0, + "DESC NULL must sort after MinNotNullDatum") + + // And after every real value (i.e. NULLs come last in DESC order). + require.Greater(t, bytes.Compare(nullBytes, smallVal), 0) + require.Greater(t, bytes.Compare(nullBytes, largeVal), 0) + + // FullRange [Null, MaxValue] in user terms, after the swap that the + // distsql layer performs, gives a well-ordered byte range: + // byteLow = encode_DESC(MaxValue) (smaller) + // byteHigh = encode_DESC(Null) (larger) + // Walking the keyspace forward from byteLow to byteHigh hits every real + // value plus NULL. + require.Less(t, bytes.Compare(maxBytes, nullBytes), 0, + "forward walk from MaxValue to NULL must be a non-empty byte range") +} + +func TestEncodeKeyWithDescRejectsUnsupportedKinds(t *testing.T) { + // JSON and VECTOR_FLOAT32 don't have a memcomparable form whose bitwise + // complement preserves reverse order, so EncodeKeyWithDesc must refuse + // rather than silently producing a key whose comparison semantics are + // undefined. ASC encoding of these kinds remains unaffected. 
+ typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + + jsonDatum := types.NewDatum(types.CreateBinaryJSON(map[string]any{"k": "v"})) + _, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{true}, jsonDatum) + require.Error(t, err, "DESC encoding of KindMysqlJSON must be rejected") + require.Contains(t, err.Error(), "descending-order encoding is not supported") + + // ASC encoding of the same datum still works. + _, err = EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false}, jsonDatum) + require.NoError(t, err, "ASC encoding of JSON must keep working") + + // Mixed: an ASC JSON column followed by a DESC int column is fine. + mixed, err := EncodeKeyWithDesc(typeCtx.Location(), nil, []bool{false, true}, + jsonDatum, types.NewIntDatum(7)) + require.NoError(t, err) + require.NotEmpty(t, mixed) +} + +func TestCutOneWithDesc(t *testing.T) { + // CutOneWithDesc must advance past a DESC-encoded column by exactly the + // same number of bytes the encoder wrote — otherwise subsequent columns + // of a composite key would be misaligned. + typeCtx := types.DefaultStmtNoWarningContext.WithLocation(time.Local) + composite, err := EncodeKeyWithDesc( + typeCtx.Location(), nil, + []bool{false, true, false}, + types.NewIntDatum(1), types.NewBytesDatum([]byte("middle")), types.NewIntDatum(3), + ) + require.NoError(t, err) + + rem := composite + part0, rem, err := CutOneWithDesc(rem, false) + require.NoError(t, err) + require.NotEmpty(t, part0) + part1, rem, err := CutOneWithDesc(rem, true) + require.NoError(t, err) + require.NotEmpty(t, part1) + part2, rem, err := CutOneWithDesc(rem, false) + require.NoError(t, err) + require.NotEmpty(t, part2) + require.Empty(t, rem) + + // The three parts must concatenate back to the original key. 
+ require.Equal(t, composite, append(append(append([]byte{}, part0...), part1...), part2...)) +} + func TestCodecKeyCompare(t *testing.T) { table := []struct { Left []types.Datum diff --git a/tests/desc_index_e2e.sql b/tests/desc_index_e2e.sql new file mode 100644 index 0000000000000..12d34f867f7ff --- /dev/null +++ b/tests/desc_index_e2e.sql @@ -0,0 +1,170 @@ +-- End-to-end smoke test for descending-order indexes (pingcap/tidb#2519). +-- +-- Run via the wrapper at tests/run_desc_index_e2e.sh, which boots a tiup +-- playground cluster against caller-supplied TiDB and TiKV binaries: +-- +-- TIDB_BIN=/path/to/bin/tidb-server \ +-- TIKV_BIN=/path/to/target/release/tikv-server \ +-- ./tests/run_desc_index_e2e.sh +-- +-- Or directly against any cluster that exposes 127.0.0.1:4000: +-- +-- mysql -h 127.0.0.1 -P 4000 -u root --force < tests/desc_index_e2e.sql +-- +-- Each section runs a small SQL block with the expected behaviour annotated +-- above it. A failure (wrong row order, missing rows, unexpected error) +-- indicates the wire format between TiDB and TiKV is inconsistent for that +-- scenario. Section 3 deliberately exercises a DDL that must be rejected, +-- so use --force when running interactively. + +-- ============================================================================ +-- 0. Setup +-- ============================================================================ +CREATE DATABASE IF NOT EXISTS test; +USE test; + +-- Drop any leftover state from a previous run. +DROP TABLE IF EXISTS desc_e2e_single; +DROP TABLE IF EXISTS desc_e2e_mixed; +DROP TABLE IF EXISTS desc_e2e_pk_clustered_should_fail; +DROP TABLE IF EXISTS desc_e2e_pk_nonclustered; +DROP TABLE IF EXISTS desc_e2e_strings; +DROP TABLE IF EXISTS desc_e2e_expression; + +-- The feature is opt-in. 
+SELECT @@global.tidb_enable_descending_index AS before_set; +SET @@global.tidb_enable_descending_index = ON; +SELECT @@global.tidb_enable_descending_index AS after_set; + +-- ============================================================================ +-- 1. Single descending column: INDEX(b DESC) +-- ============================================================================ +CREATE TABLE desc_e2e_single (a INT, b INT, INDEX idx_b (b DESC)); +INSERT INTO desc_e2e_single VALUES (1, 10), (2, 20), (3, 5), (4, 30), (5, 15); + +-- Metadata round-trip +SHOW CREATE TABLE desc_e2e_single; +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_single' AND index_name='idx_b'; +-- Expected: D + +-- Forward scan satisfies ORDER BY b DESC: no Sort, no 'desc' flag. +EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected order: 30, 20, 15, 10, 5 + +-- Reverse scan satisfies ORDER BY b ASC: 'keep order:true, desc' in plan. +EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b ASC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b ASC; +-- Expected order: 5, 10, 15, 20, 30 + +-- Point lookup on the DESC column. +EXPLAIN FORMAT='brief' SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 20; +SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 20; +-- Expected: 2 + +SELECT a FROM desc_e2e_single USE INDEX(idx_b) WHERE b = 999; +-- Expected: empty result + +-- Range scan +EXPLAIN FORMAT='brief' SELECT b FROM desc_e2e_single USE INDEX(idx_b) WHERE b BETWEEN 10 AND 25 ORDER BY b DESC; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) WHERE b BETWEEN 10 AND 25 ORDER BY b DESC; +-- Expected: 20, 15, 10 + +-- ============================================================================ +-- 2. 
Mixed-direction composite: INDEX(a, b DESC) — MySQL 8.0 flagship case +-- ============================================================================ +CREATE TABLE desc_e2e_mixed (a INT, b INT, INDEX idx_ab (a, b DESC)); +INSERT INTO desc_e2e_mixed VALUES (1, 10), (1, 5), (1, 15), (2, 20), (2, 8), (2, 12); + +-- Forward scan satisfies ORDER BY a ASC, b DESC (column-by-column match). +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a ASC, b DESC; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a ASC, b DESC; +-- Expected: (1,15)(1,10)(1,5)(2,20)(2,12)(2,8) + +-- Reverse scan satisfies ORDER BY a DESC, b ASC (bitwise complement). +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a DESC, b ASC; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a DESC, b ASC; +-- Expected: (2,8)(2,12)(2,20)(1,5)(1,10)(1,15) + +-- Unsatisfiable by either direction: planner inserts Sort. +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a, b; +SELECT a, b FROM desc_e2e_mixed USE INDEX(idx_ab) ORDER BY a, b; +-- Expected: (1,5)(1,10)(1,15)(2,8)(2,12)(2,20) + +-- ============================================================================ +-- 3. PRIMARY KEY direction guards +-- ============================================================================ + +-- Clustered PK with DESC must be rejected at DDL time. +-- Expected: error "DESC is not supported on the columns of a clustered PRIMARY KEY" +CREATE TABLE desc_e2e_pk_clustered_should_fail (a INT, b INT, PRIMARY KEY (a, b DESC) CLUSTERED); + +-- NONCLUSTERED PK with DESC is allowed (encoded as a unique secondary index). 
+CREATE TABLE desc_e2e_pk_nonclustered (a INT, b INT, PRIMARY KEY (a DESC) NONCLUSTERED); +INSERT INTO desc_e2e_pk_nonclustered VALUES (10, 1), (5, 2), (20, 3), (1, 4); +SELECT a FROM desc_e2e_pk_nonclustered USE INDEX(`PRIMARY`) ORDER BY a DESC; +-- Expected: 20, 10, 5, 1 + +-- ============================================================================ +-- 4. String column DESC +-- ============================================================================ +CREATE TABLE desc_e2e_strings (id INT, name VARCHAR(32), INDEX idx_name (name DESC)); +INSERT INTO desc_e2e_strings VALUES (1,'apple'),(2,'banana'),(3,'cherry'),(4,'date'),(5,'elderberry'); + +EXPLAIN FORMAT='brief' SELECT name FROM desc_e2e_strings USE INDEX(idx_name) ORDER BY name DESC; +SELECT name FROM desc_e2e_strings USE INDEX(idx_name) ORDER BY name DESC; +-- Expected: elderberry, date, cherry, banana, apple + +SELECT id FROM desc_e2e_strings USE INDEX(idx_name) WHERE name = 'cherry'; +-- Expected: 3 + +-- ============================================================================ +-- 5. Expression index with DESC +-- ============================================================================ +CREATE TABLE desc_e2e_expression (a INT, b INT, INDEX idx_expr ((a + b) DESC)); +INSERT INTO desc_e2e_expression VALUES (1,10),(2,20),(3,5),(4,30),(5,15); + +SHOW CREATE TABLE desc_e2e_expression; +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_expression'; +-- Expected: D + +EXPLAIN FORMAT='brief' SELECT a, b FROM desc_e2e_expression USE INDEX(idx_expr) ORDER BY (a + b) DESC; +SELECT a, b FROM desc_e2e_expression USE INDEX(idx_expr) ORDER BY (a + b) DESC; +-- Expected: rows with a+b in descending order: (4,30)=34, (2,20)=22, (5,15)=20, (1,10)=11, (3,5)=8 + +-- ============================================================================ +-- 6. 
UPDATE / DELETE against DESC indexes +-- ============================================================================ +UPDATE desc_e2e_single SET a = a + 100 WHERE b = 20; +SELECT * FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: row (102, 20) shows updated a + +DELETE FROM desc_e2e_single WHERE b = 5; +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: 30, 20, 15, 10 (5 is gone) + +-- ============================================================================ +-- 7. Sysvar create-time-only semantics +-- ============================================================================ +SET @@global.tidb_enable_descending_index = OFF; +-- Existing DESC tables continue to work after the flip. +SELECT b FROM desc_e2e_single USE INDEX(idx_b) ORDER BY b DESC; +-- Expected: 30, 20, 15, 10 + +-- New CREATE INDEX silently drops DESC when the gate is off. +CREATE TABLE desc_e2e_off_new (x INT, INDEX idx_x (x DESC)); +SELECT collation FROM information_schema.statistics WHERE table_name='desc_e2e_off_new'; +-- Expected: A (DESC was dropped) +DROP TABLE desc_e2e_off_new; + +-- ============================================================================ +-- 8. Cleanup +-- ============================================================================ +SET @@global.tidb_enable_descending_index = DEFAULT; +DROP TABLE IF EXISTS desc_e2e_single; +DROP TABLE IF EXISTS desc_e2e_mixed; +DROP TABLE IF EXISTS desc_e2e_pk_nonclustered; +DROP TABLE IF EXISTS desc_e2e_strings; +DROP TABLE IF EXISTS desc_e2e_expression; + +SELECT 'e2e test complete' AS status; diff --git a/tests/run_desc_index_e2e.sh b/tests/run_desc_index_e2e.sh new file mode 100755 index 0000000000000..cb17b697efee1 --- /dev/null +++ b/tests/run_desc_index_e2e.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# End-to-end smoke test runner for descending-order indexes +# (pingcap/tidb#2519). 
+# +# Boots a tiup playground cluster with caller-supplied TiDB and TiKV +# binaries, runs tests/desc_index_e2e.sql against it via the mysql +# client, and tears the cluster down on exit. Output is split into +# "playground.log" (TiDB / TiKV / PD logs) and "e2e.out" (SQL output +# you'll want to diff against the expectations annotated in the SQL +# file). +# +# Usage: +# TIDB_BIN=/path/to/bin/tidb-server \ +# TIKV_BIN=/path/to/target/release/tikv-server \ +# ./tests/run_desc_index_e2e.sh +# +# Optional environment overrides: +# SQL_FILE path to the SQL script (default: alongside this script) +# TIUP_TAG playground tag (default: desc-e2e) +# WAIT_SECS seconds to wait for TiDB to come up (default: 120) +# OUT_DIR where to write playground.log + e2e.out (default: mktemp) +# +# Requires: tiup, mysql client, plus the two binaries above. The TiKV +# binary must include the descending-order coprocessor changes from +# tikv/tikv#19558. + +set -euo pipefail + +: "${TIDB_BIN:?set TIDB_BIN to the path of the desc-index TiDB binary}" +: "${TIKV_BIN:?set TIKV_BIN to the path of the desc-index TiKV binary}" +SQL_FILE="${SQL_FILE:-$(dirname "$0")/desc_index_e2e.sql}" +TIUP_TAG="${TIUP_TAG:-desc-e2e}" +WAIT_SECS="${WAIT_SECS:-120}" +OUT_DIR="${OUT_DIR:-$(mktemp -d -t desc-e2e-XXXXXX)}" + +for f in "$TIDB_BIN" "$TIKV_BIN" "$SQL_FILE"; do + if [[ ! -f "$f" ]]; then + echo "missing: $f" >&2 + exit 1 + fi +done + +echo ">>> output directory: $OUT_DIR" +echo ">>> tidb-server: $TIDB_BIN" +echo ">>> tikv-server: $TIKV_BIN" +echo ">>> sql file: $SQL_FILE" + +# Kick off tiup playground; record its PID so we can tear down cleanly. +# Flag names: --db / --kv / --pd (not --tidb/--tikv); binpath flags +# follow the same convention. +tiup playground nightly \ + --kv 1 --db 1 --pd 1 \ + --kv.binpath "$TIKV_BIN" \ + --db.binpath "$TIDB_BIN" \ + --tag "$TIUP_TAG" \ + --without-monitor \ + > "$OUT_DIR/playground.log" 2>&1 & +PLAYGROUND_PID=$! 
+ +cleanup() { + echo + echo ">>> tearing down playground (pid=$PLAYGROUND_PID)" + # tiup playground installs a signal handler that closes the children. + kill "$PLAYGROUND_PID" 2>/dev/null || true + wait "$PLAYGROUND_PID" 2>/dev/null || true + echo ">>> logs: $OUT_DIR/playground.log" + echo ">>> output: $OUT_DIR/e2e.out" +} +trap cleanup EXIT + +# Wait for TiDB to start accepting connections (port 4000). +echo ">>> waiting up to ${WAIT_SECS}s for TiDB to come up on 127.0.0.1:4000" +for _ in $(seq 1 "$WAIT_SECS"); do + if mysql -h 127.0.0.1 -P 4000 -u root -e "SELECT 1" >/dev/null 2>&1; then + echo ">>> TiDB is up" + break + fi + sleep 1 +done + +if ! mysql -h 127.0.0.1 -P 4000 -u root -e "SELECT 1" >/dev/null 2>&1; then + echo "TiDB did not come up within ${WAIT_SECS}s; see $OUT_DIR/playground.log" >&2 + exit 1 +fi + +# Run the SQL file. Use --table for human-readable output (with column +# headers), --comments so the section banners survive into the log, and +# --force so the deliberate error case in Section 3 (CLUSTERED PRIMARY +# KEY with DESC must be rejected) does not abort the rest of the script. +# Pipe via stdin rather than `-e "source $SQL_FILE"` because `source` +# runs in single-statement mode and silently ignores --force. Both +# stdout and stderr are tee'd so expected-error lines show up inline +# next to the matching SQL statement. +echo ">>> running $SQL_FILE" +mysql -h 127.0.0.1 -P 4000 -u root --table --comments --force \ + --default-character-set=utf8mb4 \ + < "$SQL_FILE" 2>&1 \ + | tee "$OUT_DIR/e2e.out" + +echo +echo ">>> e2e run finished — review $OUT_DIR/e2e.out for unexpected rows."