From f8e69c8aa3b65bc4c4d968aa43da1c493713f9a2 Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Tue, 19 May 2026 11:08:04 -0400 Subject: [PATCH 1/6] fix for pivot-string-literal-column-names bug --- sqlglot-integration-tests | 2 +- sqlglot/generators/duckdb.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sqlglot-integration-tests b/sqlglot-integration-tests index b18cff087e..27342497eb 160000 --- a/sqlglot-integration-tests +++ b/sqlglot-integration-tests @@ -1 +1 @@ -Subproject commit b18cff087e4cde658867eece52333691f203c272 +Subproject commit 27342497eb8d4702af0186cf4d8036e4919b2be2 diff --git a/sqlglot/generators/duckdb.py b/sqlglot/generators/duckdb.py index 49a4d486b5..69b8b71fdb 100644 --- a/sqlglot/generators/duckdb.py +++ b/sqlglot/generators/duckdb.py @@ -1452,6 +1452,26 @@ def _sha_sql( return self.func("UNHEX", result) if is_binary else result +def _fix_pivot_string_column_names(expression: exp.Expr) -> exp.Expr: + """Fix Snowflake-style quoted pivot column identifiers for DuckDB. + Snowflake PIVOT IN-list string literals include quotes: "'JAN'", whereas DuckDB does not. + + The fix is needed after qualify() expands SELECT *, columns and aliases must be updated. + """ + for node in expression.walk(): + ident = None + if isinstance(node, exp.Column): + ident = node.this + elif isinstance(node, exp.Alias): + ident = node.args.get("alias") + if isinstance(ident, exp.Identifier): + name = ident.name + if len(name) >= 2 and name[0] == "'" and name[-1] == "'": + ident.set("this", name[1:-1]) + + return expression + + class DuckDBGenerator(generator.Generator): PARAMETER_TOKEN = "$" NAMED_PLACEHOLDER_TOKEN = "$" @@ -2181,6 +2201,9 @@ class DuckDBGenerator(generator.Generator): """ ) + def preprocess(self, expression: exp.Expr) -> exp.Expr: + return _fix_pivot_string_column_names(super().preprocess(expression)) + def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str: cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2)) return self.sql( From c0fdbfb342e1a4f7f1e0568b7866bf174532a0d1 Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Tue, 19 May 2026 13:18:59 -0400 Subject: [PATCH 2/6] updated implementation to set column aliases --- sqlglot/generators/duckdb.py | 49 ++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/sqlglot/generators/duckdb.py b/sqlglot/generators/duckdb.py index 69b8b71fdb..07f1035a2c 100644 --- a/sqlglot/generators/duckdb.py +++ b/sqlglot/generators/duckdb.py @@ -1452,26 +1452,6 @@ def _sha_sql( return self.func("UNHEX", result) if is_binary else result -def _fix_pivot_string_column_names(expression: exp.Expr) -> exp.Expr: - """Fix Snowflake-style quoted pivot column identifiers for DuckDB. - Snowflake PIVOT IN-list string literals include quotes: "'JAN'", whereas DuckDB does not. - - The fix is needed after qualify() expands SELECT *, columns and aliases must be updated. - """ - for node in expression.walk(): - ident = None - if isinstance(node, exp.Column): - ident = node.this - elif isinstance(node, exp.Alias): - ident = node.args.get("alias") - if isinstance(ident, exp.Identifier): - name = ident.name - if len(name) >= 2 and name[0] == "'" and name[-1] == "'": - ident.set("this", name[1:-1]) - - return expression - - class DuckDBGenerator(generator.Generator): PARAMETER_TOKEN = "$" NAMED_PLACEHOLDER_TOKEN = "$" @@ -2201,8 +2181,33 @@ class DuckDBGenerator(generator.Generator): """ ) - def preprocess(self, expression: exp.Expr) -> exp.Expr: - return _fix_pivot_string_column_names(super().preprocess(expression)) + def pivot_sql(self, expression: exp.Pivot) -> str: + # Snowflake PIVOT with string-literal IN-list values names output columns with quotes included (e.g. "'JAN'"). + # DuckDB defaults to unquoted names, so after qualify() expands SELECT *, we inject an explicit + # column alias list on the PIVOT alias so DuckDB uses the same column names as Snowflake. + pivot_cols = expression.args.get("columns") or [] + if not any(c.name.startswith("'") and c.name.endswith("'") for c in pivot_cols): + return super().pivot_sql(expression) + + alias_node = expression.args.get("alias") + parent_select = expression.find_ancestor(exp.Select) + if not alias_node or not parent_select: + return super().pivot_sql(expression) + + pivot_alias = alias_node.name + all_cols = [ + e.alias_or_name + for e in parent_select.expressions + if isinstance(e, exp.Alias) + and isinstance(e.this, exp.Column) + and e.this.table == pivot_alias + ] + + if not all_cols: + return super().pivot_sql(expression) + + alias_node.set("columns", [exp.to_identifier(name, quoted=True) for name in all_cols]) + return super().pivot_sql(expression) def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str: cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2)) From 02a828d8a5d067ea7d0167aeedbe4157be4436b3 Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Tue, 19 May 2026 15:30:09 -0400 Subject: [PATCH 3/6] switched fix over to optimizer/qualify --- sqlglot/generators/duckdb.py | 28 -------------------------- sqlglot/optimizer/qualify_columns.py | 15 ++++++++++++++ tests/fixtures/optimizer/optimizer.sql | 2 +- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/sqlglot/generators/duckdb.py b/sqlglot/generators/duckdb.py index 07f1035a2c..49a4d486b5 100644 --- a/sqlglot/generators/duckdb.py +++ b/sqlglot/generators/duckdb.py @@ -2181,34 +2181,6 @@ class DuckDBGenerator(generator.Generator): """ ) - def pivot_sql(self, expression: exp.Pivot) -> str: - # Snowflake PIVOT with string-literal IN-list values names output columns with quotes included (e.g. "'JAN'"). - # DuckDB defaults to unquoted names, so after qualify() expands SELECT *, we inject an explicit - # column alias list on the PIVOT alias so DuckDB uses the same column names as Snowflake. - pivot_cols = expression.args.get("columns") or [] - if not any(c.name.startswith("'") and c.name.endswith("'") for c in pivot_cols): - return super().pivot_sql(expression) - - alias_node = expression.args.get("alias") - parent_select = expression.find_ancestor(exp.Select) - if not alias_node or not parent_select: - return super().pivot_sql(expression) - - pivot_alias = alias_node.name - all_cols = [ - e.alias_or_name - for e in parent_select.expressions - if isinstance(e, exp.Alias) - and isinstance(e.this, exp.Column) - and e.this.table == pivot_alias - ] - - if not all_cols: - return super().pivot_sql(expression) - - alias_node.set("columns", [exp.to_identifier(name, quoted=True) for name in all_cols]) - return super().pivot_sql(expression) - def _array_bag_sql(self, condition: exp.Expr, arr1: exp.Expr, arr2: exp.Expr) -> str: cond = exp.Paren(this=exp.replace_placeholders(condition, arr1=arr1, arr2=arr2)) return self.sql( diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index db7d9b3b6a..cd03c1509d 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -839,6 +839,21 @@ def _expand_stars( pivot_columns = pivot.output_columns(columns) or pivot.alias_column_names if pivot_columns: + names = list(pivot_columns) + # When the source dialect uses IDENTIFY_PIVOT_STRINGS (e.g. Snowflake), string + # literals in the PIVOT IN-list become output column names with embedded quotes + # (e.g. 'JAN'). Record the full alias list on the PIVOT alias so generators can + # emit it and produce columns that match these qualified references. + if not pivot.unpivot and getattr( + dialect.parser_class, "IDENTIFY_PIVOT_STRINGS", False + ): + pivot_alias = pivot.args.get("alias") + if pivot_alias and not pivot_alias.args.get("columns"): + pivot_alias.set( + "columns", + [exp.to_identifier(name, quoted=True) for name in names], + ) + new_selections.extend( alias(exp.column(name, table=pivot.alias), name, copy=False) for name in pivot_columns diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 21fdab1c65..9fb8abedc0 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -642,7 +642,7 @@ SELECT "_0"."'x'" AS "'x'", "_0"."'y'" AS "'y'" FROM "U" AS "U" -PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"; +PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"("G", "'x'", "'y'"); # title: selecting all columns from a pivoted source and generating spark # note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) From e083b31a2070096967d5dfcdcf550138514caeaf Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Fri, 22 May 2026 14:14:39 -0400 Subject: [PATCH 4/6] hooked into pivot_sql in generator --- Makefile | 21 +++++++- sqlglot/generator.py | 68 +++++++++++++++++++++++--- sqlglot/optimizer/qualify_columns.py | 15 ------ tests/fixtures/optimizer/optimizer.sql | 2 +- 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index 0f7b3dd7ed..be18be3bde 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install install-dev install-devc install-devc-release install-pre-commit bench bench-parse bench-transpile bench-optimize test test-fast unit testc unitc leakcheck style check docs docs-serve hidec showc clean resolve-integration-conflicts update-fixtures +.PHONY: install install-dev install-devc install-devc-release install-pre-commit bench bench-parse bench-transpile bench-optimize test test-fast test-branch unit testc unitc leakcheck style check docs docs-serve hidec showc clean resolve-integration-conflicts update-fixtures ifdef UV PIP := uv pip @@ -81,6 +81,25 @@ test: hidec test-fast: python -m unittest --failfast +BRANCH_BASE ?= main + +test-branch: + @changed_main=$$({ git diff origin/$(BRANCH_BASE)...HEAD --name-only 2>/dev/null; git diff HEAD --name-only; git diff --cached --name-only; } \ + | sort -u | grep '^tests/.*\.py$$' | grep -v '__init__' | sed 's/\.py$$//' | tr '/' '.'); \ + changed_int=$$({ git -C sqlglot-integration-tests log --name-only --format='' origin/HEAD..HEAD 2>/dev/null; git -C sqlglot-integration-tests diff HEAD --name-only 2>/dev/null; } \ + | sort -u | grep '^tests/sqlglot/.*\.py$$' | grep -v '__init__'); \ + if [ -z "$$changed_main" ] && [ -z "$$changed_int" ]; then \ + echo "No test files changed vs $(BRANCH_BASE)"; exit 0; \ + fi; \ + if [ -n "$$changed_main" ]; then \ + echo "Main tests:"; echo "$$changed_main" | tr ' ' '\n' | sed 's/^/ /'; \ + python -m unittest $$changed_main || exit 1; \ + fi; \ + if [ -n "$$changed_int" ]; then \ + echo "Integration tests:"; echo "$$changed_int" | tr ' ' '\n' | sed 's/^/ /'; \ + cd sqlglot-integration-tests && PYTHONPATH=. pytest $$changed_int; \ + fi + unit: hidec trap '$(MAKE) showc' EXIT; SKIP_INTEGRATION=1 python -m unittest diff --git a/sqlglot/generator.py b/sqlglot/generator.py index d76d49de72..7dad6e4cad 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -2459,6 +2459,42 @@ def tablesample_sql( return f" {tablesample_keyword or self.TABLESAMPLE_KEYWORDS} {method}{expr}{seed}" + def _pivot_in_value_aliases(self, expression: exp.Pivot) -> list[exp.Expression] | None: + # Returns the rewritten field.expressions list with PivotAlias wrappers injected where the + # stored column name (from pivot.args["columns"]) differs from the target dialect's natural + # name. Returns None if no rewrite is needed or applicable. + stored = expression.args.get("columns", []) + if not stored or len(expression.fields) != 1: + return None + + # Only inject when SELECT * has already been expanded by qualify() + parent_select = expression.find_ancestor(exp.Select) + if not parent_select or any(isinstance(e, exp.Star) for e in parent_select.expressions): + return None + + agg_aliases = [agg.alias for agg in expression.expressions if agg.alias] + step = len(agg_aliases) or 1 + suffix = ("_" + agg_aliases[0]) if agg_aliases else "" + + new_exprs: list[exp.Expression] = [] + modified = False + i = 0 + for e in expression.fields[0].expressions: + if not isinstance(e, (exp.Literal, exp.PivotAlias)): + return None + if i >= len(stored) or (suffix and not stored[i].name.endswith(suffix)): + return None + stored_name = stored[i].name[: -len(suffix)] if suffix else stored[i].name + if not isinstance(e, exp.PivotAlias) and stored_name.lower() != e.alias_or_name.lower(): + new_exprs.append( + exp.PivotAlias(this=e, alias=exp.to_identifier(stored_name, quoted=True)) + ) + modified = True + else: + new_exprs.append(e) + i += step + return new_exprs if modified else None + def pivot_sql(self, expression: exp.Pivot) -> str: expressions = self.expressions(expression, flat=True) direction = "UNPIVOT" if expression.unpivot else "PIVOT" @@ -2478,6 +2514,14 @@ def pivot_sql(self, expression: exp.Pivot) -> str: sql = f"{direction} {this}{on}{into}{using}{group}" return self.prepend_ctes(expression, sql) + if not expression.unpivot: + # Wrap IN-list values with explicit aliases where the target dialect's natural column + # name would differ from the stored name recorded by the source dialect's parser. + new_field_exprs = self._pivot_in_value_aliases(expression) + if new_field_exprs is not None: + expression = expression.copy() + expression.fields[0].set("expressions", new_field_exprs) + alias = self.sql(expression, "alias") alias = f" AS {alias}" if alias else "" @@ -3855,14 +3899,24 @@ def pivotalias_sql(self, expression: exp.PivotAlias) -> str: parent = expression.parent pivot = parent and parent.parent - if isinstance(pivot, exp.Pivot) and pivot.unpivot: - identifier_alias = isinstance(alias, exp.Identifier) - literal_alias = isinstance(alias, exp.Literal) + if isinstance(pivot, exp.Pivot): + if pivot.unpivot: + identifier_alias = isinstance(alias, exp.Identifier) + literal_alias = isinstance(alias, exp.Literal) - if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: - alias.replace(exp.Literal.string(alias.output_name)) - elif not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: - alias.replace(exp.to_identifier(alias.output_name)) + if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: + alias.replace(exp.Literal.string(alias.output_name)) + elif ( + not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS + ): + alias.replace(exp.to_identifier(alias.output_name)) + elif getattr( + getattr(self.dialect, "parser_class", None), "IDENTIFY_PIVOT_STRINGS", False + ): + # For IDENTIFY_PIVOT_STRINGS targets (e.g. Snowflake), string literals in the + # IN-list already produce the correct column names natively, so strip any alias + # that _pivot_in_value_aliases may have injected for other dialects. + return self.sql(expression, "this") return self.alias_sql(expression) diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py index cd03c1509d..db7d9b3b6a 100644 --- a/sqlglot/optimizer/qualify_columns.py +++ b/sqlglot/optimizer/qualify_columns.py @@ -839,21 +839,6 @@ def _expand_stars( pivot_columns = pivot.output_columns(columns) or pivot.alias_column_names if pivot_columns: - names = list(pivot_columns) - # When the source dialect uses IDENTIFY_PIVOT_STRINGS (e.g. Snowflake), string - # literals in the PIVOT IN-list become output column names with embedded quotes - # (e.g. 'JAN'). Record the full alias list on the PIVOT alias so generators can - # emit it and produce columns that match these qualified references. - if not pivot.unpivot and getattr( - dialect.parser_class, "IDENTIFY_PIVOT_STRINGS", False - ): - pivot_alias = pivot.args.get("alias") - if pivot_alias and not pivot_alias.args.get("columns"): - pivot_alias.set( - "columns", - [exp.to_identifier(name, quoted=True) for name in names], - ) - new_selections.extend( alias(exp.column(name, table=pivot.alias), name, copy=False) for name in pivot_columns diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql index 9fb8abedc0..21fdab1c65 100644 --- a/tests/fixtures/optimizer/optimizer.sql +++ b/tests/fixtures/optimizer/optimizer.sql @@ -642,7 +642,7 @@ SELECT "_0"."'x'" AS "'x'", "_0"."'y'" AS "'y'" FROM "U" AS "U" -PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"("G", "'x'", "'y'"); +PIVOT(SUM("U"."F") FOR "U"."H" IN ('x', 'y')) AS "_0"; # title: selecting all columns from a pivoted source and generating spark # note: spark doesn't allow pivot aliases or qualified columns for the pivot's "field" (`h`) From a9f6778d2c9beeb3433f9689a60b7042437f01c5 Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Fri, 22 May 2026 14:21:37 -0400 Subject: [PATCH 5/6] revert makefile changes --- Makefile | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/Makefile b/Makefile index be18be3bde..0f7b3dd7ed 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install install-dev install-devc install-devc-release install-pre-commit bench bench-parse bench-transpile bench-optimize test test-fast test-branch unit testc unitc leakcheck style check docs docs-serve hidec showc clean resolve-integration-conflicts update-fixtures +.PHONY: install install-dev install-devc install-devc-release install-pre-commit bench bench-parse bench-transpile bench-optimize test test-fast unit testc unitc leakcheck style check docs docs-serve hidec showc clean resolve-integration-conflicts update-fixtures ifdef UV PIP := uv pip @@ -81,25 +81,6 @@ test: hidec test-fast: python -m unittest --failfast -BRANCH_BASE ?= main - -test-branch: - @changed_main=$$({ git diff origin/$(BRANCH_BASE)...HEAD --name-only 2>/dev/null; git diff HEAD --name-only; git diff --cached --name-only; } \ - | sort -u | grep '^tests/.*\.py$$' | grep -v '__init__' | sed 's/\.py$$//' | tr '/' '.'); \ - changed_int=$$({ git -C sqlglot-integration-tests log --name-only --format='' origin/HEAD..HEAD 2>/dev/null; git -C sqlglot-integration-tests diff HEAD --name-only 2>/dev/null; } \ - | sort -u | grep '^tests/sqlglot/.*\.py$$' | grep -v '__init__'); \ - if [ -z "$$changed_main" ] && [ -z "$$changed_int" ]; then \ - echo "No test files changed vs $(BRANCH_BASE)"; exit 0; \ - fi; \ - if [ -n "$$changed_main" ]; then \ - echo "Main tests:"; echo "$$changed_main" | tr ' ' '\n' | sed 's/^/ /'; \ - python -m unittest $$changed_main || exit 1; \ - fi; \ - if [ -n "$$changed_int" ]; then \ - echo "Integration tests:"; echo "$$changed_int" | tr ' ' '\n' | sed 's/^/ /'; \ - cd sqlglot-integration-tests && PYTHONPATH=. pytest $$changed_int; \ - fi - unit: hidec trap '$(MAKE) showc' EXIT; SKIP_INTEGRATION=1 python -m unittest From 2919fabe9c9180ae159fee035455d7315f3452bd Mon Sep 17 00:00:00 2001 From: fivetran-kwoodbeck Date: Wed, 27 May 2026 12:36:46 -0400 Subject: [PATCH 6/6] inject IN-list aliases --- sqlglot/expressions/query.py | 3 + sqlglot/generator.py | 105 +++++++++++++++++++++------------- sqlglot/parser.py | 4 ++ sqlglot/parsers/duckdb.py | 1 + sqlglot/parsers/spark2.py | 1 + tests/dialects/test_duckdb.py | 5 ++ tests/dialects/test_spark.py | 6 +- 7 files changed, 81 insertions(+), 44 deletions(-) diff --git a/sqlglot/expressions/query.py b/sqlglot/expressions/query.py index 98294c57c6..058cb07e69 100644 --- a/sqlglot/expressions/query.py +++ b/sqlglot/expressions/query.py @@ -1758,6 +1758,9 @@ class Pivot(Expression): "default_on_null": False, "into": False, "with_": False, + "identify_pivot_strings": False, + "prefixed_pivot_columns": False, + "pivot_column_naming": False, } @property diff --git a/sqlglot/generator.py b/sqlglot/generator.py index 7dad6e4cad..20ddcb2150 100644 --- a/sqlglot/generator.py +++ b/sqlglot/generator.py @@ -2460,39 +2460,74 @@ def tablesample_sql( return f" {tablesample_keyword or self.TABLESAMPLE_KEYWORDS} {method}{expr}{seed}" def _pivot_in_value_aliases(self, expression: exp.Pivot) -> list[exp.Expression] | None: - # Returns the rewritten field.expressions list with PivotAlias wrappers injected where the - # stored column name (from pivot.args["columns"]) differs from the target dialect's natural - # name. Returns None if no rewrite is needed or applicable. - stored = expression.args.get("columns", []) - if not stored or len(expression.fields) != 1: + # Returns the rewritten field.expressions list with PivotAlias wrappers injected where + # the stored column name differs from the target dialect's natural output. + columns = expression.args.get("columns") + if not columns or len(expression.fields) != 1: return None - # Only inject when SELECT * has already been expanded by qualify() - parent_select = expression.find_ancestor(exp.Select) - if not parent_select or any(isinstance(e, exp.Star) for e in parent_select.expressions): + parser_cls = self.dialect.parser_class + # if the source and target emit identical values, exit early + if ( + expression.args.get("identify_pivot_strings") == parser_cls.IDENTIFY_PIVOT_STRINGS + and expression.args.get("prefixed_pivot_columns") == parser_cls.PREFIXED_PIVOT_COLUMNS + and expression.args.get("pivot_column_naming") == parser_cls.PIVOT_COLUMN_NAMING + ): return None - agg_aliases = [agg.alias for agg in expression.expressions if agg.alias] - step = len(agg_aliases) or 1 - suffix = ("_" + agg_aliases[0]) if agg_aliases else "" + in_exprs = expression.fields[0].expressions + step = len(columns) // len(in_exprs) + + # Derive the per-value suffix from the first stored column vs the first IN-list value. + # This correctly handles dialects (e.g. Spark single-agg) that ignore agg aliases. + source_identify = expression.args.get("identify_pivot_strings", False) + first_base = in_exprs[0].sql() if source_identify else in_exprs[0].alias_or_name + first_stored = columns[0].name + + # exit if only suffix matches, not prefix. (e.g. BigQuery, which cannot be fixed) + if not first_stored.lower().startswith(first_base.lower()): + # Should we emit an unsupported here? + return None + suffix = first_stored[len(first_base) :] + + target_identify = parser_cls.IDENTIFY_PIVOT_STRINGS + target_naming = parser_cls.PIVOT_COLUMN_NAMING + + # Whether the target dialect would append an agg-name suffix for this pivot. + # Spark single-agg uniquely drops the agg alias entirely. + target_has_suffix = (len(expression.expressions) > 1 or target_naming != "spark") and any( + getattr(a, "alias", None) for a in expression.expressions + ) + source_has_suffix = suffix != "" new_exprs: list[exp.Expression] = [] modified = False - i = 0 - for e in expression.fields[0].expressions: - if not isinstance(e, (exp.Literal, exp.PivotAlias)): - return None - if i >= len(stored) or (suffix and not stored[i].name.endswith(suffix)): - return None - stored_name = stored[i].name[: -len(suffix)] if suffix else stored[i].name - if not isinstance(e, exp.PivotAlias) and stored_name.lower() != e.alias_or_name.lower(): + for val_idx, e in enumerate(in_exprs): + i = val_idx * step + stored_full = columns[i].name + stored_value = stored_full[: -len(suffix)] if suffix else stored_full + target_value = e.sql() if target_identify else e.alias_or_name + + if isinstance(e, exp.PivotAlias): + new_exprs.append(e) + continue + + # Source had a suffix, target won't apply one (e.g. DuckDB→Spark single-agg + # aliased): inject the full stored column name as the IN-list alias so the + # target uses it verbatim as the column name. + if source_has_suffix and not target_has_suffix: new_exprs.append( - exp.PivotAlias(this=e, alias=exp.to_identifier(stored_name, quoted=True)) + exp.PivotAlias(this=e, alias=exp.to_identifier(stored_full, quoted=True)) + ) + modified = True + # Value-part mismatch (e.g. Snowflake's literal-style values vs others). + elif stored_value.lower() != target_value.lower(): + new_exprs.append( + exp.PivotAlias(this=e, alias=exp.to_identifier(stored_value, quoted=True)) ) modified = True else: new_exprs.append(e) - i += step return new_exprs if modified else None def pivot_sql(self, expression: exp.Pivot) -> str: @@ -2515,11 +2550,9 @@ def pivot_sql(self, expression: exp.Pivot) -> str: return self.prepend_ctes(expression, sql) if not expression.unpivot: - # Wrap IN-list values with explicit aliases where the target dialect's natural column - # name would differ from the stored name recorded by the source dialect's parser. + # Wrap IN-list values with explicit aliases where the target dialect would differ new_field_exprs = self._pivot_in_value_aliases(expression) if new_field_exprs is not None: - expression = expression.copy() expression.fields[0].set("expressions", new_field_exprs) alias = self.sql(expression, "alias") @@ -3899,24 +3932,14 @@ def pivotalias_sql(self, expression: exp.PivotAlias) -> str: parent = expression.parent pivot = parent and parent.parent - if isinstance(pivot, exp.Pivot): - if pivot.unpivot: - identifier_alias = isinstance(alias, exp.Identifier) - literal_alias = isinstance(alias, exp.Literal) + if isinstance(pivot, exp.Pivot) and pivot.unpivot: + identifier_alias = isinstance(alias, exp.Identifier) + literal_alias = isinstance(alias, exp.Literal) - if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: - alias.replace(exp.Literal.string(alias.output_name)) - elif ( - not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS - ): - alias.replace(exp.to_identifier(alias.output_name)) - elif getattr( - getattr(self.dialect, "parser_class", None), "IDENTIFY_PIVOT_STRINGS", False - ): - # For IDENTIFY_PIVOT_STRINGS targets (e.g. Snowflake), string literals in the - # IN-list already produce the correct column names natively, so strip any alias - # that _pivot_in_value_aliases may have injected for other dialects. - return self.sql(expression, "this") + if identifier_alias and not self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: + alias.replace(exp.Literal.string(alias.output_name)) + elif not identifier_alias and literal_alias and self.UNPIVOT_ALIASES_ARE_IDENTIFIERS: + alias.replace(exp.to_identifier(alias.output_name)) return self.alias_sql(expression) diff --git a/sqlglot/parser.py b/sqlglot/parser.py index b29327ac56..dea686af5e 100644 --- a/sqlglot/parser.py +++ b/sqlglot/parser.py @@ -1755,6 +1755,7 @@ def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None: PREFIXED_PIVOT_COLUMNS: t.ClassVar = False IDENTIFY_PIVOT_STRINGS: t.ClassVar = False + PIVOT_COLUMN_NAMING: t.ClassVar[str] = "" LOG_DEFAULTS_TO_LN: t.ClassVar = False @@ -5252,6 +5253,9 @@ def _parse_pivot(self) -> exp.Pivot | None: columns.append(exp.to_identifier("_".join(fld_parts))) pivot.set("columns", columns) + pivot.set("identify_pivot_strings", self.IDENTIFY_PIVOT_STRINGS) + pivot.set("prefixed_pivot_columns", self.PREFIXED_PIVOT_COLUMNS) + pivot.set("pivot_column_naming", self.PIVOT_COLUMN_NAMING) return pivot diff --git a/sqlglot/parsers/duckdb.py b/sqlglot/parsers/duckdb.py index 40d144d26d..da9bc791b7 100644 --- a/sqlglot/parsers/duckdb.py +++ b/sqlglot/parsers/duckdb.py @@ -73,6 +73,7 @@ def _convert_text_type(dtype: exp.DataType) -> exp.DataType: class DuckDBParser(parser.Parser): MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True + PIVOT_COLUMN_NAMING = "duckdb" NO_PAREN_FUNCTIONS = { **parser.Parser.NO_PAREN_FUNCTIONS, diff --git a/sqlglot/parsers/spark2.py b/sqlglot/parsers/spark2.py index c9354eb30a..5857b3d524 100644 --- a/sqlglot/parsers/spark2.py +++ b/sqlglot/parsers/spark2.py @@ -20,6 +20,7 @@ def build_as_cast(to_type: str) -> t.Callable[[list], exp.Expr]: class Spark2Parser(HiveParser): TRIM_PATTERN_FIRST = True CHANGE_COLUMN_ALTER_SYNTAX = True + PIVOT_COLUMN_NAMING = "spark" FUNCTIONS = { **HiveParser.FUNCTIONS, diff --git a/tests/dialects/test_duckdb.py b/tests/dialects/test_duckdb.py index 0732628798..51c4081ba1 100644 --- a/tests/dialects/test_duckdb.py +++ b/tests/dialects/test_duckdb.py @@ -827,6 +827,11 @@ def test_duckdb(self): "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))", read={ "duckdb": "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))", + }, + ) + self.validate_all( + "SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1' AS \"'Q1'\", 'Q2' AS \"'Q2'\"))", + read={ "snowflake": "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR produce.quarter IN ('Q1', 'Q2'))", }, ) diff --git a/tests/dialects/test_spark.py b/tests/dialects/test_spark.py index 41e1b6ca01..8d0b890211 100644 --- a/tests/dialects/test_spark.py +++ b/tests/dialects/test_spark.py @@ -712,19 +712,19 @@ def test_spark(self): }, ) self.validate_all( - "SELECT piv.Q1 FROM (SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", + "SELECT piv.Q1 FROM (SELECT * FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1' AS `'Q1'`, 'Q2' AS `'Q2'`))) AS piv", read={ "snowflake": "SELECT piv.Q1 FROM produce PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", }, ) self.validate_all( - "SELECT piv.Q1 FROM (SELECT * FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2'))) AS piv", + "SELECT piv.Q1 FROM (SELECT * FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1' AS `'Q1'`, 'Q2' AS `'Q2'`))) AS piv", read={ "snowflake": "SELECT piv.Q1 FROM (SELECT * FROM produce) PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) piv", }, ) self.validate_all( - "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR quarter IN ('Q1', 'Q2'))", + "SELECT * FROM produce PIVOT(SUM(produce.sales) FOR quarter IN ('Q1' AS `'Q1'`, 'Q2' AS `'Q2'`))", read={ "snowflake": "SELECT * FROM produce PIVOT (SUM(produce.sales) FOR produce.quarter IN ('Q1', 'Q2'))", },