tobymao · fivetran-kwoodbeck · Jun 29, 2026 · Jun 30, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/sqlglot-integration-tests b/sqlglot-integration-tests
diff --git a/sqlglot/dialects/dialect.py b/sqlglot/dialects/dialect.py
@@ -379,6 +379,12 @@ class Dialect(metaclass=_Dialect):
     e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
     """
 
+    PRESERVE_ORIGINAL_OUTPUT_NAME_CASE: bool = False
+    """
+    Whether the dialect preserves the original case of output column names. When True,
+    `qualify_outputs` does not apply `normalize_identifier` to the aliases it synthesizes.
+    """
+
     LOG_BASE_FIRST: bool | None = True
     """
     Whether the base comes first in the `LOG` function.

diff --git a/sqlglot/dialects/spark.py b/sqlglot/dialects/spark.py
@@ -14,6 +14,7 @@ class Spark(Spark2):
     SUPPORTS_LIMIT_ALL = True
     SUPPORTS_NULL_TYPE = True
     ARRAY_FUNCS_PROPAGATES_NULLS = True
+    PRESERVE_ORIGINAL_OUTPUT_NAME_CASE = True
     EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
 
     class Tokenizer(Spark2.Tokenizer):

diff --git a/sqlglot/generators/tsql.py b/sqlglot/generators/tsql.py
@@ -74,6 +74,7 @@ def qualify_derived_table_outputs(expression: exp.Expr) -> exp.Expr:
         and isinstance(alias, exp.TableAlias)
         and not alias.columns
     ):
+        from sqlglot.dialects.tsql import TSQL
         from sqlglot.optimizer.qualify_columns import qualify_outputs
 
         # We keep track of the unaliased column projection indexes instead of the expressions
@@ -84,7 +85,7 @@ def qualify_derived_table_outputs(expression: exp.Expr) -> exp.Expr:
             i for i, c in enumerate(query.selects) if isinstance(c, exp.Column) and not c.alias
         )
 
-        qualify_outputs(query)
+        qualify_outputs(query, dialect=TSQL())
 
         # Preserve the quoting information of columns for newly added Alias nodes
         query_selects = query.selects

diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
@@ -101,7 +101,7 @@ def qualify_columns(
                     pseudocolumns,
                     annotator,
                 )
-            qualify_outputs(scope)
+            qualify_outputs(scope, dialect=dialect)
 
         _expand_group_by(scope, dialect)
 
@@ -831,7 +831,8 @@ def _expand_stars(
             continue
 
         for table in tables:
-            if table not in scope.sources:
+            source = scope.sources.get(table)
+            if source is None:
                 raise OptimizeError(f"Unknown table: {table}")
 
             columns = resolver.get_source_columns(table, only_visible=True)
@@ -848,6 +849,15 @@ def _expand_stars(
             renamed_columns = rename_columns.get(table_id, {})
             replaced_columns = replace_columns.get(table_id, {})
 
+            # Preserve case-sensitivity of quoted source columns when expanding stars,
+            # so the generated alias isn't folded by dialect normalization
+            source_expression = source.expression if isinstance(source, Scope) else None
+            quoted_columns = (
+                {s.output_name: _output_identifier_quoted(s) for s in source_expression.selects}
+                if isinstance(source_expression, exp.Query)
+                else {}
+            )
-            quoted_columns = (
-                {s.output_name: _output_identifier_quoted(s) for s in source_expression.selects}
-                if isinstance(source_expression, exp.Query)
-                else {}
-            )
+            quoted_columns = {
+                s.output_name
+                for s in source_expression.selects
+                if isinstance(source_expression, exp.Query) and _output_identifier_quoted(s)
+            }
-            quoted_columns = (
-                {s.output_name: _output_identifier_quoted(s) for s in source_expression.selects}
-                if isinstance(source_expression, exp.Query)
-                else {}
-            )
+            quoted_columns = {
+                s.output_name
+                for s in source_expression.selects
+                if isinstance(source_expression, exp.Query) and _output_identifier_quoted(s)
+            }
+
             if pivot:
                 pivot_columns = pivot.output_columns(columns) or pivot.alias_column_names
 
@@ -875,7 +885,19 @@ def _expand_stars(
                     )
                 else:
                     alias_ = renamed_columns.get(name, name)
-                    selection_expr = replaced_columns.get(name) or exp.column(name, table=table)
+                    quoted = quoted_columns.get(name) or (
+                        # if it has characters that the dialect would have changed, infer that it was quoted.
+                        isinstance(source, exp.Table) and dialect.case_sensitive(name)
+                    )
+                    selection_expr = replaced_columns.get(name) or exp.column(
+                        name, table=table, quoted=quoted
+                    )
+                    if (
+                        quoted
+                        and isinstance(selection_expr, exp.Column)
+                        and not selection_expr.this.quoted
+                    ):
+                        selection_expr.this.set("quoted", True)
                     new_selections.append(
                         alias(selection_expr, alias_, copy=False)
                         if alias_ != name
@@ -887,6 +909,18 @@ def _expand_stars(
         scope_expression.set("expressions", new_selections)
 
 
+def _output_identifier_quoted(selection: exp.Expr) -> bool:
+    """Report whether a projection's output column name is a quoted identifier."""
+    # Aliased projections are named by the alias; bare columns by their own identifier.
+    if isinstance(selection, exp.Alias):
+        name_node = selection.args.get("alias")
+    else:
+        name_node = selection.this if isinstance(selection, exp.Column) else None
+    if not isinstance(name_node, exp.Identifier):
+        return False
+    return name_node.quoted
+
+
 def _add_ilike_columns(expression: exp.Expr) -> str | None:
     ilike = expression.args.get("ilike")
 
@@ -936,7 +970,7 @@ def _add_replace_columns(
         replace_columns[id(table)] = columns
 
 
-def qualify_outputs(scope_or_expression: Scope | exp.Expr) -> None:
+def qualify_outputs(scope_or_expression: Scope | exp.Expr, dialect: Dialect) -> None:
     """Ensure all output columns are aliased"""
     if isinstance(scope_or_expression, exp.Expr):
         scope = build_scope(scope_or_expression)
@@ -960,13 +994,21 @@ def qualify_outputs(scope_or_expression: Scope | exp.Expr) -> None:
 
         if isinstance(selection, exp.Subquery):
             if not selection.output_name:
-                selection.set("alias", exp.TableAlias(this=exp.to_identifier(f"_col_{i}")))
+                alias_identifier = exp.to_identifier(f"_col_{i}")
+                if dialect and not (dialect.PRESERVE_ORIGINAL_OUTPUT_NAME_CASE):
+                    dialect.normalize_identifier(alias_identifier)
+                selection.set("alias", exp.TableAlias(this=alias_identifier))
         elif not isinstance(selection, (exp.Alias, exp.Aliases)) and not selection.is_star:
+            source_quoted = isinstance(selection, exp.Column) and selection.this.quoted
             selection = alias(
                 selection,
                 alias=selection.output_name or f"_col_{i}",
                 copy=False,
             )
+            if source_quoted:
+                selection.args["alias"].set("quoted", True)
+            if dialect and not (dialect.PRESERVE_ORIGINAL_OUTPUT_NAME_CASE):
+                dialect.normalize_identifier(selection.args["alias"])
         if aliased_column:
             selection.set("alias", exp.to_identifier(aliased_column))
 

diff --git a/tests/fixtures/optimizer/optimizer.sql b/tests/fixtures/optimizer/optimizer.sql
@@ -1588,5 +1588,5 @@ CROSS JOIN LATERAL FLATTEN(input => "OBJ"."DATA") AS "F"("SEQ", "KEY", "PATH", "
 SELECT array_agg(id) WITHIN GROUP (ORDER BY id) OVER (PARTITION BY grp) FROM t;
 SELECT
   ARRAY_AGG("T"."ID") WITHIN GROUP (ORDER BY
-    "T"."ID") OVER (PARTITION BY "T"."GRP") AS "_col_0"
+    "T"."ID") OVER (PARTITION BY "T"."GRP") AS "_COL_0"
 FROM "T" AS "T";
diff --git a/tests/fixtures/optimizer/pushdown_projections.sql b/tests/fixtures/optimizer/pushdown_projections.sql
@@ -122,23 +122,23 @@ SELECT _0.a AS a, _0.b AS b FROM (WITH cte1 AS (SELECT 1 AS a, 2 AS b) SELECT ct
 
 # dialect: snowflake
 SELECT OBJECT_CONSTRUCT(*) FROM (SELECT a, b FROM x) AS t;
-SELECT OBJECT_CONSTRUCT(*) AS _col_0 FROM (SELECT a AS a, b AS b FROM x AS x) AS t;
+SELECT OBJECT_CONSTRUCT(*) AS _COL_0 FROM (SELECT a AS A, b AS B FROM x AS x) AS t;
 
 # dialect: snowflake
 WITH base AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(*), 'e', 5) FROM base;
-WITH base AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(*), 'e', 5) AS _col_0 FROM base AS base;
+WITH base AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(*), 'e', 5) AS _COL_0 FROM base AS base;
 
 # dialect: snowflake
 WITH base AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) SELECT obj:A, obj:B FROM (SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(*), 'e', 5) AS obj, a FROM base) AS t;
 WITH base AS (SELECT 1 AS a, 2 AS b, 3 AS c, 4 AS d) SELECT GET_PATH(t.obj, 'A') AS A, GET_PATH(t.obj, 'B') AS B FROM (SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(*), 'e', 5) AS obj FROM base AS base) AS t;
 
 # dialect: snowflake
 WITH cte AS (SELECT 1 AS a, 2 as b) SELECT HASH_AGG(*) FROM cte;
-WITH cte AS (SELECT 1 AS a, 2 AS b) SELECT HASH_AGG(*) AS _col_0 FROM cte AS cte;
+WITH cte AS (SELECT 1 AS a, 2 AS b) SELECT HASH_AGG(*) AS _COL_0 FROM cte AS cte;
 
 # dialect: snowflake
 WITH cte AS (SELECT a, b FROM x) SELECT COUNT(* EXCLUDE a) FROM cte;
-WITH cte AS (SELECT a AS a, b AS b FROM x AS x) SELECT COUNT(* EXCLUDE (a)) AS _col_0 FROM cte AS cte;
+WITH cte AS (SELECT a AS A, b AS B FROM x AS x) SELECT COUNT(* EXCLUDE (a)) AS _COL_0 FROM cte AS cte;
 
 WITH cte1 AS (SELECT a, SUM(b) AS sale FROM x GROUP BY a), cte2 AS (SELECT cte1.a, COUNT(*) AS cnt FROM cte1 GROUP BY cte1.a) SELECT a, cnt FROM cte2;
 WITH cte1 AS (SELECT x.a AS a FROM x AS x GROUP BY x.a), cte2 AS (SELECT cte1.a AS a, COUNT(*) AS cnt FROM cte1 AS cte1 GROUP BY cte1.a) SELECT cte2.a AS a, cte2.cnt AS cnt FROM cte2 AS cte2;

diff --git a/tests/fixtures/optimizer/qualify_columns.sql b/tests/fixtures/optimizer/qualify_columns.sql
@@ -5,7 +5,7 @@ SELECT a FROM x;
 SELECT x.a AS a FROM x AS x;
 
 SELECT "a" FROM x;
-SELECT x."a" AS a FROM x AS x;
+SELECT x."a" AS "a" FROM x AS x;
 
 # execute: false
 SELECT a FROM zz GROUP BY a ORDER BY a;
@@ -110,7 +110,7 @@ SELECT T."col" AS "col" FROM TBL T;
 # execute: false
 # dialect: oracle
 WITH base AS (SELECT x.dummy AS COL_1 FROM dual x) SELECT b."COL_1" FROM base b;
-WITH BASE AS (SELECT X.DUMMY AS COL_1 FROM DUAL X) SELECT B."COL_1" AS COL_1 FROM BASE B;
+WITH BASE AS (SELECT X.DUMMY AS COL_1 FROM DUAL X) SELECT B."COL_1" AS "COL_1" FROM BASE B;
 
 # execute: false
 -- this query seems to be invalid in postgres and duckdb but valid in bigquery

diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
@@ -1345,9 +1345,8 @@ def test_canonicalize_internal_names(self):
         self.assertEqual(canon_pg_a, canon_pg_qa)
 
         # In Snowflake (upper-folding), unquoted `a` becomes `A`, while quoted `"a"` stays
-        # lowercase — they reference *different* columns. Base-table names are preserved,
-        # and the quote state on the lowercase column is retained because dropping it
-        # would let Snowflake re-case-fold `a` back to `A` (changing semantics).
+        # lowercase — they reference *different* columns. The generated alias for the quoted
+        # column keeps its exact spelling, since folding it would re-case-fold `a` back to `A`.
         sf_schema = {"X": {"A": "INT", '"a"': "INT"}}
         canon_sf = qualify_then_canonicalize(
             parse_one('SELECT a, "a" FROM x', dialect="snowflake"),
@@ -2424,7 +2423,7 @@ def test_quotes(self):
         schema = {
             "example": {
                 '"source"': {
-                    "id": "text",
+                    '"ID"': "text",
                     '"name"': "text",
                     '"payload"': "text",
                 }
@@ -2762,7 +2761,7 @@ def _parse_and_optimize(query: str, dialect: str) -> exp.Expr:
         sql = _parse_and_optimize("SELECT col:A.a, col:a.A FROM t", dialect="snowflake")
         assert (
             sql
-            == '''SELECT GET_PATH("T"."COL", 'A.a') AS "a", GET_PATH("T"."COL", 'a.A') AS "A" FROM "T" AS "T"'''
+            == '''SELECT GET_PATH("T"."COL", 'A.a') AS "A", GET_PATH("T"."COL", 'a.A') AS "A" FROM "T" AS "T"'''
         )
 
         query = parse_one(