From dc96f373ec6573d899b367bb15419616c78761c3 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Tue, 26 May 2026 19:23:50 -0700
Subject: [PATCH 1/2] test(databricks): assert LCT string promotion for
 COALESCE/IF/CASE [CLAUDE]

Databricks string-promotes least-common-type functions when an argument
is text and the rest are non-boolean/non-binary atomics. Verified on a
Databricks serverless (always-ANSI) warehouse:

  typeof(coalesce(cast(1 as int), 'abc'))  -> string
  typeof(coalesce(cast(1.5 as double), 'abc')) -> string
  typeof(coalesce(cast('2020-01-01' as date), 'abc')) -> string
  typeof(coalesce(interval 1 day, 'abc')) -> string
  typeof(if(true, cast(1 as int), 'abc')) -> string
  typeof(case when true then cast(1 as int) else 'abc' end) -> string

boolean+string and binary+string raise DATATYPE_MISMATCH in Databricks,
so those rows keep their current (best-effort) annotation.

These fixtures fail until the annotator fix lands.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../fixtures/optimizer/annotate_functions.sql | 34 ++++++++++++++-----
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 4487444e34..2cef6f271f 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -307,11 +307,11 @@ STRING;
 
 # dialect: databricks
 IF(cond, tbl.str_col, tbl.double_col);
-DOUBLE;
+STRING;
 
 # dialect: databricks
 IF(cond, tbl.double_col, tbl.str_col);
-DOUBLE;
+STRING;
 
 # dialect: hive, spark2, spark
 IF(cond, tbl.date_col, tbl.str_col);
@@ -323,11 +323,11 @@ STRING;
 
 # dialect: databricks
 IF(cond, tbl.date_col, tbl.str_col);
-DATE;
+STRING;
 
 # dialect: databricks
 IF(cond, tbl.str_col, tbl.date_col);
-DATE;
+STRING;
 
 # dialect: hive, spark2, spark, databricks
 IF(cond, tbl.date_col, tbl.timestamp_col);
@@ -371,19 +371,19 @@ STRING;
 
 # dialect: databricks
 COALESCE(tbl.str_col, tbl.bigint_col);
-BIGINT;
+STRING;
 
 # dialect: databricks
 COALESCE(tbl.bigint_col, tbl.str_col);
-BIGINT;
+STRING;
 
 # dialect: databricks
 COALESCE(tbl.str_col, NULL, tbl.bigint_col);
-BIGINT;
+STRING;
 
 # dialect: databricks
 COALESCE(tbl.bigint_col, NULL, tbl.str_col);
-BIGINT;
+STRING;
 
 # dialect: databricks
 COALESCE(tbl.bool_col, tbl.str_col);
@@ -395,12 +395,28 @@ STRING;
 
 # dialect: databricks
 COALESCE(tbl.interval_col, tbl.str_col);
-INTERVAL;
+STRING;
 
 # dialect: databricks
 COALESCE(tbl.bin_col, tbl.str_col);
 BINARY;
 
+# dialect: databricks
+COALESCE(tbl.int_col, tbl.str_col);
+STRING;
+
+# dialect: databricks
+NVL(tbl.int_col, tbl.str_col);
+STRING;
+
+# dialect: databricks
+CASE WHEN cond THEN tbl.int_col ELSE tbl.str_col END;
+STRING;
+
+# dialect: databricks
+COALESCE(tbl.int_col, tbl.bigint_col);
+BIGINT;
+
 # dialect: spark, databricks
 LOCALTIMESTAMP();
 TIMESTAMPNTZ;

From 6d5187ef6ce9e8da3bbcc102accee1031732ba64 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Tue, 26 May 2026 19:34:45 -0700
Subject: [PATCH 2/2] fix(databricks): string-promote COALESCE/IF/CASE per
 findWiderCommonType [CLAUDE]

Databricks resolves least-common-type functions (COALESCE/IFNULL/NVL,
IF, CASE) with Spark's findWiderCommonType string promotion: when a value
argument is text and the rest are non-boolean/non-binary atomics, the
result is text. Previously these folded through Databricks' COERCES_TO
lattice (text coerces into numeric/temporal), so coalesce(int_col,
str_col) was annotated as BIGINT instead of STRING, regardless of
argument order.

Verified on a Databricks serverless (always-ANSI) warehouse: coalesce/if/
case of text + numeric/date/interval -> string; text + boolean/binary
raises DATATYPE_MISMATCH, so those fall back to the existing
_annotate_by_args annotation. Note this diverges from open-source Spark
AnsiTypeCoercion (which promotes string+int to long under ANSI) --
Databricks SQL string-promotes these functions regardless of ANSI mode.

Scoped to Databricks; Spark/Hive (non-ANSI, already string-promoting via
their inverted lattice) and other dialects are unaffected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sqlglot/dialects/databricks.py | 44 ++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/sqlglot/dialects/databricks.py b/sqlglot/dialects/databricks.py
index 44291626d0..ce3b9c411f 100644
--- a/sqlglot/dialects/databricks.py
+++ b/sqlglot/dialects/databricks.py
@@ -9,6 +9,43 @@
 from sqlglot.parsers.databricks import DatabricksParser
 from sqlglot.tokens import TokenType
 from sqlglot.optimizer.annotate_types import TypeAnnotator
+from sqlglot.typing.spark import EXPRESSION_METADATA as SPARK_EXPRESSION_METADATA
+
+
+def _string_promotes(values: list[exp.Expression]) -> bool:
+    """
+    Whether a least-common-type function string-promotes given its value arguments.
+
+    Mirrors Spark's findWiderCommonType string promotion: text plus non-boolean,
+    non-binary *atomic* types resolves to text. Text combined with BOOLEAN, BINARY
+    or a nested type (ARRAY/MAP/STRUCT) has no common type, so this returns False
+    and the caller falls back to its regular argument-based annotation.
+    """
+    blockers = (exp.DType.BOOLEAN, exp.DType.BINARY, *exp.DataType.NESTED_TYPES)
+    has_text = any(v.is_type(*exp.DataType.TEXT_TYPES) for v in values)
+    return has_text and not any(v.is_type(*blockers) for v in values)
+
+
+def _annotate_coalesce(self: TypeAnnotator, e: exp.Coalesce) -> exp.Coalesce:
+    if not _string_promotes([arg for arg in (e.this, *e.expressions) if arg]):
+        return self._annotate_by_args(e, "this", "expressions", promote=True)
+    self._set_type(e, exp.DType.TEXT)  # string promotion applies
+    return e
+
+
+def _annotate_if(self: TypeAnnotator, e: exp.If) -> exp.If:
+    if not _string_promotes([b for b in (e.args.get("true"), e.args.get("false")) if b]):
+        return self._annotate_by_args(e, "true", "false", promote=True)
+    self._set_type(e, exp.DType.TEXT)  # string promotion applies
+    return e
+
+
+def _annotate_case(self: TypeAnnotator, e: exp.Case) -> exp.Case:
+    branches = [branch.args["true"] for branch in e.args["ifs"]]
+    if not _string_promotes([r for r in (*branches, e.args.get("default")) if r]):
+        return self._annotate_by_args(e, *branches, "default")
+    self._set_type(e, exp.DType.TEXT)  # string promotion applies
+    return e
 
 
 class Databricks(Spark):
@@ -25,6 +62,13 @@ class Databricks(Spark):
             exp.DType.INTERVAL,
         }
 
+    EXPRESSION_METADATA = {
+        **SPARK_EXPRESSION_METADATA,
+        exp.Coalesce: {"annotator": _annotate_coalesce},
+        exp.If: {"annotator": _annotate_if},
+        exp.Case: {"annotator": _annotate_case},
+    }
+
     class JSONPathTokenizer(Spark.JSONPathTokenizer):
         IDENTIFIERS = ["`", '"']