From 7c31366a30fad3e1d67577e944e119e43688ff2c Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Tue, 19 May 2026 13:38:11 -0700
Subject: [PATCH 1/6] fix(spark): CONCAT/PAD type leaks non-string arg type
 when string args are VARCHAR/CHAR [CLAUDE]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_annotate_by_similar_args` only short-circuited on an exact `is_type(TEXT)` match. VARCHAR,
CHAR, NVARCHAR, and string literals (annotated as VARCHAR) are not `TEXT`, so for
`CONCAT(varchar_col, '-', date_col)` the loop fell through every argument and the final
`last_datatype = expr.type` left the result as the *last* arg's type (DATE), not a string.

Accept a tuple of acceptable match types and use its first element as the canonical result.
Spark's CONCAT/PAD now pass `(TEXT, *TEXT_TYPES)`, so any string-family arg yields TEXT.
The fall-through path is unchanged: when no arg matches, the result is still the type of the
last non-matching arg (or UNKNOWN if any arg is UNKNOWN).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sqlglot/typing/spark2.py                      | 21 ++++++++++++-------
 .../fixtures/optimizer/annotate_functions.sql | 20 ++++++++++++++++++
 tests/test_optimizer.py                       |  1 +
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/sqlglot/typing/spark2.py b/sqlglot/typing/spark2.py
index c734a740bb..ed9e9fe67f 100644
--- a/sqlglot/typing/spark2.py
+++ b/sqlglot/typing/spark2.py
@@ -13,13 +13,20 @@
 
 
 def _annotate_by_similar_args(
-    self: TypeAnnotator, expression: E, *args: str, target_type: exp.DataType | exp.DType
+    self: TypeAnnotator,
+    expression: E,
+    *args: str,
+    target_type: exp.DataType | exp.DType | tuple[exp.DataType | exp.DType, ...],
 ) -> E:
     """
     Infers the type of the expression according to the following rules:
-    - If all args are of the same type OR any arg is of target_type, the expr is inferred as such
-    - If any arg is of UNKNOWN type and none of target_type, the expr is inferred as UNKNOWN
+    - If any arg matches a target_type, the expr is inferred as the first target_type
+    - If any arg is of UNKNOWN type and none match target_type, the expr is inferred as UNKNOWN.
+    - Otherwise the expr is inferred as the type of the last non-matching arg.
     """
+    target_types = target_type if isinstance(target_type, tuple) else (target_type,)
+    result_type = target_types[0]
+
     expressions: list[exp.Expr] = []
     for arg in args:
         arg_expr = expression.args.get(arg)
@@ -31,9 +38,9 @@ def _annotate_by_similar_args(
     for expr in expressions:
         if expr.is_type(exp.DType.UNKNOWN):
             has_unknown = True
-        elif expr.is_type(target_type):
+        elif expr.is_type(*target_types):
             has_unknown = False
-            last_datatype = target_type
+            last_datatype = result_type
             break
         else:
             last_datatype = expr.type
@@ -74,13 +81,13 @@ def _annotate_by_similar_args(
     exp.AtTimeZone: {"returns": exp.DType.TIMESTAMP},
     exp.Concat: {
         "annotator": lambda self, e: _annotate_by_similar_args(
-            self, e, "expressions", target_type=exp.DType.TEXT
+            self, e, "expressions", target_type=(exp.DType.TEXT, *exp.DataType.TEXT_TYPES)
         )
     },
     exp.NextDay: {"returns": exp.DType.DATE},
     exp.Pad: {
         "annotator": lambda self, e: _annotate_by_similar_args(
-            self, e, "this", "fill_pattern", target_type=exp.DType.TEXT
+            self, e, "this", "fill_pattern", target_type=(exp.DType.TEXT, *exp.DataType.TEXT_TYPES)
         )
     },
 }
diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 8799b883fc..b1969051b6 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -257,6 +257,26 @@ UNKNOWN;
 CONCAT(unknown, unknown);
 UNKNOWN;
 
+# dialect: spark2, spark, databricks
+CONCAT(tbl.varchar_col, '-', tbl.date_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.date_col, tbl.varchar_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.str_col, '-', tbl.date_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.str_col, tbl.int_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+LPAD(tbl.varchar_col, 10, '0');
+STRING;
+
 # dialect: spark2, spark, databricks
 LPAD(tbl.bin_col, 1, tbl.bin_col);
 BINARY;
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 0dd9ab5831..37f974d311 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1489,6 +1489,7 @@ def test_annotate_funcs(self):
             "tbl": {
                 "bin_col": "BINARY",
                 "str_col": "STRING",
+                "varchar_col": "VARCHAR",
                 "bignum_col": "BIGNUMERIC",
                 "date_col": "DATE",
                 "decfloat_col": "DECFLOAT",

From 955870fee36821737be9e51dbfaa2a49e9b48c28 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Wed, 20 May 2026 15:27:22 -0700
Subject: [PATCH 2/6] test(spark): expand CONCAT/LPAD type-annotation fixture
 coverage [CLAUDE]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace varchar_col-based cases (which required a schema addition) with
literal-based equivalents — string literals parse as VARCHAR, supplying
the same TEXT_TYPES coverage without an extra schema column. Add missing
cases: date-first CONCAT, string+binary CONCAT, same-type array CONCAT
(the result keeps the array type), and LPAD with date args.
Remove varchar_col from the test schema.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../fixtures/optimizer/annotate_functions.sql | 46 +++++++++++++++++--
 tests/test_optimizer.py                       |  1 -
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index b1969051b6..35e4219951 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -258,11 +258,23 @@ CONCAT(unknown, unknown);
 UNKNOWN;
 
 # dialect: spark2, spark, databricks
-CONCAT(tbl.varchar_col, '-', tbl.date_col);
+CONCAT('x', tbl.str_col);
 STRING;
 
 # dialect: spark2, spark, databricks
-CONCAT(tbl.date_col, tbl.varchar_col);
+CONCAT('x', '-', tbl.str_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT('x', tbl.date_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.str_col, tbl.date_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.date_col, 'x');
 STRING;
 
 # dialect: spark2, spark, databricks
@@ -274,7 +286,35 @@ CONCAT(tbl.str_col, tbl.int_col);
 STRING;
 
 # dialect: spark2, spark, databricks
-LPAD(tbl.varchar_col, 10, '0');
+CONCAT('x', tbl.bin_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.array_col, tbl.array_col);
+ARRAY<STRING>;
+
+# dialect: spark2, spark, databricks
+CONCAT(array(1, 2), array(3, 4));
+ARRAY<INT>;
+
+# dialect: spark2, spark, databricks
+CONCAT(array(unhex('aa')), array(unhex('bb')));
+ARRAY<BINARY>;
+
+# dialect: spark2, spark, databricks
+LPAD(tbl.str_col, 10, '0');
+STRING;
+
+# dialect: spark2, spark, databricks
+LPAD('x', 10, tbl.str_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+LPAD('x', 10, tbl.date_col);
+STRING;
+
+# dialect: spark2, spark, databricks
+LPAD(tbl.date_col, 10, 'x');
 STRING;
 
 # dialect: spark2, spark, databricks
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
index 37f974d311..0dd9ab5831 100644
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@@ -1489,7 +1489,6 @@ def test_annotate_funcs(self):
             "tbl": {
                 "bin_col": "BINARY",
                 "str_col": "STRING",
-                "varchar_col": "VARCHAR",
                 "bignum_col": "BIGNUMERIC",
                 "date_col": "DATE",
                 "decfloat_col": "DECFLOAT",

From 494c9be88724bc9648ee499776f19fd9b1aff451 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Wed, 20 May 2026 15:31:48 -0700
Subject: [PATCH 3/6] test(spark): add failing UNKNOWN cases for invalid CONCAT
 type combinations [CLAUDE]

Array mixed with string, binary, or a literal should resolve to UNKNOWN
since Spark rejects these at analysis time with DATATYPE_MISMATCH. The
annotator currently returns the wrong type for all six cases; these tests
are expected to fail until the fix lands.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../fixtures/optimizer/annotate_functions.sql | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 35e4219951..8aced7e994 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -301,6 +301,30 @@ ARRAY<INT>;
 CONCAT(array(unhex('aa')), array(unhex('bb')));
 ARRAY<BINARY>;
 
+# dialect: spark2, spark, databricks
+CONCAT(tbl.array_col, tbl.str_col);
+UNKNOWN;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.str_col, tbl.array_col);
+UNKNOWN;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.array_col, tbl.bin_col);
+UNKNOWN;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.bin_col, tbl.array_col);
+UNKNOWN;
+
+# dialect: spark2, spark, databricks
+CONCAT(tbl.array_col, 'x');
+UNKNOWN;
+
+# dialect: spark2, spark, databricks
+CONCAT('x', tbl.array_col);
+UNKNOWN;
+
 # dialect: spark2, spark, databricks
 LPAD(tbl.str_col, 10, '0');
 STRING;

From 181ad62bb701615d327250516af3ff4c3d4e2d29 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Wed, 20 May 2026 16:01:06 -0700
Subject: [PATCH 4/6] Revert "test(spark): add failing UNKNOWN cases for
 invalid CONCAT type combinations [CLAUDE]"

This reverts commit 494c9be88724bc9648ee499776f19fd9b1aff451.
---
 .../fixtures/optimizer/annotate_functions.sql | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 8aced7e994..35e4219951 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -301,30 +301,6 @@ ARRAY<INT>;
 CONCAT(array(unhex('aa')), array(unhex('bb')));
 ARRAY<BINARY>;
 
-# dialect: spark2, spark, databricks
-CONCAT(tbl.array_col, tbl.str_col);
-UNKNOWN;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.str_col, tbl.array_col);
-UNKNOWN;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.array_col, tbl.bin_col);
-UNKNOWN;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.bin_col, tbl.array_col);
-UNKNOWN;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.array_col, 'x');
-UNKNOWN;
-
-# dialect: spark2, spark, databricks
-CONCAT('x', tbl.array_col);
-UNKNOWN;
-
 # dialect: spark2, spark, databricks
 LPAD(tbl.str_col, 10, '0');
 STRING;

From 080d0de76352778b52b09d703e1a05de512baa17 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Wed, 20 May 2026 16:08:50 -0700
Subject: [PATCH 5/6] fix(spark): rewrite _annotate_by_similar_args dispatch
 for CONCAT/PAD [CLAUDE]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old helper accumulated type info arg-by-arg against a caller-supplied
target_type and otherwise fell back to the last arg's type. Rewrite
_annotate_by_similar_args in place (dropping the target_type parameter) and
add a _common_array_element_type helper. CONCAT and PAD share one dispatch,
evaluated in this order:

  1. any TEXT_TYPES arg (STRING/VARCHAR/CHAR/...) → TEXT
  2. any UNKNOWN arg → UNKNOWN
  3. all args BINARY → BINARY
  4. any ARRAY arg → result of _common_array_element_type
     (UNKNOWN when arrays are mixed with non-array args)
  5. otherwise → TEXT

TEXT is checked before UNKNOWN, so CONCAT(str_col, unknown) resolves to
STRING. PAD has no separate array handling; it goes through the same path.

_common_array_element_type compares types by type.sql() equality so that
ARRAY<STRING> and ARRAY<ARRAY<STRING>> are not treated as the same type.
When array element types differ they are narrowed recursively: TEXT wins if
any element is a string type (ARRAY<STRING> + ARRAY<INT> → ARRAY<STRING>),
otherwise the element type is UNKNOWN.

The fixture drops several string-arg and same-type array cases and adds
date-only, date+int, and mixed-element array cases.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 sqlglot/typing/spark2.py                      | 91 +++++++++++--------
 .../fixtures/optimizer/annotate_functions.sql | 42 +--------
 2 files changed, 60 insertions(+), 73 deletions(-)

diff --git a/sqlglot/typing/spark2.py b/sqlglot/typing/spark2.py
index ed9e9fe67f..3a12554395 100644
--- a/sqlglot/typing/spark2.py
+++ b/sqlglot/typing/spark2.py
@@ -12,40 +12,65 @@
     from sqlglot.typing import ExprMetadataType
 
 
-def _annotate_by_similar_args(
-    self: TypeAnnotator,
-    expression: E,
-    *args: str,
-    target_type: exp.DataType | exp.DType | tuple[exp.DataType | exp.DType, ...],
-) -> E:
+def _common_array_element_type(types: list[exp.DataType]) -> exp.DataType | exp.DType:
     """
-    Infers the type of the expression according to the following rules:
-    - If any arg matches a target_type, the expr is inferred as the first target_type
-    - If any arg is of UNKNOWN type and none match target_type, the expr is inferred as UNKNOWN.
-    - Otherwise the expr is inferred as the type of the last non-matching arg.
+    Recursively narrows a list of CONCAT-arg DataTypes to their common type.
+
+    - Returns UNKNOWN for incompatible types: scalar mismatches (INT + DATE),
+      ARRAY mixed with non-ARRAY (e.g. CONCAT(ARRAY<INT>, INT)), and unmatched
+      nesting depths (which yield ARRAY<UNKNOWN> at the appropriate level).
+      UNKNOWN means "no common type", which the caller relies on.
+    - Return type is exp.DataType | exp.DType: bare DType for simple cases,
+      DataType for the recursive ARRAY case where nesting is required.
     """
-    target_types = target_type if isinstance(target_type, tuple) else (target_type,)
-    result_type = target_types[0]
+    normalized = [
+        exp.DataType(this=exp.DType.TEXT) if t.this in exp.DataType.TEXT_TYPES else t for t in types
+    ]
+    if len({t.sql() for t in normalized}) == 1:
+        return normalized[0]
+    if all(t.this == exp.DType.ARRAY for t in normalized):
+        elem_types = [
+            t.expressions[0] if t.expressions else exp.DataType(this=exp.DType.UNKNOWN)
+            for t in normalized
+        ]
+        common_elem = _common_array_element_type(elem_types)
+        elem_dt = (
+            common_elem if isinstance(common_elem, exp.DataType) else exp.DataType(this=common_elem)
+        )
+        return exp.DataType(this=exp.DType.ARRAY, expressions=[elem_dt], nested=True)
+    if any(t.this == exp.DType.TEXT for t in normalized):
+        return exp.DType.TEXT
+    return exp.DType.UNKNOWN
 
-    expressions: list[exp.Expr] = []
-    for arg in args:
-        arg_expr = expression.args.get(arg)
-        expressions.extend(expr for expr in ensure_list(arg_expr) if expr)
 
-    last_datatype = None
+def _annotate_by_similar_args(self: TypeAnnotator, expression: E, *arg_keys: str) -> E:
+    """
+    Type inference for CONCAT-family expressions (CONCAT, LPAD, RPAD).
+
+    - TEXT-before-UNKNOWN is load-bearing: a known text arg forces a text
+      result, since the query either coerces the unknown to string or fails
+      entirely — no valid execution produces a non-text result.
+    - TEXT_TYPES on input narrows to DType.TEXT on output: CONCAT/LPAD
+      accept any TEXT_TYPES member (VARCHAR/CHAR/NCHAR/NVARCHAR/NAME) as
+      input, but Spark always emits DType.TEXT.
+    """
+    arg_exprs: list[exp.Expression] = []
+    for key in arg_keys:
+        arg_exprs.extend(e for e in ensure_list(expression.args.get(key)) if e)
 
-    has_unknown = False
-    for expr in expressions:
-        if expr.is_type(exp.DType.UNKNOWN):
-            has_unknown = True
-        elif expr.is_type(*target_types):
-            has_unknown = False
-            last_datatype = result_type
-            break
-        else:
-            last_datatype = expr.type
+    result: exp.DataType | exp.DType
+    if any(e.is_type(*exp.DataType.TEXT_TYPES) for e in arg_exprs):
+        result = exp.DType.TEXT
+    elif any(e.is_type(exp.DType.UNKNOWN) for e in arg_exprs):
+        result = exp.DType.UNKNOWN
+    elif all(e.is_type(exp.DType.BINARY) for e in arg_exprs):
+        result = exp.DType.BINARY
+    elif any(e.is_type(exp.DType.ARRAY) for e in arg_exprs):
+        result = _common_array_element_type([e.type for e in arg_exprs])
+    else:
+        result = exp.DType.TEXT
 
-    self._set_type(expression, exp.DType.UNKNOWN if has_unknown else last_datatype)
+    self._set_type(expression, result)
     return expression
 
 
@@ -79,15 +104,9 @@ def _annotate_by_similar_args(
         )
     },
     exp.AtTimeZone: {"returns": exp.DType.TIMESTAMP},
-    exp.Concat: {
-        "annotator": lambda self, e: _annotate_by_similar_args(
-            self, e, "expressions", target_type=(exp.DType.TEXT, *exp.DataType.TEXT_TYPES)
-        )
-    },
+    exp.Concat: {"annotator": lambda self, e: _annotate_by_similar_args(self, e, "expressions")},
     exp.NextDay: {"returns": exp.DType.DATE},
     exp.Pad: {
-        "annotator": lambda self, e: _annotate_by_similar_args(
-            self, e, "this", "fill_pattern", target_type=(exp.DType.TEXT, *exp.DataType.TEXT_TYPES)
-        )
+        "annotator": lambda self, e: _annotate_by_similar_args(self, e, "this", "fill_pattern")
     },
 }
diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 35e4219951..6288890b62 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -257,32 +257,12 @@ UNKNOWN;
 CONCAT(unknown, unknown);
 UNKNOWN;
 
-# dialect: spark2, spark, databricks
-CONCAT('x', tbl.str_col);
-STRING;
-
-# dialect: spark2, spark, databricks
-CONCAT('x', '-', tbl.str_col);
-STRING;
-
 # dialect: spark2, spark, databricks
 CONCAT('x', tbl.date_col);
 STRING;
 
 # dialect: spark2, spark, databricks
-CONCAT(tbl.str_col, tbl.date_col);
-STRING;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.date_col, 'x');
-STRING;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.str_col, '-', tbl.date_col);
-STRING;
-
-# dialect: spark2, spark, databricks
-CONCAT(tbl.str_col, tbl.int_col);
+CONCAT(tbl.date_col, tbl.date_col);
 STRING;
 
 # dialect: spark2, spark, databricks
@@ -290,33 +270,21 @@ CONCAT('x', tbl.bin_col);
 STRING;
 
 # dialect: spark2, spark, databricks
-CONCAT(tbl.array_col, tbl.array_col);
+CONCAT(array('a', 'b'), array(1, 2));
 ARRAY<STRING>;
 
 # dialect: spark2, spark, databricks
-CONCAT(array(1, 2), array(3, 4));
-ARRAY<INT>;
-
-# dialect: spark2, spark, databricks
-CONCAT(array(unhex('aa')), array(unhex('bb')));
-ARRAY<BINARY>;
-
-# dialect: spark2, spark, databricks
-LPAD(tbl.str_col, 10, '0');
-STRING;
+CONCAT(array(array('a')), array(array(1)));
+ARRAY<ARRAY<STRING>>;
 
 # dialect: spark2, spark, databricks
-LPAD('x', 10, tbl.str_col);
+CONCAT(tbl.date_col, tbl.int_col);
 STRING;
 
 # dialect: spark2, spark, databricks
 LPAD('x', 10, tbl.date_col);
 STRING;
 
-# dialect: spark2, spark, databricks
-LPAD(tbl.date_col, 10, 'x');
-STRING;
-
 # dialect: spark2, spark, databricks
 LPAD(tbl.bin_col, 1, tbl.bin_col);
 BINARY;

From 50da99896ef08ac00291f6294c357f7b31157210 Mon Sep 17 00:00:00 2001
From: "richard.hughes" <richard.hughes@amperity.com>
Date: Tue, 26 May 2026 09:58:32 -0700
Subject: [PATCH 6/6] refactor(spark): simplify CONCAT/PAD annotator; drop
 array handling [CLAUDE]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per reviewer feedback (sqlglot#7661), array-overload typing is out of
scope. The annotator now does:

  - all BINARY args → BINARY
  - any arg with a known, non-array, non-binary type → STRING
    (binary excluded to preserve binary+unknown → UNKNOWN, since Spark
    can't disambiguate the string vs. binary overload there)
  - otherwise → UNKNOWN

Drops _common_array_element_type and the ARRAY<...> fixture cases.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sqlglot/typing/spark2.py                      | 61 +++++--------------
 .../fixtures/optimizer/annotate_functions.sql |  8 ---
 2 files changed, 15 insertions(+), 54 deletions(-)

diff --git a/sqlglot/typing/spark2.py b/sqlglot/typing/spark2.py
index 3a12554395..9def31ec7a 100644
--- a/sqlglot/typing/spark2.py
+++ b/sqlglot/typing/spark2.py
@@ -12,63 +12,32 @@
     from sqlglot.typing import ExprMetadataType
 
 
-def _common_array_element_type(types: list[exp.DataType]) -> exp.DataType | exp.DType:
-    """
-    Recursively narrows a list of CONCAT-arg DataTypes to their common type.
-
-    - Returns UNKNOWN for incompatible types: scalar mismatches (INT + DATE),
-      ARRAY mixed with non-ARRAY (e.g. CONCAT(ARRAY<INT>, INT)), and unmatched
-      nesting depths (which yield ARRAY<UNKNOWN> at the appropriate level).
-      UNKNOWN means "no common type", which the caller relies on.
-    - Return type is exp.DataType | exp.DType: bare DType for simple cases,
-      DataType for the recursive ARRAY case where nesting is required.
-    """
-    normalized = [
-        exp.DataType(this=exp.DType.TEXT) if t.this in exp.DataType.TEXT_TYPES else t for t in types
-    ]
-    if len({t.sql() for t in normalized}) == 1:
-        return normalized[0]
-    if all(t.this == exp.DType.ARRAY for t in normalized):
-        elem_types = [
-            t.expressions[0] if t.expressions else exp.DataType(this=exp.DType.UNKNOWN)
-            for t in normalized
-        ]
-        common_elem = _common_array_element_type(elem_types)
-        elem_dt = (
-            common_elem if isinstance(common_elem, exp.DataType) else exp.DataType(this=common_elem)
-        )
-        return exp.DataType(this=exp.DType.ARRAY, expressions=[elem_dt], nested=True)
-    if any(t.this == exp.DType.TEXT for t in normalized):
-        return exp.DType.TEXT
-    return exp.DType.UNKNOWN
-
-
 def _annotate_by_similar_args(self: TypeAnnotator, expression: E, *arg_keys: str) -> E:
     """
     Type inference for CONCAT-family expressions (CONCAT, LPAD, RPAD).
 
-    - TEXT-before-UNKNOWN is load-bearing: a known text arg forces a text
-      result, since the query either coerces the unknown to string or fails
-      entirely — no valid execution produces a non-text result.
-    - TEXT_TYPES on input narrows to DType.TEXT on output: CONCAT/LPAD
-      accept any TEXT_TYPES member (VARCHAR/CHAR/NCHAR/NVARCHAR/NAME) as
-      input, but Spark always emits DType.TEXT.
+    - All-BINARY → BINARY (the binary overload).
+    - Otherwise, if any arg has a known, non-array, non-binary type → STRING.
+      Spark coerces scalars (dates, ints, etc.) to string when mixed with a
+      string-resolving arg. The binary exclusion preserves the binary+unknown
+      case as UNKNOWN: Spark can't disambiguate the string vs. binary overload
+      there.
+    - Else → UNKNOWN. Covers all-unknown, binary+unknown, and arrays mixed only
+      with arrays, binary, or unknown args (array typing is out of scope here).
     """
     arg_exprs: list[exp.Expression] = []
     for key in arg_keys:
         arg_exprs.extend(e for e in ensure_list(expression.args.get(key)) if e)
 
-    result: exp.DataType | exp.DType
-    if any(e.is_type(*exp.DataType.TEXT_TYPES) for e in arg_exprs):
+    if arg_exprs and all(e.is_type(exp.DType.BINARY) for e in arg_exprs):
+        result: exp.DataType | exp.DType = exp.DType.BINARY
+    elif any(
+        e.type is not None and not e.is_type(exp.DType.UNKNOWN, exp.DType.ARRAY, exp.DType.BINARY)
+        for e in arg_exprs
+    ):
         result = exp.DType.TEXT
-    elif any(e.is_type(exp.DType.UNKNOWN) for e in arg_exprs):
-        result = exp.DType.UNKNOWN
-    elif all(e.is_type(exp.DType.BINARY) for e in arg_exprs):
-        result = exp.DType.BINARY
-    elif any(e.is_type(exp.DType.ARRAY) for e in arg_exprs):
-        result = _common_array_element_type([e.type for e in arg_exprs])
     else:
-        result = exp.DType.TEXT
+        result = exp.DType.UNKNOWN
 
     self._set_type(expression, result)
     return expression
diff --git a/tests/fixtures/optimizer/annotate_functions.sql b/tests/fixtures/optimizer/annotate_functions.sql
index 6288890b62..d71e13bc1a 100644
--- a/tests/fixtures/optimizer/annotate_functions.sql
+++ b/tests/fixtures/optimizer/annotate_functions.sql
@@ -269,14 +269,6 @@ STRING;
 CONCAT('x', tbl.bin_col);
 STRING;
 
-# dialect: spark2, spark, databricks
-CONCAT(array('a', 'b'), array(1, 2));
-ARRAY<STRING>;
-
-# dialect: spark2, spark, databricks
-CONCAT(array(array('a')), array(array(1)));
-ARRAY<ARRAY<STRING>>;
-
 # dialect: spark2, spark, databricks
 CONCAT(tbl.date_col, tbl.int_col);
 STRING;