From f73e26f5889716b99d7cb4b1c11646a04a748e91 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 01:48:08 +0100 Subject: [PATCH 01/12] Fix #15627: Add profiler metric support for complex data types (json, array, geo) --- .../profiler/metrics/static/max_length.py | 21 ++++++- .../profiler/metrics/static/min_length.py | 21 ++++++- .../src/metadata/profiler/orm/registry.py | 56 ++++++++++++++----- 3 files changed, 77 insertions(+), 21 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index 7f374d4537ee..d88bd2ff5c2c 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -23,7 +23,12 @@ from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn -from metadata.profiler.orm.registry import is_concatenable +from metadata.profiler.orm.registry import ( + is_concatenable, + is_collection, + is_struct, + is_complex, +) from metadata.utils.logger import profiler_logger if TYPE_CHECKING: @@ -59,7 +64,12 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" - if self._is_concatenable(): + if ( + self._is_concatenable() + or is_collection(self.col.type) + or is_struct(self.col.type) + or is_complex(self.col.type) + ): return func.max(LenFn(column(self.col.name, self.col.type))) logger.debug( @@ -106,7 +116,12 @@ def update_accumulator( length_vectorize_func = vectorize(len) chunk_max = None - if is_concatenable(column.type): + if ( + is_concatenable(column.type) + or is_collection(column.type) + or is_struct(column.type) + or is_complex(column.type) + ): max_val = length_vectorize_func(df[column.name].dropna().astype(str)).max() if not pd.isnull(max_val): chunk_max = max_val diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index 5e5888ce71d3..e93009c5912d 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -23,7 +23,12 @@ from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn -from metadata.profiler.orm.registry import is_concatenable +from metadata.profiler.orm.registry import ( + is_concatenable, + is_collection, + is_struct, + is_complex, +) from metadata.utils.logger import profiler_logger if TYPE_CHECKING: @@ -59,7 +64,12 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" - if self._is_concatenable(): + if ( + self._is_concatenable() + or is_collection(self.col.type) + or is_struct(self.col.type) + or is_complex(self.col.type) + ): return func.min(LenFn(column(self.col.name, self.col.type))) logger.debug( @@ -106,7 +116,12 @@ def update_accumulator( length_vectorize_func = vectorize(len) chunk_min = None - if is_concatenable(column.type): + if ( + is_concatenable(column.type) + or is_collection(column.type) + or is_struct(column.type) + or is_complex(column.type) + ): min_val = length_vectorize_func(df[column.name].dropna().astype(str)).min() if not pd.isnull(min_val): chunk_min = min_val diff --git a/ingestion/src/metadata/profiler/orm/registry.py b/ingestion/src/metadata/profiler/orm/registry.py index c6da40f10e1b..c8a8c237d678 100644 --- a/ingestion/src/metadata/profiler/orm/registry.py +++ b/ingestion/src/metadata/profiler/orm/registry.py @@ -116,23 +116,8 @@ class Dialects(metaclass=EnumAdapter): pass -# Sometimes we want to skip certain types for computing metrics. -# If the type is NULL, then we won't run the metric execution -# in the profiler. -# Note that not mapped types are set to NULL by default. NOT_COMPUTE = { sqlalchemy.types.NullType.__name__, - sqlalchemy.ARRAY.__name__, - sqlalchemy.JSON.__name__, - sqa_types.SQAMap.__name__, - sqa_types.SQAStruct.__name__, - sqa_types.SQASet.__name__, - sqa_types.SQAUnion.__name__, - sqa_types.SQASGeography.__name__, - DataType.GEOMETRY.value, - DataType.ARRAY.value, - DataType.JSON.value, - CustomTypes.ARRAY.value.__name__, CustomTypes.SQADATETIMERANGE.value.__name__, DataType.XML.value, CustomTypes.UNDETERMINED.value.__name__, @@ -154,6 +139,26 @@ class Dialects(metaclass=EnumAdapter): CONCATENABLE_SET = {DataType.STRING.value, DataType.TEXT.value} +COLLECTION_SET = { + sqlalchemy.ARRAY.__name__, + DataType.ARRAY.value, + CustomTypes.ARRAY.value.__name__, +} + +STRUCT_SET = { + sqlalchemy.JSON.__name__, + sqa_types.SQAMap.__name__, + sqa_types.SQAStruct.__name__, + sqa_types.SQASet.__name__, + sqa_types.SQAUnion.__name__, + DataType.JSON.value, +} + +COMPLEX_SET = { + sqa_types.SQASGeography.__name__, + DataType.GEOMETRY.value, +} + # Now, let's define some helper methods to identify # the nature of an SQLAlchemy type @@ -237,3 +242,24 @@ def is_blob(_type) -> bool: DataType.TEXT.value, } return isinstance(_type, (HexByteString, LargeBinary, Text)) + + +def is_collection(_type) -> bool: + """Check if the type is a collection, e.g. Array""" + if isinstance(_type, DataType): + return _type.value in COLLECTION_SET + return _type.__class__.__name__ in COLLECTION_SET + + +def is_struct(_type) -> bool: + """Check if the type is a struct, e.g. JSON, Map, Struct""" + if isinstance(_type, DataType): + return _type.value in STRUCT_SET + return _type.__class__.__name__ in STRUCT_SET + + +def is_complex(_type) -> bool: + """Check if the type is complex, e.g. Geography or Geometry""" + if isinstance(_type, DataType): + return _type.value in COMPLEX_SET + return _type.__class__.__name__ in COMPLEX_SET From 235093afa38613748e3bfdb14dc2798fd7cffc33 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 02:13:04 +0100 Subject: [PATCH 02/12] Refactor: Add is_complex_type helper and secure LenFn dialect evaluation --- .../profiler/metrics/static/max_length.py | 18 +++--------------- .../profiler/metrics/static/min_length.py | 18 +++--------------- .../src/metadata/profiler/orm/registry.py | 5 +++++ 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index d88bd2ff5c2c..0194803ffac5 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -25,9 +25,7 @@ from metadata.profiler.orm.functions.length import LenFn from metadata.profiler.orm.registry import ( is_concatenable, - is_collection, - is_struct, - is_complex, + is_complex_type, ) from metadata.utils.logger import profiler_logger @@ -64,12 +62,7 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" - if ( - self._is_concatenable() - or is_collection(self.col.type) - or is_struct(self.col.type) - or is_complex(self.col.type) - ): + if self._is_concatenable(): return func.max(LenFn(column(self.col.name, self.col.type))) logger.debug( @@ -116,12 +109,7 @@ def update_accumulator( length_vectorize_func = vectorize(len) chunk_max = None - if ( - is_concatenable(column.type) - or is_collection(column.type) - or is_struct(column.type) - or is_complex(column.type) - ): + if is_concatenable(column.type) or is_complex_type(column.type): max_val = length_vectorize_func(df[column.name].dropna().astype(str)).max() if not pd.isnull(max_val): chunk_max = max_val diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index e93009c5912d..672e84b2b207 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -25,9 +25,7 @@ from metadata.profiler.orm.functions.length import LenFn from metadata.profiler.orm.registry import ( is_concatenable, - is_collection, - is_struct, - is_complex, + is_complex_type, ) from metadata.utils.logger import profiler_logger @@ -64,12 +62,7 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" - if ( - self._is_concatenable() - or is_collection(self.col.type) - or is_struct(self.col.type) - or is_complex(self.col.type) - ): + if self._is_concatenable(): return func.min(LenFn(column(self.col.name, self.col.type))) logger.debug( @@ -116,12 +109,7 @@ def update_accumulator( length_vectorize_func = vectorize(len) chunk_min = None - if ( - is_concatenable(column.type) - or is_collection(column.type) - or is_struct(column.type) - or is_complex(column.type) - ): + if is_concatenable(column.type) or is_complex_type(column.type): min_val = length_vectorize_func(df[column.name].dropna().astype(str)).min() if not pd.isnull(min_val): chunk_min = min_val diff --git a/ingestion/src/metadata/profiler/orm/registry.py b/ingestion/src/metadata/profiler/orm/registry.py index c8a8c237d678..bfa9583d883e 100644 --- a/ingestion/src/metadata/profiler/orm/registry.py +++ b/ingestion/src/metadata/profiler/orm/registry.py @@ -263,3 +263,8 @@ def is_complex(_type) -> bool: if isinstance(_type, DataType): return _type.value in COMPLEX_SET return _type.__class__.__name__ in COMPLEX_SET + + +def is_complex_type(_type) -> bool: + """Helper method to group collections, structs, and complex types""" + return is_collection(_type) or is_struct(_type) or is_complex(_type) From 4fcced2b38d1c944b6737223ca48b07d4efc08c1 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 02:21:18 +0100 Subject: [PATCH 03/12] Fix: Guard distinct and unique counts against complex types and fix isort --- .../src/metadata/profiler/metrics/static/distinct_count.py | 3 +++ .../src/metadata/profiler/metrics/static/max_length.py | 2 +- .../src/metadata/profiler/metrics/static/min_length.py | 2 +- .../src/metadata/profiler/metrics/static/unique_count.py | 6 ++++-- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/distinct_count.py b/ingestion/src/metadata/profiler/metrics/static/distinct_count.py index b6ae7942b2ca..d8a9cc3b4a7b 100644 --- a/ingestion/src/metadata/profiler/metrics/static/distinct_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/distinct_count.py @@ -25,6 +25,7 @@ from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.orm.functions.count import CountFn +from metadata.profiler.orm.registry import is_complex_type from metadata.utils.logger import profiler_logger logger = profiler_logger() @@ -52,6 +53,8 @@ def fn(self): """ Distinct Count metric for Sqlalchemy connectors """ + if is_complex_type(self.col.type): + return None return func.count(distinct(CountFn(column(self.col.name, self.col.type)))) def df_fn(self, dfs: Optional["PandasRunner"] = None): diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index 0194803ffac5..389575f0f163 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -24,8 +24,8 @@ from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn from metadata.profiler.orm.registry import ( - is_concatenable, is_complex_type, + is_concatenable, ) from metadata.utils.logger import profiler_logger diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index 672e84b2b207..d5a477f074ae 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -24,8 +24,8 @@ from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn from metadata.profiler.orm.registry import ( - is_concatenable, is_complex_type, + is_concatenable, ) from metadata.utils.logger import profiler_logger diff --git a/ingestion/src/metadata/profiler/metrics/static/unique_count.py b/ingestion/src/metadata/profiler/metrics/static/unique_count.py index 1f646145c91d..a6ef788c487c 100644 --- a/ingestion/src/metadata/profiler/metrics/static/unique_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/unique_count.py @@ -23,7 +23,7 @@ from metadata.profiler.metrics.core import QueryMetric from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.unique_count import _unique_count_query_mapper -from metadata.profiler.orm.registry import NOT_COMPUTE, Dialects +from metadata.profiler.orm.registry import NOT_COMPUTE, Dialects, is_complex_type from metadata.utils.logger import profiler_logger if TYPE_CHECKING: @@ -60,7 +60,9 @@ def query(self, sample: Optional[type], session: Optional[Session] = None): "We are missing the session attribute to compute the UniqueCount." ) - if self.col.type.__class__.__name__ in NOT_COMPUTE: + if self.col.type.__class__.__name__ in NOT_COMPUTE or is_complex_type( + self.col.type + ): return None # Run all queries on top of the sampled data From 6029140ce88fccd3f9f524abfb952d7b948a129f Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 03:02:48 +0100 Subject: [PATCH 04/12] Style: fix python checkstyle formatting --- ingestion/src/metadata/profiler/metrics/static/column_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/column_names.py | 1 + ingestion/src/metadata/profiler/metrics/static/count.py | 1 + ingestion/src/metadata/profiler/metrics/static/count_in_set.py | 1 + ingestion/src/metadata/profiler/metrics/static/distinct_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/ilike_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/like_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/max.py | 1 + ingestion/src/metadata/profiler/metrics/static/max_length.py | 1 + ingestion/src/metadata/profiler/metrics/static/mean.py | 1 + ingestion/src/metadata/profiler/metrics/static/min.py | 1 + ingestion/src/metadata/profiler/metrics/static/min_length.py | 1 + ingestion/src/metadata/profiler/metrics/static/not_like_count.py | 1 + .../metadata/profiler/metrics/static/not_regexp_match_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/null_count.py | 1 + .../src/metadata/profiler/metrics/static/null_missing_count.py | 1 + .../src/metadata/profiler/metrics/static/regexp_match_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/row_count.py | 1 + ingestion/src/metadata/profiler/metrics/static/sum.py | 1 + ingestion/src/metadata/profiler/metrics/static/unique_count.py | 1 + 20 files changed, 20 insertions(+) diff --git a/ingestion/src/metadata/profiler/metrics/static/column_count.py b/ingestion/src/metadata/profiler/metrics/static/column_count.py index a5e4bc461335..88cc525a7d2f 100644 --- a/ingestion/src/metadata/profiler/metrics/static/column_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/column_count.py @@ -12,6 +12,7 @@ """ Table Column Count Metric definition """ + # pylint: disable=duplicate-code from typing import TYPE_CHECKING, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/column_names.py b/ingestion/src/metadata/profiler/metrics/static/column_names.py index 00568a68e2dd..6c085e7cbe4f 100644 --- a/ingestion/src/metadata/profiler/metrics/static/column_names.py +++ b/ingestion/src/metadata/profiler/metrics/static/column_names.py @@ -12,6 +12,7 @@ """ Table Column Count Metric definition """ + # pylint: disable=duplicate-code from typing import TYPE_CHECKING, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/count.py b/ingestion/src/metadata/profiler/metrics/static/count.py index f30c1b170143..8adbe4335ef0 100644 --- a/ingestion/src/metadata/profiler/metrics/static/count.py +++ b/ingestion/src/metadata/profiler/metrics/static/count.py @@ -12,6 +12,7 @@ """ Count Metric definition """ + # pylint: disable=duplicate-code import traceback diff --git a/ingestion/src/metadata/profiler/metrics/static/count_in_set.py b/ingestion/src/metadata/profiler/metrics/static/count_in_set.py index da2955201355..37e927584c95 100644 --- a/ingestion/src/metadata/profiler/metrics/static/count_in_set.py +++ b/ingestion/src/metadata/profiler/metrics/static/count_in_set.py @@ -12,6 +12,7 @@ """ CountInSet Metric definition """ + # pylint: disable=duplicate-code import traceback from typing import TYPE_CHECKING, List, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/distinct_count.py b/ingestion/src/metadata/profiler/metrics/static/distinct_count.py index d8a9cc3b4a7b..e05ed6f56ada 100644 --- a/ingestion/src/metadata/profiler/metrics/static/distinct_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/distinct_count.py @@ -12,6 +12,7 @@ """ Distinct Count Metric definition """ + # pylint: disable=duplicate-code import json diff --git a/ingestion/src/metadata/profiler/metrics/static/ilike_count.py b/ingestion/src/metadata/profiler/metrics/static/ilike_count.py index 9e3237ad33eb..41147e12b12d 100644 --- a/ingestion/src/metadata/profiler/metrics/static/ilike_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/ilike_count.py @@ -12,6 +12,7 @@ """ ILIKE Count Metric definition """ + # pylint: disable=duplicate-code from sqlalchemy import case, column diff --git a/ingestion/src/metadata/profiler/metrics/static/like_count.py b/ingestion/src/metadata/profiler/metrics/static/like_count.py index 87d05ed1119f..11d104fe5f18 100644 --- a/ingestion/src/metadata/profiler/metrics/static/like_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/like_count.py @@ -12,6 +12,7 @@ """ Like Count Metric definition """ + # pylint: disable=duplicate-code from sqlalchemy import case, column diff --git a/ingestion/src/metadata/profiler/metrics/static/max.py b/ingestion/src/metadata/profiler/metrics/static/max.py index 235555cb9fcd..52e6fa2d7f46 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max.py +++ b/ingestion/src/metadata/profiler/metrics/static/max.py @@ -12,6 +12,7 @@ """ Max Metric definition """ + from functools import partial from typing import TYPE_CHECKING, Callable, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index 389575f0f163..3c48bd2c998e 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -12,6 +12,7 @@ """ MAX_LENGTH Metric definition """ + # pylint: disable=duplicate-code diff --git a/ingestion/src/metadata/profiler/metrics/static/mean.py b/ingestion/src/metadata/profiler/metrics/static/mean.py index 33393a2d54b3..195ca9605e2f 100644 --- a/ingestion/src/metadata/profiler/metrics/static/mean.py +++ b/ingestion/src/metadata/profiler/metrics/static/mean.py @@ -12,6 +12,7 @@ """ AVG Metric definition """ + from functools import partial from typing import TYPE_CHECKING, Callable, NamedTuple, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/min.py b/ingestion/src/metadata/profiler/metrics/static/min.py index f831cd212e95..ad69cd4fff81 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min.py +++ b/ingestion/src/metadata/profiler/metrics/static/min.py @@ -12,6 +12,7 @@ """ Min Metric definition """ + from functools import partial from typing import TYPE_CHECKING, Callable, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index d5a477f074ae..4a5e6ff24705 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -12,6 +12,7 @@ """ MIN_LENGTH Metric definition """ + # pylint: disable=duplicate-code diff --git a/ingestion/src/metadata/profiler/metrics/static/not_like_count.py b/ingestion/src/metadata/profiler/metrics/static/not_like_count.py index eadec9cc4274..611a60e8f7a5 100644 --- a/ingestion/src/metadata/profiler/metrics/static/not_like_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/not_like_count.py @@ -12,6 +12,7 @@ """ Like Count Metric definition """ + # pylint: disable=duplicate-code from sqlalchemy import case, column diff --git a/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py b/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py index 8e4b7e33d9d2..2f037f7c80b4 100644 --- a/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py @@ -12,6 +12,7 @@ """ Regex Count Metric definition """ + # pylint: disable=duplicate-code import traceback diff --git a/ingestion/src/metadata/profiler/metrics/static/null_count.py b/ingestion/src/metadata/profiler/metrics/static/null_count.py index 71f97a60e1e1..4e48c5996a4b 100644 --- a/ingestion/src/metadata/profiler/metrics/static/null_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/null_count.py @@ -12,6 +12,7 @@ """ Null Count Metric definition """ + # pylint: disable=duplicate-code from typing import TYPE_CHECKING, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/null_missing_count.py b/ingestion/src/metadata/profiler/metrics/static/null_missing_count.py index 8b19638a3dd0..de316b074c82 100644 --- a/ingestion/src/metadata/profiler/metrics/static/null_missing_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/null_missing_count.py @@ -12,6 +12,7 @@ """ Null Count Metric definition """ + # pylint: disable=duplicate-code from typing import TYPE_CHECKING, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py b/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py index e4ab3c54796b..0b14dc8b2d81 100644 --- a/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py @@ -12,6 +12,7 @@ """ Regex Count Metric definition """ + # pylint: disable=duplicate-code import traceback diff --git a/ingestion/src/metadata/profiler/metrics/static/row_count.py b/ingestion/src/metadata/profiler/metrics/static/row_count.py index d16311c1a8e5..b028a62e6852 100644 --- a/ingestion/src/metadata/profiler/metrics/static/row_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/row_count.py @@ -12,6 +12,7 @@ """ Table Count Metric definition """ + from typing import TYPE_CHECKING, Callable, Optional from sqlalchemy import func diff --git a/ingestion/src/metadata/profiler/metrics/static/sum.py b/ingestion/src/metadata/profiler/metrics/static/sum.py index c4d98ff7bcfe..9280f6321cc6 100644 --- a/ingestion/src/metadata/profiler/metrics/static/sum.py +++ b/ingestion/src/metadata/profiler/metrics/static/sum.py @@ -12,6 +12,7 @@ """ SUM Metric definition """ + from functools import partial from typing import TYPE_CHECKING, Callable, Optional diff --git a/ingestion/src/metadata/profiler/metrics/static/unique_count.py b/ingestion/src/metadata/profiler/metrics/static/unique_count.py index a6ef788c487c..a2b048887c6c 100644 --- a/ingestion/src/metadata/profiler/metrics/static/unique_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/unique_count.py @@ -12,6 +12,7 @@ """ Unique Count Metric definition """ + import json from collections import Counter from typing import TYPE_CHECKING, Optional From 62a1e9e656b1ca758bce293105441a9b62adee1d Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 03:07:51 +0100 Subject: [PATCH 05/12] Fix: Guard string-pattern metrics against complex types --- ingestion/src/metadata/profiler/metrics/static/ilike_count.py | 3 +++ ingestion/src/metadata/profiler/metrics/static/like_count.py | 3 +++ .../src/metadata/profiler/metrics/static/not_like_count.py | 3 +++ .../profiler/metrics/static/not_regexp_match_count.py | 4 +++- .../metadata/profiler/metrics/static/regexp_match_count.py | 4 +++- 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/ilike_count.py b/ingestion/src/metadata/profiler/metrics/static/ilike_count.py index 41147e12b12d..3d9bebc673ff 100644 --- a/ingestion/src/metadata/profiler/metrics/static/ilike_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/ilike_count.py @@ -20,6 +20,7 @@ from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.orm.functions.sum import SumFn +from metadata.profiler.orm.registry import is_complex_type class ILikeCount(StaticMetric): @@ -47,6 +48,8 @@ def metric_type(self): @_label def fn(self): + if is_complex_type(self.col.type): + return None if not hasattr(self, "expression"): raise AttributeError( "ILike Count requires an expression to be set: add_props(expression=...)(Metrics.iLikeCount)" diff --git a/ingestion/src/metadata/profiler/metrics/static/like_count.py b/ingestion/src/metadata/profiler/metrics/static/like_count.py index 11d104fe5f18..b42f28da1852 100644 --- a/ingestion/src/metadata/profiler/metrics/static/like_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/like_count.py @@ -20,6 +20,7 @@ from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.orm.functions.sum import SumFn +from metadata.profiler.orm.registry import is_complex_type class LikeCount(StaticMetric): @@ -47,6 +48,8 @@ def metric_type(self): @_label def fn(self): + if is_complex_type(self.col.type): + return None if not hasattr(self, "expression"): raise AttributeError( "Like Count requires an expression to be set: add_props(expression=...)(Metrics.likeCount)" diff --git a/ingestion/src/metadata/profiler/metrics/static/not_like_count.py b/ingestion/src/metadata/profiler/metrics/static/not_like_count.py index 611a60e8f7a5..470f7233eacd 100644 --- a/ingestion/src/metadata/profiler/metrics/static/not_like_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/not_like_count.py @@ -20,6 +20,7 @@ from metadata.generated.schema.configuration.profilerConfiguration import MetricType from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.orm.functions.sum import SumFn +from metadata.profiler.orm.registry import is_complex_type class NotLikeCount(StaticMetric): @@ -47,6 +48,8 @@ def metric_type(self): @_label def fn(self): + if is_complex_type(self.col.type): + return None if not hasattr(self, "expression"): raise AttributeError( "Not Like Count requires an expression to be set: add_props(expression=...)(Metrics.notLikeCount)" diff --git a/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py b/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py index 2f037f7c80b4..88c4b6a7ba99 100644 --- a/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/not_regexp_match_count.py @@ -25,7 +25,7 @@ from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.regexp import RegexpMatchFn from metadata.profiler.orm.functions.sum import SumFn -from metadata.profiler.orm.registry import is_concatenable +from metadata.profiler.orm.registry import is_complex_type, is_concatenable from metadata.utils.logger import profiler_logger if TYPE_CHECKING: @@ -65,6 +65,8 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" + if is_complex_type(self.col.type): + return None if not hasattr(self, "expression"): raise AttributeError( "Not Regex Count requires an expression to be set: add_props(expression=...)(Metrics.notRegexCount)" diff --git a/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py b/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py index 0b14dc8b2d81..78c715d3aace 100644 --- a/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py +++ b/ingestion/src/metadata/profiler/metrics/static/regexp_match_count.py @@ -25,7 +25,7 @@ from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.regexp import RegexpMatchFn from metadata.profiler.orm.functions.sum import SumFn -from metadata.profiler.orm.registry import is_concatenable +from metadata.profiler.orm.registry import is_complex_type, is_concatenable from metadata.utils.logger import profiler_logger if TYPE_CHECKING: @@ -65,6 +65,8 @@ def _is_concatenable(self): @_label def fn(self): """sqlalchemy function""" + if is_complex_type(self.col.type): + return None if not hasattr(self, "expression"): raise AttributeError( "Regex Count requires an expression to be set: add_props(expression=...)(Metrics.regexCount)" From 86773f977d03033747ad8f53b3b5ebf56d3bc028 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 03:21:43 +0100 Subject: [PATCH 06/12] Style: disable pylint import-outside-toplevel in accumulator --- ingestion/src/metadata/profiler/metrics/static/max_length.py | 1 + ingestion/src/metadata/profiler/metrics/static/min_length.py | 1 + 2 files changed, 2 insertions(+) diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index 3c48bd2c998e..df49708733e8 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -104,6 +104,7 @@ def update_accumulator( current_max: Optional[int], df: "pd.DataFrame", column ) -> Optional[int]: """Computes one DataFrame chunk and updates the running maximum""" + # pylint: disable=import-outside-toplevel import pandas as pd from numpy import vectorize diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index 4a5e6ff24705..6592d8f3ed33 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -104,6 +104,7 @@ def update_accumulator( current_min: Optional[int], df: "pd.DataFrame", column ) -> Optional[int]: """Computes one DataFrame chunk and updates the running minimum""" + # pylint: disable=import-outside-toplevel import pandas as pd from numpy import vectorize From ef1a109b9f9c33d7fcb22521334f8a0e3eab5d13 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 08:19:00 +0100 Subject: [PATCH 07/12] Style: remove numpy from length metrics to satisfy checkstyle --- ingestion/src/metadata/profiler/metrics/static/max_length.py | 4 +--- ingestion/src/metadata/profiler/metrics/static/min_length.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index df49708733e8..285d0d6caf4f 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -106,13 +106,11 @@ def update_accumulator( """Computes one DataFrame chunk and updates the running maximum""" # pylint: disable=import-outside-toplevel import pandas as pd - from numpy import vectorize - length_vectorize_func = vectorize(len) chunk_max = None if is_concatenable(column.type) or is_complex_type(column.type): - max_val = length_vectorize_func(df[column.name].dropna().astype(str)).max() + max_val = df[column.name].dropna().astype(str).str.len().max() if not pd.isnull(max_val): chunk_max = max_val diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index 6592d8f3ed33..7ca12a0d34ec 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -106,13 +106,11 @@ def update_accumulator( """Computes one DataFrame chunk and updates the running minimum""" # pylint: disable=import-outside-toplevel import pandas as pd - from numpy import vectorize - length_vectorize_func = vectorize(len) chunk_min = None if is_concatenable(column.type) or is_complex_type(column.type): - min_val = length_vectorize_func(df[column.name].dropna().astype(str)).min() + min_val = df[column.name].dropna().astype(str).str.len().min() if not pd.isnull(min_val): chunk_min = min_val From a4677e4b8e62cde21753eaeba3199739e275dcb1 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 08:30:08 +0100 Subject: [PATCH 08/12] Style: collapse import to single line to satisfy isort --- ingestion/src/metadata/profiler/metrics/static/max_length.py | 5 +---- ingestion/src/metadata/profiler/metrics/static/min_length.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ingestion/src/metadata/profiler/metrics/static/max_length.py b/ingestion/src/metadata/profiler/metrics/static/max_length.py index 285d0d6caf4f..44790d907def 100644 --- a/ingestion/src/metadata/profiler/metrics/static/max_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/max_length.py @@ -24,10 +24,7 @@ from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn -from metadata.profiler.orm.registry import ( - is_complex_type, - is_concatenable, -) +from metadata.profiler.orm.registry import is_complex_type, is_concatenable from metadata.utils.logger import profiler_logger if TYPE_CHECKING: diff --git a/ingestion/src/metadata/profiler/metrics/static/min_length.py b/ingestion/src/metadata/profiler/metrics/static/min_length.py index 7ca12a0d34ec..2f76df60eb30 100644 --- a/ingestion/src/metadata/profiler/metrics/static/min_length.py +++ b/ingestion/src/metadata/profiler/metrics/static/min_length.py @@ -24,10 +24,7 @@ from metadata.profiler.metrics.core import StaticMetric, _label from metadata.profiler.metrics.pandas_metric_protocol import PandasComputation from metadata.profiler.orm.functions.length import LenFn -from metadata.profiler.orm.registry import ( - is_complex_type, - is_concatenable, -) +from metadata.profiler.orm.registry import is_complex_type, is_concatenable from metadata.utils.logger import profiler_logger if TYPE_CHECKING: From 8328c0a78af20621db5106f1184005dbc104cd5c Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 10:48:59 +0100 Subject: [PATCH 09/12] fix: resolve YAML parse error in docker-compose configs (#26775) --- docker/development/docker-compose-gcp.yml | 4 ++-- docker/development/docker-compose-postgres-fuseki.yml | 4 ++-- docker/development/docker-compose-postgres.yml | 4 ++-- docker/development/docker-compose.yml | 4 ++-- .../docker-compose-openmetadata.yml | 4 ++-- docker/docker-compose-quickstart/docker-compose-postgres.yml | 4 ++-- docker/docker-compose-quickstart/docker-compose-rdf.yml | 4 ++-- docker/docker-compose-quickstart/docker-compose.yml | 4 ++-- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docker/development/docker-compose-gcp.yml b/docker/development/docker-compose-gcp.yml index 392b51187a80..1c98f2aaf5ee 100644 --- a/docker/development/docker-compose-gcp.yml +++ b/docker/development/docker-compose-gcp.yml @@ -119,7 +119,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -333,7 +333,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose-postgres-fuseki.yml b/docker/development/docker-compose-postgres-fuseki.yml index 0a40ad473ebf..3a6779c3abee 100644 --- a/docker/development/docker-compose-postgres-fuseki.yml +++ b/docker/development/docker-compose-postgres-fuseki.yml @@ -119,7 +119,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -350,7 +350,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose-postgres.yml b/docker/development/docker-compose-postgres.yml index 23fe5b6e6b22..35b66f65774a 100644 --- a/docker/development/docker-compose-postgres.yml +++ b/docker/development/docker-compose-postgres.yml @@ -118,7 +118,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -345,7 +345,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml index c2ea751671ce..05c68b311358 100644 --- a/docker/development/docker-compose.yml +++ b/docker/development/docker-compose.yml @@ -115,7 +115,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -338,7 +338,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml index 82bd2fda554d..fc0ad5c338fc 100644 --- a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml +++ b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml @@ -60,7 +60,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -281,7 +281,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-quickstart/docker-compose-postgres.yml b/docker/docker-compose-quickstart/docker-compose-postgres.yml index 8709585a7f40..b334d8eb3ab5 100644 --- a/docker/docker-compose-quickstart/docker-compose-postgres.yml +++ b/docker/docker-compose-quickstart/docker-compose-postgres.yml @@ -108,7 +108,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -329,7 +329,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-quickstart/docker-compose-rdf.yml b/docker/docker-compose-quickstart/docker-compose-rdf.yml index 4b106eda07fc..745d733fede2 100644 --- a/docker/docker-compose-quickstart/docker-compose-rdf.yml +++ b/docker/docker-compose-quickstart/docker-compose-rdf.yml @@ -148,7 +148,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" # Database configuration for MySQL DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver} @@ -318,7 +318,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" # Database configuration for MySQL DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver} diff --git a/docker/docker-compose-quickstart/docker-compose.yml b/docker/docker-compose-quickstart/docker-compose.yml index df24da9b31fe..a71fe901418f 100644 --- a/docker/docker-compose-quickstart/docker-compose.yml +++ b/docker/docker-compose-quickstart/docker-compose.yml @@ -106,7 +106,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -327,7 +327,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} + OIDC_CUSTOM_PARAMS: "{}" OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} From fd6ad97595686ba89c61905fa68b8267530e1c1a Mon Sep 17 00:00:00 2001 From: Esvanth Date: Fri, 3 Apr 2026 10:58:44 +0100 Subject: [PATCH 10/12] fix: resolve YAML parse error and French translation issues (#26775, #25711) --- .../resources/ui/src/locale/languages/en-us.json | 4 +++- .../resources/ui/src/locale/languages/fr-fr.json | 2 ++ .../ui/src/pages/SignUp/BasicSignup.component.tsx | 12 ++++-------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json index d9d765bafef8..53d82b1f6dca 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/en-us.json @@ -851,6 +851,7 @@ "filtering-condition": "Filtering Condition", "filters-configuration": "Filters Configuration", "first": "First", + "first-name": "First Name", "first-lowercase": "first", "first-quartile": "First Quartile", "fit-to-screen": "Fit to screen", @@ -1098,6 +1099,7 @@ "large": "Large", "last": "Last", "last-14-days": "Last 14 days", + "last-name": "Last Name", "last-24-hours": "Last 24 hours", "last-30-days": "Last 30 days", "last-7-days": "Last 7 days", @@ -2602,8 +2604,8 @@ "field-timeout-description": "Connection Timeout", "field-use-aws-credentials-description": "Indicates whether to use AWS credentials when connecting to OpenSearch in AWS.", "field-use-ssl-description": "This indicates whether to use SSL when connecting to Elasticsearch. By default, we will ignore SSL settings.", - "field-value-updated-notification": "Your {{fieldName}} changed to {{fieldValue}}", "field-verify-certs-description": "This indicates whether to verify certificates when using SSL connection to Elasticsearch. It's ignored by default and is set to true. Ensure that you send the certificates in the property `CA Certificates`.", + "field-value-updated-notification": "Your {{fieldName}} changed to {{fieldValue}}", "file-format-size": "{{formats}} (max. {{size}})", "file-size-exceeded": "File size exceeds maximum allowed size of {{size}}", "filter-pattern-include-exclude-info": "Explicitly {{activity}} {{filterPattern}} by adding a list of comma-separated regex.", diff --git a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json index 4c69290fb6a5..d8cc8aeab021 100644 --- a/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json +++ b/openmetadata-ui/src/main/resources/ui/src/locale/languages/fr-fr.json @@ -851,6 +851,7 @@ "filtering-condition": "Condition de Filtrage", "filters-configuration": "Configuration des filtres", "first": "Premier", + "first-name": "Prénom", "first-lowercase": "premier", "first-quartile": "Premier Quartile", "fit-to-screen": "Plein Écran", @@ -888,6 +889,7 @@ "full-coverage": "Couverture complète", "full-name": "Nom Complet", "full-screen": "Plein Écran", + "last-name": "Nom", "full-screen-view": "Vue Plein Écran", "full-size": "Taille complète", "fullscreen": "Plein écran", diff --git a/openmetadata-ui/src/main/resources/ui/src/pages/SignUp/BasicSignup.component.tsx b/openmetadata-ui/src/main/resources/ui/src/pages/SignUp/BasicSignup.component.tsx index 08b78ecdc94d..acab06def29f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/pages/SignUp/BasicSignup.component.tsx +++ b/openmetadata-ui/src/main/resources/ui/src/pages/SignUp/BasicSignup.component.tsx @@ -105,29 +105,25 @@ const BasicSignUp = () => { validateMessages={VALIDATION_MESSAGES} onFinish={handleSubmit}> From 47e53c618430cb2dcbec895c1a14808c61af35c8 Mon Sep 17 00:00:00 2001 From: Esvanth Date: Sun, 5 Apr 2026 13:57:48 +0100 Subject: [PATCH 11/12] Fix YAML parsing error in configuration and implement profiling exclusion for complex types --- .../src/metadata/profiler/processor/core.py | 3 +- .../DefaultOperationalConfigProvider.java | 3 +- .../service/OpenMetadataApplication.java | 3 +- .../service/apps/ConfigurationReader.java | 6 +- .../service/util/OpenMetadataOperations.java | 4 +- .../service/util/YamlSafeSubstitutor.java | 191 ++++++++++++++++++ .../service/util/YamlSafeSubstitutorTest.java | 91 +++++++++ 7 files changed, 294 insertions(+), 7 deletions(-) create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java create mode 100644 openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java diff --git a/ingestion/src/metadata/profiler/processor/core.py b/ingestion/src/metadata/profiler/processor/core.py index 3c96d663431f..ce126f051019 100644 --- a/ingestion/src/metadata/profiler/processor/core.py +++ b/ingestion/src/metadata/profiler/processor/core.py @@ -50,7 +50,7 @@ TMetric, ) from metadata.profiler.metrics.static.row_count import RowCount -from metadata.profiler.orm.registry import NOT_COMPUTE +from metadata.profiler.orm.registry import NOT_COMPUTE, is_complex_type from metadata.profiler.processor.metric_filter import MetricFilter from metadata.utils.logger import profiler_logger @@ -378,6 +378,7 @@ def _prepare_column_metrics(self) -> List: column for column in self.columns if column.type.__class__.__name__ not in NOT_COMPUTE + and not is_complex_type(column.type) ] static_metrics = [ ThreadPoolMetrics( diff --git a/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java b/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java index 34e9d54395ec..6dc71921c2e6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java +++ b/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java @@ -66,7 +66,8 @@ public static OperationalConfiguration readOperationsConfig(String configFilePat OperationalConfiguration.class, validator, objectMapper, "dw"); return factory.build( new SubstitutingSourceProvider( - new FileConfigurationSourceProvider(), new EnvironmentVariableSubstitutor(false)), + new FileConfigurationSourceProvider(), + new org.openmetadata.service.util.YamlSafeSubstitutor(false)), configFilePath); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java index 4b0eab6398da..be7b1d2fe1f7 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java @@ -734,7 +734,8 @@ private boolean isSamlServletRegistered( public void initialize(Bootstrap bootstrap) { bootstrap.setConfigurationSourceProvider( new SubstitutingSourceProvider( - bootstrap.getConfigurationSourceProvider(), new EnvironmentVariableSubstitutor(false))); + bootstrap.getConfigurationSourceProvider(), + new org.openmetadata.service.util.YamlSafeSubstitutor(false))); // Register custom filter factories bootstrap diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java index d547ab1af974..be16545cac25 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java @@ -27,11 +27,13 @@ public ConfigurationReader(Map envMap) { // envMap is for custom environment variables (e.g., for testing), defaulting to the system // environment. substitutor = - envMap == null ? new EnvironmentVariableSubstitutor(false) : new StringSubstitutor(envMap); + envMap == null + ? new org.openmetadata.service.util.YamlSafeSubstitutor(false) + : new StringSubstitutor(envMap); } public ConfigurationReader() { - this(System.getenv()); + this(null); } public AppPrivateConfig readConfigFromResource(String appName) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java index 16e750427194..08e301a1b122 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java @@ -17,7 +17,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import io.dropwizard.configuration.EnvironmentVariableSubstitutor; +import org.openmetadata.service.util.YamlSafeSubstitutor; import io.dropwizard.configuration.FileConfigurationSourceProvider; import io.dropwizard.configuration.SubstitutingSourceProvider; import io.dropwizard.configuration.YamlConfigurationFactory; @@ -2871,7 +2871,7 @@ public void parseConfig() throws Exception { config = factory.build( new SubstitutingSourceProvider( - new FileConfigurationSourceProvider(), new EnvironmentVariableSubstitutor(false)), + new FileConfigurationSourceProvider(), new YamlSafeSubstitutor(false)), configFilePath); IndexMappingLoader.init(config.getElasticSearchConfiguration()); Fernet.getInstance().setFernetKey(config); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java new file mode 100644 index 000000000000..3efeeac4941d --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java @@ -0,0 +1,191 @@ +package org.openmetadata.service.util; + +import io.dropwizard.configuration.EnvironmentVariableSubstitutor; +import org.apache.commons.text.TextStringBuilder; + +/** + * Extension of {@link EnvironmentVariableSubstitutor} that automatically quotes values containing + * YAML special characters. + * + *

Environment variable substitution in Dropwizard happens at the text level before YAML parsing. + * If an environment variable value contains YAML special characters (e.g., {@code >}, {@code |}, + * {@code #}, {@code {}}, {@code []}), the resulting YAML becomes malformed. + * + *

For example, a database password like {@code >8fYhD>dkcxy#HkVi8~ET} set via + * {@code DB_USER_PASSWORD} would produce: + * + *

+ * password: >8fYhD>dkcxy#HkVi8~ET
+ * 
+ * + * This would be interpreted as an empty block scalar by the YAML parser, causing a parsing error. + * This class automatically wraps such values in double quotes: + * + *
+ * password: ">8fYhD>dkcxy#HkVi8~ET"
+ * 
+ * + * Values that look like YAML flow sequences ({@code [...]}) or flow mappings ({@code {...}}) are + * left unquoted to preserve their structural meaning. + * + * @see Issue #21176 + */ +public class YamlSafeSubstitutor extends EnvironmentVariableSubstitutor { + + public YamlSafeSubstitutor(boolean strict) { + super(strict); + } + + @Override + protected String resolveVariable( + String variableName, TextStringBuilder buf, int startPos, int endPos) { + String value = super.resolveVariable(variableName, buf, startPos, endPos); + if (value == null || !needsYamlQuoting(value)) { + return value; + } + + // Don't quote values that are embedded within a larger string (e.g., JDBC URLs like + // jdbc:${DB_SCHEME:-mysql}://${DB_HOST:-localhost}:${DB_PORT:-3306}/...). + // Quoting inline values would insert literal '"' characters into the surrounding string. + if (isInlineSubstitution(buf, startPos, endPos)) { + return value; + } + + return yamlDoubleQuote(value); + } + + /** + * Checks whether the ${...} placeholder is embedded within a larger YAML value rather than being + * the entire value. A substitution is inline if there is non-whitespace content immediately before + * or after the placeholder on the same line. + */ + static boolean isInlineSubstitution(TextStringBuilder buf, int startPos, int endPos) { + if (startPos > 0) { + char before = buf.charAt(startPos - 1); + if (before != ' ' && before != '\t' && before != '\n' && before != '\r') { + return true; + } + } + if (endPos < buf.length()) { + char after = buf.charAt(endPos); + if (after != ' ' && after != '\t' && after != '\n' && after != '\r') { + return true; + } + } + return false; + } + + /** + * Wraps the value in YAML double quotes if it contains special characters that would break YAML + * parsing. Returns the value unchanged if quoting is not needed. + */ + public static String quoteIfNeeded(String value) { + if (value != null && needsYamlQuoting(value)) { + return yamlDoubleQuote(value); + } + return value; + } + + static boolean needsYamlQuoting(String value) { + if (value.isEmpty()) { + return false; + } + + // Don't quote values that look like YAML flow sequences or mappings, + // as quoting would turn them into plain strings + if ((value.startsWith("[") && value.endsWith("]")) + || (value.startsWith("{") && value.endsWith("}"))) { + return false; + } + + // Don't quote values that are already YAML-quoted. Docker Compose defaults using the + // ${VAR:- "value"} pattern produce env vars with optional leading whitespace and surrounding + // quotes (e.g., ' "opensearch"'). Re-quoting these would double-escape them. + String trimmed = value.trim(); + if (trimmed.length() >= 2) { + char qFirst = trimmed.charAt(0); + char qLast = trimmed.charAt(trimmed.length() - 1); + if ((qFirst == '"' && qLast == '"') || (qFirst == '\'' && qLast == '\'')) { + return false; + } + } + + char first = value.charAt(0); + // Characters that are YAML indicators when they appear at the start of a scalar value: + // > block scalar (folded) + // | block scalar (literal) + // * alias + // & anchor + // ! tag + // % directive + // @ reserved + // ` reserved + // ' single quote + // " double quote + // { flow mapping start (not matched by flow-mapping check above, e.g. "{broken") + // [ flow sequence start (not matched by flow-sequence check above, e.g. "[broken") + // ? mapping key indicator + // , flow collection separator + // # comment indicator + switch (first) { + case '>': + case '|': + case '*': + case '&': + case '!': + case '%': + case '@': + case '`': + case '\'': + case '"': + case '{': + case '[': + case '?': + case ',': + case '#': + return true; + default: + break; + } + + // "- " at start is a block sequence entry indicator + if (first == '-' && value.length() > 1 && value.charAt(1) == ' ') { + return true; + } + + // " #" anywhere starts a YAML comment (hash preceded by whitespace) + if (value.contains(" #") || value.contains("\t#")) { + return true; + } + + // ": " or ":" at end of value is a mapping indicator + if (value.contains(": ") || value.endsWith(":")) { + return true; + } + + // Leading or trailing whitespace + char last = value.charAt(value.length() - 1); + if (first == ' ' || first == '\t' || last == ' ' || last == '\t') { + return true; + } + + // Newlines would break YAML block structure + if (value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0) { + return true; + } + + return false; + } + + static String yamlDoubleQuote(String value) { + // Escape backslashes first (before introducing new ones), then double quotes and newlines + String escaped = + value + .replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t"); + return "\"" + escaped + "\""; + } +} diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java new file mode 100644 index 000000000000..b2be596fc176 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java @@ -0,0 +1,91 @@ +package org.openmetadata.service.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.commons.text.TextStringBuilder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; + +class YamlSafeSubstitutorTest { + + @ParameterizedTest + @CsvSource( + value = { + "plainvalue, false", + "value_with_underscore, false", + "value-with-hyphen, false", + "12345, false", + "'', false", + "'>value', true", + "'|value', true", + "'*value', true", + "'&value', true", + "'!value', true", + "'%value', true", + "'@value', true", + "'`value', true", + "''value', true", + "'\"value', true", + "'{value', true", + "'[value', true", + "'?value', true", + "',value', true", + "'#value', true", + "'- value', true", + "'-value', false", + "'value #comment', true", + "'value#nocomment', false", + "'key: value', true", + "'key:', true", + "' key', true", + "'key ', true", + "'val\nval', true", + "'val\rval', true", + "'[flow, sequence]', false", + "'{flow: mapping}', false", + "' \"already_quoted\"', false", + "\"'already_quoted'\", false" + }, + nullValues = {"NULL"}) + void testNeedsYamlQuoting(String value, boolean expected) { + assertEquals(expected, YamlSafeSubstitutor.needsYamlQuoting(value)); + } + + @ParameterizedTest + @CsvSource({ + "'>value', \"> value\"", + "'|value', \"|value\"", + "'#value', \"#value\"", + "'key: value', \"key: value\"", + "' val ', \" val \"", + "'val\nval', \"val\\nval\"", + "'val\"quote', \"val\\\"quote\"", + "'val\\back', \"val\\\\back\"" + }) + void testYamlDoubleQuote(String input, String expected) { + assertEquals(expected, YamlSafeSubstitutor.yamlDoubleQuote(input)); + } + + @Test + void testIsInlineSubstitution() { + TextStringBuilder buf = new TextStringBuilder("jdbc:${DB_SCHEME}://localhost"); + // ${DB_SCHEME} is at [5, 17) + assertTrue(YamlSafeSubstitutor.isInlineSubstitution(buf, 5, 17)); + + buf = new TextStringBuilder("${DB_SCHEME}"); + assertFalse(YamlSafeSubstitutor.isInlineSubstitution(buf, 0, 12)); + + buf = new TextStringBuilder(" ${DB_SCHEME} "); + assertFalse(YamlSafeSubstitutor.isInlineSubstitution(buf, 2, 14)); + + buf = new TextStringBuilder("prefix${DB_SCHEME}"); + assertTrue(YamlSafeSubstitutor.isInlineSubstitution(buf, 6, 18)); + + buf = new TextStringBuilder("${DB_SCHEME}suffix"); + assertTrue(YamlSafeSubstitutor.isInlineSubstitution(buf, 0, 12)); + } +} From 732aa1764349f3d2e6ab5ca2a705f851e9d51d8a Mon Sep 17 00:00:00 2001 From: Esvanth Date: Sun, 5 Apr 2026 15:07:06 +0100 Subject: [PATCH 12/12] Fix YAML parsing, refine reserved words handling and resolve CI checkstyle --- docker/development/docker-compose-gcp.yml | 4 +- .../docker-compose-postgres-fuseki.yml | 4 +- .../development/docker-compose-postgres.yml | 4 +- docker/development/docker-compose.yml | 4 +- .../docker-compose-openmetadata.yml | 4 +- .../docker-compose-postgres.yml | 4 +- .../docker-compose-rdf.yml | 4 +- .../docker-compose.yml | 4 +- .../DefaultOperationalConfigProvider.java | 6 +-- .../service/OpenMetadataApplication.java | 6 +-- .../service/apps/ConfigurationReader.java | 4 +- .../service/util/OpenMetadataOperations.java | 1 - .../service/util/YamlSafeSubstitutor.java | 15 ++++++ .../service/util/YamlSafeSubstitutorTest.java | 13 ++++- scripts/VerifyYamlFix.java | 50 +++++++++++++++++++ 15 files changed, 98 insertions(+), 29 deletions(-) create mode 100644 scripts/VerifyYamlFix.java diff --git a/docker/development/docker-compose-gcp.yml b/docker/development/docker-compose-gcp.yml index 1c98f2aaf5ee..392b51187a80 100644 --- a/docker/development/docker-compose-gcp.yml +++ b/docker/development/docker-compose-gcp.yml @@ -119,7 +119,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -333,7 +333,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose-postgres-fuseki.yml b/docker/development/docker-compose-postgres-fuseki.yml index 3a6779c3abee..0a40ad473ebf 100644 --- a/docker/development/docker-compose-postgres-fuseki.yml +++ b/docker/development/docker-compose-postgres-fuseki.yml @@ -119,7 +119,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -350,7 +350,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose-postgres.yml b/docker/development/docker-compose-postgres.yml index 35b66f65774a..23fe5b6e6b22 100644 --- a/docker/development/docker-compose-postgres.yml +++ b/docker/development/docker-compose-postgres.yml @@ -118,7 +118,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -345,7 +345,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml index 05c68b311358..c2ea751671ce 100644 --- a/docker/development/docker-compose.yml +++ b/docker/development/docker-compose.yml @@ -115,7 +115,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -338,7 +338,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml index fc0ad5c338fc..82bd2fda554d 100644 --- a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml +++ b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml @@ -60,7 +60,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -281,7 +281,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-quickstart/docker-compose-postgres.yml b/docker/docker-compose-quickstart/docker-compose-postgres.yml index b334d8eb3ab5..8709585a7f40 100644 --- a/docker/docker-compose-quickstart/docker-compose-postgres.yml +++ b/docker/docker-compose-quickstart/docker-compose-postgres.yml @@ -108,7 +108,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -329,7 +329,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/docker/docker-compose-quickstart/docker-compose-rdf.yml b/docker/docker-compose-quickstart/docker-compose-rdf.yml index 745d733fede2..4b106eda07fc 100644 --- a/docker/docker-compose-quickstart/docker-compose-rdf.yml +++ b/docker/docker-compose-quickstart/docker-compose-rdf.yml @@ -148,7 +148,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} # Database configuration for MySQL DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver} @@ -318,7 +318,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} # Database configuration for MySQL DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver} diff --git a/docker/docker-compose-quickstart/docker-compose.yml b/docker/docker-compose-quickstart/docker-compose.yml index a71fe901418f..df24da9b31fe 100644 --- a/docker/docker-compose-quickstart/docker-compose.yml +++ b/docker/docker-compose-quickstart/docker-compose.yml @@ -106,7 +106,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} @@ -327,7 +327,7 @@ services: OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"} OIDC_TENANT: ${OIDC_TENANT:-""} OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""} - OIDC_CUSTOM_PARAMS: "{}" + OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}} OIDC_MAX_AGE: ${OIDC_MAX_AGE:-"0"} OIDC_PROMPT_TYPE: ${OIDC_PROMPT_TYPE:-"consent"} OIDC_SESSION_EXPIRY: ${OIDC_SESSION_EXPIRY:-"604800"} diff --git a/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java b/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java index 6dc71921c2e6..b595435ce2a2 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java +++ b/openmetadata-service/src/main/java/org/openmetadata/DefaultOperationalConfigProvider.java @@ -2,7 +2,6 @@ import com.cronutils.utils.StringUtils; import com.fasterxml.jackson.databind.ObjectMapper; -import io.dropwizard.configuration.EnvironmentVariableSubstitutor; import io.dropwizard.configuration.FileConfigurationSourceProvider; import io.dropwizard.configuration.SubstitutingSourceProvider; import io.dropwizard.configuration.YamlConfigurationFactory; @@ -17,6 +16,7 @@ import org.openmetadata.schema.api.security.OpsConfig; import org.openmetadata.schema.email.SmtpSettings; import org.openmetadata.schema.operations.OperationalConfiguration; +import org.openmetadata.service.util.YamlSafeSubstitutor; @Getter @Setter @@ -65,9 +65,7 @@ public static OperationalConfiguration readOperationsConfig(String configFilePat new YamlConfigurationFactory<>( OperationalConfiguration.class, validator, objectMapper, "dw"); return factory.build( - new SubstitutingSourceProvider( - new FileConfigurationSourceProvider(), - new org.openmetadata.service.util.YamlSafeSubstitutor(false)), + new SubstitutingSourceProvider(new FileConfigurationSourceProvider(), new YamlSafeSubstitutor(false)), configFilePath); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java index be7b1d2fe1f7..8a8b56784db8 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java @@ -15,7 +15,6 @@ import static org.openmetadata.service.util.jdbi.JdbiUtils.createAndSetupJDBI; -import io.dropwizard.configuration.EnvironmentVariableSubstitutor; import io.dropwizard.configuration.SubstitutingSourceProvider; import io.dropwizard.core.Application; import io.dropwizard.core.server.DefaultServerFactory; @@ -58,6 +57,7 @@ import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import org.openmetadata.service.util.YamlSafeSubstitutor; import org.eclipse.jetty.ee10.servlet.FilterHolder; import org.eclipse.jetty.ee10.servlet.ServletHandler; import org.eclipse.jetty.ee10.servlet.ServletHolder; @@ -733,9 +733,7 @@ private boolean isSamlServletRegistered( @Override public void initialize(Bootstrap bootstrap) { bootstrap.setConfigurationSourceProvider( - new SubstitutingSourceProvider( - bootstrap.getConfigurationSourceProvider(), - new org.openmetadata.service.util.YamlSafeSubstitutor(false))); + new SubstitutingSourceProvider(bootstrap.getConfigurationSourceProvider(), new YamlSafeSubstitutor(false))); // Register custom filter factories bootstrap diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java index be16545cac25..4939ed3394e6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ConfigurationReader.java @@ -3,7 +3,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import io.dropwizard.configuration.ConfigurationException; -import io.dropwizard.configuration.EnvironmentVariableSubstitutor; import io.dropwizard.configuration.ResourceConfigurationSourceProvider; import io.dropwizard.configuration.SubstitutingSourceProvider; import io.dropwizard.configuration.YamlConfigurationFactory; @@ -13,6 +12,7 @@ import org.apache.commons.text.StringSubstitutor; import org.openmetadata.schema.api.configuration.apps.AppPrivateConfig; import org.openmetadata.schema.utils.JsonUtils; +import org.openmetadata.service.util.YamlSafeSubstitutor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,7 +28,7 @@ public ConfigurationReader(Map envMap) { // environment. substitutor = envMap == null - ? new org.openmetadata.service.util.YamlSafeSubstitutor(false) + ? new YamlSafeSubstitutor(false) : new StringSubstitutor(envMap); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java index 08e301a1b122..1f1ce1dad061 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/OpenMetadataOperations.java @@ -17,7 +17,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import org.openmetadata.service.util.YamlSafeSubstitutor; import io.dropwizard.configuration.FileConfigurationSourceProvider; import io.dropwizard.configuration.SubstitutingSourceProvider; import io.dropwizard.configuration.YamlConfigurationFactory; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java index 3efeeac4941d..891d60682548 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/YamlSafeSubstitutor.java @@ -148,6 +148,21 @@ static boolean needsYamlQuoting(String value) { break; } + // Check for YAML reserved words that would be parsed as non-strings + String lower = value.trim().toLowerCase(); + switch (lower) { + case "true": + case "false": + case "null": + case "yes": + case "no": + case "on": + case "off": + return true; + default: + break; + } + // "- " at start is a block sequence entry indicator if (first == '-' && value.length() > 1 && value.charAt(1) == ' ') { return true; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java index b2be596fc176..d34156ef064d 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/util/YamlSafeSubstitutorTest.java @@ -48,7 +48,16 @@ class YamlSafeSubstitutorTest { "'[flow, sequence]', false", "'{flow: mapping}', false", "' \"already_quoted\"', false", - "\"'already_quoted'\", false" + "\"'already_quoted'\", false", + "true, true", + "FALSE, true", + "null, true", + "Null, true", + "yes, true", + "no, true", + "on, true", + "off, true", + "maybe, false" }, nullValues = {"NULL"}) void testNeedsYamlQuoting(String value, boolean expected) { @@ -57,7 +66,7 @@ void testNeedsYamlQuoting(String value, boolean expected) { @ParameterizedTest @CsvSource({ - "'>value', \"> value\"", + "'>value', \">value\"", "'|value', \"|value\"", "'#value', \"#value\"", "'key: value', \"key: value\"", diff --git a/scripts/VerifyYamlFix.java b/scripts/VerifyYamlFix.java new file mode 100644 index 000000000000..4043c7fbd059 --- /dev/null +++ b/scripts/VerifyYamlFix.java @@ -0,0 +1,50 @@ +public class VerifyYamlFix { + public static void main(String[] args) { + String[] testCases = { + "plainvalue", "value_with_#_hash", "{}", ">block", "|literal", "key: value", " spaced ", + "true", "FALSE", "null", "Null", "yes", "no", "on", "off", "maybe" + }; + + System.out.println("--- YAML Quoting Verification (Refined) ---"); + for (String test : testCases) { + boolean needsQuoting = needsYamlQuoting(test); + String result = needsQuoting ? yamlDoubleQuote(test) : test; + System.out.printf("In: [%s] -> Needs Quoting: %b -> Result: [%s]%n", test, needsQuoting, result); + } + } + + static boolean needsYamlQuoting(String value) { + if (value.isEmpty()) return false; + + // Flow mappings/sequences + if ((value.startsWith("[") && value.endsWith("]")) || (value.startsWith("{") && value.endsWith("}"))) return false; + + // Already quoted (simplified check for script) + String trimmed = value.trim(); + if (trimmed.length() >= 2) { + char first = trimmed.charAt(0); + char last = trimmed.charAt(trimmed.length() - 1); + if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) return false; + } + + char first = value.charAt(0); + String indicators = ">|*&!%@`'\"{[?,#"; + if (indicators.indexOf(first) != -1) return true; + + // Reserved words + String lower = trimmed.toLowerCase(); + switch (lower) { + case "true": case "false": case "null": case "yes": case "no": case "on": case "off": + return true; + } + + if (value.contains(" #") || value.contains("\t#") || value.contains(": ") || value.endsWith(":") || value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0) return true; + + char last = value.charAt(value.length() - 1); + return (first == ' ' || first == '\t' || last == ' ' || last == '\t'); + } + + static String yamlDoubleQuote(String value) { + return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n").replace("\r", "\\r") + "\""; + } +}