diff --git a/databricks/koalas/indexes.py b/databricks/koalas/indexes.py
index 951a5a48dc..1d54e3afe6 100644
--- a/databricks/koalas/indexes.py
+++ b/databricks/koalas/indexes.py
@@ -53,6 +53,7 @@
 from databricks.koalas.series import Series, first_series
 from databricks.koalas.spark.accessors import SparkIndexMethods
 from databricks.koalas.utils import (
+    combine_frames,
     compare_disallow_null,
     default_session,
     is_name_like_tuple,
@@ -3318,6 +3319,61 @@ def item(self) -> Tuple[Scalar, ...]:
         """
         return self._kdf.head(2)._to_internal_pandas().index.item()
 
+    def equal_levels(self, other) -> bool:
+        """
+        Return True if the levels of both MultiIndex objects are the same.
+
+        The comparison is order-insensitive: both indexes are joined on their
+        entries, and the result is True only if every entry of each index
+        finds a match in the other.
+
+        Parameters
+        ----------
+        other : MultiIndex
+            The MultiIndex to compare against.
+
+        Returns
+        -------
+        bool
+            False if the number of levels differs or if either index holds an
+            entry that is absent from the other; True otherwise.
+
+        Examples
+        --------
+        >>> kmidx1 = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        >>> kmidx2 = ks.MultiIndex.from_tuples([("b", "y"), ("a", "x"), ("c", "z")])
+        >>> kmidx1.equal_levels(kmidx2)
+        True
+
+        >>> kmidx2 = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "j")])
+        >>> kmidx1.equal_levels(kmidx2)
+        False
+
+        An entry that exists only in ``other`` is detected as well:
+
+        >>> kmidx2 = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y")])
+        >>> kmidx2.equal_levels(kmidx1)
+        False
+        """
+        if self.nlevels != other.nlevels:
+            return False
+
+        self_frame = self.sort_values().to_frame()
+        other_frame = other.sort_values().to_frame()
+        with option_context("compute.ops_on_diff_frames", True):
+            combined = combine_frames(self_frame, other_frame, how="full")
+
+        sdf = combined._internal.spark_frame
+        this_index_name = combined["this"]._internal.data_spark_column_names[0]
+        that_index_name = combined["that"]._internal.data_spark_column_names[0]
+
+        # After the full outer join, an entry present on only one side leaves the
+        # other side's columns null, so both sides have to be inspected.
+        unmatched = (
+            scol_for(sdf, this_index_name).isNull() | scol_for(sdf, that_index_name).isNull()
+        )
+        return len(sdf.filter(unmatched).head(1)) == 0
+
     def intersection(self, other) -> "MultiIndex":
         """
         Form the intersection of two Index objects.
diff --git a/databricks/koalas/missing/indexes.py b/databricks/koalas/missing/indexes.py
index 0928d3b212..60ea6171e1 100644
--- a/databricks/koalas/missing/indexes.py
+++ b/databricks/koalas/missing/indexes.py
@@ -103,7 +103,6 @@ class MissingPandasLikeMultiIndex(object):
     # Functions
     argsort = _unsupported_function("argsort")
     asof_locs = _unsupported_function("asof_locs")
-    equal_levels = _unsupported_function("equal_levels")
     factorize = _unsupported_function("factorize")
     format = _unsupported_function("format")
     get_indexer = _unsupported_function("get_indexer")
diff --git a/databricks/koalas/tests/test_indexes.py b/databricks/koalas/tests/test_indexes.py
index 32af3f25c4..69c724cbde 100644
--- a/databricks/koalas/tests/test_indexes.py
+++ b/databricks/koalas/tests/test_indexes.py
@@ -1889,6 +1889,28 @@ def test_multiindex_is_unique(self):
 
             self.assertEqual(kdf.index.is_unique, expected)
 
+    def test_multiindex_equal_levels(self):
+        # The left-hand index stays fixed; each case swaps in a different right-hand index.
+        pmidx1 = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
+        kmidx1 = ks.from_pandas(pmidx1)
+
+        other_tuples = [
+            # Same entries in a different order.
+            [("b", "y"), ("a", "x"), ("c", "z")],
+            # One entry differs in the second level.
+            [("a", "x"), ("b", "y"), ("c", "j")],
+            # A duplicated entry replaces one of the originals.
+            [("a", "x"), ("b", "y"), ("a", "x")],
+            # Fewer entries.
+            [("a", "x"), ("b", "y")],
+            # A different number of levels.
+            [("a", "x", "q"), ("b", "y", "w"), ("c", "z", "e")],
+        ]
+        for tuples in other_tuples:
+            pmidx2 = pd.MultiIndex.from_tuples(tuples)
+            kmidx2 = ks.from_pandas(pmidx2)
+            self.assert_eq(pmidx1.equal_levels(pmidx2), kmidx1.equal_levels(kmidx2))
+
     def 
test_view(self): pidx = pd.Index([1, 2, 3, 4], name="Koalas") kidx = ks.from_pandas(pidx) diff --git a/docs/source/reference/indexing.rst b/docs/source/reference/indexing.rst index cb14fe3aec..f7484e6138 100644 --- a/docs/source/reference/indexing.rst +++ b/docs/source/reference/indexing.rst @@ -218,6 +218,7 @@ MultiIndex Modifying and computations :toctree: api/ MultiIndex.equals + MultiIndex.equal_levels MultiIndex.identical MultiIndex.insert MultiIndex.drop