15 changes: 15 additions & 0 deletions docs/_static/references.bib
@@ -38,6 +38,21 @@ @article{Rey_2016
Year = 2016,
Bdsk-Url-1 = {http://dx.doi.org/10.1111/tgis.12236}}

@article{Mu_2019,
Author = {Mu, Wangshu and Tong, Daoqin},
Doi = {10.1080/24694452.2018.1549971},
Issn = {2469-4452},
Journal = {Annals of the American Association of Geographers},
Month = {Mar},
Number = 5,
Pages = {1493--1510},
Publisher = {Taylor \& Francis},
Title = {Choropleth Mapping with Uncertainty: A Maximum Likelihood-Based Classification Scheme},
Url = {https://doi.org/10.1080/24694452.2018.1549971},
Volume = 109,
Year = 2019,
Bdsk-Url-1 = {https://doi.org/10.1080/24694452.2018.1549971}}

@book{Slocum_2009,
Author = {Slocum, Terry A. and McMaster, Robert B. and Kessler, Fritz C. and Howard, Hugh H.},
Publisher = {Pearson Prentice Hall, Upper Saddle River},
1 change: 1 addition & 0 deletions docs/api.rst
@@ -24,6 +24,7 @@ Classifiers
mapclassify.JenksCaspallSampled
mapclassify.MaxP
mapclassify.MaximumBreaks
mapclassify.MaximumLikelihood
mapclassify.NaturalBreaks
mapclassify.Percentiles
mapclassify.PrettyBreaks
1 change: 1 addition & 0 deletions mapclassify/__init__.py
@@ -15,6 +15,7 @@
JenksCaspallSampled,
KClassifiers,
MaximumBreaks,
MaximumLikelihood,
MaxP,
NaturalBreaks,
Percentiles,
114 changes: 114 additions & 0 deletions mapclassify/classifiers.py
@@ -27,6 +27,7 @@
"HeadTailBreaks",
"MaxP",
"MaximumBreaks",
"MaximumLikelihood",
"NaturalBreaks",
"Quantiles",
"Percentiles",
@@ -49,6 +50,7 @@
"JenksCaspallSampled",
"MaxP",
"MaximumBreaks",
"MaximumLikelihood",
"NaturalBreaks",
"Quantiles",
"Percentiles",
@@ -3109,3 +3111,115 @@ def __init__(self, y, pct=0.8):
pct0 = pct1
self.results = results
self.best = best[1]


class MaximumLikelihood(MapClassifier):
"""
Maximum Likelihood Estimation-based map classification.

Parameters
----------

y : numpy.array
(n, 1), values to classify.
sigma : numpy.array
(n, 1), standard deviation corresponding to each value in y.

Member:

Is there any guidance on where sigma values are to be obtained?

Author:

For example, the widely used American Community Survey (ACS) data include sampling uncertainty through published “margin of error” (MOE) estimates.

In the paper's case, the uncertainty values were included in the dataset the authors were using. If the data are generated by a statistical model, then I believe the standard deviation could be calculated directly from the model's own variance estimates.

Ultimately, uncertainty values have to come from either the people collecting the data or the people modeling it, so producing them falls outside our scope.
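
As a quick sketch (the estimate and MOE values below are made up for illustration; the 1.645 divisor reflects the ACS convention of publishing MOEs at the 90% confidence level):

import numpy as np
import mapclassify

# Hypothetical ACS-style inputs: point estimates plus published margins of error.
estimates = np.array([32000, 45000, 46000, 50000, 61000])
moe = np.array([2500, 3300, 3500, 3000, 4900])

# ACS MOEs correspond to 90% confidence intervals,
# so the implied standard deviation is MOE / 1.645.
sigma = moe / 1.645

ml = mapclassify.MaximumLikelihood(estimates, sigma, k=2)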
k : int (default 5)
The number of classes required.

Attributes
----------

yb : numpy.array
:math:`(n,1)`, bin IDs for observations.
bins : numpy.array
:math:`(k,1)`, the upper bounds of each class.
k : int
The number of classes.
counts : numpy.array
:math:`(k,1)`, the number of observations falling in each class.

Notes
-----

This classification scheme incorporates data uncertainty (standard deviation)
into the creation of choropleth maps. It assumes the existence of a
representative value for each class and determines class breaks using
a dynamic programming approach to minimize the overall within-class
deviation while accounting for the uncertainty. :cite:`Mu_2019`
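
Concretely (restating the cost implemented in ``_set_bins`` below), a
class containing values :math:`y_i` with standard deviations
:math:`\sigma_i` is assigned the inverse-variance weighted mean as its
representative value,

.. math::

   v = \frac{\sum_i y_i / \sigma_i^2}{\sum_i 1 / \sigma_i^2},

and the dynamic program minimizes the total cost
:math:`\sum_i (v - y_i)^2 / \sigma_i^2` summed over all classes.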

Examples
--------

>>> import numpy as np
>>> import mapclassify
>>> y = [32000, 45000, 46000, 50000, 61000, 62000, 85000, 90000]
>>> sigma = [1500, 2000, 2100, 1800, 3000, 3100, 4500, 5000]
>>> ml = mapclassify.MaximumLikelihood(y, sigma, k=3)
>>> ml.bins
array([32000, 62000, 90000])

By comparison, ``FisherJenks`` would return ``array([50000., 62000., 90000.])``.

>>> ml.counts.tolist()
[1, 5, 2]

"""

def __init__(self, y, sigma, k=5):
self.sigma = np.asarray(sigma)
self.k = k
self.name = "MaximumLikelihood"
MapClassifier.__init__(self, y)

def _set_bins(self):
y = self.y
sigma = self.sigma
k = self.k
n = len(y)

# The algorithm requires sorted areal values
sort_idx = np.argsort(y)
y_sorted = y[sort_idx]
sigma_sorted = sigma[sort_idx]

# omega[i][j] -> cost of a single class spanning sorted elements i..j (inclusive)
omega = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
y_slice = y_sorted[i : j + 1]
sig_slice = sigma_sorted[i : j + 1]

# Calculate the representative value for each class
variance_inv = 1.0 / (sig_slice**2)
v_val = np.sum(y_slice * variance_inv) / np.sum(variance_inv)

# Calculate the cost for each class
omega[i, j] = np.sum(((v_val - y_slice) ** 2) * variance_inv)

# dp[i][j] stores minimum cost to split first i elements into j classes
dp = np.full((n + 1, k + 1), np.inf)
dp[0, 0] = 0

# backtrack[i][j] stores the index where the j-th class starts
backtrack = np.zeros((n + 1, k + 1), dtype=int)
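
# Recurrence (restating the loop below):
#   dp[i][j] = min over m in [j-1, i) of dp[m][j-1] + omega[m, i-1]
# i.e. the j-th class covers sorted elements m..i-1 (0-indexed), and the
# first m elements are optimally split into j-1 classes.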

# j -> no. of classes; j = k is the required solution
for j in range(1, k + 1):
# i -> no. of elements considered; i must be >= j
for i in range(j, n + 1):
# try all possible breakpoints, m -> end of previous class
for m in range(j - 1, i):
cost = dp[m, j - 1] + omega[m, i - 1]
if cost < dp[i, j]:
dp[i, j] = cost
backtrack[i, j] = m

# find the breakpoints by looping backwards on dp.
breaks_idx = []
curr_i = n
for j in range(k, 0, -1):
breaks_idx.append(curr_i - 1)
curr_i = backtrack[curr_i, j]
breaks_idx.reverse()
bins = [y_sorted[idx] for idx in breaks_idx]

self.bins = np.array(bins)
45 changes: 45 additions & 0 deletions mapclassify/tests/test_mapclassify.py
@@ -15,6 +15,7 @@
JenksCaspallSampled,
KClassifiers,
MaximumBreaks,
MaximumLikelihood,
MaxP,
NaturalBreaks,
Percentiles,
@@ -790,3 +791,47 @@ def test_histogram_plot_linewidth(self):
linewidth=3, linecolor="red", color="yellow"
)
return ax.get_figure()


class TestMaximumLikelihood:
def test_MaximumLikelihood(self):
# An ordered dataset with 2 groups.
# A conventional classifier would put 90 in its own class,
# but MLE groups it with 50 and instead splits the lower
# valued elements, which have low uncertainty
y = [10, 12, 14, 50, 90]
sigma = [1, 1, 1, 1, 20]
ml = MaximumLikelihood(y, sigma, k=3)

# basic sanity checks
assert ml.k == 3
assert len(ml.counts) == 3
# the last bin edge should be the maximum value (invariant)
assert ml.bins[-1] == max(y)
# total count should equal the length of the input
assert ml.counts.sum() == len(y)
# each class must have at least one element
assert numpy.all(ml.counts > 0)

numpy.testing.assert_array_equal(ml.bins, numpy.array([10, 14, 90]))
numpy.testing.assert_array_equal(ml.counts, numpy.array([1, 2, 2]))

def test_MaximumLikelihood_unordered(self):
# Ordered and unordered data should give the same result
y_unordered = numpy.array([10, 90, 14, 50, 12])
sigma_unordered = numpy.array([1, 20, 1, 1, 1])

ml = MaximumLikelihood(y_unordered, sigma_unordered, k=3)

# basic sanity checks
assert ml.k == 3
assert len(ml.counts) == 3
# the last bin edge should be the maximum value (invariant)
assert ml.bins[-1] == max(y_unordered)
# total count should equal the length of the input
assert ml.counts.sum() == len(y_unordered)
# each class must have at least one element
assert numpy.all(ml.counts > 0)

numpy.testing.assert_array_equal(ml.bins, numpy.array([10, 14, 90]))
numpy.testing.assert_array_equal(ml.counts, numpy.array([1, 2, 2]))