diff --git a/dpsynth/discrete_mechanisms/independent.py b/dpsynth/discrete_mechanisms/independent.py
index 046c411..60076f2 100644
--- a/dpsynth/discrete_mechanisms/independent.py
+++ b/dpsynth/discrete_mechanisms/independent.py
@@ -70,7 +70,10 @@ def run_mechanism(
 
   potentials = initial_potentials
   if potentials is not None:
-    potentials = potentials.expand([m.clique for m in measurements])
+    # `expand` requires unique cliques, so collapse repeats first. Dict keys
+    # keep first-seen order, which leaves the clique order unchanged.
+    distinct_cliques = dict.fromkeys(m.clique for m in measurements)
+    potentials = potentials.expand(list(distinct_cliques))
 
   model = mbi.estimation.mirror_descent(
       data.domain,
diff --git a/dpsynth/discrete_mechanisms/independent_test.py b/dpsynth/discrete_mechanisms/independent_test.py
index 88c58da..e45d405 100644
--- a/dpsynth/discrete_mechanisms/independent_test.py
+++ b/dpsynth/discrete_mechanisms/independent_test.py
@@ -31,6 +31,36 @@ def test_fits_one_way_marginals(self):
       actual = synthetic.project([col]).datavector()
       np.testing.assert_allclose(actual, expected, atol=0.1)
 
+  def test_duplicate_one_way_cliques_with_initial_potentials(self):
+    # Regression test for the "Cliques must be unique." crash. When
+    # `initial_potentials` is supplied (e.g. the empty CliqueVector used for
+    # the no-constraints case) and `initial_measurements` already holds the
+    # one-way marginals, the cliques measured in the loop duplicate them, and
+    # `CliqueVector.expand` rejects duplicate cliques.
+    domain = mbi.Domain(["a", "b", "c"], [3, 4, 5])
+    dataset = mbi.Dataset.synthetic(domain, N=1000)
+
+    # Pre-measure every one-way marginal straight from the data.
+    one_way_measurements = []
+    for col in dataset.domain:
+      marginal = dataset.project((col,)).datavector()
+      one_way_measurements.append(mbi.LinearMeasurement(marginal, (col,)))
+    empty_potentials = mbi.CliqueVector(domain, [], {})
+
+    synthetic = independent.run_mechanism(
+        dataset,
+        independent.IndependentConfig(pgm_iters=500),
+        zcdp_rho=10000,
+        initial_measurements=one_way_measurements,
+        initial_potentials=empty_potentials,
+    )
+
+    # Each one-way marginal of the output should match the input data.
+    for col in dataset.domain:
+      want = dataset.project([col]).datavector()
+      got = synthetic.project([col]).datavector()
+      np.testing.assert_allclose(got, want, atol=0.1)
+
 
 if __name__ == "__main__":
   absltest.main()