From 1755e90c0a31dfa54adc453aea81610f64a5f0e5 Mon Sep 17 00:00:00 2001 From: gghatano Date: Wed, 3 Jun 2026 20:04:19 +0900 Subject: [PATCH 1/2] Fix INDEPENDENT mechanism crash on duplicate cliques `independent.run_mechanism` builds `measurements` from `initial_measurements` (the one-way marginals that `data_generation_v2.generate` always passes in) and then appends a freshly measured one-way marginal for every attribute. When `initial_potentials` is not None (e.g. the empty CliqueVector returned by `constraints.get_initial_parameters` for the no-constraints case), the code calls `potentials.expand([m.clique for m in measurements])` with a clique list that now contains duplicate one-way cliques. With current `mbi`, `CliqueVector.expand` requires unique cliques and raises `ValueError: Cliques must be unique.`, so `dpsynth.generate(..., discrete_config=IndependentConfig())` fails. De-duplicate the clique list (order-preserving) before expanding. Measurements themselves are left unchanged, so estimation and accounting are unaffected. --- dpsynth/discrete_mechanisms/independent.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dpsynth/discrete_mechanisms/independent.py b/dpsynth/discrete_mechanisms/independent.py index 046c411..82da37d 100644 --- a/dpsynth/discrete_mechanisms/independent.py +++ b/dpsynth/discrete_mechanisms/independent.py @@ -70,7 +70,13 @@ def run_mechanism( potentials = initial_potentials if potentials is not None: - potentials = potentials.expand([m.clique for m in measurements]) + # `measurements` can contain the same clique more than once (the one-way + # marginals passed in via `initial_measurements` plus the one-way marginals + # measured in the loop above). `CliqueVector.expand` requires unique cliques + # and otherwise raises "Cliques must be unique.", so de-duplicate while + # preserving order before expanding. 
+ unique_cliques = list(dict.fromkeys(m.clique for m in measurements)) + potentials = potentials.expand(unique_cliques) model = mbi.estimation.mirror_descent( data.domain, From 55c1f4bb6a6939e8e8d3b8db003a7a8c544d560b Mon Sep 17 00:00:00 2001 From: gghatano Date: Mon, 15 Jun 2026 18:26:35 +0900 Subject: [PATCH 2/2] Address review: shorten comment and add test coverage - Reduce the inline comment in `run_mechanism` to a single line. - Add a regression test exercising the duplicate-clique path (initial potentials + initial one-way measurements) that previously raised "Cliques must be unique.". --- dpsynth/discrete_mechanisms/independent.py | 6 +--- .../discrete_mechanisms/independent_test.py | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/dpsynth/discrete_mechanisms/independent.py b/dpsynth/discrete_mechanisms/independent.py index 82da37d..60076f2 100644 --- a/dpsynth/discrete_mechanisms/independent.py +++ b/dpsynth/discrete_mechanisms/independent.py @@ -70,11 +70,7 @@ def run_mechanism( potentials = initial_potentials if potentials is not None: - # `measurements` can contain the same clique more than once (the one-way - # marginals passed in via `initial_measurements` plus the one-way marginals - # measured in the loop above). `CliqueVector.expand` requires unique cliques - # and otherwise raises "Cliques must be unique.", so de-duplicate while - # preserving order before expanding. + # De-duplicate cliques (order-preserving); `expand` requires unique cliques. 
unique_cliques = list(dict.fromkeys(m.clique for m in measurements)) potentials = potentials.expand(unique_cliques) diff --git a/dpsynth/discrete_mechanisms/independent_test.py b/dpsynth/discrete_mechanisms/independent_test.py index 88c58da..e45d405 100644 --- a/dpsynth/discrete_mechanisms/independent_test.py +++ b/dpsynth/discrete_mechanisms/independent_test.py @@ -31,6 +31,35 @@ def test_fits_one_way_marginals(self): actual = synthetic.project([col]).datavector() np.testing.assert_allclose(actual, expected, atol=0.1) + def test_duplicate_one_way_cliques_with_initial_potentials(self): + # Regression test: when `initial_potentials` is provided (e.g. the empty + # CliqueVector for the no-constraints case) and `initial_measurements` + # already holds the one-way marginals, the cliques measured in the loop + # duplicate them. `CliqueVector.expand` rejects duplicate cliques, so the + # mechanism used to crash with "Cliques must be unique.". + domain = mbi.Domain(["a", "b", "c"], [3, 4, 5]) + data = mbi.Dataset.synthetic(domain, N=1000) + + initial_measurements = [ + mbi.LinearMeasurement(data.project((col,)).datavector(), (col,)) + for col in data.domain + ] + initial_potentials = mbi.CliqueVector(domain, [], {}) + + config = independent.IndependentConfig(pgm_iters=500) + synthetic = independent.run_mechanism( + data, + config, + zcdp_rho=10000, + initial_measurements=initial_measurements, + initial_potentials=initial_potentials, + ) + + for col in data.domain: + expected = data.project([col]).datavector() + actual = synthetic.project([col]).datavector() + np.testing.assert_allclose(actual, expected, atol=0.1) + if __name__ == "__main__": absltest.main()