diff --git a/dpsynth/pipeline_transformations/swift.py b/dpsynth/pipeline_transformations/swift.py
index 3d6b1fb..8b6d57d 100644
--- a/dpsynth/pipeline_transformations/swift.py
+++ b/dpsynth/pipeline_transformations/swift.py
@@ -51,6 +51,10 @@ def fit_model(
     additional_output: Any | None = None,
 ) -> types.Collection[mbi.MarkovRandomField]:
   """Fits the model."""
+  if not 0 < parameters.select_budget_frac < 1:
+    raise ValueError(
+        f'{parameters.select_budget_frac=} must be between 0 and 1 (exclusive).'
+    )
 
   # 1. Generate workload.
   domain = backend.map(descriptor, lambda x: x.compressed_domain, 'Get domain')
@@ -84,28 +88,33 @@ def compile_workload_fn(dom):
   # errors: singleton collection of dict[mbi.Clique, float]
 
   # 4. Select queries.
-  mechanism_spec = budget_accountant.request_budget(
+  select_mechanism_spec = budget_accountant.request_budget(
       pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
-      name='Swift Select Queries',
+      name='Swift Error Scores',
       weight=parameters.select_budget_frac,
   )
+  measure_mechanism_spec = budget_accountant.request_budget(
+      pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
+      name='Swift Measure Marginals',
+      weight=1 - parameters.select_budget_frac,
+  )
 
   def select_queries_fn(
       errors_dict, candidates_dict, domain_obj
-  ) -> tuple[dict[mbi.Clique, float], nx.Graph]:
+  ) -> tuple[dict[mbi.Clique, float], nx.Graph, dict[mbi.Clique, float]]:
     """Selects queries using SWIFT algorithm."""
-    # `mechanism_spec` corresponds to the Gaussian mechanism that should be used
-    # to specify the total (epsilon, delta)-budget for the whole pipeline.
-    # Convert it to GDP budget.
-    gdp_budget = 1.0 / mechanism_spec.noise_standard_deviation**2
-    return swift.select_queries(
-        errors_dict,
+    noised_errors = _add_noise_to_errors(errors_dict, select_mechanism_spec)
+    # `measure_mechanism_spec` is the Gaussian mechanism budget left for the
+    # final marginal measurements; convert its noise scale to a GDP budget.
+    gdp_budget = 1.0 / measure_mechanism_spec.noise_standard_deviation**2
+    selected, jtree = swift.select_queries(
+        noised_errors,
         candidates_dict,
         domain_obj,
         parameters.max_clique_size,
         gdp_budget,
     )
-    # return selected, jtree
+    return selected, jtree, noised_errors
 
   selected_and_tree = backend.map_with_side_inputs(
       errors,
@@ -121,6 +130,9 @@ def select_queries_fn(
       selected_and_tree, lambda x: list(x[0].keys()), 'Get selected queries'
   )
   jtree = backend.map(selected_and_tree, lambda x: x[1], 'Get junction tree')
+  noised_errors = backend.map(
+      selected_and_tree, lambda x: x[2], 'Get noised Swift errors'
+  )
 
   # 6. Measure selected marginals (add noise).
   def filter_selected_marginals(exact_marginal, selected):
@@ -151,7 +163,9 @@ def filter_selected_marginals(exact_marginal, selected):
       and additional_output.diagnostic_info is not None
   ):
     errors_singleton = backend.map(
-        errors, lambda d: [(k, v) for k, v in d.items()], 'Errors to List'
+        noised_errors,
+        lambda d: [(k, v) for k, v in d.items()],
+        'Noised Errors to List',
     )
     additional_output.diagnostic_info = diagnostic_info.update_diagnostic_info(
         backend,
@@ -181,6 +195,41 @@ def fit_model_fn(measurements_list, jtree_obj, domain_obj):
   )
 
 
+def _add_noise_to_errors(
+    errors_dict: dict[mbi.Clique, float],
+    mechanism_spec: pipeline_dp.budget_accounting.MechanismSpec,
+) -> dict[mbi.Clique, float]:
+  """Adds DP noise to SWIFT selection errors as one vector query.
+
+  The cliques are sorted so that the error vector has a deterministic layout,
+  the whole vector is noised by a single additive mechanism, and the noised
+  values are mapped back onto their cliques.
+
+  Args:
+    errors_dict: Error score of each clique, before noise is added.
+    mechanism_spec: Mechanism spec used to create the additive mechanism.
+
+  Returns:
+    A dict with the same cliques as `errors_dict` mapped to noised errors. An
+    empty `errors_dict` yields an empty dict without creating a mechanism.
+  """
+  if not errors_dict:
+    return {}
+
+  sorted_cliques = sorted(errors_dict)
+  errors = np.array([errors_dict[clique] for clique in sorted_cliques])
+  # NOTE(review): an L2 sensitivity of sqrt(k) for k cliques presumes every
+  # error score has sensitivity at most 1 -- confirm where errors are computed.
+  sensitivities = pipeline_dp.dp_computations.Sensitivities(
+      l2=np.sqrt(len(sorted_cliques))
+  )
+  mechanism = pipeline_dp.dp_computations.create_additive_mechanism(
+      mechanism_spec, sensitivities
+  )
+  noised_errors = mechanism.add_noise(errors)
+  return dict(zip(sorted_cliques, noised_errors))
+
+
 def _add_noise_fn(
     clique_marginal: tuple[mbi.Clique, np.ndarray],
     selected_dict: dict[mbi.Clique, float],
diff --git a/tests/pipeline_transformations/swift_test.py b/tests/pipeline_transformations/swift_test.py
index bf9abb6..1de6cf1 100644
--- a/tests/pipeline_transformations/swift_test.py
+++ b/tests/pipeline_transformations/swift_test.py
@@ -14,6 +14,8 @@
 
 """Tests for SWIFT pipeline transformations."""
 
+from unittest import mock
+
 from absl.testing import absltest
 from dpsynth import data_generation
 from dpsynth.dataset_descriptors import dataset_descriptor
@@ -35,6 +37,62 @@ def from_tuple(self, record, proto_object=None):
 
 class SwiftTest(absltest.TestCase):
 
+  def test_add_noise_to_errors(self):
+    mechanism_spec = pipeline_dp.budget_accounting.MechanismSpec(
+        mechanism_type=pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
+        name="test",
+    )
+
+    class FakeMechanism:
+
+      def add_noise(self, values):
+        return values + np.array([10.0, 20.0])
+
+    with mock.patch.object(
+        pipeline_dp.dp_computations,
+        "create_additive_mechanism",
+        return_value=FakeMechanism(),
+    ) as mock_create:
+      noised = swift._add_noise_to_errors(
+          {(1,): 1.0, (0,): 2.0}, mechanism_spec
+      )
+
+    # Values are noised in sorted-clique order: (0,) -> 2 + 10, (1,) -> 1 + 20.
+    self.assertEqual(noised, {(0,): 12.0, (1,): 21.0})
+    # The spec must be forwarded unchanged, and two cliques noised as a single
+    # vector query must request an L2 sensitivity of sqrt(2).
+    mock_create.assert_called_once_with(mechanism_spec, mock.ANY)
+    sensitivities = mock_create.call_args.args[1]
+    self.assertAlmostEqual(sensitivities.l2, 2**0.5)
+
+  def test_add_noise_to_errors_empty_input(self):
+    mechanism_spec = pipeline_dp.budget_accounting.MechanismSpec(
+        mechanism_type=pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
+        name="test",
+    )
+
+    with mock.patch.object(
+        pipeline_dp.dp_computations, "create_additive_mechanism"
+    ) as mock_create:
+      noised = swift._add_noise_to_errors({}, mechanism_spec)
+
+    # Nothing to noise, so no mechanism should be built.
+    self.assertEqual(noised, {})
+    mock_create.assert_not_called()
+
+  def test_fit_model_rejects_invalid_select_budget_fraction(self):
+    backend = pipeline_dp.LocalBackend()
+    budget_accountant = pipeline_dp.PLDBudgetAccountant(1.0, 1e-5)
+
+    with self.assertRaisesRegex(ValueError, "must be between 0 and 1"):
+      swift.fit_model(
+          backend,
+          budget_accountant,
+          data=[],
+          descriptor=[],
+          parameters=swift.SwiftParameters(select_budget_frac=1.0),
+      )
+
   def test_fit_model(self):
     backend = pipeline_dp.LocalBackend()
     data = [(0, 1), (0, 1), (1, 0), (1, 1)]