Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions dpsynth/pipeline_transformations/swift.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def fit_model(
additional_output: Any | None = None,
) -> types.Collection[mbi.MarkovRandomField]:
"""Fits the model."""
if not 0 < parameters.select_budget_frac < 1:
raise ValueError(
f'{parameters.select_budget_frac=} must be between 0 and 1.'
)

# 1. Generate workload.
domain = backend.map(descriptor, lambda x: x.compressed_domain, 'Get domain')
Expand Down Expand Up @@ -84,28 +88,33 @@ def compile_workload_fn(dom):
# errors: singleton collection of dict[mbi.Clique, float]

# 4. Select queries.
mechanism_spec = budget_accountant.request_budget(
select_mechanism_spec = budget_accountant.request_budget(
pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
name='Swift Select Queries',
name='Swift Error Scores',
weight=parameters.select_budget_frac,
)
measure_mechanism_spec = budget_accountant.request_budget(
pipeline_dp.budget_accounting.MechanismType.GAUSSIAN,
name='Swift Measure Marginals',
weight=1 - parameters.select_budget_frac,
)

def select_queries_fn(
errors_dict, candidates_dict, domain_obj
) -> tuple[dict[mbi.Clique, float], nx.Graph]:
) -> tuple[dict[mbi.Clique, float], nx.Graph, dict[mbi.Clique, float]]:
"""Selects queries using SWIFT algorithm."""
# `mechanism_spec` corresponds to the Gaussian mechanism that should be used
# to specify the total (epsilon, delta)-budget for the whole pipeline.
# Convert it to GDP budget.
gdp_budget = 1.0 / mechanism_spec.noise_standard_deviation**2
return swift.select_queries(
errors_dict,
noised_errors = _add_noise_to_errors(errors_dict, select_mechanism_spec)
# `measure_mechanism_spec` corresponds to the Gaussian mechanism budget
# available for the final selected marginal measurements.
gdp_budget = 1.0 / measure_mechanism_spec.noise_standard_deviation**2
selected, jtree = swift.select_queries(
noised_errors,
candidates_dict,
domain_obj,
parameters.max_clique_size,
gdp_budget,
)
# return selected, jtree
return selected, jtree, noised_errors

selected_and_tree = backend.map_with_side_inputs(
errors,
Expand All @@ -121,6 +130,9 @@ def select_queries_fn(
selected_and_tree, lambda x: list(x[0].keys()), 'Get selected queries'
)
jtree = backend.map(selected_and_tree, lambda x: x[1], 'Get junction tree')
noised_errors = backend.map(
selected_and_tree, lambda x: x[2], 'Get noised Swift errors'
)

# 6. Measure selected marginals (add noise).
def filter_selected_marginals(exact_marginal, selected):
Expand Down Expand Up @@ -151,7 +163,9 @@ def filter_selected_marginals(exact_marginal, selected):
and additional_output.diagnostic_info is not None
):
errors_singleton = backend.map(
errors, lambda d: [(k, v) for k, v in d.items()], 'Errors to List'
noised_errors,
lambda d: [(k, v) for k, v in d.items()],
'Noised Errors to List',
)
additional_output.diagnostic_info = diagnostic_info.update_diagnostic_info(
backend,
Expand Down Expand Up @@ -181,6 +195,26 @@ def fit_model_fn(measurements_list, jtree_obj, domain_obj):
)


def _add_noise_to_errors(
    errors_dict: dict[mbi.Clique, float],
    mechanism_spec: pipeline_dp.budget_accounting.MechanismSpec,
) -> dict[mbi.Clique, float]:
  """Noises every SWIFT selection error in a single vector release.

  The error values are laid out as one vector, in sorted clique order, and
  passed through a single additive mechanism built from `mechanism_spec`.

  Args:
    errors_dict: Mapping from clique to its (non-private) error score.
    mechanism_spec: Budget spec used to construct the additive mechanism.

  Returns:
    A new dict with the same cliques as `errors_dict`, each mapped to its
    noised error. An empty input yields an empty dict without creating a
    mechanism.
  """
  if not errors_dict:
    return {}

  # Fix one clique order so each noised entry can be mapped back to its key.
  ordered_cliques = sorted(errors_dict)
  error_vector = np.array([errors_dict[c] for c in ordered_cliques])

  # The L2 sensitivity is set to sqrt(k) for a k-entry vector.
  # NOTE(review): presumably this assumes every individual error score has
  # sensitivity at most 1 -- confirm against how the errors are computed.
  l2_sensitivity = np.sqrt(len(ordered_cliques))
  additive_mechanism = pipeline_dp.dp_computations.create_additive_mechanism(
      mechanism_spec,
      pipeline_dp.dp_computations.Sensitivities(l2=l2_sensitivity),
  )

  noised_vector = additive_mechanism.add_noise(error_vector)
  return {
      clique: noised
      for clique, noised in zip(ordered_cliques, noised_vector)
  }


def _add_noise_fn(
clique_marginal: tuple[mbi.Clique, np.ndarray],
selected_dict: dict[mbi.Clique, float],
Expand Down
38 changes: 38 additions & 0 deletions tests/pipeline_transformations/swift_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

"""Tests for SWIFT pipeline transformations."""

from unittest import mock

from absl.testing import absltest
from dpsynth import data_generation
from dpsynth.dataset_descriptors import dataset_descriptor
Expand All @@ -35,6 +37,42 @@ def from_tuple(self, record, proto_object=None):

class SwiftTest(absltest.TestCase):

def test_add_noise_to_errors(self):
  """Checks that noise is applied in sorted clique order, in one call."""
  gaussian = pipeline_dp.budget_accounting.MechanismType.GAUSSIAN
  spec = pipeline_dp.budget_accounting.MechanismSpec(
      mechanism_type=gaussian,
      name="test",
  )

  class _OffsetMechanism:
    """Stand-in mechanism that adds a fixed, position-dependent offset."""

    def add_noise(self, values):
      return values + np.array([10.0, 20.0])

  # Keys are deliberately given out of order: (0,) must receive the first
  # offset and (1,) the second once the function sorts the cliques.
  input_errors = {(1,): 1.0, (0,): 2.0}
  expected = {(0,): 12.0, (1,): 21.0}

  with mock.patch.object(
      pipeline_dp.dp_computations,
      "create_additive_mechanism",
      return_value=_OffsetMechanism(),
  ) as create_mock:
    result = swift._add_noise_to_errors(input_errors, spec)

  self.assertEqual(result, expected)
  create_mock.assert_called_once()

def test_fit_model_rejects_invalid_select_budget_fraction(self):
  """Checks that `fit_model` raises for a select budget fraction of 1.0."""
  local_backend = pipeline_dp.LocalBackend()
  accountant = pipeline_dp.PLDBudgetAccountant(1.0, 1e-5)

  # Empty data and descriptor are passed here; the test only expects the
  # ValueError about the budget fraction to be raised.
  expected_message = "must be between 0 and 1"
  with self.assertRaisesRegex(ValueError, expected_message):
    swift.fit_model(
        local_backend,
        accountant,
        data=[],
        descriptor=[],
        parameters=swift.SwiftParameters(select_budget_frac=1.0),
    )

def test_fit_model(self):
backend = pipeline_dp.LocalBackend()
data = [(0, 1), (0, 1), (1, 0), (1, 1)]
Expand Down