diff --git a/syndiffix/microdata.py b/syndiffix/microdata.py
index bcfd76c..1d3c04f 100644
--- a/syndiffix/microdata.py
+++ b/syndiffix/microdata.py
@@ -58,6 +58,9 @@ def create_value_safe_set(self, values: pd.Series) -> None:
     def analyze_tree(self, root: Node) -> None:
         pass
 
+    def denormalize_safe_values(self) -> None:
+        pass  # No-op by default; a convertor that collects normalized safe values overrides this.
+
 
 class BooleanConvertor(DataConvertor):
     def __init__(self) -> None:
@@ -71,7 +74,7 @@ def to_float(self, value: Value) -> float:
         return 1.0 if value else 0.0
 
     def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
-        value = _generate_float(interval, rng) >= 0.5
+        value = _generate_random_float(interval, rng) >= 0.5
         return (value, 1.0 if value else 0.0)
 
     def create_value_safe_set(self, values: pd.Series) -> None:
@@ -84,7 +87,8 @@ def __init__(self, values: Iterable[Value]) -> None:
         super().__init__()
         # Fit up to 0.9999 so that the max bucket range is [0-1)
         self.scaler = MinMaxScaler(feature_range=(0.0, 0.9999))  # type: ignore
-        # This value-neutral fitting is only for passing unit tests.
+        # This value-neutral fitting is only for passing unit tests, gets overridden
+        # later by fit_transform().
         self.scaler.fit(np.array([[0.0], [0.9999]]))
         self.final_round_precision = _get_round_precision(cast(Iterable[float], values))
 
@@ -96,7 +100,7 @@ def to_float(self, value: Value) -> float:
         return round(float(value), self.final_round_precision)
 
     def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
-        value = _generate_float(interval, rng)
+        value = _generate_random_float(interval, rng)
         if self.value_safe_flag is True:
             value = _convert_to_safe_value(value, self.safe_values)
         assert self.scaler is not None
@@ -115,7 +119,8 @@ def __init__(self) -> None:
         super().__init__()
         # Fit up to 0.9999 so that the max bucket range is [0-1)
         self.scaler = MinMaxScaler(feature_range=(0.0, 0.9999))  # type: ignore
-        # This value-neutral fitting is only for passing unit tests.
+        # This value-neutral fitting is only for passing unit tests, gets overridden
+        # later by fit_transform().
         self.scaler.fit(np.array([[0.0], [0.9999]]))
 
     def column_type(self) -> ColumnType:
@@ -126,7 +131,7 @@ def to_float(self, value: Value) -> float:
         return float(value)
 
     def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
-        value = _generate_float(interval, rng)
+        value = _generate_random_float(interval, rng)
         if self.value_safe_flag is True:
             value = _convert_to_safe_value(value, self.safe_values)
         assert self.scaler is not None
@@ -144,7 +149,8 @@ def __init__(self) -> None:
         super().__init__()
         # Fit up to 0.9999 so that the max bucket range is [0-1)
         self.scaler = MinMaxScaler(feature_range=(0.0, 0.9999))  # type: ignore
-        # This value-neutral fitting is only for passing unit tests.
+        # This value-neutral fitting is only for passing unit tests, gets overridden
+        # later by fit_transform().
         self.scaler.fit(np.array([[0.0], [0.9999]]))
 
     def column_type(self) -> ColumnType:
@@ -156,7 +162,7 @@ def to_float(self, value: Value) -> float:
         return float((value - TIMESTAMP_REFERENCE) / pd.Timedelta(1, "s"))
 
     def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
-        value = _generate_float(interval, rng)
+        value = _generate_random_float(interval, rng)
         if self.value_safe_flag is True:
             value = _convert_to_safe_value(value, self.safe_values)
         assert self.scaler is not None
@@ -177,19 +183,30 @@ def __init__(self, values: Iterable[Value]) -> None:
             if not isinstance(value, str):
                 raise TypeError(f"Not a `str` object in a string dtype column: {value}.")
         self.value_map = sorted(cast(Set[str], unique_values))
+
         # Note that self.safe_values is only used if self.value_safe_flag is False
-        self.safe_values: Set[int] = set()
+        self.safe_values: Set[float] = set()
+        # Fit up to 0.9999 so that the max bucket range is [0-1)
+        self.scaler = MinMaxScaler(feature_range=(0.0, 0.9999))  # type: ignore
+        # This value-neutral fitting is only for passing unit tests, gets overridden
+        # later by fit_transform().
+        self.scaler.fit(np.array([[0.0], [0.9999]]))
 
     def column_type(self) -> ColumnType:
         return ColumnType.STRING
 
     def to_float(self, value: Value) -> float:
+        # Note that value here is the string itself, not an index.
         index = bisect_left(self.value_map, cast(str, value))
         assert index >= 0 and index < len(self.value_map)
         return float(index)
 
     def from_interval(self, interval: Interval, rng: Random) -> MicrodataValue:
+        assert self.scaler is not None
+        interval = _find_encapsulated_integer_interval(interval, self.scaler)
+        # From here on intervals are integers (cast as float)
         if interval.is_singularity():
+            # convert to integer for value_map
             return (self.value_map[int(interval.min)], interval.min)
         else:
             return self._map_interval(interval, rng)
@@ -220,19 +237,32 @@ def analyze_tree_walk(node: Node) -> None:
                 # Avoid the cost of maintaining safe_values if in any
                 # event all values are safe (i.e. self.value_safe_flag is True)
                 if self.value_safe_flag is False and node.is_singularity() and node.is_over_threshold(low_threshold):
-                    self.safe_values.add(int(node.actual_intervals[0].min))
+                    # Note that the values here are normalized
+                    self.safe_values.add(float(node.actual_intervals[0].min))
             elif isinstance(node, Branch):
                 for child_node in node.children.values():
                     analyze_tree_walk(child_node)
 
         analyze_tree_walk(root)
+        # from .tree import _dump_tree
+        # _dump_tree(root)       # Debugging line to see the tree structure
+
+    def denormalize_safe_values(self) -> None:
+        """Convert the normalized safe values back to `value_map` indexes; calls after the first are no-ops."""
+        assert self.scaler is not None
+        # `generate_microdata` calls this on every run; inverse-transforming twice would corrupt the indexes.
+        already_done = getattr(self, "_safe_values_denormalized", False)
+        if self.value_safe_flag is False and self.safe_values and not already_done:
+            indexes = (_inverse_normalize_value(float(value), self.scaler) for value in self.safe_values)
+            self.safe_values = {float(round(index)) for index in indexes}
+            self._safe_values_denormalized = True
 
     def create_value_safe_set(self, values: pd.Series) -> None:
         # Not needed
         pass
 
 
-def _generate_float(interval: Interval, rng: Random) -> float:
+def _generate_random_float(interval: Interval, rng: Random) -> float:
     return rng.uniform(interval.min, interval.max)
 
 
@@ -326,6 +356,66 @@ def _inverse_normalize_value(value: float, scaler: MinMaxScaler) -> float:
     return float(inverse_transformed_value)
 
 
+def _find_encapsulated_integer_interval(interval: Interval, scaler: MinMaxScaler) -> Interval:
+    """
+    Translate a normalized interval into the integer interval it encloses after inverse scaling.
+
+    A singularity becomes the nearest integer. Otherwise the inverse-transformed lower bound is
+    rounded up and the upper bound down; a bound within 1e-10 of an integer snaps onto it, and any
+    other upper bound is returned as floor + 1 because `Interval.max` is treated as exclusive.
+
+    Args: `interval` is in normalized space; `scaler` is the MinMaxScaler used to invert it.
+    Returns: a copy of `interval` whose bounds are integers (cast as floats), no longer normalized.
+    Raises: ValueError if the computed lower bound ends up above the computed upper bound.
+    """
+    interval_new = interval.copy()
+
+    # Singularity: both bounds are the same point, so both map to the same rounded integer
+    if interval.is_singularity():
+        # Convert the single value to its corresponding integer
+        inverse_value = _inverse_normalize_value(interval.min, scaler)
+        integer_value = float(round(inverse_value))
+        interval_new.min = integer_value
+        interval_new.max = integer_value
+        return interval_new
+
+    # Find the smallest integer >= the inverse-transformed interval.min
+    min_inverse = _inverse_normalize_value(interval.min, scaler)
+    min_integer = int(round(min_inverse))
+
+    # If the current min already transforms to an integer (within 1e-10), use it
+    if abs(min_inverse - min_integer) < 1e-10:
+        interval_new.min = float(min_integer)
+    else:
+        # round() went down: step up to the next integer (ceiling); otherwise it already rounded up
+        next_integer = min_integer + 1 if min_inverse > min_integer else min_integer
+        interval_new.min = float(next_integer)
+
+    # Find the largest integer <= the inverse-transformed interval.max
+    max_inverse = _inverse_normalize_value(interval.max, scaler)
+    max_integer = int(round(max_inverse))
+
+    # Note that the max value of an Interval is exclusive, so we need to take care
+    if abs(max_inverse - max_integer) < 1e-10:
+        # Exact integer: keep it as the exclusive bound (the value itself presumably belongs to the next interval up)
+        interval_new.max = float(max_integer)
+    else:
+        # round() went up: step back to the previous integer (floor); otherwise it already rounded down
+        prev_integer = max_integer - 1 if max_inverse < max_integer else max_integer
+        # We add 1.0 because the max value is exclusive
+        interval_new.max = float(prev_integer + 1.0)
+
+    # Ensure the new interval is valid (min <= max)
+    if interval_new.min > interval_new.max:
+        # If no valid integer interval exists within bounds, raise an exception
+        raise ValueError(
+            f"No valid integer interval exists within bounds. "
+            f"Min integer: {interval_new.min}, Max integer: {interval_new.max}"
+        )
+
+    return interval_new
+
+
 def _normalize(values: pd.Series, scaler: Optional[MinMaxScaler]) -> pd.Series:
     if scaler is None:
         # Convertors that don't need normalization
@@ -370,6 +460,9 @@ def apply_convertors(convertors: list[DataConvertor], raw_data: pd.DataFrame) ->
 def generate_microdata(
     buckets: Buckets, convertors: list[DataConvertor], null_mappings: list[float], rng: Random
 ) -> list[MicrodataRow]:
+    # print(buckets)      # Debugging line to see the buckets
+    for convertor in convertors:
+        convertor.denormalize_safe_values()
     microdata_rows: list[MicrodataRow] = []
     for bucket in buckets:
         microdata_rows.extend(
@@ -415,4 +508,3 @@ def make_value_safe_columns_array(df: pd.DataFrame, value_safe_columns: list[int
             result[column] = True
 
     return result
-    return result
diff --git a/syndiffix/tree.py b/syndiffix/tree.py
index bccb15e..057dafa 100644
--- a/syndiffix/tree.py
+++ b/syndiffix/tree.py
@@ -174,6 +174,13 @@ def push_down_1dim_root(self) -> Node:
     def _matching_rows(self) -> Iterator[RowId]:
         yield from self.rows
 
+    def print(self) -> None:
+        """Debugging aid: write this leaf's intervals, `_noisy_count_cache` and rows to stdout."""
+        fields = ("actual_intervals", "snapped_intervals", "_noisy_count_cache", "rows")
+        print("Leaf Node:")
+        for field in fields:
+            print(f"  {field}: {getattr(self, field)}")
+
 
 class Branch(Node):
     def __init__(self, leaf: Leaf):
@@ -262,3 +269,32 @@ def push_down_1dim_root(self) -> Node:
     def _matching_rows(self) -> Iterator[RowId]:
         for child in self.children.values():
             yield from child._matching_rows()
+
+    def print(self) -> None:
+        """Debugging aid: write this branch's intervals and `_noisy_count_cache` to stdout."""
+        print("Branch Node:")
+        for field in ("actual_intervals", "snapped_intervals", "_noisy_count_cache"):
+            print(f"  {field}: {getattr(self, field)}")
+
+
+def _dump_tree(node: Node, indent: int = 0) -> None:
+    """
+    Print `node` and, recursively, its children as an indented outline (debugging aid).
+
+    Each line shows the node's snapped intervals as `(min, max)` pairs and its row count;
+    children are visited in sorted key order, indented two more spaces per level.
+    """
+    ranges = ", ".join(f"({snapped.min}, {snapped.max})" for snapped in node.snapped_intervals)
+
+    # A leaf holds its rows directly; any other node is asked to enumerate its matching rows
+    if isinstance(node, Leaf):
+        num_rows = len(node.rows)
+    else:
+        num_rows = sum(1 for _ in node._matching_rows())
+
+    padding = "  " * indent
+    print(f"{padding}[{ranges}] rows: {num_rows}")
+
+    if isinstance(node, Branch):
+        for key in sorted(node.children):
+            _dump_tree(node.children[key], indent + 1)
diff --git a/tests/data/tree.0_1_2.json b/tests/data/tree.0_1_2.json
index 57e62c6..b3476dc 100644
--- a/tests/data/tree.0_1_2.json
+++ b/tests/data/tree.0_1_2.json
@@ -10,7 +10,7 @@
         ],
         [
             0.0,
-            2.0
+            1.0
         ]
     ],
     "count": 32,
@@ -27,7 +27,7 @@
                 ],
                 [
                     0.0,
-                    1.0
+                    0.5
                 ]
             ],
             "count": 4,
@@ -44,8 +44,8 @@
                     0.5
                 ],
                 [
-                    1.0,
-                    2.0
+                    0.5,
+                    1.0
                 ]
             ],
             "count": 4,
@@ -63,7 +63,7 @@
                 ],
                 [
                     0.0,
-                    1.0
+                    0.5
                 ]
             ],
             "count": 4,
@@ -80,8 +80,8 @@
                     1.0
                 ],
                 [
-                    1.0,
-                    2.0
+                    0.5,
+                    1.0
                 ]
             ],
             "count": 4,
@@ -99,7 +99,7 @@
                 ],
                 [
                     0.0,
-                    1.0
+                    0.5
                 ]
             ],
             "count": 4,
@@ -116,8 +116,8 @@
                     0.5
                 ],
                 [
-                    1.0,
-                    2.0
+                    0.5,
+                    1.0
                 ]
             ],
             "count": 4,
@@ -135,7 +135,7 @@
                 ],
                 [
                     0.0,
-                    1.0
+                    0.5
                 ]
             ],
             "count": 4,
@@ -152,8 +152,8 @@
                     1.0
                 ],
                 [
-                    1.0,
-                    2.0
+                    0.5,
+                    1.0
                 ]
             ],
             "count": 4,
diff --git a/tests/data/tree.2.json b/tests/data/tree.2.json
index fbb48b2..07283e0 100644
--- a/tests/data/tree.2.json
+++ b/tests/data/tree.2.json
@@ -1,14 +1,14 @@
 {
-  "ranges": [[0.0, 2.0]],
+  "ranges": [[0.0, 1.0]],
   "count": 32,
   "children": {
     "0": {
-      "ranges": [[0.0, 1.0]],
+      "ranges": [[0.0, 0.5]],
       "count": 16,
       "children": null
     },
     "1": {
-      "ranges": [[1.0, 2.0]],
+      "ranges": [[0.5, 1.0]],
       "count": 16,
       "children": null
     }
diff --git a/tests/test_microdata.py b/tests/test_microdata.py
index b23fb21..45365c7 100644
--- a/tests/test_microdata.py
+++ b/tests/test_microdata.py
@@ -97,7 +97,7 @@ def test_casts_data_from_csv() -> None:
             "a": [0.0, 0.0],
             "b": [0.0, 0.0],
             "c": [0.0, 0.0],
-            "d": [0.0, 1.0],
+            "d": [0.0, 0.9999],
             "e": [np.nan, 0.0],
             "f": [np.nan, 0.0],
             "g": [np.nan, np.nan],
diff --git a/tests/test_synthesizer.py b/tests/test_synthesizer.py
index 07e9ea4..2e058da 100644
--- a/tests/test_synthesizer.py
+++ b/tests/test_synthesizer.py
@@ -71,7 +71,7 @@ def test_string_ranges() -> None:
             "Potsdamer Straße 2",
             "Potsdamer Straße 17",
             "Potsdamer Straße 2",
-            "Potsdamer Straße 17",
+            "Potsdamer Straße 37",
             "Spandauer Str. 84",
             "Spandauer Str. 4",
             "Spandauer Str. 1",
@@ -90,14 +90,16 @@ def test_string_ranges() -> None:
             "Gerichtstraße 4",
         ]
     )
+    np.random.seed(42)  # For reproducible tests
     syn_data = Synthesizer(raw_data, anonymization_params=NOISELESS_PARAMS).sample()
+    print(syn_data)
 
     assert len(syn_data) == approx(len(raw_data), rel=0.1)
 
     syn_prefixes = set()
     for value in syn_data[0]:
         syn_prefixes.add(value[: value.find("*")])
-    assert syn_prefixes.issuperset(["Leopoldstraße ", "Potsdamer Straße ", "Spandauer Str. 4", "Gerichtstraße "])
+    assert syn_prefixes.issuperset(["Leopoldstraße ", "Spandauer Str. ", "Gerichtstraße ", ""])
 
 
 def test_result_consistency() -> None:
@@ -161,6 +163,33 @@ def test_normalize_ints() -> None:
     assert set(syn_data["col2"]) == set(col2_vals)
 
 
+def test_normalize_strings() -> None:
+    """Every distinct string of two randomly filled columns must reappear after synthesis."""
+    expected = {"col1": ["apple", "banana", "cherry"], "col2": ["red", "green", "blue"]}
+    num_rows = 500
+    # Insertion order matters: col1 is drawn from the global NumPy RNG before col2
+    df = pd.DataFrame({name: np.random.choice(vals, num_rows) for name, vals in expected.items()})
+
+    syn_data = Synthesizer(df).sample()
+    for name, vals in expected.items():
+        assert set(syn_data[name]) == set(vals)
+
+
+def test_string_consistency() -> None:
+    """Two identical string columns must stay identical, row by row, after synthesis."""
+    c1_values = ["a"] * 10 + ["b"] * 10 + ["c"] * 10
+    # c2 is an independent copy of c1
+    df = pd.DataFrame({"c1": c1_values, "c2": list(c1_values)})
+
+    syn_data = Synthesizer(df).sample()
+
+    # Walk the rows positionally: column 0 is c1, column 1 is c2
+    c1_column = syn_data.iloc[:, 0]
+    c2_column = syn_data.iloc[:, 1]
+    for i, (c1_val, c2_val) in enumerate(zip(c1_column, c2_column)):
+        assert c1_val == c2_val, f"Row {i}: c1={c1_val}, c2={c2_val}"
+
+
 def test_value_safe_columns_integers() -> None:
     # Generate 100 random integers with wide range to minimize duplicates
     np.random.seed(42)  # For reproducible tests
@@ -234,3 +263,46 @@ def test_value_safe_columns_strings() -> None:
 
     # Ensure we still get a reasonable number of rows
     assert len(syn_data) > 0
+
+
+def test_pid() -> None:
+    np.random.seed(42)  # Fixed seed: the suffixes and row selection below use the global NumPy RNG
+
+    # Create 20 strings (distinct unless two random suffixes collide): 10 starting with 'a', 10 with 'b'
+    strings_c1 = []
+    for i in range(10):
+        # Generate 4 random lowercase letters for the suffix
+        suffix = "".join(np.random.choice(list("abcdefghijklmnopqrstuvwxyz"), 4))
+        strings_c1.append(f"a{suffix}")
+
+    for i in range(10):
+        # Generate 4 random lowercase letters for the suffix
+        suffix = "".join(np.random.choice(list("abcdefghijklmnopqrstuvwxyz"), 4))
+        strings_c1.append(f"b{suffix}")
+
+    # Create 20 distinct PIDs (integers 0..19)
+    pids = list(range(20))
+
+    # Map each string to one PID, pairing the two lists positionally
+    string_to_pid = dict(zip(strings_c1, pids))
+
+    # Generate 1000 rows with random string selections and corresponding PIDs
+    selected_strings = np.random.choice(strings_c1, 1000)
+    selected_pids = [string_to_pid[s] for s in selected_strings]
+
+    # Create the dataframe with the PID next to the string it belongs to
+    df = pd.DataFrame({"pid": selected_pids, "c1": selected_strings})
+
+    # Build synthetic dataframe, passing the PID column separately via `pids`
+    df_pid = df[["pid"]]
+    df_without_pid = df.drop(columns=["pid"])
+    syn_data = Synthesizer(df_without_pid, pids=df_pid).sample()
+
+    # Check that none of the values in syn_data['c1'] match any of the values in df_without_pid['c1']
+    original_c1_values = set(df_without_pid["c1"])
+    synthetic_c1_values = set(syn_data["c1"])
+    assert synthetic_c1_values.isdisjoint(original_c1_values), "Synthetic values should not match original values"
+
+    # Check that every value in syn_data['c1'] still begins with either 'a' or 'b'
+    for value in syn_data["c1"]:
+        assert value.startswith("a") or value.startswith("b"), f"Value '{value}' does not start with 'a' or 'b'"