NGO-Algorithm-Audit · krstopro · May 15, 2025 · May 15, 2025 · May 15, 2025 · May 15, 2025
diff --git a/HBAC_scan/helper_functions.py b/HBAC_scan/helper_functions.py
diff --git a/classifiers/Loan_approval_classifier/german_dataset.py b/classifiers/Loan_approval_classifier/german_dataset.py
@@ -0,0 +1,162 @@
+import os
+import pandas as pd
+from aif360.datasets import StandardDataset
+
+
+default_mappings = {
+    "label_maps": [{0: "Good Credit", 1: "Bad Credit"}],
+    "protected_attribute_maps": [
+        {1.0: "Male", 0.0: "Female"},
+        {1.0: "Old", 0.0: "Young"},
+    ],
+}
+
+
+def default_preprocessing(df):
+    """Adds a derived sex attribute based on personal_status."""
+    # TODO: ignores the value of privileged_classes for 'sex'
+    status_map = {
+        "A91": "male",
+        "A93": "male",
+        "A94": "male",
+        "A92": "female",
+        "A95": "female",
+    }
+    df["sex"] = df["personal_status"].replace(status_map)
+
+    return df
+
+
+class GermanDataset(StandardDataset):
+    """German credit Dataset.
+
+    See :file:`aif360/data/raw/german/README.md`.
+    """
+
+    def __init__(
+        self,
+        label_name="credit",
+        favorable_classes=[0],
+        protected_attribute_names=[],
+        privileged_classes=[],
+        instance_weights_name=None,
+        categorical_features=[
+            "status",
+            "credit_history",
+            "purpose",
+            "savings",
+            "employment",
+            "other_debtors",
+            "property",
+            "installment_plans",
+            "housing",
+            "skill_level",
+            "telephone",
+            "foreign_worker",
+        ],
+        features_to_keep=[],
+        features_to_drop=["personal_status"],
+        na_values=[],
+        custom_preprocessing=default_preprocessing,
+        metadata=default_mappings,
+    ):
+        """See :obj:`StandardDataset` for a description of the arguments.
+
+        By default, this code converts the 'age' attribute to a binary value
+        where privileged is `age > 25` and unprivileged is `age <= 25` as
+        proposed by Kamiran and Calders [1]_.
+
+        References:
+            .. [1] F. Kamiran and T. Calders, "Classifying without
+               discriminating," 2nd International Conference on Computer,
+               Control and Communication, 2009.
+
+        Examples:
+            In some cases, it may be useful to keep track of a mapping from
+            `float -> str` for protected attributes and/or labels. If our use
+            case differs from the default, we can modify the mapping stored in
+            `metadata`:
+
+            >>> label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
+            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
+            >>> gd = GermanDataset(protected_attribute_names=['sex'],
+            ... privileged_classes=[['male']], metadata={'label_map': label_map,
+            ... 'protected_attribute_maps': protected_attribute_maps})
+
+            Now this information will stay attached to the dataset and can be
+            used for more descriptive visualizations.
+        """
+
+        # change path
+        filepath = "../../data/GermanCredit_dataset/german.data"
+
+        # as given by german.doc
+        column_names = [
+            "status",
+            "month",
+            "credit_history",
+            "purpose",
+            "credit_amount",
+            "savings",
+            "employment",
+            "investment_as_income_percentage",
+            "personal_status",
+            "other_debtors",
+            "residence_since",
+            "property",
+            "age",
+            "installment_plans",
+            "housing",
+            "number_of_credits",
+            "skill_level",
+            "people_liable_for",
+            "telephone",
+            "foreign_worker",
+            "credit",
+        ]
+        try:
+            df = pd.read_csv(
+                filepath, sep=" ", header=None, names=column_names, na_values=na_values
+            )
+        except IOError as err:
+            print("IOError: {}".format(err))
+            print("To use this class, please download the following files:")
+            print(
+                "\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
+            )
+            print(
+                "\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc"
+            )
+            print("\nand place them, as-is, in the folder:")
+            print(
+                "\n\t{}\n".format(
+                    os.path.abspath(
+                        os.path.join(
+                            os.path.abspath(__file__),
+                            "..",
+                            "..",
+                            "data",
+                            "raw",
+                            "german",
+                        )
+                    )
+                )
+            )
+            import sys
+
+            sys.exit(1)
+
+        super(GermanDataset, self).__init__(
+            df=df,
+            label_name=label_name,
+            favorable_classes=favorable_classes,
+            protected_attribute_names=protected_attribute_names,
+            privileged_classes=privileged_classes,
+            instance_weights_name=instance_weights_name,
+            categorical_features=categorical_features,
+            features_to_keep=features_to_keep,
+            features_to_drop=features_to_drop,
+            na_values=na_values,
+            custom_preprocessing=custom_preprocessing,
+            metadata=metadata,
+        )
diff --git a/classifiers/Loan_approval_classifier/helper_functions.py b/classifiers/Loan_approval_classifier/helper_functions.py
@@ -0,0 +1,149 @@
+import os
+import pandas as pd
+from aif360.datasets import StandardDataset
+
+
+def default_preprocessing(df):
+    # default: 1, no default: 0
+    df["credit"] = df["credit"].replace({1.0: 0, 2.0: 1})
+
+    # sex
+    # male: 0, female: 1
+    status_map = {"A91": 0, "A93": 0, "A94": 0, "A92": 1, "A95": 1}
+    df["sex"] = df["personal_status"].replace(status_map)
+
+    return df
+
+
+class GermanDataset(StandardDataset):
+    """German credit Dataset.
+    See :file:`aif360/data/raw/german/README.md`.
+    """
+
+    def __init__(
+        self,
+        label_name="credit",
+        favorable_classes=[1],
+        protected_attribute_names=["sex", "age"],
+        privileged_classes=[],
+        instance_weights_name=None,
+        categorical_features=[
+            "status",
+            "credit_history",
+            "purpose",
+            "savings",
+            "employment",
+            "other_debtors",
+            "property",
+            "installment_plans",
+            "housing",
+            "skill_level",
+            "telephone",
+            "foreign_worker",
+        ],
+        features_to_keep=[],
+        features_to_drop=["personal_status"],
+        na_values=[],
+        custom_preprocessing=default_preprocessing,
+        metadata=None,
+    ):
+        """See :obj:`StandardDataset` for a description of the arguments.
+        By default, this code converts the 'age' attribute to a binary value
+        where privileged is `age > 25` and unprivileged is `age <= 25` as
+        proposed by Kamiran and Calders [1]_.
+        References:
+            .. [1] F. Kamiran and T. Calders, "Classifying without
+               discriminating," 2nd International Conference on Computer,
+               Control and Communication, 2009.
+        Examples:
+            In some cases, it may be useful to keep track of a mapping from
+            `float -> str` for protected attributes and/or labels. If our use
+            case differs from the default, we can modify the mapping stored in
+            `metadata`:
+            >>> label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
+            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
+            >>> gd = GermanDataset(protected_attribute_names=['sex'],
+            ... privileged_classes=[['male']], metadata={'label_map': label_map,
+            ... 'protected_attribute_maps': protected_attribute_maps})
+            Now this information will stay attached to the dataset and can be
+            used for more descriptive visualizations.
+        """
+
+        filepath = os.path.join(
+            os.path.dirname(os.path.abspath(__file__)),
+            "..",
+            "..",
+            "data",
+            "GermanCredit_dataset",
+            "german.data",
+        )
+        # as given by german.doc
+        column_names = [
+            "status",
+            "month",
+            "credit_history",
+            "purpose",
+            "credit_amount",
+            "savings",
+            "employment",
+            "investment_as_income_percentage",
+            "personal_status",
+            "other_debtors",
+            "residence_since",
+            "property",
+            "age",
+            "installment_plans",
+            "housing",
+            "number_of_credits",
+            "skill_level",
+            "people_liable_for",
+            "telephone",
+            "foreign_worker",
+            "credit",
+        ]
+        try:
+            df = pd.read_csv(
+                filepath, sep=" ", header=None, names=column_names, na_values=na_values
+            )
+        except IOError as err:
+            print("IOError: {}".format(err))
+            print("To use this class, please download the following files:")
+            print(
+                "\n\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data"
+            )
+            print(
+                "\thttps://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.doc"
+            )
+            print("\nand place them, as-is, in the folder:")
+            print(
+                "\n\t{}\n".format(
+                    os.path.abspath(
+                        os.path.join(
+                            os.path.abspath(__file__),
+                            "..",
+                            "..",
+                            "data",
+                            "raw",
+                            "german",
+                        )
+                    )
+                )
+            )
+            import sys
+
+            sys.exit(1)
+
+        super(GermanDataset, self).__init__(
+            df=df,
+            label_name=label_name,
+            favorable_classes=favorable_classes,
+            protected_attribute_names=protected_attribute_names,
+            privileged_classes=privileged_classes,
+            instance_weights_name=instance_weights_name,
+            categorical_features=categorical_features,
+            features_to_keep=features_to_keep,
+            features_to_drop=features_to_drop,
+            na_values=na_values,
+            custom_preprocessing=custom_preprocessing,
+            metadata=metadata,
+        )