Source code for tests.conftest
import pytest # 3rd party packages
import pandas as pd
import config # local packages
@pytest.fixture(scope="package")
def df_wbcd() -> dict[str, pd.DataFrame]:
    """
    Load the Wisconsin Breast Cancer Dataset (wbcd) and split it for the tests.

    The CSV located at ``config.WBCD_DATASET_FILEPATH`` is read, the
    ``Sample_code_number`` identifier column is removed, ``Class`` is recoded
    from 2 / 4 to 0 / 1 and ``Normal_Nucleoli`` is cast to ``str`` so that it is
    handled as a categorical variable.

    :return: a dict with the keys **train** (an 80% sample drawn with a fixed
        seed) and **test** (the remaining rows), both with a fresh 0-based index
    """
    frame = (
        pd.read_csv(config.WBCD_DATASET_FILEPATH)
        .drop(columns="Sample_code_number")  # identifier not needed
    )

    # Class 0 or 1 instead of 2 and 4
    frame["Class"] = (frame["Class"] / 2 - 1).astype("int")
    # Categorical variable
    frame["Normal_Nucleoli"] = frame["Normal_Nucleoli"].astype(str)

    # Split train / test: the test set is whatever the seeded sample left out.
    train_rows = frame.sample(frac=0.8, replace=False, random_state=66)
    test_rows = frame.drop(index=train_rows.index)

    return {
        "train": train_rows.reset_index(drop=True),
        "test": test_rows.reset_index(drop=True),
    }
@pytest.fixture(scope="package")
def metadata_wbcd() -> dict:
    """
    Return the metadata associated with the Wisconsin Breast Cancer Dataset (wbcd).

    :return: a dict containing the metadata with the following keys:
        **continuous** (list of column names), **categorical** (list of column
        names) and **variable_to_predict** (a single column name)
    """
    target_column = "Class"
    continuous_columns = [
        "Clump_Thickness",
        "Uniformity_of_Cell_Size",
        "Uniformity_of_Cell_Shape",
        "Marginal_Adhesion",
        "Single_Epithelial_Cell_Size",
        "Bland_Chromatin",
        "Mitoses",
        "Bare_Nuclei",
    ]
    # The target is itself listed among the categorical columns.
    categorical_columns = [target_column, "Normal_Nucleoli"]

    return {
        "continuous": continuous_columns,
        "categorical": categorical_columns,
        "variable_to_predict": target_column,
    }
@pytest.fixture(scope="package")
def preprocess_metadata_wbcd() -> dict:
    """
    Return the preprocessing metadata associated with the Wisconsin Breast Cancer Dataset (wbcd).

    Each bounded column maps to a ``{"min": ..., "max": ...}`` dict and each
    categorical column maps to a ``{"categories": [...]}`` dict.

    :return: a dict containing the preprocessing metadata for training differentially private generator
    """
    bounded_columns = (
        "Clump_Thickness",
        "Uniformity_of_Cell_Size",
        "Uniformity_of_Cell_Shape",
        "Marginal_Adhesion",
        "Single_Epithelial_Cell_Size",
        "Bland_Chromatin",
        "Mitoses",
        "Bare_Nuclei",
    )

    # Every bounded column shares the same 1..10 range; each gets its own dict
    # so that mutating one entry never affects the others.
    spec = {column: {"min": 1, "max": 10} for column in bounded_columns}

    spec["Class"] = {"categories": [0, 1]}
    # Normal_Nucleoli levels are strings "1" .. "10".
    spec["Normal_Nucleoli"] = {
        "categories": [str(level) for level in range(1, 11)]
    }
    return spec