Source code for tests.metrics.conftest

import pytest  # 3rd party packages
import pandas as pd
import numpy as np


def _resample_columns(frame: pd.DataFrame) -> pd.DataFrame:
    """Redraw every column of *frame* with replacement from its own unique values."""
    # np.random.choice without ``p`` samples uniformly over the unique values,
    # so the original value frequencies of each column are not preserved.
    return frame.apply(
        lambda column: np.random.choice(
            column.unique(), size=len(column), replace=True
        )
    )


@pytest.fixture(scope="package")
def df_mock_wbcd(df_wbcd: dict[str, pd.DataFrame]) -> dict[str, pd.DataFrame]:
    """
    Generate the continuous mock Wisconsin Breast Cancer Dataset (wbcd) without ids.

    Each column of the **train** and **test** sets is independently redrawn,
    with replacement, from that column's unique values. The **2nd_gen** set is
    built the same way from the already-mocked **train** set. A few fixed
    values are then substituted in selected columns of all three sets.

    The draws use NumPy's global random state, which this fixture does not
    seed, so the generated values differ between runs unless a seed is set
    elsewhere.

    :param df_wbcd: the wbcd dataset fixture, split into **train** and **test** sets
    :return: the dataframe containing the mock wbcd dataset, split into
        **train**, **test** and **2nd_gen** sets
    """
    # Shuffle each column with replacement
    mock = {
        split: _resample_columns(df_wbcd[split]) for split in ("train", "test")
    }
    mock["2nd_gen"] = _resample_columns(mock["train"])

    for split in ("train", "test", "2nd_gen"):
        frame = mock[split]

        # Ensure the support coverage is different.
        # With a ``{column: value}`` mapping and a scalar replacement, pandas
        # only touches ``value`` inside ``column``: 3 -> 8 in Clump_Thickness
        # and 1 -> 8 in Uniformity_of_Cell_Shape.
        frame = frame.replace(
            {
                "Clump_Thickness": 3,
                "Uniformity_of_Cell_Shape": 1,
            },
            8,
        )

        # Ensure the consistency is different
        frame = frame.replace({"Bland_Chromatin": 2}, 11)
        # NOTE(review): string literals here suggest Normal_Nucleoli holds
        # strings rather than integers -- confirm against the df_wbcd fixture.
        frame = frame.replace({"Normal_Nucleoli": "2"}, "11")
        frame = frame.replace({"Uniformity_of_Cell_Shape": 2}, 11)

        mock[split] = frame

    return mock