Source code for tests.metrics.test_metareport

# Standard library
import pytest
import tempfile

# 3rd party packages
import pandas as pd
import numpy as np

# Local packages
from clover.metrics.metareport import Metareport


@pytest.fixture(scope="module")
def metareport(
    df_wbcd: dict[str, pd.DataFrame],
    df_mock_wbcd: dict[str, pd.DataFrame],
) -> Metareport:
    """
    Compute the metareport in different settings.

    Two synthetic candidates are compared against the real data:
    ``df_mock_1`` is the mock dataset restricted to the selected columns, and
    ``df_mock_2`` is derived from ``df_mock_1`` by resampling every column
    independently (with replacement) from that column's unique values.

    :param df_wbcd: the real Wisconsin Breast Cancer Dataset fixture,
        split into **train** and **test** sets
    :param df_mock_wbcd: the mock wbcd dataset fixture,
        split into **train**, **test** and **2nd_gen** sets
    :return: an instance of the metareport
    """
    metadata = {
        "continuous": ["Clump_Thickness", "Bland_Chromatin"],
        "categorical": ["Class", "Normal_Nucleoli"],
        "variable_to_predict": "Class",
    }
    sublist = metadata["continuous"] + metadata["categorical"]

    # Use a dedicated, seeded generator so the resampled dataset is identical
    # from one test session to the next and independent of the global NumPy
    # random state (the report itself is already pinned with random_state=0).
    rng = np.random.default_rng(0)

    # Select the columns first, then copy, so unused columns are never copied.
    df_wbcd_mix = {set_: df_wbcd[set_][sublist].copy() for set_ in ["train", "test"]}

    df_mock_1 = {}
    df_mock_2 = {}
    for set_ in ["train", "test", "2nd_gen"]:
        df_mock_1[set_] = df_mock_wbcd[set_][sublist].copy()
        # ``apply`` builds a new frame, so ``df_mock_1`` is left untouched.
        df_mock_2[set_] = df_mock_1[set_].apply(
            lambda col: rng.choice(col.unique(), size=len(col), replace=True)
        )

    # Smallest settings that still exercise every code path, to keep tests fast.
    parameters = {
        "num_repeat": 1,
        "num_kfolds": 2,
        "num_optuna_trials": 1,
        "sampling_frac": 1.0,
        "use_gpu": False,
    }

    report = Metareport(
        dataset_name="Wisconsin Breast Cancer Dataset",
        df_real=df_wbcd_mix,
        synthetic_datasets={"df_mock_1": df_mock_1, "df_mock_2": df_mock_2},
        metadata=metadata,
        random_state=0,
        metrics=["Categorical Consistency", "DCR", "LOGAN"],
        params=parameters,
    )
    report.compute()

    return report
def test_summary_report(metareport: Metareport) -> None:
    """
    Check the dimensions of the summary table produced by the metareport.

    :param metareport: the computed metareport fixture
    :return: *None*
    """
    expected_num_metrics = 15  # the number of metrics computed
    expected_num_datasets = 2  # the number of datasets to compare

    summary_shape = metareport.summary().shape

    # One row per metric, one column per compared dataset.
    assert summary_shape[0] == expected_num_metrics
    assert summary_shape[1] == expected_num_datasets
def test_save_load_report(metareport: Metareport) -> None:
    """
    Check that a metareport saved to disk and loaded back gives the same summary.

    :param metareport: the computed metareport fixture
    :return: *None*
    """
    expected_summary = metareport.summary()

    with tempfile.TemporaryDirectory() as workdir:
        # save
        metareport.save(savepath=workdir)

        # load: both synthetic datasets are read back from the same folder
        folder_by_dataset = {name: workdir for name in ("df_mock_1", "df_mock_2")}
        reloaded_report = Metareport(metareport_folderpath=folder_by_dataset)

        # check the content of the new report
        reloaded_summary = reloaded_report.summary()
        assert expected_summary.equals(reloaded_summary)