Source code for tests.metrics.utility.test_application

# Standard library
from typing import Type, Tuple
from inspect import getfullargspec
from copy import deepcopy

# 3rd party packages
import pytest
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Local packages
from clover.metrics.base import Metric
from clover.metrics.utility import application as app


def test_fscore() -> None:
    """
    Test the F-score function.

    :return: None
    """
    # Simulate data
    size = 100
    group_ab = np.concatenate(
        [np.random.randint(4, size=size), np.random.randint(6, 10, size=size)]
    )
    group_aa = np.concatenate(
        [np.random.randint(4, size=size), np.random.randint(4, size=size)]
    )
    labels = [0] * size + [1] * size
    labels_wrong = [1] * size + [1] * size
    df_ab = pd.DataFrame.from_dict({"feature": group_ab, "class": labels})
    df_aa = pd.DataFrame.from_dict({"feature": group_aa, "class": labels})
    df_aa_wrong = pd.DataFrame.from_dict({"feature": group_aa, "class": labels_wrong})
    df_labels_only = pd.DataFrame.from_dict({"class": labels})

    with pytest.raises(AssertionError):
        app.FScore.fscore(df=df_ab, predicted_var="")
    with pytest.raises(AssertionError):
        app.FScore.fscore(df=df_ab["feature"], predicted_var="class")
    with pytest.raises(AssertionError):
        app.FScore.fscore(df=df_aa_wrong, predicted_var="class")
    with pytest.raises(AssertionError):
        app.FScore.fscore(df=df_labels_only, predicted_var="class")

    assert all(app.FScore.fscore(df=df_ab, predicted_var="class") > 2)
    assert all(app.FScore.fscore(df=df_aa, predicted_var="class") < 0.05)
test_params = [
    {"metric_class": metric, "which_data": data}
    for metric in app.get_metrics()
    for data in ["different_datasets", "identical_datasets"]
]
test_ids = [f"{d['metric_class'].name}-{d['which_data']}" for d in test_params]
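
# Each parameter set above pairs one application metric with one data setting, so
# the generated test IDs take the form "<metric name>-different_datasets" and
# "<metric name>-identical_datasets"; the exact metric names depend on what
# app.get_metrics() returns and are not hard-coded here.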
@pytest.fixture(scope="module", params=test_params, ids=test_ids)
def application_metrics_results(
    request,
    df_wbcd: dict[str, pd.DataFrame],
    df_mock_wbcd: dict[str, pd.DataFrame],
    metadata_wbcd: dict,
) -> Tuple[Type[Metric], str, dict]:
    """
    Compute the application metrics in different settings.

    :param request: the pytest fixture request carrying the metric class and the
        dataset setting to test
    :param df_wbcd: the real Wisconsin Breast Cancer Dataset fixture, split into
        **train** and **test** sets
    :param df_mock_wbcd: the mock WBCD dataset fixture, containing **train** and
        **test** sets
    :param metadata_wbcd: the WBCD metadata fixture
    :return: a tuple containing the metric class, the dataset type and a dictionary
        containing the **average** scores of the metric and the **detailed** scores
    """
    metric_class = request.param["metric_class"]
    which_data = request.param["which_data"]

    # Change the dependent var for testing in both cases
    dependent_var = (
        "Uniformity_of_Cell_Size" if "Regression" in metric_class.name else "Class"
    )
    metadata = deepcopy(metadata_wbcd)
    metadata["variable_to_predict"] = dependent_var

    # Instance parameters
    d = {
        "random_state": 0,
        "num_repeat": 1,
        "num_kfolds": 2,
        "num_optuna_trials": 1,
        "use_gpu": False,
    }
    # Select only the expected instance parameters
    args = getfullargspec(metric_class).args[1:]  # remove self
    metric = metric_class(*[d[arg] for arg in args])

    df_to_compare = df_mock_wbcd if which_data == "different_datasets" else df_wbcd
    scores = metric.compute(df_wbcd, df_to_compare, metadata)
    return metric_class, which_data, scores
def test_application_metrics_summary(
    application_metrics_results: Tuple[Type[Metric], str, dict],
) -> None:
    """
    Test the application metrics average scores.

    :param application_metrics_results: a tuple containing the metric class, the
        dataset type and a dictionary containing the **average** scores of the
        metric and the **detailed** scores
    :return: None
    """
    metric, which_data, scores = application_metrics_results
    scores = scores["average"]

    for submetric in metric.get_average_submetrics():
        # Check the boundaries
        assert scores[submetric["submetric"]] >= submetric["min"]
        assert scores[submetric["submetric"]] <= submetric["max"]

        # Check the target
        diff_to_objective = abs(
            scores[submetric["submetric"]] - submetric[submetric["objective"]]
        )
        if which_data == "different_datasets":
            assert diff_to_objective > 0.01
        else:
            assert diff_to_objective < 0.01
def test_application_metrics_detailed(
    application_metrics_results: Tuple[Type[Metric], str, dict],
) -> None:
    """
    Test the application metrics detailed scores.

    :param application_metrics_results: a tuple containing the metric class, the
        dataset type and a dictionary containing the **average** scores of the
        metric and the **detailed** scores
    :return: None
    """
    metric, which_data, scores = application_metrics_results
    report = scores["detailed"]
    metric.draw(report=report, figsize=(8, 6))
    plt.close("all")
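
# A minimal sketch of how this module is typically invoked (assuming pytest and the
# clover package are installed, and that the df_wbcd, df_mock_wbcd and metadata_wbcd
# fixtures are provided by a conftest.py on the test path; the file path below is
# inferred from the module name and may differ in the actual repository):
#
#     pytest tests/metrics/utility/test_application.py -v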