How to generate a metareport?#
Create a metareport comparing synthetic datasets with respect to a list of metrics. Warning: only the summary of the metrics is compared.#
We assume that the synthetic datasets to compare have already been generated.
Wisconsin Breast Cancer Dataset (WBCD)
[ ]:
# Standard library
import sys
import tempfile
from pathlib import Path

sys.path.append("..")

# 3rd party packages
import numpy as np
import pandas as pd

# Local packages
from clover.metrics.metareport import Metareport
Load the real and synthetic Wisconsin Breast Cancer Datasets#
[2]:
# Load the real train/test splits of the Wisconsin Breast Cancer Dataset.
df_real = {
    split: pd.read_csv(f"../data/WBCD_{split}.csv") for split in ("train", "test")
}
# Quick sanity check on the dimensions of the training split.
df_real["train"].shape
[2]:
(455, 10)
Choose the synthetic dataset#
[3]:
# Synthetic data generated by Synthpop: 1st-generation train/test splits plus
# a 2nd-generation dataset (synthetic data generated from synthetic data).
synth_paths = {
    "train": "../results/attack/data/1st_generation/2025-06-18_Synthpop_455samples.csv",
    "test": "../results/attack/data/1st_generation/2025-06-18_Synthpop_228samples.csv",
    "2nd_gen": "../results/attack/data/2nd_generation/2025-06-18_Synthpop_455samples.csv",
}
df_synth = {split: pd.read_csv(path) for split, path in synth_paths.items()}

# Random baseline to compare against the Synthpop datasets: bootstrap each
# column independently, which keeps the marginal distributions but breaks the
# correlations between variables.
# NOTE: a seeded generator makes the mock datasets reproducible across runs
# (the original code sampled without a seed). Passing the *same* Generator to
# every call keeps the per-column draws independent, as before.
rng = np.random.default_rng(42)
df_mock = {
    split: df.apply(
        lambda x: x.sample(frac=1, replace=True, random_state=rng).to_numpy()
    )
    for split, df in df_synth.items()
}

synth_datasets = {"synthpop": df_synth, "random": df_mock}
Configure the metadata dictionary#
The continuous and categorical variables need to be specified, as well as the variable to predict#
[4]:
# Schema of the WBCD table: nine continuous cytology features and one
# categorical target column, which is also the variable to predict.
feature_columns = [
    "Clump_Thickness",
    "Uniformity_of_Cell_Size",
    "Uniformity_of_Cell_Shape",
    "Marginal_Adhesion",
    "Single_Epithelial_Cell_Size",
    "Bland_Chromatin",
    "Normal_Nucleoli",
    "Mitoses",
    "Bare_Nuclei",
]
metadata = {
    "continuous": feature_columns,
    "categorical": ["Class"],
    "variable_to_predict": "Class",
}
Generate the metareport#
[5]:
# Shared configuration for the utility and privacy reports
# (see the utility_report and privacy_report notebooks for more details).
parameters = dict(
    cross_learning=False,
    num_repeat=1,
    num_kfolds=2,
    num_optuna_trials=15,
    use_gpu=True,
    sampling_frac=0.5,
)
[6]:
# Gather the full metareport configuration in one place, then instantiate.
metareport_config = {
    "dataset_name": "Wisconsin Breast Cancer Dataset",
    "df_real": df_real,
    "synthetic_datasets": synth_datasets,
    "metadata": metadata,
    # Automatically adjusted for larger or longer figures.
    "figsize": (8, 6),
    # Fixed seed for reproducibility purposes.
    "random_state": 42,
    # Dictionary with the path of each already computed report to load and compare.
    "metareport_folderpath": None,
    # Metrics to compute (utility or privacy); None means compute all of them.
    "metrics": None,
    # Parameters shared by the utility and privacy reports.
    "params": parameters,
}
metareport = Metareport(**metareport_config)
[7]:
# Run all configured metrics; this is the expensive step — it fits attack
# models (GAN-Leaks, LOGAN, TableGAN, ...) and may emit XGBoost/sklearn
# warnings, as shown in the captured output below.
metareport.compute()
/data8/install/anaconda3/envs/synthetic_data_p3.10/lib/python3.10/site-packages/xgboost/core.py:158: UserWarning: [21:31:25] WARNING: /workspace/src/common/error_msg.cc:58: Falling back to prediction using DMatrix due to mismatched devices. This might lead to higher memory usage and slower performance. XGBoost is running on: cuda:0, while the input data is on: cpu.
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.
This warning will only be shown once.
warnings.warn(smsg, UserWarning)
GAN-Leaks test set shape: (228, 10)
Monte Carlo Membership test set shape: (228, 10)
Detector test set shape: (228, 10)
LOGAN test set shape: (228, 10)
TableGan test set shape: (228, 10)
GAN-Leaks test set shape: (228, 10)
/data8/install/anaconda3/envs/synthetic_data_p3.10/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Monte Carlo Membership test set shape: (228, 10)
Detector test set shape: (228, 10)
LOGAN test set shape: (228, 10)
TableGan test set shape: (228, 10)
Get the summary report as a pandas dataframe#
[8]:
# Collect the computed metrics into a single summary pandas DataFrame.
df_summary = metareport.summary()
[9]:
# Display the summary: one row per metric-submetric pair, one column per
# compared synthetic dataset ("random" and "synthpop").
df_summary
[9]:
| compared | random | synthpop |
|---|---|---|
| metric | ||
| cat_consis-within_ratio | 1.000000 | 1.000000 |
| cat_stats-frequency_coverage | 0.975824 | 0.956044 |
| cat_stats-support_coverage | 1.000000 | 1.000000 |
| classif-diff_real_synth | 0.065878 | 0.004307 |
| collision-avg_num_appearance_collision_real | 6.428571 | 3.403509 |
| collision-avg_num_appearance_collision_synth | 1.285714 | 3.263158 |
| collision-avg_num_appearance_realcontrol | 1.349112 | 1.349112 |
| collision-avg_num_appearance_realtrain | 1.463023 | 1.463023 |
| collision-avg_num_appearance_synth | 1.006637 | 1.413043 |
| collision-f1_score | 0.000000 | 0.300792 |
| collision-precision | 0.000000 | 0.177019 |
| collision-recall | 0.000000 | 1.000000 |
| collision-recovery_rate | 0.000000 | 0.183280 |
| cont_consis-within_ratio | 1.000000 | 1.000000 |
| cont_stats-iqr_l1_distance | 0.092593 | 0.080247 |
| cont_stats-iqr_l1_distance_train_test_ref | 0.098765 | 0.098765 |
| cont_stats-median_l1_distance | 0.012346 | 0.012346 |
| cont_stats-median_l1_distance_train_test_ref | 0.000000 | 0.000000 |
| dcr-dcr_5th_percent_synthreal_control | 0.033333 | 0.000000 |
| dcr-dcr_5th_percent_synthreal_train | 0.033333 | 0.000000 |
| dcr-dcr_5th_percent_train_test_ref | 0.000000 | 0.000000 |
| dcr-nndr_5th_percent_synthreal_control | 0.723947 | 0.000000 |
| dcr-nndr_5th_percent_synthreal_train | 0.554167 | 0.000000 |
| dcr-nndr_5th_percent_train_test_ref | 0.000000 | 0.000000 |
| dcr-ratio_match_synthreal_control | 0.000000 | 0.333333 |
| dcr-ratio_match_synthreal_train | 0.000000 | 0.315789 |
| dcr-ratio_match_train_test_ref | 0.333333 | 0.333333 |
| detector-precision | 0.575000 | 0.525773 |
| detector-precision_top1% | 1.000000 | 0.500000 |
| detector-precision_top50% | 0.508772 | 0.517544 |
| detector-tpr_at_0.001%_fpr | 0.017544 | 0.008772 |
| detector-tpr_at_0.1%_fpr | 0.017544 | 0.008772 |
| dist-prediction_auc_rescaled | 0.950081 | 0.000000 |
| dist-prediction_mse | 0.588812 | 0.000078 |
| dist-propensity_mse | 0.607270 | 0.000158 |
| feature_imp-diff_permutation_importance | 0.026678 | 0.003163 |
| fscore-diff_f_score | 1.245316 | 0.359862 |
| ganleaks-precision_top1% | 0.500000 | 0.500000 |
| ganleaks-precision_top50% | 0.807018 | 0.894737 |
| hell_cat_univ_dist-hellinger_distance | 0.017793 | 0.032174 |
| hell_cat_univ_dist-hellinger_distance_train_test_ref | 0.001057 | 0.001057 |
| hell_cont_univ_dist-hellinger_distance | 0.053408 | 0.041094 |
| hell_cont_univ_dist-hellinger_distance_train_test_ref | 0.057075 | 0.057075 |
| kl_div_cat_univ_dist-kl_divergence | 0.001273 | 0.004175 |
| kl_div_cat_univ_dist-kl_divergence_train_test_ref | 0.000004 | 0.000004 |
| kl_div_cont_univ_dist-kl_divergence | 0.012086 | 0.007177 |
| kl_div_cont_univ_dist-kl_divergence_train_test_ref | 0.013798 | 0.013798 |
| logan-precision | 0.475309 | 0.527778 |
| logan-precision_top1% | 0.500000 | 1.000000 |
| logan-precision_top50% | 0.473684 | 0.543860 |
| logan-tpr_at_0.001%_fpr | 0.008772 | 0.070175 |
| logan-tpr_at_0.1%_fpr | 0.008772 | 0.070175 |
| mcmebership-precision_top1% | 0.500000 | 0.500000 |
| mcmebership-precision_top50% | 0.526316 | 0.526316 |
| pcd-norm | 5.161654 | 0.285290 |
| tablegan-precision | 0.585366 | 0.485981 |
| tablegan-precision_top1% | 1.000000 | 0.500000 |
| tablegan-precision_top50% | 0.561404 | 0.508772 |
| tablegan-tpr_at_0.001%_fpr | 0.035088 | 0.000000 |
| tablegan-tpr_at_0.1%_fpr | 0.035088 | 0.000000 |
Style the result#
The best value (minimum or maximum, according to the submetric objective) is colored in green; the worst in yellow.
[10]:
# Apply the built-in pretty formatter: it highlights, for every metric, the
# best value in green and the worst in yellow.
metric_names = list(df_summary.index)
s = df_summary.style.pipe(Metareport.make_pretty, metrics=metric_names)
s
[10]:
| compared | random | synthpop |
|---|---|---|
| metric | ||
| cat_consis-within_ratio | 1.00 | 1.00 |
| cat_stats-frequency_coverage | 0.98 | 0.96 |
| cat_stats-support_coverage | 1.00 | 1.00 |
| classif-diff_real_synth | 0.07 | 0.00 |
| collision-avg_num_appearance_collision_real | 6.43 | 3.40 |
| collision-avg_num_appearance_collision_synth | 1.29 | 3.26 |
| collision-avg_num_appearance_realcontrol | 1.35 | 1.35 |
| collision-avg_num_appearance_realtrain | 1.46 | 1.46 |
| collision-avg_num_appearance_synth | 1.01 | 1.41 |
| collision-f1_score | 0.00 | 0.30 |
| collision-precision | 0.00 | 0.18 |
| collision-recall | 0.00 | 1.00 |
| collision-recovery_rate | 0.00 | 0.18 |
| cont_consis-within_ratio | 1.00 | 1.00 |
| cont_stats-iqr_l1_distance | 0.09 | 0.08 |
| cont_stats-iqr_l1_distance_train_test_ref | 0.10 | 0.10 |
| cont_stats-median_l1_distance | 0.01 | 0.01 |
| cont_stats-median_l1_distance_train_test_ref | 0.00 | 0.00 |
| dcr-dcr_5th_percent_synthreal_control | 0.03 | 0.00 |
| dcr-dcr_5th_percent_synthreal_train | 0.03 | 0.00 |
| dcr-dcr_5th_percent_train_test_ref | 0.00 | 0.00 |
| dcr-nndr_5th_percent_synthreal_control | 0.72 | 0.00 |
| dcr-nndr_5th_percent_synthreal_train | 0.55 | 0.00 |
| dcr-nndr_5th_percent_train_test_ref | 0.00 | 0.00 |
| dcr-ratio_match_synthreal_control | 0.00 | 0.33 |
| dcr-ratio_match_synthreal_train | 0.00 | 0.32 |
| dcr-ratio_match_train_test_ref | 0.33 | 0.33 |
| detector-precision | 0.57 | 0.53 |
| detector-precision_top1% | 1.00 | 0.50 |
| detector-precision_top50% | 0.51 | 0.52 |
| detector-tpr_at_0.001%_fpr | 0.02 | 0.01 |
| detector-tpr_at_0.1%_fpr | 0.02 | 0.01 |
| dist-prediction_auc_rescaled | 0.95 | 0.00 |
| dist-prediction_mse | 0.59 | 0.00 |
| dist-propensity_mse | 0.61 | 0.00 |
| feature_imp-diff_permutation_importance | 0.03 | 0.00 |
| fscore-diff_f_score | 1.25 | 0.36 |
| ganleaks-precision_top1% | 0.50 | 0.50 |
| ganleaks-precision_top50% | 0.81 | 0.89 |
| hell_cat_univ_dist-hellinger_distance | 0.02 | 0.03 |
| hell_cat_univ_dist-hellinger_distance_train_test_ref | 0.00 | 0.00 |
| hell_cont_univ_dist-hellinger_distance | 0.05 | 0.04 |
| hell_cont_univ_dist-hellinger_distance_train_test_ref | 0.06 | 0.06 |
| kl_div_cat_univ_dist-kl_divergence | 0.00 | 0.00 |
| kl_div_cat_univ_dist-kl_divergence_train_test_ref | 0.00 | 0.00 |
| kl_div_cont_univ_dist-kl_divergence | 0.01 | 0.01 |
| kl_div_cont_univ_dist-kl_divergence_train_test_ref | 0.01 | 0.01 |
| logan-precision | 0.48 | 0.53 |
| logan-precision_top1% | 0.50 | 1.00 |
| logan-precision_top50% | 0.47 | 0.54 |
| logan-tpr_at_0.001%_fpr | 0.01 | 0.07 |
| logan-tpr_at_0.1%_fpr | 0.01 | 0.07 |
| mcmebership-precision_top1% | 0.50 | 0.50 |
| mcmebership-precision_top50% | 0.53 | 0.53 |
| pcd-norm | 5.16 | 0.29 |
| tablegan-precision | 0.59 | 0.49 |
| tablegan-precision_top1% | 1.00 | 0.50 |
| tablegan-precision_top50% | 0.56 | 0.51 |
| tablegan-tpr_at_0.001%_fpr | 0.04 | 0.00 |
| tablegan-tpr_at_0.1%_fpr | 0.04 | 0.00 |
Save the styled result as html#
[11]:
# Export the styled summary as a standalone HTML file.
# Path.write_text with an explicit encoding replaces the previous
# print(..., file=f) round-trip, which relied on the locale-default encoding.
with tempfile.TemporaryDirectory() as temp_dir:
    (Path(temp_dir) / "df.html").write_text(s.to_html(), encoding="utf-8")
Save and load the metareport#
[12]:
# Round-trip the metareport: write it to a temporary folder, then rebuild a
# fresh Metareport instance from the saved files.
with tempfile.TemporaryDirectory() as temp_dir:
    metareport.save(savepath=temp_dir)  # persist the computed reports
    saved_reports = {"synthpop": temp_dir, "random": temp_dir}
    new_report = Metareport(metareport_folderpath=saved_reports)  # reload
[ ]: