Source code for sysvar.fit_setup

#!/usr/bin/env python

from __future__ import annotations

import uproot
import logging
import typing as t
import numpy as np
import pandas as pd

from sysvar.utils import read_yaml

# Names exported by ``from sysvar.fit_setup import *``.
__all__ = [
    "save_existing_eigenvariations",
]

# Configure the root logger when this module is imported: INFO level, with
# every record prefixed by its level name, the calling function's name and
# the line number of the logging call.
logging.basicConfig(
    format="%(levelname)s : %(funcName)s: %(lineno)d :  %(message)s",
    level=logging.INFO,
)


def save_existing_eigenvariations(
    df: pd.DataFrame,
    analysis: str,
    systematic: str,
) -> None:
    """Write up/down eigenvariation templates of one systematic to a ROOT file.

    The configuration is loaded with ``read_yaml("template_setup", analysis)``.
    For every reco channel / template pair that selects at least one row of
    ``df``, and for every eigenvariation index ``i`` in ``range(N_eigen)``, two
    weighted N-dimensional histograms are built with ``np.histogramdd`` over
    the channel's binning variables. Each histogram is flattened to 1D and
    stored, together with equal-width edges on ``[0, 1]``, under the keys
    ``{reco_channel}/{template}/{systematic}_up{i}`` and
    ``{reco_channel}/{template}/{systematic}_down{i}`` of the file at
    ``settings["output_filepath"]``, which is opened with ``uproot.update``.

    Per-event weights are
    ``total_weight / syst_weight * syst_weight_{up|down}{i}``, where missing
    (NaN) variation weights are replaced by 1.

    Args:
        df: Events to histogram. Must contain the reco-channel id column, the
            template id column and the total-weight column named in the
            settings; the binning variables and the systematic weight columns
            are read from it as well.
        analysis: Forwarded to ``read_yaml`` to pick the settings to load.
        systematic: Key into ``settings["systematics"]``; also the prefix of
            the histogram names written to the file.

    Raises:
        AssertionError: If the reco-channel id, template id or total-weight
            column named in the settings is not a column of ``df``.
    """
    settings = read_yaml("template_setup", analysis)
    root_file_path = settings["output_filepath"]

    reco_channel_id_column: str = settings["reco_channel_id_column"]
    assert reco_channel_id_column in df.columns, reco_channel_id_column
    reco_channel_info: t.Dict[str, t.List[int]] = settings["reco_channels"]

    ctgy_id_column: str = settings["template_id_column"]
    assert ctgy_id_column in df.columns, ctgy_id_column
    template_names: t.List[str] = settings["templates"]

    total_weight: str = settings["total_weight"]
    assert total_weight in df.columns, total_weight

    N_eigen = settings["systematics"][systematic]["N_eigen"]
    syst_weight = settings["systematics"][systematic]["weight"]

    with uproot.update(root_file_path) as newfile:
        logging.info(f"Updating file with uproot: {root_file_path}")
        for reco_channel_name, reco_channel_ids in reco_channel_info.items():
            binning = settings["bins"][reco_channel_name]
            for template_name in template_names:
                q = f"{ctgy_id_column} == '{template_name}' and {reco_channel_id_column} in {reco_channel_ids}"
                tmp_df = df.query(q)

                if len(tmp_df) == 0:
                    logging.info(
                        f"Skipping template in region: {reco_channel_ids} for template: {template_name}"
                    )
                    continue

                logging.info(
                    f"Computing templates in region: {reco_channel_ids} for template: {template_name}"
                )

                # Everything below is identical for all variations of this
                # selection, so build it once instead of 2 * N_eigen times.
                sample = np.asarray(tmp_df[list(binning)])
                bin_edges = list(binning.values())
                # Divide out the nominal systematic weight so that it can be
                # swapped for the varied one.
                nominal_ratio = tmp_df[total_weight] / tmp_df[syst_weight]

                for variation in range(N_eigen):
                    # Compute both directions before writing either, so a
                    # failure on "down" never leaves a lone "up" in the file.
                    flat_counts = {}
                    for direction in ("up", "down"):
                        varied_weight = tmp_df[
                            f"{syst_weight}_{direction}{variation}"
                        ].fillna(1)
                        counts, _ = np.histogramdd(
                            sample,
                            bins=bin_edges,
                            weights=np.asarray(nominal_ratio * varied_weight),
                        )
                        flat_counts[direction] = counts.flatten()

                    for direction, counts in flat_counts.items():
                        # The N-dim template is stored as a 1D histogram with
                        # one unit-interval bin per flattened cell.
                        newfile[
                            f"{reco_channel_name}/{template_name}/{systematic}_{direction}{variation}"
                        ] = counts, np.linspace(0, 1, counts.shape[0] + 1)