Source code for sbmlsim.diff

"""Helpers for numerical comparison of simulation results between different simulators.

Allows to test semi-automatically for problems with the various models.
Used to benchmark the simulation results.
"""

import logging
from pathlib import Path
from typing import Dict, List

import pandas as pd
from matplotlib import pyplot as plt

from sbmlsim.utils import timeit


[docs]logger = logging.getLogger(__name__)


[docs]def get_files_by_extension(base_path: Path, extension: str = ".json") -> Dict[str, str]:
    """Get all files by given extension.

    Simulation definitions are json files.
    """
    # get all files with extension in given path
    files = [f for f in base_path.glob("**/*") if f.is_file() and f.suffix == extension]
    offset = len(extension)
    keys = [f.name[:-offset] for f in files]

    return dict(zip(keys, files))  # type: ignore


[docs]class DataSetsComparison(object):
    """Comparing multiple simulation results.

    Only the subset of identical columns are compared. In the beginning a matching of column
    names is performed to find the subset of columns which can be compared.

    The simulations must contain a "time" column with identical time points.
    """

[docs]    tol_abs = 1e-4  # absolute tolerance for comparison
[docs]    tol_rel = 1e-4  # relative tolerance for comparison
[docs]    eps_plot = 1e-5 * tol_abs  # tolerance for plotting

    @timeit
    def __init__(
        self,
        dfs_dict: Dict[str, pd.DataFrame],
        columns_filter=None,
        time_column: bool = True,
        title: str = None,
        selections: Dict[str, str] = None,
        factors: Dict[str, float] = None,
    ):
        """Initialize the comparison.

        :param dfs_dict: data dictionary d[simulator_key] = df_result
        :param columns_filter: function which returns True if in Set or False if should be filtered.
        :param time_column: flag to check for time column
        """
        self.columns_filter = columns_filter

        # check that identical number of rows (mostly timepoints)
        nrow = 0
        for label, df in dfs_dict.items():
            if nrow == 0:
                nrow = len(df)

            if len(df) != nrow:
                raise ValueError(
                    f"DataFrame have different length (number of rows): "
                    f"{len(df)} != {nrow} ({label})"
                )

        # check that time column exist in data frames
        if time_column:
            for label, df in dfs_dict.items():
                if "time" not in df.columns:
                    raise ValueError(f"'time' column must exist in data ({label})")

        # handle selections replacements
        pd.set_option("display.max_columns", None)
        if selections:
            if factors is None:
                factors = {}
            # use the first keys for comparison
            colnames = list(selections.values())[1]
            for key, sel_keys in selections.items():
                print("***", key, "***")
                df = dfs_dict[key]
                # get subset
                df_new = df[sel_keys]
                # apply factors
                fs = factors.get(key, [1.0] * len(sel_keys))
                for k, sel in enumerate(sel_keys):
                    print(f"scaling: '{sel}' * {fs[k]}")  # type: ignore
                    # df_new[sel] = fs[k] * df_new[sel]
                    df_new.loc[:, sel] *= fs[k]  # type: ignore

                # do renaming
                df_new = df_new.rename(columns=dict(zip(sel_keys, colnames)))
                # store updated df
                print(df_new.head())
                dfs_dict[key] = df_new

        # get the subset of columns to compare
        columns, self.col_intersection, self.col_union = self._process_columns(dfs_dict)

        # filtered columns
        if columns_filter:
            columns = [col for col in columns if columns_filter(col)]
        self.columns = columns
        logger.info(f"Comparing: {self.columns}")

        # get common subset of data
        self.dfs, self.labels = self._filter_dfs(dfs_dict, self.columns)

        # set title
        self.title = title if title else " | ".join(self.labels)

        # calculate difference
        (
            self.diff,
            self.diff_abs,
            self.diff_rel,
            self.diff_tol,
            self.diff_tol_bool,
        ) = self.df_diff()

    @classmethod
[docs]    def _process_columns(cls, dataframes):
        """Get the intersection and union of columns.

        :param dataframes:
        :return:
        """
        numerics = ["int16", "int32", "int64", "float16", "float32", "float64"]

        # set of columns from the individual dataframes
        col_union = None
        col_intersection = None
        for _path, df in dataframes.items():
            # get all numeric columns
            num_df = df.select_dtypes(include=numerics)
            if len(num_df.columns) < len(df.columns):
                logger.warning(
                    f"Non-numeric columns in DataFrame: {set(df.columns)-set(num_df.columns)}"
                )

            cols = set(num_df.columns)

            if not col_union or not col_intersection:
                col_union = cols
                col_intersection = cols
            else:
                col_union = col_union.union(cols)
                col_intersection = col_intersection.intersection(cols)

        logger.info(f"Column Union #: {len(col_union)}")
        logger.info(f"Column Intersection #: {len(col_intersection)}")

        columns = list(col_intersection.copy())
        columns.remove("time")
        columns = ["time"] + sorted(columns)

        return columns, col_intersection, col_union

    @classmethod
[docs]    def _filter_dfs(cls, dataframes, columns):
        """Filter the dataframes using the column ids occurring in all datasets.

        The common set of columns is used for comparison.

        :param dataframes:
        :param columns:
        :return: List[pd.DataFrame], List[str], list of dataframes and simulator labels.
        """
        dfs = []
        labels = []
        for label, df in dataframes.items():
            try:
                df_filtered = df[columns]
            except KeyError:
                logger.error(
                    f"Some keys from '{columns}' do not exist in DataFrame columns "
                    f"'{df.columns}'"
                )
                raise ValueError
            dfs.append(df_filtered)
            labels.append(label)

        return dfs, labels

[docs]    def df_diff(self):
        """Dataframe of all differences between the files.

        https://github.com/sbmlteam/sbml-test-suite/blob/master/cases/semantic/README.md
        Let the following variables be defined:

        * `abs_tol` stand for the absolute tolerance for a test case,
        * `rel_tol` stand for the relative tolerance for a test case,
        * `c_ij` stand for the expected correct value for row `i`, column `j`, of the result data set for the test case
        * `u_ij` stand for the corresponding value produced by a given software simulation system run by the user

        These absolute and relative tolerances are used in the following way:
        a data point `u_ij` is considered to be within tolerances
        if and only if the following expression is true:

        |c_ij - u_ij| <= (abs_tol + rel_tol * |c_ij|)

        """
        c = self.dfs[0]
        u = self.dfs[1]

        # difference
        diff = c - u

        # absolute differences between all data frames
        diff_abs = diff.abs()

        # relative differences between data frames
        diff_rel = 2 * diff_abs / (self.dfs[0].abs() + self.dfs[1].abs())
        diff_rel[diff_rel.isnull()] = 0.0

        # difference based on tolerance
        # |c_ij - u_ij| <= (abs_tol + rel_tol * |c_ij|)

        # > 0 if difference
        diff_tol = (c - u).abs() - (self.tol_abs + self.tol_rel * c.abs())

        # boolean matrix: True if difference, False if identical
        diff_tol_bool = diff_tol > 0

        return diff, diff_abs, diff_rel, diff_tol, diff_tol_bool

[docs]    def is_equal(self):
        """Check if DataFrames are identical within numerical tolerance."""
        return not self.diff_tol_bool.any(axis=None)

[docs]    def __str__(self) -> str:
        """Get string."""
        return f"{self.__class__.__name__} ({self.labels})"

[docs]    def __repr__(self):
        """Get representation."""
        return f"{self.__class__.__name__} [{self.id}] ({self.labels})"

    @timeit
[docs]    def report_str(self) -> str:
        """Get report as string."""
        lines = [
            "-" * 80,
            str(self),
            str(self.title),
            "-" * 80,
            "# Elements (Nt, Nx)",
            str(self.diff.shape),
            "# Maximum column difference (above eps)",
        ]
        diff_max = self.diff_abs.max()
        diff_0 = self.diff_abs.iloc[0]
        diff_rel_max = self.diff_rel.max()
        diff_rel_0 = self.diff_rel.iloc[0]

        diff_info = pd.concat([diff_0, diff_rel_0, diff_max, diff_rel_max], axis=1)
        diff_info.columns = ["Delta_abs_0", "Delta_rel_0", "Delta_max", "Delta_rel_max"]

        # TODO: fixme
        diff_info = diff_info[diff_max >= DataSetsComparison.tol_abs]
        with pd.option_context("display.max_rows", None, "display.max_columns", None):
            lines.append(
                str(diff_info.sort_values(by=["Delta_rel_max"], ascending=False))
            )

        lines.append("# Maximum initial column difference")
        lines.append(str(self.diff.iloc[0].abs().max()))

        lines.append("# Maximum element difference")
        lines.append(str(self.diff.abs().max().max()))

        lines.append(
            "# Datasets are equal (|c_ij - u_ij| <= (tol_abs + tol_rel * |c_ij|))"
        )
        lines.append(str(self.is_equal()).upper())
        lines.append("-" * 80)
        if not self.is_equal():
            logging.warning("Datasets are not equal !")

        return "\n".join([str(item) for item in lines])

    @timeit
[docs]    def report(self):
        """Report."""
        # print report
        print(self.report_str())

        # plot figure
        f = self.plot_diff()
        return f

    @timeit
[docs]    def plot_diff(self):
        """Plot lines for entries which are above epsilon treshold."""

        import seaborn as sns

        # FIXME: only plot the top differences, otherwise plotting takes
        # very long
        # filter data
        diff_abs = self.diff_abs.copy()
        diff_rel = self.diff_rel.copy()
        diff_tol = self.diff_tol.copy()
        diff_max = diff_abs.max()
        column_index = diff_max >= DataSetsComparison.eps_plot
        # column_index = diff_max >= DataSetsComparison.eps

        # print(column_index)
        diff_abs = diff_abs.transpose()
        diff_abs = diff_abs[column_index]
        diff_abs = diff_abs.transpose()

        # plot all overview
        f1, ((ax1, ax2, ax3, ax4)) = plt.subplots(1, 4, figsize=(20, 4.5))
        f1.subplots_adjust(wspace=0.35)
        f1.suptitle(self.title, fontsize=14, fontweight="bold")

        sns.heatmap(data=self.diff_tol_bool, cmap="Blues", vmin=0, vmax=1, ax=ax1)
        ax1.set_title(f"equal = {str(self.is_equal()).upper()}", fontweight="bold")
        ax1.set_ylabel("Tolerance difference", fontweight="bold")

        # sns.heatmap(data=self.diff_tol, center=0, ax=ax2)

        for cid in diff_abs.columns:
            ax2.plot(diff_tol[cid], label=cid)
            ax3.plot(diff_abs[cid], label=cid)
            ax4.plot(diff_rel[cid], label=cid)

        ax2.set_ylabel("Tolerance difference", fontweight="bold")
        ax3.set_ylabel("Absolute difference", fontweight="bold")
        ax4.set_ylabel("Relative difference", fontweight="bold")

        for ax in (ax3, ax4):
            ax.set_xlabel("time index", fontweight="bold")
            ax.set_yscale("log")
            ax.set_ylim(bottom=1e-10)

            if ax.get_ylim()[1] < 10 * DataSetsComparison.tol_abs:
                ax.set_ylim(top=10 * DataSetsComparison.tol_abs)

        ax2.axhline(0.0, color="black", linestyle="--")
        for ax in (ax3, ax4):
            ax.axhline(DataSetsComparison.tol_abs, color="black", linestyle="--")

            # ax.legend()
            # ax.set_xlim(right=ax.get_xlim()[1] * 2)

        # ax3.imshow(self.diff_tol, cmap='Greys')

        return f1