Source code for sbmlsim.combine.sedml.parser

"""
SED-ML support for sbmlsim
==========================

This modules parses SED-ML based simulation experiments in the sbmlsim
SimulationExperiment format and executes them.

Overview SED-ML
----------------
SED-ML is build of the main classes
- DataDescription
- Model
- Simulation
- Task
- DataGenerator
- Output

DataDescription
---------------
The DataDescription allows to reference external data, and contains a
description on how to access the data, in what format it is, and what subset
of data to extract.

Model
-----
The Model class is used to reference the models used in the simulation
experiment. SED-ML itself is independent of the model encoding underlying the
models. The only requirement is that the model needs to be referenced by
using an unambiguous identifier which allows for finding it, for example
using a MIRIAM URI. To specify the language in which the model is encoded,
a set of predefined language URNs is provided. The SED-ML Change class allows
the application of changes to the referenced models, including changes on the
XML attributes, e.g. changing the value of an observable, computing the change
of a value using mathematics, or general changes on any XML element
of the model representation that is addressable by XPath expressions,
e.g. substituting a piece of XML by an updated one.

Simulation
----------
The Simulation class defines the simulation settings and the steps taken
during simulation. These include the particular type of simulation and the
algorithm used for the execution of the simulation; preferably an unambiguous
reference to such an algorithm should be given, using a controlled vocabulary,
or ontologies. One example for an ontology of simulation algorithms is the
Kinetic Simulation Algorithm Ontology KiSAO. Further information encodable
in the Simulation class includes the step size, simulation duration, and other
simulation-type dependent information.

Task
----
SED-ML makes use of the notion of a Task class to combine a defined model
(from the Model class) and a defined simulation setting
(from the Simulation class). A task always holds one reference each.
To refer to a specific model and to a specific simulation, the corresponding
IDs are used.

DataGenerator
-------------
The raw simulation result sometimes does not correspond to the desired output
of the simulation, e.g. one might want to normalise a plot before output,
or apply post-processing like mean-value calculation.
The DataGenerator class allows for the encoding of such post-processings
which need to be applied to the simulation result before output.
To define data generators, any addressable variable or parameter of any
defined model (from instances of the Model class) may be referenced,
and new entities might be specified using MathML definitions.

Output
-------
The Output class defines the output of the simulation, in the sense that it
specifies what shall be plotted in the output. To do so, an output type is
defined, e.g. 2D-plot, 3D-plot or data table, and the according axes or
columns are all assigned to one of the formerly specified instances of the
DataGenerator class.

For information about SED-ML please refer to http://www.sed-ml.org/
and the SED-ML specification.

SED-ML in sbmlsim: Supported Features
=====================================
sbmlsim supports SED-ML L1V4 with SBML as model format.
SBML models are fully supported

Supported input for SED-ML are either SED-ML files ('.sedml' extension),
SED-ML XML strings or combine archives ('.sedx'|'.omex' extension).

In the current implementation all SED-ML constructs with exception of
XML transformation changes of the model, i.e.,
- Change.RemoveXML
- Change.AddXML
- Change.ChangeXML
are supported.
"""
import importlib
import logging
import re
import warnings
from collections import OrderedDict, namedtuple
from pathlib import Path
from typing import Dict, List, Optional, Union

import libsedml
import numpy as np

from sbmlsim.combine.sedml.data import DataDescriptionParser
from sbmlsim.combine.sedml.io import read_sedml
from sbmlsim.combine.sedml.kisao import is_supported_algorithm_for_simulation_type
from sbmlsim.combine.sedml.task import Stack, TaskNode, TaskTree
from sbmlsim.data import Data, DataSet
from sbmlsim.experiment import ExperimentDict, SimulationExperiment
from sbmlsim.model import model_resources
from sbmlsim.model.model import AbstractModel
from sbmlsim.plot import Axis, Curve, Figure, Plot
from sbmlsim.simulation import AbstractSim, ScanSim, Timecourse, TimecourseSim
from sbmlsim.task import Task
from sbmlsim.units import UnitRegistry


[docs]logger = logging.getLogger(__file__)
'''
def experiment_from_omex(omex_path: Path):
    """Create SimulationExperiments from all SED-ML files."""
    tmp_dir = tempfile.mkdtemp()
    try:
        omex.extractCombineArchive(omex_path, directory=tmp_dir, method="zip")
        locations = omex.getLocationsByFormat(omex_path, "sed-ml")
        sedml_files = [os.path.join(tmp_dir, loc) for loc in locations]

        for k, sedml_file in enumerate(sedml_files):
            pystr = sedmlToPython(sedml_file)
            factory = SEDMLCodeFactory(inputStr, workingDir=workingDir)
            factory.to
            pycode[locations[k]] = pystr

    finally:
        shutil.rmtree(tmp_dir)
    return pycode
'''


[docs]class SEDMLParser(object):
    """Parsing SED-ML in internal format."""

    def __init__(self, sed_doc: libsedml.SedDocument, working_dir: Path):
        """Parses information from SedDocument."""
        self.sed_doc: libsedml.SedDocument = sed_doc
        self.working_dir = working_dir

        # unit registry to handle units throughout the simulation
        self.ureg = UnitRegistry()

        # Reference to the experiment class
        self.exp_class = None

        # --- Models ---
        self.models = {}

        # resolve original model source and changes
        model_sources, model_changes = self.resolve_model_changes()
        sed_model: libsedml.SedModel
        for sed_model in self.sed_doc.getListOfModels():
            mid = sed_model.getId()
            source = model_sources[mid]
            sed_changes = model_changes[mid]
            self.models[mid] = self.parse_model(
                sed_model, source=source, sed_changes=sed_changes
            )
        print(f"models: {self.models}")

        # --- DataDescriptions ---
        self.data_descriptions = {}
        sed_dd: libsedml.SedDataDescription
        for sed_dd in sed_doc.getListOfDataDescriptions():
            did = sed_dd.getId()
            self.data_descriptions[did] = DataDescriptionParser.parse(
                sed_dd, self.working_dir
            )
        print(f"data_descriptions: {self.data_descriptions}")

        # --- Simulations ---
        self.simulations: Dict[str, AbstractSim] = {}
        sed_sim: libsedml.SedSimulation
        for sed_sim in sed_doc.getListOfSimulations():
            self.simulations[sed_sim.getId()] = self.parse_simulation(sed_sim)
        print(f"simulations: {self.simulations}")

        # --- Tasks ---
        self.tasks: Dict[str, Task] = {}
        sed_task: libsedml.SedTask
        for sed_task in sed_doc.getListOfTasks():
            self.tasks[sed_task.getId()] = self.parse_task(sed_task)
        print(f"tasks: {self.tasks}")

        # --- Figures/Reports ---
        self.figures: Dict[str, Figure] = {}
        sed_output: libsedml.SedOutput
        for sed_output in sed_doc.getListOfOutputs():

            type_code = sed_output.getTypeCode()
            if type_code in [libsedml.SEDML_FIGURE, libsedml.SEDML_OUTPUT_PLOT2D]:
                self.figures[sed_output.getId()] = self.parse_figure(sed_output)
            elif type_code == libsedml.SEDML_OUTPUT_REPORT:
                # FIXME: implement
                logger.error("Output report not implemented.")
        print(f"figures: {self.figures}")

        # Create the experiment object
        def f_models(obj) -> Dict[str, AbstractModel]:
            return ExperimentDict(self.models)

        def f_datasets(obj) -> Dict[str, DataSet]:
            """Dataset definition (experimental data)."""

            # FIXME: convert to DataSets & add units
            return ExperimentDict(self.data_descriptions)

        def f_simulations(obj) -> Dict[str, AbstractSim]:
            return ExperimentDict(self.simulations)

        def f_tasks(obj) -> Dict[str, Task]:
            return ExperimentDict(self.tasks)

        def f_figures(obj) -> Dict[str, Figure]:
            return ExperimentDict(self.figures)

        self.exp_class = type(
            "SedmlExperiment",
            (SimulationExperiment,),
            {
                "models": f_models,
                "datasets": f_datasets,
                "simulations": f_simulations,
                "tasks": f_tasks,
                "figures": f_figures,
            },
        )
        fig: Figure

        self.experiment: SimulationExperiment = self.exp_class()
        self.experiment.initialize()

        # self.exp_class._data = None  # FIXME hack
        for fig in self.figures.values():
            print(fig)
            fig.experiment = self.experiment

            # this must happen automatically
            for plot in fig.plots:
                for curve in plot.curves:
                    if curve.x:
                        curve.x.experiment = self.experiment
                        curve.x._register_data()
                    if curve.y:
                        curve.y.experiment = self.experiment
                        curve.y._register_data()
                    # FIXME: also for xerr and yerr

    # --- MODELS ---
    @staticmethod
[docs]    def parse_xpath_target(xpath: str) -> str:
        """Resolve targets in xpath expression.

        Uses a heuristics to figure out the targets.
        # FIXME: SED-ML amount/species targets
        """

        # resolve target
        xpath = xpath.replace('"', "'")
        match = re.findall(r"id='(.*?)'", xpath)
        if (match is None) or (len(match) == 0):
            warnings.warn(f"xpath could not be resolved: {xpath}")
        target = match[0]

        if ("model" in xpath) and ("parameter" in xpath):
            # parameter value change
            pass
        elif ("model" in xpath) and ("species" in xpath):
            # species concentration change
            pass
        elif ("model" in xpath) and ("id" in xpath):
            # other
            pass
        else:
            raise ValueError(f"Unsupported target in xpath: {xpath}")

        return target

[docs]    def parse_model(
        self,
        sed_model: libsedml.SedModel,
        source: str,
        sed_changes: List[libsedml.SedChange],
    ) -> AbstractModel:
        """Convert SedModel to AbstractModel.

        :param sed_changes:
        :param source:
        :param sed_model:
        :return:
        """
        changes = dict()
        for sed_change in sed_changes:
            d = self.parse_change(sed_change)
            for xpath, value in d.items():
                target = self.parse_xpath_target(xpath)
                changes[target] = value

        mid = sed_model.getId()
        model = AbstractModel(
            source=source,
            sid=mid,
            name=sed_model.getName(),
            language=sed_model.getLanguage(),
            language_type=None,
            base_path=self.working_dir,
            changes=changes,
            selections=None,
        )

        return model

[docs]    def resolve_model_changes(self):
        """Resolves the original model sources and full change lists.

        Going through the tree of model upwards until root is reached and
        collecting changes on the way (example models m* and changes c*)
        m1 (source) -> m2 (c1, c2) -> m3 (c3, c4)
        resolves to
        m1 (source) []
        m2 (source) [c1,c2]
        m3 (source) [c1,c2,c3,c4]
        The order of changes is important (at least between nodes on different
        levels of hierarchies), because later changes of derived models could
        reverse earlier changes.

        Uses recursive search strategy, which should be okay as long as the
        model tree hierarchy is not getting to deep.
        """

        def findSource(mid, changes):
            """
            Recursive search for original model and store the
            changes which have to be applied in the list of changes

            :param mid:
            :param changes:
            :return:
            """
            # mid is node above
            if mid in model_sources and not model_sources[mid] == mid:
                # add changes for node
                for c in model_changes[mid]:
                    changes.append(c)
                # keep looking deeper
                return findSource(model_sources[mid], changes)
            # the source is no longer a key in the sources, it is the source
            return mid, changes

        # store original source and changes for model
        model_sources = {}
        model_changes = {}

        # collect direct source and changes
        for m in self.sed_doc.getListOfModels():  # type: libsedml.SedModel
            mid = m.getId()
            source = m.getSource()
            model_sources[mid] = source
            changes = []
            # store the changes unique for this model
            for c in m.getListOfChanges():
                changes.append(c)
            model_changes[mid] = changes

        # resolve source and changes recursively
        all_changes = {}
        mids = [m.getId() for m in self.sed_doc.getListOfModels()]
        for mid in mids:
            source, changes = findSource(mid, changes=list())
            model_sources[mid] = source
            all_changes[mid] = changes[::-1]

        return model_sources, all_changes

[docs]    def parse_change(self, sed_change: libsedml.SedChange) -> Dict:
        """Parses the change.

        Currently only a limited subset of model changes is supported.
        Namely changes of parameters and concentrations within a
        SedChangeAttribute.
        """
        xpath = sed_change.getTarget()

        if sed_change.getTypeCode() == libsedml.SEDML_CHANGE_ATTRIBUTE:
            # simple change which can be directly set in model
            value = float(sed_change.getNewValue())
            return {xpath: value}

        elif sed_change.getTypeCode() == libsedml.SEDML_CHANGE_COMPUTECHANGE:
            # change based on a model calculation (with optional parameters)

            logger.error("ComputeChange not implemented correctly")
            # TODO: implement compute change with model
            """
            # TODO: general calculation on model with amounts and concentrations
            variables = {}
            for par in sed_change.getListOfParameters():  # type: libsedml.SedParameter
                variables[par.getId()] = par.getValue()

            for var in sed_change.getListOfVariables():  # type: libsedml.SedVariable
                vid = var.getId()
                selection = SEDMLParser.selectionFromVariable(var, model)
                expr = selection.id
                if selection.type == "concentration":
                    expr = f"init([{selection.id}])"
                elif selection.type == "amount":
                    expr = f"init({selection.id})"
                variables[vid] = model[expr]

            # value is calculated with the current state of model
            value = evaluableMathML(sed_change.getMath(), variables=variables)
            """
            value = -1.0
            return {xpath: value}

        else:
            logger.error(f"Unsupported change: {sed_change.getElementName()}")
            # TODO: libsedml.SEDML_CHANGE_REMOVEXML
            # TODO: libsedml.SEDML_CHANGE_ADDXML
            # TODO: libsedml.SEDML_CHANGE_CHANGEXML
            return {}

[docs]    def parse_simulation(self, sed_sim: libsedml.SedSimulation) -> Union[TimecourseSim]:
        """Parse simulation information."""
        sim_type = sed_sim.getTypeCode()
        algorithm = sed_sim.getAlgorithm()
        if algorithm is None:
            logger.warning(
                f"Algorithm missing on simulation, defaulting to "
                f"'cvode: KISAO:0000019'"
            )
            algorithm = sed_sim.createAlgorithm()
            algorithm.setKisaoID("KISAO:0000019")

        kisao = algorithm.getKisaoID()

        # is supported algorithm
        if not is_supported_algorithm_for_simulation_type(
            kisao=kisao, sim_type=sim_type
        ):
            logger.error(
                f"Algorithm '{kisao}' unsupported for simulation "
                f"'{sed_sim.getId()}' of  type '{sim_type}'"
            )

        if sim_type == libsedml.SEDML_SIMULATION_UNIFORMTIMECOURSE:
            initial_time: float = sed_sim.getInitialTime()
            output_start_time: float = sed_sim.getOutputStartTime()
            output_end_time: float = sed_sim.getOutputEndTime()
            number_of_points: int = sed_sim.getNumberOfPoints()

            # FIXME: handle time offset correctly (either separate presimulation)
            # FIXME: impoartant to have the correct numbers of points
            tcsim = TimecourseSim(
                timecourses=[
                    Timecourse(
                        start=initial_time,
                        end=output_end_time,
                        steps=number_of_points - 1,
                    ),
                ],
                time_offset=output_start_time,
            )
            return tcsim

        elif sim_type == libsedml.SEDML_SIMULATION_ONESTEP:
            step: float = sed_sim.getStep()
            tcsim = TimecourseSim(
                timecourses=[
                    Timecourse(
                        start=0,
                        end=step,
                        steps=2,
                    ),
                ]
            )
            return tcsim

        elif sim_type == libsedml.SEDML_SIMULATION_STEADYSTATE:
            raise NotImplementedError("steady state simulation not yet supported")

        # TODO/FIXME: handle all the algorithm parameters as integrator parameters

[docs]    def parse_task(self, sed_task: libsedml.SedAbstractTask) -> Task:
        """Parse arbitrary task (repeated or simple, or simple repeated)."""
        # If no DataGenerator references the task, no execution is necessary
        dgs = self.data_generators_for_task(sed_task)
        if len(dgs) == 0:
            logger.warning(
                f"Task '{sed_task.getId()}' is not used in any DataGenerator."
            )

        # tasks contain other subtasks, which can contain subtasks. This
        # results in a tree of task dependencies where the
        # simple tasks are the node leaves. These tree has to be resolved to
        # generate code for more complex task dependencies.

        # resolve task tree (order & dependency of tasks)
        task_tree_root = TaskTree.from_sedml_task(self.sed_doc, root_task=sed_task)

        # go forward through task tree
        lines = []
        node_stack = Stack()
        tree_nodes = [n for n in task_tree_root]

        print("tree_nodes", tree_nodes)
        print(task_tree_root.info())

        for kn, node in enumerate(tree_nodes):
            task_type = node.task.getTypeCode()

            # Create simulation for task
            if task_type == libsedml.SEDML_TASK:
                task = self._parse_simple_task(task_node=node)
                return task

            # Repeated tasks are multi-dimensional scans
            elif task_type == libsedml.SEDML_TASK_REPEATEDTASK:
                self._parse_repeated_task(node=node)

            elif task_type == libsedml.SEDML_TASK_SIMPLEREPEATEDTASK:
                self._parse_simple_repeated_task(node=node)

            else:
                logger.error("Unsupported task: {}".format(task_type))

[docs]    def _parse_simple_task(self, task_node: TaskNode) -> Task:
        """Parse simple task"""
        sed_task: libsedml.SedTask = task_node.task
        model_id: str = sed_task.getModelReference()
        simulation_id: str = sed_task.getSimulationReference()
        return Task(model=model_id, simulation=simulation_id)

[docs]    def _parse_simple_repeated_task(self, node: TaskNode):
        print("parse simple repeated task")
        raise NotImplementedError(
            f"Task type is not supported: {node.task.getTypeCode()}"
        )
        # TODO: implement

        # for ksub, subtask in enumerate(subtasks):
        #     t = doc.getTask(subtask.getTask())
        #
        #     resultVariable = "__subtask__".format(t.getId())
        #     selections = SEDMLCodeFactory.selectionsForTask(doc=doc, task=task)
        #     if t.getTypeCode() == libsedml.SEDML_TASK:
        #         forLines.extend(
        #             SEDMLCodeFactory.subtaskToPython(
        #                 doc,
        #                 task=t,
        #                 selections=selections,
        #                 resultVariable=resultVariable,
        #             )
        #         )
        #         forLines.append("{}.extend([__subtask__])".format(task.getId()))
        #
        #     elif t.getTypeCode() == libsedml.SEDML_TASK_REPEATEDTASK:
        #         forLines.extend(SEDMLCodeFactory.repeatedTaskToPython(doc, task=t))
        #         forLines.append("{}.extend({})".format(task.getId(), t.getId()))

[docs]    def parse_figure(self, sed_output: libsedml.SedOutput) -> Figure:
        """Parse simulation information."""
        type_code = sed_output.getTypeCode()
        if type_code == libsedml.SEDML_OUTPUT_PLOT2D:
            sed_plot2d: libsedml.SedPlot = sed_output
            plots = [self.parse_plot2d(sed_plot2d)]
            f = Figure(
                experiment=None,
                sid=sed_plot2d.getId(),
                num_rows=1,
                num_cols=1,
            )
            f.add_plots(plots)
            return f
        elif type_code == libsedml.SEDML_FIGURE:
            raise NotImplementedError

[docs]    def parse_plot2d(self, sed_plot2d: libsedml.SedPlot2D) -> Plot:
        plot = Plot(
            sid=sed_plot2d.getId(),
            name=sed_plot2d.getName(),
            legend=sed_plot2d.getLegend(),
        )
        # axis
        plot.xaxis = self.parse_axis(sed_plot2d.getXAxis())
        plot.yaxis = self.parse_axis(sed_plot2d.getYAxis())

        # curves
        curves: List[Curve] = []
        sed_curve: libsedml.Curve
        for sed_curve in sed_plot2d.getListOfCurves():
            curves.append(self.parse_curve(sed_curve))
        plot.curves = curves
        return plot

[docs]    def parse_axis(self, sed_axis: libsedml.SedAxis) -> Axis:
        # FIXME: support style on axis
        if sed_axis is None:
            axis = Axis()
        else:
            axis = Axis(
                label=sed_axis.getName(),
                scale=sed_axis.getType(),
                min=sed_axis.getMin(),
                max=sed_axis.getMax(),
                grid=sed_axis.getGrid(),
            )
        return axis

[docs]    def parse_curve(self, sed_curve: libsedml.SedCurve) -> Curve:
        x: Data
        y: Data
        xerr: Data
        yerr: Data

        curve = Curve(
            x=self.data_from_datagenerator(sed_curve.getXDataReference()),
            y=self.data_from_datagenerator(sed_curve.getYDataReference()),
            # FIXME: handle errorbars via lower and upper
            xerr=self.data_from_datagenerator(sed_curve.getXErrorLower()),
            yerr=self.data_from_datagenerator(sed_curve.getYErrorLower()),
        )
        # FIXME: support yaxis
        # FIXME: support type
        # FIXME: parse style
        return curve

[docs]    def data_from_datagenerator(self, sed_dg_ref: Optional[str]) -> Optional[Data]:
        """This must all be evaluated with actual data"""
        # FIXME: do all the math on the data-generator
        if not sed_dg_ref:
            return None
        sed_dg: libsedml.SedDataGenerator = self.sed_doc.getDataGenerator(sed_dg_ref)
        print("DataGenerator", sed_dg)

        # sed_dg.getListOfVariables()
        # sed_dg.getMath()
        sed_var: libsedml.SedVariable = sed_dg.getVariable(0)
        task_id = sed_var.getTaskReference()

        sed_symbol = sed_var.isSetSymbol()
        if sed_var.isSetSymbol():
            # FIXME: check symbol
            index = "time"
        elif sed_var.isSetTarget():
            index = self.parse_xpath_target(sed_var.getTarget())

        return Data(experiment=self.exp_class, index=index, task=task_id)

[docs]    def _parse_repeated_task(self, node: TaskNode):
        print("repeated task")
        # repeated tasks will be translated into multidimensional scans
        raise NotImplementedError
        # TODO: implement

[docs]    def data_generators_for_task(
        self,
        sed_task: libsedml.SedTask,
    ) -> List[libsedml.SedDataGenerator]:
        """Get the DataGenerators which reference the given task."""
        sed_dgs = []
        for (
            sed_dg
        ) in self.sed_doc.getListOfDataGenerators():  # type: libsedml.SedDataGenerator
            for var in sed_dg.getListOfVariables():  # type: libsedml.SedVariable
                if var.getTaskReference() == sed_task.getId():
                    sed_dgs.append(sed_dg)
                    # DataGenerator is added, no need to look at rest of variables
                    break
        return sed_dgs

[docs]    def selections_for_task(self, sed_task: libsedml.SedTask):
        """Populate variable lists from the data generators for the given task.

        These are the timeCourseSelections and steadyStateSelections
        in RoadRunner.

        Search all data generators for variables which have to be part of the simulation.
        """
        model_id = sed_task.getModelReference()
        selections = set()
        for (
            sed_dg
        ) in self.doc.getListOfDataGenerators():  # type: libsedml.SedDataGenerator
            for var in sed_dg.getListOfVariables():
                if var.getTaskReference() == sed_task.getId():
                    # FIXME: resolve with model
                    selection = SEDMLCodeFactory.selectionFromVariable(var, model_id)
                    expr = selection.id
                    if selection.type == "concentration":
                        expr = "[{}]".format(selection.id)
                    selections.add(expr)

        return selections

    @staticmethod
[docs]    def get_ordered_subtasks(sed_task: libsedml.SedTask) -> List[libsedml.SedTask]:
        """Ordered list of subtasks for task."""
        subtasks = sed_task.getListOfSubTasks()
        subtask_order = [st.getOrder() for st in subtasks]

        # sort by order, if all subtasks have order (not required)
        if all(subtask_order) is not None:
            subtasks = [st for (stOrder, st) in sorted(zip(subtask_order, subtasks))]
        return subtasks