"""Parser for NuML data."""
import importlib
import warnings
from enum import Enum
from pathlib import Path
import libnuml
import libsbml
import libsedml
import numpy as np
import pandas as pd
from sbmlutils import log
# Module-level logger obtained from the sbmlutils logging helper.
logger = log.get_logger(__name__)
class NumlParser:
    """Helper class for parsing NuML data files.

    All functionality is exposed through classmethods; no instance state is
    used. The type-code constants (``NUML_*``) are looked up either on
    ``libnuml`` or on ``libsedml``, selected via :class:`Library`.
    """

    class Library(Enum):
        """Bugfix helper for managing the library issues.

        Selects which binding supplies the ``NUML_*`` type codes that the
        parsed objects are compared against.
        """

        # NOTE(review): both member names are referenced by the methods below;
        # the concrete values are assumed - confirm against the upstream file.
        LIBNUML = 1
        LIBSEDML = 2

    @classmethod
    def read_numl_document(cls, path: Path) -> "libnuml.NUMLDocument":
        """Read NuML document and check for errors.

        :param path: path of file
        :return: libnuml.NUMLDocument
        :raises IOError: if the error log reports entries of severity
            ``LIBNUML_SEV_ERROR`` or ``LIBNUML_SEV_FATAL``
        """
        importlib.reload(libnuml)
        # load_numl_data converts Path objects to str before calling this
        # method; do the same here so direct callers may pass a Path as the
        # annotation promises.
        doc_numl: libnuml.NUMLDocument = libnuml.readNUMLFromFile(str(path))

        # check for errors
        errorlog = doc_numl.getErrorLog()
        msg = "NUML ERROR in '{}': {}".format(path, errorlog.toString())

        # errors and fatal errors abort reading
        for severity in (libnuml.LIBNUML_SEV_ERROR, libnuml.LIBNUML_SEV_FATAL):
            if errorlog.getNumFailsWithSeverity(severity) > 0:
                raise IOError(msg)

        # everything else is only reported (one warning per severity class)
        for severity in (
            libnuml.LIBNUML_SEV_WARNING,
            libnuml.LIBNUML_SEV_SCHEMA_ERROR,
            libnuml.LIBNUML_SEV_GENERAL_WARNING,
        ):
            if errorlog.getNumFailsWithSeverity(severity) > 0:
                warnings.warn(msg)

        # NOTE(review): presumably part of the library-conflict workaround
        # (libsbml is reloaded after libnuml was used) - confirm.
        importlib.reload(libsbml)
        return doc_numl

    @classmethod
    def load_numl_data(cls, path) -> list:
        """Read NuML data from file.

        This loads the complete numl data, i.e. all resultComponents.
        For more information see: https://github.com/numl/numl

        :param path: NuML path (``str`` or ``Path``)
        :return: list with one ``[id, DataFrame, data_types]`` entry per
            resultComponent, where ``data_types`` is the result of
            :meth:`parse_dimension_description`
        """
        importlib.reload(libnuml)

        path_str = str(path) if isinstance(path, Path) else path
        doc_numl = cls.read_numl_document(path_str)

        # reads all the resultComponents from the numl file
        results = []
        n_components = doc_numl.getNumResultComponents()
        components = doc_numl.getResultComponents()
        logger.info("\nNumResultComponents: %s", n_components)

        for rc_index in range(n_components):
            rc = components.get(rc_index)  # parse ResultComponent
            rc_id = rc.getId()

            # dimension info
            description = rc.getDimensionDescription()
            data_types = cls.parse_dimension_description(description)

            # data: every top-level dimension entry yields a list of rows,
            # which are concatenated into one flat list of rows
            dimension = rc.getDimension()
            assert isinstance(dimension, libnuml.Dimension)
            rows = [
                row
                for dim_index in range(dimension.size())
                for row in cls._parse_dimension(dimension.get(dim_index))
            ]

            # column ids from DimensionDescription
            column_ids = [
                cid for entry in data_types for cid, _dtype in entry.items()
            ]
            df = pd.DataFrame(rows, columns=column_ids)

            # convert the columns to the data types given in the description
            for entry in data_types:
                for cid, dtype in entry.items():
                    if dtype == "double":
                        df[cid] = df[cid].astype(np.float64)
                    elif dtype == "string":
                        df[cid] = df[cid].astype(str)

            results.append([rc_id, df, data_types])

        return results

    @classmethod
    def parse_dimension_description(
        cls, description, library: Library = Library.LIBNUML
    ):
        """Parse the given dimension description.

        :param description: DimensionDescription object
        :param library: binding whose type codes are used for the checks
        :return: flat list of the entries collected by
            :meth:`_parse_description` for every child of ``description``
            (``{id: type}`` dictionaries for composite and atomic
            descriptions)
        """
        if library == cls.Library.LIBNUML:
            importlib.reload(libnuml)
            assert description.getTypeCode() == libnuml.NUML_DIMENSIONDESCRIPTION
        elif library == cls.Library.LIBSEDML:
            importlib.reload(libsedml)
            assert description.getTypeCode() == libsedml.NUML_DIMENSIONDESCRIPTION

        return [
            part
            for k in range(description.size())
            for part in cls._parse_description(description.get(k), library=library)
        ]

    @classmethod
    def _parse_description(
        cls, d, info=None, entry=None, library: Library = Library.LIBNUML
    ):
        """Parse the recursive DimensionDescription, TupleDescription, AtomicDescription.

        This gets the dimension information from NuML.

          <dimensionDescription>
            <compositeDescription indexType="double" id="time" name="time">
              <compositeDescription indexType="string" id="SpeciesIds" name="SpeciesIds">
                <atomicDescription valueType="double" id="Concentrations" name="Concentrations" />
              </compositeDescription>
            </compositeDescription>
          </dimensionDescription>

        :param d: description object to parse
        :param info: list collecting the parsed entries (created if ``None``)
        :param entry: list handed down through the recursion (created if ``None``)
        :param library: binding whose type codes are used for the comparisons
        :return: ``info`` extended by the entries found below ``d``
        :raises NotImplementedError: for unsupported type codes or libraries
        """
        type_code = d.getTypeCode()

        # select the binding that provides the NUML_* type codes
        if library == cls.Library.LIBNUML:
            importlib.reload(libnuml)
            lib = libnuml
        elif library == cls.Library.LIBSEDML:
            importlib.reload(libsedml)
            lib = libsedml
        else:
            raise NotImplementedError("Type code: {}".format(type_code))

        assert type_code in [
            lib.NUML_COMPOSITEDESCRIPTION,
            lib.NUML_ATOMICDESCRIPTION,
            lib.NUML_TUPLEDESCRIPTION,
        ]

        if info is None:
            info = []
        if entry is None:
            entry = []

        logger.debug("typecode: %s, type: %s", type_code, type(d))

        if type_code == lib.NUML_COMPOSITEDESCRIPTION:
            info.append({d.getId(): d.getIndexType()})
            if d.isContentCompositeDescription():
                for k in range(d.size()):
                    # copy of entry, so the children do not share one list
                    info = cls._parse_description(
                        d.getCompositeDescription(k),
                        info,
                        list(entry),
                        library=library,
                    )
            elif d.isContentAtomicDescription():
                info = cls._parse_description(
                    d.getAtomicDescription(), info, entry, library=library
                )

        elif type_code == lib.NUML_ATOMICDESCRIPTION:
            info.append({d.getId(): d.getValueType()})

        elif type_code == lib.NUML_TUPLEDESCRIPTION:
            tuple_des = d.getTupleDescription()
            value_types = [
                tuple_des.getAtomicDescription(k).getValueType()
                for k in range(d.size())
            ]
            # NOTE: a plain list of value types is appended here, not an
            # {id: type} dictionary as in the other branches.
            info.append(value_types)

        else:
            raise NotImplementedError("Type code: {}".format(type_code))

        return info

    @classmethod
    def _parse_dimension(
        cls, d, data=None, entry=None, library: Library = Library.LIBNUML
    ):
        """Parse the recursive CompositeValue, Tuple, AtomicValue.

        This gets the actual data from NuML.

        :param d: value object to parse
        :param data: list collecting the finished rows (created if ``None``)
        :param entry: row under construction; index values are appended on
            the way down and the atomic value completes it
        :param library: binding whose type codes are used for the comparisons
        :return: ``data`` extended by the rows found below ``d``
        :raises NotImplementedError: for unsupported type codes or libraries
        """
        # select the binding that provides the NUML_* type codes
        if library == cls.Library.LIBNUML:
            importlib.reload(libnuml)
            lib = libnuml
        elif library == cls.Library.LIBSEDML:
            importlib.reload(libsedml)
            lib = libsedml
        else:
            raise NotImplementedError("Library: {}".format(library))

        if data is None:
            data = []
        if entry is None:
            entry = []

        type_code = d.getTypeCode()

        if type_code == lib.NUML_COMPOSITEVALUE:
            entry.append(d.getIndexValue())
            if d.isContentCompositeValue():
                for k in range(d.size()):
                    # make copy, so every entry is own entry; the library is
                    # forwarded so nested values use the same type codes
                    data = cls._parse_dimension(
                        d.getCompositeValue(k), data, list(entry), library=library
                    )
            elif d.isContentAtomicValue():
                data = cls._parse_dimension(
                    d.getAtomicValue(), data, entry, library=library
                )

        elif type_code == lib.NUML_ATOMICVALUE:
            entry.append(d.getValue())
            # entry finished, we are appending
            data.append(entry)

        elif type_code == lib.NUML_TUPLE:
            tuple_value = d.getTuple()
            values = [
                tuple_value.getAtomicValue(k).getDoubleValue()
                for k in range(d.size())
            ]
            data.append(values)

        else:
            raise NotImplementedError("Type code: {}".format(type_code))

        return data