some new features
This commit is contained in:
14
.venv/lib/python3.12/site-packages/stanio/__init__.py
Normal file
14
.venv/lib/python3.12/site-packages/stanio/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
from .csv import read_csv
|
||||
from .json import dump_stan_json, write_stan_json
|
||||
from .reshape import Variable, parse_header, stan_variables
|
||||
|
||||
# Names re-exported as the public API of the stanio package:
# CSV reading, Stan-JSON writing, and output-reshaping helpers.
__all__ = [
    "read_csv",
    "write_stan_json",
    "dump_stan_json",
    "Variable",
    "parse_header",
    "stan_variables",
]

# Package version string.
__version__ = "0.5.1"
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
50
.venv/lib/python3.12/site-packages/stanio/csv.py
Normal file
50
.venv/lib/python3.12/site-packages/stanio/csv.py
Normal file
@ -0,0 +1,50 @@
|
||||
"""
|
||||
Module to load the minimal information from a Stan CSV file.
|
||||
Only the header row and data are read, no metadata is parsed.
|
||||
"""
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
|
||||
def read_csv(filenames: Union[str, List[str]]) -> Tuple[str, npt.NDArray[np.float64]]:
    """
    Read Stan-style CSV file(s), returning the header row and the data.

    When several files are supplied, their data arrays are stacked along a
    new leading axis, so in typical usage the returned data has shape
    ``(n_chains, n_samples, n_params)``.

    Parameters
    ----------
    filenames : Union[str, List[str]]
        Path(s) of the CSV file(s) to read.

    Returns
    -------
    Tuple[str, npt.NDArray[np.float64]]
        The (stripped) header row and the stacked data.

    Raises
    ------
    ValueError
        If multiple files are given and their header rows disagree.
    """
    files = filenames if isinstance(filenames, list) else [filenames]

    header = ""
    chunks: List[npt.NDArray[np.float64]] = []
    for path in files:
        with open(path, "r") as stream:
            # Skip leading '#' comment lines; the first non-comment
            # line is the header row.
            while (current_header := stream.readline()).startswith("#"):
                pass
            if not header:
                header = current_header
            elif header != current_header:
                raise ValueError("Headers do not match")
            # The remainder of the file is numeric data (with possible
            # trailing '#' comment lines, which loadtxt ignores).
            chunks.append(np.loadtxt(stream, delimiter=",", comments="#"))

    return header.strip(), np.stack(chunks, axis=0)
|
||||
102
.venv/lib/python3.12/site-packages/stanio/json.py
Normal file
102
.venv/lib/python3.12/site-packages/stanio/json.py
Normal file
@ -0,0 +1,102 @@
|
||||
"""
|
||||
Utilities for writing Stan Json files
|
||||
"""
|
||||
# Prefer ujson when a sufficiently recent version is installed; on any
# failure (missing package, old version, unparsable version string) fall
# back to the standard-library json module under the same name.
try:
    import ujson as json

    uj_version = tuple(map(int, json.__version__.split(".")))
    if uj_version < (5, 5, 0):
        raise ImportError("ujson version too old")  # pragma: no cover
    UJSON_AVAILABLE = True
# Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt;
# `Exception` still covers ImportError, AttributeError, and ValueError here.
except Exception:
    UJSON_AVAILABLE = False
    import json
|
||||
|
||||
from typing import Any, Mapping
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def process_dictionary(d: Mapping[str, Any]) -> Mapping[str, Any]:
    """Apply :func:`process_value` to every value of *d*, keeping its keys."""
    converted = {}
    for key, value in d.items():
        converted[key] = process_value(value)
    return converted
|
||||
|
||||
|
||||
# pylint: disable=too-many-return-statements
def process_value(val: Any) -> Any:
    """
    Recursively convert *val* into something the ``json`` module can encode.

    Booleans become 0/1 ints, complex numbers become two-element lists,
    tuples become dicts keyed by 1-based position, and numpy/pandas/xarray
    containers are converted to (nested) lists.
    """
    if val is None:
        return None
    # Stan encodes booleans as the integers 0 and 1.
    if isinstance(val, bool):
        return int(val)
    # A complex scalar is written as a length-2 array [real, imag].
    if isinstance(val, complex):
        return [val.real, val.imag]
    # A dict may be a manually specified tuple; convert its values.
    if isinstance(val, dict):
        return process_dictionary(val)
    # A tuple becomes a dict with 1-based integer keys.
    if isinstance(val, tuple):
        return {pos: process_value(item) for pos, item in enumerate(val, start=1)}
    if isinstance(val, list):
        return list(map(process_value, val))

    module = getattr(type(val), "__module__", "")
    if any(pkg in module for pkg in ("numpy", "xarray", "pandas")):
        arr = np.asanyarray(val)
        kind = arr.dtype.kind
        # fast paths for numeric dtypes
        if kind in "iuf":
            return arr.tolist()
        if kind == "c":
            # Split a complex array into trailing [real, imag] pairs.
            parts = [np.asarray(arr.real), np.asarray(arr.imag)]
            return np.stack(parts, axis=-1).tolist()
        if kind == "b":
            return arr.astype(int).tolist()

        # anything else should be an object array (tuples, etc.)
        return process_value(arr.tolist())

    return val
|
||||
|
||||
|
||||
def dump_stan_json(data: Mapping[str, Any]) -> str:
    """
    Serialize a mapping of names to values as a JSON string.

    Each value may be any numeric type, a boolean (encoded as an int),
    or any collection that :func:`numpy.asarray` accepts, such as a
    :class:`pandas.Series`.

    The resulting string follows the
    `Json Format for Cmdstan
    <https://mc-stan.org/docs/cmdstan-guide/json.html>`__

    :param data: A mapping from strings to values, e.g. a dictionary or
        something more exotic like an :class:`xarray.Dataset`. The input
        is copied during type conversion, not modified.
    """
    converted = process_dictionary(data)
    return json.dumps(converted)
|
||||
|
||||
|
||||
def write_stan_json(path: str, data: Mapping[str, Any]) -> None:
    """
    Write a mapping of names to values to a JSON file.

    Each value may be any numeric type, a boolean (encoded as an int),
    or any collection that :func:`numpy.asarray` accepts, such as a
    :class:`pandas.Series`.

    The resulting file follows the
    `Json Format for Cmdstan
    <https://mc-stan.org/docs/cmdstan-guide/json.html>`__

    :param path: File path for the created json. An existing file at this
        path is overwritten.

    :param data: A mapping from strings to values, e.g. a dictionary or
        something more exotic like an :class:`xarray.Dataset`. The input
        is copied during type conversion, not modified.
    """
    with open(path, "w") as out:
        converted = process_dictionary(data)
        if UJSON_AVAILABLE:
            json.dump(converted, out)
        else:
            # stdlib json: write the encoding incrementally rather than
            # materializing the whole string first.
            for piece in json.JSONEncoder().iterencode(converted):
                out.write(piece)
|
||||
0
.venv/lib/python3.12/site-packages/stanio/py.typed
Normal file
0
.venv/lib/python3.12/site-packages/stanio/py.typed
Normal file
252
.venv/lib/python3.12/site-packages/stanio/reshape.py
Normal file
252
.venv/lib/python3.12/site-packages/stanio/reshape.py
Normal file
@ -0,0 +1,252 @@
|
||||
"""
|
||||
Classes and functions for reshaping Stan output.
|
||||
|
||||
Especially with the addition of tuples, Stan writes
|
||||
flat arrays of data with a rich internal structure.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from math import prod
|
||||
from typing import Any, Dict, Iterable, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
|
||||
class VariableType(Enum):
    # Kind of a Stan output variable; determines how its flat CSV
    # columns are reassembled.
    SCALAR = 1  # real or integer
    COMPLEX = 2  # complex number - requires striding
    TUPLE = 3  # tuples - require recursive handling
|
||||
|
||||
|
||||
@dataclass
class Variable:
    """
    This class represents a single output variable of a Stan model.

    It contains information about the name, dimensions, and type of the
    variable, as well as the indices of where that variable is located in
    the flattened output array Stan models write.

    Generally, this class should not be instantiated directly, but rather
    created by the :func:`parse_header()` function.
    """

    # name of the parameter as given in stan. For nested parameters, this is a dummy name
    name: str
    # where to start (resp. end) reading from the flattened array.
    # For arrays with nested parameters, this will be for the first element
    # and is relative to the start of the parent
    start_idx: int
    end_idx: int
    # rectangular dimensions of the parameter (e.g. (2, 3) for a 2x3 matrix)
    # For nested parameters, this will be the dimensions of the outermost array.
    dimensions: Tuple[int, ...]
    # type of the parameter
    type: VariableType
    # list of nested parameters
    contents: List["Variable"]

    def dtype(self, top: bool = True) -> np.dtype:
        # Build the numpy dtype for this variable: a structured dtype with
        # fields "1", "2", ... for tuples, float64 for scalars, complex128
        # for complex values. When top is False the dtype is wrapped as a
        # subarray dtype carrying this variable's dimensions.
        # NOTE(review): if self.type were ever none of the three enum
        # members, `dtype` below would be unbound — presumably unreachable.
        if self.type == VariableType.TUPLE:
            elts = [
                (str(i + 1), param.dtype(top=False))
                for i, param in enumerate(self.contents)
            ]
            dtype = np.dtype(elts)
        elif self.type == VariableType.SCALAR:
            dtype = np.float64
        elif self.type == VariableType.COMPLEX:
            dtype = np.complex128

        if top:
            return dtype
        else:
            return np.dtype((dtype, self.dimensions))

    def columns(self) -> Iterable[int]:
        # Column indices this variable occupies in the flat output array.
        return range(self.start_idx, self.end_idx)

    def num_elts(self) -> int:
        # Number of (possibly tuple-valued) elements in this variable.
        return prod(self.dimensions)

    def elt_size(self) -> int:
        # Number of flat columns spanned by this variable.
        return self.end_idx - self.start_idx

    # total size is elt_size * num_elts

    def _extract_helper(self, src: np.ndarray, offset: int = 0) -> np.ndarray:
        # Recursive workhorse for extract_reshape. `offset` shifts the
        # column window; it is nonzero when extracting a tuple member
        # relative to its parent. All reshapes use column-major ("F")
        # order, matching how the flat columns are laid out.
        start = self.start_idx + offset
        end = self.end_idx + offset
        if self.type == VariableType.SCALAR:
            return src[..., start:end].reshape(-1, *self.dimensions, order="F")
        elif self.type == VariableType.COMPLEX:
            # After the reshape, axis 1 has length 2; `::2` selects the
            # real part (index 0) and `1::2` the imaginary part (index 1).
            ret = src[..., start:end].reshape(-1, 2, *self.dimensions, order="F")
            ret = ret[:, ::2] + 1j * ret[:, 1::2]
            return ret.squeeze().reshape(-1, *self.dimensions, order="F")
        elif self.type == VariableType.TUPLE:
            out: np.ndarray = np.empty(
                (prod(src.shape[:-1]), prod(self.dimensions)), dtype=object
            )
            for idx in range(self.num_elts()):
                # Column stride per element: elt_size() // num_elts().
                off = idx * self.elt_size() // self.num_elts()
                # Extract each member of the tuple for this element.
                # NOTE(review): assumes contents is non-empty (elts[0] below).
                elts = [
                    param._extract_helper(src, offset=start + off)
                    for param in self.contents
                ]
                # Zip the members back together into Python tuples, one per draw.
                for i in range(elts[0].shape[0]):
                    out[i, idx] = tuple(elt[i] for elt in elts)
            return out.reshape(-1, *self.dimensions, order="F")

    def extract_reshape(self, src: np.ndarray, object: bool = True) -> npt.NDArray[Any]:
        """
        Given an array where the final dimension is the flattened output of a
        Stan model, (e.g. one row of a Stan CSV file), extract the variable
        and reshape it to the correct type and dimensions.

        This will most likely result in copies of the data being made if
        the variable is not a scalar.

        Parameters
        ----------
        src : np.ndarray
            The array to extract from.

            Indices besides the final dimension are preserved
            in the output.

        object : bool
            If True, the output of tuple types will be an object array,
            otherwise it will use custom dtypes to represent tuples.

        Returns
        -------
        npt.NDArray[Any]
            The extracted variable, reshaped to the correct dimensions.
            If the variable is a tuple, this will be an object array,
            otherwise it will have a dtype of either float64 or complex128.
        """
        # NOTE(review): the `object` parameter shadows the builtin of the
        # same name; kept because it is part of the public signature.
        out = self._extract_helper(src)
        if not object:
            out = out.astype(self.dtype())
        if src.ndim > 1:
            # Restore the leading (e.g. chain/draw) axes of src.
            out = out.reshape(*src.shape[:-1], *self.dimensions, order="F")
        else:
            # Single flat row in: drop the synthetic leading axis.
            out = out.squeeze(axis=0)

        return out
|
||||
|
||||
|
||||
def _munge_first_tuple(tup: str) -> str:
|
||||
return "dummy_" + tup.split(":", 1)[1]
|
||||
|
||||
|
||||
def _get_base_name(param: str) -> str:
|
||||
return param.split(".")[0].split(":")[0]
|
||||
|
||||
|
||||
def _from_header(header: str) -> List[Variable]:
    # Parse a comma-separated Stan CSV header into Variable records by
    # grouping consecutive columns that share a base name. Called
    # recursively (via the munged header) to parse tuple members.
    # appending __dummy ensures one extra iteration in the later loop
    header = header.strip() + ",__dummy"
    entries = header.split(",")
    params = []
    start_idx = 0
    name = _get_base_name(entries[0])
    for i in range(0, len(entries) - 1):
        entry = entries[i]
        next_name = _get_base_name(entries[i + 1])

        # A variable ends when the next column has a different base name;
        # `entry` is then its last column.
        if next_name != name:
            if ":" not in entry:
                # Non-tuple column: trailing ".i.j" parts give the shape.
                dims = entry.split(".")[1:]
                if ".real" in entry or ".imag" in entry:
                    # Complex values: the final component marker is not a
                    # dimension, so drop it.
                    type = VariableType.COMPLEX
                    dims = dims[:-1]
                else:
                    type = VariableType.SCALAR
                params.append(
                    Variable(
                        name=name,
                        start_idx=start_idx,
                        end_idx=i + 1,
                        dimensions=tuple(map(int, dims)),
                        type=type,
                        contents=[],
                    )
                )
            else:
                # Tuple column: dims come from the part before the first ':'.
                dims = entry.split(":")[0].split(".")[1:]
                # Build a deduplicated header for one element of the tuple
                # (dict.fromkeys preserves order while removing duplicates)
                # and parse it recursively to get the member variables.
                munged_header = ",".join(
                    dict.fromkeys(map(_munge_first_tuple, entries[start_idx : i + 1]))
                )

                params.append(
                    Variable(
                        name=name,
                        start_idx=start_idx,
                        end_idx=i + 1,
                        dimensions=tuple(map(int, dims)),
                        type=VariableType.TUPLE,
                        contents=_from_header(munged_header),
                    )
                )

            # Start a new group at the next column.
            start_idx = i + 1
            name = next_name

    return params
|
||||
|
||||
|
||||
def parse_header(header: str) -> Dict[str, Variable]:
    """
    Parse a comma-separated list of Stan output names, such as the header
    row of a CSV file, into a dictionary of :class:`Variable` objects.

    Parameters
    ----------
    header : str
        Comma separated list of Stan variables, including index information.
        For example, an ``array[2] real foo`` would be represented as
        ``foo.1,foo.2``.

    Returns
    -------
    Dict[str, Variable]
        A dictionary mapping the base name of each variable to a :class:`Variable`.
    """
    variables = _from_header(header)
    return {variable.name: variable for variable in variables}
|
||||
|
||||
|
||||
def stan_variables(
    parameters: Dict[str, Variable],
    source: npt.NDArray[np.float64],
    *,
    object: bool = True,
) -> Dict[str, npt.NDArray[Any]]:
    """
    Extract every variable described by *parameters* from *source* and
    reshape each one to its correct dimensions.

    Parameters
    ----------
    parameters : Dict[str, Variable]
        A dictionary of :class:`Variable` objects,
        like that returned by :func:`parse_header()`.
    source : npt.NDArray[np.float64]
        The array to extract from.
    object : bool
        If True, the output of tuple types will be an object array,
        otherwise it will use custom dtypes to represent tuples.

    Returns
    -------
    Dict[str, npt.NDArray[Any]]
        A dictionary mapping the base name of each variable to the extracted
        and reshaped data.
    """
    extracted: Dict[str, npt.NDArray[Any]] = {}
    for variable in parameters.values():
        extracted[variable.name] = variable.extract_reshape(source, object=object)
    return extracted
|
||||
Reference in New Issue
Block a user