some new features
269
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/__init__.py
Normal file
@@ -0,0 +1,269 @@
|
||||
"""Container objects for results of CmdStan run(s)."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from cmdstanpy.cmdstan_args import (
|
||||
CmdStanArgs,
|
||||
LaplaceArgs,
|
||||
OptimizeArgs,
|
||||
PathfinderArgs,
|
||||
SamplerArgs,
|
||||
VariationalArgs,
|
||||
)
|
||||
from cmdstanpy.utils import check_sampler_csv, get_logger, scan_config
|
||||
|
||||
from .gq import CmdStanGQ
|
||||
from .laplace import CmdStanLaplace
|
||||
from .mcmc import CmdStanMCMC
|
||||
from .metadata import InferenceMetadata
|
||||
from .mle import CmdStanMLE
|
||||
from .pathfinder import CmdStanPathfinder
|
||||
from .runset import RunSet
|
||||
from .vb import CmdStanVB
|
||||
|
||||
__all__ = [
|
||||
"RunSet",
|
||||
"InferenceMetadata",
|
||||
"CmdStanMCMC",
|
||||
"CmdStanMLE",
|
||||
"CmdStanVB",
|
||||
"CmdStanGQ",
|
||||
"CmdStanLaplace",
|
||||
"CmdStanPathfinder",
|
||||
]
|
||||
|
||||
|
||||
def from_csv(
|
||||
path: Union[str, List[str], os.PathLike, None] = None,
|
||||
method: Optional[str] = None,
|
||||
) -> Union[
|
||||
CmdStanMCMC, CmdStanMLE, CmdStanVB, CmdStanPathfinder, CmdStanLaplace, None
|
||||
]:
|
||||
"""
|
||||
Instantiate a CmdStan object from a the Stan CSV files from a CmdStan run.
|
||||
CSV files are specified from either a list of Stan CSV files or a single
|
||||
filepath which can be either a directory name, a Stan CSV filename, or
|
||||
a pathname pattern (i.e., a Python glob). The optional argument 'method'
|
||||
checks that the CSV files were produced by that method.
|
||||
Stan CSV files from CmdStan methods 'sample', 'optimize', and 'variational'
|
||||
result in objects of class CmdStanMCMC, CmdStanMLE, and CmdStanVB,
|
||||
respectively.
|
||||
|
||||
:param path: directory path
|
||||
:param method: method name (optional)
|
||||
|
||||
:return: either a CmdStanMCMC, CmdStanMLE, or CmdStanVB object
|
||||
"""
|
||||
if path is None:
|
||||
raise ValueError('Must specify path to Stan CSV files.')
|
||||
if method is not None and method not in [
|
||||
'sample',
|
||||
'optimize',
|
||||
'variational',
|
||||
'laplace',
|
||||
'pathfinder',
|
||||
]:
|
||||
raise ValueError(
|
||||
'Bad method argument {}, must be one of: '
'"sample", "optimize", "variational", "laplace", "pathfinder"'.format(method)
|
||||
)
|
||||
|
||||
csvfiles = []
|
||||
if isinstance(path, list):
|
||||
csvfiles = path
|
||||
elif isinstance(path, str) and '*' in path:
|
||||
splits = os.path.split(path)
|
||||
if splits[0] is not None:
|
||||
if not (os.path.exists(splits[0]) and os.path.isdir(splits[0])):
|
||||
raise ValueError(
|
||||
'Invalid path specification, {} '
|
||||
' unknown directory: {}'.format(path, splits[0])
|
||||
)
|
||||
csvfiles = glob.glob(path)
|
||||
elif isinstance(path, (str, os.PathLike)):
|
||||
if os.path.exists(path) and os.path.isdir(path):
|
||||
for file in os.listdir(path):
|
||||
if os.path.splitext(file)[1] == ".csv":
|
||||
csvfiles.append(os.path.join(path, file))
|
||||
elif os.path.exists(path):
|
||||
csvfiles.append(str(path))
|
||||
else:
|
||||
raise ValueError('Invalid path specification: {}'.format(path))
|
||||
else:
|
||||
raise ValueError('Invalid path specification: {}'.format(path))
|
||||
|
||||
if len(csvfiles) == 0:
|
||||
raise ValueError('No CSV files found in directory {}'.format(path))
|
||||
for file in csvfiles:
|
||||
if not (os.path.exists(file) and os.path.splitext(file)[1] == ".csv"):
|
||||
raise ValueError(
|
||||
'Bad CSV file path spec,'
|
||||
' includes non-csv file: {}'.format(file)
|
||||
)
|
||||
|
||||
config_dict: Dict[str, Any] = {}
|
||||
try:
|
||||
with open(csvfiles[0], 'r') as fd:
|
||||
scan_config(fd, config_dict, 0)
|
||||
except (IOError, OSError, PermissionError) as e:
|
||||
raise ValueError('Cannot read CSV file: {}'.format(csvfiles[0])) from e
|
||||
if 'model' not in config_dict or 'method' not in config_dict:
|
||||
raise ValueError("File {} is not a Stan CSV file.".format(csvfiles[0]))
|
||||
if method is not None and method != config_dict['method']:
|
||||
raise ValueError(
|
||||
'Expecting Stan CSV output files from method {}, '
|
||||
' found outputs from method {}'.format(
|
||||
method, config_dict['method']
|
||||
)
|
||||
)
|
||||
try:
|
||||
if config_dict['method'] == 'sample':
|
||||
chains = len(csvfiles)
|
||||
sampler_args = SamplerArgs(
|
||||
iter_sampling=config_dict['num_samples'],
|
||||
iter_warmup=config_dict['num_warmup'],
|
||||
thin=config_dict['thin'],
|
||||
save_warmup=config_dict['save_warmup'],
|
||||
)
|
||||
# bugfix 425, check for fixed_params output
|
||||
try:
|
||||
check_sampler_csv(
|
||||
csvfiles[0],
|
||||
iter_sampling=config_dict['num_samples'],
|
||||
iter_warmup=config_dict['num_warmup'],
|
||||
thin=config_dict['thin'],
|
||||
save_warmup=config_dict['save_warmup'],
|
||||
)
|
||||
except ValueError:
|
||||
try:
|
||||
check_sampler_csv(
|
||||
csvfiles[0],
|
||||
is_fixed_param=True,
|
||||
iter_sampling=config_dict['num_samples'],
|
||||
iter_warmup=config_dict['num_warmup'],
|
||||
thin=config_dict['thin'],
|
||||
save_warmup=config_dict['save_warmup'],
|
||||
)
|
||||
sampler_args = SamplerArgs(
|
||||
iter_sampling=config_dict['num_samples'],
|
||||
iter_warmup=config_dict['num_warmup'],
|
||||
thin=config_dict['thin'],
|
||||
save_warmup=config_dict['save_warmup'],
|
||||
fixed_param=True,
|
||||
)
|
||||
except ValueError as e:
|
||||
raise ValueError(
|
||||
'Invalid or corrupt Stan CSV output file.'
|
||||
) from e
|
||||
|
||||
cmdstan_args = CmdStanArgs(
|
||||
model_name=config_dict['model'],
|
||||
model_exe=config_dict['model'],
|
||||
chain_ids=[x + 1 for x in range(chains)],
|
||||
method_args=sampler_args,
|
||||
)
|
||||
runset = RunSet(args=cmdstan_args, chains=chains)
|
||||
runset._csv_files = csvfiles
|
||||
for i in range(len(runset._retcodes)):
|
||||
runset._set_retcode(i, 0)
|
||||
fit = CmdStanMCMC(runset)
|
||||
fit.draws()
|
||||
return fit
|
||||
elif config_dict['method'] == 'optimize':
|
||||
if 'algorithm' not in config_dict:
|
||||
raise ValueError(
|
||||
"Cannot find optimization algorithm"
|
||||
" in file {}.".format(csvfiles[0])
|
||||
)
|
||||
optimize_args = OptimizeArgs(
|
||||
algorithm=config_dict['algorithm'],
|
||||
save_iterations=config_dict['save_iterations'],
|
||||
jacobian=config_dict.get('jacobian', 0),
|
||||
)
|
||||
cmdstan_args = CmdStanArgs(
|
||||
model_name=config_dict['model'],
|
||||
model_exe=config_dict['model'],
|
||||
chain_ids=None,
|
||||
method_args=optimize_args,
|
||||
)
|
||||
runset = RunSet(args=cmdstan_args)
|
||||
runset._csv_files = csvfiles
|
||||
for i in range(len(runset._retcodes)):
|
||||
runset._set_retcode(i, 0)
|
||||
return CmdStanMLE(runset)
|
||||
elif config_dict['method'] == 'variational':
|
||||
if 'algorithm' not in config_dict:
|
||||
raise ValueError(
|
||||
"Cannot find variational algorithm"
|
||||
" in file {}.".format(csvfiles[0])
|
||||
)
|
||||
variational_args = VariationalArgs(
|
||||
algorithm=config_dict['algorithm'],
|
||||
iter=config_dict['iter'],
|
||||
grad_samples=config_dict['grad_samples'],
|
||||
elbo_samples=config_dict['elbo_samples'],
|
||||
eta=config_dict['eta'],
|
||||
tol_rel_obj=config_dict['tol_rel_obj'],
|
||||
eval_elbo=config_dict['eval_elbo'],
|
||||
output_samples=config_dict['output_samples'],
|
||||
)
|
||||
cmdstan_args = CmdStanArgs(
|
||||
model_name=config_dict['model'],
|
||||
model_exe=config_dict['model'],
|
||||
chain_ids=None,
|
||||
method_args=variational_args,
|
||||
)
|
||||
runset = RunSet(args=cmdstan_args)
|
||||
runset._csv_files = csvfiles
|
||||
for i in range(len(runset._retcodes)):
|
||||
runset._set_retcode(i, 0)
|
||||
return CmdStanVB(runset)
|
||||
elif config_dict['method'] == 'laplace':
|
||||
laplace_args = LaplaceArgs(
|
||||
mode=config_dict['mode'],
|
||||
draws=config_dict['draws'],
|
||||
jacobian=config_dict['jacobian'],
|
||||
)
|
||||
cmdstan_args = CmdStanArgs(
|
||||
model_name=config_dict['model'],
|
||||
model_exe=config_dict['model'],
|
||||
chain_ids=None,
|
||||
method_args=laplace_args,
|
||||
)
|
||||
runset = RunSet(args=cmdstan_args)
|
||||
runset._csv_files = csvfiles
|
||||
for i in range(len(runset._retcodes)):
|
||||
runset._set_retcode(i, 0)
|
||||
mode: CmdStanMLE = from_csv(
|
||||
config_dict['mode'],
|
||||
method='optimize',
|
||||
) # type: ignore
|
||||
return CmdStanLaplace(runset, mode=mode)
|
||||
elif config_dict['method'] == 'pathfinder':
|
||||
pathfinder_args = PathfinderArgs(
|
||||
num_draws=config_dict['num_draws'],
|
||||
num_paths=config_dict['num_paths'],
|
||||
)
|
||||
cmdstan_args = CmdStanArgs(
|
||||
model_name=config_dict['model'],
|
||||
model_exe=config_dict['model'],
|
||||
chain_ids=None,
|
||||
method_args=pathfinder_args,
|
||||
)
|
||||
runset = RunSet(args=cmdstan_args)
|
||||
runset._csv_files = csvfiles
|
||||
for i in range(len(runset._retcodes)):
|
||||
runset._set_retcode(i, 0)
|
||||
return CmdStanPathfinder(runset)
|
||||
else:
|
||||
get_logger().info(
|
||||
'Unable to process CSV output files from method %s.',
|
||||
(config_dict['method']),
|
||||
)
|
||||
return None
|
||||
except (IOError, OSError, PermissionError) as e:
|
||||
raise ValueError(
|
||||
'An error occurred processing the CSV files:\n\t{}'.format(str(e))
|
||||
) from e
|
||||
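A minimal usage sketch of from_csv follows; the output directory './csv_out' and the variable name 'theta' are hypothetical, not taken from this commit.

import cmdstanpy

# Reassemble a fit from previously saved sampler output.
# method='sample' asserts the CSV files came from the NUTS/HMC sampler.
fit = cmdstanpy.from_csv(path='./csv_out', method='sample')
if fit is not None:
    print(fit.draws().shape)           # (draws, chains, columns)
    print(fit.stan_variable('theta'))  # 'theta' is a hypothetical variable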
Binary files not shown.
734
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/gq.py
Normal file
@@ -0,0 +1,734 @@
|
||||
"""
|
||||
Container for the result of running the
|
||||
generate quantities (GQ) method
|
||||
"""
|
||||
|
||||
from collections import Counter
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Generic,
|
||||
Hashable,
|
||||
List,
|
||||
MutableMapping,
|
||||
NoReturn,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
import xarray as xr
|
||||
|
||||
XARRAY_INSTALLED = True
|
||||
except ImportError:
|
||||
XARRAY_INSTALLED = False
|
||||
|
||||
|
||||
from cmdstanpy.cmdstan_args import Method
|
||||
from cmdstanpy.utils import build_xarray_data, flatten_chains, get_logger
|
||||
from cmdstanpy.utils.stancsv import scan_generic_csv
|
||||
|
||||
from .mcmc import CmdStanMCMC
|
||||
from .metadata import InferenceMetadata
|
||||
from .mle import CmdStanMLE
|
||||
from .runset import RunSet
|
||||
from .vb import CmdStanVB
|
||||
|
||||
Fit = TypeVar('Fit', CmdStanMCMC, CmdStanMLE, CmdStanVB)
|
||||
|
||||
|
||||
class CmdStanGQ(Generic[Fit]):
|
||||
"""
|
||||
Container for outputs from CmdStan generate_quantities run.
|
||||
Created by :meth:`CmdStanModel.generate_quantities`.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
runset: RunSet,
|
||||
previous_fit: Fit,
|
||||
) -> None:
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.GENERATE_QUANTITIES:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting generate_quantities runset, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self.runset = runset
|
||||
|
||||
self.previous_fit: Fit = previous_fit
|
||||
|
||||
self._draws: np.ndarray = np.array(())
|
||||
config = self._validate_csv_files()
|
||||
self._metadata = InferenceMetadata(config)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr = 'CmdStanGQ: model={} chains={}{}'.format(
|
||||
self.runset.model,
|
||||
self.chains,
|
||||
self.runset._args.method_args.compose(0, cmd=[]),
|
||||
)
|
||||
repr = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format(
|
||||
repr,
|
||||
'\n\t'.join(self.runset.csv_files),
|
||||
'\n\t'.join(self.runset.stdout_files),
|
||||
)
|
||||
return repr
|
||||
|
||||
def __getattr__(self, attr: str) -> np.ndarray:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def __getstate__(self) -> dict:
|
||||
# This function returns the mapping of objects to serialize with pickle.
|
||||
# See https://docs.python.org/3/library/pickle.html#object.__getstate__
|
||||
# for details. We call _assemble_generated_quantities to ensure
|
||||
# the data are loaded prior to serialization.
|
||||
self._assemble_generated_quantities()
|
||||
return self.__dict__
|
||||
|
||||
def _validate_csv_files(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Checks that Stan CSV output files for all chains are consistent
|
||||
and returns dict containing config and column names.
|
||||
|
||||
Raises exception when inconsistencies detected.
|
||||
"""
|
||||
dzero = {}
|
||||
for i in range(self.chains):
|
||||
if i == 0:
|
||||
dzero = scan_generic_csv(
|
||||
path=self.runset.csv_files[i],
|
||||
)
|
||||
else:
|
||||
drest = scan_generic_csv(
|
||||
path=self.runset.csv_files[i],
|
||||
)
|
||||
for key in dzero:
|
||||
if (
|
||||
key
|
||||
not in [
|
||||
'id',
|
||||
'fitted_params',
|
||||
'diagnostic_file',
|
||||
'metric_file',
|
||||
'profile_file',
|
||||
'init',
|
||||
'seed',
|
||||
'start_datetime',
|
||||
]
|
||||
and dzero[key] != drest[key]
|
||||
):
|
||||
raise ValueError(
|
||||
'CmdStan config mismatch in Stan CSV file {}: '
|
||||
'arg {} is {}, expected {}'.format(
|
||||
self.runset.csv_files[i],
|
||||
key,
|
||||
dzero[key],
|
||||
drest[key],
|
||||
)
|
||||
)
|
||||
return dzero
|
||||
|
||||
@property
|
||||
def chains(self) -> int:
|
||||
"""Number of chains."""
|
||||
return self.runset.chains
|
||||
|
||||
@property
|
||||
def chain_ids(self) -> List[int]:
|
||||
"""Chain ids."""
|
||||
return self.runset.chain_ids
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of generated quantities of interest.
|
||||
"""
|
||||
return self._metadata.cmdstan_config['column_names'] # type: ignore
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
def draws(
|
||||
self,
|
||||
*,
|
||||
inc_warmup: bool = False,
|
||||
inc_iterations: bool = False,
|
||||
concat_chains: bool = False,
|
||||
inc_sample: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Returns a numpy.ndarray over the generated quantities draws from
|
||||
all chains which is stored column major so that the values
|
||||
for a parameter are contiguous in memory, likewise all draws from
|
||||
a chain are contiguous. By default, returns a 3D array arranged
|
||||
(draws, chains, columns); parameter ``concat_chains=True`` will
|
||||
return a 2D array where all chains are flattened into a single column,
|
||||
preserving chain order, so that given M chains of N draws,
|
||||
the first N draws are from chain 1, ..., and the last N draws
|
||||
are from chain M.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
:param concat_chains: When ``True`` return a 2D array flattening all
|
||||
draws from all chains. Default value is ``False``.
|
||||
|
||||
:param inc_sample: When ``True`` include all columns in the previous_fit
|
||||
draws array as well, excepting columns for variables already present
|
||||
in the generated quantities drawset. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.draws_pd
|
||||
CmdStanGQ.draws_xr
|
||||
CmdStanMCMC.draws
|
||||
"""
|
||||
self._assemble_generated_quantities()
|
||||
inc_warmup |= inc_iterations
|
||||
if inc_warmup:
|
||||
if (
|
||||
isinstance(self.previous_fit, CmdStanMCMC)
|
||||
and not self.previous_fit._save_warmup
|
||||
):
|
||||
get_logger().warning(
|
||||
"Sample doesn't contain draws from warmup iterations,"
|
||||
' rerun sampler with "save_warmup=True".'
|
||||
)
|
||||
elif (
|
||||
isinstance(self.previous_fit, CmdStanMLE)
|
||||
and not self.previous_fit._save_iterations
|
||||
):
|
||||
get_logger().warning(
|
||||
"MLE doesn't contain draws from pre-convergence iterations,"
|
||||
' rerun optimization with "save_iterations=True".'
|
||||
)
|
||||
elif isinstance(self.previous_fit, CmdStanVB):
|
||||
get_logger().warning(
|
||||
"Variational fit doesn't make sense with argument "
|
||||
'"inc_warmup=True"'
|
||||
)
|
||||
|
||||
if inc_sample:
|
||||
cols_1 = self.previous_fit.column_names
|
||||
cols_2 = self.column_names
|
||||
dups = [
|
||||
item
|
||||
for item, count in Counter(cols_1 + cols_2).items()
|
||||
if count > 1
|
||||
]
|
||||
drop_cols: List[int] = []
|
||||
for dup in dups:
|
||||
drop_cols.extend(
|
||||
self.previous_fit._metadata.stan_vars[dup].columns()
|
||||
)
|
||||
|
||||
start_idx, _ = self._draws_start(inc_warmup)
|
||||
previous_draws = self._previous_draws(True)
|
||||
if concat_chains and inc_sample:
|
||||
return flatten_chains(
|
||||
np.dstack(
|
||||
(
|
||||
np.delete(previous_draws, drop_cols, axis=1),
|
||||
self._draws,
|
||||
)
|
||||
)[start_idx:, :, :]
|
||||
)
|
||||
if concat_chains:
|
||||
return flatten_chains(self._draws[start_idx:, :, :])
|
||||
if inc_sample:
|
||||
return np.dstack(
|
||||
(
|
||||
np.delete(previous_draws, drop_cols, axis=1),
|
||||
self._draws,
|
||||
)
|
||||
)[start_idx:, :, :]
|
||||
return self._draws[start_idx:, :, :]
|
||||
|
||||
def draws_pd(
|
||||
self,
|
||||
vars: Union[List[str], str, None] = None,
|
||||
inc_warmup: bool = False,
|
||||
inc_sample: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Returns the generated quantities draws as a pandas DataFrame.
|
||||
Flattens all chains into a single column. Container variables
|
||||
(array, vector, matrix) will span multiple columns, one column
|
||||
per element. E.g. variable 'matrix[2,2] foo' spans 4 columns:
|
||||
'foo[1,1], ... foo[2,2]'.
|
||||
|
||||
:param vars: optional list of variable names.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.draws
|
||||
CmdStanGQ.draws_xr
|
||||
CmdStanMCMC.draws_pd
|
||||
"""
|
||||
if vars is not None:
|
||||
if isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
|
||||
vars_list = list(dict.fromkeys(vars_list))
|
||||
|
||||
if inc_warmup:
|
||||
if (
|
||||
isinstance(self.previous_fit, CmdStanMCMC)
|
||||
and not self.previous_fit._save_warmup
|
||||
):
|
||||
get_logger().warning(
|
||||
"Sample doesn't contain draws from warmup iterations,"
|
||||
' rerun sampler with "save_warmup=True".'
|
||||
)
|
||||
elif (
|
||||
isinstance(self.previous_fit, CmdStanMLE)
|
||||
and not self.previous_fit._save_iterations
|
||||
):
|
||||
get_logger().warning(
|
||||
"MLE doesn't contain draws from pre-convergence iterations,"
|
||||
' rerun optimization with "save_iterations=True".'
|
||||
)
|
||||
elif isinstance(self.previous_fit, CmdStanVB):
|
||||
get_logger().warning(
|
||||
"Variational fit doesn't make sense with argument "
|
||||
'"inc_warmup=True"'
|
||||
)
|
||||
|
||||
self._assemble_generated_quantities()
|
||||
|
||||
all_columns = ['chain__', 'iter__', 'draw__'] + list(self.column_names)
|
||||
|
||||
gq_cols: List[str] = []
|
||||
mcmc_vars: List[str] = []
|
||||
if vars is not None:
|
||||
for var in vars_list:
|
||||
if var in self._metadata.stan_vars:
|
||||
info = self._metadata.stan_vars[var]
|
||||
gq_cols.extend(
|
||||
self.column_names[info.start_idx : info.end_idx]
|
||||
)
|
||||
elif (
|
||||
inc_sample and var in self.previous_fit._metadata.stan_vars
|
||||
):
|
||||
info = self.previous_fit._metadata.stan_vars[var]
|
||||
mcmc_vars.extend(
|
||||
self.previous_fit.column_names[
|
||||
info.start_idx : info.end_idx
|
||||
]
|
||||
)
|
||||
elif var in ['chain__', 'iter__', 'draw__']:
|
||||
gq_cols.append(var)
|
||||
else:
|
||||
raise ValueError('Unknown variable: {}'.format(var))
|
||||
else:
|
||||
gq_cols = all_columns
|
||||
vars_list = gq_cols
|
||||
|
||||
previous_draws_pd = self._previous_draws_pd(mcmc_vars, inc_warmup)
|
||||
|
||||
draws = self.draws(inc_warmup=inc_warmup)
|
||||
# add long-form columns for chain, iteration, draw
|
||||
n_draws, n_chains, _ = draws.shape
|
||||
chains_col = (
|
||||
np.repeat(np.arange(1, n_chains + 1), n_draws)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
iter_col = (
|
||||
np.tile(np.arange(1, n_draws + 1), n_chains)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
draw_col = (
|
||||
np.arange(1, (n_draws * n_chains) + 1)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
draws = np.concatenate([chains_col, iter_col, draw_col, draws], axis=2)
|
||||
|
||||
draws_pd = pd.DataFrame(
|
||||
data=flatten_chains(draws),
|
||||
columns=all_columns,
|
||||
)
|
||||
|
||||
if inc_sample and mcmc_vars:
|
||||
if gq_cols:
|
||||
return pd.concat(
|
||||
[
|
||||
previous_draws_pd,
|
||||
draws_pd[gq_cols],
|
||||
],
|
||||
axis='columns',
|
||||
)[vars_list]
|
||||
else:
|
||||
return previous_draws_pd
|
||||
elif inc_sample and vars is None:
|
||||
cols_1 = list(previous_draws_pd.columns)
|
||||
cols_2 = list(draws_pd.columns)
|
||||
dups = [
|
||||
item
|
||||
for item, count in Counter(cols_1 + cols_2).items()
|
||||
if count > 1
|
||||
]
|
||||
return pd.concat(
|
||||
[
|
||||
previous_draws_pd.drop(columns=dups).reset_index(drop=True),
|
||||
draws_pd,
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
elif gq_cols:
|
||||
return draws_pd[gq_cols]
|
||||
|
||||
return draws_pd
|
||||
|
||||
@overload
|
||||
def draws_xr(
|
||||
self: Union["CmdStanGQ[CmdStanMLE]", "CmdStanGQ[CmdStanVB]"],
|
||||
vars: Union[str, List[str], None] = None,
|
||||
inc_warmup: bool = False,
|
||||
inc_sample: bool = False,
|
||||
) -> NoReturn:
|
||||
...
|
||||
|
||||
@overload
|
||||
def draws_xr(
|
||||
self: "CmdStanGQ[CmdStanMCMC]",
|
||||
vars: Union[str, List[str], None] = None,
|
||||
inc_warmup: bool = False,
|
||||
inc_sample: bool = False,
|
||||
) -> "xr.Dataset":
|
||||
...
|
||||
|
||||
def draws_xr(
|
||||
self,
|
||||
vars: Union[str, List[str], None] = None,
|
||||
inc_warmup: bool = False,
|
||||
inc_sample: bool = False,
|
||||
) -> "xr.Dataset":
|
||||
"""
|
||||
Returns the generated quantities draws as a xarray Dataset.
|
||||
|
||||
This method can only be called when the underlying fit was made
|
||||
through sampling, it cannot be used on MLE or VB outputs.
|
||||
|
||||
:param vars: optional list of variable names.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the MCMC sample, then the warmup draws are included.
|
||||
Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.draws
|
||||
CmdStanGQ.draws_pd
|
||||
CmdStanMCMC.draws_xr
|
||||
"""
|
||||
if not XARRAY_INSTALLED:
|
||||
raise RuntimeError(
|
||||
'Package "xarray" is not installed, cannot produce draws array.'
|
||||
)
|
||||
if not isinstance(self.previous_fit, CmdStanMCMC):
|
||||
raise RuntimeError(
|
||||
'Method "draws_xr" is only available when '
|
||||
'original fit is done via Sampling.'
|
||||
)
|
||||
mcmc_vars_list = []
|
||||
dup_vars = []
|
||||
if vars is not None:
|
||||
if isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
for var in vars_list:
|
||||
if var not in self._metadata.stan_vars:
|
||||
if inc_sample and (
|
||||
var in self.previous_fit._metadata.stan_vars
|
||||
):
|
||||
mcmc_vars_list.append(var)
|
||||
dup_vars.append(var)
|
||||
else:
|
||||
raise ValueError('Unknown variable: {}'.format(var))
|
||||
else:
|
||||
vars_list = list(self._metadata.stan_vars.keys())
|
||||
if inc_sample:
|
||||
for var in self.previous_fit._metadata.stan_vars.keys():
|
||||
if var not in vars_list and var not in mcmc_vars_list:
|
||||
mcmc_vars_list.append(var)
|
||||
for var in dup_vars:
|
||||
vars_list.remove(var)
|
||||
|
||||
self._assemble_generated_quantities()
|
||||
|
||||
num_draws = self.previous_fit.num_draws_sampling
|
||||
sample_config = self.previous_fit._metadata.cmdstan_config
|
||||
attrs: MutableMapping[Hashable, Any] = {
|
||||
"stan_version": f"{sample_config['stan_version_major']}."
|
||||
f"{sample_config['stan_version_minor']}."
|
||||
f"{sample_config['stan_version_patch']}",
|
||||
"model": sample_config["model"],
|
||||
"num_draws_sampling": num_draws,
|
||||
}
|
||||
if inc_warmup and sample_config['save_warmup']:
|
||||
num_draws += self.previous_fit.num_draws_warmup
|
||||
attrs["num_draws_warmup"] = self.previous_fit.num_draws_warmup
|
||||
|
||||
data: MutableMapping[Hashable, Any] = {}
|
||||
coordinates: MutableMapping[Hashable, Any] = {
|
||||
"chain": self.chain_ids,
|
||||
"draw": np.arange(num_draws),
|
||||
}
|
||||
|
||||
for var in vars_list:
|
||||
build_xarray_data(
|
||||
data,
|
||||
self._metadata.stan_vars[var],
|
||||
self.draws(inc_warmup=inc_warmup),
|
||||
)
|
||||
if inc_sample:
|
||||
for var in mcmc_vars_list:
|
||||
build_xarray_data(
|
||||
data,
|
||||
self.previous_fit._metadata.stan_vars[var],
|
||||
self.previous_fit.draws(inc_warmup=inc_warmup),
|
||||
)
|
||||
|
||||
return xr.Dataset(data, coords=coordinates, attrs=attrs).transpose(
|
||||
'chain', 'draw', ...
|
||||
)
|
||||
|
||||
def stan_variable(self, var: str, **kwargs: bool) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the set of draws
|
||||
for the named Stan program variable. Flattens the chains,
|
||||
leaving the draws in chain order. The first array dimension,
|
||||
corresponds to number of draws in the sample.
|
||||
The remaining dimensions correspond to
|
||||
the shape of the Stan program variable.
|
||||
|
||||
Internally, draws are stored in chain order, i.e., for a sample with
|
||||
N chains of M draws each, the first M array elements are from chain 1,
|
||||
the next M are from chain 2, and the last M elements are from chain N.
|
||||
|
||||
* If the variable is a scalar variable, the return array has shape
|
||||
( draws * chains, 1).
|
||||
* If the variable is a vector, the return array has shape
|
||||
( draws * chains, len(vector))
|
||||
* If the variable is a matrix, the return array has shape
|
||||
( draws * chains, size(dim 1), size(dim 2) )
|
||||
* If the variable is an array with N dimensions, the return array
|
||||
has shape ( draws * chains, size(dim 1), ..., size(dim N))
|
||||
|
||||
For example, if the Stan program variable ``theta`` is a 3x3 matrix,
|
||||
and the sample consists of 4 chains with 1000 post-warmup draws,
|
||||
this function will return a numpy.ndarray with shape (4000,3,3).
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
:param kwargs: Additional keyword arguments are passed to the underlying
|
||||
fit's ``stan_variable`` method if the variable is not a generated
|
||||
quantity.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.stan_variables
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanMLE.stan_variable
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanLaplace.stan_variable
|
||||
"""
|
||||
model_var_names = self.previous_fit._metadata.stan_vars.keys()
|
||||
gq_var_names = self._metadata.stan_vars.keys()
|
||||
if not (var in model_var_names or var in gq_var_names):
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(model_var_names | gq_var_names)
|
||||
)
|
||||
if var not in gq_var_names:
|
||||
# TODO(2.0) atleast1d may not be needed
|
||||
return np.atleast_1d( # type: ignore
|
||||
self.previous_fit.stan_variable(var, **kwargs)
|
||||
)
|
||||
|
||||
# is gq variable
|
||||
self._assemble_generated_quantities()
|
||||
|
||||
draw1, _ = self._draws_start(
|
||||
inc_warmup=kwargs.get('inc_warmup', False)
|
||||
or kwargs.get('inc_iterations', False)
|
||||
)
|
||||
draws = flatten_chains(self._draws[draw1:])
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(draws)
|
||||
return out
|
||||
|
||||
def stan_variables(self, **kwargs: bool) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variables names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
:param kwargs: Additional keyword arguments are passed to the underlying
|
||||
fit's ``stan_variable`` method if the variable is not a generated
|
||||
quantity.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanVB.stan_variables
|
||||
CmdStanLaplace.stan_variables
|
||||
"""
|
||||
result = {}
|
||||
sample_var_names = self.previous_fit._metadata.stan_vars.keys()
|
||||
gq_var_names = self._metadata.stan_vars.keys()
|
||||
for name in gq_var_names:
|
||||
result[name] = self.stan_variable(name, **kwargs)
|
||||
for name in sample_var_names:
|
||||
if name not in gq_var_names:
|
||||
result[name] = self.stan_variable(name, **kwargs)
|
||||
return result
|
||||
|
||||
def _assemble_generated_quantities(self) -> None:
|
||||
if self._draws.shape != (0,):
|
||||
return
|
||||
# use numpy loadtxt
|
||||
_, num_draws = self._draws_start(inc_warmup=True)
|
||||
|
||||
gq_sample: np.ndarray = np.empty(
|
||||
(num_draws, self.chains, len(self.column_names)),
|
||||
dtype=float,
|
||||
order='F',
|
||||
)
|
||||
for chain in range(self.chains):
|
||||
with open(self.runset.csv_files[chain], 'r') as fd:
|
||||
lines = (line for line in fd if not line.startswith('#'))
|
||||
gq_sample[:, chain, :] = np.loadtxt(
|
||||
lines, dtype=np.ndarray, ndmin=2, skiprows=1, delimiter=','
|
||||
)
|
||||
self._draws = gq_sample
|
||||
|
||||
def _draws_start(self, inc_warmup: bool) -> Tuple[int, int]:
|
||||
draw1 = 0
|
||||
p_fit = self.previous_fit
|
||||
if isinstance(p_fit, CmdStanMCMC):
|
||||
num_draws = p_fit.num_draws_sampling
|
||||
if p_fit._save_warmup:
|
||||
if inc_warmup:
|
||||
num_draws += p_fit.num_draws_warmup
|
||||
else:
|
||||
draw1 = p_fit.num_draws_warmup
|
||||
|
||||
elif isinstance(p_fit, CmdStanMLE):
|
||||
num_draws = 1
|
||||
if p_fit._save_iterations:
|
||||
opt_iters = len(p_fit.optimized_iterations_np) # type: ignore
|
||||
if inc_warmup:
|
||||
num_draws = opt_iters
|
||||
else:
|
||||
draw1 = opt_iters - 1
|
||||
else: # CmdStanVB:
|
||||
draw1 = 1 # skip mean
|
||||
num_draws = p_fit.variational_sample.shape[0]
|
||||
if inc_warmup:
|
||||
num_draws += 1
|
||||
|
||||
return draw1, num_draws
|
||||
|
||||
def _previous_draws(self, inc_warmup: bool) -> np.ndarray:
|
||||
"""
|
||||
Extract the draws from self.previous_fit.
|
||||
Return is always 3-d
|
||||
"""
|
||||
p_fit = self.previous_fit
|
||||
if isinstance(p_fit, CmdStanMCMC):
|
||||
return p_fit.draws(inc_warmup=inc_warmup)
|
||||
elif isinstance(p_fit, CmdStanMLE):
|
||||
if inc_warmup and p_fit._save_iterations:
|
||||
return p_fit.optimized_iterations_np[:, None] # type: ignore
|
||||
|
||||
return np.atleast_2d( # type: ignore
|
||||
p_fit.optimized_params_np,
|
||||
)[:, None]
|
||||
else: # CmdStanVB:
|
||||
if inc_warmup:
|
||||
return np.vstack(
|
||||
[p_fit.variational_params_np, p_fit.variational_sample]
|
||||
)[:, None]
|
||||
return p_fit.variational_sample[:, None]
|
||||
|
||||
def _previous_draws_pd(
|
||||
self, vars: List[str], inc_warmup: bool
|
||||
) -> pd.DataFrame:
|
||||
if vars:
|
||||
sel: Union[List[str], slice] = vars
|
||||
else:
|
||||
sel = slice(None, None)
|
||||
|
||||
p_fit = self.previous_fit
|
||||
if isinstance(p_fit, CmdStanMCMC):
|
||||
return p_fit.draws_pd(vars or None, inc_warmup=inc_warmup)
|
||||
|
||||
elif isinstance(p_fit, CmdStanMLE):
|
||||
if inc_warmup and p_fit._save_iterations:
|
||||
return p_fit.optimized_iterations_pd[sel] # type: ignore
|
||||
else:
|
||||
return p_fit.optimized_params_pd[sel]
|
||||
else: # CmdStanVB:
|
||||
return p_fit.variational_sample_pd[sel]
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, clean filename.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self.runset.save_csvfiles(dir)
|
||||
|
||||
# TODO(2.0): remove
|
||||
@property
|
||||
def mcmc_sample(self) -> Union[CmdStanMCMC, CmdStanMLE, CmdStanVB]:
|
||||
get_logger().warning(
|
||||
"Property `mcmc_sample` is deprecated, use `previous_fit` instead"
|
||||
)
|
||||
return self.previous_fit
|
||||
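A sketch of how a CmdStanGQ object is typically produced and queried; the model and data file names below are hypothetical.

from cmdstanpy import CmdStanModel

# Fit a model, then run a second program's generated-quantities block
# over the existing draws.
model = CmdStanModel(stan_file='bernoulli.stan')
mcmc_fit = model.sample(data='bernoulli.data.json')

gq_model = CmdStanModel(stan_file='bernoulli_ppc.stan')
gq = gq_model.generate_quantities(
    data='bernoulli.data.json', previous_fit=mcmc_fit
)
print(gq.draws(concat_chains=True).shape)    # (draws * chains, gq columns)
print(gq.draws_pd(inc_sample=True).columns)  # gq columns plus sample columns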
304
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/laplace.py
Normal file
@@ -0,0 +1,304 @@
|
||||
"""
|
||||
Container for the result of running a laplace approximation.
|
||||
"""
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Hashable,
|
||||
List,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
import xarray as xr
|
||||
|
||||
XARRAY_INSTALLED = True
|
||||
except ImportError:
|
||||
XARRAY_INSTALLED = False
|
||||
|
||||
from cmdstanpy.cmdstan_args import Method
|
||||
from cmdstanpy.utils.data_munging import build_xarray_data
|
||||
from cmdstanpy.utils.stancsv import scan_generic_csv
|
||||
|
||||
from .metadata import InferenceMetadata
|
||||
from .mle import CmdStanMLE
|
||||
from .runset import RunSet
|
||||
|
||||
# TODO list:
|
||||
# - docs and example notebook
|
||||
# - make sure features like standalone GQ are updated/working
|
||||
|
||||
|
||||
class CmdStanLaplace:
|
||||
def __init__(self, runset: RunSet, mode: CmdStanMLE) -> None:
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.LAPLACE:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting laplace runset, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self._runset = runset
|
||||
self._mode = mode
|
||||
|
||||
self._draws: np.ndarray = np.array(())
|
||||
|
||||
config = scan_generic_csv(runset.csv_files[0])
|
||||
self._metadata = InferenceMetadata(config)
|
||||
|
||||
def _assemble_draws(self) -> None:
|
||||
if self._draws.shape != (0,):
|
||||
return
|
||||
|
||||
with open(self._runset.csv_files[0], 'r') as fd:
|
||||
while (fd.readline()).startswith("#"):
|
||||
pass
|
||||
self._draws = np.loadtxt(
|
||||
fd,
|
||||
dtype=float,
|
||||
ndmin=2,
|
||||
delimiter=',',
|
||||
comments="#",
|
||||
)
|
||||
|
||||
def stan_variable(self, var: str) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the estimates for the
named Stan program variable where the dimensions of the
|
||||
numpy.ndarray match the shape of the Stan program variable.
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanGQ.stan_variable
|
||||
"""
|
||||
self._assemble_draws()
|
||||
try:
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(
|
||||
self._draws
|
||||
)
|
||||
return out
|
||||
except KeyError:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(self._metadata.stan_vars.keys())
|
||||
)
|
||||
|
||||
def stan_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variables names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanVB.stan_variables
|
||||
"""
|
||||
result = {}
|
||||
for name in self._metadata.stan_vars:
|
||||
result[name] = self.stan_variable(name)
|
||||
return result
|
||||
|
||||
def method_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Returns a dictionary of all sampler variables, i.e., all
|
||||
output column names ending in `__`. Assumes that all variables
|
||||
are scalar variables where column name is variable name.
|
||||
Maps each column name to a numpy.ndarray (draws x chains x 1)
|
||||
containing per-draw diagnostic values.
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return {
|
||||
name: var.extract_reshape(self._draws)
|
||||
for name, var in self._metadata.method_vars.items()
|
||||
}
|
||||
|
||||
def draws(self) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray containing the draws from the
|
||||
approximate posterior distribution. This is a 2-D array
|
||||
of shape (draws, parameters).
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return self._draws
|
||||
|
||||
def draws_pd(
|
||||
self,
|
||||
vars: Union[List[str], str, None] = None,
|
||||
) -> pd.DataFrame:
|
||||
if vars is not None:
|
||||
if isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
|
||||
self._assemble_draws()
|
||||
cols = []
|
||||
if vars is not None:
|
||||
for var in dict.fromkeys(vars_list):
|
||||
if var in self._metadata.method_vars:
|
||||
cols.append(var)
|
||||
elif var in self._metadata.stan_vars:
|
||||
info = self._metadata.stan_vars[var]
|
||||
cols.extend(
|
||||
self.column_names[info.start_idx : info.end_idx]
|
||||
)
|
||||
else:
|
||||
raise ValueError(f'Unknown variable: {var}')
|
||||
|
||||
else:
|
||||
cols = list(self.column_names)
|
||||
|
||||
return pd.DataFrame(self._draws, columns=self.column_names)[cols]
|
||||
|
||||
def draws_xr(
|
||||
self,
|
||||
vars: Union[str, List[str], None] = None,
|
||||
) -> "xr.Dataset":
|
||||
"""
|
||||
Returns the sampler draws as a xarray Dataset.
|
||||
|
||||
:param vars: optional list of variable names.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.draws_xr
|
||||
CmdStanGQ.draws_xr
|
||||
"""
|
||||
if not XARRAY_INSTALLED:
|
||||
raise RuntimeError(
|
||||
'Package "xarray" is not installed, cannot produce draws array.'
|
||||
)
|
||||
|
||||
if vars is None:
|
||||
vars_list = list(self._metadata.stan_vars.keys())
|
||||
elif isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
|
||||
self._assemble_draws()
|
||||
|
||||
meta = self._metadata.cmdstan_config
|
||||
attrs: MutableMapping[Hashable, Any] = {
|
||||
"stan_version": f"{meta['stan_version_major']}."
|
||||
f"{meta['stan_version_minor']}.{meta['stan_version_patch']}",
|
||||
"model": meta["model"],
|
||||
}
|
||||
|
||||
data: MutableMapping[Hashable, Any] = {}
|
||||
coordinates: MutableMapping[Hashable, Any] = {
|
||||
"draw": np.arange(self._draws.shape[0]),
|
||||
}
|
||||
|
||||
for var in vars_list:
|
||||
build_xarray_data(
|
||||
data,
|
||||
self._metadata.stan_vars[var],
|
||||
self._draws[:, np.newaxis, :],
|
||||
)
|
||||
return (
|
||||
xr.Dataset(data, coords=coordinates, attrs=attrs)
|
||||
.transpose('draw', ...)
|
||||
.squeeze()
|
||||
)
|
||||
|
||||
@property
|
||||
def mode(self) -> CmdStanMLE:
|
||||
"""
|
||||
Return the maximum a posteriori estimate (mode)
|
||||
as a :class:`CmdStanMLE` object.
|
||||
"""
|
||||
return self._mode
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
def __repr__(self) -> str:
|
||||
mode = '\n'.join(
|
||||
['\t' + line for line in repr(self.mode).splitlines()]
|
||||
)[1:]
|
||||
rep = 'CmdStanLaplace: model={} \nmode=({})\n{}'.format(
|
||||
self._runset.model,
|
||||
mode,
|
||||
self._runset._args.method_args.compose(0, cmd=[]),
|
||||
)
|
||||
rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format(
|
||||
rep,
|
||||
'\n\t'.join(self._runset.csv_files),
|
||||
'\n\t'.join(self._runset.stdout_files),
|
||||
)
|
||||
return rep
|
||||
|
||||
def __getattr__(self, attr: str) -> np.ndarray:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def __getstate__(self) -> dict:
|
||||
# This function returns the mapping of objects to serialize with pickle.
|
||||
# See https://docs.python.org/3/library/pickle.html#object.__getstate__
|
||||
# for details. We call _assemble_draws to ensure posterior samples have
|
||||
# been loaded prior to serialization.
|
||||
self._assemble_draws()
|
||||
return self.__dict__
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of all outputs from the sampler, comprising sampler parameters
|
||||
and all components of all model parameters, transformed parameters,
|
||||
and quantities of interest. Corresponds to Stan CSV file header row,
|
||||
with names munged to array notation, e.g. `beta[1]` not `beta.1`.
|
||||
"""
|
||||
return self._metadata.cmdstan_config['column_names'] # type: ignore
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, clean filename.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self._runset.save_csvfiles(dir)
|
||||
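A sketch of how the CmdStanLaplace container is usually obtained; the model and data file names are hypothetical.

from cmdstanpy import CmdStanModel

# laplace_sample() finds the posterior mode via optimization, then draws
# from a normal approximation centered at that mode.
model = CmdStanModel(stan_file='logistic.stan')
laplace_fit = model.laplace_sample(data='logistic.data.json', draws=1000)

print(laplace_fit.mode.optimized_params_dict)  # the CmdStanMLE mode
print(laplace_fit.draws().shape)               # (draws, columns)
variables = laplace_fit.stan_variables()       # dict of name -> ndarray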
826
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/mcmc.py
Normal file
@@ -0,0 +1,826 @@
|
||||
"""
|
||||
Container for the result of running the sample (MCMC) method
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
from io import StringIO
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Hashable,
|
||||
List,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
try:
|
||||
import xarray as xr
|
||||
|
||||
XARRAY_INSTALLED = True
|
||||
except ImportError:
|
||||
XARRAY_INSTALLED = False
|
||||
|
||||
from cmdstanpy import _CMDSTAN_SAMPLING, _CMDSTAN_THIN, _CMDSTAN_WARMUP, _TMPDIR
|
||||
from cmdstanpy.cmdstan_args import Method, SamplerArgs
|
||||
from cmdstanpy.utils import (
|
||||
EXTENSION,
|
||||
build_xarray_data,
|
||||
check_sampler_csv,
|
||||
cmdstan_path,
|
||||
cmdstan_version_before,
|
||||
create_named_text_file,
|
||||
do_command,
|
||||
flatten_chains,
|
||||
get_logger,
|
||||
)
|
||||
|
||||
from .metadata import InferenceMetadata
|
||||
from .runset import RunSet
|
||||
|
||||
|
||||
class CmdStanMCMC:
|
||||
"""
|
||||
Container for outputs from CmdStan sampler run.
|
||||
Provides methods to summarize and diagnose the model fit
|
||||
and accessor methods to access the entire sample or
|
||||
individual items. Created by :meth:`CmdStanModel.sample`
|
||||
|
||||
The sample is lazily instantiated on first access of either
|
||||
the resulting sample or the HMC tuning parameters, i.e., the
|
||||
step size and metric.
|
||||
"""
|
||||
|
||||
# pylint: disable=too-many-public-methods
|
||||
def __init__(
|
||||
self,
|
||||
runset: RunSet,
|
||||
) -> None:
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.SAMPLE:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting sample runset, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self.runset = runset
|
||||
|
||||
# info from runset to be exposed
|
||||
sampler_args = self.runset._args.method_args
|
||||
assert isinstance(
|
||||
sampler_args, SamplerArgs
|
||||
) # make the typechecker happy
|
||||
self._iter_sampling: int = _CMDSTAN_SAMPLING
|
||||
if sampler_args.iter_sampling is not None:
|
||||
self._iter_sampling = sampler_args.iter_sampling
|
||||
self._iter_warmup: int = _CMDSTAN_WARMUP
|
||||
if sampler_args.iter_warmup is not None:
|
||||
self._iter_warmup = sampler_args.iter_warmup
|
||||
self._thin: int = _CMDSTAN_THIN
|
||||
if sampler_args.thin is not None:
|
||||
self._thin = sampler_args.thin
|
||||
self._is_fixed_param = sampler_args.fixed_param
|
||||
self._save_warmup: bool = sampler_args.save_warmup
|
||||
self._sig_figs = runset._args.sig_figs
|
||||
|
||||
# info from CSV values, instantiated lazily
|
||||
self._draws: np.ndarray = np.array(())
|
||||
# only valid when not is_fixed_param
|
||||
self._metric: np.ndarray = np.array(())
|
||||
self._step_size: np.ndarray = np.array(())
|
||||
self._divergences: np.ndarray = np.zeros(self.runset.chains, dtype=int)
|
||||
self._max_treedepths: np.ndarray = np.zeros(
|
||||
self.runset.chains, dtype=int
|
||||
)
|
||||
|
||||
# info from CSV initial comments and header
|
||||
config = self._validate_csv_files()
|
||||
self._metadata: InferenceMetadata = InferenceMetadata(config)
|
||||
if not self._is_fixed_param:
|
||||
self._check_sampler_diagnostics()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr = 'CmdStanMCMC: model={} chains={}{}'.format(
|
||||
self.runset.model,
|
||||
self.runset.chains,
|
||||
self.runset._args.method_args.compose(0, cmd=[]),
|
||||
)
|
||||
repr = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format(
|
||||
repr,
|
||||
'\n\t'.join(self.runset.csv_files),
|
||||
'\n\t'.join(self.runset.stdout_files),
|
||||
)
|
||||
# TODO - hamiltonian, profiling files
|
||||
return repr
|
||||
|
||||
def __getattr__(self, attr: str) -> np.ndarray:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def __getstate__(self) -> dict:
|
||||
# This function returns the mapping of objects to serialize with pickle.
|
||||
# See https://docs.python.org/3/library/pickle.html#object.__getstate__
|
||||
# for details. We call _assemble_draws to ensure posterior samples have
|
||||
# been loaded prior to serialization.
|
||||
self._assemble_draws()
|
||||
return self.__dict__
|
||||
|
||||
@property
|
||||
def chains(self) -> int:
|
||||
"""Number of chains."""
|
||||
return self.runset.chains
|
||||
|
||||
@property
|
||||
def chain_ids(self) -> List[int]:
|
||||
"""Chain ids."""
|
||||
return self.runset.chain_ids
|
||||
|
||||
@property
|
||||
def num_draws_warmup(self) -> int:
|
||||
"""Number of warmup draws per chain, i.e., thinned warmup iterations."""
|
||||
return int(math.ceil((self._iter_warmup) / self._thin))
|
||||
|
||||
@property
|
||||
def num_draws_sampling(self) -> int:
|
||||
"""
|
||||
Number of sampling (post-warmup) draws per chain, i.e.,
|
||||
thinned sampling iterations.
|
||||
"""
|
||||
return int(math.ceil((self._iter_sampling) / self._thin))
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of all outputs from the sampler, comprising sampler parameters
|
||||
and all components of all model parameters, transformed parameters,
|
||||
and quantities of interest. Corresponds to Stan CSV file header row,
|
||||
with names munged to array notation, e.g. `beta[1]` not `beta.1`.
|
||||
"""
|
||||
return self._metadata.cmdstan_config['column_names'] # type: ignore
|
||||
|
||||
@property
|
||||
def metric_type(self) -> Optional[str]:
|
||||
"""
|
||||
Metric type used for adaptation, either 'diag_e' or 'dense_e', according
|
||||
to CmdStan arg 'metric'.
|
||||
When sampler algorithm 'fixed_param' is specified, metric_type is None.
|
||||
"""
|
||||
return (
|
||||
self._metadata.cmdstan_config['metric']
|
||||
if not self._is_fixed_param
|
||||
else None
|
||||
)
|
||||
|
||||
@property
|
||||
def metric(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Metric used by sampler for each chain.
|
||||
When sampler algorithm 'fixed_param' is specified, metric is None.
|
||||
"""
|
||||
if self._is_fixed_param:
|
||||
return None
|
||||
if self._metadata.cmdstan_config['metric'] == 'unit_e':
|
||||
get_logger().info(
|
||||
'Unit diagonal metric, inverse mass matrix size unknown.'
|
||||
)
|
||||
return None
|
||||
self._assemble_draws()
|
||||
return self._metric
|
||||
|
||||
@property
|
||||
def step_size(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Step size used by sampler for each chain.
|
||||
When sampler algorithm 'fixed_param' is specified, step size is None.
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return self._step_size if not self._is_fixed_param else None
|
||||
|
||||
@property
|
||||
def thin(self) -> int:
|
||||
"""
|
||||
Period between recorded iterations. (Default is 1).
|
||||
"""
|
||||
return self._thin
|
||||
|
||||
@property
|
||||
def divergences(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Per-chain total number of post-warmup divergent iterations.
|
||||
When sampler algorithm 'fixed_param' is specified, returns None.
|
||||
"""
|
||||
return self._divergences if not self._is_fixed_param else None
|
||||
|
||||
@property
|
||||
def max_treedepths(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Per-chain total number of post-warmup iterations where the NUTS sampler
|
||||
reached the maximum allowed treedepth.
|
||||
When sampler algorithm 'fixed_param' is specified, returns None.
|
||||
"""
|
||||
return self._max_treedepths if not self._is_fixed_param else None
|
||||
|
||||
def draws(
|
||||
self, *, inc_warmup: bool = False, concat_chains: bool = False
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Returns a numpy.ndarray over all draws from all chains which is
|
||||
stored column major so that the values for a parameter are contiguous
|
||||
in memory, likewise all draws from a chain are contiguous.
|
||||
By default, returns a 3D array arranged (draws, chains, columns);
|
||||
parameter ``concat_chains=True`` will return a 2D array where all
|
||||
chains are flattened into a single column, preserving chain order,
|
||||
so that given M chains of N draws, the first N draws are from chain 1,
|
||||
up through the last N draws from chain M.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
:param concat_chains: When ``True`` return a 2D array flattening all
|
||||
draws from all chains. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.draws_pd
|
||||
CmdStanMCMC.draws_xr
|
||||
CmdStanGQ.draws
|
||||
"""
|
||||
self._assemble_draws()
|
||||
|
||||
if inc_warmup and not self._save_warmup:
|
||||
get_logger().warning(
|
||||
"Sample doesn't contain draws from warmup iterations,"
|
||||
' rerun sampler with "save_warmup=True".'
|
||||
)
|
||||
|
||||
start_idx = 0
|
||||
if not inc_warmup and self._save_warmup:
|
||||
start_idx = self.num_draws_warmup
|
||||
|
||||
if concat_chains:
|
||||
return flatten_chains(self._draws[start_idx:, :, :])
|
||||
return self._draws[start_idx:, :, :]
|
||||
|
||||
def _validate_csv_files(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Checks that Stan CSV output files for all chains are consistent
|
||||
and returns dict containing config and column names.
|
||||
|
||||
Tabulates sampling iters which are divergent or at max treedepth
|
||||
Raises exception when inconsistencies detected.
|
||||
"""
|
||||
dzero = {}
|
||||
for i in range(self.chains):
|
||||
if i == 0:
|
||||
dzero = check_sampler_csv(
|
||||
path=self.runset.csv_files[i],
|
||||
is_fixed_param=self._is_fixed_param,
|
||||
iter_sampling=self._iter_sampling,
|
||||
iter_warmup=self._iter_warmup,
|
||||
save_warmup=self._save_warmup,
|
||||
thin=self._thin,
|
||||
)
|
||||
if not self._is_fixed_param:
|
||||
self._divergences[i] = dzero['ct_divergences']
|
||||
self._max_treedepths[i] = dzero['ct_max_treedepth']
|
||||
else:
|
||||
drest = check_sampler_csv(
|
||||
path=self.runset.csv_files[i],
|
||||
is_fixed_param=self._is_fixed_param,
|
||||
iter_sampling=self._iter_sampling,
|
||||
iter_warmup=self._iter_warmup,
|
||||
save_warmup=self._save_warmup,
|
||||
thin=self._thin,
|
||||
)
|
||||
for key in dzero:
|
||||
# check args that matter for parsing, plus name, version
|
||||
if (
|
||||
key
|
||||
in [
|
||||
'stan_version_major',
|
||||
'stan_version_minor',
|
||||
'stan_version_patch',
|
||||
'stanc_version',
|
||||
'model',
|
||||
'num_samples',
|
||||
'num_warmup',
|
||||
'save_warmup',
|
||||
'thin',
|
||||
'refresh',
|
||||
]
|
||||
and dzero[key] != drest[key]
|
||||
):
|
||||
raise ValueError(
|
||||
'CmdStan config mismatch in Stan CSV file {}: '
|
||||
'arg {} is {}, expected {}'.format(
|
||||
self.runset.csv_files[i],
|
||||
key,
|
||||
dzero[key],
|
||||
drest[key],
|
||||
)
|
||||
)
|
||||
if not self._is_fixed_param:
|
||||
self._divergences[i] = drest['ct_divergences']
|
||||
self._max_treedepths[i] = drest['ct_max_treedepth']
|
||||
return dzero
|
||||
|
||||
def _check_sampler_diagnostics(self) -> None:
|
||||
"""
|
||||
Warn if any iterations ended in divergences or hit maxtreedepth.
|
||||
"""
|
||||
if np.any(self._divergences) or np.any(self._max_treedepths):
|
||||
diagnostics = ['Some chains may have failed to converge.']
|
||||
ct_iters = self._metadata.cmdstan_config['num_samples']
|
||||
for i in range(self.runset._chains):
|
||||
if self._divergences[i] > 0:
|
||||
diagnostics.append(
|
||||
f'Chain {i + 1} had {self._divergences[i]} '
|
||||
'divergent transitions '
|
||||
f'({((self._divergences[i]/ct_iters)*100):.1f}%)'
|
||||
)
|
||||
if self._max_treedepths[i] > 0:
|
||||
diagnostics.append(
|
||||
f'Chain {i + 1} had {self._max_treedepths[i]} '
|
||||
'iterations at max treedepth '
|
||||
f'({((self._max_treedepths[i]/ct_iters)*100):.1f}%)'
|
||||
)
|
||||
diagnostics.append(
|
||||
'Use the "diagnose()" method on the CmdStanMCMC object'
|
||||
' to see further information.'
|
||||
)
|
||||
get_logger().warning('\n\t'.join(diagnostics))
|
||||
|
||||
def _assemble_draws(self) -> None:
|
||||
"""
|
||||
Allocates and populates the step size, metric, and sample arrays
|
||||
by parsing the validated stan_csv files.
|
||||
"""
|
||||
if self._draws.shape != (0,):
|
||||
return
|
||||
num_draws = self.num_draws_sampling
|
||||
sampling_iter_start = 0
|
||||
if self._save_warmup:
|
||||
num_draws += self.num_draws_warmup
|
||||
sampling_iter_start = self.num_draws_warmup
|
||||
self._draws = np.empty(
|
||||
(num_draws, self.chains, len(self.column_names)),
|
||||
dtype=float,
|
||||
order='F',
|
||||
)
|
||||
self._step_size = np.empty(self.chains, dtype=float)
|
||||
for chain in range(self.chains):
|
||||
with open(self.runset.csv_files[chain], 'r') as fd:
|
||||
line = fd.readline().strip()
|
||||
# read initial comments, CSV header row
|
||||
while len(line) > 0 and line.startswith('#'):
|
||||
line = fd.readline().strip()
|
||||
if not self._is_fixed_param:
|
||||
# handle warmup draws, if any
|
||||
if self._save_warmup:
|
||||
for i in range(self.num_draws_warmup):
|
||||
line = fd.readline().strip()
|
||||
xs = line.split(',')
|
||||
self._draws[i, chain, :] = [float(x) for x in xs]
|
||||
line = fd.readline().strip()
|
||||
if line != '# Adaptation terminated': # shouldn't happen?
|
||||
while line != '# Adaptation terminated':
|
||||
line = fd.readline().strip()
|
||||
# step_size, metric (diag_e and dense_e only)
|
||||
line = fd.readline().strip()
|
||||
_, step_size = line.split('=')
|
||||
self._step_size[chain] = float(step_size.strip())
|
||||
if self._metadata.cmdstan_config['metric'] != 'unit_e':
|
||||
line = fd.readline().strip() # metric type
|
||||
line = fd.readline().lstrip(' #\t').rstrip()
|
||||
num_unconstrained_params = len(line.split(','))
|
||||
if chain == 0: # can't allocate w/o num params
|
||||
if self.metric_type == 'diag_e':
|
||||
self._metric = np.empty(
|
||||
(self.chains, num_unconstrained_params),
|
||||
dtype=float,
|
||||
)
|
||||
else:
|
||||
self._metric = np.empty(
|
||||
(
|
||||
self.chains,
|
||||
num_unconstrained_params,
|
||||
num_unconstrained_params,
|
||||
),
|
||||
dtype=float,
|
||||
)
|
||||
if line:
|
||||
if self.metric_type == 'diag_e':
|
||||
xs = line.split(',')
|
||||
self._metric[chain, :] = [float(x) for x in xs]
|
||||
else:
|
||||
xs = line.strip().split(',')
|
||||
self._metric[chain, 0, :] = [
|
||||
float(x) for x in xs
|
||||
]
|
||||
for i in range(1, num_unconstrained_params):
|
||||
line = fd.readline().lstrip(' #\t').rstrip()
|
||||
xs = line.split(',')
|
||||
self._metric[chain, i, :] = [
|
||||
float(x) for x in xs
|
||||
]
|
||||
else: # unit_e changed in 2.34 to have an extra line
|
||||
pos = fd.tell()
|
||||
line = fd.readline().strip()
|
||||
if not line.startswith('#'):
|
||||
fd.seek(pos)
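# The tell/readline/seek above peeks at the next line: CmdStan 2.34 and
# later emit an extra comment line for the unit_e metric, while earlier
# versions go straight to the data rows, so rewind if the peeked line is
# already a data row.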
|
||||
|
||||
# process draws
|
||||
for i in range(sampling_iter_start, num_draws):
|
||||
line = fd.readline().strip()
|
||||
xs = line.split(',')
|
||||
self._draws[i, chain, :] = [float(x) for x in xs]
|
||||
assert self._draws is not None
|
||||
|
||||
def summary(
|
||||
self,
|
||||
percentiles: Sequence[int] = (5, 50, 95),
|
||||
sig_figs: int = 6,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Run cmdstan/bin/stansummary over all output CSV files, assemble
|
||||
summary into DataFrame object. The first row contains statistics
|
||||
for the total joint log probability `lp__`, but is omitted when the
|
||||
Stan model has no parameters. The remaining rows contain summary
|
||||
statistics for all parameters, transformed parameters, and generated
|
||||
quantities variables, in program declaration order.
|
||||
|
||||
:param percentiles: Ordered non-empty sequence of percentiles to report.
|
||||
Must be integers from (1, 99), inclusive. Defaults to
|
||||
``(5, 50, 95)``
|
||||
|
||||
:param sig_figs: Number of significant figures to report.
|
||||
Must be an integer between 1 and 18. If unspecified, the default
|
||||
precision for the system file I/O is used; the usual value is 6.
|
||||
If precision above 6 is requested, sample must have been produced
|
||||
by CmdStan version 2.25 or later and sampler output precision
|
||||
must be equal to or greater than the requested summary precision.
|
||||
|
||||
:return: pandas.DataFrame
|
||||
"""
|
||||
if len(percentiles) == 0:
|
||||
raise ValueError(
|
||||
'Invalid percentiles argument, must be ordered'
|
||||
' non-empty list from (1, 99), inclusive.'
|
||||
)
|
||||
cur_pct = 0
|
||||
for pct in percentiles:
|
||||
if pct > 99 or not pct > cur_pct:
|
||||
raise ValueError(
|
||||
'Invalid percentiles spec, must be ordered'
|
||||
' non-empty list from (1, 99), inclusive.'
|
||||
)
|
||||
cur_pct = pct
|
||||
percentiles_str = (
|
||||
f"--percentiles= {','.join(str(x) for x in percentiles)}"
|
||||
)
|
||||
|
||||
if not isinstance(sig_figs, int) or sig_figs < 1 or sig_figs > 18:
|
||||
raise ValueError(
|
||||
'Keyword "sig_figs" must be an integer between 1 and 18,'
|
||||
' found {}'.format(sig_figs)
|
||||
)
|
||||
csv_sig_figs = self._sig_figs or 6
|
||||
if sig_figs > csv_sig_figs:
|
||||
get_logger().warning(
|
||||
'Requesting %d significant digits of output, but CSV files'
|
||||
' only have %d digits of precision.',
|
||||
sig_figs,
|
||||
csv_sig_figs,
|
||||
)
|
||||
sig_figs_str = f'--sig_figs={sig_figs}'
|
||||
cmd_path = os.path.join(
|
||||
cmdstan_path(), 'bin', 'stansummary' + EXTENSION
|
||||
)
|
||||
tmp_csv_file = 'stansummary-{}-'.format(self.runset._args.model_name)
|
||||
tmp_csv_path = create_named_text_file(
|
||||
dir=_TMPDIR, prefix=tmp_csv_file, suffix='.csv', name_only=True
|
||||
)
|
||||
csv_str = '--csv_filename={}'.format(tmp_csv_path)
|
||||
# TODO: remove at some future release
|
||||
if cmdstan_version_before(2, 24):
|
||||
csv_str = '--csv_file={}'.format(tmp_csv_path)
|
||||
cmd = [
|
||||
cmd_path,
|
||||
percentiles_str,
|
||||
sig_figs_str,
|
||||
csv_str,
|
||||
] + self.runset.csv_files
|
||||
do_command(cmd, fd_out=None)
|
||||
with open(tmp_csv_path, 'rb') as fd:
|
||||
summary_data = pd.read_csv(
|
||||
fd,
|
||||
delimiter=',',
|
||||
header=0,
|
||||
index_col=0,
|
||||
comment='#',
|
||||
float_precision='high',
|
||||
)
|
||||
mask = (
|
||||
[not x.endswith('__') for x in summary_data.index]
|
||||
if self._is_fixed_param
|
||||
else [
|
||||
x == 'lp__' or not x.endswith('__') for x in summary_data.index
|
||||
]
|
||||
)
|
||||
summary_data.index.name = None
|
||||
return summary_data[mask]
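# Illustrative usage (not part of the library source; assumes ``fit`` is a
# CmdStanMCMC object returned by CmdStanModel.sample):
#
#     df = fit.summary(percentiles=(2.5, 50, 97.5), sig_figs=4)
#     print(df.loc['lp__'])
#
# Row labels are the output column names; the summary columns are those
# produced by the stansummary utility (mean, MCSE, standard deviation,
# the requested percentiles, effective sample size, and R-hat).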
|
||||
|
||||
def diagnose(self) -> Optional[str]:
|
||||
"""
|
||||
Run cmdstan/bin/diagnose over all output CSV files,
|
||||
return console output.
|
||||
|
||||
The diagnose utility reads the outputs of all chains
|
||||
and checks for the following potential problems:
|
||||
|
||||
+ Transitions that hit the maximum treedepth
|
||||
+ Divergent transitions
|
||||
+ Low E-BFMI values (sampler transitions HMC potential energy)
|
||||
+ Low effective sample sizes
|
||||
+ High R-hat values
|
||||
"""
|
||||
cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
|
||||
cmd = [cmd_path] + self.runset.csv_files
|
||||
result = StringIO()
|
||||
do_command(cmd=cmd, fd_out=result)
|
||||
return result.getvalue()
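# Illustrative usage (not part of the library source):
#
#     print(fit.diagnose())
#
# prints the console report produced by CmdStan's diagnose utility.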
|
||||
|
||||
def draws_pd(
|
||||
self,
|
||||
vars: Union[List[str], str, None] = None,
|
||||
inc_warmup: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Returns the sample draws as a pandas DataFrame.
|
||||
Flattens all chains into single column. Container variables
|
||||
(array, vector, matrix) will span multiple columns, one column
|
||||
per element. E.g. variable 'matrix[2,2] foo' spans 4 columns:
|
||||
'foo[1,1], ... foo[2,2]'.
|
||||
|
||||
:param vars: optional list of variable names.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.draws
|
||||
CmdStanMCMC.draws_xr
|
||||
CmdStanGQ.draws_pd
|
||||
"""
|
||||
if vars is not None:
|
||||
if isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
|
||||
if inc_warmup and not self._save_warmup:
|
||||
get_logger().warning(
|
||||
'Draws from warmup iterations not available,'
|
||||
' must run sampler with "save_warmup=True".'
|
||||
)
|
||||
|
||||
self._assemble_draws()
|
||||
cols = []
|
||||
if vars is not None:
|
||||
for var in dict.fromkeys(vars_list):
|
||||
if var in self._metadata.method_vars:
|
||||
cols.append(var)
|
||||
elif var in self._metadata.stan_vars:
|
||||
info = self._metadata.stan_vars[var]
|
||||
cols.extend(
|
||||
self.column_names[info.start_idx : info.end_idx]
|
||||
)
|
||||
elif var in ['chain__', 'iter__', 'draw__']:
|
||||
cols.append(var)
|
||||
else:
|
||||
raise ValueError(f'Unknown variable: {var}')
|
||||
else:
|
||||
cols = ['chain__', 'iter__', 'draw__'] + list(self.column_names)
|
||||
|
||||
draws = self.draws(inc_warmup=inc_warmup)
|
||||
# add long-form columns for chain, iteration, draw
|
||||
n_draws, n_chains, _ = draws.shape
|
||||
chains_col = (
|
||||
np.repeat(np.arange(1, n_chains + 1), n_draws)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
iter_col = (
|
||||
np.tile(np.arange(1, n_draws + 1), n_chains)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
draw_col = (
|
||||
np.arange(1, (n_draws * n_chains) + 1)
|
||||
.reshape(1, n_chains, n_draws)
|
||||
.T
|
||||
)
|
||||
draws = np.concatenate([chains_col, iter_col, draw_col, draws], axis=2)
|
||||
|
||||
return pd.DataFrame(
|
||||
data=flatten_chains(draws),
|
||||
columns=['chain__', 'iter__', 'draw__'] + list(self.column_names),
|
||||
)[cols]
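# Illustrative usage (not part of the library source; 'theta' is a
# hypothetical model parameter):
#
#     all_df = fit.draws_pd()                # chain__, iter__, draw__ plus all columns
#     theta_df = fit.draws_pd(vars='theta')  # only the theta[...] columns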
|
||||
|
||||
def draws_xr(
|
||||
self, vars: Union[str, List[str], None] = None, inc_warmup: bool = False
|
||||
) -> "xr.Dataset":
|
||||
"""
|
||||
Returns the sampler draws as a xarray Dataset.
|
||||
|
||||
:param vars: optional list of variable names.
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.draws
|
||||
CmdStanMCMC.draws_pd
|
||||
CmdStanGQ.draws_xr
|
||||
"""
|
||||
if not XARRAY_INSTALLED:
|
||||
raise RuntimeError(
|
||||
'Package "xarray" is not installed, cannot produce draws array.'
|
||||
)
|
||||
if inc_warmup and not self._save_warmup:
|
||||
get_logger().warning(
|
||||
"Draws from warmup iterations not available,"
|
||||
' must run sampler with "save_warmup=True".'
|
||||
)
|
||||
if vars is None:
|
||||
vars_list = list(self._metadata.stan_vars.keys())
|
||||
elif isinstance(vars, str):
|
||||
vars_list = [vars]
|
||||
else:
|
||||
vars_list = vars
|
||||
|
||||
self._assemble_draws()
|
||||
|
||||
num_draws = self.num_draws_sampling
|
||||
meta = self._metadata.cmdstan_config
|
||||
attrs: MutableMapping[Hashable, Any] = {
|
||||
"stan_version": f"{meta['stan_version_major']}."
|
||||
f"{meta['stan_version_minor']}.{meta['stan_version_patch']}",
|
||||
"model": meta["model"],
|
||||
"num_draws_sampling": num_draws,
|
||||
}
|
||||
if inc_warmup and self._save_warmup:
|
||||
num_draws += self.num_draws_warmup
|
||||
attrs["num_draws_warmup"] = self.num_draws_warmup
|
||||
|
||||
data: MutableMapping[Hashable, Any] = {}
|
||||
coordinates: MutableMapping[Hashable, Any] = {
|
||||
"chain": self.chain_ids,
|
||||
"draw": np.arange(num_draws),
|
||||
}
|
||||
|
||||
for var in vars_list:
|
||||
build_xarray_data(
|
||||
data,
|
||||
self._metadata.stan_vars[var],
|
||||
self.draws(inc_warmup=inc_warmup),
|
||||
)
|
||||
return xr.Dataset(data, coords=coordinates, attrs=attrs).transpose(
|
||||
'chain', 'draw', ...
|
||||
)
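# Illustrative usage (not part of the library source; requires xarray,
# 'theta' is a hypothetical model parameter):
#
#     ds = fit.draws_xr(vars=['theta'])
#     ds['theta'].sel(chain=1)
#
# The returned Dataset is indexed by 'chain' and 'draw', with further
# dimensions matching each variable's shape.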
|
||||
|
||||
def stan_variable(
|
||||
self,
|
||||
var: str,
|
||||
inc_warmup: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the set of draws
|
||||
for the named Stan program variable. Flattens the chains,
|
||||
leaving the draws in chain order. The first array dimension,
|
||||
corresponds to number of draws or post-warmup draws in the sample,
|
||||
per argument ``inc_warmup``. The remaining dimensions correspond to
|
||||
the shape of the Stan program variable.
|
||||
|
||||
The underlying draws are in chain order, i.e., for a sample with
|
||||
N chains of M draws each, the first M array elements are from chain 1,
|
||||
the next M are from chain 2, and the last M elements are from chain N.
|
||||
|
||||
* If the variable is a scalar variable, the return array has shape
|
||||
( draws * chains, 1).
|
||||
* If the variable is a vector, the return array has shape
|
||||
( draws * chains, len(vector))
|
||||
* If the variable is a matrix, the return array has shape
|
||||
( draws * chains, size(dim 1), size(dim 2) )
|
||||
* If the variable is an array with N dimensions, the return array
|
||||
has shape ( draws * chains, size(dim 1), ..., size(dim N))
|
||||
|
||||
For example, if the Stan program variable ``theta`` is a 3x3 matrix,
|
||||
and the sample consists of 4 chains with 1000 post-warmup draws,
|
||||
this function will return a numpy.ndarray with shape (4000,3,3).
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
:param inc_warmup: When ``True`` and the warmup draws are present in
|
||||
the output, i.e., the sampler was run with ``save_warmup=True``,
|
||||
then the warmup draws are included. Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanMLE.stan_variable
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanLaplace.stan_variable
|
||||
"""
|
||||
try:
|
||||
draws = self.draws(inc_warmup=inc_warmup, concat_chains=True)
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(
|
||||
draws
|
||||
)
|
||||
return out
|
||||
except KeyError:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(self._metadata.stan_vars.keys())
|
||||
)
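# Illustrative usage (not part of the library source; per the docstring
# example, 'theta' is a hypothetical 3x3 matrix parameter sampled with
# 4 chains of 1000 post-warmup draws):
#
#     theta = fit.stan_variable('theta')   # shape (4000, 3, 3)
#     theta_mean = theta.mean(axis=0)      # posterior mean, shape (3, 3)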
|
||||
|
||||
def stan_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variable names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanVB.stan_variables
|
||||
CmdStanGQ.stan_variables
|
||||
CmdStanLaplace.stan_variables
|
||||
"""
|
||||
result = {}
|
||||
for name in self._metadata.stan_vars:
|
||||
result[name] = self.stan_variable(name)
|
||||
return result
|
||||
|
||||
def method_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Returns a dictionary of all sampler variables, i.e., all
|
||||
output column names ending in `__`. Assumes that all variables
|
||||
are scalar variables where column name is variable name.
|
||||
Maps each column name to a numpy.ndarray (draws x chains x 1)
|
||||
containing per-draw diagnostic values.
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return {
|
||||
name: var.extract_reshape(self._draws)
|
||||
for name, var in self._metadata.method_vars.items()
|
||||
}
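# For the NUTS/HMC sampler the keys are typically 'lp__', 'accept_stat__',
# 'stepsize__', 'treedepth__', 'n_leapfrog__', 'divergent__', and 'energy__',
# e.g. fit.method_variables()['divergent__'] holds the per-draw divergence flags.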
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, the filenames are cleaned up.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self.runset.save_csvfiles(dir)
|
||||
@ -0,0 +1,53 @@
|
||||
"""Container for metadata parsed from the output of a CmdStan run"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict
|
||||
|
||||
import stanio
|
||||
|
||||
|
||||
class InferenceMetadata:
|
||||
"""
|
||||
CmdStan configuration and contents of output file parsed out of
|
||||
the Stan CSV file header comments and column headers.
|
||||
Assumes valid CSV files.
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]) -> None:
|
||||
"""Initialize object from CSV headers"""
|
||||
self._cmdstan_config = config
|
||||
vars = stanio.parse_header(config['raw_header'])
|
||||
|
||||
self._method_vars = {
|
||||
k: v for (k, v) in vars.items() if k.endswith('__')
|
||||
}
|
||||
self._stan_vars = {
|
||||
k: v for (k, v) in vars.items() if not k.endswith('__')
|
||||
}
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return 'Metadata:\n{}\n'.format(self._cmdstan_config)
|
||||
|
||||
@property
|
||||
def cmdstan_config(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Returns a dictionary containing a set of name, value pairs
|
||||
parsed out of the Stan CSV file header. These include the
|
||||
command configuration and the CSV file header row information.
|
||||
Uses deepcopy for immutability.
|
||||
"""
|
||||
return copy.deepcopy(self._cmdstan_config)
|
||||
|
||||
@property
|
||||
def method_vars(self) -> Dict[str, stanio.Variable]:
|
||||
"""
|
||||
Method variable names always end in `__`, e.g. `lp__`.
|
||||
"""
|
||||
return self._method_vars
|
||||
|
||||
@property
|
||||
def stan_vars(self) -> Dict[str, stanio.Variable]:
|
||||
"""
|
||||
These are the user-defined variables in the Stan program.
|
||||
"""
|
||||
return self._stan_vars
|
||||
284
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/mle.py
Normal file
@ -0,0 +1,284 @@
|
||||
"""Container for the result of running optimization"""
|
||||
|
||||
from collections import OrderedDict
|
||||
from typing import Dict, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from cmdstanpy.cmdstan_args import Method, OptimizeArgs
|
||||
from cmdstanpy.utils import get_logger, scan_optimize_csv
|
||||
|
||||
from .metadata import InferenceMetadata
|
||||
from .runset import RunSet
|
||||
|
||||
|
||||
class CmdStanMLE:
|
||||
"""
|
||||
Container for outputs from CmdStan optimization.
|
||||
Created by :meth:`CmdStanModel.optimize`.
|
||||
"""
|
||||
|
||||
def __init__(self, runset: RunSet) -> None:
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.OPTIMIZE:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting optimize runset, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self.runset = runset
|
||||
# info from runset to be exposed
|
||||
self.converged = runset._check_retcodes()
|
||||
optimize_args = self.runset._args.method_args
|
||||
assert isinstance(
|
||||
optimize_args, OptimizeArgs
|
||||
) # make the typechecker happy
|
||||
self._save_iterations: bool = optimize_args.save_iterations
|
||||
self._set_mle_attrs(runset.csv_files[0])
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr = 'CmdStanMLE: model={}{}'.format(
|
||||
self.runset.model, self.runset._args.method_args.compose(0, cmd=[])
|
||||
)
|
||||
repr = '{}\n csv_file:\n\t{}\n output_file:\n\t{}'.format(
|
||||
repr,
|
||||
'\n\t'.join(self.runset.csv_files),
|
||||
'\n\t'.join(self.runset.stdout_files),
|
||||
)
|
||||
if not self.converged:
|
||||
repr = '{}\n Warning: invalid estimate, '.format(repr)
|
||||
repr = '{} optimization failed to converge.'.format(repr)
|
||||
return repr
|
||||
|
||||
def __getattr__(self, attr: str) -> Union[np.ndarray, float]:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def _set_mle_attrs(self, sample_csv_0: str) -> None:
|
||||
meta = scan_optimize_csv(sample_csv_0, self._save_iterations)
|
||||
self._metadata = InferenceMetadata(meta)
|
||||
self._column_names: Tuple[str, ...] = meta['column_names']
|
||||
self._mle: np.ndarray = meta['mle']
|
||||
if self._save_iterations:
|
||||
self._all_iters: np.ndarray = meta['all_iters']
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of estimated quantities, includes joint log probability,
|
||||
and all parameters, transformed parameters, and generated quantities.
|
||||
"""
|
||||
return self._column_names
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
@property
|
||||
def optimized_params_np(self) -> np.ndarray:
|
||||
"""
|
||||
Returns all final estimates from the optimizer as a numpy.ndarray
|
||||
which contains all optimizer outputs, i.e., the value for `lp__`
|
||||
as well as all Stan program variables.
|
||||
"""
|
||||
if not self.converged:
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
return self._mle
|
||||
|
||||
@property
|
||||
def optimized_iterations_np(self) -> Optional[np.ndarray]:
|
||||
"""
|
||||
Returns all saved iterations from the optimizer and final estimate
|
||||
as a numpy.ndarray which contains all optimizer outputs, i.e.,
|
||||
the value for `lp__` as well as all Stan program variables.
|
||||
|
||||
"""
|
||||
if not self._save_iterations:
|
||||
get_logger().warning(
|
||||
'Intermediate iterations not saved to CSV output file. '
|
||||
'Rerun the optimize method with "save_iterations=True".'
|
||||
)
|
||||
return None
|
||||
if not self.converged:
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
return self._all_iters
|
||||
|
||||
@property
|
||||
def optimized_params_pd(self) -> pd.DataFrame:
|
||||
"""
|
||||
Returns all final estimates from the optimizer as a pandas.DataFrame
|
||||
which contains all optimizer outputs, i.e., the value for `lp__`
|
||||
as well as all Stan program variables.
|
||||
"""
|
||||
if not self.runset._check_retcodes():
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
return pd.DataFrame([self._mle], columns=self.column_names)
|
||||
|
||||
@property
|
||||
def optimized_iterations_pd(self) -> Optional[pd.DataFrame]:
|
||||
"""
|
||||
Returns all saved iterations from the optimizer and final estimate
|
||||
as a pandas.DataFrame which contains all optimizer outputs, i.e.,
|
||||
the value for `lp__` as well as all Stan program variables.
|
||||
|
||||
"""
|
||||
if not self._save_iterations:
|
||||
get_logger().warning(
|
||||
'Intermediate iterations not saved to CSV output file. '
|
||||
'Rerun the optimize method with "save_iterations=True".'
|
||||
)
|
||||
return None
|
||||
if not self.converged:
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
return pd.DataFrame(self._all_iters, columns=self.column_names)
|
||||
|
||||
@property
|
||||
def optimized_params_dict(self) -> Dict[str, np.float64]:
|
||||
"""
|
||||
Returns all estimates from the optimizer, including `lp__` as a
|
||||
Python Dict. Only returns estimate from final iteration.
|
||||
"""
|
||||
if not self.runset._check_retcodes():
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
return OrderedDict(zip(self.column_names, self._mle))
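# Illustrative usage (not part of the library source; 'mle' is a CmdStanMLE
# object and 'beta' a hypothetical vector parameter):
#
#     mle.optimized_params_dict['lp__']     # log density at the optimum
#     mle.optimized_params_dict['beta[1]']  # array elements use bracket notation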
|
||||
|
||||
def stan_variable(
|
||||
self,
|
||||
var: str,
|
||||
*,
|
||||
inc_iterations: bool = False,
|
||||
warn: bool = True,
|
||||
) -> Union[np.ndarray, float]:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the estimates
|
||||
for the named Stan program variable where the dimensions of the
|
||||
numpy.ndarray match the shape of the Stan program variable.
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
:param inc_iterations: When ``True`` and the intermediate estimates
|
||||
are included in the output, i.e., the optimizer was run with
|
||||
``save_iterations=True``, then intermediate estimates are included.
|
||||
Default value is ``False``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanLaplace.stan_variable
|
||||
"""
|
||||
if var not in self._metadata.stan_vars:
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are ' + ", ".join(self._metadata.stan_vars)
|
||||
)
|
||||
if warn and inc_iterations and not self._save_iterations:
|
||||
get_logger().warning(
|
||||
'Intermediate iterations not saved to CSV output file. '
|
||||
'Rerun the optimize method with "save_iterations=True".'
|
||||
)
|
||||
if warn and not self.runset._check_retcodes():
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
if inc_iterations and self._save_iterations:
|
||||
data = self._all_iters
|
||||
else:
|
||||
data = self._mle
|
||||
|
||||
try:
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(
|
||||
data
|
||||
)
|
||||
# TODO(2.0) remove
|
||||
if out.shape == () or out.shape == (1,):
|
||||
get_logger().warning(
|
||||
"The default behavior of CmdStanMLE.stan_variable() "
|
||||
"will change in a future release to always return a "
|
||||
"numpy.ndarray, even for scalar variables."
|
||||
)
|
||||
return out.item() # type: ignore
|
||||
return out
|
||||
except KeyError:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(self._metadata.stan_vars.keys())
|
||||
)
|
||||
|
||||
def stan_variables(
|
||||
self, inc_iterations: bool = False
|
||||
) -> Dict[str, Union[np.ndarray, float]]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variable names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
:param inc_iterations: When ``True`` and the intermediate estimates
|
||||
are included in the output, i.e., the optimizer was run with
|
||||
``save_iterations=True``, then intermediate estimates are included.
|
||||
Default value is ``False``.
|
||||
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanMLE.stan_variable
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanVB.stan_variables
|
||||
CmdStanGQ.stan_variables
|
||||
CmdStanLaplace.stan_variables
|
||||
"""
|
||||
if not self.runset._check_retcodes():
|
||||
get_logger().warning(
|
||||
'Invalid estimate, optimization failed to converge.'
|
||||
)
|
||||
result = {}
|
||||
for name in self._metadata.stan_vars:
|
||||
result[name] = self.stan_variable(
|
||||
name, inc_iterations=inc_iterations, warn=False
|
||||
)
|
||||
return result
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, the filenames are cleaned up.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self.runset.save_csvfiles(dir)
|
||||
@ -0,0 +1,237 @@
|
||||
"""
|
||||
Container for the result of running Pathfinder.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from cmdstanpy.cmdstan_args import Method
|
||||
from cmdstanpy.stanfit.metadata import InferenceMetadata
|
||||
from cmdstanpy.stanfit.runset import RunSet
|
||||
from cmdstanpy.utils.stancsv import scan_generic_csv
|
||||
|
||||
|
||||
class CmdStanPathfinder:
|
||||
"""
|
||||
Container for outputs from the Pathfinder algorithm.
|
||||
Created by :meth:`CmdStanModel.pathfinder()`.
|
||||
"""
|
||||
|
||||
def __init__(self, runset: RunSet):
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.PATHFINDER:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting Pathfinder runset, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self._runset = runset
|
||||
|
||||
self._draws: np.ndarray = np.array(())
|
||||
|
||||
config = scan_generic_csv(runset.csv_files[0])
|
||||
self._metadata = InferenceMetadata(config)
|
||||
|
||||
def create_inits(
|
||||
self, seed: Optional[int] = None, chains: int = 4
|
||||
) -> Union[List[Dict[str, np.ndarray]], Dict[str, np.ndarray]]:
|
||||
"""
|
||||
Create initial values for the parameters of the model
|
||||
by randomly selecting draws from the Pathfinder approximation.
|
||||
|
||||
:param seed: Used for random selection, defaults to None
|
||||
:param chains: Number of initial values to return, defaults to 4
|
||||
:return: The initial values for the parameters of the model.
|
||||
|
||||
If ``chains`` is 1, a dictionary is returned, otherwise a list
|
||||
of dictionaries is returned, in the format expected for the
|
||||
``inits`` argument of :meth:`CmdStanModel.sample`.
|
||||
"""
|
||||
self._assemble_draws()
|
||||
rng = np.random.default_rng(seed)
|
||||
idxs = rng.choice(self._draws.shape[0], size=chains, replace=False)
|
||||
if chains == 1:
|
||||
draw = self._draws[idxs[0]]
|
||||
return {
|
||||
name: var.extract_reshape(draw)
|
||||
for name, var in self._metadata.stan_vars.items()
|
||||
}
|
||||
else:
|
||||
return [
|
||||
{
|
||||
name: var.extract_reshape(self._draws[idx])
|
||||
for name, var in self._metadata.stan_vars.items()
|
||||
}
|
||||
for idx in idxs
|
||||
]
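# Illustrative usage (not part of the library source; assumes a compiled
# CmdStanModel ``model`` and a data dict ``data``):
#
#     pf = model.pathfinder(data=data)
#     inits = pf.create_inits(chains=4)
#     fit = model.sample(data=data, chains=4, inits=inits)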
|
||||
|
||||
def __repr__(self) -> str:
|
||||
rep = 'CmdStanPathfinder: model={}{}'.format(
|
||||
self._runset.model,
|
||||
self._runset._args.method_args.compose(0, cmd=[]),
|
||||
)
|
||||
rep = '{}\n csv_files:\n\t{}\n output_files:\n\t{}'.format(
|
||||
rep,
|
||||
'\n\t'.join(self._runset.csv_files),
|
||||
'\n\t'.join(self._runset.stdout_files),
|
||||
)
|
||||
return rep
|
||||
|
||||
# below this is identical to same functions in Laplace
|
||||
def _assemble_draws(self) -> None:
|
||||
if self._draws.shape != (0,):
|
||||
return
|
||||
|
||||
with open(self._runset.csv_files[0], 'r') as fd:
|
||||
while (fd.readline()).startswith("#"):
|
||||
pass
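# Note: the loop above also consumes the CSV header row (the first
# non-comment line), so loadtxt below starts at the first data row;
# comments="#" skips the timing comments appended after the draws.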
|
||||
self._draws = np.loadtxt(
|
||||
fd,
|
||||
dtype=float,
|
||||
ndmin=2,
|
||||
delimiter=',',
|
||||
comments="#",
|
||||
)
|
||||
|
||||
def stan_variable(self, var: str) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the estimates
|
||||
for the named Stan program variable where the dimensions of the
|
||||
numpy.ndarray match the shape of the Stan program variable.
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanMLE.stan_variable
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanLaplace.stan_variable
|
||||
"""
|
||||
self._assemble_draws()
|
||||
try:
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(
|
||||
self._draws
|
||||
)
|
||||
return out
|
||||
except KeyError:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(self._metadata.stan_vars.keys())
|
||||
)
|
||||
|
||||
def stan_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variable names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanVB.stan_variables
|
||||
CmdStanGQ.stan_variables
|
||||
CmdStanLaplace.stan_variables
|
||||
"""
|
||||
result = {}
|
||||
for name in self._metadata.stan_vars:
|
||||
result[name] = self.stan_variable(name)
|
||||
return result
|
||||
|
||||
def method_variables(self) -> Dict[str, np.ndarray]:
|
||||
"""
|
||||
Returns a dictionary of all sampler variables, i.e., all
|
||||
output column names ending in `__`. Assumes that all variables
|
||||
are scalar variables where column name is variable name.
|
||||
Maps each column name to a numpy.ndarray (draws x chains x 1)
|
||||
containing per-draw diagnostic values.
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return {
|
||||
name: var.extract_reshape(self._draws)
|
||||
for name, var in self._metadata.method_vars.items()
|
||||
}
|
||||
|
||||
def draws(self) -> np.ndarray:
|
||||
"""
|
||||
Return a numpy.ndarray containing the draws from the
|
||||
approximate posterior distribution. This is a 2-D array
|
||||
of shape (draws, parameters).
|
||||
"""
|
||||
self._assemble_draws()
|
||||
return self._draws
|
||||
|
||||
def __getattr__(self, attr: str) -> np.ndarray:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def __getstate__(self) -> dict:
|
||||
# This function returns the mapping of objects to serialize with pickle.
|
||||
# See https://docs.python.org/3/library/pickle.html#object.__getstate__
|
||||
# for details. We call _assemble_draws to ensure posterior samples have
|
||||
# been loaded prior to serialization.
|
||||
self._assemble_draws()
|
||||
return self.__dict__
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of all outputs from the sampler, comprising sampler parameters
|
||||
and all components of all model parameters, transformed parameters,
|
||||
and quantities of interest. Corresponds to Stan CSV file header row,
|
||||
with names munged to array notation, e.g. `beta[1]` not `beta.1`.
|
||||
"""
|
||||
return self._metadata.cmdstan_config['column_names'] # type: ignore
|
||||
|
||||
@property
|
||||
def is_resampled(self) -> bool:
|
||||
"""
|
||||
Returns True if the draws were resampled from several Pathfinder
|
||||
approximations, False otherwise.
|
||||
"""
|
||||
return ( # type: ignore
|
||||
self._metadata.cmdstan_config.get("num_paths", 4) > 1
|
||||
and self._metadata.cmdstan_config.get('psis_resample', 1)
|
||||
in (1, 'true')
|
||||
and self._metadata.cmdstan_config.get('calculate_lp', 1)
|
||||
in (1, 'true')
|
||||
)
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, the filenames are cleaned up.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self._runset.save_csvfiles(dir)
|
||||
307
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/runset.py
Normal file
@ -0,0 +1,307 @@
|
||||
"""
|
||||
Container for the information used in a generic CmdStan run,
|
||||
such as file locations
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from time import time
|
||||
from typing import List, Optional
|
||||
|
||||
from cmdstanpy import _TMPDIR
|
||||
from cmdstanpy.cmdstan_args import CmdStanArgs, Method
|
||||
from cmdstanpy.utils import get_logger
|
||||
|
||||
|
||||
class RunSet:
|
||||
"""
|
||||
Encapsulates the configuration and results of a call to any CmdStan
|
||||
inference method. Records the method return code and locations of
|
||||
all console, error, and output files.
|
||||
|
||||
RunSet objects are instantiated by the CmdStanModel class inference methods
|
||||
which validate all inputs, therefore the "__init__" method skips input checks.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
args: CmdStanArgs,
|
||||
chains: int = 1,
|
||||
*,
|
||||
chain_ids: Optional[List[int]] = None,
|
||||
time_fmt: str = "%Y%m%d%H%M%S",
|
||||
one_process_per_chain: bool = True,
|
||||
) -> None:
|
||||
"""Initialize object (no input arg checks)."""
|
||||
self._args = args
|
||||
self._chains = chains
|
||||
self._one_process_per_chain = one_process_per_chain
|
||||
if one_process_per_chain:
|
||||
self._num_procs = chains
|
||||
else:
|
||||
self._num_procs = 1
|
||||
self._retcodes = [-1 for _ in range(self._num_procs)]
|
||||
self._timeout_flags = [False for _ in range(self._num_procs)]
|
||||
if chain_ids is None:
|
||||
chain_ids = [i + 1 for i in range(chains)]
|
||||
self._chain_ids = chain_ids
|
||||
|
||||
if args.output_dir is not None:
|
||||
self._output_dir = args.output_dir
|
||||
else:
|
||||
# make a per-run subdirectory of our master temp directory
|
||||
self._output_dir = tempfile.mkdtemp(
|
||||
prefix=args.model_name, dir=_TMPDIR
|
||||
)
|
||||
|
||||
# output files prefix: ``<model_name>-<YYYYMMDDHHMMSS>``; chain ids and suffixes are appended by file_path()
|
||||
self._base_outfile = (
|
||||
f'{args.model_name}-{datetime.now().strftime(time_fmt)}'
|
||||
)
|
||||
# per-process outputs
|
||||
self._stdout_files = [''] * self._num_procs
|
||||
self._profile_files = [''] * self._num_procs # optional
|
||||
if one_process_per_chain:
|
||||
for i in range(chains):
|
||||
self._stdout_files[i] = self.file_path("-stdout.txt", id=i)
|
||||
if args.save_profile:
|
||||
self._profile_files[i] = self.file_path(
|
||||
".csv", extra="-profile", id=chain_ids[i]
|
||||
)
|
||||
else:
|
||||
self._stdout_files[0] = self.file_path("-stdout.txt")
|
||||
if args.save_profile:
|
||||
self._profile_files[0] = self.file_path(
|
||||
".csv", extra="-profile"
|
||||
)
|
||||
|
||||
# per-chain output files
|
||||
self._csv_files: List[str] = [''] * chains
|
||||
self._diagnostic_files = [''] * chains # optional
|
||||
|
||||
if chains == 1:
|
||||
self._csv_files[0] = self.file_path(".csv")
|
||||
if args.save_latent_dynamics:
|
||||
self._diagnostic_files[0] = self.file_path(
|
||||
".csv", extra="-diagnostic"
|
||||
)
|
||||
else:
|
||||
for i in range(chains):
|
||||
self._csv_files[i] = self.file_path(".csv", id=chain_ids[i])
|
||||
if args.save_latent_dynamics:
|
||||
self._diagnostic_files[i] = self.file_path(
|
||||
".csv", extra="-diagnostic", id=chain_ids[i]
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr = 'RunSet: chains={}, chain_ids={}, num_processes={}'.format(
|
||||
self._chains, self._chain_ids, self._num_procs
|
||||
)
|
||||
repr = '{}\n cmd (chain 1):\n\t{}'.format(repr, self.cmd(0))
|
||||
repr = '{}\n retcodes={}'.format(repr, self._retcodes)
|
||||
repr = f'{repr}\n per-chain output files (showing chain 1 only):'
|
||||
repr = '{}\n csv_file:\n\t{}'.format(repr, self._csv_files[0])
|
||||
if self._args.save_latent_dynamics:
|
||||
repr = '{}\n diagnostics_file:\n\t{}'.format(
|
||||
repr, self._diagnostic_files[0]
|
||||
)
|
||||
if self._args.save_profile:
|
||||
repr = '{}\n profile_file:\n\t{}'.format(
|
||||
repr, self._profile_files[0]
|
||||
)
|
||||
repr = '{}\n console_msgs (if any):\n\t{}'.format(
|
||||
repr, self._stdout_files[0]
|
||||
)
|
||||
return repr
|
||||
|
||||
@property
|
||||
def model(self) -> str:
|
||||
"""Stan model name."""
|
||||
return self._args.model_name
|
||||
|
||||
@property
|
||||
def method(self) -> Method:
|
||||
"""CmdStan method used to generate this fit."""
|
||||
return self._args.method
|
||||
|
||||
@property
|
||||
def num_procs(self) -> int:
|
||||
"""Number of processes run."""
|
||||
return self._num_procs
|
||||
|
||||
@property
|
||||
def one_process_per_chain(self) -> bool:
|
||||
"""
|
||||
When True, for each chain, call CmdStan in its own subprocess.
|
||||
When False, use CmdStan's `num_chains` arg to run parallel chains.
|
||||
Always True if CmdStan < 2.28.
|
||||
For CmdStan 2.28 and up, `sample` method determines value.
|
||||
"""
|
||||
return self._one_process_per_chain
|
||||
|
||||
@property
|
||||
def chains(self) -> int:
|
||||
"""Number of chains."""
|
||||
return self._chains
|
||||
|
||||
@property
|
||||
def chain_ids(self) -> List[int]:
|
||||
"""Chain ids."""
|
||||
return self._chain_ids
|
||||
|
||||
def cmd(self, idx: int) -> List[str]:
|
||||
"""
|
||||
Assemble CmdStan invocation.
|
||||
When running parallel chains from single process (2.28 and up),
|
||||
specify CmdStan arg `num_chains` and leave chain idx off CSV files.
|
||||
"""
|
||||
if self._one_process_per_chain:
|
||||
return self._args.compose_command(
|
||||
idx,
|
||||
csv_file=self.csv_files[idx],
|
||||
diagnostic_file=self.diagnostic_files[idx]
|
||||
if self._args.save_latent_dynamics
|
||||
else None,
|
||||
profile_file=self.profile_files[idx]
|
||||
if self._args.save_profile
|
||||
else None,
|
||||
)
|
||||
else:
|
||||
return self._args.compose_command(
|
||||
idx,
|
||||
csv_file=self.file_path('.csv'),
|
||||
diagnostic_file=self.file_path(".csv", extra="-diagnostic")
|
||||
if self._args.save_latent_dynamics
|
||||
else None,
|
||||
profile_file=self.file_path(".csv", extra="-profile")
|
||||
if self._args.save_profile
|
||||
else None,
|
||||
)
|
||||
|
||||
@property
|
||||
def csv_files(self) -> List[str]:
|
||||
"""List of paths to CmdStan output files."""
|
||||
return self._csv_files
|
||||
|
||||
@property
|
||||
def stdout_files(self) -> List[str]:
|
||||
"""
|
||||
List of paths to transcript of CmdStan messages sent to the console.
|
||||
Transcripts include config information, progress, and error messages.
|
||||
"""
|
||||
return self._stdout_files
|
||||
|
||||
def _check_retcodes(self) -> bool:
|
||||
"""Returns ``True`` when all chains have retcode 0."""
|
||||
for code in self._retcodes:
|
||||
if code != 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
|
||||
def diagnostic_files(self) -> List[str]:
|
||||
"""List of paths to CmdStan hamiltonian diagnostic files."""
|
||||
return self._diagnostic_files
|
||||
|
||||
@property
|
||||
def profile_files(self) -> List[str]:
|
||||
"""List of paths to CmdStan profiler files."""
|
||||
return self._profile_files
|
||||
|
||||
# pylint: disable=invalid-name
|
||||
def file_path(
|
||||
self, suffix: str, *, extra: str = "", id: Optional[int] = None
|
||||
) -> str:
|
||||
if id is not None:
|
||||
suffix = f"_{id}{suffix}"
|
||||
file = os.path.join(
|
||||
self._output_dir, f"{self._base_outfile}{extra}{suffix}"
|
||||
)
|
||||
return file
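# Illustrative results (hypothetical model name 'bernoulli', timestamp
# 20240101120000):
#   file_path('.csv', id=1)                    -> <output_dir>/bernoulli-20240101120000_1.csv
#   file_path('.csv', extra='-profile', id=1)  -> <output_dir>/bernoulli-20240101120000-profile_1.csv
#   file_path('-stdout.txt', id=0)             -> <output_dir>/bernoulli-20240101120000_0-stdout.txt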
|
||||
|
||||
def _retcode(self, idx: int) -> int:
|
||||
"""Get retcode for process[idx]."""
|
||||
return self._retcodes[idx]
|
||||
|
||||
def _set_retcode(self, idx: int, val: int) -> None:
|
||||
"""Set retcode at process[idx] to val."""
|
||||
self._retcodes[idx] = val
|
||||
|
||||
def _set_timeout_flag(self, idx: int, val: bool) -> None:
|
||||
"""Set timeout_flag at process[idx] to val."""
|
||||
self._timeout_flags[idx] = val
|
||||
|
||||
def get_err_msgs(self) -> str:
|
||||
"""Checks console messages for each CmdStan run."""
|
||||
msgs = []
|
||||
for i in range(self._num_procs):
|
||||
if (
|
||||
os.path.exists(self._stdout_files[i])
|
||||
and os.stat(self._stdout_files[i]).st_size > 0
|
||||
):
|
||||
if self._args.method == Method.OPTIMIZE:
|
||||
msgs.append('console log output:\n')
|
||||
with open(self._stdout_files[0], 'r') as fd:
|
||||
msgs.append(fd.read())
|
||||
else:
|
||||
with open(self._stdout_files[i], 'r') as fd:
|
||||
contents = fd.read()
|
||||
# pattern matches initial "Exception" or "Error" msg
|
||||
pat = re.compile(r'^E[rx].*$', re.M)
|
||||
errors = re.findall(pat, contents)
|
||||
if len(errors) > 0:
|
||||
msgs.append('\n\t'.join(errors))
|
||||
return '\n'.join(msgs)
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Moves CSV files to specified directory.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
if dir is None:
|
||||
dir = os.path.realpath('.')
|
||||
test_path = os.path.join(dir, str(time()))
|
||||
try:
|
||||
os.makedirs(dir, exist_ok=True)
|
||||
with open(test_path, 'w'):
|
||||
pass
|
||||
os.remove(test_path) # cleanup
|
||||
except (IOError, OSError, PermissionError) as exc:
|
||||
raise RuntimeError('Cannot save to path: {}'.format(dir)) from exc
|
||||
|
||||
for i in range(self.chains):
|
||||
if not os.path.exists(self._csv_files[i]):
|
||||
raise ValueError(
|
||||
'Cannot access CSV file {}'.format(self._csv_files[i])
|
||||
)
|
||||
|
||||
to_path = os.path.join(dir, os.path.basename(self._csv_files[i]))
|
||||
if os.path.exists(to_path):
|
||||
raise ValueError(
|
||||
'File exists, not overwriting: {}'.format(to_path)
|
||||
)
|
||||
try:
|
||||
get_logger().debug(
|
||||
'saving tmpfile: "%s" as: "%s"', self._csv_files[i], to_path
|
||||
)
|
||||
shutil.move(self._csv_files[i], to_path)
|
||||
self._csv_files[i] = to_path
|
||||
except (IOError, OSError, PermissionError) as e:
|
||||
raise ValueError(
|
||||
'Cannot save to file: {}'.format(to_path)
|
||||
) from e
|
||||
|
||||
def raise_for_timeouts(self) -> None:
|
||||
if any(self._timeout_flags):
|
||||
raise TimeoutError(
|
||||
f"{sum(self._timeout_flags)} of {self.num_procs} processes "
|
||||
"timed out"
|
||||
)
|
||||
240
.venv/lib/python3.12/site-packages/cmdstanpy/stanfit/vb.py
Normal file
@ -0,0 +1,240 @@
|
||||
"""Container for the results of running autodiff variational inference"""
|
||||
|
||||
from collections import OrderedDict
|
||||
from typing import Dict, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from cmdstanpy.cmdstan_args import Method
|
||||
from cmdstanpy.utils import scan_variational_csv
|
||||
from cmdstanpy.utils.logging import get_logger
|
||||
|
||||
from .metadata import InferenceMetadata
|
||||
from .runset import RunSet
|
||||
|
||||
|
||||
class CmdStanVB:
|
||||
"""
|
||||
Container for outputs from CmdStan variational run.
|
||||
Created by :meth:`CmdStanModel.variational`.
|
||||
"""
|
||||
|
||||
def __init__(self, runset: RunSet) -> None:
|
||||
"""Initialize object."""
|
||||
if not runset.method == Method.VARIATIONAL:
|
||||
raise ValueError(
|
||||
'Wrong runset method, expecting variational inference, '
|
||||
'found method {}'.format(runset.method)
|
||||
)
|
||||
self.runset = runset
|
||||
self._set_variational_attrs(runset.csv_files[0])
|
||||
|
||||
def __repr__(self) -> str:
|
||||
repr = 'CmdStanVB: model={}{}'.format(
|
||||
self.runset.model, self.runset._args.method_args.compose(0, cmd=[])
|
||||
)
|
||||
repr = '{}\n csv_file:\n\t{}\n output_file:\n\t{}'.format(
|
||||
repr,
|
||||
'\n\t'.join(self.runset.csv_files),
|
||||
'\n\t'.join(self.runset.stdout_files),
|
||||
)
|
||||
# TODO - diagnostic, profiling files
|
||||
return repr
|
||||
|
||||
def __getattr__(self, attr: str) -> Union[np.ndarray, float]:
|
||||
"""Synonymous with ``fit.stan_variable(attr)"""
|
||||
if attr.startswith("_"):
|
||||
raise AttributeError(f"Unknown variable name {attr}")
|
||||
try:
|
||||
return self.stan_variable(attr)
|
||||
except ValueError as e:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise AttributeError(*e.args)
|
||||
|
||||
def _set_variational_attrs(self, sample_csv_0: str) -> None:
|
||||
meta = scan_variational_csv(sample_csv_0)
|
||||
self._metadata = InferenceMetadata(meta)
|
||||
# these three assignments don't grant type information
|
||||
self._column_names: Tuple[str, ...] = meta['column_names']
|
||||
self._eta: float = meta['eta']
|
||||
self._variational_mean: np.ndarray = meta['variational_mean']
|
||||
self._variational_sample: np.ndarray = meta['variational_sample']
|
||||
|
||||
@property
|
||||
def columns(self) -> int:
|
||||
"""
|
||||
Total number of information items returned by sampler.
|
||||
Includes approximation information and names of model parameters
|
||||
and computed quantities.
|
||||
"""
|
||||
return len(self._column_names)
|
||||
|
||||
@property
|
||||
def column_names(self) -> Tuple[str, ...]:
|
||||
"""
|
||||
Names of information items returned by sampler for each draw.
|
||||
Includes approximation information and names of model parameters
|
||||
and computed quantities.
|
||||
"""
|
||||
return self._column_names
|
||||
|
||||
@property
|
||||
def eta(self) -> float:
|
||||
"""
|
||||
Step size scaling parameter 'eta'
|
||||
"""
|
||||
return self._eta
|
||||
|
||||
@property
|
||||
def variational_params_np(self) -> np.ndarray:
|
||||
"""
|
||||
Returns inferred parameter means as numpy array.
|
||||
"""
|
||||
return self._variational_mean
|
||||
|
||||
@property
|
||||
def variational_params_pd(self) -> pd.DataFrame:
|
||||
"""
|
||||
Returns inferred parameter means as pandas DataFrame.
|
||||
"""
|
||||
return pd.DataFrame([self._variational_mean], columns=self.column_names)
|
||||
|
||||
@property
|
||||
def variational_params_dict(self) -> Dict[str, np.ndarray]:
|
||||
"""Returns inferred parameter means as Dict."""
|
||||
return OrderedDict(zip(self.column_names, self._variational_mean))
|
||||
|
||||
@property
|
||||
def metadata(self) -> InferenceMetadata:
|
||||
"""
|
||||
Returns object which contains CmdStan configuration as well as
|
||||
information about the names and structure of the inference method
|
||||
and model output variables.
|
||||
"""
|
||||
return self._metadata
|
||||
|
||||
def stan_variable(
|
||||
self, var: str, *, mean: Optional[bool] = None
|
||||
) -> Union[np.ndarray, float]:
|
||||
"""
|
||||
Return a numpy.ndarray which contains the estimates
|
||||
for the named Stan program variable where the dimensions of the
|
||||
numpy.ndarray match the shape of the Stan program variable, with
|
||||
a leading axis added for the number of draws from the variational
|
||||
approximation.
|
||||
|
||||
* If the variable is a scalar variable, the return array has shape
|
||||
( draws, ).
|
||||
* If the variable is a vector, the return array has shape
|
||||
( draws, len(vector))
|
||||
* If the variable is a matrix, the return array has shape
|
||||
( draws, size(dim 1), size(dim 2) )
|
||||
* If the variable is an array with N dimensions, the return array
|
||||
has shape ( draws, size(dim 1), ..., size(dim N))
|
||||
|
||||
This functionality is also available via a shortcut using ``.`` -
|
||||
writing ``fit.a`` is a synonym for ``fit.stan_variable("a")``
|
||||
|
||||
:param var: variable name
|
||||
|
||||
:param mean: if True, return the variational mean. Otherwise,
|
||||
return the variational sample. The default behavior will
|
||||
change in a future release to return the variational sample.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanVB.stan_variables
|
||||
CmdStanMCMC.stan_variable
|
||||
CmdStanMLE.stan_variable
|
||||
CmdStanPathfinder.stan_variable
|
||||
CmdStanGQ.stan_variable
|
||||
CmdStanLaplace.stan_variable
|
||||
"""
|
||||
# TODO(2.0): remove None case, make default `False`
|
||||
if mean is None:
|
||||
get_logger().warning(
|
||||
"The default behavior of CmdStanVB.stan_variable() "
|
||||
"will change in a future release to return the "
|
||||
"variational sample, rather than the mean.\n"
|
||||
"To maintain the current behavior, pass the argument "
|
||||
"mean=True"
|
||||
)
|
||||
mean = True
|
||||
if mean:
|
||||
draws = self._variational_mean
|
||||
else:
|
||||
draws = self._variational_sample
|
||||
|
||||
try:
|
||||
out: np.ndarray = self._metadata.stan_vars[var].extract_reshape(
|
||||
draws
|
||||
)
|
||||
# TODO(2.0): remove
|
||||
if out.shape == () or out.shape == (1,):
|
||||
if mean:
|
||||
get_logger().warning(
|
||||
"The default behavior of "
|
||||
"CmdStanVB.stan_variable(mean=True) will change in a "
|
||||
"future release to always return a numpy.ndarray, even "
|
||||
"for scalar variables."
|
||||
)
|
||||
return out.item() # type: ignore
|
||||
return out
|
||||
except KeyError:
|
||||
# pylint: disable=raise-missing-from
|
||||
raise ValueError(
|
||||
f'Unknown variable name: {var}\n'
|
||||
'Available variables are '
|
||||
+ ", ".join(self._metadata.stan_vars.keys())
|
||||
)
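# Illustrative usage (not part of the library source; 'theta' is a
# hypothetical model parameter):
#
#     vb.stan_variable('theta', mean=True)   # variational mean estimate
#     vb.stan_variable('theta', mean=False)  # per-draw values, leading draws axis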
|
||||
|
||||
def stan_variables(
|
||||
self, *, mean: Optional[bool] = None
|
||||
) -> Dict[str, Union[np.ndarray, float]]:
|
||||
"""
|
||||
Return a dictionary mapping Stan program variable names
|
||||
to the corresponding numpy.ndarray containing the inferred values.
|
||||
|
||||
See Also
|
||||
--------
|
||||
CmdStanVB.stan_variable
|
||||
CmdStanMCMC.stan_variables
|
||||
CmdStanMLE.stan_variables
|
||||
CmdStanGQ.stan_variables
|
||||
CmdStanPathfinder.stan_variables
|
||||
CmdStanLaplace.stan_variables
|
||||
"""
|
||||
result = {}
|
||||
for name in self._metadata.stan_vars:
|
||||
result[name] = self.stan_variable(name, mean=mean)
|
||||
return result
|
||||
|
||||
@property
|
||||
def variational_sample(self) -> np.ndarray:
|
||||
"""Returns the set of approximate posterior output draws."""
|
||||
return self._variational_sample
|
||||
|
||||
@property
|
||||
def variational_sample_pd(self) -> pd.DataFrame:
|
||||
"""
|
||||
Returns the set of approximate posterior output draws as
|
||||
a pandas DataFrame.
|
||||
"""
|
||||
return pd.DataFrame(self._variational_sample, columns=self.column_names)
|
||||
|
||||
def save_csvfiles(self, dir: Optional[str] = None) -> None:
|
||||
"""
|
||||
Move output CSV files to specified directory. If files were
|
||||
written to the temporary session directory, the filenames are cleaned up.
|
||||
E.g., save 'bernoulli-201912081451-1-5nm6as7u.csv' as
|
||||
'bernoulli-201912081451-1.csv'.
|
||||
|
||||
:param dir: directory path
|
||||
|
||||
See Also
|
||||
--------
|
||||
stanfit.RunSet.save_csvfiles
|
||||
cmdstanpy.from_csv
|
||||
"""
|
||||
self.runset.save_csvfiles(dir)