some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/duration/survfunc.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/duration/survfunc.py
@ -0,0 +1,817 @@
+import numpy as np
+import pandas as pd
+from scipy.stats.distributions import chi2, norm
+from statsmodels.graphics import utils
+
+
+def _calc_survfunc_right(time, status, weights=None, entry=None, compress=True,
+                         retall=True):
+    """
+    Product-limit survival estimate for one group of subjects.
+
+    Parameters
+    ----------
+    time : array_like
+        Event or censoring time of each subject.
+    status : array_like
+        Per-subject event indicator, summed to give the event count at
+        each distinct time.
+    weights : array_like, optional
+        Per-subject weights applied to the event counts and to the
+        risk set sizes.
+    entry : array_like, optional
+        Entry times.  When given, a subject counts toward the risk set
+        only at times strictly after its entry time.
+    compress : bool
+        If True, only time points with a positive event count are kept.
+    retall : bool
+        If False, the standard error is neither computed nor returned.
+
+    Returns
+    -------
+    ``sp, se, utime, rtime, n, d`` when `retall` is True, otherwise
+    ``sp, utime, rtime, n, d``.  `sp` is the survival probability, `se`
+    its standard error, `utime` the retained distinct times, `rtime`
+    the rank of each subject's time among all distinct times, `n` the
+    risk set size and `d` the event count at each retained time.
+    """
+
+    weighted = weights is not None
+    truncated = entry is not None
+
+    # Rank each observed time within the sorted distinct times; entry
+    # times take part in the ranking when they are supplied.
+    if truncated:
+        pooled = np.concatenate((time, entry))
+        utime, ranks = np.unique(pooled, return_inverse=True)
+        rtime = ranks[:len(time)]
+    else:
+        utime, rtime = np.unique(time, return_inverse=True)
+    nt = len(utime)
+
+    # Event totals and number of subjects leaving at each distinct time.
+    if weighted:
+        d = np.bincount(rtime, weights=status*weights, minlength=nt)
+        exits = np.bincount(rtime, weights=weights, minlength=nt)
+    else:
+        d = np.bincount(rtime, weights=status, minlength=nt)
+        exits = np.bincount(rtime, minlength=nt)
+
+    # Size of the risk set just prior to each distinct time.
+    if truncated:
+        # Entered strictly before t, minus those who left before t.
+        left = np.cumsum(exits) - exits
+        rentry = np.searchsorted(utime, entry, side='left')
+        if weighted:
+            entered = np.bincount(rentry, weights=weights, minlength=nt)
+        else:
+            entered = np.bincount(rentry, minlength=nt)
+        entered = np.cumsum(entered) - entered
+        n = entered - left
+    else:
+        # Without truncation everyone still under observation is at risk.
+        n = np.cumsum(exits[::-1])[::-1]
+
+    # Drop the time points at which no event happened.
+    if compress:
+        keep = np.flatnonzero(d > 0)
+        d, n, utime = d[keep], n[keep], utime[keep]
+
+    # Accumulate the product on the log scale.  Factors that are
+    # (numerically) zero are floored so the log stays finite and are
+    # reset to exactly zero afterwards.
+    sp = 1 - d / n.astype(np.float64)
+    floored = sp < 1e-16
+    sp[floored] = 1e-16
+    sp = np.exp(np.cumsum(np.log(sp)))
+    sp[floored] = 0
+
+    if not retall:
+        return sp, utime, rtime, n, d
+
+    if weighted:
+        # Tsiatis' (1981) formula
+        se = np.sqrt(np.cumsum(d / (n * n).astype(np.float64)))
+    else:
+        # Greenwood's formula
+        denom = np.clip(n * (n - d), 1e-12, np.inf)
+        se = d / denom.astype(np.float64)
+        # The variance term is undefined when the whole risk set fails
+        # or the risk set is empty.
+        se[(n == d) | (n == 0)] = np.nan
+        se = np.sqrt(np.cumsum(se))
+        locs = np.isfinite(se) | (sp != 0)
+        se[locs] *= sp[locs]
+        se[~locs] = np.nan
+
+    return sp, se, utime, rtime, n, d
+
+
+def _calc_incidence_right(time, status, weights=None):
+    """
+    Estimate the cumulative incidence function for each event type.
+
+    Parameters
+    ----------
+    time : array_like
+        Event or censoring time of each subject.
+    status : ndarray
+        0 for a censored subject, otherwise the event type 1, 2, ...
+    weights : array_like, optional
+        Per-subject weights.  When given, no standard error is
+        computed.
+
+    Returns
+    -------
+    ip : list of ndarray
+        ip[k-1] is the cumulative incidence for event type k at each
+        distinct time.
+    se : list of ndarray or None
+        Standard errors matching `ip`; None when `weights` is given.
+    utime : ndarray
+        The distinct times.
+    """
+
+    # All-cause survival: every positive status counts as an event.
+    any_event = (status >= 1).astype(np.float64)
+    sp, utime, rtime, n, _ = _calc_survfunc_right(
+        time, any_event, weights, compress=False, retall=False)
+
+    ncause = int(status.max())
+    nt = len(utime)
+
+    # Cause-specific event totals at each distinct time.
+    d = []
+    for cause in range(1, ncause + 1):
+        hit = (status == cause).astype(np.float64)
+        if weights is not None:
+            hit = hit * weights
+        d.append(np.bincount(rtime, weights=hit, minlength=nt))
+
+    # Survival just before each time, divided by the risk set size.
+    sp0 = np.r_[1, sp[:-1]] / n
+    ip = [np.cumsum(sp0 * dk) for dk in d]
+
+    # No standard error is available in the weighted case.
+    if weights is not None:
+        return ip, None, utime
+
+    # The all-cause term is the same for every event type.
+    da = sum(d)
+    all_cause = da / (n * (n - da))
+
+    se = []
+    for ipk, dk in zip(ip, d):
+        v = ipk**2 * np.cumsum(all_cause)
+        v -= 2 * ipk * np.cumsum(ipk * all_cause)
+        v += np.cumsum(ipk**2 * all_cause)
+
+        own = (n - dk) * dk / n
+        v += np.cumsum(sp0**2 * own)
+
+        cross = sp0 * dk / n
+        v -= 2 * ipk * np.cumsum(cross)
+        v += 2 * np.cumsum(ipk * cross)
+
+        se.append(np.sqrt(v))
+
+    return ip, se, utime
+
+
+def _checkargs(time, status, entry, freq_weights, exog):
+    """
+    Validate that the survival inputs are mutually consistent.
+
+    Parameters
+    ----------
+    time : array_like
+        Event or censoring times.
+    status : array_like
+        Status values, one per element of `time`.
+    entry : array_like or None
+        Entry times, one per element of `time`.
+    freq_weights : array_like or None
+        Frequency weights, one per element of `time`.
+    exog : array_like or None
+        Covariates whose first dimension must match `time`.
+
+    Raises
+    ------
+    ValueError
+        If any length differs from ``len(time)``, or if any entry time
+        is on or after the corresponding value in `time`.
+    """
+
+    nobs = len(time)
+
+    if len(status) != nobs:
+        raise ValueError("time and status must have the same length")
+
+    if entry is not None:
+        if len(entry) != nobs:
+            msg = "entry times and event times must have the same length"
+            raise ValueError(msg)
+
+        # Compare as arrays: `>=` on two plain lists is a single
+        # lexicographic comparison, which would let invalid entry times
+        # through, and label-based containers may refuse to compare.
+        if np.any(np.asarray(entry) >= np.asarray(time)):
+            msg = "Entry times must not occur on or after event times"
+            raise ValueError(msg)
+
+    if freq_weights is not None and (len(freq_weights) != nobs):
+        raise ValueError("weights, time and status must have the same length")
+
+    # np.shape also works for nested lists, which have no .shape.
+    if exog is not None and (np.shape(exog)[0] != nobs):
+        raise ValueError("the rows of exog should align with time")
+
+
+class CumIncidenceRight:
+    """
+    Estimation and inference for a cumulative incidence function.
+
+    If J = 1, 2, ... indicates the event type, the cumulative
+    incidence function for cause j is:
+
+    I(t, j) = P(T <= t and J=j)
+
+    Only right censoring is supported.  If frequency weights are provided,
+    the point estimate is returned without a standard error.
+
+    Parameters
+    ----------
+    time : array_like
+        An array of times (censoring times or event times)
+    status : array_like
+        If status >= 1 indicates which event occurred at time t.  If
+        status = 0, the subject was censored at time t.
+    title : str
+        Optional title used for plots and summary output.
+    freq_weights : array_like
+        Optional frequency weights
+    exog : array_like
+        Optional, if present used to account for violation of
+        independent censoring.
+    bw_factor : float
+        Band-width multiplier for kernel-based estimation.  Only
+        used if exog is provided.
+    dimred : bool
+        If True, proportional hazards regression models are used to
+        reduce exog to two columns by predicting overall events and
+        censoring in two separate models.  If False, exog is used
+        directly for calculating kernel weights without dimension
+        reduction.
+
+    Attributes
+    ----------
+    times : array_like
+        The distinct times at which the incidence rates are estimated
+    cinc : list of arrays
+        cinc[k-1] contains the estimated cumulative incidence rates
+        for outcome k=1,2,...
+    cinc_se : list of arrays or None
+        The standard errors for the values in `cinc`.  None when
+        exog and/or frequency weights are provided.
+    title : str
+        The title passed to the constructor, or an empty string.
+
+    Raises
+    ------
+    ValueError
+        If the lengths of `status`, `freq_weights` or the rows of
+        `exog` do not match the length of `time`.
+
+    Notes
+    -----
+    When exog is provided, a local estimate of the cumulative incidence
+    rate around each point is provided, and these are averaged to
+    produce an estimate of the marginal cumulative incidence
+    functions.  The procedure is analogous to that described in Zeng
+    (2004) for estimation of the marginal survival function.  The
+    approach removes bias resulting from dependent censoring when the
+    censoring becomes independent conditioned on the columns of exog.
+
+    References
+    ----------
+    The Stata stcompet procedure:
+        http://www.stata-journal.com/sjpdf.html?articlenum=st0059
+
+    Dinse, G. E. and M. G. Larson. 1986. A note on semi-Markov models
+    for partially censored data. Biometrika 73: 379-386.
+
+    Marubini, E. and M. G. Valsecchi. 1995. Analysing Survival Data
+    from Clinical Trials and Observational Studies. Chichester, UK:
+    John Wiley & Sons.
+
+    D. Zeng (2004).  Estimating marginal survival function by
+    adjusting for dependent censoring using many covariates.  Annals
+    of Statistics 32:4.
+    https://arxiv.org/pdf/math/0409180.pdf
+    """
+
+    def __init__(self, time, status, title=None, freq_weights=None,
+                 exog=None, bw_factor=1., dimred=True):
+
+        # Convert first so that the validation below sees arrays; in
+        # particular exog must expose its number of rows.
+        time = self.time = np.asarray(time)
+        status = self.status = np.asarray(status)
+        if freq_weights is not None:
+            freq_weights = self.freq_weights = np.asarray(freq_weights)
+        if exog is not None:
+            exog = self.exog = np.asarray(exog)
+
+        # exog is included in the check so a row mismatch is reported
+        # here rather than inside the kernel estimator.
+        _checkargs(time, status, None, freq_weights, exog)
+
+        # Assigned before the early return of the exog branch so the
+        # title is kept on every code path.
+        self.title = "" if not title else title
+
+        if exog is not None:
+            from ._kernel_estimates import _kernel_cumincidence
+            nobs = exog.shape[0]
+            # Bandwidth shrinks with the sample size at rate n^(-1/3).
+            kw = nobs**(-1/3.0) * bw_factor
+
+            def kfunc(x):
+                return np.exp(-x**2 / kw**2).sum(1)
+
+            x = _kernel_cumincidence(time, status, exog, kfunc, freq_weights,
+                                     dimred)
+            self.times = x[0]
+            self.cinc = x[1]
+            # No standard error is produced by the kernel estimator.
+            self.cinc_se = None
+            return
+
+        x = _calc_incidence_right(time, status, freq_weights)
+        self.cinc = x[0]
+        self.cinc_se = x[1]
+        self.times = x[2]
+
+
+class SurvfuncRight:
+    """
+    Estimation and inference for a survival function.
+
+    The survival function S(t) = P(T > t) is the probability that an
+    event time T is greater than t.
+
+    This class currently only supports right censoring.
+
+    Parameters
+    ----------
+    time : array_like
+        An array of times (censoring times or event times)
+    status : array_like
+        Status at the event time, status==1 is the 'event'
+        (e.g. death, failure), meaning that the event
+        occurs at the given value in `time`; status==0
+        indicates that censoring has occurred, meaning that
+        the event occurs after the given value in `time`.
+    entry : array_like, optional
+        An array of entry times for handling left truncation (the
+        subject is not in the risk set on or before the entry time)
+    title : str
+        Optional title used for plots and summary output.
+    freq_weights : array_like
+        Optional frequency weights
+    exog : array_like
+        Optional, if present used to account for violation of
+        independent censoring.
+    bw_factor : float
+        Band-width multiplier for kernel-based estimation.  Only used
+        if exog is provided.
+
+    Attributes
+    ----------
+    surv_prob : array_like
+        The estimated value of the survivor function at each time
+        point in `surv_times`.
+    surv_prob_se : array_like
+        The standard errors for the values in `surv_prob`.  Not available
+        if exog is provided.
+    surv_times : array_like
+        The points where the survival function changes.
+    n_risk : array_like
+        The number of subjects at risk just before each time value in
+        `surv_times`.  Not available if exog is provided.
+    n_events : array_like
+        The number of events (e.g. deaths) that occur at each point
+        in `surv_times`.  Not available if exog is provided.
+    title : str
+        The title passed to the constructor, or an empty string.
+
+    Raises
+    ------
+    ValueError
+        If the inputs have inconsistent lengths, if an entry time is
+        on or after its event time, or if both `exog` and `entry` are
+        given.
+
+    Notes
+    -----
+    If exog is None, the standard Kaplan-Meier estimator is used.  If
+    exog is not None, a local estimate of the marginal survival
+    function around each point is constructed, and these are then
+    averaged.  This procedure gives an estimate of the marginal
+    survival function that accounts for dependent censoring as long as
+    the censoring becomes independent when conditioning on the
+    covariates in exog.  See Zeng et al. (2004) for details.
+
+    References
+    ----------
+    D. Zeng (2004).  Estimating marginal survival function by
+    adjusting for dependent censoring using many covariates.  Annals
+    of Statistics 32:4.
+    https://arxiv.org/pdf/math/0409180.pdf
+    """
+
+    def __init__(self, time, status, entry=None, title=None,
+                 freq_weights=None, exog=None, bw_factor=1.):
+
+        # Convert to arrays before validating, so that the elementwise
+        # entry/time comparison and the exog row check behave the same
+        # for lists and arrays.
+        time = self.time = np.asarray(time)
+        status = self.status = np.asarray(status)
+        if freq_weights is not None:
+            freq_weights = self.freq_weights = np.asarray(freq_weights)
+        if entry is not None:
+            entry = self.entry = np.asarray(entry)
+        if exog is not None:
+            exog = self.exog = np.asarray(exog)
+
+        _checkargs(time, status, entry, freq_weights, exog)
+
+        # Assigned before the early return of the exog branch so the
+        # title is kept on every code path.
+        self.title = "" if not title else title
+
+        if exog is not None:
+            if entry is not None:
+                raise ValueError("exog and entry cannot both be present")
+            from ._kernel_estimates import _kernel_survfunc
+            nobs = exog.shape[0]
+            # Bandwidth shrinks with the sample size at rate n^(-1/3).
+            kw = nobs**(-1/3.0) * bw_factor
+
+            def kfunc(x):
+                return np.exp(-x**2 / kw**2).sum(1)
+
+            x = _kernel_survfunc(time, status, exog, kfunc, freq_weights)
+            self.surv_prob = x[0]
+            self.surv_times = x[1]
+            return
+
+        x = _calc_survfunc_right(time, status, weights=freq_weights,
+                                 entry=entry)
+
+        self.surv_prob = x[0]
+        self.surv_prob_se = x[1]
+        self.surv_times = x[2]
+        self.n_risk = x[4]
+        self.n_events = x[5]
+
+    def plot(self, ax=None):
+        """
+        Plot the survival function.
+
+        Parameters
+        ----------
+        ax : optional
+            Forwarded to `plot_survfunc`.
+
+        Returns
+        -------
+        The value returned by `plot_survfunc`.
+
+        Examples
+        --------
+        Change the line color:
+
+        >>> import statsmodels.api as sm
+        >>> data = sm.datasets.get_rdataset("flchain", "survival").data
+        >>> df = data.loc[data.sex == "F", :]
+        >>> sf = sm.SurvfuncRight(df["futime"], df["death"])
+        >>> fig = sf.plot()
+        >>> ax = fig.get_axes()[0]
+        >>> li = ax.get_lines()
+        >>> li[0].set_color('purple')
+        >>> li[1].set_color('purple')
+
+        Do not show the censoring points:
+
+        >>> fig = sf.plot()
+        >>> ax = fig.get_axes()[0]
+        >>> li = ax.get_lines()
+        >>> li[1].set_visible(False)
+        """
+
+        return plot_survfunc(self, ax)
+
+    def quantile(self, p):
+        """
+        Estimated quantile of a survival distribution.
+
+        Parameters
+        ----------
+        p : float
+            The probability point at which the quantile
+            is determined.
+
+        Returns
+        -------
+        The first value in `surv_times` at which the estimated
+        survival probability is strictly below ``1 - p``, or NaN if
+        the survival function never drops below that level.
+        """
+
+        # SAS uses a strict inequality here.
+        ii = np.flatnonzero(self.surv_prob < 1 - p)
+
+        if len(ii) == 0:
+            return np.nan
+
+        return self.surv_times[ii[0]]
+
+    def quantile_ci(self, p, alpha=0.05, method='cloglog'):
+        """
+        Returns a confidence interval for a survival quantile.
+
+        Parameters
+        ----------
+        p : float
+            The probability point for which a confidence interval is
+            determined.
+        alpha : float
+            The confidence interval has nominal coverage probability
+            1 - `alpha`.
+        method : str
+            Function to use for g-transformation, must be one of
+            'cloglog', 'linear', 'log', 'logit' or 'asinsqrt'
+            (case-insensitive).
+
+        Returns
+        -------
+        lb : float
+            The lower confidence limit.
+        ub : float
+            The upper confidence limit.
+
+        Raises
+        ------
+        ValueError
+            If `method` is not one of the supported names.
+
+        Notes
+        -----
+        The confidence interval is obtained by inverting Z-tests.  The
+        limits of the confidence interval will always be observed
+        event times.  If no time point passes the test, (NaN, NaN) is
+        returned; if the last time point passes, the upper limit is
+        infinite.
+
+        References
+        ----------
+        The method is based on the approach used in SAS, documented here:
+
+          http://support.sas.com/documentation/cdl/en/statug/68162/HTML/default/viewer.htm#statug_lifetest_details03.htm
+        """
+
+        tr = norm.ppf(1 - alpha / 2)
+
+        # Each entry is the pair (g, derivative of g).
+        transforms = {
+            "cloglog": (lambda x: np.log(-np.log(x)),
+                        lambda x: -1 / (x * np.log(x))),
+            "linear": (lambda x: x,
+                       lambda x: 1),
+            "log": (np.log,
+                    lambda x: 1 / x),
+            "logit": (lambda x: np.log(x / (1 - x)),
+                      lambda x: 1 / (x * (1 - x))),
+            "asinsqrt": (lambda x: np.arcsin(np.sqrt(x)),
+                         lambda x: 1 / (2 * np.sqrt(x) * np.sqrt(1 - x))),
+        }
+        try:
+            g, gprime = transforms[method.lower()]
+        except KeyError:
+            raise ValueError("unknown method") from None
+
+        # Standardized distance between the transformed survival
+        # estimate and the transformed target level.
+        r = g(self.surv_prob) - g(1 - p)
+        r /= (gprime(self.surv_prob) * self.surv_prob_se)
+
+        ii = np.flatnonzero(np.abs(r) <= tr)
+        if len(ii) == 0:
+            return np.nan, np.nan
+
+        lb = self.surv_times[ii[0]]
+
+        if ii[-1] == len(self.surv_times) - 1:
+            ub = np.inf
+        else:
+            ub = self.surv_times[ii[-1] + 1]
+
+        return lb, ub
+
+    def summary(self):
+        """
+        Return a summary of the estimated survival function.
+
+        The summary is a dataframe containing the unique event times,
+        estimated survival function values, and related quantities.
+        """
+
+        df = pd.DataFrame(index=self.surv_times)
+        df.index.name = "Time"
+        df["Surv prob"] = self.surv_prob
+        df["Surv prob SE"] = self.surv_prob_se
+        df["num at risk"] = self.n_risk
+        df["num events"] = self.n_events
+
+        return df
+
+    def simultaneous_cb(self, alpha=0.05, method="hw", transform="log"):
+        """
+        Returns a simultaneous confidence band for the survival function.
+
+        Parameters
+        ----------
+        alpha : float
+            `1 - alpha` is the desired simultaneous coverage
+            probability for the confidence region.  Currently alpha
+            must be set to 0.05, giving 95% simultaneous intervals.
+        method : str
+            The method used to produce the simultaneous confidence
+            band.  Only the Hall-Wellner (hw) method is currently
+            implemented.
+        transform : str
+            The transformation used to produce the interval (note that
+            the returned interval is on the survival probability scale
+            regardless of which transform is used).  Only `log` and
+            `arcsin` are implemented.
+
+        Returns
+        -------
+        lcb : array_like
+            The lower confidence limits corresponding to the points
+            in `surv_times`.
+        ucb : array_like
+            The upper confidence limits corresponding to the points
+            in `surv_times`.
+
+        Raises
+        ------
+        ValueError
+            If `method` is not 'hw', `alpha` is not 0.05, or
+            `transform` is not one of the implemented names.
+        """
+
+        method = method.lower()
+        if method != "hw":
+            msg = "only the Hall-Wellner (hw) method is implemented"
+            raise ValueError(msg)
+
+        if alpha != 0.05:
+            raise ValueError("alpha must be set to 0.05")
+
+        transform = transform.lower()
+        s2 = self.surv_prob_se**2 / self.surv_prob**2
+        nn = self.n_risk
+        if transform == "log":
+            denom = np.sqrt(nn) * np.log(self.surv_prob)
+            theta = 1.3581 * (1 + nn * s2) / denom
+            theta = np.exp(theta)
+            lcb = self.surv_prob**(1/theta)
+            ucb = self.surv_prob**theta
+        elif transform == "arcsin":
+            k = 1.3581
+            k *= (1 + nn * s2) / (2 * np.sqrt(nn))
+            k *= np.sqrt(self.surv_prob / (1 - self.surv_prob))
+            f = np.arcsin(np.sqrt(self.surv_prob))
+            # Keep the band inside [0, pi/2] on the arcsine scale.
+            v = np.clip(f - k, 0, np.inf)
+            lcb = np.sin(v)**2
+            v = np.clip(f + k, -np.inf, np.pi/2)
+            ucb = np.sin(v)**2
+        else:
+            raise ValueError("Unknown transform")
+
+        return lcb, ucb
+
+
+def survdiff(time, status, group, weight_type=None, strata=None,
+             entry=None, **kwargs):
+    """
+    Test for the equality of two or more survival distributions.
+
+    Parameters
+    ----------
+    time : array_like
+        The event or censoring times.
+    status : array_like
+        The censoring status variable, status=1 indicates that the
+        event occurred, status=0 indicates that the observation was
+        censored.
+    group : array_like
+        Indicators of the groups being compared
+    weight_type : str
+        The following weight types are implemented:
+            None (default) : logrank test
+            fh : Fleming-Harrington, weights by S^(fh_p),
+                 requires exponent fh_p to be provided as keyword
+                 argument; the weights are derived from S defined at
+                 the previous event time, and the first weight is
+                 always 1.
+            gb : Gehan-Breslow, weights by the number at risk
+            tw : Tarone-Ware, weights by the square root of the number
+                 at risk
+    strata : array_like
+        Optional stratum indicators for a stratified test
+    entry : array_like
+        Entry times to handle left truncation. The subject is not in
+        the risk set on or before the entry time.
+
+    Returns
+    -------
+    chisq : The chi-square distributed test statistic value; the
+            degrees of freedom equal the number of distinct groups
+            minus one
+    pvalue : The p-value for the chi^2 test
+    """
+
+    time = np.asarray(time)
+    status = np.asarray(status)
+    group = np.asarray(group)
+    if entry is not None:
+        # Needed so that entry can be subset with a boolean mask.
+        entry = np.asarray(entry)
+
+    gr = np.unique(group)
+
+    if strata is None:
+        obs, var = _survdiff(time, status, group, weight_type, gr,
+                             entry, **kwargs)
+    else:
+        strata = np.asarray(strata)
+        obs, var = 0., 0.
+        for st in np.unique(strata):
+            # could be more efficient?
+            ii = (strata == st)
+            # Entry times must be restricted to the stratum just like
+            # the other per-subject arrays.
+            entry1 = None if entry is None else entry[ii]
+            obs1, var1 = _survdiff(time[ii], status[ii], group[ii],
+                                   weight_type, gr, entry1, **kwargs)
+            obs += obs1
+            var += var1
+
+    chisq = obs.dot(np.linalg.solve(var, obs))  # (O - E).T * V^(-1) * (O - E)
+    # The survival function keeps precision for large statistics,
+    # where 1 - cdf would round to zero.
+    pvalue = chi2.sf(chisq, len(gr)-1)
+
+    return chisq, pvalue
+
+
+def _survdiff(time, status, group, weight_type, gr, entry=None,
+              **kwargs):
+    # Weighted logrank-type statistic for a single stratum.
+    #
+    # Returns the vector of (optionally weighted) observed-minus-expected
+    # event counts for every group in `gr` except the first, and the
+    # matching variance matrix.
+    #
+    # calculations based on https://web.stanford.edu/~lutian/coursepdf/unit6.pdf
+    # formula for variance better to take from https://web.stanford.edu/~lutian/coursepdf/survweek3.pdf
+
+    truncated = entry is not None
+
+    # Rank each time within the distinct times (entry times included
+    # in the ranking when present).
+    if truncated:
+        pooled = np.concatenate((time, entry))
+        utimes, rtimes = np.unique(pooled, return_inverse=True)
+        rtimes = rtimes[:len(time)]
+    else:
+        utimes, rtimes = np.unique(time, return_inverse=True)
+    nt = len(utimes)
+
+    # Per-group entry times (None for every group without truncation).
+    if truncated:
+        entry_by_group = [entry[group == g] for g in gr]
+    else:
+        entry_by_group = [None for _ in gr]
+
+    # Event count and risk set size at each time point, per group.
+    # TODO: should use Pandas groupby
+    nrisk, obsv = [], []
+    for g, entry_g in zip(gr, entry_by_group):
+        member = (group == g)
+        exits = np.bincount(rtimes, weights=member, minlength=nt)
+        obsv.append(np.bincount(rtimes, weights=status*member, minlength=nt))
+
+        if truncated:
+            # Entered strictly before t, minus those who left before t.
+            left = np.cumsum(exits) - exits
+            rentry = np.searchsorted(utimes, entry_g, side='left')
+            entered = np.bincount(rentry, minlength=nt)
+            entered = np.cumsum(entered) - entered
+            nrisk.append(entered - left)
+        else:
+            nrisk.append(np.cumsum(exits[::-1])[::-1])
+
+    # Totals over groups; only times with more than one subject at
+    # risk contribute to the statistic.
+    obs = sum(obsv)
+    nrisk_tot = sum(nrisk)
+    ix = np.flatnonzero(nrisk_tot > 1)
+
+    weights = None
+    if weight_type is not None:
+        weight_type = weight_type.lower()
+        if weight_type == "fh":
+            if "fh_p" not in kwargs:
+                msg = "weight_type type 'fh' requires specification of fh_p"
+                raise ValueError(msg)
+            fh_p = kwargs["fh_p"]
+            # Calculate the survivor function directly to avoid the
+            # overhead of creating a SurvfuncRight object
+            sp = 1 - obs / nrisk_tot.astype(np.float64)
+            sp = np.exp(np.cumsum(np.log(sp)))
+            # Use the survival value from the previous time point; the
+            # first weight is fixed at 1.
+            weights = np.roll(sp**fh_p, 1)
+            weights[0] = 1
+        elif weight_type == "tw":
+            weights = np.sqrt(nrisk_tot)
+        elif weight_type == "gb":
+            weights = nrisk_tot
+        else:
+            raise ValueError("weight_type not implemented")
+
+    dfs = len(gr) - 1
+
+    # Row g holds, over time, the share of the risk set in group g.
+    r = np.vstack(nrisk) / np.clip(nrisk_tot, 1e-10, np.inf)[None, :]
+
+    # Time-dependent scalar factor of the variance, shared by groups.
+    var_denom = np.clip(nrisk_tot - 1, 1e-10, np.inf)
+    var_scalar_part = obs * (nrisk_tot - obs) / var_denom
+
+    groups_oe, groups_var = [], []
+
+    # use the first group as a reference
+    for g in range(1, dfs+1):
+        # Observed minus expected number of events in group g.
+        oe = obsv[g] - r[g]*obs
+
+        # One row of the dfs x dfs variance matrix: r*(1 - r) on the
+        # diagonal entry and -r*r' elsewhere, per time point.
+        indicator = np.eye(1, dfs, g-1).ravel()
+        var_tensor_part = r[1:, :].T * (indicator - r[g, :, None])
+        var = var_tensor_part * var_scalar_part[:, None]
+
+        if weights is not None:
+            oe = weights * oe
+            var = (weights**2)[:, None] * var
+
+        # Sum over the contributing time points.
+        groups_oe.append(oe[ix].sum())
+        groups_var.append(var[ix].sum(axis=0))
+
+    return np.hstack(groups_oe), np.vstack(groups_var)
+
+
+def plot_survfunc(survfuncs, ax=None):
+    """
+    Plot one or more survivor functions.
+
+    Parameters
+    ----------
+    survfuncs : object or array_like
+        A single SurvfuncRight object, or a list of SurvfuncRight
+        objects that are plotted together.
+    ax : optional
+        Passed to `utils.create_mpl_ax`, which supplies the figure and
+        axes used for drawing.
+
+    Returns
+    -------
+    A figure instance on which the plot was drawn.
+
+    Examples
+    --------
+    Add a legend:
+
+    >>> import statsmodels.api as sm
+    >>> from statsmodels.duration.survfunc import plot_survfunc
+    >>> data = sm.datasets.get_rdataset("flchain", "survival").data
+    >>> df = data.loc[data.sex == "F", :]
+    >>> sf0 = sm.SurvfuncRight(df["futime"], df["death"])
+    >>> sf1 = sm.SurvfuncRight(3.0 * df["futime"], df["death"])
+    >>> fig = plot_survfunc([sf0, sf1])
+    >>> ax = fig.get_axes()[0]
+    >>> ax.set_position([0.1, 0.1, 0.64, 0.8])
+    >>> ha, lb = ax.get_legend_handles_labels()
+    >>> leg = fig.legend((ha[0], ha[1]), (lb[0], lb[1]), loc='center right')
+
+    Change the line colors:
+
+    >>> fig = plot_survfunc([sf0, sf1])
+    >>> ax = fig.get_axes()[0]
+    >>> ax.set_position([0.1, 0.1, 0.64, 0.8])
+    >>> ha, lb = ax.get_legend_handles_labels()
+    >>> ha[0].set_color('purple')
+    >>> ha[1].set_color('orange')
+    """
+
+    fig, ax = utils.create_mpl_ax(ax)
+
+    # Normalize the input to a list.  An explicit type check is used
+    # instead of an assert, which is removed under `python -O`; any
+    # other non-iterable object is treated as a single survival function.
+    if isinstance(survfuncs, SurvfuncRight):
+        survfuncs = [survfuncs]
+    else:
+        try:
+            survfuncs = list(survfuncs)
+        except TypeError:
+            survfuncs = [survfuncs]
+
+    for gx, sf in enumerate(survfuncs):
+
+        # The estimated survival function does not include a point at
+        # time 0, include it here for plotting.
+        surv_times = np.concatenate(([0], sf.surv_times))
+        surv_prob = np.concatenate(([1], sf.surv_prob))
+
+        # If the final times are censoring times they are not included
+        # in the survival function so we add them here
+        mxt = np.max(sf.time)
+        if mxt > surv_times[-1]:
+            surv_times = np.concatenate((surv_times, [mxt]))
+            surv_prob = np.concatenate((surv_prob, [surv_prob[-1]]))
+
+        label = getattr(sf, "title", "Group %d" % (gx + 1))
+
+        li, = ax.step(surv_times, surv_prob, '-', label=label, lw=2,
+                      where='post')
+
+        # Plot the censored points at the height of the step that is
+        # in effect at each distinct censoring time.
+        ii = np.flatnonzero(np.logical_not(sf.status))
+        ti = np.unique(sf.time[ii])
+        jj = np.searchsorted(surv_times, ti) - 1
+        sp = surv_prob[jj]
+        ax.plot(ti, sp, '+', ms=12, color=li.get_color(),
+                label=label + " points")
+
+    ax.set_ylim(0, 1.01)
+
+    return fig