some new features

2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/init.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/init.py
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/init.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/init.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_bayes_mi.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_bayes_mi.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_mice.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_mice.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_ros.cpython-312.pyc
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/pycache/test_ros.cpython-312.pyc
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_bayes_mi.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_bayes_mi.py
@ -0,0 +1,175 @@
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+from statsmodels.imputation.bayes_mi import BayesGaussMI, MI
+from numpy.testing import assert_allclose, assert_equal
+
+
+def test_pat():
+    """Rows sharing a missing-data layout end up in the same pattern."""
+    nan = np.nan
+    data = np.asarray([
+        [1, nan, 3],
+        [nan, 2, nan],
+        [3, nan, 0],
+        [nan, 1, nan],
+        [3, 2, 1],
+    ])
+    imputer = BayesGaussMI(data)
+
+    # Rows 0 and 2 miss only the middle column; rows 1 and 3 miss both
+    # outer columns.
+    expected = (np.r_[0, 2], np.r_[1, 3])
+    for idx, wanted in enumerate(expected):
+        assert_allclose(imputer.patterns[idx], wanted)
+
+
+def test_2x2():
+    """Averaged sampler output should match a known bivariate Gaussian."""
+    n_burn = 500
+    n_keep = 500
+    rho = 0.5
+
+    # Simulate two correlated columns, then rescale and shift each one.
+    np.random.seed(3434)
+    sample = np.random.normal(size=(1000, 2))
+    sample[:, 1] = rho*sample[:, 0] + np.sqrt(1-rho**2)*sample[:, 1]
+    sample[:, 0] *= 2
+    sample[:, 0] += 1
+    sample[:, 1] *= 3
+    sample[:, 1] -= 2
+
+    # Knock out values in each column using an independent normal draw.
+    for col in (0, 1):
+        draw = np.random.normal(size=sample.shape[0])
+        sample[draw > 1, col] = np.nan
+
+    sampler = BayesGaussMI(sample)
+
+    # Burn-in iterations are discarded.
+    for _ in range(n_burn):
+        sampler.update()
+
+    # Accumulate the sampled parameters and the moments of the
+    # completed data over the retained iterations.
+    mean = 0
+    cov = 0
+    dmean = 0
+    dcov = 0
+    for _ in range(n_keep):
+        sampler.update()
+        mean += sampler.mean
+        cov += sampler.cov
+        dmean += sampler.data.mean(0)
+        dcov += np.cov(sampler.data.T)
+    mean /= n_keep
+    cov /= n_keep
+    dmean /= n_keep
+    dcov /= n_keep
+
+    want_mean = np.r_[1, -2]
+    want_cov = np.asarray([[4, 6*rho], [6*rho, 9]])
+    assert_allclose(mean, want_mean, 0.1)
+    assert_allclose(dmean, want_mean, 0.1)
+    assert_allclose(cov, want_cov, 0.1)
+    assert_allclose(dcov, want_cov, 0.1)
+
+
+def test_MI():
+    """Run MI with an OLS analysis model on ndarray and DataFrame input.
+
+    The same data (with a few missing cells in every column) is fed to
+    BayesGaussMI first as an ndarray and then as a pandas DataFrame;
+    both runs are checked against the same reference estimates.
+    """
+
+    np.random.seed(414)
+    x = np.random.normal(size=(200, 4))
+    x[[1, 3, 9], 0] = np.nan
+    x[[1, 4, 3], 1] = np.nan
+    x[[2, 11, 21], 2] = np.nan
+    x[[11, 22, 99], 3] = np.nan
+
+    def model_args_fn(x):
+        # Return (endog, exog): regress column 0 on all remaining columns.
+        if isinstance(x, np.ndarray):
+            return (x[:, 0], x[:, 1:])
+        return (x.iloc[:, 0].values, x.iloc[:, 1:].values)
+
+    expected_params = np.r_[-0.05347919, -0.02479701, 0.10075517]
+    expected_cov = np.asarray([[0.00418232, 0.00029746, -0.00035057],
+                               [0.00029746, 0.00407264, 0.00019496],
+                               [-0.00035057, 0.00019496, 0.00509413]])
+
+    # Test with ndarray and pandas input
+    for data in (x, pd.DataFrame(x)):
+        np.random.seed(2342)
+        imp = BayesGaussMI(data.copy())
+        mi = MI(imp, sm.OLS, model_args_fn, burn=0)
+        r = mi.fit()
+        r.summary()  # smoke test
+        # TODO: why does the test tolerance need to be so slack?
+        # There is unexpected variation across versions
+        assert_allclose(r.params, expected_params, rtol=0.25, atol=0)
+        assert_allclose(r.cov_params(), expected_cov, rtol=0.3, atol=0)
+
+
+def test_MI_stat():
+    # Test for MI where we know statistically what should happen. The
+    # analysis model is x0 ~ x1 with standard error 1/sqrt(n) for the
+    # slope parameter.  The nominal n is 1000, but half of the cases
+    # have missing x1.  Then we introduce x2 that is either
+    # independent of x1, or almost perfectly correlated with x1.  In
+    # the first case the SE is 1/sqrt(500), in the second case the SE
+    # is 1/sqrt(1000).
+
+    np.random.seed(414)
+    z = np.random.normal(size=(1000, 3))
+    z[:, 0] += 0.5*z[:, 1]
+
+    def model_args(x):
+        # Return endog, exog: column 0 regressed on column 1.
+        return (x[:, 0], x[:, 1])
+
+    # Each case: (correlation controlling how well x2 proxies for x1,
+    #             expected slope SE, expected fraction of missing info)
+    cases = (
+        (0, 1/np.sqrt(500), 0.5),
+        (0.9999, 1/np.sqrt(1000), 0),
+    )
+    for corr, want_se, want_fmi in cases:
+
+        x = z.copy()
+        x[:, 2] = corr*x[:, 1] + np.sqrt(1 - corr**2)*x[:, 2]
+        x[0:500, 1] = np.nan
+
+        np.random.seed(2342)
+        imp = BayesGaussMI(x.copy())
+        mi = MI(imp, sm.OLS, model_args, nrep=100, skip=10)
+        fit = mi.fit()
+
+        # Check the SE (relative error)
+        se_err = np.abs(fit.bse[0] - want_se) / want_se
+        assert se_err < 0.03
+
+        # Check the FMI (absolute error)
+        fmi_err = np.abs(fit.fmi[0] - want_fmi)
+        assert fmi_err < 0.05
+
+
+def test_mi_formula():
+    """Run MI through the formula interface and check the pooled fit.
+
+    The imputer is given a DataFrame, the analysis model is built from
+    a formula, and a pass-through ``results_cb`` is supplied so the
+    per-imputation results are retained on the returned object.
+    """
+
+    np.random.seed(414)
+    x = np.random.normal(size=(200, 4))
+    x[[1, 3, 9], 0] = np.nan
+    x[[1, 4, 3], 1] = np.nan
+    x[[2, 11, 21], 2] = np.nan
+    x[[11, 22, 99], 3] = np.nan
+    df = pd.DataFrame({"y": x[:, 0], "x1": x[:, 1],
+                       "x2": x[:, 2], "x3": x[:, 3]})
+    fml = "y ~ 0 + x1 + x2 + x3"
+
+    def model_kwds_fn(x):
+        # The formula interface receives the imputed frame as ``data``.
+        return {"data": x}
+
+    def results_cb(x):
+        # Identity callback: keep each fitted result unchanged.
+        return x
+
+    np.random.seed(2342)
+    imp = BayesGaussMI(df.copy())
+    mi = MI(imp, sm.OLS, formula=fml, burn=0,
+            model_kwds_fn=model_kwds_fn)
+
+    r = mi.fit(results_cb=results_cb)
+    r.summary()  # smoke test
+    # TODO: why does the test tolerance need to be so slack?
+    # There is unexpected variation across versions
+    assert_allclose(r.params, np.r_[
+            -0.05347919, -0.02479701, 0.10075517], rtol=0.25, atol=0)
+
+    c = np.asarray([[0.00418232, 0.00029746, -0.00035057],
+                    [0.00029746, 0.00407264, 0.00019496],
+                    [-0.00035057, 0.00019496, 0.00509413]])
+    assert_allclose(r.cov_params(), c, rtol=0.3, atol=0)
+
+    assert_equal(len(r.results), 20)
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_mice.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_mice.py
@ -0,0 +1,413 @@
+import numpy as np
+import pandas as pd
+import pytest
+from statsmodels.imputation import mice
+import statsmodels.api as sm
+from numpy.testing import assert_equal, assert_allclose
+import warnings
+
+try:
+    import matplotlib.pyplot as plt
+except ImportError:
+    pass
+
+# Flip to True to save the figures produced by the plotting tests
+# into test_mice.pdf.
+pdf_output = False
+
+
+pdf = None
+if pdf_output:
+    from matplotlib.backends.backend_pdf import PdfPages
+    pdf = PdfPages("test_mice.pdf")
+
+
+def close_or_save(pdf, fig):
+    """Write ``fig`` to ``pdf`` when PDF output is switched on."""
+    if not pdf_output:
+        return
+    pdf.savefig(fig)
+
+
+def teardown_module():
+    """Close the module-level PDF when PDF output is switched on."""
+    if not pdf_output:
+        return
+    pdf.close()
+
+
+def gendat():
+    """
+    Build a 200-row frame (x1..x5 and y) with missing values in every column.
+    """
+
+    rng = np.random.RandomState(34243)
+
+    nobs = 200
+    nvar = 5
+
+    exog = rng.normal(size=(nobs, nvar))
+    # Make the first column depend on the others, plus noise.
+    exog[:, 0] = exog[:, 1] - exog[:, 2] + 2*exog[:, 4]
+    exog[:, 0] += rng.normal(size=nobs)
+    # Third column becomes a 0/1 indicator.
+    exog[:, 2] = (exog[:, 2] > 0).astype(float)
+
+    endog = exog.sum(1) + rng.normal(size=nobs)
+
+    names = ["x%d" % k for k in range(1, nvar+1)]
+    df = pd.DataFrame(exog, columns=names)
+    df["y"] = endog
+
+    # loc is inclusive of right end, so the stop labels are one less
+    # than the equivalent positional stop.
+    missing_rows = {
+        "x1": slice(0, 59),
+        "x2": slice(0, 39),
+        "x3": slice(10, 29, 2),
+        "x4": slice(20, 49, 3),
+        "x5": slice(40, 44),
+        "y": slice(30, 99, 2),
+    }
+    for col, rows in missing_rows.items():
+        df.loc[rows, col] = np.nan
+
+    return df
+
+
+class TestMICEData:
+    """Tests for mice.MICEData: index bookkeeping, updates and plots."""
+
+    def test_default(self):
+        # Test with all defaults.
+
+        df = gendat()
+        orig = df.copy()
+        mx = pd.notnull(df)
+        imp_data = mice.MICEData(df)
+        nrow, ncol = df.shape
+
+        assert_allclose(imp_data.ix_miss['x1'], np.arange(60))
+        assert_allclose(imp_data.ix_obs['x1'], np.arange(60, 200))
+        assert_allclose(imp_data.ix_miss['x2'], np.arange(40))
+        assert_allclose(imp_data.ix_miss['x3'], np.arange(10, 30, 2))
+        assert_allclose(imp_data.ix_obs['x3'],
+                        np.concatenate((np.arange(10),
+                                        np.arange(11, 30, 2),
+                                        np.arange(30, 200))))
+        assert_equal([set(imp_data.data[col]) for col in imp_data.data],
+                     [set(df[col].dropna()) for col in df])
+
+        for _ in range(3):
+            imp_data.update_all()
+            assert_equal(imp_data.data.shape[0], nrow)
+            assert_equal(imp_data.data.shape[1], ncol)
+            # Observed cells must never be touched by the imputation.
+            assert_allclose(orig[mx], imp_data.data[mx])
+            assert_equal([set(imp_data.data[col]) for col in imp_data.data],
+                         [set(df[col].dropna()) for col in df])
+
+        fml = 'x1 ~ x2 + x3 + x4 + x5 + y'
+        assert_equal(imp_data.conditional_formula['x1'], fml)
+
+        # Order of 3 and 4 is not deterministic
+        # since both have 10 missing
+        assert tuple(imp_data._cycle_order) in (
+            ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+            ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+        )
+
+        # Should make a copy
+        assert df is not imp_data.data
+
+        (endog_obs, exog_obs, exog_miss,
+         predict_obs_kwds, predict_miss_kwds) = imp_data.get_split_data('x3')
+        assert_equal(len(endog_obs), 190)
+        assert_equal(exog_obs.shape, [190, 6])
+        assert_equal(exog_miss.shape, [10, 6])
+
+    def test_settingwithcopywarning(self):
+        """
+        Test that MICEData does not throw a SettingWithCopyWarning when
+        imputing (https://github.com/statsmodels/statsmodels/issues/5430)
+        """
+
+        df = gendat()
+        # There need to be some ints in here for the error to be thrown
+        df['intcol'] = np.arange(len(df))
+        df['intcol'] = df.intcol.astype('int32')
+
+        miceData = mice.MICEData(df)
+
+        with pd.option_context('mode.chained_assignment', 'warn'):
+            with warnings.catch_warnings(record=True) as ws:
+                warnings.simplefilter('always')
+                miceData.update_all()
+                # Only include pandas warnings. There are many from patsy
+                # and sometimes warnings from other packages here.
+                # Path separators are normalised so the filter also works
+                # on POSIX (a literal "\\pandas\\" only ever matched
+                # Windows paths, leaving the assertion vacuous elsewhere).
+                # The category is matched by name as well, since the
+                # recorded filename need not point into pandas itself.
+                ws = [
+                    w for w in ws
+                    if "/pandas/" in w.filename.replace("\\", "/")
+                    or w.category.__name__ == "SettingWithCopyWarning"
+                ]
+                assert len(ws) == 0
+
+    def test_next_sample(self):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+
+        all_x = []
+        for _ in range(2):
+            x = imp_data.next_sample()
+            assert isinstance(x, pd.DataFrame)
+            assert_equal(df.shape, x.shape)
+            all_x.append(x)
+
+        # The returned dataframes are all the same object
+        assert all_x[0] is all_x[1]
+
+    def test_pertmeth(self):
+        # Test with specified perturbation method.
+
+        df = gendat()
+        orig = df.copy()
+        mx = pd.notnull(df)
+        nrow, ncol = df.shape
+
+        for pert_meth in "gaussian", "boot":
+
+            imp_data = mice.MICEData(df, perturbation_method=pert_meth)
+
+            for _ in range(2):
+                imp_data.update_all()
+                assert_equal(imp_data.data.shape[0], nrow)
+                assert_equal(imp_data.data.shape[1], ncol)
+                assert_allclose(orig[mx], imp_data.data[mx])
+
+        # Order of 3 and 4 is not deterministic
+        # since both have 10 missing
+        assert tuple(imp_data._cycle_order) in (
+            ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+            ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+        )
+
+    def test_phreg(self):
+
+        gen = np.random.RandomState(8742)
+        n = 300
+        x1 = gen.normal(size=n)
+        x2 = gen.normal(size=n)
+        event_time = gen.exponential(size=n) * np.exp(x1)
+        obs_time = gen.exponential(size=n)
+        time = np.where(event_time < obs_time, event_time, obs_time)
+        status = np.where(time == event_time, 1, 0)
+        df = pd.DataFrame({"time": time, "status": status, "x1": x1, "x2": x2})
+        df.loc[10:40, 'time'] = np.nan
+        df.loc[10:40, 'status'] = np.nan
+        df.loc[30:50, 'x1'] = np.nan
+        df.loc[40:60, 'x2'] = np.nan
+
+        from statsmodels.duration.hazard_regression import PHReg
+
+        # Save the dataset size at each iteration.
+        hist = []
+
+        def cb(imp):
+            hist.append(imp.data.shape)
+
+        for pm in "gaussian", "boot":
+            idata = mice.MICEData(df, perturbation_method=pm, history_callback=cb)
+            idata.set_imputer("time", "0 + x1 + x2", model_class=PHReg,
+                              init_kwds={"status": mice.PatsyFormula("status")},
+                              predict_kwds={"pred_type": "hr"},
+                              perturbation_method=pm)
+
+            x = idata.next_sample()
+            assert isinstance(x, pd.DataFrame)
+
+        assert all(val == (299, 4) for val in hist)
+
+    def test_set_imputer(self):
+        # Test with per-variable imputation models set explicitly.
+
+        from statsmodels.regression.linear_model import RegressionResultsWrapper
+        from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper
+
+        df = gendat()
+        orig = df.copy()
+        mx = pd.notnull(df)
+        nrow, ncol = df.shape
+
+        imp_data = mice.MICEData(df)
+        imp_data.set_imputer('x1', 'x3 + x4 + x3*x4')
+        imp_data.set_imputer('x2', 'x4 + I(x5**2)')
+        imp_data.set_imputer('x3', model_class=sm.GLM,
+                             init_kwds={"family": sm.families.Binomial()})
+
+        imp_data.update_all()
+        assert_equal(imp_data.data.shape[0], nrow)
+        assert_equal(imp_data.data.shape[1], ncol)
+        assert_allclose(orig[mx], imp_data.data[mx])
+        for j in range(1, 6):
+            name = 'x%d' % j
+            if j == 3:
+                # x3 was given a binomial GLM above.
+                assert isinstance(imp_data.models[name], sm.GLM)
+                assert isinstance(imp_data.models[name].family,
+                                  sm.families.Binomial)
+                assert isinstance(imp_data.results[name], GLMResultsWrapper)
+            else:
+                assert isinstance(imp_data.models[name], sm.OLS)
+                assert isinstance(imp_data.results[name],
+                                  RegressionResultsWrapper)
+
+        fml = 'x1 ~ x3 + x4 + x3*x4'
+        assert_equal(imp_data.conditional_formula['x1'], fml)
+
+        fml = 'x4 ~ x1 + x2 + x3 + x5 + y'
+        assert_equal(imp_data.conditional_formula['x4'], fml)
+
+        # Order of 3 and 4 is not deterministic
+        # since both have 10 missing
+        assert tuple(imp_data._cycle_order) in (
+            ('x5', 'x3', 'x4', 'y', 'x2', 'x1'),
+            ('x5', 'x4', 'x3', 'y', 'x2', 'x1')
+        )
+
+    @pytest.mark.matplotlib
+    def test_plot_missing_pattern(self, close_figures):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+
+        # Smoke-test every combination of the plotting options.
+        for row_order in "pattern", "raw":
+            for hide_complete_rows in False, True:
+                for color_row_patterns in False, True:
+                    plt.clf()
+                    fig = imp_data.plot_missing_pattern(
+                        row_order=row_order,
+                        hide_complete_rows=hide_complete_rows,
+                        color_row_patterns=color_row_patterns)
+                    close_or_save(pdf, fig)
+                    close_figures()
+
+    @pytest.mark.matplotlib
+    def test_plot_bivariate(self, close_figures):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        imp_data.update_all()
+
+        plt.clf()
+        for plot_points in False, True:
+            fig = imp_data.plot_bivariate('x2', 'x4', plot_points=plot_points)
+            fig.get_axes()[0].set_title('plot_bivariate')
+            close_or_save(pdf, fig)
+            close_figures()
+
+    @pytest.mark.matplotlib
+    def test_fit_obs(self, close_figures):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        imp_data.update_all()
+
+        plt.clf()
+        for plot_points in False, True:
+            fig = imp_data.plot_fit_obs('x4', plot_points=plot_points)
+            fig.get_axes()[0].set_title('plot_fit_scatterplot')
+            close_or_save(pdf, fig)
+            close_figures()
+
+    @pytest.mark.matplotlib
+    def test_plot_imputed_hist(self, close_figures):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        imp_data.update_all()
+
+        plt.clf()
+        # The histogram call takes no plot_points option here; the figure
+        # is simply drawn twice, as before.
+        for _ in range(2):
+            fig = imp_data.plot_imputed_hist('x4')
+            fig.get_axes()[0].set_title('plot_imputed_hist')
+            close_or_save(pdf, fig)
+            close_figures()
+
+
+class TestMICE:
+    """Tests for mice.MICE: fitting, sampling and result types."""
+
+    def test_MICE(self):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        mi = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imp_data)
+        result = mi.fit(1, 3)
+
+        assert isinstance(result, mice.MICEResults)
+
+        # Smoke test for results
+        result.summary()
+
+    def test_MICE1(self):
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        mi = mice.MICE("y ~ x1 + x2 + x1:x2", sm.OLS, imp_data)
+
+        from statsmodels.regression.linear_model import RegressionResultsWrapper
+
+        for _ in range(3):
+            x = mi.next_sample()
+            assert isinstance(x, RegressionResultsWrapper)
+
+    def test_MICE1_regularized(self):
+        # Smoke test: fit_kwds are forwarded for the x1 imputation model.
+
+        df = gendat()
+        imp = mice.MICEData(df, perturbation_method='boot')
+        imp.set_imputer('x1', 'x2 + y', fit_kwds={'alpha': 1, 'L1_wt': 0})
+        imp.update_all()
+
+    def test_MICE2(self):
+
+        from statsmodels.genmod.generalized_linear_model import GLMResultsWrapper
+
+        df = gendat()
+        imp_data = mice.MICEData(df)
+        mi = mice.MICE("x3 ~ x1 + x2", sm.GLM, imp_data,
+                       init_kwds={"family": sm.families.Binomial()})
+
+        for _ in range(3):
+            x = mi.next_sample()
+            assert isinstance(x, GLMResultsWrapper)
+            assert isinstance(x.family, sm.families.Binomial)
+
+    # NOTE(review): the name does not start with "test", so default pytest
+    # collection skips this method; presumably disabled on purpose.
+    @pytest.mark.slow
+    def t_est_combine(self):
+
+        gen = np.random.RandomState(3897)
+        x1 = gen.normal(size=300)
+        x2 = gen.normal(size=300)
+        y = x1 + x2 + gen.normal(size=300)
+        x1[0:100] = np.nan
+        x2[250:] = np.nan
+        df = pd.DataFrame({"x1": x1, "x2": x2, "y": y})
+        idata = mice.MICEData(df)
+        mi = mice.MICE("y ~ x1 + x2", sm.OLS, idata, n_skip=20)
+        result = mi.fit(10, 20)
+
+        fmi = np.asarray([0.1778143, 0.11057262, 0.29626521])
+        assert_allclose(result.frac_miss_info, fmi, atol=1e-5)
+
+        params = np.asarray([-0.03486102, 0.96236808, 0.9970371])
+        assert_allclose(result.params, params, atol=1e-5)
+
+        tvalues = np.asarray([-0.54674776, 15.28091069, 13.61359403])
+        assert_allclose(result.tvalues, tvalues, atol=1e-5)
+
+
+def test_micedata_miss1():
+    # test for #4375
+    rng = np.random.RandomState(3897)
+    frame = pd.DataFrame(rng.rand(50, 4),
+                         columns=['var1', 'var2', 'var3', 'var4'])
+    # one column with a single missing value
+    frame.iloc[1, 1] = np.nan
+    frame.iloc[[1, 3], 2] = np.nan
+
+    imputed = mice.MICEData(frame)
+    imputed.update_all()
+
+    # After one update cycle no missing cell may remain.
+    n_missing = imputed.data.isnull().values.sum()
+    assert_equal(n_missing, 0)
+
+    # Positions originally missing, per column.
+    expected_miss = {
+        'var1': np.array([], dtype=np.int64),
+        'var2': np.array([1], dtype=np.int64),
+        'var3': np.array([1, 3], dtype=np.int64),
+        'var4': np.array([], dtype=np.int64),
+    }
+
+    for name, wanted in expected_miss.items():
+        assert_equal(imputed.ix_miss[name], wanted)
--- a/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_ros.py
+++ b/.venv/lib/python3.12/site-packages/statsmodels/imputation/tests/test_ros.py
@ -0,0 +1,654 @@
+from statsmodels.compat.pandas import assert_series_equal, assert_frame_equal
+
+from io import StringIO
+from textwrap import dedent
+
+import numpy as np
+import numpy.testing as npt
+
+import numpy
+from numpy.testing import assert_equal
+import pandas
+import pytest
+
+from statsmodels.imputation import ros
+
+
+def load_basic_data():
+    """Parse the embedded res/qual table and add conc and censored columns."""
+    csv_text = (
+        "res,qual\n2.00,=\n4.20,=\n4.62,=\n5.00,ND\n5.00,ND\n5.50,ND\n"
+        "5.57,=\n5.66,=\n5.75,ND\n5.86,=\n6.65,=\n6.78,=\n6.79,=\n7.50,=\n"
+        "7.50,=\n7.50,=\n8.63,=\n8.71,=\n8.99,=\n9.50,ND\n9.50,ND\n9.85,=\n"
+        "10.82,=\n11.00,ND\n11.25,=\n11.25,=\n12.20,=\n14.92,=\n16.77,=\n"
+        "17.81,=\n19.16,=\n19.19,=\n19.64,=\n20.18,=\n22.97,=\n"
+    )
+    table = pandas.read_csv(StringIO(csv_text))
+
+    # 'conc' mirrors the raw result; a row is censored when its
+    # qualifier is 'ND'.
+    is_nondetect = table['qual'] == 'ND'
+    table.loc[:, 'conc'] = table['res']
+    table.loc[:, 'censored'] = is_nondetect
+
+    return table
+
+
+def load_intermediate_data():
+    """Return the example data with det_limit_index and rank filled in."""
+    columns = ['censored', 'conc', 'det_limit_index', 'rank']
+    # One tuple per row, in the column order given above.
+    rows = [
+        (True, 5.0, 1, 1),
+        (True, 5.0, 1, 2),
+        (True, 5.5, 2, 1),
+        (True, 5.75, 3, 1),
+        (True, 9.5, 4, 1),
+        (True, 9.5, 4, 2),
+        (True, 11.0, 5, 1),
+        (False, 2.0, 0, 1),
+        (False, 4.2, 0, 2),
+        (False, 4.62, 0, 3),
+        (False, 5.57, 2, 1),
+        (False, 5.66, 2, 2),
+        (False, 5.86, 3, 1),
+        (False, 6.65, 3, 2),
+        (False, 6.78, 3, 3),
+        (False, 6.79, 3, 4),
+        (False, 7.5, 3, 5),
+        (False, 7.5, 3, 6),
+        (False, 7.5, 3, 7),
+        (False, 8.63, 3, 8),
+        (False, 8.71, 3, 9),
+        (False, 8.99, 3, 10),
+        (False, 9.85, 4, 1),
+        (False, 10.82, 4, 2),
+        (False, 11.25, 5, 1),
+        (False, 11.25, 5, 2),
+        (False, 12.2, 5, 3),
+        (False, 14.92, 5, 4),
+        (False, 16.77, 5, 5),
+        (False, 17.81, 5, 6),
+        (False, 19.16, 5, 7),
+        (False, 19.19, 5, 8),
+        (False, 19.64, 5, 9),
+        (False, 20.18, 5, 10),
+        (False, 22.97, 5, 11),
+    ]
+
+    return pandas.DataFrame(rows, columns=columns)
+
+
+def load_advanced_data():
+    """Return the example data extended with plot_pos and Zprelim columns."""
+    # One record per row; the values are reference numbers and must be
+    # kept exactly as written.
+    records = [
+        {'Zprelim': -1.4456202174142005, 'censored': True, 'conc': 5.0,
+         'det_limit_index': 1, 'plot_pos': 0.07414187643020594, 'rank': 1},
+        {'Zprelim': -1.2201035333697587, 'censored': True, 'conc': 5.0,
+         'det_limit_index': 1, 'plot_pos': 0.11121281464530891, 'rank': 2},
+        {'Zprelim': -1.043822530159519, 'censored': True, 'conc': 5.5,
+         'det_limit_index': 2, 'plot_pos': 0.14828375286041187, 'rank': 1},
+        {'Zprelim': -1.0438225301595188, 'censored': True, 'conc': 5.75,
+         'det_limit_index': 3, 'plot_pos': 0.1482837528604119, 'rank': 1},
+        {'Zprelim': -0.8109553641377003, 'censored': True, 'conc': 9.5,
+         'det_limit_index': 4, 'plot_pos': 0.20869565217391303, 'rank': 1},
+        {'Zprelim': -0.4046779045300476, 'censored': True, 'conc': 9.5,
+         'det_limit_index': 4, 'plot_pos': 0.34285714285714286, 'rank': 2},
+        {'Zprelim': -0.20857169501420522, 'censored': True, 'conc': 11.0,
+         'det_limit_index': 5, 'plot_pos': 0.41739130434782606, 'rank': 1},
+        {'Zprelim': -1.5927654676048002, 'censored': False, 'conc': 2.0,
+         'det_limit_index': 0, 'plot_pos': 0.055606407322654455, 'rank': 1},
+        {'Zprelim': -1.2201035333697587, 'censored': False, 'conc': 4.2,
+         'det_limit_index': 0, 'plot_pos': 0.11121281464530891, 'rank': 2},
+        {'Zprelim': -0.9668111610681008, 'censored': False, 'conc': 4.62,
+         'det_limit_index': 0, 'plot_pos': 0.16681922196796337, 'rank': 3},
+        {'Zprelim': -0.6835186393930371, 'censored': False, 'conc': 5.57,
+         'det_limit_index': 2, 'plot_pos': 0.24713958810068648, 'rank': 1},
+        {'Zprelim': -0.6072167256926887, 'censored': False, 'conc': 5.66,
+         'det_limit_index': 2, 'plot_pos': 0.27185354691075514, 'rank': 2},
+        {'Zprelim': -0.44953240276543616, 'censored': False, 'conc': 5.86,
+         'det_limit_index': 3, 'plot_pos': 0.3265238194299979, 'rank': 1},
+        {'Zprelim': -0.36788328223414807, 'censored': False, 'conc': 6.65,
+         'det_limit_index': 3, 'plot_pos': 0.35648013313917204, 'rank': 2},
+        {'Zprelim': -0.28861907892223937, 'censored': False, 'conc': 6.78,
+         'det_limit_index': 3, 'plot_pos': 0.38643644684834616, 'rank': 3},
+        {'Zprelim': -0.21113039741112186, 'censored': False, 'conc': 6.79,
+         'det_limit_index': 3, 'plot_pos': 0.4163927605575203, 'rank': 4},
+        {'Zprelim': -0.1348908823006299, 'censored': False, 'conc': 7.5,
+         'det_limit_index': 3, 'plot_pos': 0.4463490742666944, 'rank': 5},
+        {'Zprelim': -0.05942854708257491, 'censored': False, 'conc': 7.5,
+         'det_limit_index': 3, 'plot_pos': 0.4763053879758685, 'rank': 6},
+        {'Zprelim': 0.015696403006170083, 'censored': False, 'conc': 7.5,
+         'det_limit_index': 3, 'plot_pos': 0.5062617016850427, 'rank': 7},
+        {'Zprelim': 0.09091016994359362, 'censored': False, 'conc': 8.63,
+         'det_limit_index': 3, 'plot_pos': 0.5362180153942168, 'rank': 8},
+        {'Zprelim': 0.16664251178856201, 'censored': False, 'conc': 8.71,
+         'det_limit_index': 3, 'plot_pos': 0.5661743291033909, 'rank': 9},
+        {'Zprelim': 0.24334426739770573, 'censored': False, 'conc': 8.99,
+         'det_limit_index': 3, 'plot_pos': 0.596130642812565, 'rank': 10},
+        {'Zprelim': 0.3744432988606558, 'censored': False, 'conc': 9.85,
+         'det_limit_index': 4, 'plot_pos': 0.6459627329192545, 'rank': 1},
+        {'Zprelim': 0.4284507519609981, 'censored': False, 'conc': 10.82,
+         'det_limit_index': 4, 'plot_pos': 0.6658385093167701, 'rank': 2},
+        {'Zprelim': 0.5589578655042562, 'censored': False, 'conc': 11.25,
+         'det_limit_index': 5, 'plot_pos': 0.7119047619047619, 'rank': 1},
+        {'Zprelim': 0.6374841609623771, 'censored': False, 'conc': 11.25,
+         'det_limit_index': 5, 'plot_pos': 0.7380952380952381, 'rank': 2},
+        {'Zprelim': 0.7201566171385521, 'censored': False, 'conc': 12.2,
+         'det_limit_index': 5, 'plot_pos': 0.7642857142857142, 'rank': 3},
+        {'Zprelim': 0.8080746339118065, 'censored': False, 'conc': 14.92,
+         'det_limit_index': 5, 'plot_pos': 0.7904761904761904, 'rank': 4},
+        {'Zprelim': 0.9027347916438648, 'censored': False, 'conc': 16.77,
+         'det_limit_index': 5, 'plot_pos': 0.8166666666666667, 'rank': 5},
+        {'Zprelim': 1.0062699858608395, 'censored': False, 'conc': 17.81,
+         'det_limit_index': 5, 'plot_pos': 0.8428571428571429, 'rank': 6},
+        {'Zprelim': 1.1219004674623523, 'censored': False, 'conc': 19.16,
+         'det_limit_index': 5, 'plot_pos': 0.8690476190476191, 'rank': 7},
+        {'Zprelim': 1.2548759122271174, 'censored': False, 'conc': 19.19,
+         'det_limit_index': 5, 'plot_pos': 0.8952380952380953, 'rank': 8},
+        {'Zprelim': 1.414746425534976, 'censored': False, 'conc': 19.64,
+         'det_limit_index': 5, 'plot_pos': 0.9214285714285714, 'rank': 9},
+        {'Zprelim': 1.622193585315426, 'censored': False, 'conc': 20.18,
+         'det_limit_index': 5, 'plot_pos': 0.9476190476190476, 'rank': 10},
+        {'Zprelim': 1.9399896117517081, 'censored': False, 'conc': 22.97,
+         'det_limit_index': 5, 'plot_pos': 0.9738095238095239, 'rank': 11},
+    ]
+
+    return pandas.DataFrame(records)
+
+
+def load_basic_cohn():
+    """Return the reference Cohn-numbers table for the basic example data."""
+    nan = numpy.nan
+    columns = ['lower_dl', 'ncen_equal', 'nobs_below',
+               'nuncen_above', 'prob_exceedance', 'upper_dl']
+    # One tuple per detection limit, in the column order given above;
+    # the last row is all-NaN apart from a zero exceedance probability.
+    rows = [
+        (2.0, 0.0, 0.0, 3.0, 1.0, 5.0),
+        (5.0, 2.0, 5.0, 0.0, 0.77757437070938218, 5.5),
+        (5.5, 1.0, 6.0, 2.0, 0.77757437070938218, 5.75),
+        (5.75, 1.0, 9.0, 10.0, 0.7034324942791762, 9.5),
+        (9.5, 2.0, 21.0, 2.0, 0.37391304347826088, 11.0),
+        (11.0, 1.0, 24.0, 11.0, 0.31428571428571428, numpy.inf),
+        (nan, nan, nan, nan, 0.0, nan),
+    ]
+    return pandas.DataFrame(rows, columns=columns)
+
+
+class Test__ros_sort:
+    """Tests for ros._ros_sort on the basic example data."""
+
+    def setup_method(self):
+        self.df = load_basic_data()
+
+        # Expected ordering: censored rows first, then uncensored rows,
+        # each group in ascending concentration.
+        censored_conc = [5.0, 5.0, 5.5, 5.75, 9.5, 9.5, 11.0]
+        uncensored_conc = [
+            2.0, 4.2, 4.62, 5.57, 5.66, 5.86, 6.65,
+            6.78, 6.79, 7.5, 7.5, 7.5, 8.63, 8.71,
+            8.99, 9.85, 10.82, 11.25, 11.25, 12.2, 14.92,
+            16.77, 17.81, 19.16, 19.19, 19.64, 20.18, 22.97,
+        ]
+        records = [{'censored': True, 'conc': value}
+                   for value in censored_conc]
+        records += [{'censored': False, 'conc': value}
+                    for value in uncensored_conc]
+
+        self.expected_baseline = pandas.DataFrame(records)[['conc', 'censored']]
+
+        # Same table without its final (largest) row.
+        self.expected_with_warning = self.expected_baseline.iloc[:-1]
+
+    def test_baseline(self):
+        sorted_df = ros._ros_sort(self.df, 'conc', 'censored')
+        assert_frame_equal(sorted_df, self.expected_baseline)
+
+    def test_censored_greater_than_max(self):
+        # Mark the largest observation as censored before sorting.
+        data = self.df.copy()
+        data.loc[data['conc'].idxmax(), 'censored'] = True
+        sorted_df = ros._ros_sort(data, 'conc', 'censored')
+        assert_frame_equal(sorted_df, self.expected_with_warning)
+
+
+class Test_cohn_numbers:
+    """Tests for ``ros.cohn_numbers`` run against the basic dataset."""
+    def setup_method(self):
+        # Input data, reloaded for every test method.
+        self.df = load_basic_data()
+        # Column order the expected frame is arranged in before comparison.
+        self.final_cols = ['lower_dl', 'upper_dl', 'nuncen_above', 'nobs_below',
+                           'ncen_equal', 'prob_exceedance']
+
+        # One row per detection-limit interval; the final row is all-NaN
+        # except for a ``prob_exceedance`` of 0.0.
+        self.expected_baseline = pandas.DataFrame([
+            {'lower_dl': 2.0, 'ncen_equal': 0.0, 'nobs_below': 0.0,
+             'nuncen_above': 3.0, 'prob_exceedance': 1.0, 'upper_dl': 5.0},
+            {'lower_dl': 5.0, 'ncen_equal': 2.0, 'nobs_below': 5.0,
+             'nuncen_above': 0.0, 'prob_exceedance': 0.77757437070938218, 'upper_dl': 5.5},
+            {'lower_dl': 5.5, 'ncen_equal': 1.0, 'nobs_below': 6.0,
+             'nuncen_above': 2.0, 'prob_exceedance': 0.77757437070938218, 'upper_dl': 5.75},
+            {'lower_dl': 5.75, 'ncen_equal': 1.0, 'nobs_below': 9.0,
+             'nuncen_above': 10.0, 'prob_exceedance': 0.7034324942791762, 'upper_dl': 9.5},
+            {'lower_dl': 9.5, 'ncen_equal': 2.0, 'nobs_below': 21.0,
+             'nuncen_above': 2.0, 'prob_exceedance': 0.37391304347826088, 'upper_dl': 11.0},
+            {'lower_dl': 11.0, 'ncen_equal': 1.0, 'nobs_below': 24.0,
+             'nuncen_above': 11.0, 'prob_exceedance': 0.31428571428571428, 'upper_dl': numpy.inf},
+            {'lower_dl': numpy.nan, 'ncen_equal': numpy.nan, 'nobs_below': numpy.nan,
+             'nuncen_above': numpy.nan, 'prob_exceedance': 0.0, 'upper_dl': numpy.nan}
+        ])[self.final_cols]
+
+
+    def test_baseline(self):
+        """The Cohn numbers of the basic dataset match the expected frame."""
+        result = ros.cohn_numbers(self.df, observations='conc', censorship='censored')
+        assert_frame_equal(result, self.expected_baseline)
+
+    def test_no_NDs(self):
+        """With no censored rows the result has zero rows and six columns."""
+        _df = self.df.copy()
+        # An all-False censorship column means there are no non-detects.
+        _df['qual'] = False
+        result = ros.cohn_numbers(_df, observations='conc', censorship='qual')
+        assert result.shape == (0, 6)
+
+
+class Test__detection_limit_index:
+    def setup_method(self):
+        self.cohn = load_basic_cohn()
+        self.empty_cohn = pandas.DataFrame(numpy.empty((0, 7)))
+
+    def test_empty(self):
+        assert_equal(ros._detection_limit_index(None, self.empty_cohn), 0)
+
+    def test_populated(self):
+        assert_equal(ros._detection_limit_index(3.5, self.cohn), 0)
+        assert_equal(ros._detection_limit_index(6.0, self.cohn), 3)
+        assert_equal(ros._detection_limit_index(12.0, self.cohn), 5)
+
+    def test_out_of_bounds(self):
+        with pytest.raises(IndexError):
+            ros._detection_limit_index(0, self.cohn)
+
+
+def test__ros_group_rank():
+    df = pandas.DataFrame({
+        'dl_idx': [1] * 12,
+        'params': list('AABCCCDE') + list('DCBA'),
+        'values': list(range(12))
+    })
+
+    result = ros._ros_group_rank(df, 'dl_idx', 'params')
+    expected = pandas.Series([1, 2, 1, 1, 2, 3, 1, 1, 2, 4, 2, 3], name='rank')
+    assert_series_equal(result.astype(int), expected.astype(int))
+
+
+class Test__ros_plot_pos:
+    def setup_method(self):
+        self.cohn = load_basic_cohn()
+
+    def test_uncensored_1(self):
+        row = {'censored': False, 'det_limit_index': 2, 'rank': 1}
+        result = ros._ros_plot_pos(row, 'censored', self.cohn)
+        assert_equal(result, 0.24713958810068648)
+
+    def test_uncensored_2(self):
+        row = {'censored': False, 'det_limit_index': 2, 'rank': 12}
+        result = ros._ros_plot_pos(row, 'censored', self.cohn)
+        assert_equal(result, 0.51899313501144173)
+
+    def test_censored_1(self):
+        row = {'censored': True, 'det_limit_index': 5, 'rank': 4}
+        result = ros._ros_plot_pos(row, 'censored', self.cohn)
+        assert_equal(result, 1.3714285714285714)
+
+    def test_censored_2(self):
+        row = {'censored': True, 'det_limit_index': 4, 'rank': 2}
+        result = ros._ros_plot_pos(row, 'censored', self.cohn)
+        assert_equal(result, 0.41739130434782606)
+
+
+def test__norm_plot_pos():
+    result = ros._norm_plot_pos([1, 2, 3, 4])
+    expected = numpy.array([ 0.159104,  0.385452,  0.614548,  0.840896])
+    npt.assert_array_almost_equal(result, expected)
+
+
+def test_plotting_positions():
+    df = load_intermediate_data()
+    cohn = load_basic_cohn()
+
+    results = ros.plotting_positions(df, 'censored', cohn)
+    expected = numpy.array([
+        0.07414188,  0.11121281,  0.14828375,  0.14828375,  0.20869565,
+        0.34285714,  0.4173913 ,  0.05560641,  0.11121281,  0.16681922,
+        0.24713959,  0.27185355,  0.32652382,  0.35648013,  0.38643645,
+        0.41639276,  0.44634907,  0.47630539,  0.5062617 ,  0.53621802,
+        0.56617433,  0.59613064,  0.64596273,  0.66583851,  0.71190476,
+        0.73809524,  0.76428571,  0.79047619,  0.81666667,  0.84285714,
+        0.86904762,  0.8952381 ,  0.92142857,  0.94761905,  0.97380952
+    ])
+    npt.assert_array_almost_equal(results, expected)
+
+
+def test__impute():
+    expected = numpy.array([
+        3.11279729,   3.60634338,   4.04602788,   4.04602788,
+        4.71008116,   6.14010906,   6.97841457,   2.        ,
+        4.2       ,   4.62      ,   5.57      ,   5.66      ,
+        5.86      ,   6.65      ,   6.78      ,   6.79      ,
+        7.5       ,   7.5       ,   7.5       ,   8.63      ,
+        8.71      ,   8.99      ,   9.85      ,  10.82      ,
+        11.25      ,  11.25      ,  12.2       ,  14.92      ,
+        16.77      ,  17.81      ,  19.16      ,  19.19      ,
+        19.64      ,  20.18      ,  22.97
+    ])
+    df = load_advanced_data()
+    df = ros._impute(df, 'conc', 'censored', numpy.log, numpy.exp)
+    result = df['final'].values
+    npt.assert_array_almost_equal(result, expected)
+
+
+def test__do_ros():
+    expected = numpy.array([
+        3.11279729,   3.60634338,   4.04602788,   4.04602788,
+        4.71008116,   6.14010906,   6.97841457,   2.        ,
+        4.2       ,   4.62      ,   5.57      ,   5.66      ,
+        5.86      ,   6.65      ,   6.78      ,   6.79      ,
+        7.5       ,   7.5       ,   7.5       ,   8.63      ,
+        8.71      ,   8.99      ,   9.85      ,  10.82      ,
+        11.25      ,  11.25      ,  12.2       ,  14.92      ,
+        16.77      ,  17.81      ,  19.16      ,  19.19      ,
+        19.64      ,  20.18      ,  22.97
+    ])
+
+    df = load_basic_data()
+    df = ros._do_ros(df, 'conc', 'censored', numpy.log, numpy.exp)
+    result = df['final'].values
+    npt.assert_array_almost_equal(result, expected)
+
+
+class CheckROSMixin:
+    def test_ros_df(self):
+        result = ros.impute_ros(self.rescol, self.cencol, df=self.df)
+        npt.assert_array_almost_equal(
+            sorted(result),
+            sorted(self.expected_final),
+            decimal=self.decimal
+        )
+
+    def test_ros_arrays(self):
+        result = ros.impute_ros(self.df[self.rescol], self.df[self.cencol], df=None)
+        npt.assert_array_almost_equal(
+            sorted(result),
+            sorted(self.expected_final),
+            decimal=self.decimal
+        )
+
+    def test_cohn(self):
+        cols = [
+            'nuncen_above', 'nobs_below',
+            'ncen_equal', 'prob_exceedance'
+        ]
+        cohn = ros.cohn_numbers(self.df, self.rescol, self.cencol)
+        # Use round in place of the deprecated check_less_precise arg
+        assert_frame_equal(
+            np.round(cohn[cols], 3),
+            np.round(self.expected_cohn[cols], 3),
+        )
+
+
+class Test_ROS_HelselAppendixB(CheckROSMixin):
+    """
+    Appendix B dataset from "Estimation of Descriptive Statistics for
+    Multiply Censored Water Quality Data", Water Resources Research,
+    Vol 24, No 12, pp 1997 - 2004. December 1988.
+    """
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 2
+    # Observed values; position i pairs with ``cen[i]``.
+    res = numpy.array([
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10., 10., 10.,
+        3.0, 7.0, 9.0, 12., 15., 20., 27., 33., 50.
+    ])
+    # Censorship flags: the first nine observations are censored.
+    cen = numpy.array([
+        True, True, True, True, True, True, True, True, True,
+        False, False, False, False, False, False, False,
+        False, False
+    ])
+    rescol = 'obs'
+    cencol = 'cen'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Expected imputed values; the mixin sorts both sides before comparing.
+    expected_final = numpy.array([
+        0.47,  0.85, 1.11, 1.27, 1.76, 2.34, 2.50, 3.00, 3.03,
+        4.80, 7.00, 9.00, 12.0, 15.0, 20.0, 27.0, 33.0, 50.0
+    ])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([3.0, 6.0, numpy.nan]),
+        'nobs_below': numpy.array([6.0, 12.0, numpy.nan]),
+        'ncen_equal': numpy.array([6.0, 3.0, numpy.nan]),
+        'prob_exceedance': numpy.array([0.55556, 0.33333, 0.0]),
+    })
+
+
+class Test_ROS_HelselArsenic(CheckROSMixin):
+    """
+    Oahu arsenic data from Nondetects and Data Analysis by
+    Dennis R. Helsel (John Wiley, 2005)
+
+    Plotting positions are fudged relative to the source data, since the
+    modeled data is what matters (and the source data's plotting positions
+    are not uniformly spaced, which seems weird).
+    """
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 2
+    # Observed values; position i pairs with ``cen[i]``.
+    res = numpy.array([
+        3.2, 2.8, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+        2.0, 2.0, 1.7, 1.5, 1.0, 1.0, 1.0, 1.0,
+        0.9, 0.9, 0.7, 0.7, 0.6, 0.5, 0.5, 0.5
+    ])
+
+    # Censorship flags (True marks a censored observation).
+    cen = numpy.array([
+        False, False, True, True, True, True, True,
+        True, True, True, False, False, True, True,
+        True, True, False, True, False, False, False,
+        False, False, False
+    ])
+    rescol = 'obs'
+    cencol = 'cen'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Expected imputed values; the mixin sorts both sides before comparing.
+    expected_final = numpy.array([
+        3.20, 2.80, 1.42, 1.14, 0.95, 0.81, 0.68, 0.57,
+        0.46, 0.35, 1.70, 1.50, 0.98, 0.76, 0.58, 0.41,
+        0.90, 0.61, 0.70, 0.70, 0.60, 0.50, 0.50, 0.50
+    ])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([6.0, 1.0, 2.0, 2.0, numpy.nan]),
+        'nobs_below': numpy.array([0.0, 7.0, 12.0, 22.0, numpy.nan]),
+        'ncen_equal': numpy.array([0.0, 1.0, 4.0, 8.0, numpy.nan]),
+        'prob_exceedance': numpy.array([1.0, 0.3125, 0.21429, 0.0833, 0.0]),
+    })
+
+
+class Test_ROS_RNADAdata(CheckROSMixin):
+    # ROS test case whose input is a whitespace-delimited table parsed with
+    # pandas.read_csv.
+    # NOTE(review): presumably taken from the R NADA package -- confirm source.
+
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 3
+    # Two columns: ``res`` (observed value) and ``cen`` (censorship flag).
+    datastring = StringIO(dedent("""\
+        res cen
+        0.090  True
+        0.090  True
+        0.090  True
+        0.101 False
+        0.136 False
+        0.340 False
+        0.457 False
+        0.514 False
+        0.629 False
+        0.638 False
+        0.774 False
+        0.788 False
+        0.900  True
+        0.900  True
+        0.900  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000 False
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.000  True
+        1.100 False
+        2.000 False
+        2.000 False
+        2.404 False
+        2.860 False
+        3.000 False
+        3.000 False
+        3.705 False
+        4.000 False
+        5.000 False
+        5.960 False
+        6.000 False
+        7.214 False
+       16.000 False
+       17.716 False
+       25.000 False
+       51.000 False"""))
+    rescol = 'res'
+    cencol = 'cen'
+    # Any run of whitespace separates the fields.
+    df = pandas.read_csv(datastring, sep=r'\s+')
+    # Expected imputed values; the mixin sorts both sides before comparing.
+    expected_final = numpy.array([
+        0.01907990,  0.03826254,  0.06080717,  0.10100000,  0.13600000,
+        0.34000000,  0.45700000,  0.51400000,  0.62900000,  0.63800000,
+        0.77400000,  0.78800000,  0.08745914,  0.25257575,  0.58544205,
+        0.01711153,  0.03373885,  0.05287083,  0.07506079,  0.10081573,
+        1.00000000,  0.13070334,  0.16539309,  0.20569039,  0.25257575,
+        0.30725491,  0.37122555,  0.44636843,  0.53507405,  0.64042242,
+        0.76644378,  0.91850581,  1.10390531,  1.10000000,  2.00000000,
+        2.00000000,  2.40400000,  2.86000000,  3.00000000,  3.00000000,
+        3.70500000,  4.00000000,  5.00000000,  5.96000000,  6.00000000,
+        7.21400000, 16.00000000, 17.71600000, 25.00000000, 51.00000000
+    ])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([9., 0.0, 18., numpy.nan]),
+        'nobs_below': numpy.array([3., 15., 32., numpy.nan]),
+        'ncen_equal': numpy.array([3., 3., 17., numpy.nan]),
+        'prob_exceedance': numpy.array([0.84, 0.36, 0.36, 0]),
+    })
+
+
+class Test_NoOp_ZeroND(CheckROSMixin):
+    # Case with no censored observations at all.
+
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 2
+    # Seed the global RNG so the lognormal sample below is reproducible.
+    # This runs when the class body executes, i.e. at import time.
+    numpy.random.seed(0)
+    N = 20
+    res = numpy.random.lognormal(size=N)
+    # Every observation is flagged as uncensored.
+    cen = [False] * N
+    rescol = 'obs'
+    cencol = 'cen'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Expected values, listed in ascending order and compared to two decimals.
+    expected_final = numpy.array([
+        0.38, 0.43, 0.81, 0.86, 0.90, 1.13, 1.15, 1.37, 1.40,
+        1.49, 1.51, 1.56, 2.14, 2.59, 2.66, 4.28, 4.46, 5.84,
+        6.47, 9.4
+    ])
+
+    # Expected Cohn numbers: an empty table (zero rows in every column).
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([]),
+        'nobs_below': numpy.array([]),
+        'ncen_equal': numpy.array([]),
+        'prob_exceedance': numpy.array([]),
+    })
+
+
+class Test_ROS_OneND(CheckROSMixin):
+    # Case with exactly one censored observation.
+
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 3
+    # Observed values; position i pairs with ``cen[i]``.
+    res = numpy.array([
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10., 10., 10.,
+        3.0, 7.0, 9.0, 12., 15., 20., 27., 33., 50.
+    ])
+    # Only the first observation is censored.
+    cen = numpy.array([
+        True, False, False, False, False, False, False, False, False,
+        False, False, False, False, False, False, False,
+        False, False
+    ])
+    rescol = 'conc'
+    cencol = 'cen'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Differs from ``res`` only in the first entry (0.24 instead of 1.0).
+    expected_final = numpy.array([
+        0.24, 1.0, 1.0, 1.0, 1.0, 1.0, 10., 10., 10.,
+        3.0 , 7.0, 9.0, 12., 15., 20., 27., 33., 50.
+    ])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([17.0, numpy.nan]),
+        'nobs_below': numpy.array([1.0, numpy.nan]),
+        'ncen_equal': numpy.array([1.0, numpy.nan]),
+        'prob_exceedance': numpy.array([0.94444, 0.0]),
+    })
+
+
+class Test_HalfDLs_80pctNDs(CheckROSMixin):
+    # Case where 15 of the 18 observations are censored.
+
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 3
+    # Observed values; position i pairs with ``cen[i]``.
+    res = numpy.array([
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10., 10., 10.,
+        3.0, 7.0, 9.0, 12., 15., 20., 27., 33., 50.
+    ])
+    # The first fifteen observations are censored; the last three are not.
+    cen = numpy.array([
+        True, True, True, True, True, True, True, True,
+        True, True, True, True, True, True, True, False,
+        False, False
+    ])
+    rescol = 'value'
+    cencol = 'qual'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Each censored entry is half of its ``res`` value; the three
+    # uncensored entries are unchanged.
+    expected_final = numpy.array([
+        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 5.0, 5.0, 5.0,
+        1.5, 3.5, 4.5, 6.0, 7.5, 10., 27., 33., 50.
+    ])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([0., 0., 0., 0., 0., 0., 0., 3., numpy.nan]),
+        'nobs_below': numpy.array([6., 7., 8., 9., 12., 13., 14., 15., numpy.nan]),
+        'ncen_equal': numpy.array([6., 1., 1., 1., 3., 1., 1., 1., numpy.nan]),
+        'prob_exceedance': numpy.array([0.16667] * 8 + [0.]),
+    })
+
+
+class Test_HaflDLs_OneUncensored(CheckROSMixin):
+    # Case with a single uncensored observation.
+    # NOTE(review): "Hafl" in the class name looks like a typo for "Half";
+    # left unchanged because renaming alters the collected test id.
+
+    # Decimal places used by the mixin's array comparisons.
+    decimal = 3
+    res = numpy.array([1.0, 1.0, 12., 15., ])
+    # Only the last observation is uncensored.
+    cen = numpy.array([True, True, True, False ])
+    rescol = 'value'
+    cencol = 'qual'
+    # Input frame handed to the mixin's tests.
+    df = pandas.DataFrame({rescol: res, cencol: cen})
+    # Each censored entry is half of its ``res`` value; 15 is unchanged.
+    expected_final = numpy.array([0.5,   0.5,   6. ,  15.])
+
+    # Expected Cohn numbers, limited to the columns the mixin compares.
+    expected_cohn = pandas.DataFrame({
+        'nuncen_above': numpy.array([0., 1., numpy.nan]),
+        'nobs_below': numpy.array([2., 3., numpy.nan]),
+        'ncen_equal': numpy.array([2., 1., numpy.nan]),
+        'prob_exceedance': numpy.array([0.25, 0.25, 0.]),
+    })
+
+
+class Test_ROS_MaxCen_GT_MaxUncen(Test_ROS_HelselAppendixB):
+    # Appendix B data extended with two censored values (60 and 70) that
+    # exceed the largest uncensored value (50).
+    #
+    # NOTE(review): only ``res`` and ``cen`` are overridden here. ``df`` is
+    # inherited from Test_ROS_HelselAppendixB, where it was built from that
+    # class's own ``res``/``cen``, and the CheckROSMixin tests read ``df``
+    # rather than these arrays. As written, the inherited tests appear to run
+    # on the parent's data -- confirm whether ``df`` should be rebuilt here.
+    res = numpy.array([
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 10., 10., 10.,
+        3.0, 7.0, 9.0, 12., 15., 20., 27., 33., 50.,
+        60, 70
+    ])
+    cen = numpy.array([
+        True, True, True, True, True, True, True, True, True,
+        False, False, False, False, False, False, False,
+        False, False, True, True
+    ])
+
+
+class Test_ROS_OnlyDL_GT_MaxUncen(Test_NoOp_ZeroND):
+    numpy.random.seed(0)
+    N = 20
+    res =  [
+        0.38, 0.43, 0.81, 0.86, 0.90, 1.13, 1.15, 1.37, 1.40,
+        1.49, 1.51, 1.56, 2.14, 2.59, 2.66, 4.28, 4.46, 5.84,
+        6.47, 9.40, 10.0, 10.0
+    ]
+    cen = ([False] * N) + [True, True]