some new features

This commit is contained in:
ilgazca
2025-07-30 17:09:11 +03:00
parent db5d46760a
commit 8019bd3b7c
20616 changed files with 4375466 additions and 8 deletions

View File

@ -0,0 +1,47 @@
import numpy as np
import pandas as pd
import pytest
from statsmodels.graphics.agreement import mean_diff_plot
try:
import matplotlib.pyplot as plt
except ImportError:
pass
@pytest.mark.matplotlib
def test_mean_diff_plot(close_figures):
    """Smoke-test ``mean_diff_plot`` with arrays, Series, axes and styling."""
    # A fixed seed keeps the two measurement vectors reproducible.
    np.random.seed(11111)
    first = np.random.random(20)
    second = np.random.random(20)

    # Basic call, drawing on an axis created by hand.
    figure = plt.figure()
    axis = figure.add_subplot(111)
    mean_diff_plot(first, second, ax=axis)

    # Same data passed as pandas Series.
    first_series = pd.Series(first)
    second_series = pd.Series(second)
    mean_diff_plot(first_series, second_series)

    # Plot on one axis out of a grid of subplots.
    _, axes = plt.subplots(2)
    mean_diff_plot(first, second, ax=axes[0])

    # Confidence-limit setting.
    mean_diff_plot(first, second, sd_limit=0)

    # Aesthetic controls, one keyword group per call.
    aesthetic_cases = (
        {"scatter_kwds": {'color': 'green', 's': 10}},
        {"mean_line_kwds": {'color': 'green', 'lw': 5}},
        {"limit_lines_kwds": {'color': 'green', 'lw': 5, 'ls': 'dotted'}},
    )
    for style_kwargs in aesthetic_cases:
        mean_diff_plot(first, second, **style_kwargs)

View File

@ -0,0 +1,99 @@
import numpy as np
import pytest
from statsmodels.datasets import anes96
from statsmodels.graphics.boxplots import beanplot, violinplot
try:
import matplotlib.pyplot as plt
except ImportError:
pass
@pytest.fixture(scope="module")
def age_and_labels():
    """Return per-party age samples and party labels from the anes96 data.

    The same dataset is shared by the violinplot and beanplot tests.

    Returns
    -------
    age : ndarray of dtype object
        One entry per party code 0..6, holding the ``age`` values of the
        rows whose ``endog`` equals that code.
    labels : list of str
        Display names for the seven party codes, in code order.
    """
    data = anes96.load_pandas()
    party_ids = np.arange(7)
    labels = ["Strong Democrat", "Weak Democrat", "Independent-Democrat",
              "Independent-Independent", "Independent-Republican",
              "Weak Republican", "Strong Republican"]
    # The loop variable is deliberately not called ``id`` so the builtin
    # of that name is not shadowed.
    age = [data.exog['age'][data.endog == party] for party in party_ids]
    # Object dtype because the groups may differ in length.
    age = np.array(age, dtype="object")
    return age, labels
@pytest.mark.matplotlib
def test_violinplot(age_and_labels, close_figures):
    """Draw a violin plot of the age groups with custom plot options."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
    }
    _, axis = plt.subplots(1, 1)
    violinplot(ages, ax=axis, labels=party_names, plot_opts=options)
@pytest.mark.matplotlib
def test_violinplot_bw_factor(age_and_labels, close_figures):
    """Draw a violin plot with an explicit ``bw_factor`` plot option."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
        'bw_factor': .2,
    }
    _, axis = plt.subplots(1, 1)
    violinplot(ages, ax=axis, labels=party_names, plot_opts=options)
@pytest.mark.matplotlib
def test_beanplot(age_and_labels, close_figures):
    """Draw a bean plot of the age groups with custom plot options."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
    }
    _, axis = plt.subplots(1, 1)
    beanplot(ages, ax=axis, labels=party_names, plot_opts=options)
@pytest.mark.matplotlib
def test_beanplot_jitter(age_and_labels, close_figures):
    """Draw a bean plot with ``jitter=True``."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
    }
    _, axis = plt.subplots(1, 1)
    beanplot(ages, ax=axis, labels=party_names, jitter=True,
             plot_opts=options)
@pytest.mark.matplotlib
def test_beanplot_side_right(age_and_labels, close_figures):
    """Draw a jittered bean plot with ``side='right'``."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
    }
    _, axis = plt.subplots(1, 1)
    beanplot(ages, ax=axis, labels=party_names, jitter=True, side='right',
             plot_opts=options)
@pytest.mark.matplotlib
def test_beanplot_side_left(age_and_labels, close_figures):
    """Draw a jittered bean plot with ``side='left'``."""
    ages, party_names = age_and_labels
    options = {
        'cutoff_val': 5,
        'cutoff_type': 'abs',
        'label_fontsize': 'small',
        'label_rotation': 30,
    }
    _, axis = plt.subplots(1, 1)
    beanplot(ages, ax=axis, labels=party_names, jitter=True, side='left',
             plot_opts=options)
@pytest.mark.matplotlib
def test_beanplot_legend_text(age_and_labels, close_figures):
    """Draw a bean plot with the ``bean_legend_text`` plot option set."""
    ages, party_names = age_and_labels
    options = {'bean_legend_text': 'text'}
    _, axis = plt.subplots(1, 1)
    beanplot(ages, ax=axis, labels=party_names, plot_opts=options)

View File

@ -0,0 +1,29 @@
import numpy as np
import pytest
from statsmodels.datasets import randhie
from statsmodels.graphics.correlation import plot_corr, plot_corr_grid
@pytest.mark.matplotlib
def test_plot_corr(close_figures):
    """Smoke-test ``plot_corr`` with several naming and colour options."""
    dataset = randhie.load_pandas()
    names = dataset.names
    # Variables are in columns, so transpose before computing correlations.
    corr = np.corrcoef(dataset.data.values.T)

    plot_corr(corr, xnames=names)
    plot_corr(corr, xnames=[], ynames=names)
    plot_corr(corr, normcolor=True, title='', cmap='jet')
@pytest.mark.matplotlib
def test_plot_corr_grid(close_figures):
    """Smoke-test ``plot_corr_grid`` with 2, 5 and 3 repeated matrices."""
    dataset = randhie.load_pandas()
    names = dataset.names
    # Variables are in columns, so transpose before computing correlations.
    corr = np.corrcoef(dataset.data.values.T)

    plot_corr_grid([corr] * 2, xnames=names)
    plot_corr_grid([corr] * 5, xnames=[], ynames=names)
    plot_corr_grid([corr] * 3, normcolor=True, titles='', cmap='jet')

View File

@ -0,0 +1,416 @@
import numpy as np
import pandas as pd
import pytest
from statsmodels.graphics.dotplots import dot_plot
# Manual debugging switch. When True, test_all opens the multi-page PDF
# file "test_dotplot.pdf" and every figure is saved to it for inspection.
pdf_output = False
try:
import matplotlib.pyplot as plt
except ImportError:
pass
def close_or_save(pdf, fig):
    """Save ``fig`` to ``pdf`` when PDF output is enabled, then close it.

    Parameters
    ----------
    pdf : object with a ``savefig`` method, or None
        Only used when the module-level ``pdf_output`` flag is true.
    fig : Figure
        The figure to save and close.
    """
    # NOTE(review): indentation was lost in the reviewed text; the close is
    # assumed to be unconditional (outside the ``if``) -- confirm.
    save_requested = pdf_output
    if save_requested:
        pdf.savefig(fig)
    plt.close(fig)
def _draw_all_dotplots(pdf):
    """Draw every dot_plot variant, passing each figure to close_or_save.

    ``pdf`` is forwarded unchanged to ``close_or_save``; it may be None.
    The order of the ``np.random`` calls is kept stable so that seeded runs
    stay reproducible.
    """
    # Basic dotplot with points only
    plt.clf()
    points = range(20)
    ax = plt.axes()
    fig = dot_plot(points, ax=ax)
    ax.set_title("Basic horizontal dotplot")
    close_or_save(pdf, fig)

    # Basic vertical dotplot
    plt.clf()
    points = range(20)
    ax = plt.axes()
    fig = dot_plot(points, ax=ax, horizontal=False)
    ax.set_title("Basic vertical dotplot")
    close_or_save(pdf, fig)

    # Tall and skinny (reuses the 20 points defined above)
    plt.figure(figsize=(4, 12))
    ax = plt.axes()
    fig = dot_plot(points, ax=ax)
    ax.set_title("Tall and skinny dotplot")
    ax.set_xlabel("x axis label")
    close_or_save(pdf, fig)

    # Short and wide (reuses the 20 points defined above)
    plt.figure(figsize=(12, 4))
    ax = plt.axes()
    fig = dot_plot(points, ax=ax, horizontal=False)
    ax.set_title("Short and wide dotplot")
    ax.set_ylabel("y axis label")
    close_or_save(pdf, fig)

    # Tall and skinny striped dotplot
    plt.figure(figsize=(4, 12))
    ax = plt.axes()
    points = np.arange(40)
    fig = dot_plot(points, ax=ax, striped=True)
    ax.set_title("Tall and skinny striped dotplot")
    ax.set_xlim(-10, 50)
    close_or_save(pdf, fig)

    # Short and wide striped
    plt.figure(figsize=(12, 4))
    ax = plt.axes()
    points = np.arange(40)
    fig = dot_plot(points, ax=ax, striped=True, horizontal=False)
    ax.set_title("Short and wide striped dotplot")
    ax.set_ylim(-10, 50)
    close_or_save(pdf, fig)

    # Basic horizontal dotplot with few points
    plt.figure()
    ax = plt.axes()
    points = np.arange(4)
    fig = dot_plot(points, ax=ax)
    ax.set_title("Basic horizontal dotplot with few lines")
    close_or_save(pdf, fig)

    # Basic vertical dotplot with few points
    plt.figure()
    ax = plt.axes()
    points = np.arange(4)
    fig = dot_plot(points, ax=ax, horizontal=False)
    ax.set_title("Basic vertical dotplot with few lines")
    close_or_save(pdf, fig)

    # Manually set the x axis limits
    plt.figure()
    ax = plt.axes()
    points = np.arange(20)
    fig = dot_plot(points, ax=ax)
    ax.set_xlim(-10, 30)
    ax.set_title("Dotplot with adjusted horizontal range")
    close_or_save(pdf, fig)

    # Left row labels
    plt.clf()
    ax = plt.axes()
    lines = ["ABCDEFGH"[np.random.randint(0, 8)] for _ in range(20)]
    points = np.random.normal(size=20)
    fig = dot_plot(points, lines=lines, ax=ax)
    ax.set_title("Dotplot with user-supplied labels in the left margin")
    close_or_save(pdf, fig)

    # Left and right row labels
    plt.clf()
    ax = plt.axes()
    points = np.random.normal(size=20)
    lines = ["ABCDEFGH"[np.random.randint(0, 8)] + "::" + str(k+1)
             for k in range(20)]
    fig = dot_plot(points, lines=lines, ax=ax, split_names="::")
    ax.set_title("Dotplot with user-supplied labels in both margins")
    close_or_save(pdf, fig)

    # Both sides row labels
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.88, 0.8])
    points = np.random.normal(size=20)
    lines = ["ABCDEFGH"[np.random.randint(0, 8)] + "::" + str(k+1)
             for k in range(20)]
    fig = dot_plot(points, lines=lines, ax=ax, split_names="::",
                   horizontal=False)
    txt = ax.set_title(
        "Vertical dotplot with user-supplied labels in both margins")
    txt.set_position((0.5, 1.06))
    close_or_save(pdf, fig)

    # Custom colors and symbols
    plt.clf()
    ax = plt.axes([0.1, 0.07, 0.78, 0.85])
    points = np.random.normal(size=20)
    lines = np.kron(range(5), np.ones(4)).astype(np.int32)
    styles = np.kron(np.ones(5), range(4)).astype(np.int32)
    marker_props = {k: {"color": "rgbc"[k], "marker": "osvp"[k],
                        "ms": 7, "alpha": 0.6} for k in range(4)}
    fig = dot_plot(points, lines=lines, styles=styles, ax=ax,
                   marker_props=marker_props)
    ax.set_title("Dotplot with custom colors and symbols")
    close_or_save(pdf, fig)

    # Basic dotplot with symmetric intervals
    plt.clf()
    ax = plt.axes()
    points = range(20)
    fig = dot_plot(points, intervals=np.ones(20), ax=ax)
    ax.set_title("Dotplot with symmetric intervals")
    close_or_save(pdf, fig)

    # Basic dotplot with symmetric intervals, pandas inputs.
    plt.clf()
    ax = plt.axes()
    points = pd.Series(range(20))
    intervals = pd.Series(np.ones(20))
    fig = dot_plot(points, intervals=intervals, ax=ax)
    ax.set_title("Dotplot with symmetric intervals (Pandas inputs)")
    close_or_save(pdf, fig)

    # Basic dotplot with nonsymmetric intervals
    plt.clf()
    ax = plt.axes()
    points = np.arange(20)
    intervals = [(1, 3) for _ in range(20)]
    fig = dot_plot(points, intervals=intervals, ax=ax)
    ax.set_title("Dotplot with nonsymmetric intervals")
    close_or_save(pdf, fig)

    # Vertical dotplot with nonsymmetric intervals
    plt.clf()
    ax = plt.axes()
    points = np.arange(20)
    intervals = [(1, 3) for _ in range(20)]
    fig = dot_plot(points, intervals=intervals, ax=ax, horizontal=False)
    ax.set_title("Vertical dotplot with nonsymmetric intervals")
    close_or_save(pdf, fig)

    # Dotplot with nonsymmetric intervals, adjust line properties
    plt.clf()
    ax = plt.axes()
    points = np.arange(20)
    intervals = [(1, 3) for _ in range(20)]
    line_props = {0: {"color": "lightgrey",
                      "solid_capstyle": "round"}}
    fig = dot_plot(points, intervals=intervals, line_props=line_props, ax=ax)
    ax.set_title("Dotplot with custom line properties")
    close_or_save(pdf, fig)

    # Dotplot with two points per line and a legend
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = np.kron(range(20), (1, 1))
    intervals = [(1, 3) for _ in range(40)]
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    fig = dot_plot(points, intervals=intervals, lines=lines, styles=styles,
                   ax=ax, stacked=True)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Dotplot with two points per line")
    close_or_save(pdf, fig)

    # Same data as the previous plot, with the style order reversed
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    fig = dot_plot(points, intervals=intervals, lines=lines,
                   styles=styles, ax=ax, stacked=True,
                   styles_order=["Dog", "Cat"])
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Dotplot with two points per line (reverse order)")
    close_or_save(pdf, fig)

    # Vertical dotplot with two points per line and a legend
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = np.kron(range(20), (1, 1))
    intervals = [(1, 3) for _ in range(40)]
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    fig = dot_plot(points, intervals=intervals, lines=lines, styles=styles,
                   ax=ax, stacked=True, horizontal=False)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Vertical dotplot with two points per line")
    close_or_save(pdf, fig)

    # Same vertical data, with the style order reversed; the legend entries
    # are re-sorted to follow styles_order.
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    styles_order = ["Dog", "Cat"]
    fig = dot_plot(points, intervals=intervals, lines=lines,
                   styles=styles, ax=ax, stacked=True,
                   horizontal=False, styles_order=styles_order)
    handles, labels = ax.get_legend_handles_labels()
    handle_by_label = dict(zip(labels, handles))
    handles = [handle_by_label[label] for label in styles_order]
    leg = plt.figlegend(handles, styles_order, loc="center right",
                        numpoints=1, handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Vertical dotplot with two points per line (reverse order)")
    close_or_save(pdf, fig)

    # Vertical striped dotplot with two points per line and a legend
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = np.kron(range(20), (1, 1))
    intervals = [(1, 3) for _ in range(40)]
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    fig = dot_plot(points, intervals=intervals, lines=lines, styles=styles,
                   ax=ax, stacked=True, striped=True, horizontal=False)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    plt.ylim(-20, 20)
    ax.set_title("Vertical dotplot with two points per line")
    close_or_save(pdf, fig)

    # Dotplot with color-matched points and intervals
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = np.kron(range(20), (1, 1))
    intervals = [(1, 3) for _ in range(40)]
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    marker_props = {"Cat": {"color": "orange"},
                    "Dog": {"color": "purple"}}
    line_props = {"Cat": {"color": "orange"},
                  "Dog": {"color": "purple"}}
    fig = dot_plot(points, intervals=intervals, lines=lines, styles=styles,
                   ax=ax, stacked=True, marker_props=marker_props,
                   line_props=line_props)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Dotplot with color-matched points and intervals")
    close_or_save(pdf, fig)

    # Vertical dotplot with color-matched points and intervals
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = np.kron(range(20), (1, 1))
    intervals = [(1, 3) for _ in range(40)]
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    marker_props = {"Cat": {"color": "orange"},
                    "Dog": {"color": "purple"}}
    line_props = {"Cat": {"color": "orange"},
                  "Dog": {"color": "purple"}}
    fig = dot_plot(points, intervals=intervals, lines=lines, styles=styles,
                   ax=ax, stacked=True, marker_props=marker_props,
                   line_props=line_props, horizontal=False)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Dotplot with color-matched points and intervals")
    close_or_save(pdf, fig)

    # Dotplot with sections
    plt.clf()
    ax = plt.axes()
    points = range(30)
    lines = np.kron(range(15), (1, 1)).astype(np.int32)
    styles = np.kron(np.ones(15), (0, 1)).astype(np.int32)
    sections = np.kron((0, 1, 2), np.ones(10)).astype(np.int32)
    sections = [["Axx", "Byy", "Czz"][k] for k in sections]
    fig = dot_plot(points, lines=lines, styles=styles, sections=sections,
                   ax=ax)
    ax.set_title("Dotplot with sections")
    close_or_save(pdf, fig)

    # Vertical dotplot with sections
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.9, 0.75])
    points = range(30)
    lines = np.kron(range(15), (1, 1)).astype(np.int32)
    styles = np.kron(np.ones(15), (0, 1)).astype(np.int32)
    sections = np.kron((0, 1, 2), np.ones(10)).astype(np.int32)
    sections = [["Axx", "Byy", "Czz"][k] for k in sections]
    fig = dot_plot(points, lines=lines, styles=styles,
                   sections=sections, ax=ax, horizontal=False)
    txt = ax.set_title("Vertical dotplot with sections")
    txt.set_position((0.5, 1.08))
    close_or_save(pdf, fig)

    # Reorder sections
    plt.clf()
    ax = plt.axes()
    points = range(30)
    lines = np.kron(range(15), (1, 1)).astype(np.int32)
    styles = np.kron(np.ones(15), (0, 1)).astype(np.int32)
    sections = np.kron((0, 1, 2), np.ones(10)).astype(np.int32)
    sections = [["Axx", "Byy", "Czz"][k] for k in sections]
    fig = dot_plot(points, lines=lines, styles=styles, sections=sections,
                   ax=ax, section_order=["Byy", "Axx", "Czz"])
    ax.set_title("Dotplot with sections in specified order")
    close_or_save(pdf, fig)

    # Reorder the lines.
    plt.figure()
    ax = plt.axes()
    points = np.arange(4)
    lines = ["A", "B", "C", "D"]
    line_order = ["B", "C", "A", "D"]
    fig = dot_plot(points, lines=lines, line_order=line_order, ax=ax)
    ax.set_title("Dotplot with reordered lines")
    close_or_save(pdf, fig)

    # Format labels
    def fmt_left(name):
        return "lft_" + name

    def fmt_right(name):
        return "rgt_" + name

    plt.clf()
    points = range(20)
    lines = ["%d::%d" % (i, 100+i) for i in range(20)]
    ax = plt.axes()
    fig = dot_plot(points, lines=lines, ax=ax, split_names="::",
                   fmt_left_name=fmt_left, fmt_right_name=fmt_right)
    ax.set_title("Horizontal dotplot with name formatting")
    close_or_save(pdf, fig)

    # Right names only
    plt.clf()
    points = range(20)
    lines = ["%d::%d" % (i, 100+i) for i in range(20)]
    ax = plt.axes()
    fig = dot_plot(points, lines=lines, ax=ax, split_names="::",
                   show_names="right")
    ax.set_title("Show right names only")
    close_or_save(pdf, fig)

    # Dotplot with different numbers of points per line
    plt.clf()
    ax = plt.axes([0.1, 0.1, 0.75, 0.8])
    points = 5*np.random.normal(size=40)
    lines = []
    ii = 0
    # Each line id is repeated a random 1-3 times until at least 40 entries
    # have been produced.
    while len(lines) < 40:
        for _ in range(np.random.randint(1, 4)):
            lines.append(ii)
        ii += 1
    styles = np.kron(np.ones(20), (0, 1)).astype(np.int32)
    styles = [["Cat", "Dog"][i] for i in styles]
    fig = dot_plot(points, lines=lines, styles=styles,
                   ax=ax, stacked=True)
    handles, labels = ax.get_legend_handles_labels()
    leg = plt.figlegend(handles, labels, loc="center right", numpoints=1,
                        handletextpad=0.0001)
    leg.draw_frame(False)
    ax.set_title("Dotplot with different numbers of points per line")
    close_or_save(pdf, fig)


@pytest.mark.matplotlib
def test_all(close_figures, reset_randomstate):
    """Smoke-test ``dot_plot`` across its supported options.

    When the module-level ``pdf_output`` flag is true, every figure is also
    written to "test_dotplot.pdf". The PDF is closed in a ``finally`` clause
    so that a failing plot does not leave the file handle open.
    """
    pdf = None
    if pdf_output:
        from matplotlib.backends.backend_pdf import PdfPages
        pdf = PdfPages("test_dotplot.pdf")
    try:
        _draw_all_dotplots(pdf)
    finally:
        if pdf is not None:
            pdf.close()

View File

@ -0,0 +1,71 @@
import numpy as np
from numpy.testing import assert_equal, assert_raises
from pandas import Series
import pytest
from statsmodels.graphics.factorplots import _recode, interaction_plot
try:
import matplotlib.pyplot as plt
except ImportError:
pass
class TestInteractionPlot:
    """Tests for ``interaction_plot`` and the ``_recode`` helper."""

    @classmethod
    def setup_class(cls):
        # Seeded sample data shared by all tests; the order of the three
        # draws must stay as is so the values are reproducible.
        np.random.seed(12345)
        cls.weight = np.random.randint(1, 4, size=60)
        cls.duration = np.random.randint(1, 3, size=60)
        cls.days = np.log(np.random.randint(1, 30, size=60))

    @pytest.mark.matplotlib
    def test_plot_both(self, close_figures):
        """Plot with explicit colors and markers."""
        interaction_plot(
            self.weight,
            self.duration,
            self.days,
            colors=['red', 'blue'],
            markers=['D', '^'],
            ms=10,
        )

    @pytest.mark.matplotlib
    def test_plot_rainbow(self, close_figures):
        """Plot with markers only, leaving colors at their default."""
        interaction_plot(
            self.weight,
            self.duration,
            self.days,
            markers=['D', '^'],
            ms=10,
        )

    @pytest.mark.matplotlib
    @pytest.mark.parametrize('astype', ['str', 'int'])
    def test_plot_pandas(self, astype, close_figures):
        """Series names must end up as the legend title and axis labels."""
        x_series = Series(self.weight, name='Weight').astype(astype)
        trace_series = Series(self.duration, name='Duration')
        response_series = Series(self.days, name='Days')
        figure = interaction_plot(x_series, trace_series, response_series,
                                  markers=['D', '^'], ms=10)
        axis = figure.axes[0]
        legend_title = axis.get_legend().get_title().get_text()
        assert_equal(legend_title, 'Duration')
        assert_equal(axis.get_ylabel(), 'mean of Days')
        assert_equal(axis.get_xlabel(), 'Weight')

    @pytest.mark.matplotlib
    def test_formatting(self, close_figures):
        """Custom colors and linestyles still yield a Figure."""
        figure = interaction_plot(
            self.weight,
            self.duration,
            self.days,
            colors=['r', 'g'],
            linestyles=['--', '-.'],
        )
        assert_equal(isinstance(figure, plt.Figure), True)

    @pytest.mark.matplotlib
    def test_formatting_errors(self, close_figures):
        """Each of these style lists is expected to raise ValueError."""
        invalid_styles = (
            {'markers': ['D']},
            {'colors': ['b', 'r', 'g']},
            {'linestyles': ['--', '-.', ':']},
        )
        for bad_kwargs in invalid_styles:
            assert_raises(ValueError, interaction_plot, self.weight,
                          self.duration, self.days, **bad_kwargs)

    @pytest.mark.matplotlib
    def test_plottype(self, close_figures):
        """'line' and 'scatter' work; an unknown plottype raises."""
        line_fig = interaction_plot(self.weight, self.duration, self.days,
                                    plottype='line')
        assert_equal(isinstance(line_fig, plt.Figure), True)
        scatter_fig = interaction_plot(self.weight, self.duration, self.days,
                                       plottype='scatter')
        assert_equal(isinstance(scatter_fig, plt.Figure), True)
        assert_raises(ValueError, interaction_plot, self.weight,
                      self.duration, self.days, plottype='unknown')

    def test_recode_series(self):
        """``_recode`` must leave a non-default Series index untouched."""
        original = Series(['a', 'b'] * 10, index=np.arange(0, 40, 2),
                          name='index_test')
        recoded = _recode(original, {'a': 0, 'b': 1})
        assert_equal(recoded.index.values, original.index.values,
                     err_msg='_recode changed the index')

View File

@ -0,0 +1,247 @@
import numpy as np
from numpy.testing import assert_almost_equal, assert_equal
import pytest
from statsmodels.datasets import elnino
from statsmodels.graphics.functional import (
banddepth,
fboxplot,
hdrboxplot,
rainbowplot,
)
try:
import matplotlib.pyplot as plt
except ImportError:
pass
# Load the El Nino dataset once at import time; the tests below share it.
data = elnino.load()
data.raw_data = np.asarray(data.raw_data)
# The first column of raw_data becomes the integer curve labels
# (presumably years, judging by the expected outliers in the tests).
labels = data.raw_data[:, 0].astype(int)
# The remaining columns are the observations. Note that this rebinds
# ``data`` from the dataset object to a plain 2-D array.
data = data.raw_data[:, 1:]
@pytest.mark.matplotlib
def test_hdr_basic(close_figures):
    """Check median, quantile curves and outliers of a default hdrboxplot."""
    expected_median = [24.247, 25.625, 25.964, 24.999, 23.648, 22.302,
                       21.231, 20.366, 20.168, 20.434, 21.111, 22.299]
    # Rows are compared against outliers, hdr_90 and hdr_50 stacked in
    # that order.
    expected_quant = np.vstack([
        [24.36, 25.42, 25.40, 24.96, 24.21, 23.35,
         22.50, 21.89, 22.04, 22.88, 24.57, 25.89],
        [27.25, 28.23, 28.85, 28.82, 28.37, 27.43,
         25.73, 23.88, 22.26, 22.22, 22.21, 23.19],
        [23.70, 26.08, 27.17, 26.74, 26.77, 26.15,
         25.59, 24.95, 24.69, 24.64, 25.85, 27.08],
        [28.12, 28.82, 29.24, 28.45, 27.36, 25.19,
         23.61, 22.27, 21.31, 21.37, 21.60, 22.81],
        [25.48, 26.99, 27.51, 27.04, 26.23, 24.94,
         23.69, 22.72, 22.26, 22.64, 23.33, 24.44],
        [23.11, 24.50, 24.66, 23.44, 21.74, 20.58,
         19.68, 18.84, 18.76, 18.99, 19.66, 20.86],
        [24.84, 26.23, 26.67, 25.93, 24.87, 23.57,
         22.46, 21.45, 21.26, 21.57, 22.14, 23.41],
        [23.62, 25.10, 25.34, 24.22, 22.74, 21.52,
         20.40, 19.56, 19.63, 19.67, 20.37, 21.76],
    ])
    try:
        _, hdr = hdrboxplot(data, labels=labels, seed=12345)

        assert len(hdr.extra_quantiles) == 0
        assert_almost_equal(hdr.median, expected_median, decimal=2)

        stacked = np.vstack([hdr.outliers, hdr.hdr_90, hdr.hdr_50])
        assert_almost_equal(stacked, expected_quant, decimal=0)

        # A curve counts as an outlier when all of its values appear in
        # hdr.outliers.
        in_outliers = np.isin(data, hdr.outliers).reshape(data.shape)
        outlier_rows = np.all(in_outliers, axis=1)
        outlier_labels = labels[outlier_rows]
        assert_equal([1982, 1983, 1997, 1998], outlier_labels)
        assert_equal(labels[hdr.outliers_idx], outlier_labels)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_basic_brute(close_figures, reset_randomstate):
    """Check the median curve when ``use_brute=True`` with two components."""
    expected_median = [24.247, 25.625, 25.964, 24.999, 23.648, 22.302,
                       21.231, 20.366, 20.168, 20.434, 21.111, 22.299]
    try:
        _, hdr = hdrboxplot(data, ncomp=2, labels=labels, use_brute=True)
        assert len(hdr.extra_quantiles) == 0
        assert_almost_equal(hdr.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_plot(close_figures):
    """Draw an hdrboxplot on a supplied axis and decorate that axis."""
    figure = plt.figure()
    axis = figure.add_subplot(111)
    tick_names = ["", "Mar", "Jun", "Sep", "Dec"]
    try:
        hdrboxplot(data, labels=labels.tolist(), ax=axis, threshold=1,
                   seed=12345)
        axis.set_xlabel("Month of the year")
        axis.set_ylabel("Sea surface temperature (C)")
        axis.set_xticks(np.arange(13, step=3) - 1)
        axis.set_xticklabels(tick_names)
        axis.set_xlim([-0.2, 11.2])
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_alpha(close_figures):
    """Check the extra quantile curves produced for ``alpha=[0.7]``."""
    expected_extra = np.vstack([
        [25.1, 26.5, 27.0, 26.4, 25.4, 24.1,
         23.0, 22.0, 21.7, 22.1, 22.7, 23.8],
        [23.4, 24.8, 25.0, 23.9, 22.4, 21.1,
         20.0, 19.3, 19.2, 19.4, 20.1, 21.3],
    ])
    try:
        _, hdr = hdrboxplot(data, alpha=[0.7], seed=12345)
        assert_almost_equal(hdr.extra_quantiles, expected_extra, decimal=0)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_multiple_alpha(close_figures):
    """Check the extra quantile curves for ``alpha=[0.4, 0.92]``."""
    expected_rows = [
        [25.712, 27.052, 27.711, 27.200,
         26.162, 24.833, 23.639, 22.378,
         22.250, 22.640, 23.472, 24.649],
        [22.973, 24.526, 24.608, 23.343,
         21.908, 20.655, 19.750, 19.046,
         18.812, 18.989, 19.520, 20.685],
        [24.667, 26.033, 26.416, 25.584,
         24.308, 22.849, 21.684, 20.948,
         20.483, 21.019, 21.751, 22.890],
        [23.873, 25.371, 25.667, 24.644,
         23.177, 21.923, 20.791, 20.015,
         19.697, 19.951, 20.622, 21.858],
    ]
    expected_extra = np.vstack(expected_rows)
    try:
        _, hdr = hdrboxplot(data, alpha=[0.4, 0.92], seed=12345)
        assert_almost_equal(hdr.extra_quantiles, expected_extra, decimal=0)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_threshold(close_figures):
    """Check which labels are flagged as outliers with ``threshold=0.93``."""
    expected_outliers = [1968, 1982, 1983, 1997, 1998]
    try:
        _, hdr = hdrboxplot(data, alpha=[0.8], threshold=0.93,
                            seed=12345)
        # A curve counts as an outlier when all of its values appear in
        # hdr.outliers.
        in_outliers = np.isin(data, hdr.outliers).reshape(data.shape)
        outlier_rows = np.all(in_outliers, axis=1)
        outlier_labels = labels[outlier_rows]
        assert_equal(expected_outliers, outlier_labels)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.matplotlib
def test_hdr_bw(close_figures):
    """Check the median curve when the bandwidth is set to ``'cv_ml'``."""
    expected_median = [24.25, 25.64, 25.99, 25.04, 23.71, 22.38,
                       21.31, 20.44, 20.24, 20.51, 21.19, 22.38]
    try:
        _, hdr = hdrboxplot(data, bw='cv_ml', seed=12345)
        assert_almost_equal(hdr.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
@pytest.mark.slow
@pytest.mark.matplotlib
def test_hdr_ncomp(close_figures):
    """Check the median curve when ``ncomp=3`` is requested."""
    expected_median = [24.33, 25.71, 26.04, 25.08, 23.74, 22.40,
                       21.32, 20.45, 20.25, 20.53, 21.20, 22.39]
    try:
        _, hdr = hdrboxplot(data, ncomp=3, seed=12345)
        assert_almost_equal(hdr.median, expected_median, decimal=2)
    except OSError:
        pytest.xfail('Multiprocess randomly crashes in Windows testing')
def test_banddepth_BD2():
    """Band depth (method 'BD2') of four hand-built curves."""
    grid = np.arange(500) / 150.
    shifted = grid + np.pi/6
    curves = [
        1 + 0.5 * np.sin(grid),
        0.3 + np.sin(shifted),
        -0.5 + np.sin(shifted),
        -1 + 0.3 * np.cos(shifted),
    ]
    want = [0.5, 5./6, 5./6, 0.5]

    got = banddepth(np.asarray(curves), method='BD2')
    assert_almost_equal(got, want)

    # Plot to visualize why we expect this output
    # fig = plt.figure()
    # ax = fig.add_subplot(111)
    # for ii, yy in enumerate(curves):
    #     ax.plot(grid, yy, label="y%s" % ii)
    # ax.legend()
    # plt.close(fig)
def test_banddepth_MBD():
    """Modified band depth (method 'MBD') of four hand-built curves."""
    grid = np.arange(5001) / 5000.
    curves = [
        np.zeros(grid.shape),
        2 * grid - 1,
        np.ones(grid.shape) * 0.5,
        np.ones(grid.shape) * -0.25,
    ]
    want = [5./6, (2*(0.75-3./8)+3)/6, 3.5/6, (2*3./8+3)/6]

    got = banddepth(np.asarray(curves), method='MBD')
    assert_almost_equal(got, want, decimal=4)
@pytest.mark.matplotlib
def test_fboxplot_rainbowplot(close_figures):
    """Test fboxplot and rainbowplot together, which is much faster."""

    def harmfunc(t):
        """Test function, combination of a few harmonic terms."""
        # The five random draws must stay in this order so the seeded
        # sequence is reproducible.
        # Constant, 0 with p=0.9, 1 with p=1 - for creating outliers
        ci = int(np.random.random() > 0.9)
        a1i = np.random.random() * 0.05
        a2i = np.random.random() * 0.05
        b1i = (0.15 - 0.1) * np.random.random() + 0.1
        b2i = (0.15 - 0.1) * np.random.random() + 0.1

        regular = a1i * np.sin(t) + a2i * np.cos(t)
        outlying = b1i * np.sin(t) + b2i * np.cos(t)
        return (1 - ci) * regular + ci * outlying

    np.random.seed(1234567)
    # Some basic test data, Model 6 from Sun and Genton.
    grid = np.linspace(0, 2 * np.pi, 250)
    curves = [harmfunc(grid) for _ in range(20)]

    # fboxplot test
    figure = plt.figure()
    axis = figure.add_subplot(111)
    _, depth, ix_depth, ix_outliers = fboxplot(curves, wfactor=2, ax=axis)

    expected_depth_order = np.array([13, 4, 15, 19, 8, 6, 3, 16, 9, 7, 1, 5,
                                     2, 12, 17, 11, 14, 10, 0, 18])
    assert_equal(ix_depth, expected_depth_order)
    expected_outliers = np.array([2, 11, 17, 18])
    assert_equal(ix_outliers, expected_outliers)

    # rainbowplot test (re-uses depth variable)
    xdata = np.arange(curves[0].size)
    rainbowplot(curves, xdata=xdata, depth=depth, cmap=plt.cm.rainbow)

View File

@ -0,0 +1,688 @@
import numpy as np
import numpy.testing as nptest
from numpy.testing import assert_equal
import pytest
from scipy import stats
import statsmodels.api as sm
from statsmodels.graphics import gofplots
from statsmodels.graphics.gofplots import (
ProbPlot,
qqline,
qqplot,
qqplot_2samples,
)
from statsmodels.graphics.utils import _import_mpl
class BaseProbplotMixin:
    """Shared ProbPlot smoke tests.

    Subclasses are expected to set ``self.prbplt`` (a ProbPlot) and
    ``self.line`` in their own ``setup_method`` before delegating here.
    """

    def setup_method(self):
        # matplotlib is optional; without it no figure or axis is created.
        try:
            import matplotlib.pyplot as plt

            self.fig, self.ax = plt.subplots()
        except ImportError:
            pass
        # A second sample with the same shape as the data under test.
        self.other_array = np.random.normal(size=self.prbplt.data.shape)
        self.other_prbplot = ProbPlot(self.other_array)
        self.plot_options = {
            "marker": "d",
            "markerfacecolor": "cornflowerblue",
            "markeredgecolor": "white",
            "alpha": 0.5,
        }

    @pytest.mark.matplotlib
    def test_qqplot(self, close_figures):
        self.prbplt.qqplot(ax=self.ax, line=self.line, **self.plot_options)

    @pytest.mark.matplotlib
    def test_ppplot(self, close_figures):
        self.prbplt.ppplot(ax=self.ax, line=self.line)

    @pytest.mark.matplotlib
    def test_probplot(self, close_figures):
        self.prbplt.probplot(ax=self.ax, line=self.line, **self.plot_options)

    @pytest.mark.matplotlib
    def test_probplot_exceed(self, close_figures):
        self.prbplt.probplot(
            ax=self.ax, exceed=True, line=self.line, **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_qqplot_other_array(self, close_figures):
        second_sample = self.other_array
        self.prbplt.qqplot(
            ax=self.ax, line=self.line, other=second_sample,
            **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_ppplot_other_array(self, close_figures):
        second_sample = self.other_array
        self.prbplt.ppplot(
            ax=self.ax, line=self.line, other=second_sample,
            **self.plot_options
        )

    # Marked as a strict expected failure: the call below is expected
    # to fail.
    @pytest.mark.xfail(strict=True)
    @pytest.mark.matplotlib
    def test_probplot_other_array(self, close_figures):
        second_sample = self.other_array
        self.prbplt.probplot(
            ax=self.ax, line=self.line, other=second_sample,
            **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_qqplot_other_prbplt(self, close_figures):
        second_plot = self.other_prbplot
        self.prbplt.qqplot(
            ax=self.ax, line=self.line, other=second_plot,
            **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_ppplot_other_prbplt(self, close_figures):
        second_plot = self.other_prbplot
        self.prbplt.ppplot(
            ax=self.ax, line=self.line, other=second_plot,
            **self.plot_options
        )

    # Marked as a strict expected failure: the call below is expected
    # to fail.
    @pytest.mark.xfail(strict=True)
    @pytest.mark.matplotlib
    def test_probplot_other_prbplt(self, close_figures):
        second_plot = self.other_prbplot
        self.prbplt.probplot(
            ax=self.ax, line=self.line, other=second_plot,
            **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_qqplot_custom_labels(self, close_figures):
        axis_labels = {"xlabel": "Custom X-Label", "ylabel": "Custom Y-Label"}
        self.prbplt.qqplot(
            ax=self.ax, line=self.line, **axis_labels, **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_ppplot_custom_labels(self, close_figures):
        axis_labels = {"xlabel": "Custom X-Label", "ylabel": "Custom Y-Label"}
        self.prbplt.ppplot(
            ax=self.ax, line=self.line, **axis_labels, **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_probplot_custom_labels(self, close_figures):
        axis_labels = {"xlabel": "Custom X-Label", "ylabel": "Custom Y-Label"}
        self.prbplt.probplot(
            ax=self.ax, line=self.line, **axis_labels, **self.plot_options
        )

    @pytest.mark.matplotlib
    def test_qqplot_pltkwargs(self, close_figures):
        # Plot keywords spelled out here rather than taken from
        # self.plot_options.
        explicit_kwargs = {
            "marker": "d",
            "markerfacecolor": "cornflowerblue",
            "markeredgecolor": "white",
            "alpha": 0.5,
        }
        self.prbplt.qqplot(ax=self.ax, line=self.line, **explicit_kwargs)

    @pytest.mark.matplotlib
    def test_ppplot_pltkwargs(self, close_figures):
        explicit_kwargs = {
            "marker": "d",
            "markerfacecolor": "cornflowerblue",
            "markeredgecolor": "white",
            "alpha": 0.5,
        }
        self.prbplt.ppplot(ax=self.ax, line=self.line, **explicit_kwargs)

    @pytest.mark.matplotlib
    def test_probplot_pltkwargs(self, close_figures):
        explicit_kwargs = {
            "marker": "d",
            "markerfacecolor": "cornflowerblue",
            "markeredgecolor": "white",
            "alpha": 0.5,
        }
        self.prbplt.probplot(ax=self.ax, line=self.line, **explicit_kwargs)

    def test_fit_params(self):
        # The last two fit parameters must equal loc and scale.
        params = self.prbplt.fit_params
        assert params[-2] == self.prbplt.loc
        assert params[-1] == self.prbplt.scale
class TestProbPlotLongelyNoFit(BaseProbplotMixin):
    """ProbPlot of Longley OLS residuals against t(4), with ``fit=False``."""

    def setup_method(self):
        np.random.seed(5)
        longley = sm.datasets.longley.load()
        # Constant column is appended as the last regressor.
        longley.exog = sm.add_constant(longley.exog, prepend=False)
        self.data = longley
        self.mod_fit = sm.OLS(longley.endog, longley.exog).fit()
        residuals = self.mod_fit.resid
        self.prbplt = ProbPlot(
            residuals, dist=stats.t, distargs=(4,), fit=False
        )
        self.line = "r"
        super().setup_method()
class TestProbPlotLongelyWithFit(BaseProbplotMixin):
    """ProbPlot of Longley OLS residuals against t(4), with ``fit=True``."""

    def setup_method(self):
        np.random.seed(5)
        longley = sm.datasets.longley.load()
        # Constant column is appended as the last regressor.
        longley.exog = sm.add_constant(longley.exog, prepend=False)
        self.data = longley
        self.mod_fit = sm.OLS(longley.endog, longley.exog).fit()
        residuals = self.mod_fit.resid
        self.prbplt = ProbPlot(
            residuals, dist=stats.t, distargs=(4,), fit=True
        )
        self.line = "r"
        super().setup_method()
class TestProbPlotRandomNormalMinimal(BaseProbplotMixin):
    def setup_method(self):
        # Seeded normal sample, ProbPlot built with all default options and
        # no reference line.
        np.random.seed(5)
        sample = np.random.normal(loc=8.25, scale=3.25, size=37)
        self.data = sample
        self.prbplt = ProbPlot(sample)
        self.line = None
        super().setup_method()
class TestProbPlotRandomNormalWithFit(BaseProbplotMixin):
    def setup_method(self):
        # Seeded normal sample, ProbPlot built with fit=True and a "q"
        # reference line.
        np.random.seed(5)
        sample = np.random.normal(loc=8.25, scale=3.25, size=37)
        self.data = sample
        self.prbplt = ProbPlot(sample, fit=True)
        self.line = "q"
        super().setup_method()
class TestProbPlotRandomNormalFullDist(BaseProbplotMixin):
    def setup_method(self):
        # Seeded normal sample compared against a frozen normal distribution.
        np.random.seed(5)
        sample = np.random.normal(loc=8.25, scale=3.25, size=37)
        self.data = sample
        self.prbplt = ProbPlot(sample, dist=stats.norm(loc=8.5, scale=3.0))
        self.line = "45"
        super().setup_method()

    def test_loc_set(self):
        assert self.prbplt.loc == 8.5

    def test_scale_set(self):
        assert self.prbplt.scale == 3.0

    def test_exceptions(self):
        # Each of these extra arguments, combined with a frozen
        # distribution, must be rejected with ValueError.
        rejected_extras = (
            {"fit": True},
            {"distargs": (8.5, 3.0)},
            {"loc": 8.5},
            {"scale": 3.0},
        )
        for extra in rejected_extras:
            with pytest.raises(ValueError):
                ProbPlot(
                    self.data, dist=stats.norm(loc=8.5, scale=3.0), **extra
                )
class TestCompareSamplesDifferentSize:
    def setup_method(self):
        # Two seeded normal samples of different length (37 and 55).
        np.random.seed(5)
        shorter = np.random.normal(loc=8.25, scale=3.25, size=37)
        self.data1 = ProbPlot(shorter)
        longer = np.random.normal(loc=8.25, scale=3.25, size=55)
        self.data2 = ProbPlot(longer)

    @pytest.mark.matplotlib
    def test_qqplot(self, close_figures):
        # The 37-point sample against the 55-point one is accepted; the
        # reverse direction is expected to raise ValueError.
        self.data1.qqplot(other=self.data2)
        with pytest.raises(ValueError):
            self.data2.qqplot(other=self.data1)

    @pytest.mark.matplotlib
    def test_ppplot(self, close_figures):
        # ppplot is called in both directions and must not raise.
        for first, second in (
            (self.data1, self.data2),
            (self.data2, self.data1),
        ):
            first.ppplot(other=second)
class TestProbPlotRandomNormalLocScaleDist(BaseProbplotMixin):
    def setup_method(self):
        # Seeded normal sample, ProbPlot built with explicit loc and scale.
        np.random.seed(5)
        sample = np.random.normal(loc=8.25, scale=3.25, size=37)
        self.data = sample
        self.prbplt = ProbPlot(sample, loc=8, scale=3)
        self.line = "45"
        super().setup_method()

    def test_loc_set(self):
        assert self.prbplt.loc == 8

    def test_scale_set(self):
        assert self.prbplt.scale == 3

    def test_loc_set_in_dist(self):
        # loc=8 must show up as the mean of the stored distribution.
        assert self.prbplt.dist.mean() == 8.0

    def test_scale_set_in_dist(self):
        # scale=3 must show up as a variance of 3**2 in the stored
        # distribution.
        assert self.prbplt.dist.var() == 9.0
class TestTopLevel:
    # Smoke tests for the module-level helpers qqplot and qqplot_2samples.

    def setup_method(self):
        # Seed the global generator, as the other setup methods in this
        # file do, so the random comparison sample is reproducible.
        np.random.seed(5)
        self.data = sm.datasets.longley.load()
        self.data.exog = sm.add_constant(self.data.exog, prepend=False)
        self.mod_fit = sm.OLS(self.data.endog, self.data.exog).fit()
        self.res = self.mod_fit.resid
        self.prbplt = ProbPlot(self.mod_fit.resid, dist=stats.t, distargs=(4,))
        # Second sample with the same shape as the residuals.
        self.other_array = np.random.normal(size=self.prbplt.data.shape)
        self.other_prbplot = ProbPlot(self.other_array)

    @pytest.mark.matplotlib
    def test_qqplot(self, close_figures):
        qqplot(self.res, line="r")

    @pytest.mark.matplotlib
    def test_qqplot_pltkwargs(self, close_figures):
        qqplot(
            self.res,
            line="r",
            marker="d",
            markerfacecolor="cornflowerblue",
            markeredgecolor="white",
            alpha=0.5,
        )

    @pytest.mark.matplotlib
    def test_qqplot_2samples_prob_plot_objects(self, close_figures):
        # also tests all values for line
        for line in ["r", "q", "45", "s"]:
            # test with `ProbPlot` instances
            qqplot_2samples(self.prbplt, self.other_prbplot, line=line)

    @pytest.mark.matplotlib
    def test_qqplot_2samples_arrays(self, close_figures):
        # also tests all values for line
        for line in ["r", "q", "45", "s"]:
            # test with arrays
            qqplot_2samples(self.res, self.other_array, line=line)
def test_invalid_dist_config(close_figures):
    # GH 4226
    # Three distargs for stats.t must raise a TypeError whose message
    # matches the pattern below.
    np.random.seed(5)
    longley = sm.datasets.longley.load()
    longley.exog = sm.add_constant(longley.exog, prepend=False)
    residuals = sm.OLS(longley.endog, longley.exog).fit().resid
    expected_message = r"dist\(0, 1, 4, loc=0, scale=1\)"
    with pytest.raises(TypeError, match=expected_message):
        ProbPlot(residuals, stats.t, distargs=(0, 1, 4))
@pytest.mark.matplotlib
def test_qqplot_unequal():
    # qqplot_2samples with samples of different length must draw the same
    # points, and the same number of artists, whichever sample comes first.
    rs = np.random.RandomState(0)
    data1 = rs.standard_normal(100)
    data2 = rs.standard_normal(200)
    fig1 = qqplot_2samples(data1, data2)
    fig2 = qqplot_2samples(data2, data1)

    # Compare the data of the first child artist of each axes.
    x1, y1 = fig1.get_axes()[0].get_children()[0].get_data()
    x2, y2 = fig2.get_axes()[0].get_children()[0].get_data()
    np.testing.assert_allclose(x1, x2)
    np.testing.assert_allclose(y1, y2)

    numobj1 = len(fig1.get_axes()[0].get_children())
    numobj2 = len(fig2.get_axes()[0].get_children())
    assert numobj1 == numobj2
    # NOTE: three functions taking `self` used to follow here. Having no
    # enclosing class, they could only be never-called local definitions
    # that duplicated TestTopLevel tests, so they were removed.
class TestCheckDist:
    def test_good(self):
        # stats.norm provides both methods, so neither check may raise.
        for method_name in ("ppf", "cdf"):
            gofplots._check_for(stats.norm, method_name)

    def test_bad(self):
        # A plain string has neither method; each check must raise.
        for method_name in ("ppf", "cdf"):
            with pytest.raises(AttributeError):
                gofplots._check_for("junk", method_name)
class TestDoPlot:
    def setup_method(self):
        # Fixed sample points shared by every test below.
        self.x = [0.2, 0.6, 2.0, 4.5, 10.0, 50.0, 83.0, 99.1, 99.7]
        self.y = [1.2, 1.4, 1.7, 2.1, 3.2, 3.7, 4.5, 5.1, 6.3]
        self.full_options = dict(
            marker="s",
            markerfacecolor="cornflowerblue",
            markeredgecolor="firebrick",
            markeredgewidth=1.25,
            linestyle="--",
        )
        self.step_options = dict(linestyle="-", where="mid")
        # fig/ax are only set when matplotlib can be imported.
        try:
            import matplotlib.pyplot as plt

            self.fig, self.ax = plt.subplots()
        except ImportError:
            pass

    @pytest.mark.matplotlib
    def test_baseline(self, close_figures):
        # Without an ax argument a new figure and axes are returned.
        plt = _import_mpl()
        new_fig, new_ax = gofplots._do_plot(self.x, self.y)
        assert isinstance(new_fig, plt.Figure)
        assert isinstance(new_ax, plt.Axes)
        assert self.fig is not new_fig
        assert self.ax is not new_ax

    @pytest.mark.matplotlib
    def test_with_ax(self, close_figures):
        # With an ax argument the very same figure and axes come back.
        plt = _import_mpl()
        same_fig, same_ax = gofplots._do_plot(self.x, self.y, ax=self.ax)
        assert isinstance(same_fig, plt.Figure)
        assert isinstance(same_ax, plt.Axes)
        assert self.fig is same_fig
        assert self.ax is same_ax

    @pytest.mark.matplotlib
    def test_plot_full_options(self, close_figures):
        gofplots._do_plot(
            self.x, self.y, ax=self.ax, step=False, **self.full_options
        )

    @pytest.mark.matplotlib
    def test_step_baseline(self, close_figures):
        gofplots._do_plot(
            self.x, self.y, ax=self.ax, step=True, **self.step_options
        )

    @pytest.mark.matplotlib
    def test_step_full_options(self, close_figures):
        gofplots._do_plot(
            self.x, self.y, ax=self.ax, step=True, **self.full_options
        )

    @pytest.mark.matplotlib
    def test_plot_qq_line(self, close_figures):
        gofplots._do_plot(self.x, self.y, ax=self.ax, line="r")

    @pytest.mark.matplotlib
    def test_step_qq_line(self, close_figures):
        gofplots._do_plot(self.x, self.y, ax=self.ax, step=True, line="r")
class TestQQLine:
    # Tests for qqline with each line code ("45", "r", "s", "q"), with and
    # without a format string and extra line options.

    def setup_method(self):
        np.random.seed(0)
        self.x = np.sort(np.random.normal(loc=2.9, scale=1.2, size=37))
        self.y = np.sort(np.random.normal(loc=3.0, scale=1.1, size=37))
        # fig/ax are only set when matplotlib can be imported.
        try:
            import matplotlib.pyplot as plt

            self.fig, self.ax = plt.subplots()
            self.ax.plot(self.x, self.y, "ko")
        except ImportError:
            pass

        self.lineoptions = {
            "linewidth": 2,
            "dashes": (10, 1, 3, 4),
            "color": "green",
        }
        self.fmt = "bo-"

    @pytest.mark.matplotlib
    def test_badline(self, close_figures):
        # close_figures is requested here as in every sibling test, so the
        # figure opened by setup_method is cleaned up for this test too.
        with pytest.raises(ValueError):
            qqline(self.ax, "junk")

    @pytest.mark.matplotlib
    def test_non45_no_x(self, close_figures):
        with pytest.raises(ValueError):
            qqline(self.ax, "s", y=self.y)

    @pytest.mark.matplotlib
    def test_non45_no_y(self, close_figures):
        with pytest.raises(ValueError):
            qqline(self.ax, "s", x=self.x)

    @pytest.mark.matplotlib
    def test_non45_no_x_no_y(self, close_figures):
        with pytest.raises(ValueError):
            qqline(self.ax, "s")

    @pytest.mark.matplotlib
    def test_45(self, close_figures):
        # Drawing the line must add at least one child artist to the axes.
        nchildren = len(self.ax.get_children())
        qqline(self.ax, "45")
        assert len(self.ax.get_children()) > nchildren

    @pytest.mark.matplotlib
    def test_45_fmt(self, close_figures):
        qqline(self.ax, "45", fmt=self.fmt)

    @pytest.mark.matplotlib
    def test_45_fmt_lineoptions(self, close_figures):
        qqline(self.ax, "45", fmt=self.fmt, **self.lineoptions)

    @pytest.mark.matplotlib
    def test_r(self, close_figures):
        nchildren = len(self.ax.get_children())
        qqline(self.ax, "r", x=self.x, y=self.y)
        assert len(self.ax.get_children()) > nchildren

    @pytest.mark.matplotlib
    def test_r_fmt(self, close_figures):
        qqline(self.ax, "r", x=self.x, y=self.y, fmt=self.fmt)

    @pytest.mark.matplotlib
    def test_r_fmt_lineoptions(self, close_figures):
        qqline(
            self.ax, "r", x=self.x, y=self.y, fmt=self.fmt, **self.lineoptions
        )

    @pytest.mark.matplotlib
    def test_s(self, close_figures):
        nchildren = len(self.ax.get_children())
        qqline(self.ax, "s", x=self.x, y=self.y)
        assert len(self.ax.get_children()) > nchildren

    @pytest.mark.matplotlib
    def test_s_fmt(self, close_figures):
        qqline(self.ax, "s", x=self.x, y=self.y, fmt=self.fmt)

    @pytest.mark.matplotlib
    def test_s_fmt_lineoptions(self, close_figures):
        qqline(
            self.ax, "s", x=self.x, y=self.y, fmt=self.fmt, **self.lineoptions
        )

    @pytest.mark.matplotlib
    def test_q(self, close_figures):
        nchildren = len(self.ax.get_children())
        qqline(self.ax, "q", dist=stats.norm, x=self.x, y=self.y)
        assert len(self.ax.get_children()) > nchildren

    @pytest.mark.matplotlib
    def test_q_fmt(self, close_figures):
        qqline(self.ax, "q", dist=stats.norm, x=self.x, y=self.y, fmt=self.fmt)

    @pytest.mark.matplotlib
    def test_q_fmt_lineoptions(self, close_figures):
        qqline(
            self.ax,
            "q",
            dist=stats.norm,
            x=self.x,
            y=self.y,
            fmt=self.fmt,
            **self.lineoptions,
        )
class TestPlottingPosition:
    def setup_method(self):
        self.N = 13
        self.data = np.arange(self.N)

    def do_test(self, alpha, beta):
        # statsmodels' plotting positions must agree with scipy's to five
        # decimals for the given (alpha, beta) pair.
        actual = gofplots.plotting_pos(self.N, a=alpha, b=beta)
        reference = stats.mstats.plotting_positions(
            self.data, alpha=alpha, beta=beta
        )
        nptest.assert_array_almost_equal(actual, reference, decimal=5)

    @pytest.mark.matplotlib
    def test_weibull(self, close_figures):
        self.do_test(0, 0)

    @pytest.mark.matplotlib
    def test_lininterp(self, close_figures):
        self.do_test(0, 1)

    @pytest.mark.matplotlib
    def test_piecewise(self, close_figures):
        self.do_test(0.5, 0.5)

    @pytest.mark.matplotlib
    def test_approx_med_unbiased(self, close_figures):
        one_third = 1.0 / 3.0
        self.do_test(one_third, one_third)

    @pytest.mark.matplotlib
    def test_cunnane(self, close_figures):
        self.do_test(0.4, 0.4)
def test_param_unpacking():
    # Each frozen beta distribution is paired with the fit_params that
    # ProbPlot is expected to report for it, however the shape, loc and
    # scale arguments were spelled.
    cases = [
        (stats.beta(2, 3), [2.0, 3, 0, 1]),
        (stats.beta(2, b=3), [2.0, 3, 0, 1]),
        (stats.beta(a=2, b=3), [2.0, 3, 0, 1]),
        (stats.beta(2, 3, 4), [2.0, 3, 4, 1]),
        (stats.beta(a=2, b=3, loc=4), [2.0, 3, 4, 1]),
        (stats.beta(2, 3, 4, 5), [2.0, 3, 4, 5]),
        (stats.beta(2, 3, 4, scale=5), [2.0, 3, 4, 5]),
        (stats.beta(2, 3, loc=4, scale=5), [2.0, 3, 4, 5]),
        (stats.beta(2, b=3, loc=4, scale=5), [2.0, 3, 4, 5]),
        (stats.beta(a=2, b=3, loc=4, scale=5), [2.0, 3, 4, 5]),
    ]
    for frozen_dist, expected in cases:
        pp = ProbPlot(np.empty(100), dist=frozen_dist)
        assert_equal(pp.fit_params, np.array(expected))
@pytest.mark.matplotlib
@pytest.mark.parametrize("labels", [{}, {"xlabel": "X", "ylabel": "Y"}])
@pytest.mark.parametrize("x_size", [30, 50])
@pytest.mark.parametrize("y_size", [30, 50])
@pytest.mark.parametrize("line", [None, "45", "s", "r", "q"])
def test_correct_labels(
    close_figures, reset_randomstate, line, x_size, y_size, labels
):
    # Check which sample's label ends up on which axis of
    # qqplot_2samples, for default and custom labels.
    rs = np.random.RandomState(9876554)
    x = rs.normal(loc=0, scale=0.1, size=x_size)
    y = rs.standard_t(3, size=y_size)
    fig = qqplot_2samples(sm.ProbPlot(x), sm.ProbPlot(y), line=line, **labels)
    ax = fig.get_axes()[0]
    x_label = ax.get_xlabel()
    y_label = ax.get_ylabel()

    # Text expected for the first and the second sample respectively.
    if labels:
        first_text, second_text = "X", "Y"
    else:
        first_text, second_text = "1st", "2nd"

    if x_size < y_size:
        # When the first sample is the shorter one, the labels are
        # expected on the opposite axes.
        assert second_text in x_label
        assert first_text in y_label
    else:
        assert first_text in x_label
        assert second_text in y_label
@pytest.mark.matplotlib
def test_axis_order(close_figures):
    # The first sample has scale 1 and the second scale 0.01; the axis
    # extents show which sample was drawn on which axis.
    def axis_extents(figure):
        # Return (x extent, y extent) of the first axes of *figure*.
        first_axes = figure.get_axes()[0]
        y_extent = np.diff(first_axes.get_ylim())[0]
        x_extent = np.diff(first_axes.get_xlim())[0]
        return x_extent, y_extent

    xx = np.random.normal(10, 1, (100,))
    xy = np.random.normal(1, 0.01, (100,))
    x_range, y_range = axis_extents(qqplot_2samples(xx, xy, "x", "y"))
    assert y_range < x_range

    # Longer first sample: same ordering of the extents is expected.
    xx_long = np.random.normal(10, 1, (1000,))
    x_range, y_range = axis_extents(qqplot_2samples(xx_long, xy, "x", "y"))
    assert y_range < x_range

    # Longer second sample: the extents are expected to be reversed.
    xy_long = np.random.normal(1, 0.01, (1000,))
    x_range, y_range = axis_extents(qqplot_2samples(xx, xy_long, "x", "y"))
    assert x_range < y_range

View File

@ -0,0 +1,449 @@
from statsmodels.compat.python import lrange
from io import BytesIO
from itertools import product
import numpy as np
from numpy.testing import assert_, assert_raises
import pandas as pd
import pytest
from statsmodels.api import datasets
# utilities for the tests
try:
import matplotlib.pyplot as plt # noqa:F401
except ImportError:
pass
# other functions to be tested for accuracy
# the main drawing function
from statsmodels.graphics.mosaicplot import (
_hierarchical_split,
_key_splitting,
_normalize_split,
_reduce_dict,
_split_rect,
mosaic,
)
@pytest.mark.matplotlib
def test_data_conversion(close_figures):
    # Smoke test: mosaic is called with dicts, Series, lists, arrays and
    # DataFrames, each drawn on its own cell of a single 4x4 figure.
    #
    # It will not reorder the elements, so the dictionary will look odd
    # as its key order has the c and b keys swapped.
    #
    # The figure is deliberately kept open until the end (the
    # close_figures fixture cleans up), so every axes stays attached to
    # it and the final title is set on this figure.
    fig, ax = plt.subplots(4, 4)

    # one-dimensional inputs
    data = {'ax': 1, 'bx': 2, 'cx': 3}
    mosaic(data, ax=ax[0, 0], title='basic dict', axes_label=False)
    data = pd.Series(data)
    mosaic(data, ax=ax[0, 1], title='basic series', axes_label=False)
    data = [1, 2, 3]
    mosaic(data, ax=ax[0, 2], title='basic list', axes_label=False)
    data = np.asarray(data)
    mosaic(data, ax=ax[0, 3], title='basic array', axes_label=False)

    # two-level inputs, in natural and inverted key order
    data = {('ax', 'cx'): 1, ('bx', 'cx'): 2, ('ax', 'dx'): 3, ('bx', 'dx'): 4}
    mosaic(data, ax=ax[1, 0], title='compound dict', axes_label=False)
    mosaic(data, ax=ax[2, 0], title='inverted keys dict', index=[1, 0],
           axes_label=False)
    data = pd.Series(data)
    mosaic(data, ax=ax[1, 1], title='compound series', axes_label=False)
    mosaic(data, ax=ax[2, 1], title='inverted keys series', index=[1, 0])
    data = [[1, 2], [3, 4]]
    mosaic(data, ax=ax[1, 2], title='compound list', axes_label=False)
    mosaic(data, ax=ax[2, 2], title='inverted keys list', index=[1, 0])
    data = np.array([[1, 2], [3, 4]])
    mosaic(data, ax=ax[1, 3], title='compound array', axes_label=False)
    mosaic(data, ax=ax[2, 3], title='inverted keys array', index=[1, 0],
           axes_label=False)

    # DataFrame input, selecting one or both columns
    gender = ['male', 'male', 'male', 'female', 'female', 'female']
    pet = ['cat', 'dog', 'dog', 'cat', 'dog', 'cat']
    data = pd.DataFrame({'gender': gender, 'pet': pet})
    mosaic(data, ['gender'], ax=ax[3, 0], title='dataframe by key 1',
           axes_label=False)
    mosaic(data, ['pet'], ax=ax[3, 1], title='dataframe by key 2',
           axes_label=False)
    mosaic(data, ['gender', 'pet'], ax=ax[3, 2], title='both keys',
           axes_label=False)
    mosaic(data, ['pet', 'gender'], ax=ax[3, 3], title='keys inverted',
           axes_label=False)

    fig.suptitle('testing data conversion (plot 1 of 4)')
@pytest.mark.matplotlib
def test_mosaic_simple(close_figures):
    # Draw a simple plot of data with four categorical levels, where each
    # successive group gets a larger count.
    key_set = (['male', 'female'], ['old', 'adult', 'young'],
               ['worker', 'unemployed'], ['healty', 'ill'])
    # the cartesian product of all the categories is
    # the complete set of categories
    keys = list(product(*key_set))
    data = {key: count for count, key in enumerate(keys, start=1)}

    # Colours per category: males in blue, females in red ...
    props = {
        ('male',): {'color': 'b'},
        ('female',): {'color': 'r'},
    }
    # ... and every group containing 'ill' gets its own hatched colour.
    for key in keys:
        if 'ill' not in key:
            continue
        colour = 'BlueViolet' if 'male' in key else 'Crimson'
        props[key] = {'color': colour, 'hatch': '+'}

    # mosaic of the data, with given gaps and colors
    mosaic(data, gap=0.05, properties=props, axes_label=False)
    plt.suptitle('syntetic data, 4 categories (plot 2 of 4)')
@pytest.mark.matplotlib
def test_mosaic(close_figures):
    # Run the same kind of plot on a known dataset, after recoding its
    # numeric categories into readable labels.
    affairs = datasets.fair.load_pandas()
    datas = affairs.exog
    # any time greater than 0 is cheating
    datas['cheated'] = affairs.endog > 0
    # Sort on the numeric codes first, then replace codes by names.
    # Columns: [rate_marriage, age, yrs_married, children,
    #           religious, educ, occupation, occupation_husb]
    datas = datas.sort_values(['rate_marriage', 'religious'])
    recodings = {
        'rate_marriage': {1: 'awful', 2: 'bad', 3: 'intermediate',
                          4: 'good', 5: 'wonderful'},
        'religious': {1: 'non religious', 2: 'poorly religious',
                      3: 'religious', 4: 'very religious'},
        'cheated': {False: 'faithful', True: 'cheated'},
    }
    for column, code_to_label in recodings.items():
        datas[column] = datas[column].map(code_to_label)
    # finished cleaning

    _, ax = plt.subplots(2, 2)
    top_left, top_right = ax[0, 0], ax[0, 1]
    bottom_left, bottom_right = ax[1, 0], ax[1, 1]
    mosaic(datas, ['rate_marriage', 'cheated'], ax=top_left,
           title='by marriage happiness')
    mosaic(datas, ['religious', 'cheated'], ax=top_right,
           title='by religiosity')
    mosaic(datas, ['rate_marriage', 'religious', 'cheated'], ax=bottom_left,
           title='by both', labelizer=lambda k: '')
    bottom_left.set_xlabel('marriage rating')
    bottom_left.set_ylabel('religion status')
    mosaic(datas, ['religious', 'rate_marriage'], ax=bottom_right,
           title='inter-dependence', axes_label=False)
    plt.suptitle("extramarital affairs (plot 3 of 4)")
@pytest.mark.matplotlib
def test_mosaic_very_complex(close_figures):
    # Build a scatter-matrix-like grid of mosaic plots, one per pair of
    # variables, with the variable name written on the diagonal.
    key_name = ['gender', 'age', 'health', 'work']
    key_base = (['male', 'female'], ['old', 'young'],
                ['healty', 'ill'], ['work', 'unemployed'])
    keys = list(product(*key_base))
    data = dict(zip(keys, range(1, 1 + len(keys))))
    props = {
        ('male', 'old'): {'color': 'r'},
        ('female',): {'color': 'pink'},
    }
    L = len(key_base)
    _, axes = plt.subplots(L, L)
    for i, j in product(range(L), repeat=2):
        if i == j:
            # Diagonal cell: only the variable name, no ticks.
            cell = axes[i, i]
            cell.text(0.5, 0.5, key_name[i], ha='center', va='center')
            cell.set_xticks([])
            cell.set_xticklabels([])
            cell.set_yticks([])
            cell.set_yticklabels([])
            continue

        # Off-diagonal cell: move the two plotted levels to the front of
        # every key, then collapse the remaining levels.
        m = set(range(L)).difference({i, j})
        ij, ji = min(i, j), max(i, j)
        temp_data = {(k[ij], k[ji]) + tuple(k[r] for r in m): v
                     for k, v in data.items()}
        # Iterate over a snapshot of the keys: the dict is modified in
        # the loop, and each total is taken from its current contents.
        for full_key in list(temp_data.keys()):
            pair = full_key[:2]
            value = _reduce_dict(temp_data, pair)
            temp_data[pair] = value
            del temp_data[full_key]
        mosaic(temp_data, ax=axes[i, j], axes_label=False,
               properties=props, gap=0.05, horizontal=i > j)
    plt.suptitle('old males should look bright red,  (plot 4 of 4)')
@pytest.mark.matplotlib
def test_axes_labeling(close_figures):
    # Draw the same random data horizontally and vertically, with a
    # single rotation and with one rotation per level.
    from numpy.random import rand

    key_set = (['male', 'female'], ['old', 'adult', 'young'],
               ['worker', 'unemployed'], ['yes', 'no'])
    # the cartesian product of all the categories is
    # the complete set of categories
    keys = list(product(*key_set))
    data = dict(zip(keys, rand(len(keys))))

    def lab(k):
        # Label a tile with the first letter of each level in its key.
        return ''.join(s[0] for s in k)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
    mosaic(data, ax=ax1, labelizer=lab, horizontal=True, label_rotation=45)
    mosaic(data, ax=ax2, labelizer=lab, horizontal=False,
           label_rotation=[0, 45, 90, 0])
    fig.suptitle("correct alignment of the axes labels")
@pytest.mark.smoke
@pytest.mark.matplotlib
def test_mosaic_empty_cells(close_figures):
    # GH#2286
    # Smoke test: mosaic is called on a crosstab-derived Series and on
    # the raw DataFrame of the same data.
    id2_by_row = {64: 'Angelica',
                  65: 'DXW_UID', 66: 'casuid01',
                  67: 'casuid01', 68: 'EC93_uid',
                  69: 'EC93_uid', 70: 'EC93_uid',
                  60: 'DXW_UID', 61: 'AtmosFox',
                  62: 'DXW_UID', 63: 'DXW_UID'}
    id1_by_row = {64: 'TGP',
                  65: 'Retention01', 66: 'default',
                  67: 'default', 68: 'Musa_EC_9_3',
                  69: 'Musa_EC_9_3', 70: 'Musa_EC_9_3',
                  60: 'default', 61: 'default',
                  62: 'default', 63: 'default'}
    mydata = pd.DataFrame({'id2': id2_by_row, 'id1': id1_by_row})

    ct = pd.crosstab(mydata.id1, mydata.id2)
    _, vals = mosaic(ct.T.unstack())
    _, vals = mosaic(mydata, ['id1','id2'])
def eq(x, y):
    # Assert that x and y are element-wise close (np.allclose); returns
    # whatever assert_ returns.
    return assert_(np.allclose(x, y))
def test_recursive_split():
    # With gap=0 the hierarchical split must return the keys in order and
    # the rectangles listed below, each given as four numbers.
    # NOTE(review): these tuples were previously assigned into `res`
    # rather than compared, so they have never been exercised; confirm
    # them on the first run.

    # one level
    keys = list(product('mf'))
    data = dict(zip(keys, [1] * len(keys)))
    res = _hierarchical_split(data, gap=0)
    assert_(list(res.keys()) == keys)
    eq(res[('m',)], (0.0, 0.0, 0.5, 1.0))
    eq(res[('f',)], (0.5, 0.0, 0.5, 1.0))

    # two levels
    keys = list(product('mf', 'yao'))
    data = dict(zip(keys, [1] * len(keys)))
    res = _hierarchical_split(data, gap=0)
    assert_(list(res.keys()) == keys)
    eq(res[('m', 'y')], (0.0, 0.0, 0.5, 1 / 3))
    eq(res[('m', 'a')], (0.0, 1 / 3, 0.5, 1 / 3))
    eq(res[('m', 'o')], (0.0, 2 / 3, 0.5, 1 / 3))
    eq(res[('f', 'y')], (0.5, 0.0, 0.5, 1 / 3))
    eq(res[('f', 'a')], (0.5, 1 / 3, 0.5, 1 / 3))
    eq(res[('f', 'o')], (0.5, 2 / 3, 0.5, 1 / 3))
def test__reduce_dict():
    # _reduce_dict is checked for progressively longer partial keys,
    # first with all counts equal to 1, then with counts 0..7.
    full_keys = list(product('mf', 'oy', 'wn'))
    partial_keys = [('m',), ('m', 'o'), ('m', 'o', 'w')]

    all_ones = dict(zip(full_keys, [1] * 8))
    for partial, expected in zip(partial_keys, (4, 2, 1)):
        eq(_reduce_dict(all_ones, partial), expected)

    counting = dict(zip(full_keys, range(8)))
    for partial, expected in zip(partial_keys, (6, 1, 0)):
        eq(_reduce_dict(counting, partial), expected)
def test__key_splitting():
    # subdivide starting with an empty tuple
    unit_square = {tuple(): (0, 0, 1, 1)}
    first = _key_splitting(unit_square, ['a', 'b'], [1, 1], tuple(), True, 0)
    assert_(list(first.keys()) == [('a',), ('b',)])
    eq(first[('a',)], (0, 0, 0.5, 1))
    eq(first[('b',)], (0.5, 0, 0.5, 1))

    # subdivide a in two sublevel
    second = _key_splitting(first, ['c', 'd'], [1, 1], ('a',), False, 0)
    assert_(list(second.keys()) == [('a', 'c'), ('a', 'd'), ('b',)])
    eq(second[('a', 'c')], (0.0, 0.0, 0.5, 0.5))
    eq(second[('a', 'd')], (0.0, 0.5, 0.5, 0.5))
    eq(second[('b',)], (0.5, 0, 0.5, 1))

    # starting with a non empty tuple and uneven distribution
    named_square = {('total',): (0, 0, 1, 1)}
    uneven = _key_splitting(
        named_square, ['a', 'b'], [1, 2], ('total',), True, 0
    )
    assert_(list(uneven.keys()) == [('total',) + (e,) for e in ['a', 'b']])
    eq(uneven[('total', 'a')], (0, 0, 1 / 3, 1))
    eq(uneven[('total', 'b')], (1 / 3, 0, 2 / 3, 1))
def test_proportion_normalization():
    # extremes should give the whole set, as well
    # as if 0 is inserted
    for scalar, expected in (
        (0., [0.0, 0.0, 1.0]),
        (1., [0.0, 1.0, 1.0]),
        (2., [0.0, 1.0, 1.0]),
    ):
        eq(_normalize_split(scalar), expected)

    # negative values should raise ValueError, and so should input where
    # everything is zero
    for invalid in (-1, [1., -1], [1., -1, 0.], [0.], [0., 0.]):
        assert_raises(ValueError, _normalize_split, invalid)

    # one-element array should return the whole interval
    for single in (0.5, 1., 2.):
        eq(_normalize_split([single]), [0.0, 1.0])

    # simple division should give two pieces
    for x in [0.3, 0.5, 0.9]:
        eq(_normalize_split(x), [0., x, 1.0])

    # multiple division should split as the sum of the components
    for x, y in [(0.25, 0.5), (0.1, 0.8), (10., 30.)]:
        eq(_normalize_split([x, y]), [0., x / (x + y), 1.0])
    for x, y, z in [(1., 1., 1.), (0.1, 0.5, 0.7), (10., 30., 40)]:
        cut_points = [0., x / (x + y + z), (x + y) / (x + y + z), 1.0]
        eq(_normalize_split([x, y, z]), cut_points)
def test_false_split():
    # if you ask it to be divided in only one piece, just return the original
    # one
    pure_square = [0., 0., 1., 1.]
    for gap in (0.0, 0.5):
        for horizontal in (True, False):
            conf = dict(proportion=[1], gap=gap, horizontal=horizontal)
            eq(_split_rect(*pure_square, **conf), pure_square)

    # identity on a void rectangle should not give anything strange
    null_square = [0., 0., 0., 0.]
    for gap in (0.0, 1.0):
        conf = dict(proportion=[1], gap=gap, horizontal=True)
        eq(_split_rect(*null_square, **conf), null_square)

    # splitting a negative rectangle should raise error
    neg_square = [0., 0., -1., 0.]
    for gap in (0.0, 0.5):
        for proportion in ([1], [1, 1]):
            conf = dict(proportion=proportion, gap=gap, horizontal=True)
            assert_raises(ValueError, _split_rect, *neg_square, **conf)
def test_rect_pure_split():
    pure_square = [0., 0., 1., 1.]
    # Each case: (proportion, horizontal, expected rectangles), all with
    # gap=0.0 on the perfect square.
    cases = [
        # division in two equal pieces
        ([1, 1], True,
         [(0.0, 0.0, 0.5, 1.0), (0.5, 0.0, 0.5, 1.0)]),
        ([1, 1], False,
         [(0.0, 0.0, 1.0, 0.5), (0.0, 0.5, 1.0, 0.5)]),
        # division in two non-equal pieces
        ([1, 2], True,
         [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 2 / 3, 1.0)]),
        ([1, 2], False,
         [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 2 / 3)]),
        # division in three equal pieces
        ([1, 1, 1], True,
         [(0.0, 0.0, 1 / 3, 1.0), (1 / 3, 0.0, 1 / 3, 1.0),
          (2 / 3, 0.0, 1 / 3, 1.0)]),
        ([1, 1, 1], False,
         [(0.0, 0.0, 1.0, 1 / 3), (0.0, 1 / 3, 1.0, 1 / 3),
          (0.0, 2 / 3, 1.0, 1 / 3)]),
        # division in three non-equal pieces
        ([1, 2, 1], True,
         [(0.0, 0.0, 1 / 4, 1.0), (1 / 4, 0.0, 1 / 2, 1.0),
          (3 / 4, 0.0, 1 / 4, 1.0)]),
        ([1, 2, 1], False,
         [(0.0, 0.0, 1.0, 1 / 4), (0.0, 1 / 4, 1.0, 1 / 2),
          (0.0, 3 / 4, 1.0, 1 / 4)]),
    ]
    for proportion, horizontal, expected in cases:
        conf = dict(proportion=proportion, gap=0.0, horizontal=horizontal)
        eq(_split_rect(*pure_square, **conf), expected)

    # splitting on a void rectangle should give multiple void
    null_square = [0., 0., 0., 0.]
    for proportion, gap in (([1, 1], 0.0), ([1, 2], 1.0)):
        conf = dict(proportion=proportion, gap=gap, horizontal=True)
        eq(_split_rect(*null_square, **conf), [null_square, null_square])
def test_rect_deformed_split():
    non_pure_square = [1., -1., 1., 0.5]
    # Each case: (proportion, horizontal, expected rectangles), all with
    # gap=0.0 on the shifted, non-square rectangle above.
    cases = [
        # division in two equal pieces
        ([1, 1], True,
         [(1.0, -1.0, 0.5, 0.5), (1.5, -1.0, 0.5, 0.5)]),
        ([1, 1], False,
         [(1.0, -1.0, 1.0, 0.25), (1.0, -0.75, 1.0, 0.25)]),
        # division in two non-equal pieces
        ([1, 2], True,
         [(1.0, -1.0, 1 / 3, 0.5), (1 + 1 / 3, -1.0, 2 / 3, 0.5)]),
        ([1, 2], False,
         [(1.0, -1.0, 1.0, 1 / 6), (1.0, 1 / 6 - 1, 1.0, 2 / 6)]),
    ]
    for proportion, horizontal, expected in cases:
        conf = dict(proportion=proportion, gap=0.0, horizontal=horizontal)
        eq(_split_rect(*non_pure_square, **conf), expected)
def test_gap_split():
    pure_square = [0., 0., 1., 1.]
    # Each case: (proportion, expected), all horizontal with gap=1.0.
    cases = [
        # null split
        ([1], pure_square),
        # equal split
        ([1, 1], [(0.0, 0.0, 0.25, 1.0), (0.75, 0.0, 0.25, 1.0)]),
        # unequal split
        ([1, 2], [(0.0, 0.0, 1 / 6, 1.0), (0.5 + 1 / 6, 0.0, 1 / 3, 1.0)]),
    ]
    for proportion, expected in cases:
        conf_h = dict(proportion=proportion, gap=1.0, horizontal=True)
        eq(_split_rect(*pure_square, **conf_h), expected)
@pytest.mark.matplotlib
def test_default_arg_index(close_figures):
    # 2116
    # Calling mosaic on this DataFrame without an index argument is
    # expected to raise ValueError.
    sizes = ['small', 'large', 'large', 'small', 'large', 'small']
    lengths = ['long', 'short', 'short', 'long', 'long', 'short']
    df = pd.DataFrame({'size': sizes, 'length': lengths})
    assert_raises(ValueError, mosaic, data=df, title='foobar')
@pytest.mark.matplotlib
def test_missing_category(close_figures):
    # GH5639
    # Smoke test: 'cat' has no 'large'... wait for rendering below; the
    # figure built from a categorical column must save to PNG.
    animal = ['dog', 'dog', 'dog', 'cat', 'dog', 'cat', 'cat',
              'dog', 'dog', 'cat']
    size = ['medium', 'large', 'medium', 'medium', 'medium', 'medium',
            'large', 'large', 'large', 'small']
    testdata = pd.DataFrame({'animal': animal, 'size': size})
    ordered_sizes = ['small', 'medium', 'large']
    testdata['size'] = pd.Categorical(testdata['size'],
                                      categories=ordered_sizes)
    testdata = testdata.sort_values('size')
    fig, _ = mosaic(testdata, ['animal', 'size'])
    # Render into an in-memory buffer rather than a file on disk.
    png_buffer = BytesIO()
    fig.savefig(png_buffer, format='png')

View File

@ -0,0 +1,389 @@
import numpy as np
from numpy.testing import assert_array_less, assert_equal, assert_raises
from pandas import DataFrame, Series
import pytest
import statsmodels.api as sm
from statsmodels.graphics.regressionplots import (
abline_plot,
add_lowess,
influence_plot,
plot_added_variable,
plot_ccpr,
plot_ccpr_grid,
plot_ceres_residuals,
plot_fit,
plot_leverage_resid2,
plot_partial_residuals,
plot_partregress_grid,
plot_regress_exog,
)
try:
import matplotlib.pyplot as plt
except ImportError:
pass
# Switch for PDF output. When it is True, a PdfPages writer for
# "test_regressionplots.pdf" is created; otherwise pdf stays None.
pdf_output = False

pdf = None
if pdf_output:
    from matplotlib.backends.backend_pdf import PdfPages

    pdf = PdfPages("test_regressionplots.pdf")
def close_or_save(pdf, fig):
    # Save fig through pdf.savefig when the module-level pdf_output flag
    # is set; otherwise do nothing.
    if not pdf_output:
        return
    pdf.savefig(fig)
class TestPlot:
    # Tests of the regression plot helpers on OLS results fitted to
    # simulated data.

    @classmethod
    def setup_class(cls):
        nsample = 100
        sig = 0.5
        x1 = np.linspace(0, 20, nsample)
        x2 = 5 + 3 * np.random.randn(nsample)
        x = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2, np.ones(nsample)]
        beta = [0.5, 0.5, 1, -0.04, 5.]
        y_true = np.dot(x, beta)
        y = y_true + sig * np.random.normal(size=nsample)
        # res uses only x1, x2 and a constant; res_true uses all columns.
        exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)

        cls.res = sm.OLS(y, exog0).fit()
        cls.res_true = sm.OLS(y, x).fit()

    @staticmethod
    def _check_marker_sizes(fig, criterion):
        # Check that the marker sizes of the first child artist are an
        # exact linear function of `criterion` (regression SSR ~ 0).
        # Only the private `_sizes` lookup is guarded: if the artist has
        # no such attribute the check is skipped with a warning instead
        # of being dropped silently.
        try:
            sizes = fig.axes[0].get_children()[0]._sizes
        except AttributeError:
            import warnings

            warnings.warn('test not compatible with matplotlib version')
            return
        ex = sm.add_constant(criterion)
        ssr = sm.OLS(sizes, ex).fit().ssr
        assert_array_less(ssr, 1e-12)

    @pytest.mark.matplotlib
    def test_plot_fit(self, close_figures):
        res = self.res

        fig = plot_fit(res, 0, y_true=None)

        x0 = res.model.exog[:, 0]
        yf = res.fittedvalues
        y = res.model.endog

        # first line: observed values against the first regressor
        px1, px2 = fig.axes[0].get_lines()[0].get_data()
        np.testing.assert_equal(x0, px1)
        np.testing.assert_equal(y, px2)

        # second line: fitted values against the first regressor
        px1, px2 = fig.axes[0].get_lines()[1].get_data()
        np.testing.assert_equal(x0, px1)
        np.testing.assert_equal(yf, px2)

        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_plot_oth(self, close_figures):
        # just test that they run
        res = self.res
        plot_fit(res, 0, y_true=None)
        plot_partregress_grid(res, exog_idx=[0, 1])
        # GH 5873
        plot_partregress_grid(self.res_true, grid=(2, 3))
        plot_regress_exog(res, exog_idx=0)
        plot_ccpr(res, exog_idx=0)
        plot_ccpr_grid(res, exog_idx=[0])
        fig = plot_ccpr_grid(res, exog_idx=[0,1])
        for ax in fig.axes:
            add_lowess(ax)

        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_plot_influence(self, close_figures):
        infl = self.res.get_influence()
        fig = influence_plot(self.res)
        assert_equal(isinstance(fig, plt.Figure), True)
        # test that we have the correct criterion for sizes #3103
        self._check_marker_sizes(fig, infl.cooks_distance[0])

        fig = influence_plot(self.res, criterion='DFFITS')
        assert_equal(isinstance(fig, plt.Figure), True)
        self._check_marker_sizes(fig, np.abs(infl.dffits[0]))

        assert_raises(ValueError, influence_plot, self.res, criterion='unknown')

    @pytest.mark.matplotlib
    def test_plot_leverage_resid2(self, close_figures):
        fig = plot_leverage_resid2(self.res)
        assert_equal(isinstance(fig, plt.Figure), True)
class TestPlotPandas(TestPlot):
    """Run the TestPlot tests against a model fitted on pandas objects."""

    def setup_method(self):
        """Fit OLS on a DataFrame/Series and keep the combined data frame.

        Sets ``self.res`` (shadowing the class-level result of the parent)
        and ``self.data``, which holds the regressors plus the response in a
        column named ``y``.
        """
        # Seed the global generator so every test sees the same data.
        np.random.seed(12345)
        nsample = 100
        sig = 0.5
        x1 = np.linspace(0, 20, nsample)
        x2 = 5 + 3 * np.random.randn(nsample)
        X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2, np.ones(nsample)]
        beta = [0.5, 0.5, 1, -0.04, 5.]
        y_true = np.dot(X, beta)
        y = y_true + sig * np.random.normal(size=nsample)

        exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)
        # prepend=False appends the constant as the LAST column, so the
        # labels have to follow the order x1, x2, constant.
        exog0 = DataFrame(exog0, columns=["var1", "var2", "const"])
        y = Series(y, name="outcome")
        self.res = sm.OLS(y, exog0).fit()

        # Regressors plus response, used by the formula-based subclass.
        data = exog0.copy()
        data['y'] = y
        self.data = data
class TestPlotFormula(TestPlotPandas):
    """Formula-interface checks on top of the pandas-based plot tests."""

    @pytest.mark.matplotlib
    def test_one_column_exog(self, close_figures):
        """plot_regress_exog works without and with an intercept term."""
        from statsmodels.formula.api import ols

        # First a single-regressor model without intercept, then with one.
        for formula in ("y~var1-1", "y~var1"):
            fitted = ols(formula, data=self.data).fit()
            plot_regress_exog(fitted, "var1")
class TestABLine:
    """Smoke tests for abline_plot on a simple linear regression."""

    @classmethod
    def setup_class(cls):
        """Simulate 30 observations and fit the OLS model used by all tests."""
        np.random.seed(12345)
        cls.X = sm.add_constant(np.random.normal(0, 20, size=30))
        noise = np.random.normal(0, 30, size=30)
        cls.y = np.dot(cls.X, [25, 3.5]) + noise
        cls.mod = sm.OLS(cls.y, cls.X).fit()

    def _scatter_axes(self):
        """Return a new figure and its axes with the raw data scattered on it."""
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(self.X[:, 1], self.y)
        return fig, ax

    @pytest.mark.matplotlib
    def test_abline_model(self, close_figures):
        """Line taken from fitted results, drawn on a figure it creates."""
        fig = abline_plot(model_results=self.mod)
        fig.axes[0].scatter(self.X[:, 1], self.y)
        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_abline_model_ax(self, close_figures):
        """Line taken from fitted results, drawn on existing axes."""
        _, ax = self._scatter_axes()
        fig = abline_plot(model_results=self.mod, ax=ax)
        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_abline_ab(self, close_figures):
        """Line given by explicit intercept and slope on a new figure."""
        intercept, slope = self.mod.params
        fig = abline_plot(intercept=intercept, slope=slope)
        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_abline_ab_ax(self, close_figures):
        """Line given by explicit intercept and slope on existing axes."""
        intercept, slope = self.mod.params
        _, ax = self._scatter_axes()
        fig = abline_plot(intercept=intercept, slope=slope, ax=ax)
        close_or_save(pdf, fig)

    @pytest.mark.matplotlib
    def test_abline_remove(self, close_figures):
        """A line added by abline_plot can be removed from its axes again."""
        intercept, slope = self.mod.params
        fig, ax = self._scatter_axes()
        abline_plot(intercept=intercept, slope=slope, ax=ax)
        abline_plot(intercept=intercept, slope=2*slope, ax=ax)
        first_line = ax.get_lines()[0]
        first_line.remove()
        close_or_save(pdf, fig)
class TestABLinePandas(TestABLine):
    """Run the TestABLine tests with a model fitted on pandas inputs."""

    @classmethod
    def setup_class(cls):
        """Store the simulated arrays and fit OLS on their pandas wrappers."""
        np.random.seed(12345)
        design = sm.add_constant(np.random.normal(0, 20, size=30))
        response = np.dot(design, [25, 3.5]) + np.random.normal(0, 30, size=30)
        # The scatter calls in the inherited tests use the plain arrays.
        cls.X, cls.y = design, response
        design_frame = DataFrame(design, columns=["const", "someX"])
        response_series = Series(response, name="outcome")
        cls.mod = sm.OLS(response_series, design_frame).fit()
class TestAddedVariablePlot:
    """Smoke tests for added variable plots of OLS and Poisson GLM fits."""

    @pytest.mark.matplotlib
    def test_added_variable_ols(self, close_figures):
        """Draw an added variable plot for the first regressor of an OLS fit."""
        np.random.seed(3446)
        nobs, nvar = 100, 3
        exog = np.random.normal(size=(nobs, nvar))
        lin_pred = 4 + exog[:, 0] + 0.2 * exog[:, 1]**2
        endog = lin_pred + np.random.normal(size=nobs)
        results = sm.OLS(endog, exog).fit()

        fig = plot_added_variable(results, 0)
        fig.get_axes()[0].set_title("Added variable plot (OLS)")
        close_or_save(pdf, fig)
        close_figures()

    @pytest.mark.matplotlib
    def test_added_variable_poisson(self, close_figures):
        """Run every option combination, as a function and as a method."""
        np.random.seed(3446)
        nobs, nvar = 100, 3
        exog = np.random.normal(size=(nobs, nvar))
        lin_pred = 4 + exog[:, 0] + 0.2 * exog[:, 1]**2
        endog = np.random.poisson(np.exp(lin_pred))
        results = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

        effect_labels = ["Linear effect, slope=1",
                         "Quadratic effect", "No effect"]
        weight_labels = ["Unweighted", "Weighted"]

        for focus_col in (0, 1, 2):
            effect_str = effect_labels[focus_col]
            for use_glm_weights in (False, True):
                weight_str = weight_labels[use_glm_weights]
                for resid_type in ("resid_deviance", "resid_response"):
                    options = dict(use_glm_weights=use_glm_weights,
                                   resid_type=resid_type)
                    # Run directly first, then as a results method.
                    for as_method in (False, True):
                        if as_method:
                            fig = results.plot_added_variable(focus_col,
                                                              **options)
                            ti = "Added variable plot (called as method)"
                        else:
                            fig = plot_added_variable(results, focus_col,
                                                      **options)
                            ti = "Added variable plot"
                        ax = fig.get_axes()[0]
                        add_lowess(ax)
                        ax.set_position([0.1, 0.1, 0.8, 0.7])
                        ti += "\nPoisson regression\n"
                        ti += effect_str + "\n"
                        ti += weight_str + "\n"
                        ti += "Using '%s' residuals" % resid_type
                        ax.set_title(ti)
                        close_or_save(pdf, fig)
                        # Release the figure before the next combination.
                        close_figures()
class TestPartialResidualPlot:
    """Smoke test for partial residual plots of a Poisson GLM."""

    @pytest.mark.matplotlib
    def test_partial_residual_poisson(self, close_figures):
        """Plot both non-constant regressors, as a function and as a method."""
        np.random.seed(3446)
        nobs, nvar = 100, 3
        exog = np.random.normal(size=(nobs, nvar))
        exog[:, 0] = 1  # first column is the intercept
        lin_pred = 4 + exog[:, 1] + 0.2*exog[:, 2]**2
        endog = np.random.poisson(np.exp(lin_pred))
        results = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

        effect_labels = ["Intercept", "Linear effect, slope=1",
                         "Quadratic effect"]
        for focus_col in (1, 2):
            effect_str = effect_labels[focus_col]
            for as_method in (False, True):
                ti = "Partial residual plot"
                if as_method:
                    fig = results.plot_partial_residuals(focus_col)
                    ti += " (called as method)"
                else:
                    fig = plot_partial_residuals(results, focus_col)
                ax = fig.get_axes()[0]
                add_lowess(ax)
                ax.set_position([0.1, 0.1, 0.8, 0.77])
                ax.set_title(ti + "\nPoisson regression\n" +
                             effect_str)
                close_or_save(pdf, fig)
class TestCERESPlot:
    """Smoke test for CERES residual plots of a Poisson GLM."""

    @pytest.mark.matplotlib
    def test_ceres_poisson(self, close_figures):
        """Plot both non-constant regressors, as a function and as a method."""
        np.random.seed(3446)
        nobs, nvar = 100, 3
        exog = np.random.normal(size=(nobs, nvar))
        exog[:, 0] = 1  # first column is the intercept
        lin_pred = 4 + exog[:, 1] + 0.2*exog[:, 2]**2
        endog = np.random.poisson(np.exp(lin_pred))
        results = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

        effect_labels = ["Intercept", "Linear effect, slope=1",
                         "Quadratic effect"]
        for focus_col in (1, 2):
            effect_str = effect_labels[focus_col]
            for as_method in (False, True):
                ti = "CERES plot"
                if as_method:
                    fig = results.plot_ceres_residuals(focus_col)
                    ti += " (called as method)"
                else:
                    fig = plot_ceres_residuals(results, focus_col)
                ax = fig.get_axes()[0]
                add_lowess(ax)
                ax.set_position([0.1, 0.1, 0.8, 0.77])
                ax.set_title(ti + "\nPoisson regression\n" +
                             effect_str)
                close_or_save(pdf, fig)
@pytest.mark.matplotlib
def test_partregress_formula_env(close_figures):
    """User-defined functions referenced in formulas must resolve, see #7672.

    The ``close_figures`` fixture is requested like in every other plotting
    test of this module, so the two figures created here take part in the
    module's usual figure clean-up instead of staying open.
    """
    # Local helper that the formula string "lg(b)" has to find by name.
    @np.vectorize
    def lg(x):
        return np.log10(x) if x > 0 else 0

    df = DataFrame(
        dict(
            a=np.random.random(size=10),
            b=np.random.random(size=10),
            c=np.random.random(size=10),
        )
    )
    # Once with an explicit ``eval_env`` and once relying on the default.
    sm.graphics.plot_partregress(
        "a", "lg(b)", ["c"], obs_labels=False, data=df, eval_env=1)
    sm.graphics.plot_partregress(
        "a", "lg(b)", ["c"], obs_labels=False, data=df)

View File

@ -0,0 +1,390 @@
from statsmodels.compat.pandas import MONTH_END
from statsmodels.compat.python import lmap
import calendar
from io import BytesIO
import locale
import numpy as np
from numpy.testing import assert_, assert_equal
import pandas as pd
import pytest
from statsmodels.datasets import elnino, macrodata
from statsmodels.graphics.tsaplots import (
month_plot,
plot_accf_grid,
plot_acf,
plot_ccf,
plot_pacf,
plot_predict,
quarter_plot,
seasonal_plot,
)
from statsmodels.tsa import arima_process as tsp
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA
try:
from matplotlib import pyplot as plt
except ImportError:
pass
@pytest.mark.matplotlib
def test_plot_acf(close_figures):
    """Smoke test: plot_acf runs with its basic argument combinations."""
    ax = plt.figure().add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rs.standard_normal)

    # Explicit lag count, all defaults, and no confidence band.
    for options in ({"lags": 10}, {}, {"alpha": None}):
        plot_acf(sample, ax=ax, **options)
@pytest.mark.matplotlib
def test_plot_acf_irregular(close_figures):
    """Smoke test: plot_acf runs with lag arrays and with lag zero dropped."""
    ax = plt.figure().add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rs.standard_normal)

    option_sets = (
        {"lags": np.arange(1, 11)},
        {"lags": 10, "zero": False},
        {"alpha": None, "zero": False},
    )
    for options in option_sets:
        plot_acf(sample, ax=ax, **options)
@pytest.mark.matplotlib
def test_plot_pacf(close_figures):
    """Smoke test: plot_pacf runs with and without a confidence band."""
    ax = plt.figure().add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rs.standard_normal)

    for options in ({}, {"alpha": None}):
        plot_pacf(sample, ax=ax, **options)
@pytest.mark.matplotlib
def test_plot_pacf_kwargs(close_figures):
    """Line-style keywords passed to plot_pacf must change the rendering.

    The same sample is drawn three times (defaults, ``ls="-"`` and custom
    ``vlines_kwargs``); the raw RGBA output of the three figures has to be
    pairwise different.
    """
    def rendered(sample, **options):
        # Draw on a fresh figure and return its raw RGBA bytes.
        fig = plt.figure()
        plot_pacf(sample, ax=fig.add_subplot(111), **options)
        buffer = BytesIO()
        fig.savefig(buffer, format="rgba")
        return buffer.getvalue()

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    pacf = process.generate_sample(100, distrvs=rs.standard_normal)

    plain = rendered(pacf)
    linestyle = rendered(pacf, ls="-")
    with_vlines = rendered(pacf, vlines_kwargs={"linestyles": "dashdot"})

    assert_(plain != linestyle)
    assert_(with_vlines != plain)
    assert_(linestyle != with_vlines)
@pytest.mark.matplotlib
def test_plot_acf_kwargs(close_figures):
    """Custom ``vlines_kwargs`` passed to plot_acf must change the rendering.

    The same sample is drawn with defaults and with dash-dotted vertical
    lines; the raw RGBA output of the two figures has to differ.
    """
    def rendered(sample, **options):
        # Draw on a fresh figure and return its raw RGBA bytes.
        fig = plt.figure()
        plot_acf(sample, ax=fig.add_subplot(111), **options)
        buffer = BytesIO()
        fig.savefig(buffer, format="rgba")
        return buffer.getvalue()

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    acf = process.generate_sample(100, distrvs=rs.standard_normal)

    plain = rendered(acf)
    with_vlines = rendered(acf, vlines_kwargs={"linestyles": "dashdot"})

    assert_(with_vlines != plain)
@pytest.mark.matplotlib
def test_plot_acf_missing(close_figures):
    """The ``missing`` option of plot_acf must affect the rendered output.

    Every 13th observation is set to NaN; drawing with ``missing="drop"``
    and with ``missing="conservative"`` has to give different RGBA output.
    """
    def rendered(sample, missing):
        # Draw on a fresh figure and return its raw RGBA bytes.
        fig = plt.figure()
        plot_acf(sample, ax=fig.add_subplot(111), missing=missing)
        buffer = BytesIO()
        fig.savefig(buffer, format="rgba")
        return buffer.getvalue()

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    acf = process.generate_sample(100, distrvs=rs.standard_normal)
    acf[::13] = np.nan

    dropped = rendered(acf, "drop")
    conservative = rendered(acf, "conservative")
    assert_(dropped != conservative)
@pytest.mark.matplotlib
def test_plot_pacf_irregular(close_figures):
    """Smoke test: plot_pacf runs with lag arrays and with lag zero dropped."""
    ax = plt.figure().add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    sample = process.generate_sample(100, distrvs=rs.standard_normal)

    option_sets = (
        {"lags": np.arange(1, 11)},
        {"lags": 10, "zero": False},
        {"alpha": None, "zero": False},
    )
    for options in option_sets:
        plot_pacf(sample, ax=ax, **options)
@pytest.mark.matplotlib
def test_plot_ccf(close_figures):
    """Smoke test: plot_ccf runs with each of its options in turn."""
    ax = plt.figure().add_subplot(111)

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    x1 = process.generate_sample(100, distrvs=rs.standard_normal)
    x2 = process.generate_sample(100, distrvs=rs.standard_normal)

    # Without axes first, then each option on the shared axes.
    plot_ccf(x1, x2)
    option_sets = (
        {"lags": 10},
        {},
        {"alpha": None},
        {"negative_lags": True},
        {"adjusted": True},
        {"fft": True},
        {"title": 'CCF'},
        {"auto_ylims": True},
        {"use_vlines": False},
    )
    for options in option_sets:
        plot_ccf(x1, x2, ax=ax, **options)
@pytest.mark.matplotlib
def test_plot_accf_grid(close_figures):
    """Smoke test: plot_accf_grid runs with each of its options in turn."""
    fig = plt.figure()

    process = tsp.ArmaProcess(np.r_[1.0, -0.9], np.r_[1.0, 0.9])
    rs = np.random.RandomState(1234)
    first = process.generate_sample(100, distrvs=rs.standard_normal)
    second = process.generate_sample(100, distrvs=rs.standard_normal)
    x = np.vstack([first, second]).T

    # Array and DataFrame input without a figure, then the shared figure.
    plot_accf_grid(x)
    plot_accf_grid(pd.DataFrame({'x': x[:, 0], 'y': x[:, 1]}))
    option_sets = (
        {"lags": 10},
        {},
        {"negative_lags": False},
        {"alpha": None},
        {"adjusted": True},
        {"fft": True},
        {"auto_ylims": True},
        {"use_vlines": False},
    )
    for options in option_sets:
        plot_accf_grid(x, fig=fig, **options)
@pytest.mark.matplotlib
def test_plot_month(close_figures):
    """month_plot handles explicit dates, several index types and a locale."""
    dta = elnino.load_pandas().data
    dta["YEAR"] = dta.YEAR.astype(int).apply(str)
    dta = dta.set_index("YEAR").T.unstack()
    # Each index entry is a (year, month abbreviation) pair of strings.
    dates = pd.to_datetime(
        ["-".join((month, year)) for year, month in dta.index.values],
        format="%b-%Y",
    )

    # test dates argument
    month_plot(dta.values, dates=dates, ylabel="el nino")

    # DatetimeIndex without freq, DatetimeIndex with freq, PeriodIndex
    index_factories = (
        lambda: pd.DatetimeIndex(dates),
        lambda: pd.DatetimeIndex(dates, freq="MS"),
        lambda: pd.PeriodIndex(dates, freq="M"),
    )
    for make_index in index_factories:
        dta.index = make_index()
        month_plot(dta)

    # test localized xlabels; March has two accepted abbreviations
    expected = [
        "Jan", "Feb", ("Mär", "Mrz"), "Apr", "Mai", "Jun",
        "Jul", "Aug", "Sep", "Okt", "Nov", "Dez",
    ]
    try:
        with calendar.different_locale("DE_de"):
            fig = month_plot(dta)
            labels = [
                tick.get_text() for tick in fig.axes[0].get_xticklabels()
            ]
    except locale.Error:
        pytest.xfail(reason="Failure due to unsupported locale")

    for lbl, exp in zip(labels, expected):
        accepted = exp if isinstance(exp, tuple) else (exp,)
        assert lbl in accepted
@pytest.mark.matplotlib
def test_plot_quarter(close_figures):
    """quarter_plot handles explicit dates and several index types."""
    dta = macrodata.load_pandas().data
    years = dta.year.astype(int).apply(str)
    quarters = dta.quarter.astype(int).apply(str)
    # Strings of the form "<year>-Q<quarter>".
    dates = lmap("-Q".join, zip(years, quarters))

    # test dates argument
    quarter_plot(dta.unemp.values, dates)

    # DatetimeIndex with freq "QS-OCT", installed through set_index
    dta.set_index(pd.DatetimeIndex(dates, freq="QS-OCT"), inplace=True)
    quarter_plot(dta.unemp)

    # The same kind of index assigned directly (see pandas #6631), followed
    # by a quarterly PeriodIndex.
    index_factories = (
        lambda: pd.DatetimeIndex(dates, freq="QS-OCT"),
        lambda: pd.PeriodIndex(dates, freq="Q"),
    )
    for make_index in index_factories:
        dta.index = make_index()
        quarter_plot(dta.unemp)
@pytest.mark.matplotlib
def test_seasonal_plot(close_figures):
    """seasonal_plot labels the x ticks with the names it is given."""
    rs = np.random.RandomState(1234)
    # 20 rows of 12 values with a half sine wave added across the columns.
    seasonal = 6 * np.sin(np.arange(12.0) / 11 * np.pi)
    data = rs.randn(20, 12)
    data += seasonal[None, :]
    data = data.ravel()
    months = np.tile(np.arange(1, 13), (20, 1)).ravel()

    df = pd.DataFrame([data, months], index=["data", "months"]).T
    grouped = df.groupby("months")["data"]
    labels = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ]

    fig = seasonal_plot(grouped, labels)
    tick_labels = fig.get_axes()[0].get_xticklabels()
    output = [tick.get_text() for tick in tick_labels]
    assert_equal(labels, output)
@pytest.mark.matplotlib
@pytest.mark.parametrize(
    "model_and_args",
    [(AutoReg, dict(lags=2, old_names=False)), (ARIMA, dict(order=(2, 0, 0)))],
)
@pytest.mark.parametrize("use_pandas", [True, False])
@pytest.mark.parametrize("alpha", [None, 0.10])
def test_predict_plot(use_pandas, model_and_args, alpha, close_figures):
    """plot_predict returns a Figure for in-sample plus 24 forecast steps.

    Runs for AutoReg and ARIMA, for NumPy and pandas input, and with and
    without a confidence level.  The ``close_figures`` fixture is requested
    like in the other plotting tests of this module, so the figure of each
    parametrization takes part in the module's usual figure clean-up.
    """
    model, kwargs = model_and_args
    rs = np.random.RandomState(0)
    y = rs.standard_normal(1000)
    # Turn the white noise into an AR(2) series in place.
    for i in range(2, 1000):
        y[i] += 1.8 * y[i - 1] - 0.9 * y[i - 2]
    # Drop the first 100 observations.
    y = y[100:]
    if use_pandas:
        # The index covers the sample plus the 24 forecast periods.
        index = pd.date_range(
            "1960-1-1", freq=MONTH_END, periods=y.shape[0] + 24
        )
        start = index[index.shape[0] // 2]
        end = index[-1]
        y = pd.Series(y, index=index[:-24])
    else:
        start = y.shape[0] // 2
        end = y.shape[0] + 24
    res = model(y, **kwargs).fit()
    fig = plot_predict(res, start, end, alpha=alpha)
    assert isinstance(fig, plt.Figure)
@pytest.mark.matplotlib
def test_plot_pacf_small_sample(close_figures):
    """Smoke test: plot_pacf runs on a ten-row, single-column DataFrame.

    The ``close_figures`` fixture is requested like in the other plotting
    tests of this module, so the figure created here takes part in the
    module's usual figure clean-up.
    """
    # Ten timestamps one second apart, starting from the current time.
    idx = [pd.Timestamp.now() + pd.Timedelta(seconds=i) for i in range(10)]
    df = pd.DataFrame(
        index=idx,
        columns=["a"],
        data=list(range(10))
    )
    plot_pacf(df)