reconnect moved files to git repo
This commit is contained in:
@ -0,0 +1,5 @@
|
||||
__all__ = ['dentonm', 'test']
|
||||
from .denton import dentonm
|
||||
from statsmodels.tools._test_runner import PytestTester
|
||||
|
||||
test = PytestTester()
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,311 @@
|
||||
import numpy as np
|
||||
from numpy import (dot, eye, diag_indices, zeros, ones, diag,
|
||||
asarray, r_)
|
||||
from numpy.linalg import solve
|
||||
|
||||
|
||||
# def denton(indicator, benchmark, freq="aq", **kwarg):
|
||||
# """
|
||||
# Denton's method to convert low-frequency to high frequency data.
|
||||
#
|
||||
# Parameters
|
||||
# ----------
|
||||
# benchmark : array_like
|
||||
# The higher frequency benchmark. A 1d or 2d data series in columns.
|
||||
# If 2d, then M series are assumed.
|
||||
# indicator
|
||||
# A low-frequency indicator series. It is assumed that there are no
|
||||
# pre-sample indicators. Ie., the first indicators line up with
|
||||
# the first benchmark.
|
||||
# freq : str {"aq","qm", "other"}
|
||||
# "aq" - Benchmarking an annual series to quarterly.
|
||||
# "mq" - Benchmarking a quarterly series to monthly.
|
||||
# "other" - Custom stride. A kwarg, k, must be supplied.
|
||||
# kwargs :
|
||||
# k : int
|
||||
# The number of high-frequency observations that sum to make an
|
||||
# aggregate low-frequency observation. `k` is used with
|
||||
# `freq` == "other".
|
||||
# Returns
|
||||
# -------
|
||||
# benchmarked series : ndarray
|
||||
#
|
||||
# Notes
|
||||
# -----
|
||||
# Denton's method minimizes the distance given by the penalty function, in
|
||||
# a least squares sense, between the unknown benchmarked series and the
|
||||
# indicator series subject to the condition that the sum of the benchmarked
|
||||
# series is equal to the benchmark.
|
||||
#
|
||||
#
|
||||
# References
|
||||
# ----------
|
||||
# Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
|
||||
# Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
|
||||
# http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
|
||||
# Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
|
||||
# totals: an approach based on quadratic minimization." Journal of the
|
||||
# American Statistical Association. 99-102.
|
||||
#
|
||||
# """
|
||||
# # check arrays and make 2d
|
||||
# indicator = np.asarray(indicator)
|
||||
# if indicator.ndim == 1:
|
||||
# indicator = indicator[:,None]
|
||||
# benchmark = np.asarray(benchmark)
|
||||
# if benchmark.ndim == 1:
|
||||
# benchmark = benchmark[:,None]
|
||||
#
|
||||
# # get dimensions
|
||||
# N = len(indicator) # total number of high-freq
|
||||
# m = len(benchmark) # total number of low-freq
|
||||
#
|
||||
# # number of low-freq observations for aggregate measure
|
||||
# # 4 for annual to quarter and 3 for quarter to monthly
|
||||
# if freq == "aq":
|
||||
# k = 4
|
||||
# elif freq == "qm":
|
||||
# k = 3
|
||||
# elif freq == "other":
|
||||
# k = kwargs.get("k")
|
||||
# if not k:
|
||||
# raise ValueError("k must be supplied with freq=\"other\"")
|
||||
# else:
|
||||
# raise ValueError("freq %s not understood" % freq)
|
||||
#
|
||||
# n = k*m # number of indicator series with a benchmark for back-series
|
||||
# # if k*m != n, then we are going to extrapolate q observations
|
||||
#
|
||||
# B = block_diag(*(np.ones((k,1)),)*m)
|
||||
#
|
||||
# r = benchmark - B.T.dot(indicator)
|
||||
#TODO: take code in the string at the end and implement Denton's original
|
||||
# method with a few of the penalty functions.
|
||||
|
||||
|
||||
def dentonm(indicator, benchmark, freq="aq", **kwargs):
|
||||
"""
|
||||
Modified Denton's method to convert low-frequency to high-frequency data.
|
||||
|
||||
Uses proportionate first-differences as the penalty function. See notes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
indicator : array_like
|
||||
A low-frequency indicator series. It is assumed that there are no
|
||||
pre-sample indicators. Ie., the first indicators line up with
|
||||
the first benchmark.
|
||||
benchmark : array_like
|
||||
The higher frequency benchmark. A 1d or 2d data series in columns.
|
||||
If 2d, then M series are assumed.
|
||||
freq : str {"aq","qm", "other"}
|
||||
The frequency to use in the conversion.
|
||||
|
||||
* "aq" - Benchmarking an annual series to quarterly.
|
||||
* "mq" - Benchmarking a quarterly series to monthly.
|
||||
* "other" - Custom stride. A kwarg, k, must be supplied.
|
||||
**kwargs
|
||||
Additional keyword argument. For example:
|
||||
|
||||
* k, an int, the number of high-frequency observations that sum to make
|
||||
an aggregate low-frequency observation. `k` is used with
|
||||
`freq` == "other".
|
||||
|
||||
Returns
|
||||
-------
|
||||
transformed : ndarray
|
||||
The transformed series.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> indicator = [50,100,150,100] * 5
|
||||
>>> benchmark = [500,400,300,400,500]
|
||||
>>> benchmarked = dentonm(indicator, benchmark, freq="aq")
|
||||
|
||||
Notes
|
||||
-----
|
||||
Denton's method minimizes the distance given by the penalty function, in
|
||||
a least squares sense, between the unknown benchmarked series and the
|
||||
indicator series subject to the condition that the sum of the benchmarked
|
||||
series is equal to the benchmark. The modification allows that the first
|
||||
value not be pre-determined as is the case with Denton's original method.
|
||||
If the there is no benchmark provided for the last few indicator
|
||||
observations, then extrapolation is performed using the last
|
||||
benchmark-indicator ratio of the previous period.
|
||||
|
||||
Minimizes sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
|
||||
|
||||
s.t.
|
||||
|
||||
sum(X) = A, for each period. Where X is the benchmarked series, I is
|
||||
the indicator, and A is the benchmark.
|
||||
|
||||
References
|
||||
----------
|
||||
Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
|
||||
Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
|
||||
http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
|
||||
Cholette, P. 1988. "Benchmarking systems of socio-economic time series."
|
||||
Statistics Canada, Time Series Research and Analysis Division,
|
||||
Working Paper No TSRA-88-017E.
|
||||
Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
|
||||
totals: an approach based on quadratic minimization." Journal of the
|
||||
American Statistical Association. 99-102.
|
||||
"""
|
||||
# penalty : str
|
||||
# Penalty function. Can be "D1", "D2", "D3", "D4", "D5".
|
||||
# X is the benchmarked series and I is the indicator.
|
||||
# D1 - sum((X[t] - X[t-1]) - (I[t] - I[ti-1])**2)
|
||||
# D2 - sum((ln(X[t]/X[t-1]) - ln(I[t]/I[t-1]))**2)
|
||||
# D3 - sum((X[t]/X[t-1] / I[t]/I[t-1])**2)
|
||||
# D4 - sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
|
||||
# D5 - sum((X[t]/I[t] / X[t-1]/I[t-1] - 1)**2)
|
||||
#NOTE: only D4 is the only one implemented, see IMF chapter 6.
|
||||
|
||||
# check arrays and make 2d
|
||||
indicator = asarray(indicator)
|
||||
if indicator.ndim == 1:
|
||||
indicator = indicator[:,None]
|
||||
benchmark = asarray(benchmark)
|
||||
if benchmark.ndim == 1:
|
||||
benchmark = benchmark[:,None]
|
||||
|
||||
# get dimensions
|
||||
N = len(indicator) # total number of high-freq
|
||||
m = len(benchmark) # total number of low-freq
|
||||
|
||||
# number of low-freq observations for aggregate measure
|
||||
# 4 for annual to quarter and 3 for quarter to monthly
|
||||
if freq == "aq":
|
||||
k = 4
|
||||
elif freq == "qm":
|
||||
k = 3
|
||||
elif freq == "other":
|
||||
k = kwargs.get("k")
|
||||
if not k:
|
||||
raise ValueError("k must be supplied with freq=\"other\"")
|
||||
else:
|
||||
raise ValueError("freq %s not understood" % freq)
|
||||
|
||||
n = k*m # number of indicator series with a benchmark for back-series
|
||||
# if k*m != n, then we are going to extrapolate q observations
|
||||
if N > n:
|
||||
q = N - n
|
||||
else:
|
||||
q = 0
|
||||
|
||||
# make the aggregator matrix
|
||||
#B = block_diag(*(ones((k,1)),)*m)
|
||||
B = np.kron(np.eye(m), ones((k,1)))
|
||||
|
||||
# following the IMF paper, we can do
|
||||
Zinv = diag(1./indicator.squeeze()[:n])
|
||||
# this is D in Denton's notation (not using initial value correction)
|
||||
# D = eye(n)
|
||||
# make off-diagonal = -1
|
||||
# D[((np.diag_indices(n)[0])[:-1]+1,(np.diag_indices(n)[1])[:-1])] = -1
|
||||
# account for starting conditions
|
||||
# H = D[1:,:]
|
||||
# HTH = dot(H.T,H)
|
||||
# just make HTH
|
||||
HTH = eye(n)
|
||||
diag_idx0, diag_idx1 = diag_indices(n)
|
||||
HTH[diag_idx0[1:-1], diag_idx1[1:-1]] += 1
|
||||
HTH[diag_idx0[:-1]+1, diag_idx1[:-1]] = -1
|
||||
HTH[diag_idx0[:-1], diag_idx1[:-1]+1] = -1
|
||||
|
||||
W = dot(dot(Zinv,HTH),Zinv)
|
||||
|
||||
# make partitioned matrices
|
||||
# TODO: break this out so that we can simplify the linalg?
|
||||
I = zeros((n+m, n+m)) # noqa:E741
|
||||
I[:n,:n] = W
|
||||
I[:n,n:] = B
|
||||
I[n:,:n] = B.T
|
||||
|
||||
A = zeros((m+n,1)) # zero first-order constraints
|
||||
A[-m:] = benchmark # adding up constraints
|
||||
X = solve(I,A)
|
||||
X = X[:-m] # drop the lagrange multipliers
|
||||
|
||||
# handle extrapolation
|
||||
if q > 0:
|
||||
# get last Benchmark-Indicator ratio
|
||||
bi = X[n-1]/indicator[n-1]
|
||||
extrapolated = bi * indicator[n:]
|
||||
X = r_[X,extrapolated]
|
||||
|
||||
return X.squeeze()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#these will be the tests
|
||||
# from IMF paper
|
||||
|
||||
# quarterly data
|
||||
indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6,
|
||||
102.7, 101.5, 100.5, 103.0, 103.5, 101.5])
|
||||
# two annual observations
|
||||
benchmark = np.array([4000.,4161.4])
|
||||
x_imf = dentonm(indicator, benchmark, freq="aq")
|
||||
|
||||
imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9,
|
||||
1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0])
|
||||
np.testing.assert_almost_equal(imf_stata, x_imf, 1)
|
||||
|
||||
# Denton example
|
||||
zQ = np.array([50,100,150,100] * 5)
|
||||
Y = np.array([500,400,300,400,500])
|
||||
x_denton = dentonm(zQ, Y, freq="aq")
|
||||
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
|
||||
105.97568,147.50144,89.958987,40.547201,74.445963,
|
||||
108.34473,76.66211,42.763347,94.14664,153.41596,
|
||||
109.67405,58.290761,122.62556,190.41409,128.66959])
|
||||
|
||||
|
||||
"""
|
||||
# Examples from the Denton 1971 paper
|
||||
k = 4
|
||||
m = 5
|
||||
n = m*k
|
||||
|
||||
zQ = [50,100,150,100] * m
|
||||
Y = [500,400,300,400,500]
|
||||
|
||||
A = np.eye(n)
|
||||
B = block_diag(*(np.ones((k,1)),)*m)
|
||||
|
||||
r = Y - B.T.dot(zQ)
|
||||
#Ainv = inv(A)
|
||||
Ainv = A # shortcut for identity
|
||||
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
||||
x = zQ + C.dot(r)
|
||||
|
||||
# minimize first difference d(x-z)
|
||||
R = linalg.tri(n, dtype=float) # R is tril so actually R.T in paper
|
||||
Ainv = R.dot(R.T)
|
||||
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
||||
x1 = zQ + C.dot(r)
|
||||
|
||||
# minimize the second difference d**2(x-z)
|
||||
Ainv = R.dot(Ainv).dot(R.T)
|
||||
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
|
||||
x12 = zQ + C.dot(r)
|
||||
|
||||
|
||||
# # do it proportionately (x-z)/z
|
||||
Z = np.diag(zQ)
|
||||
Ainv = np.eye(n)
|
||||
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
|
||||
x11 = zQ + C.dot(r)
|
||||
|
||||
# do it proportionately with differencing d((x-z)/z)
|
||||
Ainv = R.dot(R.T)
|
||||
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
|
||||
x111 = zQ + C.dot(r)
|
||||
|
||||
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
|
||||
105.97568,147.50144,89.958987,40.547201,74.445963,
|
||||
108.34473,76.66211,42.763347,94.14664,153.41596,
|
||||
109.67405,58.290761,122.62556,190.41409,128.66959])
|
||||
"""
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
from statsmodels.tsa.interp import dentonm
|
||||
|
||||
|
||||
def test_denton_quarterly():
|
||||
# Data and results taken from IMF paper
|
||||
indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6,
|
||||
102.7, 101.5, 100.5, 103.0, 103.5, 101.5])
|
||||
benchmark = np.array([4000.,4161.4])
|
||||
x_imf = dentonm(indicator, benchmark, freq="aq")
|
||||
imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9,
|
||||
1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0])
|
||||
np.testing.assert_almost_equal(imf_stata, x_imf, 1)
|
||||
|
||||
|
||||
def test_denton_quarterly2():
|
||||
# Test denton vs stata. Higher precision than other test.
|
||||
zQ = np.array([50,100,150,100] * 5)
|
||||
Y = np.array([500,400,300,400,500])
|
||||
x_denton = dentonm(zQ, Y, freq="aq")
|
||||
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
|
||||
105.97568,147.50144,89.958987,40.547201,74.445963,
|
||||
108.34473,76.66211,42.763347,94.14664,153.41596,
|
||||
109.67405,58.290761,122.62556,190.41409,128.66959])
|
||||
np.testing.assert_almost_equal(x_denton, x_stata, 5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import pytest
|
||||
pytest.main([__file__, '-vvs', '-x', '--pdb'])
|
||||
Reference in New Issue
Block a user