reconnect moved files to git repo
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,294 @@
|
||||
from numpy import array
|
||||
|
||||
from statsmodels.tools.testing import Holder
|
||||
|
||||
|
||||
data = Holder()
|
||||
data.comment = 'generated data, divide by 1000'
|
||||
data.name = 'data'
|
||||
data.xo = array([
|
||||
[-419, -731, -1306, -1294],
|
||||
[6, 529, -200, -437],
|
||||
[-27, -833, -6, -564],
|
||||
[-304, -273, -502, -739],
|
||||
[1377, -912, 927, 280],
|
||||
[-375, -517, -514, 49],
|
||||
[247, -504, 123, -259],
|
||||
[712, 534, -773, 286],
|
||||
[195, -1080, 3256, -178],
|
||||
[-854, 75, -706, -1084],
|
||||
[-1219, -612, -15, -203],
|
||||
[550, -628, -483, -2686],
|
||||
[-365, 1376, -1266, 317],
|
||||
[-489, 544, -195, 431],
|
||||
[-656, 854, 840, -723],
|
||||
[16, -1385, -880, -460],
|
||||
[258, -2252, 96, 54],
|
||||
[2049, -750, -1115, 381],
|
||||
[-65, 280, -777, 416],
|
||||
[755, 82, -806, 1027],
|
||||
[-39, -170, -2134, 743],
|
||||
[-859, 780, 746, -133],
|
||||
[762, 252, -450, -459],
|
||||
[-941, -202, 49, -202],
|
||||
[-54, 115, 455, 388],
|
||||
[-1348, 1246, 1430, -480],
|
||||
[229, -535, -1831, 1524],
|
||||
[-651, -167, 2116, 483],
|
||||
[-1249, -1373, 888, -1092],
|
||||
[-75, -2162, 486, -496],
|
||||
[2436, -1627, -1069, 162],
|
||||
[-63, 560, -601, 587],
|
||||
[-60, 1051, -277, 1323],
|
||||
[1329, -1294, 68, 5],
|
||||
[1532, -633, -923, 696],
|
||||
[669, 895, -1762, -375],
|
||||
[1129, -548, 2064, 609],
|
||||
[1320, 573, 2119, 270],
|
||||
[-213, -412, -2517, 1685],
|
||||
[73, -979, 1312, -1220],
|
||||
[-1360, -2107, -237, 1522],
|
||||
[-645, 205, -543, -169],
|
||||
[-212, 1072, 543, -128],
|
||||
[-352, -129, -605, -904],
|
||||
[511, 85, 167, -1914],
|
||||
[1515, 1862, 942, 1622],
|
||||
[-465, 623, -495, -89],
|
||||
[-1396, -979, 1758, 128],
|
||||
[-255, -47, 980, 501],
|
||||
[-1282, -58, -49, -610],
|
||||
[-889, -1177, -492, 494],
|
||||
[1415, 1146, 696, -722],
|
||||
[1237, -224, -1609, -64],
|
||||
[-528, -1625, 231, 883],
|
||||
[-327, 1636, -476, -361],
|
||||
[-781, 793, 1882, 234],
|
||||
[-506, -561, 1988, -810],
|
||||
[-1233, 1467, -261, 2164],
|
||||
[53, 1069, 824, 2123],
|
||||
[-1200, -441, -321, 339],
|
||||
[1606, 298, -995, 1292],
|
||||
[-1740, -672, -1628, -129],
|
||||
[-1450, -354, 224, -657],
|
||||
[-2556, 1006, -706, -1453],
|
||||
[-717, -463, 345, -1821],
|
||||
[1056, -38, -420, -455],
|
||||
[-523, 565, 425, 1138],
|
||||
[-1030, -187, 683, 78],
|
||||
[-214, -312, -1171, -528],
|
||||
[819, 736, -265, 423],
|
||||
[1339, 351, 1142, 579],
|
||||
[-387, -126, -1573, 2346],
|
||||
[969, 2, 327, -134],
|
||||
[163, 227, 90, 2021],
|
||||
[1022, -1076, 174, 304],
|
||||
[1042, 1317, 311, 880],
|
||||
[2018, -840, 295, 2651],
|
||||
[-277, 566, 1147, -189],
|
||||
[20, 467, 1262, 263],
|
||||
[-663, 1061, -1552, -1159],
|
||||
[1830, 391, 2534, -199],
|
||||
[-487, 752, -1061, 351],
|
||||
[-2138, -556, -367, -457],
|
||||
[-868, -411, -559, 726],
|
||||
[1770, 819, -892, -363],
|
||||
[553, -736, -169, -490],
|
||||
[388, -503, 809, -821],
|
||||
[-516, -1452, -192, 483],
|
||||
[493, 2904, 1318, 2591],
|
||||
[175, 584, -1001, 1675],
|
||||
[1316, -1596, -460, 1500],
|
||||
[1212, 214, -644, -696],
|
||||
[-501, 338, 1197, -841],
|
||||
[-587, -469, -1101, 24],
|
||||
[-1205, 1910, 659, 1232],
|
||||
[-150, 398, 594, 394],
|
||||
[34, -663, 235, -334],
|
||||
[-1580, 647, 239, -351],
|
||||
[-2177, -345, 1215, -1494],
|
||||
[1923, 329, -152, 1128]])
|
||||
|
||||
princomp1 = Holder()
|
||||
princomp1.comment = 'mlab.princomp(x, nout=3)'
|
||||
princomp1.factors = array([
|
||||
[-.83487832815382, -1.75681522344645, -.50882660928949, -.59661466511045],
|
||||
[-.18695786699253, -.10732909330422, .23971799542554, -.75468286946853],
|
||||
[-.57403949255604, -.39667006607544, -.7927838094217, .02652621881328],
|
||||
[-.60828125251513, -.75979035898754, -.20148864200404, -.40278856050237],
|
||||
[.55997928601548, .88869370546643, -1.55474410845786, .23033958281961],
|
||||
[-.18023239851961, -.72398923145328, -.07056264751117, .29292391015376],
|
||||
[-.189029743271, -.05888596186903, -.63882208368513, -.05682951829677],
|
||||
[.94694345324739, -.33448036234864, .16665867708366, -.67190948646953],
|
||||
[-1.355171899399, 2.58899695901774, -1.53157119606928, .93743278678908],
|
||||
[-1.06797676403358, -1.01894055566289, .29181722134698, -.65261957826524],
|
||||
[-1.08919199915725, -.5395876105009, .18846579824378, .61935728909742],
|
||||
[-1.36598849770841, -1.00986627679465, -1.6090477073157, -1.82708847399443], # noqa:E501
|
||||
[.561511276285, -.74919011595195, 1.49872898209738, -.80588545345232],
|
||||
[.04805787176428, -.05522267212748, .82943784435024, .01537039050312],
|
||||
[-1.12006939155398, .73462770352006, .58868274831601, -.67786987413505],
|
||||
[-.26087838474316, -1.33362289066951, -1.02932517860259, .24865839951801],
|
||||
[-.24666198784909, -.58247196399204, -1.78971960966265, 1.18908143657302],
|
||||
[1.80675592845666, -.73341258204636, -1.45012544705912, -.44875329121288],
|
||||
[.4794281391435, -.57169295903913, .48557628591056, -.11638075289238],
|
||||
[1.39425263398653, -.3665732682294, .06937942447187, .06683559082703],
|
||||
[1.11015707065101, -1.87631329249852, .48914958604867, .11096926802212],
|
||||
[-.85159530389901, .68543874135386, .86736021483251, -.17641002537865],
|
||||
[.34109015314112, -.25431311542374, -.36804227540019, -.95824474920131],
|
||||
[-.86253950274987, -.28796613689709, .30820634958709, .27228599921917],
|
||||
[.01266190412089, .48559962017667, .14020630700546, .18517398749337],
|
||||
[-1.56345869427724, 1.27917754070516, 1.25640847929385, -.36055181722313],
|
||||
[1.62834293379132, -1.51923809467869, .27754976407182, .79362967384835],
|
||||
[-.94400458067084, 1.77733054371289, .03595731772774, .96570688640992],
|
||||
[-2.11906234438329, -.13226430948321, -.78992396115366, .66362103473975],
|
||||
[-.94372331181891, -.37502966791165, -1.77907324401749, .97801542954941],
|
||||
[1.76575198740032, -.92309597844861, -2.3872195277998, -.21817018301121],
|
||||
[.57418226616373, -.2925257318724, .71180507312941, -.13937750314467],
|
||||
[1.01654397566275, .28855305878842, 1.25119859389106, .11257524396004],
|
||||
[.58979013567212, -.06866577243092, -1.74447546690995, .13917953157575],
|
||||
[1.62072087150051, -.5835145063711, -.99029357957459, -.06334029436682],
|
||||
[.893493925425, -1.23995040005948, .40058503790479, -1.49029669097391],
|
||||
[.26990527585623, 2.03399854143898, -1.2335089890881, .54010061879979],
|
||||
[.33504096277444, 2.42394994177782, -.6643863358332, -.42471161848557],
|
||||
[1.69952476943058, -2.1707037237448, .79694026483866, .88177267205969],
|
||||
[-1.41498253257895, .65248089992094, -1.40045976465378, -.12045332880702],
|
||||
[-.22640706265253, -.94114558124915, -.18868114063537, 2.67652245892778],
|
||||
[-.37493712386529, -.61985213642068, .5383582946365, -.17931524703276],
|
||||
[-.30437796317839, .74252786648649, .73255373596822, -.64993745548429],
|
||||
[-.68788283675831, -.84714762684627, -.10721753874211, -.59777382822281],
|
||||
[-1.00667616522842, -.06670525233919, -.92973707141688, -1.60742284256649],
|
||||
[1.95220512266515, 2.05751265066695, .79640648143073, -.59608004229343],
|
||||
[-.15504464969388, -.3882079443045, .75049869361395, -.44163703260023],
|
||||
[-1.6686863460652, .96325894557423, -.16453379247258, 1.4560996746313],
|
||||
[-.25573631707529, .88265554068571, .08984550855664, .53561910563178],
|
||||
[-1.29430028690793, -.48042359291447, .49318558750269, .03689178852848],
|
||||
[-.34391235307349, -.95154811896716, -.09714022474353, 1.19792361047367],
|
||||
[.34367523316975, 1.16641214447854, -.39528838072965, -1.72565643987406],
|
||||
[1.23887392116229, -1.27474554996132, -.65859544264097, -.81757560038832],
|
||||
[-.17739006831099, -.29057501559843, -.62533324788504, 1.7092669546224],
|
||||
[-.08610919021307, -.06524996994257, 1.3018284944661, -1.28219607271255],
|
||||
[-.95717735853496, 1.79841555744597, .75799149339397, .23542916575208],
|
||||
[-1.70175078442029, 1.33831900642462, -.73979048943944, .26157699746442],
|
||||
[.84631686421106, .32029666775009, 2.51638540556813, .90367536744335],
|
||||
[1.22693220256582, 1.45665385966518, 1.27480662666555, .78786331120259],
|
||||
[-.59251239046609, -.660398245535, .53258334042042, .81248748854679],
|
||||
[2.22723057510913, -.22856960444805, -.15586801032885, -.26957090658609],
|
||||
[-.83192612439183, -2.11983096548132, .75319973501664, .62196293266702],
|
||||
[-1.577627210601, -.3747136286972, .31736538266249, .30187577548949],
|
||||
[-2.28230005998543, -1.17283119424281, 1.83780755209602, -.75928026219594],
|
||||
[-1.90574204329052, -.34197417196464, -.59978910354131, -.68240235236779],
|
||||
[.48132729275936, -.2524965456322, -.75271273075, -.89651237903089],
|
||||
[.26961427953002, .62968227134995, .99324664633985, .59917742452108],
|
||||
[-.95910506784013, .31907970712369, .35568397653203, .60155535679072],
|
||||
[-.18528259973205, -1.31831013869974, -.09749195643548, -.39885348684496],
|
||||
[.9608404103702, .23727553971573, .20695289013955, -.65281918968052],
|
||||
[.85302395609555, 1.5303724004181, -.56440186223081, -.27348033453255],
|
||||
[1.72786301913767, -1.14859994931789, 1.16222121440674, 1.39284961909257],
|
||||
[.37711527308989, .47231886947072, -.69423676772182, -.53515102147655],
|
||||
[1.35642227654922, .53204130038923, .69844068787197, 1.04544871561741],
|
||||
[.57797880484094, .08044525072063, -1.32634695941334, .35179408060132],
|
||||
[1.29437232500619, 1.07461562326311, .54545226737269, -.6836610122092],
|
||||
[2.74736726573105, .90881277479338, -.98342785084735, 1.38171127911719],
|
||||
[-.67749479829901, 1.10093727650063, .28416704607992, -.24984509303044],
|
||||
[-.24513961858774, 1.32098977907584, .16904762754153, .00886790270539],
|
||||
[-.5392290825383, -1.43851802284774, 1.0064737206577, -1.52649870396689],
|
||||
[.19486366400459, 2.77236000318994, -1.32201258472682, -.75922390642504],
|
||||
[.33271229220962, -.78464273816827, 1.09930224781861, -.32184679755027],
|
||||
[-1.72814706427698, -1.09275114767838, .7451569579997, .72871211772761],
|
||||
[-.035506207751, -.72161367235521, .52828318684787, .87177739169758],
|
||||
[1.31224955134141, -.22742530984642, -.44682270809773, -1.72769462581607],
|
||||
[-.07125058353119, -.36850925227739, -1.01188688859296, -.24962251325969],
|
||||
[-.69840680770104, .4925285516285, -1.0255829922787, -.36214090052941],
|
||||
[-.2530614593082, -.68595709316063, -.56882710610856, 1.25787365685572],
|
||||
[1.93782484285419, 2.67095706598253, 2.4023579082791, -.09112046819432],
|
||||
[1.57782156817208, -.39819017512275, 1.01938038947667, .39718992194809],
|
||||
[1.6839282738726, -.37808442385434, -1.36566197748227, 1.22029200163339],
|
||||
[.54652714502605, -.38206797548206, -.70554510441189, -1.31224358889695],
|
||||
[-1.30026063006148, .90642495630747, .02711437433058, -.44482098905042],
|
||||
[-.1239033493518, -1.29112252171673, .18092802221218, .22673242779457],
|
||||
[.01152882540055, 1.13242883415094, 2.34980443084773, .17712319903618],
|
||||
[-.0505195424414, .6807219067402, .37771832345982, .0842510459176],
|
||||
[-.44230076745505, -.07002728477811, -.6716520563439, .09637247949641],
|
||||
[-1.31245480585229, -.01674966464909, 1.21063252882651, -.03927111631335],
|
||||
[-2.94268586886381, .20925236551048, .30321714445262, .22027672852006],
|
||||
[2.04121905977187, .58496246543101, -.5192457175416, -.37212298770116]])
|
||||
princomp1.values = array([
|
||||
[1.29489288337888],
|
||||
[1.12722515391348],
|
||||
[.94682423958163],
|
||||
[.65890241090379]])
|
||||
princomp1.name = 'princomp1'
|
||||
princomp1.coef = array([
|
||||
[.65989917631713, .22621848650964, -.5882833472413, -.40899997165748],
|
||||
[.15824945056105, .3189419948895, .71689623797385, -.5994104597619],
|
||||
[-.3488766362785, .90294049788532, -.17151017930575, .1832151967827],
|
||||
[.64635538301471, .17832458477678, .33251578268108, .66321815082225]])
|
||||
|
||||
princomp2 = Holder()
|
||||
princomp2.comment = 'mlab.princomp(x[:20,], nout=3)'
|
||||
princomp2.factors = array([
|
||||
[.74592631465403, -.92093638563647, 1.10020213969681, -.20234362115983],
|
||||
[.40379773814409, -.23694214086306, -.53526599590626, .48048423978257],
|
||||
[-.43826559396565, -.26267383420164, .35939862515391, -.15176605914773],
|
||||
[.29427656853499, -.56363285386285, .19525662206552, -.0384830001072],
|
||||
[-1.4327917748351, 1.18414191887856, .05435949672922, .46861687286613],
|
||||
[.23033214569426, -.00452237842477, .00346120473054, -.61483888402985],
|
||||
[-.40976419499281, .10137131352284, .02570805136468, .06798926306103],
|
||||
[.83201287149759, .82736894861103, -.35298970920805, .49344802383821],
|
||||
[-3.36634598435507, -.18324521714611, -1.12118215528184, .2057949493723],
|
||||
[.70198992281665, -1.1856449495675, .02465727900177, -.08333428418838],
|
||||
[-.13789069679894, -.79430992968357, -.33106496391047, -1.01808298459082],
|
||||
[-.10779840884825, -1.41970796854378, 1.55590290358904, 1.34014813517248],
|
||||
[1.8229340670437, .13065838030104, -1.06152350166072, .11456488463131],
|
||||
[.51650051521229, .07999783864926, -1.08601194413786, -.28255247881905],
|
||||
[-.24654203558433, -1.02895891025197, -1.34475655787845, .52240852619949],
|
||||
[.03542169335227, -.01198903021187, 1.12649412049726, -.60518306798831],
|
||||
[-1.23945075955452, .48778599927278, 1.11522465483282, -.994827967694],
|
||||
[.30661562766349, 1.91993049714024, 1.08834307939522, .61608892787963],
|
||||
[.8241280516035, .43533554216801, -.48261931874702, -.22391158066897],
|
||||
[.6649139327178, 1.44597315984982, -.33359403032613, -.094219894409]])
|
||||
princomp2.values = array([
|
||||
[1.16965204468073],
|
||||
[.77687367815155],
|
||||
[.72297937656591],
|
||||
[.32548581375971]])
|
||||
princomp2.name = 'princomp2'
|
||||
princomp2.coef = array([
|
||||
[-.13957162231397, .6561182967648, .32256106777669, .66781951188167],
|
||||
[.49534264552989, -.08241251099014, -.6919444767593, .51870674049413],
|
||||
[-.85614372781797, -.11427402995055, -.47665923729502, .16357058078438],
|
||||
[.04661912785591, .74138950947638, -.43584764555793, -.50813884128056]])
|
||||
|
||||
princomp3 = Holder()
|
||||
princomp3.comment = 'mlab.princomp(x[:20,]-x[:20,].mean(0), nout=3)'
|
||||
princomp3.factors = array([
|
||||
[.74592631465403, -.92093638563647, 1.10020213969681, -.20234362115983],
|
||||
[.40379773814409, -.23694214086306, -.53526599590626, .48048423978257],
|
||||
[-.43826559396565, -.26267383420164, .35939862515391, -.15176605914773],
|
||||
[.29427656853499, -.56363285386285, .19525662206552, -.0384830001072],
|
||||
[-1.4327917748351, 1.18414191887856, .05435949672922, .46861687286613],
|
||||
[.23033214569426, -.00452237842477, .00346120473054, -.61483888402985],
|
||||
[-.40976419499281, .10137131352284, .02570805136468, .06798926306103],
|
||||
[.83201287149759, .82736894861103, -.35298970920805, .49344802383821],
|
||||
[-3.36634598435507, -.18324521714611, -1.12118215528184, .2057949493723],
|
||||
[.70198992281665, -1.1856449495675, .02465727900177, -.08333428418838],
|
||||
[-.13789069679894, -.79430992968357, -.33106496391047, -1.01808298459082],
|
||||
[-.10779840884825, -1.41970796854378, 1.55590290358904, 1.34014813517248],
|
||||
[1.8229340670437, .13065838030104, -1.06152350166072, .11456488463131],
|
||||
[.51650051521229, .07999783864926, -1.08601194413786, -.28255247881905],
|
||||
[-.24654203558433, -1.02895891025197, -1.34475655787845, .52240852619949],
|
||||
[.03542169335227, -.01198903021187, 1.12649412049726, -.60518306798831],
|
||||
[-1.23945075955452, .48778599927278, 1.11522465483282, -.994827967694],
|
||||
[.30661562766349, 1.91993049714024, 1.08834307939522, .61608892787963],
|
||||
[.8241280516035, .43533554216801, -.48261931874702, -.22391158066897],
|
||||
[.6649139327178, 1.44597315984982, -.33359403032613, -.094219894409]])
|
||||
princomp3.values = array([
|
||||
[1.16965204468073],
|
||||
[.77687367815155],
|
||||
[.72297937656591],
|
||||
[.32548581375971]])
|
||||
princomp3.name = 'princomp3'
|
||||
princomp3.coef = array([
|
||||
[-.13957162231397, .6561182967648, .32256106777669, .66781951188167],
|
||||
[.49534264552989, -.08241251099014, -.6919444767593, .51870674049413],
|
||||
[-.85614372781797, -.11427402995055, -.47665923729502, .16357058078438],
|
||||
[.04661912785591, .74138950947638, -.43584764555793, -.50813884128056]])
|
||||
@ -0,0 +1,101 @@
|
||||
var1,var2,var3,var4,var5
|
||||
2.3358,0.0044,0.3163,0.8698,1.4817
|
||||
3.1387,-0.1494,1.1793,2.1482,-0.2141
|
||||
0.0501,0.6111,-0.892,1.0971,-2.6557
|
||||
-0.029,-1.7519,-0.5098,-0.5294,0.2512
|
||||
-0.0012,-0.8835,3.1745,3.6743,2.9339
|
||||
1.3228,0.1419,0.6433,2.5167,0.9105
|
||||
0.9066,-0.7031,-0.9839,-0.0551,0.049
|
||||
-1.5817,-1.332,1.0518,-1.1584,-0.9183
|
||||
2.9412,-1.9048,-1.328,0.3225,-0.2039
|
||||
-1.5636,-1.506,1.6153,1.8511,0.9356
|
||||
-0.5645,-0.7889,1.136,1.9609,2.5086
|
||||
-0.802,-0.3495,-1.6138,-0.4514,-0.5998
|
||||
0.7878,0.8931,0.3226,-1.0011,1.4319
|
||||
-2.375,-0.6369,-0.5691,-1.3663,-1.7005
|
||||
-0.2379,0.4552,-0.0294,-0.5558,1.4641
|
||||
-1.3576,-0.1541,0.2684,-2.3244,-1.2963
|
||||
0.9799,0.219,-2.0489,-3.1371,-1.0562
|
||||
1.5379,2.7226,-0.0049,-3.8528,-0.4739
|
||||
-0.8352,-0.8438,-0.4886,0.8641,-1.2653
|
||||
1.3476,-0.0039,-0.8244,0.2143,0.0362
|
||||
0.3315,-0.2731,-0.2188,-2.3388,-0.3962
|
||||
-0.2723,0.6647,-0.2893,0.0999,-0.8111
|
||||
-0.1344,0.695,0.6257,-0.283,-0.5913
|
||||
-2.2572,-1.5057,1.3967,0.471,0.0997
|
||||
1.0519,-1.3884,1.0226,-1.0947,1.3978
|
||||
1.7878,1.8082,-0.694,0.6162,-0.9046
|
||||
0.5601,0.8636,0.4449,0.6998,1.0791
|
||||
-0.2156,1.4126,2.0064,0.3332,0.0751
|
||||
-1.2607,-1.2132,-0.0598,-1.693,-1.0813
|
||||
0.7107,1.9284,1.2093,-0.0175,1.0042
|
||||
0.0362,1.7571,-0.0752,1.8337,2.6863
|
||||
2.1175,0.8949,-1.765,0.6082,0.8375
|
||||
-1.0219,0.2911,-0.727,0.2553,-1.6644
|
||||
2.653,0.0148,0.4559,-0.0419,1.2743
|
||||
-0.3103,0.4724,-0.6975,0.3755,3.4604
|
||||
-1.7467,0.4565,-1.7263,0.9031,0.1875
|
||||
-0.574,-2.3953,-0.8059,1.5461,-0.8906
|
||||
-1.5758,1.8004,-1.3741,0.9648,0.0344
|
||||
-1.2976,-0.6741,2.0647,2.1778,1.5391
|
||||
0.6771,2.042,0.3806,-2.4027,-2.3492
|
||||
-1.0357,0.5604,0.2532,-1.6972,-0.4285
|
||||
-0.17,-0.0818,-2.5097,-1.4429,-0.8825
|
||||
-0.9111,-0.5983,-1.3297,0.5678,2.5338
|
||||
0.0865,2.3449,-1.9526,0.16,0.4645
|
||||
0.7475,-0.5134,-0.598,0.5344,0.0727
|
||||
-2.298,-0.8431,0.2371,-0.7896,-1.7017
|
||||
3.008,-0.271,0.4868,0.4959,0.1369
|
||||
0.376,1.0972,-1.4817,0.1465,0.8261
|
||||
-0.2943,-1.9401,-0.4638,1.8092,0.9328
|
||||
0.131,-0.8266,-1.4767,-0.5936,-2.0493
|
||||
-0.1,0.265,0.4371,1.1967,1.8712
|
||||
0.8886,0.945,-0.1471,-0.1363,-0.9092
|
||||
0.1406,-0.5044,-1.3068,1.441,-3.8205
|
||||
1.896,1.0309,1.1718,2.3715,1.6846
|
||||
-2.3731,0.3547,-2.5275,0.3097,-1.4761
|
||||
-0.5936,-1.5261,-1.0773,1.417,1.3027
|
||||
-2.4798,-1.5857,-0.6344,-2.1682,-0.002
|
||||
0.7588,0.0225,1.2982,0.01,1.1708
|
||||
-0.0718,1.9237,1.3538,1.4318,1.4835
|
||||
1.1017,-0.5897,-0.3399,1.2663,1.6784
|
||||
-0.7308,0.6094,-0.7773,0.2373,1.013
|
||||
1.0155,-0.2549,1.2958,0.6724,0.484
|
||||
-0.4901,0.92,0.4208,0.2325,1.6677
|
||||
0.6138,1.4609,0.3375,-0.8655,-1.2248
|
||||
0.3232,-0.2704,2.8568,-0.7418,1.2925
|
||||
1.1547,0.2841,0.3959,-0.2621,1.2498
|
||||
-0.8148,-0.1754,-0.6326,-2.8309,-3.0651
|
||||
-2.6977,-1.9161,-1.1292,-1.4923,0.3646
|
||||
-3.1057,-0.2471,0.3585,-1.0263,-0.1043
|
||||
0.666,0.368,0.0196,-1.1868,0.2599
|
||||
-1.0735,-1.3328,-0.9537,-0.2594,-1.2733
|
||||
-0.0316,2.3285,1.872,0.1398,3.1739
|
||||
-0.495,-0.245,-2.0064,-1.315,-1.4454
|
||||
-1.1888,-1.0905,1.0745,1.2094,1.4798
|
||||
-2.7048,-0.9399,-1.1409,-1.3737,-1.2151
|
||||
1.2275,2.3317,-1.3622,-0.9929,-1.5922
|
||||
-2.659,-1.18,-1.6486,-0.2288,0.4164
|
||||
-0.5639,2.0618,-1.9634,0.1514,1.6458
|
||||
-1.8483,-0.4639,0.6209,-0.0183,2.4059
|
||||
-0.4303,-0.1728,-0.3347,-0.3546,-0.7524
|
||||
1.9564,-0.6527,0.4776,1.3519,-0.9619
|
||||
-1.5531,-1.2717,1.4032,0.9843,0.3788
|
||||
2.0049,-0.6503,0.0042,-0.3649,1.1627
|
||||
-0.1315,0.5443,0.5422,0.8582,0.4374
|
||||
0.5894,-0.2894,0.8457,0.641,0.3239
|
||||
1.7067,-0.4797,-0.2498,1.1692,0.5081
|
||||
-3.2533,1.3689,1.0815,1.6946,0.8739
|
||||
2.8036,0.5355,0.0828,-0.7673,-1.0338
|
||||
-1.0385,-0.6787,0.8265,-1.7571,-3.1357
|
||||
-2.1853,0.2404,-0.5056,-1.7177,0.6123
|
||||
2.2815,0.5445,1.2507,0.6492,-0.6182
|
||||
0.247,0.1745,0.8681,-1.4099,1.3582
|
||||
0.1303,0.9697,0.6633,0.3373,-0.5746
|
||||
-0.6143,-0.3428,1.3671,-1.5012,-2.0953
|
||||
3.2129,0.5585,0.0043,0.9622,-1.0555
|
||||
-1.3977,-0.1699,-2.4553,-1.2764,-1.0301
|
||||
-1.1966,-0.6408,-1.0887,-1.4875,-0.4743
|
||||
-1.7013,0.2085,0.2438,-1.2822,-1.4098
|
||||
-0.6957,-1.055,-0.6753,-0.3784,-1.9997
|
||||
1.7702,1.1211,-0.6032,-0.6982,0.4066
|
||||
|
@ -0,0 +1,101 @@
|
||||
f1,f2,f1b,f2b,f1o,f2o,f1ob,f2ob
|
||||
.77409906,.5265066,1.2342164,1.5539443,.82174469,.64430356,1.3798437,1.7324318
|
||||
.90211532,.5778448,1.4354716,1.691956,.95415644,.71558883,1.5935961,1.9010544
|
||||
-.55152949,.10318112,-1.0868707,.62321661,-.53884179,.01300941,-1.0209297,.44003871
|
||||
-.11934901,-.53549872,.01129338,-1.8973372,-.17099081,-.54772966,-.17374833,-1.8707504
|
||||
2.3941179,-.26282474,4.6079809,-1.9774392,2.3570865,.12618302,4.3932292,-1.2095023
|
||||
1.0927031,.30140322,1.9283693,.6165865,1.1168835,.47345041,1.9792983,.91910478
|
||||
-.13079791,-.02496757,-.22584839,-.04664732,-.13260905,-.04570687,-.22932042,-.08241165
|
||||
-.33812166,-.74795931,-.33393451,-2.5509963,-.40943578,-.7926505,-.58106234,-2.5714763
|
||||
-.04786263,.01681279,-.0859222,.0515025,-.04599537,.00888495,-.08049141,.03699227
|
||||
1.0480495,-.81266539,2.3144765,-3.3816091,.96382241,-.63326664,1.9737471,-2.964715
|
||||
1.3937318,-.33515776,2.7734803,-1.8141489,1.3544142,-.10631967,2.5833894,-1.3437932
|
||||
-.60405968,-.29122039,-.99811882,-.79446532,-.62957534,-.38470365,-1.0708228,-.94484311
|
||||
.1768074,.51456466,.11159002,1.777466,.22613441,.53632266,.28435898,1.7722347
|
||||
-.96740945,-.723384,-1.5150277,-2.1864758,-1.0333294,-.86974388,-1.7209881,-2.4019313
|
||||
.20389441,.14744239,.3268983,.44172513,.21729843,.17835522,.36840847,.48860637
|
||||
-.92590203,-.31839571,-1.6216858,-.74416985,-.95253387,-.46335749,-1.6865151,-.99563074
|
||||
-1.467613,.3140052,-2.8897371,1.7524627,-1.4300057,.07354455,-2.7051065,1.2641888
|
||||
-1.1479964,1.2582306,-2.7173854,5.041731,-1.0198511,1.0569188,-2.2128766,4.5382765
|
||||
-.17961277,-.49664318,-.12436604,-1.7124124,-.22717909,-.51908693,-.29073149,-1.7100879
|
||||
-.03173505,.28265502,-.17368455,1.0251028,-.00402536,.27385425,-.07291087,.98374888
|
||||
-.73229133,.02886435,-1.397402,.41212285,-.7259882,-.08944924,-1.3505629,.18168897
|
||||
-.24941449,.1269423,-.52647655,.57374862,-.23584948,.08511655,-.46802849,.48146893
|
||||
-.10732968,.18432286,-.29391519,.7210821,-.08884709,.16463107,-.22221031,.66433352
|
||||
.39880577,-.95391695,1.1482693,-3.6091908,.30390006,-.87723612,.79090718,-3.3771456
|
||||
.35872658,-.13977614,.72766572,-.67702616,.34338949,-.08017796,.6581897,-.55099642
|
||||
-.1661692,.91000156,-.70379606,3.3628839,-.07665355,.87136053,-.37256614,3.2056372
|
||||
.55913446,.40910597,.87726935,1.2383401,.59635784,.49381506,.99382627,1.3634605
|
||||
.4792189,.40196303,.70586224,1.2632806,.51612661,.47389483,.82566748,1.36047
|
||||
-.73646072,-.63906787,-1.1203902,-1.9917138,-.79526024,-.74933373,-1.3092417,-2.1461547
|
||||
.4795629,.79207339,.55137636,2.6632818,.55450422,.85896807,.80841591,2.7173154
|
||||
1.1127571,.59389352,1.8580276,1.6695604,1.1653594,.76535228,2.0119553,1.9470045
|
||||
.08956588,.73845254,-.12614275,2.6149467,.16113731,.7432375,.12941213,2.5604956
|
||||
-.52911256,-.1793241,-.91998487,-.4085011,-.54407555,-.26219769,-.95543007,-.55133367
|
||||
.51942396,.61292952,.71244766,1.9736557,.57670908,.68858245,.90148216,2.0626324
|
||||
.8132702,.16897349,1.4862315,.24911189,.82587019,.29774627,1.5034386,.48522013
|
||||
-.12857721,-.24986049,-.11127733,-.84082875,-.1523257,-.26730639,-.19272687,-.84777395
|
||||
.08574189,-.93312063,.57059076,-3.4152487,-.00564465,-.90713076,.23488998,-3.2787713
|
||||
-.10989362,.20354792,-.27660743,.79824474,-.0895244,.18319224,-.19746176,.74327632
|
||||
1.3642833,-.48395053,2.7669022,-2.3267277,1.3105988,-.25791284,2.5268668,-1.8507401
|
||||
-1.1672069,.77791884,-2.5579395,3.3272367,-1.0857999,.57978316,-2.2213513,2.8718422
|
||||
-.55165952,-.01582468,-1.0412775,.18814338,-.55057411,-.10446383,-1.0179728,.0179878
|
||||
-1.1024776,-.06132851,-2.0295797,.2446312,-1.1032044,-.23808385,-1.9960589,-.08542956
|
||||
.51121121,-.32752081,1.1344589,-1.4159784,.47684277,-.24091389,.99099791,-1.2147876
|
||||
-.2827674,.75734733,-.8347537,2.8623962,-.20757982,.70192071,-.55169675,2.6905919
|
||||
.09876273,-.01059937,.19723165,-.08903476,.09725877,.00544491,.1876112,-.056108
|
||||
-.6421798,-.78126,-.88585867,-2.5329525,-.71529206,-.87448557,-1.1285977,-2.6425563
|
||||
.40709836,.55585896,.51952852,1.8181175,.45935433,.61416665,.69431739,1.8780547
|
||||
-.05459569,.44056716,-.27210725,1.6111778,-.01138085,.42602325,-.1137231,1.5463221
|
||||
.67929573,-.6847309,1.5879431,-2.7776557,.60929897,-.56639064,1.3095598,-2.4856551
|
||||
-.92607077,-.27752489,-1.6212673,-.60797893,-.94871697,-.42304737,-1.6728202,-.86115026
|
||||
.87850547,.09109933,1.6219634,-.05027553,.88320204,.23139495,1.609334,.21160078
|
||||
-.26970441,.46623015,-.71459682,1.8042973,-.22296261,.4167076,-.5352756,1.6656569
|
||||
-.79989201,-.26861592,-1.3954051,-.61625155,-.82227074,-.39393339,-1.4488407,-.83293939
|
||||
1.3522459,.72936579,2.2300877,2.0517907,1.4169156,.93762617,2.4195097,2.384166
|
||||
-.88674943,-.45168537,-1.4528825,-1.2385071,-.92656338,-.58860158,-1.5667133,-1.4563288
|
||||
.5197045,-.60245723,1.2605819,-2.4123386,.45848966,-.51089342,1.0193762,-2.1778289
|
||||
-.74788779,-.97828004,-.98283317,-3.2156233,-.83970562,-1.0859582,-1.2916697,-3.3319334
|
||||
.60152168,.20957638,1.0322015,.49464574,.61908925,.30371668,1.0755111,.65442646
|
||||
.98432058,.60450513,1.5874137,1.7740498,1.0385694,.75514043,1.7528183,2.006547
|
||||
.76389179,.07029355,1.4222455,-.08934065,.76710587,.19240203,1.4067588,.14088071
|
||||
.12791207,.05698825,.23093481,.15178897,.13285894,.07684478,.24463378,.18699996
|
||||
.62420468,.14365152,1.1006233,.24788906,.63523658,.24230553,1.1195484,.42191039
|
||||
.53308959,.22886039,.90944184,.60071557,.55286335,.31172784,.96367793,.739341
|
||||
-.47080363,.57870828,-1.1505061,2.3032231,-.41213726,.49533006,-.92046348,2.0878657
|
||||
.73660289,.05121274,1.3362788,-.13133352,.73808663,.16917538,1.3171075,.08559093
|
||||
.3750162,.38087227,.54184582,1.2062216,.41036404,.43629736,.65686931,1.2777407
|
||||
-1.6628303,-.25019609,-3.0399027,-.18082388,-1.6793018,-.51473173,-3.0430496,-.66804499
|
||||
-.57518971,-1.1354505,-.58045463,-3.8604945,-.68315424,-1.2132636,-.95408231,-3.9035827
|
||||
-.33048237,-.71724484,-.31867622,-2.4346249,-.39883828,-.76110666,-.55453068,-2.4541665
|
||||
-.21711319,.29449836,-.53874819,1.1527255,-.18736559,.25568751,-.42379218,1.0509114
|
||||
-.57134442,-.67766186,-.78074037,-2.2034566,-.63469348,-.76083165,-.9918548,-2.3004322
|
||||
1.1568365,.81787262,1.8212026,2.4652365,1.2310664,.99350661,2.0528831,2.7263631
|
||||
-1.1158728,-.19839769,-2.0042252,-.23966229,-1.1298999,-.37552105,-2.0180431,-.55931767
|
||||
.90698836,-.57501394,1.9541201,-2.467368,.84660404,-.42143564,1.7042449,-2.1204445
|
||||
-.96822116,-.87756275,-1.4404227,-2.7473047,-1.0491694,-1.0220407,-1.7014187,-2.9434239
|
||||
-.93949139,.96862152,-2.1843695,3.9104251,-.840576,.80467039,-1.7927011,3.5075818
|
||||
-.34872923,-.92561876,-.23562044,-3.1952854,-.43731439,-.96969915,-.54603402,-3.1915211
|
||||
-.01008118,.56239284,-.22983374,2.0376015,.04479942,.55342776,-.03007525,1.9739874
|
||||
.6753194,-.46834193,1.4803838,-1.98315,.62643921,-.35346682,1.2799762,-1.7188435
|
||||
-.34822558,-.15758844,-.58978344,-.41711157,-.36193118,-.2116136,-.62764134,-.50665223
|
||||
.32773507,.16164562,.53650026,.43747533,.34193386,.21231779,.57659746,.51816881
|
||||
.62853751,-.73379458,1.4879734,-2.9151351,.55399893,-.62298855,1.1966623,-2.6374401
|
||||
.30674611,.26827549,.46503444,.81759821,.33144117,.31417541,.54253362,.88181986
|
||||
.44192818,.14021085,.7685949,.32232893,.45349306,.20955383,.79635969,.44190477
|
||||
.46887837,.03616074,.85905645,-.07268136,.47017009,.11120237,.84787727,.06661995
|
||||
.48475223,.20430528,.83041791,.51731057,.5023622,.27970846,.87689861,.64429782
|
||||
.73545423,-.27367592,1.5001756,-1.2762381,.70526722,-.15165716,1.3685967,-1.0179721
|
||||
-.33495813,.75918345,-.96995976,2.8784489,-.25934288,.69532746,-.68469352,2.6846599
|
||||
-1.0941765,-.47592994,-1.8866643,-1.2349393,-1.135366,-.64593615,-1.9980805,-1.5226689
|
||||
-.48718385,-.3357568,-.76630583,-.99841793,-.51759859,-.40983563,-.85999933,-1.1087994
|
||||
.3617318,.6347137,.38779162,2.1372949,.42189215,.68468566,.5943275,2.1718491
|
||||
.15777211,.18310759,.21152508,.5882512,.17487318,.20612677,.26787102,.61463863
|
||||
.07296441,.31367206,-.00945973,1.1123917,.1031994,.32132842,.09904208,1.096347
|
||||
-.64908777,-.25472472,-1.1440559,-.62922708,-.67083062,-.35593625,-1.199954,-.80526531
|
||||
.12122439,.81412903,-.13059422,2.8820144,.20002335,.82302476,.15102062,2.82336
|
||||
-1.1305481,-.35912583,-1.9541954,-.81215474,-1.160176,-.53651452,-2.0240689,-1.1162794
|
||||
-.75096061,-.43799405,-1.2186468,-1.2618644,-.79008663,-.55321997,-1.3358709,-1.4416571
|
||||
-.70483427,-.304975,-1.2091407,-.78438434,-.73121086,-.41450863,-1.2798564,-.96887917
|
||||
-.7211226,-.52612824,-1.1360481,-1.5881501,-.76898374,-.63539819,-1.2854782,-1.7503809
|
||||
-.15257555,.75718097,-.60854757,2.7933029,-.07802447,.72272415,-.33330485,2.6588313
|
||||
|
@ -0,0 +1,108 @@
|
||||
import pandas as pd
|
||||
from ..cancorr import CanCorr
|
||||
from numpy.testing import assert_almost_equal
|
||||
|
||||
data_fit = pd.DataFrame([[191, 36, 50, 5, 162, 60],
|
||||
[189, 37, 52, 2, 110, 60],
|
||||
[193, 38, 58, 12, 101, 101],
|
||||
[162, 35, 62, 12, 105, 37],
|
||||
[189, 35, 46, 13, 155, 58],
|
||||
[182, 36, 56, 4, 101, 42],
|
||||
[211, 38, 56, 8, 101, 38],
|
||||
[167, 34, 60, 6, 125, 40],
|
||||
[176, 31, 74, 15, 200, 40],
|
||||
[154, 33, 56, 17, 251, 250],
|
||||
[169, 34, 50, 17, 120, 38],
|
||||
[166, 33, 52, 13, 210, 115],
|
||||
[154, 34, 64, 14, 215, 105],
|
||||
[247, 46, 50, 1, 50, 50],
|
||||
[193, 36, 46, 6, 70, 31],
|
||||
[202, 37, 62, 12, 210, 120],
|
||||
[176, 37, 54, 4, 60, 25],
|
||||
[157, 32, 52, 11, 230, 80],
|
||||
[156, 33, 54, 15, 225, 73],
|
||||
[138, 33, 68, 2, 110, 43]])
|
||||
|
||||
|
||||
def test_cancorr():
    """Check ``CanCorr.corr_test`` against reference values from SAS."""
    # Compare results to SAS example:
    # https://support.sas.com/documentation/cdl/en/statug/63347/HTML/default/
    # viewer.htm#statug_cancorr_sect020.htm
    X1 = data_fit.iloc[:, :3]
    Y1 = data_fit.iloc[:, 3:]
    mod = CanCorr(Y1, X1)
    r = mod.corr_test()

    # Multivariate statistics table: column -> (decimal, expected per row).
    mv_rows = ["Wilks' lambda", "Pillai's trace",
               "Hotelling-Lawley trace", "Roy's greatest root"]
    mv_expected = {
        'Value': (8, [0.35039053, 0.67848151, 1.77194146, 1.72473874]),
        'F Value': (2, [2.05, 1.56, 2.64, 9.20]),
        'Num DF': (3, [9, 9, 9, 3]),
        'Den DF': (3, [34.223, 48, 19.053, 16]),
        'Pr > F': (4, [0.0635, 0.1551, 0.0357, 0.0009]),
    }
    for col, (decimal, values) in mv_expected.items():
        for row, expected in zip(mv_rows, values):
            assert_almost_equal(r.stats_mv.loc[row, col], expected,
                                decimal=decimal,
                                err_msg='stats_mv[%r, %r]' % (row, col))

    # Sequential test table, rows 0..2: column -> [(expected, decimal), ...].
    stats_expected = {
        "Wilks' lambda": [(0.35039053, 8), (0.95472266, 8), (0.99473355, 8)],
        'F Value': [(2.05, 2), (0.18, 2), (0.08, 2)],
        'Num DF': [(9, 2), (4, 2), (1, 2)],
        'Den DF': [(34.223, 3), (30, 2), (16, 2)],
        'Pr > F': [(0.0635, 4), (0.9491, 4), (0.7748, 4)],
    }
    for col, cells in stats_expected.items():
        for idx, (expected, decimal) in enumerate(cells):
            assert_almost_equal(r.stats.loc[idx, col], expected,
                                decimal=decimal,
                                err_msg='stats[%d, %r]' % (idx, col))
|
||||
@ -0,0 +1,314 @@
|
||||
import warnings
|
||||
|
||||
from statsmodels.compat.pandas import PD_LT_1_4
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from statsmodels.multivariate.factor import Factor
|
||||
from numpy.testing import (assert_equal, assert_array_almost_equal, assert_,
|
||||
assert_raises, assert_array_equal,
|
||||
assert_array_less, assert_allclose)
|
||||
import pytest
|
||||
|
||||
# Record whether matplotlib can be imported so plot tests can be skipped.
try:
    import matplotlib.pyplot as plt
    missing_matplotlib = False
    # Select the Agg backend for the plot tests.
    plt.switch_backend('Agg')

except ImportError:
    missing_matplotlib = True
|
||||
|
||||
# Example data
# https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
# viewer.htm#statug_introreg_sect012.htm
# 13 rows; 'id' is a running index 1..13 and 'alt' alternates 0/1.
X = pd.DataFrame(
    [
        ['Minas Graes', 2.068, 2.070, 1.580, 1, 0],
        ['Minas Graes', 2.068, 2.074, 1.602, 2, 1],
        ['Minas Graes', 2.090, 2.090, 1.613, 3, 0],
        ['Minas Graes', 2.097, 2.093, 1.613, 4, 1],
        ['Minas Graes', 2.117, 2.125, 1.663, 5, 0],
        ['Minas Graes', 2.140, 2.146, 1.681, 6, 1],
        ['Matto Grosso', 2.045, 2.054, 1.580, 7, 0],
        ['Matto Grosso', 2.076, 2.088, 1.602, 8, 1],
        ['Matto Grosso', 2.090, 2.093, 1.643, 9, 0],
        ['Matto Grosso', 2.111, 2.114, 1.643, 10, 1],
        ['Santa Cruz', 2.093, 2.098, 1.653, 11, 0],
        ['Santa Cruz', 2.100, 2.106, 1.623, 12, 1],
        ['Santa Cruz', 2.104, 2.101, 1.653, 13, 0],
    ],
    columns=['Loc', 'Basal', 'Occ', 'Max', 'id', 'alt'],
)
|
||||
|
||||
|
||||
def test_auto_col_name():
    """Variable names are auto-generated when ``endog_names`` is None."""
    mod = Factor(None, 2, corr=np.eye(11), endog_names=None,
                 smc=False)
    # Eleven variables -> 'var00' ... 'var10' (zero-padded to two digits).
    expected = ['var%02d' % i for i in range(11)]
    assert_array_equal(mod.endog_names, expected)
|
||||
|
||||
|
||||
def test_direct_corr_matrix():
    """Fit ``Factor`` from a correlation matrix passed directly via ``corr``.

    Also checks that ``endog_names`` can be set and read back, and that
    assigning names of the wrong length raises ``ValueError``.
    """
    numeric = X.iloc[:, 1:-1]
    mod = Factor(None, 2, corr=np.corrcoef(numeric, rowvar=0),
                 smc=False)
    results = mod.fit(tol=1e-10)
    a = np.array([[0.965392158864, 0.225880658666255],
                  [0.967587154301, 0.212758741910989],
                  [0.929891035996, -0.000603217967568],
                  [0.486822656362, -0.869649573289374]])
    assert_array_almost_equal(results.loadings, a, decimal=8)

    # Test set and get endog_names
    mod.endog_names = numeric.columns
    assert_array_equal(mod.endog_names, ['Basal', 'Occ', 'Max', 'id'])

    # Test set endog_names with the wrong number of elements
    with pytest.raises(ValueError):
        mod.endog_names = X.iloc[:, :1].columns
|
||||
|
||||
|
||||
def test_unknown_fa_method_error():
    """Fitting with an unknown factor-analysis ``method`` raises ValueError."""
    mod = Factor(X.iloc[:, 1:-1], 2, method='ab')
    # The invalid method is only rejected when fit() is called.
    assert_raises(ValueError, mod.fit)
|
||||
|
||||
|
||||
def test_example_compare_to_R_output():
|
||||
# Testing basic functions and compare to R output
|
||||
|
||||
# R code for producing the results:
|
||||
# library(psych)
|
||||
# library(GPArotation)
|
||||
# Basal = c(2.068, 2.068, 2.09, 2.097, 2.117, 2.14, 2.045, 2.076, 2.09, 2.111, 2.093, 2.1, 2.104)
|
||||
# Occ = c(2.07, 2.074, 2.09, 2.093, 2.125, 2.146, 2.054, 2.088, 2.093, 2.114, 2.098, 2.106, 2.101)
|
||||
# Max = c(1.58, 1.602, 1.613, 1.613, 1.663, 1.681, 1.58, 1.602, 1.643, 1.643, 1.653, 1.623, 1.653)
|
||||
# id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)
|
||||
# Y <- cbind(Basal, Occ, Max, id)
|
||||
# a <- fa(Y, nfactors=2, fm="pa", rotate="none", SMC=FALSE, min.err=1e-10)
|
||||
# b <- cbind(a$loadings[,1], -a$loadings[,2])
|
||||
# b
|
||||
# a <- fa(Y, nfactors=2, fm="pa", rotate="Promax", SMC=TRUE, min.err=1e-10)
|
||||
# b <- cbind(a$loadings[,1], a$loadings[,2])
|
||||
# b
|
||||
# a <- fa(Y, nfactors=2, fm="pa", rotate="Varimax", SMC=TRUE, min.err=1e-10)
|
||||
# b <- cbind(a$loadings[,1], a$loadings[,2])
|
||||
# b
|
||||
# a <- fa(Y, nfactors=2, fm="pa", rotate="quartimax", SMC=TRUE, min.err=1e-10)
|
||||
# b <- cbind(a$loadings[,1], -a$loadings[,2])
|
||||
# b
|
||||
# a <- fa(Y, nfactors=2, fm="pa", rotate="oblimin", SMC=TRUE, min.err=1e-10)
|
||||
# b <- cbind(a$loadings[,1], a$loadings[,2])
|
||||
# b
|
||||
|
||||
# No rotation without squared multiple correlations prior
|
||||
# produce same results as in R `fa`
|
||||
mod = Factor(X.iloc[:, 1:-1], 2, smc=False)
|
||||
results = mod.fit(tol=1e-10)
|
||||
a = np.array([[0.965392158864, 0.225880658666255],
|
||||
[0.967587154301, 0.212758741910989],
|
||||
[0.929891035996, -0.000603217967568],
|
||||
[0.486822656362, -0.869649573289374]])
|
||||
assert_array_almost_equal(results.loadings, a, decimal=8)
|
||||
|
||||
# No rotation WITH squared multiple correlations prior
|
||||
# produce same results as in R `fa`
|
||||
mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
|
||||
results = mod.fit()
|
||||
a = np.array([[0.97541115, 0.20280987],
|
||||
[0.97113975, 0.17207499],
|
||||
[0.9618705, -0.2004196],
|
||||
[0.37570708, -0.45821379]])
|
||||
assert_array_almost_equal(results.loadings, a, decimal=8)
|
||||
|
||||
# Same as R GPArotation
|
||||
results.rotate('varimax')
|
||||
a = np.array([[0.98828898, -0.12587155],
|
||||
[0.97424206, -0.15354033],
|
||||
[0.84418097, -0.502714],
|
||||
[0.20601929, -0.55558235]])
|
||||
assert_array_almost_equal(results.loadings, a, decimal=8)
|
||||
|
||||
results.rotate('quartimax') # Same as R fa
|
||||
a = np.array([[0.98935598, 0.98242714, 0.94078972, 0.33442284],
|
||||
[0.117190049, 0.086943252, -0.283332952, -0.489159543]])
|
||||
assert_array_almost_equal(results.loadings, a.T, decimal=8)
|
||||
|
||||
results.rotate('equamax') # Not the same as R fa
|
||||
|
||||
results.rotate('promax') # Not the same as R fa
|
||||
|
||||
results.rotate('biquartimin') # Not the same as R fa
|
||||
|
||||
results.rotate('oblimin') # Same as R fa
|
||||
a = np.array([[1.02834170170, 1.00178840104, 0.71824931384,
|
||||
-0.00013510048],
|
||||
[0.06563421, 0.03096076, -0.39658839, -0.59261944]])
|
||||
assert_array_almost_equal(results.loadings, a.T, decimal=8)
|
||||
|
||||
# Testing result summary string
|
||||
results.rotate('varimax')
|
||||
desired = (
|
||||
""" Factor analysis results
|
||||
=============================
|
||||
Eigenvalues
|
||||
-----------------------------
|
||||
Basal Occ Max id
|
||||
-----------------------------
|
||||
2.9609 0.3209 0.0000 -0.0000
|
||||
-----------------------------
|
||||
|
||||
-----------------------------
|
||||
Communality
|
||||
-----------------------------
|
||||
Basal Occ Max id
|
||||
-----------------------------
|
||||
0.9926 0.9727 0.9654 0.3511
|
||||
-----------------------------
|
||||
|
||||
-----------------------------
|
||||
Pre-rotated loadings
|
||||
-----------------------------------
|
||||
factor 0 factor 1
|
||||
-----------------------------------
|
||||
Basal 0.9754 0.2028
|
||||
Occ 0.9711 0.1721
|
||||
Max 0.9619 -0.2004
|
||||
id 0.3757 -0.4582
|
||||
-----------------------------
|
||||
|
||||
-----------------------------
|
||||
varimax rotated loadings
|
||||
-----------------------------------
|
||||
factor 0 factor 1
|
||||
-----------------------------------
|
||||
Basal 0.9883 -0.1259
|
||||
Occ 0.9742 -0.1535
|
||||
Max 0.8442 -0.5027
|
||||
id 0.2060 -0.5556
|
||||
=============================
|
||||
""")
|
||||
actual = results.summary().as_text()
|
||||
actual = "\n".join(line.rstrip() for line in actual.splitlines()) + "\n"
|
||||
assert_equal(actual, desired)
|
||||
|
||||
|
||||
@pytest.mark.skipif(missing_matplotlib, reason='matplotlib not available')
def test_plots(close_figures):
    """Smoke-test the scree and loadings plots of a rotated 3-factor fit."""
    mod = Factor(X.iloc[:, 1:], 3)
    results = mod.fit()
    results.rotate('oblimin')

    # Only checks that the scree plot can be produced.
    results.plot_scree()

    fig_loadings = results.plot_loadings()
    assert_equal(3, len(fig_loadings))
|
||||
|
||||
|
||||
@pytest.mark.smoke
def test_getframe_smoke():
    """Smoke-test ``get_loadings_frame`` for the raw, strings and display styles."""
    # mostly smoke tests for now
    mod = Factor(X.iloc[:, 1:-1], 2, smc=True)
    res = mod.fit()

    df = res.get_loadings_frame(style='raw')
    assert_(isinstance(df, pd.DataFrame))

    lds = res.get_loadings_frame(style='strings', decimals=3, threshold=0.3)

    # The Styler option requires jinja2.  Report a skip instead of returning
    # silently, so the checks that did not run are visible in the test report.
    pytest.importorskip("jinja2")

    if PD_LT_1_4:
        # Old implementation that warns
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            lds.to_latex()
    else:
        # Smoke test using new style to_latex
        lds.style.to_latex()

    # Location of the ``formats`` package differs between pandas versions.
    try:
        from pandas.io import formats as pd_formats
    except ImportError:
        from pandas import formats as pd_formats

    ldf = res.get_loadings_frame(style='display')
    assert_(isinstance(ldf, pd_formats.style.Styler))
    assert_(isinstance(ldf.data, pd.DataFrame))

    # Remaining calls only check that the option combinations run.
    res.get_loadings_frame(style='display', decimals=3, threshold=0.2)

    res.get_loadings_frame(style='display', decimals=3, color_max='GAINSBORO')

    res.get_loadings_frame(style='display', decimals=3, threshold=0.45,
                           highlight_max=False, sort_=False)
|
||||
|
||||
|
||||
def test_factor_missing():
    """A row containing NaN is excluded from the data held by ``Factor``."""
    xm = X.iloc[:, 1:-1].copy()
    nobs, k_endog = xm.shape
    # Put a single missing value into one row.
    xm.iloc[2, 2] = np.nan

    mod = Factor(xm, 2)

    # One observation fewer, same number of variables.
    assert_equal(mod.nobs, nobs - 1)
    assert_equal(mod.k_endog, k_endog)
    assert_equal(mod.endog.shape, (nobs - 1, k_endog))
|
||||
|
||||
|
||||
def _zscore(x):
|
||||
# helper function
|
||||
return (x - x.mean(0)) / x.std(0)
|
||||
|
||||
|
||||
@pytest.mark.smoke
def test_factor_scoring():
    """Compare factor scores with stored Stata results for two rotations."""
    results_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'results')
    y = pd.read_csv(os.path.join(results_dir, 'factor_data.csv'))
    f_s = pd.read_csv(os.path.join(results_dir, 'factors_stata.csv'))

    # Note: Stata has second factor with flipped sign compared to statsmodels
    sign = [1, -1]

    # mostly smoke tests for now
    mod = Factor(y, 2)
    res = mod.fit(maxiter=1)

    def check_scores(reg_cols, default_cols, min_corr_ols):
        # Shared checks for the current rotation of ``res``; returns the
        # OLS scores so the caller can reuse them.
        f_reg = res.factor_scoring(method='reg')
        assert_allclose(f_reg * sign, f_s[reg_cols].values,
                        atol=1e-4, rtol=1e-3)
        # Default scoring method (called ``f_bart`` in the original test).
        f_bart = res.factor_scoring()
        assert_allclose(f_bart * sign, f_s[default_cols].values,
                        atol=1e-4, rtol=1e-3)

        # check we have high correlation to ols and gls
        f_ols = res.factor_scoring(method='ols')
        f_gls = res.factor_scoring(method='gls')
        f_reg_z = _zscore(f_reg)
        f_ols_z = _zscore(f_ols)
        f_gls_z = _zscore(f_gls)
        assert_array_less(min_corr_ols, (f_ols_z * f_reg_z).mean(0))
        assert_array_less(0.999, (f_gls_z * f_reg_z).mean(0))
        return f_ols

    res.rotate('varimax')
    check_scores(["f1", 'f2'], ["f1b", 'f2b'], 0.98)

    # with oblique rotation
    res.rotate('oblimin')
    assert_allclose(res._corr_factors()[0, 1], (-1) * 0.25651037, rtol=1e-3)
    f_ols = check_scores(["f1o", 'f2o'], ["f1ob", 'f2ob'], 0.97)

    # check provided endog
    f_ols2 = res.factor_scoring(method='ols', endog=res.model.endog)
    assert_allclose(f_ols2, f_ols, rtol=1e-13)
|
||||
@ -0,0 +1,197 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from numpy.testing import assert_almost_equal, assert_raises, assert_allclose
|
||||
|
||||
from statsmodels.multivariate.manova import MANOVA
|
||||
from statsmodels.multivariate.multivariate_ols import MultivariateTestResults
|
||||
from statsmodels.tools import add_constant
|
||||
|
||||
# Example data
# https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
# viewer.htm#statug_introreg_sect012.htm
# 13 rows: a location label and three numeric measurements.
X = pd.DataFrame(
    [
        ['Minas Graes', 2.068, 2.070, 1.580],
        ['Minas Graes', 2.068, 2.074, 1.602],
        ['Minas Graes', 2.090, 2.090, 1.613],
        ['Minas Graes', 2.097, 2.093, 1.613],
        ['Minas Graes', 2.117, 2.125, 1.663],
        ['Minas Graes', 2.140, 2.146, 1.681],
        ['Matto Grosso', 2.045, 2.054, 1.580],
        ['Matto Grosso', 2.076, 2.088, 1.602],
        ['Matto Grosso', 2.090, 2.093, 1.643],
        ['Matto Grosso', 2.111, 2.114, 1.643],
        ['Santa Cruz', 2.093, 2.098, 1.653],
        ['Santa Cruz', 2.100, 2.106, 1.623],
        ['Santa Cruz', 2.104, 2.101, 1.653],
    ],
    columns=['Loc', 'Basal', 'Occ', 'Max'],
)
|
||||
|
||||
|
||||
def test_manova_sas_example():
    """MANOVA via the formula interface reproduces the SAS example."""
    # Results should be the same as figure 4.5 of
    # https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/
    # viewer.htm#statug_introreg_sect012.htm
    mod = MANOVA.from_formula('Basal + Occ + Max ~ Loc', data=X)
    r = mod.mv_test()
    stat = r['Loc']['stat']

    rows = ["Wilks' lambda", "Pillai's trace",
            "Hotelling-Lawley trace", "Roy's greatest root"]
    # column -> [(expected, decimal), ...] in the order of ``rows``.
    expected = {
        'Value': [(0.60143661, 8), (0.44702843, 8),
                  (0.58210348, 8), (0.35530890, 8)],
        'F Value': [(0.77, 2), (0.86, 2), (0.75, 2), (1.07, 2)],
        'Num DF': [(6, 3), (6, 3), (6, 3), (3, 3)],
        'Den DF': [(16, 3), (18, 3), (9.0909, 4), (9, 3)],
        'Pr > F': [(0.6032, 4), (0.5397, 4), (0.6272, 4), (0.4109, 4)],
    }
    for col, cells in expected.items():
        for row, (value, decimal) in zip(rows, cells):
            assert_almost_equal(stat.loc[row, col], value, decimal=decimal,
                                err_msg='stat[%r, %r]' % (row, col))
|
||||
|
||||
|
||||
def test_manova_no_formula():
    """MANOVA built from explicit endog/exog reproduces the SAS example."""
    # Same as previous test only skipping formula interface
    exog = add_constant(pd.get_dummies(X[['Loc']], drop_first=True,
                                       dtype=float))
    endog = X[['Basal', 'Occ', 'Max']]
    mod = MANOVA(endog, exog)

    # Contrast matrices over the three exog columns (constant + 2 dummies):
    # the first selects the constant, the second selects both dummies.
    intercept = np.array([[1., 0., 0.]])
    loc = np.array([[0., 1., 0.],
                    [0., 0., 1.]])
    hypotheses = [('Intercept', intercept), ('Loc', loc)]
    r = mod.mv_test(hypotheses)
    stat = r['Loc']['stat']

    rows = ["Wilks' lambda", "Pillai's trace",
            "Hotelling-Lawley trace", "Roy's greatest root"]
    # column -> [(expected, decimal), ...] in the order of ``rows``.
    expected = {
        'Value': [(0.60143661, 8), (0.44702843, 8),
                  (0.58210348, 8), (0.35530890, 8)],
        'F Value': [(0.77, 2), (0.86, 2), (0.75, 2), (1.07, 2)],
        'Num DF': [(6, 3), (6, 3), (6, 3), (3, 3)],
        'Den DF': [(16, 3), (18, 3), (9.0909, 4), (9, 3)],
        'Pr > F': [(0.6032, 4), (0.5397, 4), (0.6272, 4), (0.4109, 4)],
    }
    for col, cells in expected.items():
        for row, (value, decimal) in zip(rows, cells):
            assert_almost_equal(stat.loc[row, col], value, decimal=decimal,
                                err_msg='stat[%r, %r]' % (row, col))
|
||||
|
||||
|
||||
@pytest.mark.smoke
def test_manova_no_formula_no_hypothesis():
    """``mv_test()`` runs without explicit hypotheses on a non-formula model."""
    # Same model as the no-formula test, but no hypotheses are passed.
    exog = add_constant(pd.get_dummies(X[['Loc']], drop_first=True,
                                       dtype=float))
    endog = X[['Basal', 'Occ', 'Max']]
    mod = MANOVA(endog, exog)
    r = mod.mv_test()
    assert isinstance(r, MultivariateTestResults)
|
||||
|
||||
|
||||
def test_manova_test_input_validation():
    """``mv_test`` accepts conforming L/M matrices and rejects mis-shaped ones."""
    mod = MANOVA.from_formula('Basal + Occ + Max ~ Loc', data=X)

    # Contrast matrix L: three columns are accepted, two are rejected.
    hypothesis = [('test', np.array([[1, 1, 1]]), None)]
    mod.mv_test(hypothesis)
    hypothesis = [('test', np.array([[1, 1]]), None)]
    with pytest.raises(ValueError):
        mod.mv_test(hypothesis)
    # Disabled stricter check of the error message:
    # assert_raises_regex(ValueError,
    #                     ('Contrast matrix L should have the same number of '
    #                      'columns as exog! 2 != 3'),
    #                     mod.mv_test, hypothesis)

    # Transform matrix M: three rows are accepted, two are rejected.
    hypothesis = [('test', np.array([[1, 1, 1]]), np.array([[1], [1], [1]]))]
    mod.mv_test(hypothesis)
    hypothesis = [('test', np.array([[1, 1, 1]]), np.array([[1], [1]]))]
    with pytest.raises(ValueError):
        mod.mv_test(hypothesis)
    # Disabled stricter check of the error message:
    # assert_raises_regex(ValueError,
    #                     ('Transform matrix M should have the same number of '
    #                      'rows as the number of columns of endog! 2 != 3'),
    #                     mod.mv_test, hypothesis)
|
||||
|
||||
def test_endog_1D_array():
    """A formula with a single dependent variable raises ``ValueError``."""
    with pytest.raises(ValueError):
        MANOVA.from_formula('Basal ~ Loc', X)
|
||||
|
||||
|
||||
def test_manova_demeaned():
    """Demeaning endog leaves the group-effect statistics unchanged."""
    # see last example in #8713
    # If a term has no effect, all eigenvalues below threshold, then
    # computation raised numpy exception with empty arrays.
    # currently we have an option to skip the intercept test, but don't handle
    # empty arrays directly

    # Local, seeded generator: reproducible data without touching numpy's
    # global random state.
    rng = np.random.RandomState(8713)
    ng = 5
    loc = ["Basal", "Occ", "Max"] * ng
    y1 = (rng.randn(ng, 3) + [0, 0.5, 1]).ravel()
    y2 = (rng.randn(ng, 3) + [0.25, 0.75, 1]).ravel()
    y3 = (rng.randn(ng, 3) + [0.3, 0.6, 1]).ravel()
    dta = pd.DataFrame(dict(Loc=loc, Basal=y1, Occ=y2, Max=y3))
    formula = 'Basal + Occ + Max ~ C(Loc, Helmert)'
    endog_cols = ["Basal", "Occ", "Max"]

    mod = MANOVA.from_formula(formula, data=dta)
    res1 = mod.mv_test()

    # subtract sample means to have insignificant intercept
    means = dta[endog_cols].mean()
    dta[endog_cols] = dta[endog_cols] - means
    mod = MANOVA.from_formula(formula, data=dta)
    res2 = mod.mv_test(skip_intercept_test=True)

    stat1 = res1.results["C(Loc, Helmert)"]["stat"].to_numpy(float)
    stat2 = res2.results["C(Loc, Helmert)"]["stat"].to_numpy(float)
    assert_allclose(stat1, stat2, rtol=1e-10)
|
||||
@ -0,0 +1,206 @@
|
||||
import numpy as np
|
||||
from statsmodels.multivariate.factor import Factor
|
||||
from numpy.testing import assert_allclose, assert_equal
|
||||
from scipy.optimize import approx_fprime
|
||||
import warnings
|
||||
|
||||
# A small model for basic testing
|
||||
def _toy():
|
||||
uniq = np.r_[4, 9, 16]
|
||||
load = np.asarray([[3, 1, 2], [2, 5, 8]]).T
|
||||
par = np.r_[2, 3, 4, 3, 1, 2, 2, 5, 8]
|
||||
corr = np.asarray([[1, .5, .25], [.5, 1, .5], [.25, .5, 1]])
|
||||
return uniq, load, corr, par
|
||||
|
||||
|
||||
def test_loglike():
    """``loglike`` agrees for the tuple and the packed parameter forms."""
    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    # Two ways of passing the parameters to loglike
    ll_from_tuple = fa.loglike((load, uniq))
    ll_from_packed = fa.loglike(par)

    assert_allclose(ll_from_tuple, ll_from_packed)
|
||||
|
||||
|
||||
def test_score():
    """Analytic ``score`` matches a finite-difference gradient of ``loglike``."""
    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    par2 = np.r_[0.1, 0.2, 0.3, 0.4, 0.3, 0.1, 0.2, -0.2, 0, 0.8, 0.5, 0]

    for pt in (par, par2):
        # Numerical gradient with step 1e-8 versus the analytic score.
        g1 = approx_fprime(pt, fa.loglike, 1e-8)
        g2 = fa.score(pt)
        assert_allclose(g1, g2, atol=1e-3)
|
||||
|
||||
|
||||
def test_exact():
    """ML fit recovers exactly factor-structured correlation matrices."""
    # Test if we can recover exact factor-structured matrices with
    # default starting values.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in 5, 10, 25:
        for n_factor in 1, 2, 3:
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)

            # Covariance = load @ load.T with uniq added on the diagonal,
            # then rescaled to a correlation matrix.
            c = np.dot(load, load.T)
            c.flat[::c.shape[0]+1] += uniq
            s = np.sqrt(np.diag(c))
            c /= np.outer(s, s)

            fa = Factor(corr=c, n_factor=n_factor, method='ml')
            rslt = fa.fit()
            assert_allclose(rslt.fitted_cov, c, rtol=1e-4, atol=1e-4)
            rslt.summary()  # smoke test
|
||||
|
||||
|
||||
def test_exact_em():
    """EM fit recovers exactly factor-structured correlation matrices."""
    # Test if we can recover exact factor-structured matrices with
    # default starting values using the EM algorithm.

    np.random.seed(23324)

    # Works for larger k_var but slow for routine testing.
    for k_var in 5, 10, 25:
        for n_factor in 1, 2, 3:
            load = np.random.normal(size=(k_var, n_factor))
            uniq = np.linspace(1, 2, k_var)

            # Covariance = load @ load.T with uniq added on the diagonal,
            # then rescaled to a correlation matrix.
            c = np.dot(load, load.T)
            c.flat[::c.shape[0]+1] += uniq
            s = np.sqrt(np.diag(c))
            c /= np.outer(s, s)

            fa = Factor(corr=c, n_factor=n_factor, method='ml')
            load_e, uniq_e = fa._fit_ml_em(2000)

            # Rebuild the implied matrix from the EM estimates.
            c_e = np.dot(load_e, load_e.T)
            c_e.flat[::c_e.shape[0]+1] += uniq_e
            assert_allclose(c_e, c, rtol=1e-4, atol=1e-4)
|
||||
|
||||
|
||||
def test_fit_ml_em_random_state():
    """Fitting with ``method='ml'`` leaves numpy's global random state intact."""
    # Ensure Factor._fit_ml_em doesn't change numpy's singleton random state
    # see #7357

    T = 10
    epsilon = np.random.multivariate_normal(np.zeros(3), np.eye(3), size=T).T

    # Snapshot the global state right before and right after fitting.
    initial = np.random.get_state()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message='Fitting did not converge')
        Factor(endog=epsilon, n_factor=2, method='ml').fit()
    final = np.random.get_state()

    # The state is a tuple; element 1 is an array and needs an array-aware
    # comparison, the remaining elements compare with ``==``.
    assert initial[0] == final[0]
    assert_equal(initial[1], final[1])
    assert initial[2:] == final[2:]
|
||||
|
||||
|
||||
def test_em():
    """EM estimates imply (approximately) the same matrix as ``fit``."""
    n_factor = 1
    cor = np.asarray([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]])

    fa = Factor(corr=cor, n_factor=n_factor, method='ml')
    rslt = fa.fit(opt={'gtol': 1e-3})

    load_em, uniq_em = fa._fit_ml_em(1000)
    # Implied matrix from the EM estimates: load @ load.T plus uniq on the
    # diagonal.
    cc = np.dot(load_em, load_em.T)
    cc.flat[::cc.shape[0]+1] += uniq_em

    assert_allclose(cc, rslt.fitted_cov, rtol=1e-2, atol=1e-2)
|
||||
|
||||
|
||||
def test_1factor():
    """One-factor ML fit on a correlation matrix with entries ``r**|i-j|``.

    R code for the corresponding model::

        r = 0.4
        p = 4
        ii = seq(0, p-1)
        ii = outer(ii, ii, "-")
        ii = abs(ii)
        cm = r^ii
        fa = factanal(covmat=cm, factors=1)
        print(fa, digits=10)
    """

    r = 0.4
    p = 4
    ii = np.arange(p)
    cm = r ** np.abs(np.subtract.outer(ii, ii))

    fa = Factor(corr=cm, n_factor=1, method='ml')
    rslt = fa.fit()

    # Fix the sign so that the first loading is non-negative.
    if rslt.loadings[0, 0] < 0:
        rslt.loadings[:, 0] *= -1

    # R solution, but our likelihood is higher
    # uniq = np.r_[0.8392472054, 0.5820958187, 0.5820958187, 0.8392472054]
    # load = np.asarray([[0.4009399224, 0.6464550935, 0.6464550935,
    #                     0.4009399224]]).T
    # l1 = fa.loglike(fa._pack(load, uniq))
    # l2 = fa.loglike(fa._pack(rslt.loadings, rslt.uniqueness))

    # So use a smoke test
    uniq = np.r_[0.85290232, 0.60916033, 0.55382266, 0.82610666]
    load = np.asarray([[0.38353316], [0.62517171], [0.66796508],
                       [0.4170052]])

    assert_allclose(load, rslt.loadings, rtol=1e-3, atol=1e-3)
    assert_allclose(uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 2)
|
||||
|
||||
|
||||
def test_2factor():
    """Two-factor ML fit on a correlation matrix with entries ``r**|i-j|``.

    R code for the corresponding model::

        r = 0.4
        p = 6
        ii = seq(0, p-1)
        ii = outer(ii, ii, "-")
        ii = abs(ii)
        cm = r^ii
        factanal(covmat=cm, factors=2)
    """

    r = 0.4
    p = 6
    ii = np.arange(p)
    cm = r ** np.abs(np.subtract.outer(ii, ii))

    fa = Factor(corr=cm, n_factor=2, nobs=100, method='ml')
    rslt = fa.fit()

    # Fix the sign of each factor so that its first loading is non-negative.
    for j in 0, 1:
        if rslt.loadings[0, j] < 0:
            rslt.loadings[:, j] *= -1

    uniq = np.r_[0.782, 0.367, 0.696, 0.696, 0.367, 0.782]
    assert_allclose(uniq, rslt.uniqueness, rtol=1e-3, atol=1e-3)

    loads = [np.r_[0.323, 0.586, 0.519, 0.519, 0.586, 0.323],
             np.r_[0.337, 0.538, 0.187, -0.187, -0.538, -0.337]]
    for k in 0, 1:
        # Align the sign of the expected column with the estimate before
        # comparing.
        if np.dot(loads[k], rslt.loadings[:, k]) < 0:
            loads[k] *= -1
        assert_allclose(loads[k], rslt.loadings[:, k], rtol=1e-3, atol=1e-3)

    assert_equal(rslt.df, 4)

    # Smoke test for standard errors
    e = np.asarray([0.11056836, 0.05191071, 0.09836349,
                    0.09836349, 0.05191071, 0.11056836])
    assert_allclose(rslt.uniq_stderr, e, atol=1e-4)
    e = np.asarray([[0.08842151, 0.08842151], [0.06058582, 0.06058582],
                    [0.08339874, 0.08339874], [0.08339874, 0.08339874],
                    [0.06058582, 0.06058582], [0.08842151, 0.08842151]])
    assert_allclose(rslt.load_stderr, e, atol=1e-4)
|
||||
@ -0,0 +1,199 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from statsmodels.multivariate.multivariate_ols import _MultivariateOLS
|
||||
from numpy.testing import assert_array_almost_equal, assert_raises
|
||||
import patsy
|
||||
|
||||
# 15 rows: two categorical columns (Drug, Depleted) and four Histamine
# measurement columns.
data = pd.DataFrame(
    [
        ['Morphine', 'N', .04, .20, .10, .08],
        ['Morphine', 'N', .02, .06, .02, .02],
        ['Morphine', 'N', .07, 1.40, .48, .24],
        ['Morphine', 'N', .17, .57, .35, .24],
        ['Morphine', 'Y', .10, .09, .13, .14],
        ['placebo', 'Y', .07, .07, .06, .07],
        ['placebo', 'Y', .05, .07, .06, .07],
        ['placebo', 'N', .03, .62, .31, .22],
        ['placebo', 'N', .03, 1.05, .73, .60],
        ['placebo', 'N', .07, .83, 1.07, .80],
        ['Trimethaphan', 'N', .09, 3.13, 2.06, 1.23],
        ['Trimethaphan', 'Y', .10, .09, .09, .08],
        ['Trimethaphan', 'Y', .08, .09, .09, .10],
        ['Trimethaphan', 'Y', .13, .10, .12, .12],
        ['Trimethaphan', 'Y', .06, .05, .05, .05],
    ],
    columns=['Drug', 'Depleted',
             'Histamine0', 'Histamine1',
             'Histamine3', 'Histamine5'],
)

# Replace the four Histamine columns (positions 2..5) by their natural log.
for i in range(2, 6):
    data.iloc[:, i] = np.log(data.iloc[:, i])
|
||||
|
||||
|
||||
def compare_r_output_dogs_data(method):
    """Check the default multivariate tests on the histamine data.

    Fits the four histamine responses on ``Drug * Depleted`` with the
    requested fitting ``method`` and compares the ``stat`` table of every
    model term with reference values taken from the R ``car`` library,
    ``Anova(, type=3)``.

    Note: the test statistics (Pillai, Wilks, Hotelling-Lawley and Roy)
    are the same as in the R output, but the approximate F values and the
    degrees of freedom can differ, because this implementation is based on
    the SAS formulas [1]_.

    Parameters
    ----------
    method : str
        Passed through to ``_MultivariateOLS.fit``.

    References
    ----------
    .. [1] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm
    """
    fitted = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data).fit(method=method)
    mv = fitted.mv_test()

    # Expected ``stat`` table for each term, checked in this order.
    reference = {
        'Intercept': [
            [2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
            [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
            [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
            [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
        ],
        'Drug': [
            [8.39646619e-02, 8, 1.20000000e+01, 3.67658068e+00, 2.12614444e-02],
            [1.18605382e+00, 8, 1.40000000e+01, 2.55003861e+00, 6.01270701e-02],
            [7.69391362e+00, 8, 6.63157895e+00, 5.50814270e+00, 2.07392260e-02],
            [7.25036952e+00, 4, 7.00000000e+00, 1.26881467e+01, 2.52669877e-03],
        ],
        'Depleted': [
            [0.32048892, 4., 6., 3.18034906, 0.10002373],
            [0.67951108, 4., 6., 3.18034906, 0.10002373],
            [2.12023271, 4., 6., 3.18034906, 0.10002373],
            [2.12023271, 4., 6., 3.18034906, 0.10002373],
        ],
        'Drug:Depleted': [
            [0.15234366, 8., 12., 2.34307678, 0.08894239],
            [1.13013353, 8., 14., 2.27360606, 0.08553213],
            [3.70989596, 8., 6.63157895, 2.65594824, 0.11370285],
            [3.1145597, 4., 7., 5.45047947, 0.02582767],
        ],
    }
    for term, table in reference.items():
        assert_array_almost_equal(mv[term]['stat'].values, table, decimal=6)
|
||||
|
||||
|
||||
def test_glm_dogs_example():
    """Run the reference comparison once for each fitting method."""
    for fit_method in ('svd', 'pinv'):
        compare_r_output_dogs_data(method=fit_method)
|
||||
|
||||
|
||||
def test_specify_L_M_by_string():
    """Hypotheses given as lists of names must yield the matching L and M."""
    fitted = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data).fit()

    # A hypothesis that names only the intercept column.
    named = fitted.mv_test(hypotheses=[['Intercept', ['Intercept'], None]])
    expected_stat = [
        [2.68607660e-02, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [9.73139234e-01, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
        [3.62290202e+01, 4, 6, 5.43435304e+01, 7.59585610e-05],
    ]
    assert_array_almost_equal(named['Intercept']['stat'].values,
                              expected_stat, decimal=6)

    # Names on both sides: three exog columns and three endog columns.
    exog_names = ['Intercept', 'Drug[T.Trimethaphan]', 'Drug[T.placebo]']
    endog_names = ['Histamine1', 'Histamine3', 'Histamine5']
    named = fitted.mv_test(hypotheses=[['a', exog_names, endog_names]])

    # Each named exog column becomes one indicator row of the contrast.
    expected_contrast = [[1, 0, 0, 0, 0, 0],
                         [0, 1, 0, 0, 0, 0],
                         [0, 0, 1, 0, 0, 0]]
    assert_array_almost_equal(named['a']['contrast_L'],
                              expected_contrast, decimal=10)

    # Each named endog column becomes one indicator row of the transposed
    # transform.
    expected_transform_t = [[0, 1, 0, 0],
                            [0, 0, 1, 0],
                            [0, 0, 0, 1]]
    assert_array_almost_equal(named['a']['transform_M'].T,
                              expected_transform_t, decimal=10)
|
||||
|
||||
|
||||
def test_independent_variable_singular():
    """Fitting with an exactly duplicated regressor must raise ValueError."""
    with_dup = data.copy()
    with_dup['dup'] = with_dup['Drug']

    # The model is built and checked twice with identical arguments.
    # NOTE(review): the repetition may have been meant to cover two
    # different fit methods -- confirm before changing it.
    for _ in range(2):
        model = _MultivariateOLS.from_formula(
            'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * dup',
            with_dup)
        assert_raises(ValueError, model.fit)
|
||||
|
||||
|
||||
def test_from_formula_vs_no_formula():
    """Formula and array interfaces must give identical test statistics.

    The model is first fitted through ``from_formula``; its default
    ``mv_test`` results serve as the reference.  The same design matrices
    are then built with patsy and passed to ``_MultivariateOLS`` directly,
    once as DataFrames and once as plain numpy arrays, and the 'Intercept'
    and 'Drug' hypotheses are re-tested with explicit contrast matrices.
    """
    formula = (
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted')
    reference = _MultivariateOLS.from_formula(
        formula, data).fit(method='svd').mv_test()

    endog, exog = patsy.dmatrices(formula, data, return_type="dataframe")

    # Contrast rows pick design-matrix columns: column 0 for the intercept,
    # columns 1 and 2 for the two Drug indicator columns.
    contrasts = {
        'Intercept': np.array([[1, 0, 0, 0, 0, 0]]),
        'Drug': np.array([[0, 1, 0, 0, 0, 0],
                          [0, 0, 1, 0, 0, 0]]),
    }
    for term, contrast in contrasts.items():
        # DataFrame input first, then numpy array input.
        for y, x in ((endog, exog), (endog.values, exog.values)):
            fitted = _MultivariateOLS(y, x).fit(method='svd')
            result = fitted.mv_test(hypotheses=[[term, contrast, None]])
            assert_array_almost_equal(result[term]['stat'].values,
                                      reference[term]['stat'].values,
                                      decimal=6)
|
||||
|
||||
def test_L_M_matrices_1D_array():
    """One-dimensional L or M arrays must be rejected with ValueError."""
    fitted = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data).fit(method='svd')

    flat = np.array([1, 0, 0, 0, 0, 0])

    # 1-D contrast matrix.
    with assert_raises(ValueError):
        fitted.mv_test(hypotheses=[['Drug', flat, None]])

    # Valid 2-D contrast matrix combined with a 1-D transform matrix.
    contrast = np.array([[1, 0, 0, 0, 0, 0]])
    with assert_raises(ValueError):
        fitted.mv_test(hypotheses=[['Drug', contrast, flat]])
|
||||
|
||||
|
||||
def test_exog_1D_array():
    """Check the model with 'Depleted' as the only term and no intercept."""
    model = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ 0 + Depleted',
        data)
    tests = model.fit(method='svd').mv_test()
    expected = [
        [0.0019, 8.0000, 20.0000, 55.0013, 0.0000],
        [1.8112, 8.0000, 22.0000, 26.3796, 0.0000],
        [97.8858, 8.0000, 12.1818, 117.1133, 0.0000],
        [93.2742, 4.0000, 11.0000, 256.5041, 0.0000],
    ]
    assert_array_almost_equal(tests['Depleted']['stat'].values, expected,
                              decimal=4)
|
||||
|
||||
|
||||
def test_endog_1D_array():
    """A single response variable must be rejected with ValueError."""
    with assert_raises(ValueError):
        _MultivariateOLS.from_formula('Histamine0 ~ 0 + Depleted', data)
|
||||
|
||||
def test_affine_hypothesis():
    """Test an affine hypothesis (L, M and a constant C) against R.

    Reference values come from R ``car`` ``linearHypothesis``.  The test
    statistics (Pillai, Wilks, Hotelling-Lawley and Roy) are the same as in
    the R output, but the approximate F values and the degrees of freedom
    can differ because this implementation is based on the SAS formulas.
    """
    fitted = _MultivariateOLS.from_formula(
        'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted',
        data).fit(method='svd')

    contrast = np.array([[0, 1.2, 1.1, 1.3, 1.5, 1.4],
                         [0, 3.2, 2.1, 3.3, 5.5, 4.4]])
    transform = None
    constant = np.array([[1, 2, 3, 4],
                         [5, 6, 7, 8]])
    tests = fitted.mv_test(
        hypotheses=[('test1', contrast, transform, constant)])

    expected = [
        [0.0269, 8.0000, 12.0000, 7.6441, 0.0010],
        [1.4277, 8.0000, 14.0000, 4.3657, 0.0080],
        [19.2678, 8.0000, 6.6316, 13.7940, 0.0016],
        [18.3470, 4.0000, 7.0000, 32.1072, 0.0001],
    ]
    assert_array_almost_equal(tests['test1']['stat'].values, expected,
                              decimal=4)

    # Smoke test: the summary must render with all optional matrices shown.
    tests.summary(show_contrast_L=True, show_transform_M=True,
                  show_constant_C=True)
|
||||
@ -0,0 +1,443 @@
|
||||
from statsmodels.compat.platform import PLATFORM_WIN32
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose, assert_equal, assert_raises
|
||||
|
||||
from statsmodels.multivariate.pca import PCA, pca
|
||||
from statsmodels.multivariate.tests.results.datamlw import (data, princomp1,
|
||||
princomp2)
|
||||
from statsmodels.tools.sm_exceptions import EstimationWarning
|
||||
|
||||
# Absolute tolerance (1e-5) passed as ``atol`` to assert_allclose in the PCA
# tests of this module.
DECIMAL_5 = .00001
|
||||
|
||||
|
||||
class TestPCA:
|
||||
@classmethod
|
||||
def setup_class(cls):
    """Create the simulated data sets shared by all tests of the class.

    Sets ``cls.x`` (200 x 100), ``cls.x_copy`` (an independent copy of
    ``cls.x``), ``cls.x_wide`` (200 x 300) and ``cls.rs`` (the seeded
    ``RandomState`` used to draw them).
    """
    rng = np.random.RandomState()
    rng.seed(1234)

    def simulate(nobs, nvar, nfactor=3, lam=2):
        # Draw order (noise, factors, loadings) is fixed so that the seeded
        # generator reproduces the same arrays on every run.
        noise = rng.standard_normal((nobs, nvar))
        factors = rng.standard_normal((nobs, nfactor))
        loadings = rng.standard_gamma(lam, size=(nfactor, nvar)) / lam
        return factors.dot(loadings) + noise

    cls.x = simulate(200, 100)
    cls.x_copy = cls.x + 0.0
    cls.rs = rng

    cls.x_wide = simulate(200, 300)
|
||||
|
||||
@pytest.mark.smoke
|
||||
@pytest.mark.matplotlib
|
||||
def test_smoke_plot_and_repr(self, close_figures):
    """Smoke test: plotting and repr must run for several configurations."""
    model = PCA(self.x)
    for scree_options in ({}, {'ncomp': 10}, {'log_scale': False},
                          {'cumulative': True}):
        model.plot_scree(**scree_options)
    for rsquare_options in ({}, {'ncomp': 5}):
        model.plot_rsquare(**rsquare_options)

    # repr for the default model and for two non-default pre-processing
    # choices.
    repr(model)
    model = PCA(self.x, standardize=False)
    repr(model)
    model = PCA(self.x, standardize=False, demean=False)
    repr(model)

    model = PCA(self.x, ncomp=2, gls=True)
    assert "GLS" in repr(model)
    # The input array must be left untouched.
    assert_equal(self.x, model.data)
|
||||
|
||||
def test_eig_svd_equiv(self):
    """The 'eig', 'svd' and 'nipals' methods must agree with each other."""
    via_eig = PCA(self.x)
    via_svd = PCA(self.x, method='svd')

    # Only the leading components are compared (the tail end can differ),
    # and in absolute value.
    assert_allclose(via_eig.projection, via_svd.projection)
    assert_allclose(np.abs(via_eig.factors[:, :2]),
                    np.abs(via_svd.factors[:, :2]))
    assert_allclose(np.abs(via_eig.coeff[:2, :]),
                    np.abs(via_svd.coeff[:2, :]))
    assert_allclose(via_eig.eigenvals, via_svd.eigenvals)
    assert_allclose(np.abs(via_eig.eigenvecs[:, :2]),
                    np.abs(via_svd.eigenvecs[:, :2]))

    via_svd = PCA(self.x, method='svd', ncomp=2)
    via_nipals = PCA(self.x, method='nipals', ncomp=2)
    for attr in ('factors', 'coeff'):
        assert_allclose(np.abs(getattr(via_nipals, attr)),
                        np.abs(getattr(via_svd, attr)),
                        atol=DECIMAL_5)
    assert_allclose(via_nipals.eigenvals, via_svd.eigenvals, atol=DECIMAL_5)
    assert_allclose(np.abs(via_nipals.eigenvecs),
                    np.abs(via_svd.eigenvecs),
                    atol=DECIMAL_5)

    # None of the models may have changed the input data.
    for fitted in (via_svd, via_eig, via_nipals):
        assert_equal(self.x, fitted.data)
|
||||
|
||||
def test_options(self):
    """Check normalize, ncomp, standardize and demean against manual math."""

    def sorted_eig(mat):
        # Eigen-decomposition of mat'mat, largest eigenvalue first.
        values, vectors = np.linalg.eigh(mat.T.dot(mat))
        order = np.argsort(values)[::-1]
        return values[order], vectors[:, order]

    full = PCA(self.x)
    unnormalized = PCA(self.x, normalize=False)
    # Normalization must not change the factor/coefficient product.
    assert_allclose(full.factors.dot(full.coeff),
                    unnormalized.factors.dot(unnormalized.coeff))
    identity = np.eye(100)
    assert_allclose(full.factors.T.dot(full.factors), identity, atol=1e-5)
    assert_allclose(unnormalized.coeff.T.dot(unnormalized.coeff), identity,
                    atol=1e-5)

    # Restricting ncomp must simply truncate the full solution.
    first_10 = PCA(self.x, ncomp=10)
    assert_allclose(full.factors[:, :10], first_10.factors)
    assert_allclose(full.coeff[:10, :], first_10.coeff)
    assert_allclose(full.rsquare[:(10 + 1)], first_10.rsquare)
    assert_allclose(full.eigenvals[:10], first_10.eigenvals)
    assert_allclose(full.eigenvecs[:, :10], first_10.eigenvecs)

    # Demeaned but not standardized.
    model = PCA(self.x, standardize=False, normalize=False)
    center = self.x.mean(0)
    centered = self.x - center
    values, vectors = sorted_eig(centered)
    assert_allclose(centered, model.transformed_data)
    assert_allclose(values, model.eigenvals)
    assert_allclose(np.abs(vectors), np.abs(model.eigenvecs))
    assert_allclose(np.abs(model.factors), np.abs(centered.dot(vectors)))
    assert_allclose(model.projection, centered + center)

    # Neither demeaned nor standardized.
    model = PCA(self.x, standardize=False, demean=False, normalize=False)
    raw = self.x
    values, vectors = sorted_eig(raw)
    assert_allclose(raw, model.transformed_data)
    assert_allclose(values, model.eigenvals)
    assert_allclose(np.abs(vectors), np.abs(model.eigenvecs))
    assert_allclose(np.abs(model.factors), np.abs(raw.dot(vectors)))
|
||||
|
||||
def test_against_reference(self):
    """Compare with stored MATLAB results (demeaned, not standardized)."""
    scaled = data.xo / 1000.0

    # Full sample against princomp1, first 20 rows against princomp2.
    for sample, ref in ((scaled, princomp1), (scaled[:20], princomp2)):
        model = PCA(sample, normalize=False, standardize=False)
        center = sample.mean(0)
        assert_allclose(np.abs(model.factors), np.abs(ref.factors))
        assert_allclose(model.factors.dot(model.coeff) + center, sample)
        assert_allclose(np.abs(model.coeff), np.abs(ref.coef.T))
        assert_allclose(model.factors.dot(model.coeff),
                        ref.factors.dot(ref.coef.T))
|
||||
|
||||
def test_warnings_and_errors(self):
    """Check the warnings and ValueErrors raised for questionable input."""
    # More components requested than the 100 columns available.
    with warnings.catch_warnings(record=True) as caught:
        PCA(self.x, ncomp=300)
        assert_equal(len(caught), 1)

    # Every column is a copy of the same random vector.
    with warnings.catch_warnings(record=True) as caught:
        collinear = self.rs.standard_normal((200, 1)) * np.ones(200)
        PCA(collinear, method='eig')
        assert_equal(len(caught), 1)

    for bad_option in ({'method': 'unknown'}, {'missing': 'unknown'},
                       {'tol': 2.0}):
        assert_raises(ValueError, PCA, self.x, **bad_option)
    # NOTE(review): tol=2.0 already raises with valid data (see above), so
    # this call does not isolate the all-NaN input -- confirm the intent.
    assert_raises(ValueError, PCA, np.nan * np.ones((200, 100)), tol=2.0)
|
||||
|
||||
@pytest.mark.matplotlib
|
||||
def test_pandas(self, close_figures):
    """DataFrame input must match array input and support plots/project."""
    from_frame = PCA(pd.DataFrame(self.x))
    from_array = PCA(self.x)
    assert_allclose(from_frame.factors.values, from_array.factors)

    # Smoke tests for plotting and projecting on a DataFrame-based model.
    for scree_options in ({}, {'ncomp': 10}, {'log_scale': False}):
        from_frame.plot_scree(**scree_options)
    for rsquare_options in ({}, {'ncomp': 5}):
        from_frame.plot_rsquare(**rsquare_options)
    from_frame.project(2)

    # Smoke tests for further constructor options with DataFrame input.
    PCA(pd.DataFrame(self.x), ncomp=4, gls=True)
    PCA(pd.DataFrame(self.x), ncomp=4, standardize=False)
|
||||
|
||||
def test_gls_and_weights(self):
    """GLS weights must equal manually computed inverse error variances."""
    # Invalid requests: gls without ncomp, and a weights vector of length 2.
    assert_raises(ValueError, PCA, self.x, gls=True)
    assert_raises(ValueError, PCA, self.x, weights=np.array([1.0, 1.0]))

    # Center each column and divide it by its mean square up front, so the
    # models below can run without any internal pre-processing.
    centered = self.x - self.x.mean(0)
    scaled = centered / (centered ** 2.0).mean(0)

    common = dict(ncomp=1, standardize=False, demean=False)
    gls_model = PCA(scaled, gls=True, **common)
    plain_model = PCA(scaled, **common)

    # Manual weights: inverse residual variance of the one-factor model,
    # rescaled so that the mean squared weight is one.
    resid = scaled - plain_model.projection
    resid_var = (resid ** 2.0).mean(0)
    inv_var = 1.0 / resid_var
    expected_weights = inv_var / np.sqrt((inv_var ** 2.0).mean())

    assert_allclose(expected_weights, gls_model.weights)
    assert_equal(scaled, gls_model.data)
    assert_equal(scaled, plain_model.data)

    # Supplying the same weights explicitly must reproduce the GLS factors.
    weighted_model = PCA(scaled, weights=expected_weights, **common)
    assert_allclose(expected_weights, weighted_model.weights)
    assert_allclose(np.abs(weighted_model.factors),
                    np.abs(gls_model.factors))
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_wide(self):
    """Check result shapes when there are more columns than rows."""
    nobs = self.x_wide.shape[0]
    smaller_dim = min(np.array(self.x_wide.shape))

    # Same checks for array input and DataFrame input.
    for wide in (self.x_wide, pd.DataFrame(self.x_wide)):
        model = PCA(wide)
        assert_equal(model.factors.shape[1], nobs)
        assert_equal(model.eigenvecs.shape[1], smaller_dim)
|
||||
|
||||
def test_projection(self):
    """``projection`` must equal a least-squares fit on the factors."""
    center = self.x.mean(0)
    centered = self.x - center

    # (constructor options, whether the model demeans the data)
    # NOTE(review): the fifth case repeats the second one without gls=True;
    # it may have been meant as the GLS variant -- confirm before changing.
    cases = (
        (dict(ncomp=5), True),
        (dict(standardize=False, ncomp=5), True),
        (dict(standardize=False, demean=False, ncomp=5), False),
        (dict(ncomp=5, gls=True), True),
        (dict(standardize=False, ncomp=5), True),
        (dict(standardize=False, demean=False, ncomp=5, gls=True), False),
    )
    for options, demeaned in cases:
        model = PCA(self.x, **options)
        target = centered if demeaned else self.x
        beta = np.linalg.pinv(model.factors).dot(target)
        fitted = model.factors.dot(beta)
        if demeaned:
            assert_allclose(model.projection, fitted + center)
        else:
            assert_allclose(model.projection, fitted)

    # Asking for more factors than the model holds (5) must fail.
    assert_raises(ValueError, model.project, 6)
|
||||
|
||||
@pytest.mark.skipif(PLATFORM_WIN32, reason='Windows 32-bit')
|
||||
def test_replace_missing(self):
    """Check every ``missing`` option of PCA against a manual computation."""
    holed = self.x.copy()
    holed[::5, ::7] = np.nan
    nan_mask = np.isnan(holed)

    # Manual equivalents of the three drop strategies.
    rows_kept = holed[~nan_mask.any(axis=1)]
    cols_kept = holed[:, ~nan_mask.any(axis=0)]
    # 'drop-min' is compared with whichever reduction keeps more values.
    larger = rows_kept if rows_kept.size > cols_kept.size else cols_kept
    manual = {'drop-row': rows_kept, 'drop-col': cols_kept,
              'drop-min': larger}
    for option, reduced in manual.items():
        model = PCA(holed, missing=option)
        assert_allclose(model.projection, PCA(reduced).projection)
        assert_equal(holed, model.data)

    # Manual EM fill: standardize with NaN-aware moments, start the missing
    # cells at zero and replace them by the 3-factor projection until the
    # relative change of the filled values is at most 5e-8.
    model = PCA(holed, ncomp=3, missing='fill-em')
    col_mean = np.nanmean(holed, axis=0)
    resid = holed - col_mean
    col_scale = np.sqrt(np.nanmean(resid ** 2, axis=0))
    filled = resid / col_scale
    filled[nan_mask] = 0.0
    previous = filled[nan_mask]
    rel_change = 1.0
    while rel_change > 5e-8:
        step = PCA(filled, ncomp=3, standardize=False, demean=False)
        filled[nan_mask] = step.projection[nan_mask]
        latest = filled[nan_mask]
        change = latest - previous
        rel_change = (np.sqrt(np.sum(change ** 2))
                      / np.sqrt(np.sum(latest ** 2)))
        previous = latest
    expected = self.x + 0.0
    rescaled = step.projection * col_scale + col_mean
    expected[nan_mask] = rescaled[nan_mask]
    assert_allclose(model._adjusted_data, expected)
    # The shared fixture must not have been modified.
    assert_equal(self.x, self.x_copy)

    # With complete data every option must reproduce the plain result.
    complete = self.x
    baseline = PCA(complete)
    for option in ('drop-row', 'drop-col', 'drop-min'):
        handled = PCA(complete, missing=option)
        assert_allclose(baseline.projection, handled.projection,
                        atol=DECIMAL_5)

    baseline = PCA(complete, ncomp=3)
    handled = PCA(complete, ncomp=3, missing='fill-em')
    assert_allclose(baseline.projection, handled.projection, atol=DECIMAL_5)

    # Data consisting only of NaN must be rejected by every option.
    all_nan = self.x.copy()
    all_nan[:, :] = np.nan
    for option in ('drop-row', 'drop-col', 'drop-min', 'fill-em'):
        assert_raises(ValueError, PCA, all_nan, missing=option)
|
||||
|
||||
def test_rsquare(self):
    """``rsquare`` must match 1 - SSE/TSS computed from ``project``."""
    raw = self.x + 0.0
    centered = raw - raw.mean(0)
    standardized = centered / np.std(raw, 0)
    n_models = raw.shape[1] + 1

    # (constructor options, data on the scale the model works with)
    cases = (
        ({}, standardized),
        ({'standardize': False}, centered),
        ({'standardize': False, 'demean': False}, raw),
    )
    for options, target in cases:
        model = PCA(self.x, **options)
        total = np.sum(target ** 2)
        manual = np.zeros(n_models)
        for ncomp in range(n_models):
            resid = target - model.project(ncomp, transform=False,
                                           unweight=False)
            manual[ncomp] = 1.0 - np.sum(resid ** 2) / total
        assert_allclose(manual, model.rsquare)
|
||||
|
||||
@pytest.mark.slow
|
||||
def test_missing_dataframe(self):
    """Missing-value handling must agree for array and DataFrame input."""
    holed = self.x.copy()
    holed[::5, ::7] = np.nan
    from_array = PCA(holed, ncomp=3, missing='fill-em')
    from_frame = PCA(pd.DataFrame(holed), ncomp=3, missing='fill-em')
    for attr in ('coeff', 'factors'):
        assert_allclose(getattr(from_array, attr), getattr(from_frame, attr))

    # Filling must not change the container types relative to a model fitted
    # on a complete DataFrame.
    complete_frame = PCA(pd.DataFrame(self.x.copy()), ncomp=3)
    for attr in ('coeff', 'data', 'eigenvals', 'eigenvecs'):
        assert isinstance(getattr(from_frame, attr),
                          type(getattr(complete_frame, attr)))

    # Fresh copy with the same pattern of holes for the drop strategies.
    holed = self.x.copy()
    holed[::5, ::7] = np.nan
    holed_frame = pd.DataFrame(holed)
    for option in ('drop-row', 'drop-col', 'drop-min'):
        from_array = PCA(holed, missing=option)
        from_frame = PCA(holed_frame, missing=option)
        assert_allclose(from_array.coeff, from_frame.coeff)
        assert_allclose(from_array.factors, from_frame.factors)
|
||||
|
||||
def test_equivalence(self):
    """The ``pca`` function must return the factors of the ``PCA`` class."""
    sample = self.x.copy()
    class_factors = PCA(sample).factors
    function_factors = pca(sample)[0]
    assert_allclose(class_factors, function_factors)
|
||||
|
||||
def test_equivalence_full_matrices(self):
    """Factors must not depend on the ``svd_full_matrices`` option."""
    sample = self.x.copy()
    with_full = PCA(sample, svd_full_matrices=True).factors
    with_default = PCA(sample).factors
    assert_allclose(with_full, with_default)
|
||||
|
||||
|
||||
def test_missing():
    """NaN in the input without a ``missing`` option must raise ValueError."""
    # Only the first cell is set explicitly; the rest of the array is left
    # as allocated by np.empty.
    sample = np.empty((200, 50))
    sample[0, 0] = np.nan
    with pytest.raises(ValueError, match="data contains non-finite values"):
        PCA(sample)
|
||||
|
||||
|
||||
def test_too_many_missing(reset_randomstate):
    """Check drop-col and drop-min when one row is almost entirely NaN."""
    sample = np.random.standard_normal((200, 50))
    # All but the last three entries of the first row are missing.
    sample[0, :-3] = np.nan

    # Requesting 5 components with missing="drop-col" must fail.
    with pytest.raises(ValueError):
        PCA(sample, ncomp=5, missing="drop-col")

    # With missing="drop-min" the largest factor dimension is one less than
    # the largest data dimension.
    model = PCA(sample, missing="drop-min")
    assert max(model.factors.shape) == max(sample.shape) - 1
|
||||
|
||||
|
||||
def test_gls_warning(reset_randomstate):
    """GLS on nearly collinear series must emit an EstimationWarning."""
    sample = np.random.standard_normal((400, 200))
    # Every later column becomes the first column plus a little noise.
    sample[:, 1:] = sample[:, :1] + .01 * sample[:, 1:]
    expected_message = "Many series are being down weighted"
    with pytest.warns(EstimationWarning, match=expected_message):
        factors = PCA(sample, ncomp=2, gls=True).factors
    assert factors.shape == (sample.shape[0], 2)
|
||||
Reference in New Issue
Block a user