15
15
def sign_test(samp,mu0=0):
17
Signs test with mu0=0 by default (though
18
the median is often used in practice)
32
M=(N(+) - N(-))/2, N(+) is the number of values above Mu0,
33
N(-) is the number of values below. Values equal to Mu0
36
The p-value for M is calculated using the binomial distrubution
37
and can be intrepreted the same as for a t-test.
46
p=stats.binom_test(min(pos,neg),pos+neg,.5)
17
Signs test with mu0=0 by default (though
18
the median is often used in practice)
32
M=(N(+) - N(-))/2, N(+) is the number of values above Mu0,
33
N(-) is the number of values below. Values equal to Mu0
36
The p-value for M is calculated using the binomial distrubution
37
and can be intrepreted the same as for a t-test.
46
p=stats.binom_test(min(pos,neg),pos+neg,.5)
49
49
def descstats(data, cols=None, axis=0):
59
A list of the column number or field names (for a recarray) of variables.
60
Default is all columns.
59
A list of the column number or field names (for a recarray) of variables.
60
Default is all columns.
63
axis order of data. Default is 0 for column-ordered data.
63
axis order of data. Default is 0 for column-ordered data.
72
72
x = np.array(data) # or rather, the data we're interested in
74
# if isinstance(x, np.recarray):
75
# cols = np.array(len(x.dtype.names))
74
if isinstance(x, np.recarray):
75
cols = np.array(len(x.dtype.names))
76
76
if not isinstance(x, np.recarray) and x.ndim == 1:
93
93
Variance %(variance)22.4g Sum Observations %(sobs)22.4g
94
94
Std. Dev. %(stddev)22.4g
95
95
''' % {'name': cols, 'sum': 'N/A', 'nobs': len(x), 'mode': \
96
stats.mode(x)[0][0], 'nmode': stats.mode(x)[1][0], \
97
'mean': x.mean(), 'median': np.median(x), 'range': \
98
'('+str(x.min())+', '+str(x.max())+')', 'variance': \
99
x.var(), 'stddev': x.std(), 'coeffvar': \
100
stats.variation(x), 'skewness': stats.skew(x), \
101
'kurtosis': stats.kurtosis(x), 'uss': stats.ss(x),\
102
'ss': stats.ss(x-x.mean()), 'sobs': np.sum(x)}
96
stats.mode(x)[0][0], 'nmode': stats.mode(x)[1][0], \
97
'mean': x.mean(), 'median': np.median(x), 'range': \
98
'('+str(x.min())+', '+str(x.max())+')', 'variance': \
99
x.var(), 'stddev': x.std(), 'coeffvar': \
100
stats.variation(x), 'skewness': stats.skew(x), \
101
'kurtosis': stats.kurtosis(x), 'uss': stats.ss(x),\
102
'ss': stats.ss(x-x.mean()), 'sobs': np.sum(x)}
104
104
# ''' % {'name': cols[0], 'sum': 'N/A', 'nobs': len(x[cols[0]]), 'mode': \
105
105
# stats.mode(x[cols[0]])[0][0], 'nmode': stats.mode(x[cols[0]])[1][0], \
128
128
''' % tuple([stats.scoreatpercentile(x,per) for per in (1,5,10,25,
130
130
t,p_t=stats.ttest_1samp(x,0)
131
131
M,p_M=sign_test(x)
132
132
S,p_S=stats.wilcoxon(np.squeeze(x))
146
146
# in any event these should be split up, so that they can be called
147
147
# individually and only returned together if someone calls summary
148
148
# or something of the sort
150
150
elif x.shape[1] > 1:
152
152
Var. Name | Obs. Mean Std. Dev. Range
153
153
------------+--------------------------------------------------------'''+\
156
156
# for recarrays with columns passed as names
157
157
# if isinstance(cols[0],str):
162
162
# +str(x[var].max())+')'+os.linesep}
164
164
for var in range(x.shape[1]):
165
desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
165
desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
166
166
%(range)20s" % {'name': var, 'obs': len(x[:,var]), 'mean': x[:,var].mean(),
167
167
'stddev': x[:,var].std(), 'range': '('+str(x[:,var].min())+', '+\
168
168
str(x[:,var].max())+')'+os.linesep}
170
170
raise ValueError, "data not understood"
174
#if __name__=='__main__':
177
# loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
178
# relpath=(load_dataset(loc))
179
# dta=np.recfromcsv(relpath)
180
# descstats(dta,['stpop'])
181
# raw_input('Hit enter for multivariate test')
182
# descstats(dta,['stpop','avginc','vio'])
174
##if __name__=='__main__':
175
## data = np.recfromcsv('datasets/anes96/anes96.csv', delimiter='\t')
176
## descstats(dta,['stpop'])
180
## import datasets.load_dataset
181
## loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
182
## relpath=(load_dataset(loc))
183
## dta=np.recfromcsv(relpath)
184
## descstats(dta,['stpop'])
185
## raw_input('Hit enter for multivariate test')
186
## descstats(dta,['stpop','avginc','vio'])
184
188
# with plain arrays
185
189
# import string2dummy as s2d
187
191
# ndts=np.vstack(dts[col] for col in dts.dtype.names)
188
192
# observations in columns and data in rows
189
193
# is easier for the call to stats
191
195
# what to make of
192
196
# ndts=np.column_stack(dts[col] for col in dts.dtype.names)
193
197
# ntda=ntds.swapaxis(1,0)
207
211
data.exog = sm.add_constant(data.exog)
208
212
sum1 = descstats(data.exog)
209
213
sum1a = descstats(data.exog[:,:1])
211
215
# loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
212
216
# dta=np.recfromcsv(loc)
213
217
# summary2 = descstats(dta,['stpop'])
226
230
sum2 = descstats(data2.ahe)
227
231
sum3 = descstats(np.column_stack((data2.ahe,data2.yrseduc)))
228
232
sum4 = descstats(np.column_stack(([data2[_] for \
229
_ in data2.dtype.names])))
233
_ in data2.dtype.names])))