1
from __future__ import division
2
from random import shuffle, random, choice
6
from math import factorial
7
except ImportError: # python version < 2.6
8
from cogent.maths.stats.special import Gamma
9
factorial = lambda x: Gamma(x+1)
11
from cogent.maths.stats.special import igam
13
__author__ = "Hua Ying, Julien Epps and Gavin Huttley"
14
__copyright__ = "Copyright 2007-2010, The Cogent Project"
15
__credits__ = ["Julien Epps", "Hua Ying", "Gavin Huttley"]
18
__maintainer__ = "Gavin Huttley"
19
__email__ = "Gavin.Huttley@anu.edu.au"
20
__status__ = "Production"
22
def chi_square(x, p, df=1):
23
"""returns the chisquare statistic and it's probability"""
26
sim = numpy.logical_not(numpy.logical_xor(x[0:end-p], x[p:end]))*1
27
s = ((numpy.ones((N-p,), float)-sim)**2).sum()
29
p_val = 1 - igam(df/2.0, D/2)
32
def g_statistic(X, p=None, idx=None):
34
return g statistic and p value
36
X - the periodicity profile (e.g. DFT magnitudes, autocorrelation etc)
37
X needs to contain only those period values being considered,
38
i.e. only periods in the range [llim, ulim]
41
X = abs(numpy.array(X))
46
assert idx is not None
49
M = numpy.floor(1/g_obs)
52
result = numpy.zeros((int(M+1),), float)
53
pmax_fact = factorial(pmax)
54
for index in xrange(1, min(pmax, int(M))+1):
55
v = (-1)**(index-1)*pmax_fact/factorial(pmax-index)/factorial(index)
56
v *= (1-index*g_obs)**(pmax-1)
62
def _seq_to_symbols(seq, motifs, motif_length, result=None):
63
"""return symbolic represetation of the sequence
66
- motifs: a list of sequence motifs
67
- motif_length: length of first motif
70
result = numpy.zeros(len(seq), numpy.uint8)
74
if motif_length is None:
75
motif_length = len(motifs[0])
77
for i in xrange(len(seq) - motif_length + 1):
78
if seq[i: i + motif_length] in motifs:
84
from cogent.maths._period import seq_to_symbols
87
seq_to_symbols = _seq_to_symbols
89
class SeqToSymbols(object):
90
"""class for converting all occurrences of motifs in passed sequence
92
def __init__(self, motifs, length=None, motif_length=None):
93
super(SeqToSymbols, self).__init__()
94
if type(motifs) == str:
98
self.motif_length = motif_length or len(motifs[0])
100
if length is not None:
101
self.setResultArray(length)
103
def setResultArray(self, length):
104
"""sets a result array for length"""
105
self.working = numpy.zeros(length, numpy.uint8)
108
def __call__(self, seq, result=None):
109
if result is None and self.working is None:
110
self.setResultArray(len(seq))
111
elif self.working is not None:
112
if len(seq) != self.working.shape[0]:
113
self.setResultArray(len(seq))
115
result = self.working
120
return seq_to_symbols(seq, self.motifs, self.motif_length, result)
123
def circular_indices(vector, start, length, num):
124
"""docstring for circular_indices"""
128
if start+num < length:
129
return vector[start: start+num]
130
# get all till end, then from beginning
131
return vector[start:] + vector[:start+num-length]
133
def sampled_places(block_size, length):
134
"""returns randomly sampled positions with block_size to make a new vector
137
# Main condition is to identify when a draw would run off end, we want to
138
# draw from beginning
139
num_seg, remainder = divmod(length, block_size)
140
vector = range(length)
142
for seg_num in xrange(num_seg):
144
result += circular_indices(vector, i, length, block_size)
147
result += circular_indices(vector, i+block_size, length, remainder)
149
assert len(result) == length, len(result)
152
def blockwise_bootstrap(signal, calc, block_size, num_reps, seq_to_symbols=None, num_stats=None):
153
"""returns observed statistic and the probability from the bootstrap
154
test of observing more `power' by chance than that estimated from the
158
- signal: a series, can be a sequence object
159
- calc: function to calculate the period power, e.g. ipdft, hybrid,
160
auto_corr or any other statistic.
161
- block_size: size of contiguous values for resampling
162
- num_reps: number of randomly generated permutations
163
- seq_to_symbols: function to convert a sequence to 1/0. If not
164
provided, the raw data is used.
165
- num_stats: the number of statistics being evaluated for each
166
interation. Default to 1.
168
signal_length = len(signal)
170
if seq_to_symbols is not None:
173
dtype=None # let numpy guess
175
signal = numpy.array(list(signal), dtype=dtype)
177
if seq_to_symbols is not None:
178
symbolic = seq_to_symbols(signal)
183
obs_stat = calc(data)
184
if seq_to_symbols is not None:
185
if sum(symbolic) == 0:
186
p = [numpy.array([1.0, 1.0, 1.0]), 1.0][num_stats == 1]
190
if num_stats is None:
192
num_stats = calc.getNumStats()
193
except AttributeError:
199
count = numpy.zeros(num_stats)
201
for rep in range(num_reps):
202
# get sample positions
203
sampled_indices = sampled_places(block_size, signal_length)
204
new_signal = signal.take(sampled_indices)
205
if seq_to_symbols is not None:
206
symbolic = seq_to_symbols(new_signal)
210
sim_stat = calc(data)
211
# count if > than observed
213
count[sim_stat >= obs_stat] += 1
214
elif sim_stat >= obs_stat:
217
return obs_stat, count / num_reps
221
# """Return SNR and CRB for periodicity estimates from symbolic signals"""
222
# # TODO: complete the function according to Julien's percrb4.m