1
# montecarlo.py: Routines for Monte Carlo simulation
3
# Copyright: Ed Schofield, 2005-2006
4
# License: BSD-style (see LICENSE.txt at root of scipy tree)
6
__author__ = "Ed Schofield"
8
from __future__ import division
11
from scipy.sandbox.montecarlo._intsampler import _intsampler
14
class genericsampler(object):
15
"""A base class for other samplers.
18
self.format = self.__class__.__name__[:3]
19
if self.__class__.__name__ == "genericsampler":
20
raise TypeError, "this class cannot be instantiated directly"
22
def seed(self, myseed):
23
"""Initializes the sampler with a given seed. If the seed is 0, uses a
24
seed from /dev/urandom or the system time.
26
self.sampler.seed(myseed)
29
class intsampler(genericsampler):
30
"""A class that samples objects from a given discrete distribution.
31
The distribution is defined on an integer-valued sample space and
32
specified with a PMF as a list or array like this:
34
>>> table = [10, 15, 20]
36
representing this pmf:
38
p(x) 10/45 15/45 20/45
40
The output will be something like:
41
>>> sampler = intsampler(table)
44
array([c, b, b, b, b, b, c, b, b, b], dtype=object)
46
The class is a thin wrapper around the '_intsampler' class, which
47
uses the 5 table compressed lookup sampler of Marsaglia (2004). It
48
is very fast, and requires near-constant time in the size of the
49
sample space. It provides a friendlier interface than _intsampler,
50
allowing use of an integer-valued PMF (unnormalized) and able to
51
return pmf values of the sample points in addition to the sample
54
def __init__(self, mytable):
55
self.probs = numpy.array(mytable, float)
60
raise ValueError, "sum of table frequencies must be > 0"
62
self.sampler = _intsampler(self.probs)
65
def sample(self, size, return_probs=0):
66
"""Generates a sample of the given size from the specified
67
discrete distribution, optionally returning the probabilities
68
under the distribution.
70
The optional argument return_probs, 0 by default, has the
72
0: don't return pmf values at each sample point
73
1: return pmf values at each sample point
74
2: return log pmf values at each sample point
76
sample = self.sampler.sample(size)
79
elif return_probs > 0:
80
# More fancy indexing:
81
sampleprobs = self.probs[sample]
83
return (sample, sampleprobs)
84
elif return_probs == 2:
85
return (sample, numpy.log(sampleprobs))
89
class dictsampler(genericsampler):
90
"""A class that samples objects from a given discrete distribution.
91
The distribution is specified as a dictionary representing a PMF
98
The output will be something like:
99
>>> sampler = dictsampler(table)
101
>> sampler.sample(10)
102
array([c, b, b, b, b, b, c, b, b, b], dtype=object)
104
giving a sample from this distribution:
106
p(x) 10/180 150/180 20/180
108
The function uses the constant-time 'intsampler' class, and should be
111
def __init__(self, mydict):
113
# self.labels = numpy.array(mydict.keys(), object)
114
# since numpy's construction of object arrays is dodgy. Instead,
115
# create an empty object array and fill it:
116
self.labels = numpy.empty(len(mydict), dtype=object)
117
for i, label in enumerate(mydict):
118
self.labels[i] = label
119
self.probs = numpy.array(mydict.values(), float)
124
raise ValueError, "sum of table frequencies must be > 0"
126
self.sampler = intsampler(self.probs)
129
def sample(self, size, return_probs=0):
130
"""Generates a sample of the given size from the specified
131
discrete distribution, optionally returning the probabilities
132
under the distribution.
134
The optional argument return_probs, 0 by default, has the
136
0: don't return pmf values at each sample point
137
1: return pmf values at each sample point
138
2: return log pmf values at each sample point
140
sampleindices = self.sampler.sample(size)
141
# Fancy indexing with the object array of labels
142
sample = self.labels[sampleindices]
143
if return_probs == 0:
145
elif return_probs > 0:
146
# More fancy indexing:
147
sampleprobs = self.probs[sampleindices]
148
if return_probs == 1:
149
return (sample, sampleprobs)
150
elif return_probs == 2:
151
return (sample, numpy.log(sampleprobs))
158
if __name__ == "__main__":