# Copyright (c) 2009 Shawn Willden
# mailto:shawn@willden.org
# I hereby license all patches I have contributed or will contribute to
# the Allmydata Tahoe-LAFS project, including the file 'statistics.py',
# under both the GNU General Public License, version 2 or later, and
# under the Transitive Grace Period Public License, version 1 or later.
from __future__ import division

import math
import sys
from functools import reduce

from mathutil import round_sigfigs
def pr_file_loss(p_list, k):
    """
    Probability of single-file loss for shares with reliabilities in
    p_list.

    Computes the probability that a single file will become
    unrecoverable, based on the individual share survival
    probabilities and k (number of shares needed for recovery).

    Example: pr_file_loss([.9] * 5 + [.99] * 5, 3) returns the
    probability that a file with k=3, N=10 and stored on five servers
    with reliability .9 and five servers with reliability .99 is lost.

    See survival_pmf docstring for important statistical assumptions.
    """
    assert 0 < k <= len(p_list)
    assert valid_probability_list(p_list)

    # Sum elements 0 through k-1 of the share set PMF to get the
    # probability that fewer than k shares survived.
    return sum(survival_pmf(p_list)[0:k])
def survival_pmf(p_list):
    """
    Return the collective PMF of share survival count for a set of
    shares with the individual survival probabilities in p_list.

    Example: survival_pmf([.99] * 10 + [.8] * 6) returns the
    probability mass function for the number of shares that will
    survive from an initial set of 16, 10 with p=0.99 and 6 with
    p=0.8.  The ith element of the resulting list is the probability
    that exactly i shares will survive.

    This calculation makes the following assumptions:

    1. p_list[i] is the probability that any individual share will
    survive during the time period in question (whatever that may
    be).

    2. The share failures are "independent", in the statistical
    sense.  Note that if a group of shares are stored on the same
    machine or even in the same data center, they are NOT independent
    and this calculation is therefore wrong.
    """
    assert valid_probability_list(p_list)

    pmf = survival_pmf_via_conv(p_list)

    assert valid_pmf(pmf)
    return pmf
def survival_pmf_via_bd(p_list):
    """
    Compute share survival PMF using the binomial distribution PMF as
    much as possible.

    This is more efficient than the convolution method below, but
    doesn't work for large numbers of shares because the
    binomial_coeff calculation blows up.  Since the efficiency gains
    only matter in the case of large numbers of shares, it's pretty
    much useless except for testing the convolution method.

    Note that this function does little to no error checking and is
    intended for internal use and testing only.
    """
    # Group shares by survival probability: each group's survival count
    # is binomial, and independent groups combine by convolution.
    pmf_list = [ binomial_distribution_pmf(p_list.count(p), p)
                 for p in set(p_list) ]
    return reduce(convolve, pmf_list)
def survival_pmf_via_conv(p_list):
    """
    Compute share survival PMF using iterated convolution of trivial
    PMFs.

    Note that this function does little to no error checking and is
    intended for internal use and testing only.
    """
    # Each share is a Bernoulli trial: [Pr(loss), Pr(survival)].
    pmf_list = [ [1 - p, p] for p in p_list ]
    return reduce(convolve, pmf_list)
def print_pmf(pmf, n=4, out=sys.stdout):
    """
    Print a PMF in a readable form, with values rounded to n
    significant digits, one "i=<k>: <p>" line per outcome.
    """
    for k, p in enumerate(pmf):
        # out.write works under both Python 2 and 3, unlike the
        # Python-2-only "print >>out" statement this replaces; the
        # output text is identical.
        out.write("i=%s: %s\n" % (k, round_sigfigs(p, n)))
def pr_backup_file_loss(p_list, backup_p, k):
    """
    Probability of single-file loss in a backup context.

    Same as pr_file_loss, except it factors in the probability of
    survival of the original source, specified as backup_p.  Because
    that's a precondition to caring about the availability of the
    backup, it's an independent event.
    """
    assert valid_probability_list(p_list)
    assert 0 < backup_p <= 1
    assert 0 < k <= len(p_list)

    # The backup matters only when the original is already gone.
    return pr_file_loss(p_list, k) * (1 - backup_p)
def find_k(p_list, target_loss_prob):
    """
    Find the highest k value that achieves the targeted loss
    probability, given the share reliabilities given in p_list.
    """
    assert valid_probability_list(p_list)
    assert 0 < target_loss_prob < 1

    pmf = survival_pmf(p_list)
    return find_k_from_pmf(pmf, target_loss_prob)
def find_k_from_pmf(pmf, target_loss_prob):
    """
    Find the highest k value that achieves the targeted loss
    probability, given the share survival PMF given in pmf.
    """
    assert valid_pmf(pmf)
    assert 0 < target_loss_prob < 1

    # Accumulate Pr[fewer than k shares survive] as k grows; the first
    # k whose cumulative loss probability exceeds the target is the
    # largest acceptable k.
    loss_prob = 0.0
    for k, p_k in enumerate(pmf):
        loss_prob += p_k
        if loss_prob > target_loss_prob:
            return k

    # we shouldn't be able to get here, since sum(pmf)==1.0
    return len(pmf) - 1
def repair_count_pmf(survival_pmf, k):
    """
    Return Pr[D=d], where D represents the number of shares that have
    to be repaired at the end of an interval, starting with a full
    set and subject to losses described in survival_pmf.
    """
    n = len(survival_pmf) - 1

    # Probability of 0 to repair is the probability of all shares
    # surviving plus the probability of less than k surviving (an
    # irreparable file gets no repairs).
    pmf = [ survival_pmf[n] + sum(survival_pmf[0:k]) ]

    # Probability of more than 0, up to N-k to repair: repairing d
    # shares means exactly n-d survived.
    for i in range(1, n-k+1):
        pmf.append(survival_pmf[n-i])

    # Probability of more than N-k to repair is 0, because that means
    # there are less than k available and the file is irreparable.
    for i in range(n-k+1, n+1):
        pmf.append(0.0)

    assert(valid_pmf(pmf))
    return pmf
def bandwidth_cost_function(file_size, shares, k, ul_dl_ratio):
    """
    Bandwidth cost of one repair run: download the whole file once,
    then upload 'shares' replacement shares of file_size/k bytes each,
    with upload bytes weighted by ul_dl_ratio relative to download.
    """
    upload_cost = float(file_size) / k * shares * ul_dl_ratio
    return file_size + upload_cost
def mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio):
    """
    Return the expected cost for a repair run on a file with the given
    survival_pmf and requiring k shares, in which upload cost is
    'ul_dl_ratio' times download cost.
    """
    repair_pmf = repair_count_pmf(survival_pmf, k)
    # Weight each possible repair size by its probability; zero shares
    # to repair costs nothing, so the sum starts at 1.
    expected_cost = sum([cost_function(file_size, new_shares, k, ul_dl_ratio)
                         * repair_pmf[new_shares]
                         for new_shares in range(1, len(repair_pmf))])
    return expected_cost
def eternal_repair_cost(cost_function, file_size, survival_pmf, k,
                        discount_rate=0, ul_dl_ratio=1.0):
    """
    Calculate the eternal repair cost for a file that is aggressively
    repaired, i.e. the sum of repair costs until the file is dead.
    """
    c = mean_repair_cost(cost_function, file_size, survival_pmf, k, ul_dl_ratio)
    # f = probability the file survives one interval (at least k shares
    # remain, so it is still repairable).
    f = 1 - sum(survival_pmf[0:k])
    r = float(discount_rate)

    # Closed form of the geometric series of discounted per-interval
    # repair costs over the file's expected lifetime.
    return (c * (1-r)) / (1 - (1-r) * f)
def valid_pmf(pmf):
    """
    Validate that pmf looks like a proper discrete probability mass
    function in list form.

    Returns true if the elements of pmf sum to 1.
    """
    # Round to absorb floating-point error accumulated by convolution.
    return round(sum(pmf), 5) == 1.0
def valid_probability_list(p_list):
    """
    Validate that p_list is a list of probabilities: every element
    must lie in the closed interval [0, 1].
    """
    for p in p_list:
        if p < 0 or p > 1:
            return False
    return True
def convolve(list_a, list_b):
    """
    Returns the discrete convolution of two lists.

    Given two random variables X and Y, the convolution of their
    probability mass functions Pr(X) and Pr(Y) is equal to the
    Pr(X+Y).
    """
    n = len(list_a)
    m = len(list_b)

    result = []
    for i in range(n + m - 1):
        # Sum products over every index pair (i-j, j) that is inside
        # both lists.  (Renamed from 'sum' to avoid shadowing the
        # builtin.)
        acc = 0.0

        lower = max(0, i - n + 1)
        upper = min(m - 1, i)

        for j in range(lower, upper+1):
            acc += list_a[i-j] * list_b[j]

        result.append(acc)

    return result
def binomial_distribution_pmf(n, p):
    """
    Returns Pr(K), where K ~ B(n,p), as a list of values.

    Returns the full probability mass function of a B(n, p) as a list
    of values, where the kth element is Pr(K=k), or, in the Tahoe
    context, the probability that exactly k copies of a file share
    survive, when placed on n independent servers with survival
    probability p.
    """
    assert p >= 0 and p <= 1, 'p=%s must be in the range [0,1]'%p

    result = []
    for k in range(n + 1):
        # Pr(K=k) = C(n,k) * p^k * (1-p)^(n-k)
        result.append(math.pow(p , k ) *
                      math.pow(1 - p, n - k) *
                      binomial_coeff(n, k))

    assert valid_pmf(result)
    return result
def binomial_coeff(n, k):
    """
    Returns the number of ways that k items can be chosen from a set
    of n items, as an exact integer.
    """
    assert k <= n

    # Multiply before dividing so every intermediate value is an exact
    # integer: after step i, accum == C(n-k+i, i), and C(n-k+i, i-1)
    # * (n-k+i) is always divisible by i.
    accum = 1
    for i in range(1, k+1):
        accum = accum * (n - k + i) // i

    return accum