2
#file test_alpha_diversity.py
3
from __future__ import division
4
from numpy import array, log, sqrt, exp
6
from cogent.util.unit_test import TestCase, main
7
from qiime.pycogent_backports.alpha_diversity import (expand_counts, counts,
8
observed_species, singles, doubles, osd, margalef, menhinick, dominance,
9
simpson, simpson_reciprocal, reciprocal_simpson, shannon, equitability,
10
berger_parker_d, mcintosh_d, brillouin_d, strong, kempton_taylor_q,
11
fisher_alpha, mcintosh_e, heip_e, simpson_e, robbins, robbins_confidence,
12
chao1_uncorrected, chao1_bias_corrected, chao1, chao1_var,
13
chao1_confidence, ACE, michaelis_menten_fit, diversity, lorenz_curve,
14
lorenz_curve_integrator, gini_index, goods_coverage,
15
lladser_ci, lladser_pe,
16
starr, ul_confidence_bounds, upper_confidence_bound,
17
lower_confidence_bound, lladser_ci_from_r, lladser_ci_series, starr_est,
18
esty_ci, get_interval_for_r_new_species, lladser_point_estimates)
20
from qiime.alpha_diversity import (AlphaDiversityCalc, AlphaDiversityCalcs,
21
single_file_cup, get_cup_metric, list_known_metrics)
23
from qiime.util import get_tmp_filename
24
from cogent.util.misc import remove_files
25
from itertools import izip
26
import qiime.pycogent_backports.alpha_diversity as alph
# Module metadata (extraction artifacts -- interleaved line numbers -- removed).
__author__ = "Rob Knight"
__copyright__ = "Copyright 2007-2012, The Cogent Project"
__credits__ = ["Rob Knight", "Justin Kuczynski, William Van Treuren",
               "Jose Antonio Navas Molina"]
__version__ = "1.5.3-dev"
__maintainer__ = "Rob Knight"
__email__ = "rob@spot.colorado.edu"
__status__ = "Production"
class diversity_tests(TestCase):
    """Tests of top-level functions"""

    def setUp(self):
        """Set up shared variables"""
        # Canonical abundance vector: 9 observed species, 3 singletons,
        # 3 doubletons (see test_osd below).
        self.TestData = array([0,1,1,4,2,5,2,4,1,2])
        self.NoSingles = array([0,2,2,4,5,0,0,0,0,0])
        self.NoDoubles = array([0,1,1,4,5,0,0,0,0,0])

        # Data and expected (x, y) points for the Lorenz-curve/Gini tests.
        self.gini_data = array([4.5, 6.7, 3.4, 15., 18., 3.5, 6.7, 14.1])
        self.gini_lorenz_curve_points = \
            [(0.125, 0.047287899860917935),
             (0.25, 0.095966620305980521),
             (0.375, 0.15855354659248957),
             (0.5, 0.2517385257301808),
             (0.625, 0.34492350486787204),
             (0.75, 0.541029207232267),
             (0.875, 0.74965229485396379),
             # NOTE(review): closing point reconstructed (dropped in
             # extraction) -- a Lorenz curve always terminates at (1, 1).
             (1.0, 1.0)]

        # for Manuel's coverage estimations
        self.tmp_file = get_tmp_filename(tmp_dir="./",
                                         suffix="test_single_file_cup.biom")
        self.tmp_outfile = get_tmp_filename(tmp_dir="./",
                                            suffix="test_single_file_cup.txt")
        self.files_to_remove = []

    def tearDown(self):
        """ Remove tmp files """
        remove_files(self.files_to_remove)
64
def test_expand_counts(self):
65
"""expand_counts should return correct expanded array"""
67
self.assertEqual(expand_counts(c), array([0,0,2,3,3]))
69
def test_counts(self):
70
"""counts should return correct array"""
71
c = array([5,0,1,1,5,5])
73
exp = array([1,2,0,0,0,3])
74
self.assertEqual(obs, exp)
77
exp = array([2,3,2,0,0,3])
78
self.assertEqual(obs, exp)
80
def test_singles(self):
81
"""singles should return correct # of singles"""
82
self.assertEqual(singles(self.TestData), 3)
83
self.assertEqual(singles(array([0,3,4])), 0)
84
self.assertEqual(singles(array([1])), 1)
86
def test_doubles(self):
87
"""doubles should return correct # of doubles"""
88
self.assertEqual(doubles(self.TestData), 3)
89
self.assertEqual(doubles(array([0,3,4])), 0)
90
self.assertEqual(doubles(array([2])), 1)
93
"""osd should return correct # of observeds, singles, doubles"""
94
self.assertEqual(osd(self.TestData), (9,3,3))
96
def test_margalef(self):
97
"""margalef should match hand-calculated values"""
98
self.assertEqual(margalef(self.TestData), 8/log(22))
100
def test_menhinick(self):
101
"""menhinick should match hand-calculated values"""
102
self.assertEqual(menhinick(self.TestData), 9/sqrt(22))
104
def test_dominance(self):
105
"""dominance should match hand-calculated values"""
106
c = array([1,0,2,5,2])
107
self.assertFloatEqual(dominance(c), .34)
109
self.assertEqual(dominance(d), 1)
111
def test_simpson(self):
112
"""simpson should match hand-calculated values"""
113
c = array([1,0,2,5,2])
114
self.assertFloatEqual(simpson(c), .66)
116
self.assertFloatEqual(simpson(d), 0)
118
def test_reciprocal_simpson(self):
119
"""reciprocal_simpson should match hand-calculated results"""
120
c = array([1,0,2,5,2])
121
self.assertFloatEqual(reciprocal_simpson(c), 1/.66)
123
def test_simpson_reciprocal(self):
124
"""simpson_reciprocal should match 1/D results"""
125
c = array([1,0,2,5,2])
126
self.assertFloatEqual(simpson_reciprocal(c), 1./dominance(c))
128
def test_shannon(self):
129
"""shannon should match hand-calculated values"""
131
self.assertFloatEqual(shannon(c), 0)
133
self.assertFloatEqual(shannon(c), 1)
134
c = array([1,1,1,1,0])
135
self.assertEqual(shannon(c), 2)
137
def test_equitability(self):
138
"""equitability should match hand-calculated values"""
140
self.assertFloatEqual(equitability(c), 0)
142
self.assertFloatEqual(equitability(c), 1)
143
c = array([1,1,1,1,0])
144
self.assertEqual(equitability(c), 1)
146
def test_berger_parker_d(self):
147
"""berger-parker_d should match hand-calculated values"""
149
self.assertFloatEqual(berger_parker_d(c), 1)
151
self.assertFloatEqual(berger_parker_d(c), 0.5)
152
c = array([1,1,1,1,0])
153
self.assertEqual(berger_parker_d(c), 0.25)
155
def test_mcintosh_d(self):
156
"""mcintosh_d should match hand-calculated values"""
158
self.assertFloatEqual(mcintosh_d(c), 0.636061424871458)
160
def test_brillouin_d(self):
161
"""brillouin_d should match hand-calculated values"""
163
self.assertFloatEqual(brillouin_d(c), 0.86289353018248782)
165
def test_strong(self):
166
"""strong's dominance index should match hand-calculated values"""
168
self.assertFloatEqual(strong(c), 0.214285714)
170
def test_kempton_taylor_q(self):
171
"""kempton_taylor_q should approximate Magurran 1998 calculation p143"""
172
c = array([2,3,3,3,3,3,4,4,4,6,6,7,7,9,9,11,14,15,15,20,29,33,34,
173
36,37,53,57,138,146,170])
174
self.assertFloatEqual(kempton_taylor_q(c), 14/log(34/4))
176
def test_fisher_alpha(self):
177
"""fisher alpha should match hand-calculated value."""
178
c = array([4,3,4,0,1,0,2])
179
obs = fisher_alpha(c)
180
self.assertFloatEqual(obs, 2.7823795367398798)
182
def test_mcintosh_e(self):
183
"""mcintosh e should match hand-calculated value."""
188
self.assertEqual(mcintosh_e(c), exp)
190
def test_heip_e(self):
191
"""heip e should match hand-calculated value"""
193
h = shannon(c, base=e)
194
expected = exp(h-1)/3
195
self.assertEqual(heip_e(c), expected)
197
def test_simpson_e(self):
198
"""simpson e should match hand-calculated value"""
201
self.assertEqual((1/s)/4, simpson_e(c))
203
def test_robbins(self):
204
"""robbins metric should match hand-calculated value"""
205
c = array([1,2,3,0,1])
207
self.assertEqual(r,2./7)
209
def test_robbins_confidence(self):
210
"""robbins CI should match hand-calculated value"""
211
c = array([1,2,3,0,1])
212
r = robbins_confidence(c, 0.05)
216
self.assertEqual(r, ((s-k)/(n+1), (s+k)/(n+1)))
219
def test_observed_species(self):
220
"""observed_species should return # observed species"""
221
c = array([4,3,4,0,1,0,2])
222
obs = observed_species(c)
224
self.assertEqual(obs, exp)
226
obs = observed_species(c)
228
self.assertEqual(obs, exp)
229
self.assertEqual(observed_species(self.TestData), 9)
231
def test_chao1_bias_corrected(self):
232
"""chao1_bias_corrected should return same result as EstimateS"""
233
obs = chao1_bias_corrected(*osd(self.TestData))
234
self.assertEqual(obs, 9.75)
236
def test_chao1_uncorrected(self):
237
"""chao1_uncorrected should return same result as EstimateS"""
238
obs = chao1_uncorrected(*osd(self.TestData))
239
self.assertEqual(obs, 10.5)
241
def test_chao1(self):
242
"""chao1 should use right decision rules"""
243
self.assertEqual(chao1(self.TestData), 9.75)
244
self.assertEqual(chao1(self.TestData,bias_corrected=False),10.5)
245
self.assertEqual(chao1(self.NoSingles), 4)
246
self.assertEqual(chao1(self.NoSingles,bias_corrected=False),4)
247
self.assertEqual(chao1(self.NoDoubles), 5)
248
self.assertEqual(chao1(self.NoDoubles,bias_corrected=False),5)
250
def test_chao1_var(self):
251
"""chao1_var should match observed results from EstimateS"""
252
#NOTE: EstimateS reports sd, not var, and rounds to 2 dp
253
self.assertFloatEqual(chao1_var(self.TestData), 1.42**2, eps=0.01)
254
self.assertFloatEqual(chao1_var(self.TestData,bias_corrected=False),\
256
self.assertFloatEqualAbs(chao1_var(self.NoSingles), 0.39**2, eps=0.01)
257
self.assertFloatEqualAbs(chao1_var(self.NoSingles, \
258
bias_corrected=False), 0.39**2, eps=0.01)
259
self.assertFloatEqualAbs(chao1_var(self.NoDoubles), 2.17**2, eps=0.01)
260
self.assertFloatEqualAbs(chao1_var(self.NoDoubles, \
261
bias_corrected=False), 2.17**2, eps=0.01)
263
def test_chao1_confidence(self):
264
"""chao1_confidence should match observed results from EstimateS"""
265
#NOTE: EstimateS rounds to 2 dp
266
self.assertFloatEqual(chao1_confidence(self.TestData), (9.07,17.45), \
268
self.assertFloatEqual(chao1_confidence(self.TestData, \
269
bias_corrected=False), (9.17,21.89), eps=0.01)
270
self.assertFloatEqualAbs(chao1_confidence(self.NoSingles),\
272
self.assertFloatEqualAbs(chao1_confidence(self.NoSingles, \
273
bias_corrected=False), (4,4.95), eps=0.01)
274
self.assertFloatEqualAbs(chao1_confidence(self.NoDoubles), \
275
(4.08,17.27), eps=0.01)
276
self.assertFloatEqualAbs(chao1_confidence(self.NoDoubles, \
277
bias_corrected=False), (4.08,17.27), eps=0.01)
280
"""ACE should match values calculated by hand"""
281
self.assertFloatEqual(ACE(array([2,0])), 1.0, eps=0.001)
282
# next: just returns the number of species when all are abundant
283
self.assertFloatEqual(ACE(array([12,0,9])), 2.0, eps=0.001)
284
self.assertFloatEqual(ACE(array([12,2,8])), 3.0, eps=0.001)
285
self.assertFloatEqual(ACE(array([12,2,1])), 4.0, eps=0.001)
286
self.assertFloatEqual(ACE(array([12,1,2,1])), 7.0, eps=0.001)
287
self.assertFloatEqual(ACE(array([12,3,2,1])), 4.6, eps=0.001)
288
self.assertFloatEqual(ACE(array([12,3,6,1,10])), 5.62749672, eps=0.001)
290
def test_michaelis_menten_fit(self):
291
""" michaelis_menten_fit should match hand values in limiting cases"""
292
res = michaelis_menten_fit([22])
293
self.assertFloatEqual(res,1.0,eps=.01)
294
res = michaelis_menten_fit([42])
295
self.assertFloatEqual(res,1.0,eps=.01)
296
res = michaelis_menten_fit([34],num_repeats=3,params_guess=[13,13])
297
self.assertFloatEqual(res,1.0,eps=.01)
298
res = michaelis_menten_fit([70,70],num_repeats=5)
299
self.assertFloatEqual(res,2.0,eps=.01)
301
def test_lorenz_curve(self):
302
"""Tests that Lorenz curve points are correctly calculated from data."""
303
self.assertEqual(self.gini_lorenz_curve_points, lorenz_curve(self.gini_data))
305
# check errors on negative data
306
self.assertRaises(ValueError, lorenz_curve, [1.0, -3.1, 4.5])
308
def test_lorenz_curve_integrator(self):
309
"""Tests both methods of integration work correctly."""
310
expected_trapezoids = 0.33614394993045893
311
expected_rectangles = 0.39864394993045893
313
self.assertAlmostEqual(expected_trapezoids,
314
lorenz_curve_integrator(self.gini_lorenz_curve_points, 'trapezoids'))
316
self.assertAlmostEqual(expected_rectangles,
317
lorenz_curve_integrator(self.gini_lorenz_curve_points, 'rectangles'))
319
def test_gini_index(self):
320
"""Test Gini index is correctly calculated."""
321
expected_trapezoids = 0.32771210013908214
322
expected_rectangles = 0.20271210013908214
324
self.assertAlmostEqual(expected_trapezoids,
325
gini_index(self.gini_data, 'trapezoids'))
327
self.assertAlmostEqual(expected_rectangles,
328
gini_index(self.gini_data, 'rectangles'))
330
def test_single_file_cup(self):
331
"""single_file_cup returns matrix with estimates"""
332
# Test using a string as metrics
333
# convert_biom using otu_table w/o leading #
334
bt_string = '{"rows": [{"id": "1", "metadata": null}, {"id": "2",\
335
"metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata":\
336
null}, {"id": "5", "metadata": null}], "format": "Biological Observation\
337
Matrix 0.9.1-dev", "data": [[0, 0, 3.0], [0, 1, 4.0], [1, 0, 2.0],\
338
[1, 1, 5.0], [2, 0, 1.0], [2, 1, 2.0], [3, 1, 4.0], [4, 0, 1.0]], "columns":\
339
[{"id": "S1", "metadata": null}, {"id": "S2", "metadata": null}],\
340
"generated_by": "BIOM-Format 0.9.1-dev", "matrix_type": "sparse", "shape":\
341
[5, 2], "format_url": "http://biom-format.org", "date":\
342
"2012-05-04T09:28:28.247809", "type": "OTU table", "id": null,\
343
"matrix_element_type": "float"}'
345
fh = open(self.tmp_file,"w")
348
self.files_to_remove.append(self.tmp_file)
349
self.files_to_remove.append(self.tmp_outfile)
351
# Not much testing here, just make sure we get back a (formatted)
352
# matrix with the right dimensions
353
single_file_cup(self.tmp_file, 'lladser_pe,lladser_ci',
354
self.tmp_outfile, r=4, alpha=0.95, f=10, ci_type="ULCL")
355
observed = open(self.tmp_outfile,"U").readlines()
356
self.assertEqual(len(observed), 3)
357
self.assertEqual(len(observed[1].split('\t')), 4)
359
# Test using a list as metrics
360
# convert_biom using otu_table w/o leading #
361
bt_string = '{"rows": [{"id": "1", "metadata": null}], "format":\
362
"Biological Observation Matrix 0.9.1-dev", "data": [[0, 0, 3.0]], "columns":\
363
[{"id": "S1", "metadata": null}], "generated_by": "BIOM-Format 0.9.1-dev",\
364
"matrix_type": "sparse", "shape": [1, 1], "format_url":\
365
"http://biom-format.org", "date": "2012-05-04T09:36:57.500673", "type":\
366
"OTU table", "id": null, "matrix_element_type": "float"}'
368
fh = open(self.tmp_file,"w")
372
single_file_cup(self.tmp_file, ['lladser_pe','lladser_ci'],
373
self.tmp_outfile, r=4, alpha=0.95, f=10, ci_type="ULCL")
374
observed = open(self.tmp_outfile,"U").readlines()
375
expected=["\tlladser_pe\tlladser_lower_bound\tlladser_upper_bound\n",
377
self.assertEqual(observed, expected)
379
def test_lladser_point_estimates(self):
380
"""lladser_point_estimates calculates correct estimates"""
382
s = [5,1,5,1,2,3,1,5,3,2,5,3]
384
observed = list(lladser_point_estimates(s,r))
385
self.assertEqual(len(observed), 3)
390
self.assertEqual(x[0], (r-1)/t)
393
#Estimator has variance of (1-p)^2/(r-2),
394
# which for r=7 and p=0.5 is 0.05
395
seq="WBWBWBWBWBWBWBWBWBWBWBWBWBWBWBWBWBW"
398
for i in range(reps):
399
p,_,_ = list(lladser_point_estimates(seq, r=7))[0]
401
self.assertTrue(0.45 < sum/reps and sum/reps <0.55)
403
def test_get_interval_for_r_new_species(self):
404
"""get_interval_for_r_new_species should return the right intervals"""
406
s = [5,1,5,1,2,3,1,5,3,2,5]
407
expected = [(3,set([5]),4,0),
409
(4,set([5,1,2]),9,4)]
410
for x,y in izip (get_interval_for_r_new_species(s,2), expected):
411
self.assertEqual(y,x)
415
self.assertEqual(list(get_interval_for_r_new_species(s,2)), [])
417
def test_lladser_ci_series_exact(self):
418
"""lladser_ci_series returns series of predictions"""
420
#Values are from Manuel's email of 9/11/09
422
urn_1 = 'RWBWWBWRRWRYWRPPZ'
423
results = list(lladser_ci_series(urn_1, r=4))
424
self.assertEqual(len(results), 3)
426
def test_lladser_ci_series_random(self):
427
"""lladser_ci_series' interval contain true prob with expected alpha."""
433
for i in range(reps):
434
obs = list(lladser_ci_series(seq, r=4, alpha=alpha))[0]
435
observations.append(obs)
436
tps = filter (lambda (a,b): a < 0.5 and 0.5 < b, observations)
437
self.assertTrue(len(tps) >= alpha*reps ) #100%-95%
439
def test_lladser_ci_from_r(self):
440
"""lladser_ci_from_r returns correct conf interval."""
445
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f)
446
self.assertFloatEqual(obs_low, 0.0806026244)
447
self.assertFloatEqual(obs_high, 0.806026244)
451
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f)
452
self.assertFloatEqual(obs_low, 0.02787923964)
453
self.assertFloatEqual(obs_high, 0.2787923964)
456
# make sure we test with each possible alpha
458
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha)
459
self.assertFloatEqual(obs_low, 0.03184536992)
460
self.assertFloatEqual(obs_high, 0.3184536992)
464
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha)
465
self.assertFloatEqual(obs_low, 0.005635941995)
466
self.assertFloatEqual(obs_high, 0.05635941995)
469
# test other ci_types
471
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type)
472
self.assertFloatEqual(obs_low, 0.01095834700)
473
self.assertFloatEqual(obs_high, 0.1095834700)
478
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type)
479
self.assertFloatEqual(obs_low, 0)
480
self.assertFloatEqual(obs_high, 0.6295793622)
483
obs_low, obs_high = lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type)
484
self.assertFloatEqual(obs_low, 0.0817691447)
485
self.assertFloatEqual(obs_high, 1)
487
#Requesting CI for not precomputed values raises Exception
489
self.assertRaises(ValueError, lladser_ci_from_r, r=r, t=t, f=f,
490
alpha=alpha, ci_type=ci_type)
492
def test_esty_ci(self):
493
"""esty_ci computes correct confidence intervals."""
495
data = [1,1,2,1,1,3,2,1,3,4]
497
(observed_upper, observed_lower) = \
498
zip(*diversity(data, f=esty_ci, step=1))
500
expected_upper = [1, 1.38590382, 1.40020259, 0.67434465, 0.55060902,
501
0.71052858, 0.61613483, 0.54041008, 0.43554755,
503
expected_lower= [1, -1.38590382, -0.73353593, -0.17434465, -0.15060902,
504
-0.04386191, -0.33042054, -0.29041008, -0.43554755,
507
self.assertFloatEqual(observed_upper, expected_upper)
508
self.assertFloatEqual(observed_lower, expected_lower)
510
def test_goods_coverage(self):
511
counts = [1]*75 + [2,2,2,2,2,2,3,4,4]
512
res = goods_coverage(counts)
513
self.assertFloatEqual(res, 0.23469387755)
if __name__ == '__main__':
    # NOTE(review): call reconstructed -- the dangling guard had its body
    # dropped in extraction; `main` is imported from cogent.util.unit_test.
    main()