11
11
__copyright = "Copyright 2007, the authors."
12
12
__credits__ = ["Rob Knight", "Micah Hamady", "Daniel McDonald"]
13
13
__license__ = "GPL"
15
15
__maintainer__ = "Rob Knight, Micah Hamady"
16
16
__email__ = "rob@spot.colorado.edu, hamady@colorado.edu"
17
17
__status__ = "Prototype"
434
434
result[i] = [metric(branch_lengths, first_col, cols[j]) for \
435
435
j in range(num_cols)]
438
def unifrac_one_sample(one_sample_idx, branch_lengths, m, metric=unifrac):
439
"""Calculates unifrac(one_sample_idx,j) for all environments j in m.
441
branch_lengths is the array of branch lengths.
443
m is 2D count array: rows are taxa (corresponding to branch_lenths),
444
samples/states/envs are columns.
445
Assumes that ancestral
446
states have already been calculated (either by logical_or or Fitch).
448
metric: metric to use for when comparing two environments. Default
449
is unifrac. must be called like:
450
metric(branch_lengths, env1_counts, env2counts)
452
returns a numpy 1d array
453
if asymmetric metric, returns metric(one_sample, other), usually a row in
454
the mtx returned by unifrac_matrix
456
num_cols = m.shape[-1]
457
cols = [m[:,i] for i in range(num_cols)]
458
# result = zeros((num_cols), float)
460
first_col = cols[one_sample_idx]
461
# better to do loop into preallocated numpy array here?
462
result = array([metric(branch_lengths, first_col, cols[j]) for \
463
j in range(num_cols)],'float')
438
466
def env_unique_fraction(branch_lengths, m):
439
467
""" Calculates unique branch length for each env.
535
563
result[i,:j+1] = row_result
536
564
result = result + transpose(result)
567
def weighted_one_sample(one_sample_idx, branch_lengths, m, tip_indices,
568
bl_correct=False, tip_distances=None, unifrac_f=_weighted_unifrac):
569
"""Calculates weighted_unifrac(one_sample_idx,j) for all environments j in m
571
Requires tip_indices for calculating sums, etc.
572
bl_correct: if True (default: False), applies branch length correction.
573
tip_distances is required for normalization for weighted unifrac.
575
num_cols = m.shape[-1]
576
cols = [m[:,i] for i in range(num_cols)] #note that these will be row vecs
577
sums = [take(m[:,i], tip_indices).sum() for i in range(num_cols)]
578
result = zeros((num_cols),float)
580
i_sum = sums[one_sample_idx]
581
first_col = cols[one_sample_idx]
583
for j in range(num_cols):
586
curr = unifrac_f(branch_lengths, first_col, \
587
second_col, i_sum, j_sum)
589
curr /= _branch_correct(tip_distances, first_col, \
590
second_col, i_sum, j_sum)
539
594
def weighted_unifrac_vector(branch_lengths, m, tip_indices, bl_correct=False,
540
595
tip_distances=None, unifrac_f=_weighted_unifrac):