~ubuntu-branches/ubuntu/natty/python-cogent/natty

« back to all changes in this revision

Viewing changes to cogent/phylo/nj.py

  • Committer: Bazaar Package Importer
  • Author(s): Steffen Moeller
  • Date: 2010-12-04 22:30:35 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20101204223035-j11kinhcrrdgg2p2
Tags: 1.5-1
* Bumped standard to 3.9.1, no changes required.
* New upstream version.
  - major additions to Cookbook
  - added AlleleFreqs attribute to ensembl Variation objects.
  - added getGeneByStableId method to genome objects.
  - added Introns attribute to Transcript objects and an Intron class.
  - added Mann-Whitney test and a Monte-Carlo version
  - exploratory and confirmatory period estimation techniques (suitable for
    symbolic and continuous data)
  - Information theoretic measures (AIC and BIC) added
  - drawing of trees with collapsed nodes
  - progress display indicator support for terminal and GUI apps
  - added parser for illumina HiSeq2000 and GAiix sequence files as 
    cogent.parse.illumina_sequence.MinimalIlluminaSequenceParser.
  - added parser to FASTQ files, one of the output options for illumina's
    workflow, also added cookbook demo.
  - added functionality for parsing of SFF files without the Roche tools in
    cogent.parse.binary_sff
  - thousand fold performance improvement to nmds
  - >10-fold performance improvements to some Table operations

Show diffs side-by-side

added added

removed removed

Lines of Context:
9
9
Generalised as described by Pearson, Robins & Zhang, 1999.
10
10
"""
11
11
 
 
12
from __future__ import division
12
13
import numpy
13
14
from cogent.core.tree import TreeBuilder
14
15
from cogent.phylo.tree_collection import ScoredTreeCollection
15
 
from util import distanceDictTo2D
 
16
from cogent.phylo.util import distanceDictTo2D
 
17
from cogent.util import progress_display as UI
16
18
from collections import deque
17
19
 
18
20
 
19
 
__author__ = "Gavin Huttley"
 
21
__author__ = "Peter Maxwell"
20
22
__copyright__ = "Copyright 2007-2009, The Cogent Project"
21
23
__credits__ = ["Gavin Huttley", "Peter Maxwell"]
22
24
__license__ = "GPL"
23
 
__version__ = "1.4.1"
 
25
__version__ = "1.5.0"
24
26
__maintainer__ = "Gavin Huttley"
25
27
__email__ = "gavin.huttley@anu.edu.au"
26
28
__status__ = "Production"
160
162
        topologies.add(topology)
161
163
 
162
164
 
163
 
def gnj(dists, keep=None, dkeep=0, show_progress=False):
 
165
@UI.display_wrap
 
166
def gnj(dists, keep=None, dkeep=0, ui=None):
164
167
    """Arguments:
165
168
        - dists: dict of (name1, name2): distance
166
169
        - keep: number of best partial trees to keep at each iteration,  
175
178
 
176
179
    if keep is None:
177
180
        keep = len(names) * 5
 
181
    all_keep = keep + dkeep
178
182
        
179
183
    # For recognising duplicate topologies, encode partitions (ie: edges) as 
180
184
    # frozensets of tip names, which should be quickly comparable.
193
197
    star_tree.topology = frozenset([])
194
198
    trees = [star_tree]
195
199
    
 
200
    # Progress display auxiliary code
 
201
    template = ' size %%s/%s  trees %%%si' % (len(names), len(str(all_keep)))
 
202
    total_work = 0
 
203
    max_candidates = 1
 
204
    total_work_before = {}
 
205
    for L in range(len(names), 3, -1):
 
206
        total_work_before[L] = total_work
 
207
        max_candidates = min(all_keep, max_candidates*L*(L-1)//2)
 
208
        total_work += max_candidates
 
209
        
 
210
    def _show_progress():
 
211
        t = len(next_trees)
 
212
        work_done = total_work_before[L] + t
 
213
        ui.display(msg=template % (L, t), progress=work_done/total_work)
 
214
    
196
215
    for L in range(len(names), 3, -1):
197
216
        # Generator of candidate joins, best first.
198
217
        # Note that with dkeep>0 this generator is used up a bit at a time
201
220
        
202
221
        # First take up to 'keep' best ones
203
222
        next_trees = []
 
223
        _show_progress()
204
224
        for pair in candidates:
205
225
            next_trees.append(pair)
206
226
            if len(next_trees) == keep:
207
 
                break
208
 
        
 
227
                break 
 
228
        _show_progress()
 
229
 
209
230
        # The very best one is used as an anchor for measuring the 
210
231
        # topological distance to others
211
232
        best_topology = next_trees[0].topology
220
241
        # Now take up to dkeep joins, an equal number of the best at each 
221
242
        # topological distance, while not calculating any more TDs than 
222
243
        # necessary.
223
 
        all_keep = len(next_trees) + dkeep
224
244
        prior_td = dict(zip(map(id, trees), prior_td))
225
245
        target_td = 1
226
246
        while (candidates or queued) and len(next_trees) < all_keep:
238
258
            if queue[target_td]:
239
259
                next_trees.append(queue[target_td].popleft())
240
260
                queued -= 1
 
261
                _show_progress()
 
262
 
241
263
            target_td = target_td % max_td + 1
242
264
        
243
265
        trees = [pair.joined() for pair in next_trees]
244
 
        
245
 
        if show_progress:
246
 
            print L
247
 
        
 
266
                
248
267
    result = [tree.asScoreTreeTuple() for tree in trees]
249
268
    result.sort()
250
269
    return ScoredTreeCollection(result)