~ubuntu-branches/ubuntu/natty/python-cogent/natty

« back to all changes in this revision

Viewing changes to cogent/evolve/bootstrap.py

  • Committer: Bazaar Package Importer
  • Author(s): Steffen Moeller
  • Date: 2010-12-04 22:30:35 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20101204223035-j11kinhcrrdgg2p2
Tags: 1.5-1
* Bumped Standards-Version to 3.9.1, no changes required.
* New upstream version.
  - major additions to Cookbook
  - added AlleleFreqs attribute to ensembl Variation objects.
  - added getGeneByStableId method to genome objects.
  - added Introns attribute to Transcript objects and an Intron class.
  - added Mann-Whitney test and a Monte-Carlo version
  - exploratory and confirmatory period estimation techniques (suitable for
    symbolic and continuous data)
  - Information theoretic measures (AIC and BIC) added
  - drawing of trees with collapsed nodes
  - progress display indicator support for terminal and GUI apps
  - added parser for Illumina HiSeq2000 and GAIIx sequence files as
    cogent.parse.illumina_sequence.MinimalIlluminaSequenceParser.
  - added parser for FASTQ files, one of the output formats for Illumina's
    workflow; also added a cookbook demo.
  - added functionality for parsing of SFF files without the Roche tools in
    cogent.parse.binary_sff
  - thousand fold performance improvement to nmds
  - >10-fold performance improvements to some Table operations

Show diffs side-by-side

added added

removed removed

Lines of Context:
21
21
the EstimateProbability class.
22
22
 
23
23
"""
24
 
from __future__ import with_statement
 
24
from __future__ import with_statement, division
25
25
from cogent.util import parallel
26
26
from cogent.maths import optimisers
 
27
from cogent.util import progress_display as UI
27
28
 
28
29
import random
29
30
 
30
 
__author__ = "Gavin Huttley and Andrew Butterfield"
 
31
__author__ = "Gavin Huttley, Andrew Butterfield and Peter Maxwell"
31
32
__copyright__ = "Copyright 2007-2009, The Cogent Project"
32
33
__credits__ = ["Gavin Huttley","Andrew Butterfield", "Matthew Wakefield",
33
34
                    "Edward Lang", "Peter Maxwell"]
34
35
__license__ = "GPL"
35
 
__version__ = "1.4.1"
 
36
__version__ = "1.5.0"
36
37
__maintainer__ = "Gavin Huttley"
37
38
__email__ = "gavin.huttley@anu.edu.au"
38
39
__status__ = "Production"
52
53
    def setSeed(self, seed):
53
54
        self.seed = seed
54
55
    
55
 
    def run(self, show_progress=False, **opt_args):
 
56
    @UI.display_wrap
 
57
    def run(self, ui, **opt_args):
56
58
        # Sets self.observed and self.results (a list _numreplicates long) to
57
59
        # whatever is returned from self.simplify([LF result from each PC]).
58
60
        # self.simplify() is used as the entire LF result might not be picklable
59
61
        # for MPI. Subclass must provide self.alignment and
60
62
        # self.parameter_controllers
61
 
        alignment_randomness = random.Random()
62
 
        alignment_randomness.seed(self.seed)
63
 
        parallel.sync_random(alignment_randomness)
64
 
        
65
 
        opt_args['show_progress'] = show_progress
66
63
        if 'random_series' not in opt_args and not opt_args.get('local', None):
67
64
            opt_args['random_series'] = random.Random()
68
65
        
69
 
        self._observed = []
70
 
        starting_points = []
71
 
        for pc in self.parameter_controllers:
72
 
            pc.setAlignment(self.alignment)
73
 
            lf = pc.optimise(return_calculator=True, **opt_args)
74
 
            starting_points.append(lf)
75
 
            self._observed.append(lf)
76
 
        self.observed = self.simplify(*self.parameter_controllers)
77
 
 
78
 
        # making the simulated alignment on every node keeps alignment_randomness in sync
79
 
        # across all cpus.  This is only a good way of doing it so long as making the alignment
80
 
        # is much faster than optimising the likelihood function.
81
 
        simaligns = [self.parameter_controllers[0].simulateAlignment(
82
 
                random_series=alignment_randomness) for i in range(self._numreplicates)]
83
 
 
84
 
        def _one_replicate(simalign):
85
 
            for (pc, starting_point) in zip(self.parameter_controllers, starting_points):
86
 
                with pc.real_par_controller.updatesPostponed():
87
 
                    # using a calculator as a memo object to rest the parameter values
88
 
                    pc.real_par_controller.updateFromCalculator(starting_point)
89
 
                    # but alignment is different each time
90
 
                    pc.setAlignment(simalign)
91
 
                    # pc needs to be told that it can't use all the CPUs it did initially
92
 
                    # which seems less than ideal (it should be able to discover that itself) 
93
 
                    pc.real_par_controller.setupParallelContext()
94
 
                pc.optimise(**opt_args)
95
 
            return self.simplify(*self.parameter_controllers)
96
 
        
97
 
        self.results = parallel.map(_one_replicate, simaligns, show_progress=False)#opt shows anyway
98
 
    
 
66
        null_pc = self.parameter_controllers[0]
 
67
        pcs = len(self.parameter_controllers)
 
68
        if pcs == 1:
 
69
            model_label = ['']
 
70
        elif pcs == 2:
 
71
            model_label = ['null', 'alt ']
 
72
        else:
 
73
            model_label = ['null'] + ['alt%s'%i for i in range(1,pcs)]
 
74
        
 
75
        @UI.display_wrap
 
76
        def each_model(alignment, ui):
 
77
            def one_model(pc):
 
78
                pc.setAlignment(alignment)
 
79
                return pc.optimise(return_calculator=True, **opt_args)
 
80
            memos = ui.eager_map(one_model, self.parameter_controllers, 
 
81
                    labels=model_label)
 
82
            concise_result = self.simplify(*self.parameter_controllers)
 
83
            return (memos, concise_result)
 
84
        
 
85
        #optimisations = pcs * (self._numreplicates + 1)
 
86
        init_work = pcs / (self._numreplicates + pcs)
 
87
        ui.display('Original data', 0.0, init_work)
 
88
        (starting_points, self.observed) = each_model(self.alignment)
 
89
        
 
90
        ui.display('Randomness', init_work, 0.0)
 
91
        alignment_random_state = random.Random(self.seed).getstate()
 
92
        if self.seed is None:
 
93
            comm  = parallel.getCommunicator()
 
94
            alignment_random_state = comm.bcast(alignment_random_state, 0)
 
95
        
 
96
        def one_replicate(i):
 
97
            for (pc, start_point) in zip(self.parameter_controllers, starting_points):
 
98
                # using a calculator as a memo object to reset the params
 
99
                pc.updateFromCalculator(start_point)
 
100
            aln_rnd = random.Random(0)
 
101
            aln_rnd.setstate(alignment_random_state)
 
102
            aln_rnd.jumpahead(i*10**9)
 
103
            simalign = null_pc.simulateAlignment(random_series=aln_rnd)
 
104
            (dummy, result) = each_model(simalign)
 
105
            return result
 
106
        
 
107
        ui.display('Bootstrap', init_work)
 
108
        self.results = ui.eager_map(
 
109
                one_replicate, range(self._numreplicates), noun='replicate',
 
110
                start=init_work)
 
111
 
99
112
 
100
113
class EstimateProbability(ParametricBootstrapCore):
101
114
    # 2 parameter controllers, LR
154
167
        self.func_calcstats = func_calcstats
155
168
    
156
169
    def simplify(self, result):
157
 
        return (result.getLogLikelihood(), self.func_calcstats(result)) #.getStatisticsAsDict())
 
170
        return (result.getLogLikelihood(), self.func_calcstats(result))
158
171
    
159
172
    def getObservedStats(self):
160
173
        return self.observed[1]