~ubuntu-branches/ubuntu/trusty/qiime/trusty

« back to all changes in this revision

Viewing changes to qiime/workflow/core_diversity_analyses.py

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2013-06-17 18:28:26 UTC
  • mfrom: (9.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20130617182826-376az5ad080a0sfe
Tags: 1.7.0+dfsg-1
Upload preparations done for BioLinux to Debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
 
2
# File created on 09 Feb 2013
 
3
from __future__ import division
 
4
import re
 
5
from glob import glob
 
6
from os.path import split, splitext
 
7
from qiime.parse import (parse_qiime_parameters,
 
8
                         parse_mapping_file_to_dict)
 
9
from qiime.util import (create_dir,
 
10
                        MetadataMap)
 
11
from qiime.workflow.downstream import (
 
12
                            run_beta_diversity_through_plots,
 
13
                            run_alpha_rarefaction,
 
14
                            run_summarize_taxa_through_plots)
 
15
from qiime.workflow.util import (print_to_stdout,
 
16
                            generate_log_fp,
 
17
                            WorkflowLogger,
 
18
                            log_input_md5s,
 
19
                            call_commands_serially,
 
20
                            get_params_str)
 
21
 
 
22
__author__ = "Greg Caporaso"
 
23
__copyright__ = "Copyright 2011, The QIIME project"
 
24
__credits__ = ["Greg Caporaso"]
 
25
__license__ = "GPL"
 
26
__version__ = "1.7.0"
 
27
__maintainer__ = "Greg Caporaso"
 
28
__email__ = "gregcaporaso@gmail.com"
 
29
__status__ = "Release"
 
30
 
 
31
_index_headers = {
 
32
     "run_summary": "Run summary data",
 
33
     "beta_diversity_even": "Beta diversity results (even sampling: %d)",
 
34
     "alpha_diversity": "Alpha diversity results",
 
35
     "taxa_summary": "Taxonomic summary results",
 
36
     "taxa_summary_categorical": "Taxonomic summary results (by %s)",
 
37
     "otu_category_sig": "Category results"}
 
38
 
 
39
def format_index_link(link_description,relative_path):
 
40
    
 
41
    return '<td>%s</td><td> <a href="%s" target="_blank">%s</a></td>' % (link_description,
 
42
                                        re.sub('/+','/',relative_path),
 
43
                                        split(relative_path)[1])
 
44
 
 
45
def generate_index_page(index_links,
 
46
                        index_fp,
 
47
                        order=[_index_headers['run_summary']]):
 
48
    """ generate the top-level index page """
 
49
    # get containing directory for index_fp
 
50
    top_level_dir = split(split(index_fp)[0])[1]
 
51
    index_page_header = get_index_page_header()
 
52
    index_lines = [index_page_header]
 
53
    d = {}
 
54
    for e in index_links:
 
55
        try:
 
56
            d[e[2]].append((e[0],e[1]))
 
57
        except KeyError:
 
58
            d[e[2]] = [(e[0],e[1])]
 
59
    index_lines.append('<table border=1>\n')
 
60
    
 
61
    # Determine the order the data should be presented in. This should be
 
62
    # the order that the user requested, followed by any categories that
 
63
    # the user didn't include in the order parameter. 
 
64
    ordered_table_entries = order + [k for k in d if k not in order]
 
65
    for k in ordered_table_entries:
 
66
        v = d[k]
 
67
        index_lines.append(
 
68
         '<tr colspan=2 align=center bgcolor=#e8e8e8><td colspan=2 align=center>%s</td></tr>\n' % k)
 
69
        for description,path in v:
 
70
            path = re.sub('.*%s' % top_level_dir,'./',path)
 
71
            index_lines.append('<tr>%s</tr>\n' % format_index_link(description,path))
 
72
    index_lines.append('</table>\n')
 
73
    
 
74
    index_page_footer = get_index_page_footer()
 
75
    index_lines.append(index_page_footer)
 
76
    
 
77
    open(index_fp,'w').write(''.join(index_lines))
 
78
 
 
79
def get_index_page_header():
 
80
    return """<html>
 
81
<head><title>QIIME results</title></head>
 
82
<body>
 
83
<a href="http://www.qiime.org" target="_blank"><img src=\"http://qiime.org/_static/wordpressheader.png\" alt="www.qiime.org""/></a><p>
 
84
"""
 
85
    
 
86
def get_index_page_footer():
 
87
    return """<p><b>Need help?</b><br>You can get answers to your questions on the <a href="http://forum.qiime.org" target="_blank">QIIME Forum</a>.<br>See the <a href="http://qiime.org/tutorials/index.html" target="_blank">QIIME tutorials</a> for examples of additional analyses that can be run.<br>You can find documentation of the QIIME scripts in the <a href="http://qiime.org/scripts/index.html" target="_blank">QIIME script index</a>.
 
88
</body></html>"""
 
89
 
 
90
 
 
91
def run_core_diversity_analyses(
 
92
    biom_fp,
 
93
    mapping_fp,
 
94
    sampling_depth,
 
95
    output_dir,
 
96
    qiime_config,
 
97
    command_handler=call_commands_serially,
 
98
    tree_fp=None,
 
99
    params=None,
 
100
    categories=None,
 
101
    arare_min_rare_depth=10,
 
102
    arare_num_steps=10,
 
103
    parallel=False,
 
104
    suppress_taxa_summary=False,
 
105
    suppress_beta_diversity=False,
 
106
    suppress_alpha_diversity=False,
 
107
    suppress_otu_category_significance=False,
 
108
    status_update_callback=print_to_stdout):
 
109
    """
 
110
    """
 
111
    if categories != None:
 
112
        # Validate categories provided by the users
 
113
        mapping_data, mapping_comments = \
 
114
         parse_mapping_file_to_dict(open(mapping_fp,'U'))
 
115
        metadata_map = MetadataMap(mapping_data, mapping_comments)
 
116
        for c in categories:
 
117
            if c not in metadata_map.CategoryNames:
 
118
                raise ValueError, ("Category '%s' is not a column header "
 
119
                 "in your mapping file. "
 
120
                 "Categories are case and white space sensitive. Valid "
 
121
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
 
122
            if metadata_map.hasSingleCategoryValue(c):
 
123
                raise ValueError, ("Category '%s' contains only one value. "
 
124
                 "Categories analyzed here require at least two values." % c)
 
125
            
 
126
    else:
 
127
        categories= []
 
128
    
 
129
    # prep some variables
 
130
    if params == None:
 
131
        params = parse_qiime_parameters([])
 
132
        
 
133
    create_dir(output_dir)
 
134
    index_fp = '%s/index.html' % output_dir
 
135
    index_links = []
 
136
    commands = []
 
137
    
 
138
    # begin logging
 
139
    log_fp = generate_log_fp(output_dir)
 
140
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
 
141
    logger = WorkflowLogger(log_fp,
 
142
                            params=params,
 
143
                            qiime_config=qiime_config)
 
144
    input_fps = [biom_fp,mapping_fp]
 
145
    if tree_fp != None:
 
146
        input_fps.append(tree_fp)
 
147
    log_input_md5s(logger,input_fps)
 
148
 
 
149
    # run print_biom_table_summary.py on input BIOM table
 
150
    try:
 
151
        params_str = get_params_str(params['print_biom_table_summary'])
 
152
    except KeyError:
 
153
        params_str = ''
 
154
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
 
155
    print_biom_table_summary_cmd = \
 
156
     "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
 
157
     (biom_fp, biom_table_stats_output_fp,params_str)
 
158
    index_links.append(('BIOM table statistics',
 
159
                        biom_table_stats_output_fp,
 
160
                        _index_headers['run_summary']))
 
161
    commands.append([('Generate BIOM table summary',
 
162
                      print_biom_table_summary_cmd)])
 
163
    
 
164
    # filter samples with fewer observations than the requested sampling_depth. 
 
165
    # since these get filtered for some analyses (eg beta diversity after
 
166
    # even sampling) it's useful to filter them here so they're filtered 
 
167
    # from all analyses.
 
168
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
 
169
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
 
170
     (biom_fp,filtered_biom_fp,sampling_depth)
 
171
    commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
 
172
                      filter_samples_cmd)])
 
173
    biom_fp = filtered_biom_fp
 
174
    
 
175
    # run initial commands and reset the command list
 
176
    command_handler(commands, 
 
177
                    status_update_callback, 
 
178
                    logger,
 
179
                    close_logger_on_success=False)
 
180
    commands = []
 
181
    
 
182
    if not suppress_beta_diversity:
 
183
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
 
184
        even_dm_fps = run_beta_diversity_through_plots(
 
185
         otu_table_fp=biom_fp, 
 
186
         mapping_fp=mapping_fp,
 
187
         output_dir=bdiv_even_output_dir,
 
188
         command_handler=command_handler,
 
189
         params=params,
 
190
         qiime_config=qiime_config,
 
191
         sampling_depth=sampling_depth,
 
192
         # force suppression of distance histograms - boxplots work better
 
193
         # in this context, and are created below.
 
194
         histogram_categories=[],
 
195
         tree_fp=tree_fp,
 
196
         parallel=parallel,
 
197
         logger=logger,
 
198
         suppress_md5=True,
 
199
         status_update_callback=status_update_callback)
 
200
    
 
201
        for bdiv_metric, dm_fp in even_dm_fps:
 
202
            for category in categories:
 
203
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
 
204
                try:
 
205
                    params_str = get_params_str(params['make_distance_boxplots'])
 
206
                except KeyError:
 
207
                    params_str = ''
 
208
                boxplots_cmd = \
 
209
                 'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
 
210
                 (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
 
211
                commands.append([('Boxplots (%s)' % category,
 
212
                                  boxplots_cmd)])
 
213
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
 
214
                                    '%s/%s_Distances.pdf' % \
 
215
                                     (boxplots_output_dir,category),
 
216
                                    _index_headers['beta_diversity_even'] % sampling_depth))
 
217
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
 
218
                                    '%s/%s_Stats.txt' % \
 
219
                                     (boxplots_output_dir,category),
 
220
                                    _index_headers['beta_diversity_even'] % sampling_depth))
 
221
            
 
222
            index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
 
223
                                '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
 
224
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
 
225
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
226
            index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
 
227
                                '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \
 
228
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
 
229
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
230
            index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
 
231
                                '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \
 
232
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
 
233
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
234
            index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
 
235
                                '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \
 
236
                                 (bdiv_even_output_dir,bdiv_metric,bdiv_metric),
 
237
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
238
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
 
239
                                '%s/%s_dm.txt' % \
 
240
                                 (bdiv_even_output_dir,bdiv_metric),
 
241
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
242
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
 
243
                                '%s/%s_pc.txt' % \
 
244
                                 (bdiv_even_output_dir,bdiv_metric),
 
245
                                _index_headers['beta_diversity_even'] % sampling_depth))
 
246
    
 
247
    if not suppress_alpha_diversity:
 
248
        ## Alpha rarefaction workflow
 
249
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
 
250
        run_alpha_rarefaction(
 
251
         otu_table_fp=biom_fp,
 
252
         mapping_fp=mapping_fp,
 
253
         output_dir=arare_full_output_dir,
 
254
         command_handler=command_handler,
 
255
         params=params,
 
256
         qiime_config=qiime_config,
 
257
         tree_fp=tree_fp,
 
258
         num_steps=arare_num_steps,
 
259
         parallel=parallel,
 
260
         logger=logger,
 
261
         min_rare_depth=arare_min_rare_depth,
 
262
         max_rare_depth=sampling_depth,
 
263
         suppress_md5=True,
 
264
         status_update_callback=status_update_callback)
 
265
    
 
266
        index_links.append(('Alpha rarefaction plots',
 
267
                            '%s/alpha_rarefaction_plots/rarefaction_plots.html'\
 
268
                              % arare_full_output_dir,
 
269
                            _index_headers['alpha_diversity']))
 
270
                        
 
271
        collated_alpha_diversity_fps = \
 
272
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
 
273
        try:
 
274
            params_str = get_params_str(params['compare_alpha_diversity'])
 
275
        except KeyError:
 
276
            params_str = ''
 
277
        for category in categories:
 
278
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
 
279
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
 
280
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
 
281
                 (arare_full_output_dir,category,alpha_metric)
 
282
                compare_alpha_cmd = \
 
283
                 'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
 
284
                 (collated_alpha_diversity_fp, mapping_fp, category, 
 
285
                  alpha_comparison_output_fp, params_str)
 
286
                commands.append([('Compare alpha diversity (%s, %s)' %\
 
287
                                   (category,alpha_metric),
 
288
                                  compare_alpha_cmd)])
 
289
                index_links.append(
 
290
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
 
291
                  alpha_comparison_output_fp,
 
292
                  _index_headers['alpha_diversity']))
 
293
    
 
294
    if not suppress_taxa_summary:
 
295
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
 
296
        run_summarize_taxa_through_plots(
 
297
         otu_table_fp=biom_fp,
 
298
         mapping_fp=mapping_fp,
 
299
         output_dir=taxa_plots_output_dir,
 
300
         mapping_cat=None, 
 
301
         sort=True,
 
302
         command_handler=command_handler,
 
303
         params=params,
 
304
         qiime_config=qiime_config,
 
305
         logger=logger,
 
306
         suppress_md5=True,
 
307
         status_update_callback=status_update_callback)
 
308
    
 
309
 
 
310
        index_links.append(('Taxa summary bar plots',
 
311
                            '%s/taxa_summary_plots/bar_charts.html'\
 
312
                              % taxa_plots_output_dir,
 
313
                            _index_headers['taxa_summary']))
 
314
        index_links.append(('Taxa summary area plots',
 
315
                            '%s/taxa_summary_plots/area_charts.html'\
 
316
                              % taxa_plots_output_dir,
 
317
                            _index_headers['taxa_summary']))
 
318
        for category in categories:
 
319
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
 
320
            run_summarize_taxa_through_plots(
 
321
             otu_table_fp=biom_fp,
 
322
             mapping_fp=mapping_fp,
 
323
             output_dir=taxa_plots_output_dir,
 
324
             mapping_cat=category, 
 
325
             sort=True,
 
326
             command_handler=command_handler,
 
327
             params=params,
 
328
             qiime_config=qiime_config,
 
329
             logger=logger,
 
330
             suppress_md5=True,
 
331
             status_update_callback=status_update_callback)
 
332
 
 
333
            index_links.append(('Taxa summary bar plots',
 
334
                                '%s/taxa_summary_plots/bar_charts.html'\
 
335
                                  % taxa_plots_output_dir,
 
336
                                _index_headers['taxa_summary_categorical'] % category))
 
337
            index_links.append(('Taxa summary area plots',
 
338
                                '%s/taxa_summary_plots/area_charts.html'\
 
339
                                  % taxa_plots_output_dir,
 
340
                                _index_headers['taxa_summary_categorical'] % category))
 
341
    
 
342
    if not suppress_otu_category_significance:
 
343
        # OTU category significance
 
344
        for category in categories:
 
345
            category_signifance_fp = \
 
346
             '%s/category_significance_%s.txt' % (output_dir, category)
 
347
            try:
 
348
                params_str = get_params_str(params['otu_category_significance'])
 
349
            except KeyError:
 
350
                params_str = ''
 
351
            # Build the OTU cateogry significance command
 
352
            category_significance_cmd = \
 
353
             'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
 
354
             (biom_fp, mapping_fp, category, 
 
355
              category_signifance_fp, params_str)
 
356
            commands.append([('OTU category significance (%s)' % category, 
 
357
                              category_significance_cmd)])
 
358
                          
 
359
            index_links.append(('Category significance (%s)' % category,
 
360
                        category_signifance_fp,
 
361
                        _index_headers['otu_category_sig']))
 
362
    
 
363
    commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
 
364
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
 
365
                        '%s.gz' % filtered_biom_fp,
 
366
                        _index_headers['run_summary']))
 
367
    
 
368
    command_handler(commands, status_update_callback, logger)
 
369
    generate_index_page(index_links,index_fp)