2
# File created on 09 Feb 2013
3
from __future__ import division
6
from os.path import split, splitext
7
from qiime.parse import (parse_qiime_parameters,
8
parse_mapping_file_to_dict)
9
from qiime.util import (create_dir,
11
from qiime.workflow.downstream import (
12
run_beta_diversity_through_plots,
13
run_alpha_rarefaction,
14
run_summarize_taxa_through_plots)
15
from qiime.workflow.util import (print_to_stdout,
19
call_commands_serially,
22
__author__ = "Greg Caporaso"
23
__copyright__ = "Copyright 2011, The QIIME project"
24
__credits__ = ["Greg Caporaso"]
27
__maintainer__ = "Greg Caporaso"
28
__email__ = "gregcaporaso@gmail.com"
29
__status__ = "Release"
32
"run_summary": "Run summary data",
33
"beta_diversity_even": "Beta diversity results (even sampling: %d)",
34
"alpha_diversity": "Alpha diversity results",
35
"taxa_summary": "Taxonomic summary results",
36
"taxa_summary_categorical": "Taxonomic summary results (by %s)",
37
"otu_category_sig": "Category results"}
39
def format_index_link(link_description,relative_path):
41
return '<td>%s</td><td> <a href="%s" target="_blank">%s</a></td>' % (link_description,
42
re.sub('/+','/',relative_path),
43
split(relative_path)[1])
45
def generate_index_page(index_links,
47
order=[_index_headers['run_summary']]):
48
""" generate the top-level index page """
49
# get containing directory for index_fp
50
top_level_dir = split(split(index_fp)[0])[1]
51
index_page_header = get_index_page_header()
52
index_lines = [index_page_header]
56
d[e[2]].append((e[0],e[1]))
58
d[e[2]] = [(e[0],e[1])]
59
index_lines.append('<table border=1>\n')
61
# Determine the order the data should be presented in. This should be
62
# the order that the user requested, followed by any categories that
63
# the user didn't include in the order parameter.
64
ordered_table_entries = order + [k for k in d if k not in order]
65
for k in ordered_table_entries:
68
'<tr colspan=2 align=center bgcolor=#e8e8e8><td colspan=2 align=center>%s</td></tr>\n' % k)
69
for description,path in v:
70
path = re.sub('.*%s' % top_level_dir,'./',path)
71
index_lines.append('<tr>%s</tr>\n' % format_index_link(description,path))
72
index_lines.append('</table>\n')
74
index_page_footer = get_index_page_footer()
75
index_lines.append(index_page_footer)
77
open(index_fp,'w').write(''.join(index_lines))
79
def get_index_page_header():
81
<head><title>QIIME results</title></head>
83
<a href="http://www.qiime.org" target="_blank"><img src=\"http://qiime.org/_static/wordpressheader.png\" alt="www.qiime.org""/></a><p>
86
def get_index_page_footer():
87
return """<p><b>Need help?</b><br>You can get answers to your questions on the <a href="http://forum.qiime.org" target="_blank">QIIME Forum</a>.<br>See the <a href="http://qiime.org/tutorials/index.html" target="_blank">QIIME tutorials</a> for examples of additional analyses that can be run.<br>You can find documentation of the QIIME scripts in the <a href="http://qiime.org/scripts/index.html" target="_blank">QIIME script index</a>.
91
def run_core_diversity_analyses(
97
command_handler=call_commands_serially,
101
arare_min_rare_depth=10,
104
suppress_taxa_summary=False,
105
suppress_beta_diversity=False,
106
suppress_alpha_diversity=False,
107
suppress_otu_category_significance=False,
108
status_update_callback=print_to_stdout):
111
if categories != None:
112
# Validate categories provided by the users
113
mapping_data, mapping_comments = \
114
parse_mapping_file_to_dict(open(mapping_fp,'U'))
115
metadata_map = MetadataMap(mapping_data, mapping_comments)
117
if c not in metadata_map.CategoryNames:
118
raise ValueError, ("Category '%s' is not a column header "
119
"in your mapping file. "
120
"Categories are case and white space sensitive. Valid "
121
"choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
122
if metadata_map.hasSingleCategoryValue(c):
123
raise ValueError, ("Category '%s' contains only one value. "
124
"Categories analyzed here require at least two values." % c)
129
# prep some variables
131
params = parse_qiime_parameters([])
133
create_dir(output_dir)
134
index_fp = '%s/index.html' % output_dir
139
log_fp = generate_log_fp(output_dir)
140
index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
141
logger = WorkflowLogger(log_fp,
143
qiime_config=qiime_config)
144
input_fps = [biom_fp,mapping_fp]
146
input_fps.append(tree_fp)
147
log_input_md5s(logger,input_fps)
149
# run print_biom_table_summary.py on input BIOM table
151
params_str = get_params_str(params['print_biom_table_summary'])
154
biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
155
print_biom_table_summary_cmd = \
156
"print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
157
(biom_fp, biom_table_stats_output_fp,params_str)
158
index_links.append(('BIOM table statistics',
159
biom_table_stats_output_fp,
160
_index_headers['run_summary']))
161
commands.append([('Generate BIOM table summary',
162
print_biom_table_summary_cmd)])
164
# filter samples with fewer observations than the requested sampling_depth.
165
# since these get filtered for some analyses (eg beta diversity after
166
# even sampling) it's useful to filter them here so they're filtered
168
filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
169
filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
170
(biom_fp,filtered_biom_fp,sampling_depth)
171
commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
172
filter_samples_cmd)])
173
biom_fp = filtered_biom_fp
175
# run initial commands and reset the command list
176
command_handler(commands,
177
status_update_callback,
179
close_logger_on_success=False)
182
if not suppress_beta_diversity:
183
bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
184
even_dm_fps = run_beta_diversity_through_plots(
185
otu_table_fp=biom_fp,
186
mapping_fp=mapping_fp,
187
output_dir=bdiv_even_output_dir,
188
command_handler=command_handler,
190
qiime_config=qiime_config,
191
sampling_depth=sampling_depth,
192
# force suppression of distance histograms - boxplots work better
193
# in this context, and are created below.
194
histogram_categories=[],
199
status_update_callback=status_update_callback)
201
for bdiv_metric, dm_fp in even_dm_fps:
202
for category in categories:
203
boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
205
params_str = get_params_str(params['make_distance_boxplots'])
209
'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
210
(dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
211
commands.append([('Boxplots (%s)' % category,
213
index_links.append(('Distance boxplots (%s)' % bdiv_metric,
214
'%s/%s_Distances.pdf' % \
215
(boxplots_output_dir,category),
216
_index_headers['beta_diversity_even'] % sampling_depth))
217
index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
218
'%s/%s_Stats.txt' % \
219
(boxplots_output_dir,category),
220
_index_headers['beta_diversity_even'] % sampling_depth))
222
index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
223
'%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \
224
(bdiv_even_output_dir,bdiv_metric,bdiv_metric),
225
_index_headers['beta_diversity_even'] % sampling_depth))
226
index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
227
'%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \
228
(bdiv_even_output_dir,bdiv_metric,bdiv_metric),
229
_index_headers['beta_diversity_even'] % sampling_depth))
230
index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
231
'%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \
232
(bdiv_even_output_dir,bdiv_metric,bdiv_metric),
233
_index_headers['beta_diversity_even'] % sampling_depth))
234
index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
235
'%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \
236
(bdiv_even_output_dir,bdiv_metric,bdiv_metric),
237
_index_headers['beta_diversity_even'] % sampling_depth))
238
index_links.append(('Distance matrix (%s)' % bdiv_metric,
240
(bdiv_even_output_dir,bdiv_metric),
241
_index_headers['beta_diversity_even'] % sampling_depth))
242
index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
244
(bdiv_even_output_dir,bdiv_metric),
245
_index_headers['beta_diversity_even'] % sampling_depth))
247
if not suppress_alpha_diversity:
248
## Alpha rarefaction workflow
249
arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
250
run_alpha_rarefaction(
251
otu_table_fp=biom_fp,
252
mapping_fp=mapping_fp,
253
output_dir=arare_full_output_dir,
254
command_handler=command_handler,
256
qiime_config=qiime_config,
258
num_steps=arare_num_steps,
261
min_rare_depth=arare_min_rare_depth,
262
max_rare_depth=sampling_depth,
264
status_update_callback=status_update_callback)
266
index_links.append(('Alpha rarefaction plots',
267
'%s/alpha_rarefaction_plots/rarefaction_plots.html'\
268
% arare_full_output_dir,
269
_index_headers['alpha_diversity']))
271
collated_alpha_diversity_fps = \
272
glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
274
params_str = get_params_str(params['compare_alpha_diversity'])
277
for category in categories:
278
for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
279
alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
280
alpha_comparison_output_fp = '%s/%s_%s.txt' % \
281
(arare_full_output_dir,category,alpha_metric)
282
compare_alpha_cmd = \
283
'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
284
(collated_alpha_diversity_fp, mapping_fp, category,
285
alpha_comparison_output_fp, params_str)
286
commands.append([('Compare alpha diversity (%s, %s)' %\
287
(category,alpha_metric),
290
('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
291
alpha_comparison_output_fp,
292
_index_headers['alpha_diversity']))
294
if not suppress_taxa_summary:
295
taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
296
run_summarize_taxa_through_plots(
297
otu_table_fp=biom_fp,
298
mapping_fp=mapping_fp,
299
output_dir=taxa_plots_output_dir,
302
command_handler=command_handler,
304
qiime_config=qiime_config,
307
status_update_callback=status_update_callback)
310
index_links.append(('Taxa summary bar plots',
311
'%s/taxa_summary_plots/bar_charts.html'\
312
% taxa_plots_output_dir,
313
_index_headers['taxa_summary']))
314
index_links.append(('Taxa summary area plots',
315
'%s/taxa_summary_plots/area_charts.html'\
316
% taxa_plots_output_dir,
317
_index_headers['taxa_summary']))
318
for category in categories:
319
taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
320
run_summarize_taxa_through_plots(
321
otu_table_fp=biom_fp,
322
mapping_fp=mapping_fp,
323
output_dir=taxa_plots_output_dir,
324
mapping_cat=category,
326
command_handler=command_handler,
328
qiime_config=qiime_config,
331
status_update_callback=status_update_callback)
333
index_links.append(('Taxa summary bar plots',
334
'%s/taxa_summary_plots/bar_charts.html'\
335
% taxa_plots_output_dir,
336
_index_headers['taxa_summary_categorical'] % category))
337
index_links.append(('Taxa summary area plots',
338
'%s/taxa_summary_plots/area_charts.html'\
339
% taxa_plots_output_dir,
340
_index_headers['taxa_summary_categorical'] % category))
342
if not suppress_otu_category_significance:
343
# OTU category significance
344
for category in categories:
345
category_signifance_fp = \
346
'%s/category_significance_%s.txt' % (output_dir, category)
348
params_str = get_params_str(params['otu_category_significance'])
351
# Build the OTU cateogry significance command
352
category_significance_cmd = \
353
'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
354
(biom_fp, mapping_fp, category,
355
category_signifance_fp, params_str)
356
commands.append([('OTU category significance (%s)' % category,
357
category_significance_cmd)])
359
index_links.append(('Category significance (%s)' % category,
360
category_signifance_fp,
361
_index_headers['otu_category_sig']))
363
commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
364
index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
365
'%s.gz' % filtered_biom_fp,
366
_index_headers['run_summary']))
368
command_handler(commands, status_update_callback, logger)
369
generate_index_page(index_links,index_fp)