5
5
__author__ = "Justin Kuczynski"
6
6
__copyright__ = "Copyright 2011, The QIIME Project"
7
__credits__ = ["Justin Kuczynski"]
7
__credits__ = ["Justin Kuczynski", "Jai Ram Rideout", "Greg Caporaso"]
10
10
__maintainer__ = "Justin Kuczynski"
11
11
__email__ = "justinak@gmail.com"
12
12
__status__ = "Release"
14
from os.path import join, split, splitext
14
15
from cogent.parse.tree import DndParser
15
from qiime.util import parse_command_line_parameters, make_option
16
from qiime.simsam import sim_otu_table
16
from qiime.util import (add_filename_suffix, create_dir, get_options_lookup,
17
parse_command_line_parameters, make_option)
18
from qiime.simsam import simsam_range_to_files
17
19
from qiime.format import format_biom_table
18
20
from biom.table import table_factory
19
21
from biom.parse import parse_biom_table
23
options_lookup = get_options_lookup()
22
26
script_info['brief_description'] = "Simulate samples for each sample in an OTU table, using a phylogenetic tree."
23
script_info['script_description'] = ""
24
script_info['script_usage'] = [("","Make 3 related sample for each sample in otu_table.biom.","%prog -i otu_table.biom -t rep_set.tre -o otu_table.simsam.biom -d .001 -n 3")]
25
script_info['output_description']= "an otu table, samples are named: 'original_sample_0, original_sample_1 ...'"
27
script_info['script_description'] = """ This script makes n samples related to each sample in an input otu table
29
An input OTU table with 3 samples and n=2 will result in an output OTU table with 6 samples total: 3 clusters of 2 related samples.
31
To simulate each of the new samples, this script uses a sample in the input OTU table, and for each OTU in that sample the script
32
traverses rootward on the tree a distance specified by '-d' to a point x. It then randomly selects a tip that descends from x,
33
(call that new tip 'o2'), and reassigns all observations of the original OTU to the tip/OTU 'o2'.
35
script_info['script_usage'] = [("","Create an OTU table with 3 related samples for each sample in otu_table.biom with dissimilarities of 0.001.","%prog -i otu_table.biom -t rep_set.tre -o simsam_out1 -d .001 -n 3")]
36
script_info['script_usage'].append(("","Create OTU tables with 2, 3 and 4 related samples for each sample in otu_table.biom with dissimilarities of 0.001 and 0.01. Additionally create new mapping files with metadata for each of the new samples for use in downstream analyses.","%prog -i otu_table.biom -t rep_set.tre -o simsam_out2 -d .001,.01 -n 2,3,4 -m map.txt"))
37
script_info['output_description']= """
38
The output directory will contain an OTU table with samples named:
39
'original_sample_0, original_sample_1 ...'
41
If a mapping file is provided via -m, an output mapping file containing the
42
replicated sample IDs (with all other metadata columns copied over) will also
26
45
script_info['required_options'] = [
27
46
make_option('-i','--otu_table',help='the input otu table',type='existing_filepath'),
28
47
make_option('-t','--tree_file',help='tree file',type='existing_filepath'),
29
make_option('-o','--output_file',help='the output file',type='new_filepath'),
30
make_option('-d','--dissim',help='dissimilarity between nodes up the tree',
48
options_lookup['output_dir'],
49
make_option('-d','--dissim',help='dissimilarity between nodes up the tree, as a single value or comma-separated list of values'),
32
50
make_option('-n','--num',
33
help='number of simulated samples per input sample',
51
help='number of simulated samples per input sample, as a single value or comma-separated list of values')
37
script_info['optional_options'] = []
54
script_info['optional_options'] = [
55
make_option('-m', '--mapping_fp', type='existing_filepath',
56
help='the mapping filepath. If provided, an output mapping file '
57
'containing the replicated sample IDs (with all other metadata '
58
'columns copied over) will also be created [default: %default]',
38
61
script_info['version'] = __version__
41
option_parser, opts, args =\
42
parse_command_line_parameters(**script_info)
44
out_fh = open(opts.output_file,'w')
45
otu_table_fh = open(opts.otu_table,'U')
64
option_parser, opts, args = parse_command_line_parameters(**script_info)
66
output_dir = opts.output_dir
67
create_dir(output_dir)
69
otu_table_fp = opts.otu_table
70
otu_table_fh = open(otu_table_fp,'U')
46
71
otu_table = parse_biom_table(otu_table_fh)
47
74
tree_fh = open(opts.tree_file,'U')
48
75
tree = DndParser(tree_fh)
50
res_sam_names, res_otus, res_otu_mtx, res_otu_metadata = \
51
sim_otu_table(otu_table.SampleIds, otu_table.ObservationIds, otu_table.iterSamples(),
52
otu_table.ObservationMetadata, tree, opts.num, opts.dissim)
55
rich_table = table_factory(res_otu_mtx,res_sam_names,res_otus,
56
observation_metadata=res_otu_metadata)
57
out_fh.write(format_biom_table(rich_table))
78
mapping_fp = opts.mapping_fp
80
mapping_f = open(mapping_fp,'U')
81
input_map_basename = splitext(split(mapping_fp)[1])[0]
84
input_map_basename = None
86
input_table_basename = splitext(split(otu_table_fp)[1])[0]
88
simsam_range_to_files(otu_table,
90
simulated_sample_sizes=map(int,opts.num.split(',')),
91
dissimilarities=map(float,opts.dissim.split(',')),
92
output_dir=output_dir,
94
output_table_basename=input_table_basename,
95
output_map_basename=input_map_basename)
60
98
if __name__ == "__main__":
b'\\ No newline at end of file'