2
# File created on 19 Dec 2012
3
from __future__ import division
5
# Module metadata.
# NOTE(review): `__version__` is read further down when filling in
# script_info['version'], but no assignment to it is visible among these
# lines -- confirm it is defined before that statement runs.
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Daniel McDonald", "Yoshiki Vazquez Baeza"]
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt@colorado.edu"
__status__ = "Release"
14
from collections import defaultdict
15
from qiime.parse import parse_mapping_file
16
from qiime.util import parse_command_line_parameters, make_option
17
from qiime.sort import natsort
18
from sys import stdout
21
# Command-line interface description. The whole dict is later unpacked into
# parse_command_line_parameters(**script_info).
#
# Created here because no earlier assignment to `script_info` is visible;
# every statement below subscripts it, so it must exist first.
script_info = {}
script_info['brief_description'] = (
    "Count the number of samples associated to a category value")
script_info['script_description'] = (
    "Sum up the number of samples with each category value and print "
    "this information.")
# Each usage entry is a (title, description, example command) triple.
script_info['script_usage'] = [
    ("Example:",
     "Count the number of samples associated with Treatment",
     "%prog -m $PWD/mapping.txt -c Treatment"),
    ("Example writing the output to a file",
     "Count the number of samples associated with Treatment and save "
     "them to a file called stats.txt",
     "%prog -m mapping.txt -c Treatment -o stats.txt"),
]
script_info['output_description'] = (
    "Two columns, the first being the category value and the second "
    "being the count. Output is to standard out. If there are "
    "unspecified values, the output category is identified as "
    "***UNSPECIFIED***")
script_info['required_options'] = [
    make_option('-m', '--mapping_file', type="existing_filepath",
                help='the input metadata file'),
    make_option('-c', '--category', type='string',
                help='the category to examine'),
]
script_info['optional_options'] = [
    # default=None spells out the "no path given" state that the help text
    # describes as printing to the screen.
    make_option('-o', '--output_fp', type="new_filepath",
                help="path where output will be written [default: print to screen]",
                default=None),
]
# NOTE(review): `__version__` is not assigned in the visible part of this
# file -- confirm it is defined above before this line executes.
script_info['version'] = __version__
38
def main():
    """Count samples per value of one mapping-file category and report them.

    Reads the command line described by ``script_info``, parses the mapping
    file, tallies how many rows carry each value in the requested category
    column, and writes one ``value<TAB>count`` line per distinct value.

    Output goes to the path given with ``-o/--output_fp`` when one was
    supplied, otherwise to standard output. An empty category value is
    reported under the label ``***UNSPECIFIED***``.

    If the requested category is not a column of the mapping file, the
    problem is reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    output_fp = opts.output_fp

    map_data, header, comments = parse_mapping_file(opts.mapping_file)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" % opts.category)

    # Tally the category column; presumably every entry of map_data is one
    # sample's row, ordered like `header` -- TODO confirm against
    # qiime.parse.parse_mapping_file.
    cat_idx = header.index(opts.category)
    result = defaultdict(int)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # use stdout or the user supplied file path
    if output_fp:
        fd = open(output_fp, 'w')
    else:
        fd = stdout

    try:
        for cat_val in natsort(result):
            # NOTE(review): treating an empty value as "unspecified" is
            # inferred from the output description -- confirm.
            if not cat_val:
                fd.write("***UNSPECIFIED***\t%d\n" % result[cat_val])
            else:
                fd.write("%s\t%d\n" % (cat_val, result[cat_val]))
    finally:
        # Only close what was opened here; stdout belongs to the interpreter.
        if fd is not stdout:
            fd.close()
67
if __name__ == "__main__":