~ubuntu-branches/ubuntu/trusty/qiime/trusty

« back to all changes in this revision

Viewing changes to scripts/print_metadata_stats.py

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2013-06-17 18:28:26 UTC
  • mfrom: (9.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20130617182826-376az5ad080a0sfe
Tags: 1.7.0+dfsg-1
Upload preparations done for BioLinux to Debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
 
2
# File created on 19 Dec 2012
 
3
from __future__ import division
 
4
 
 
5
# Module metadata; __version__ is also published through script_info['version'].
__author__ = "Daniel McDonald"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Daniel McDonald", "Yoshiki Vazquez Baeza"]
__license__ = "GPL"
__version__ = "1.7.0"
__maintainer__ = "Daniel McDonald"
__email__ = "mcdonadt@colorado.edu"
__status__ = "Release"
 
13
 
 
14
from collections import defaultdict
 
15
from qiime.parse import parse_mapping_file
 
16
from qiime.util import parse_command_line_parameters, make_option
 
17
from qiime.sort import natsort
 
18
from sys import stdout
 
19
 
 
20
# Command-line interface description; main() passes this dict as keyword
# arguments to parse_command_line_parameters().
script_info = {}
script_info['brief_description'] = "Count the number of samples associated to a category value"
script_info['script_description'] = """Sum up the number of samples with each category value and print this information."""
# Each usage entry is a (title, description, command) triple.
script_info['script_usage'] = [
    ("Example:",
     "Count the number of samples associated with Treatment",
     """%prog -m $PWD/mapping.txt -c Treatment"""),
    ("Example writing the output to a file",
     "Count the number of samples associated with Treatment and save them to a file called stats.txt",
     """%prog -m mapping.txt -c Treatment -o stats.txt"""),
]
script_info['output_description'] = """Two columns, the first being the category value and the second being the count. Output is to standard out unless an output file is given with -o. If there are unspecified values, the output category is identified as ***UNSPECIFIED***"""
script_info['required_options'] = [
    make_option('-m', '--mapping_file', type="existing_filepath",
                help='the input metadata file'),
    make_option('-c', '--category', type='string',
                help='the category to examine'),
]
script_info['optional_options'] = [
    make_option('-o', '--output_fp', type="new_filepath",
                help="path where output will be written [default: print to screen]",
                default=None),
]
script_info['version'] = __version__
 
36
 
 
37
def main():
    """Count the samples carrying each value of one mapping-file category.

    Parses the command line described by ``script_info``, reads the mapping
    file given with ``-m``, tallies how many rows hold each value in the
    column named by ``-c``, and writes one ``value<TAB>count`` line per
    value in ``natsort`` order. Empty values are reported under the label
    ``***UNSPECIFIED***``.

    Output goes to the path given with ``-o`` when one is supplied,
    otherwise to standard out. If the requested category is not one of the
    mapping file's header fields the problem is reported through
    ``option_parser.error`` (presumably terminating the script -- confirm
    against the parser returned by ``parse_command_line_parameters``).
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    map_data, header, comments = parse_mapping_file(opts.mapping_file)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" % opts.category)

    # Tally the rows per value found in the requested column.
    cat_idx = header.index(opts.category)
    result = defaultdict(int)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # Build the whole report before touching the destination so that a
    # failure while counting never leaves a truncated output file behind.
    lines = []
    for cat_val in natsort(result):
        label = cat_val if cat_val else "***UNSPECIFIED***"
        lines.append("%s\t%d\n" % (label, result[cat_val]))

    if opts.output_fp:
        # The context manager closes the file even if a write fails.
        with open(opts.output_fp, 'w') as fd:
            fd.writelines(lines)
    else:
        # stdout belongs to the interpreter: write to it but never close it.
        stdout.writelines(lines)
 
66
 
 
67
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()