~ubuntu-branches/ubuntu/trusty/qiime/trusty

« back to all changes in this revision

Viewing changes to qiime/parallel/align_seqs.py

  • Committer: Package Import Robot
  • Author(s): Andreas Tille
  • Date: 2013-06-17 18:28:26 UTC
  • mfrom: (9.1.2 sid)
  • Revision ID: package-import@ubuntu.com-20130617182826-376az5ad080a0sfe
Tags: 1.7.0+dfsg-1
Upload preparations done for BioLinux to Debian

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/env python
 
2
# File created on 07 Jul 2012
 
3
from __future__ import division
 
4
 
 
5
__author__ = "Greg Caporaso"
 
6
__copyright__ = "Copyright 2011, The QIIME project"
 
7
__credits__ = ["Greg Caporaso"]
 
8
__license__ = "GPL"
 
9
__version__ = "1.7.0"
 
10
__maintainer__ = "Greg Caporaso"
 
11
__email__ = "gregcaporaso@gmail.com"
 
12
__status__ = "Release"
 
13
 
 
14
from cogent.app.formatdb import build_blast_db_from_fasta_path
 
15
from qiime.align_seqs import compute_min_alignment_length
 
16
from qiime.parallel.util import ParallelWrapper
 
17
 
 
18
class ParallelAlignSeqsPyNast(ParallelWrapper):
    """Parallel wrapper that builds per-chunk PyNAST ``align_seqs.py`` jobs.

    The input is split with ``ParallelWrapper._split_fasta``; one command is
    generated per resulting fasta file, and a merge map describes how the
    per-job outputs are combined into the final aligned/failures/log files.
    """

    _script_name = "align_seqs.py"
    _job_prefix = 'ALIGN'
    _input_splitter = ParallelWrapper._split_fasta

    def _precommand_initiation(self, input_fp, output_dir, working_dir,
                               params):
        """Fill in shared defaults in ``params`` before commands are built.

        input_fp: path to the fasta file to be aligned
        output_dir: unused here; part of the hook signature
        working_dir: unused here; part of the hook signature
        params: dict of options; ``params['blast_db']`` and
         ``params['min_length']`` may be modified in place

        If no blast database was supplied, one is built from
        ``params['template_fp']`` and its files are registered in
        ``self.files_to_remove``. If ``params['min_length']`` is negative,
        it is replaced by the value computed from the input sequences.
        """
        if not params['blast_db']:
            # Build the blast database from the template sequences once, up
            # front, so every job shares one db rather than creating its own.
            blast_db, db_files_to_remove = \
                build_blast_db_from_fasta_path(params['template_fp'])
            self.files_to_remove += db_files_to_remove
            params['blast_db'] = blast_db

        if params['min_length'] < 0:
            # Close the handle deterministically instead of leaking it.
            # NOTE(review): assumes compute_min_alignment_length reads the
            # handle fully before returning (its result is used as a number
            # when the commands are formatted) -- confirm.
            with open(input_fp, 'U') as input_f:
                params['min_length'] = compute_min_alignment_length(input_f)

    def _get_job_commands(self,
                          fasta_fps,
                          output_dir,
                          params,
                          job_prefix,
                          working_dir,
                          command_prefix='/bin/bash; ',
                          command_suffix='; exit'):
        """Generate PyNAST commands which should be submitted to cluster

        fasta_fps: paths of the split input fasta files, one job per file
        output_dir: directory the per-job result files are moved to
        params: dict providing 'blast_db', 'min_percent_id', 'min_length',
         'template_fp' and 'pairwise_alignment_method'
        job_prefix: string prepended to every per-job output filename
        working_dir: directory each job writes its output to (passed as -o)
        command_prefix: text placed before each command; a false value
         falls back to '/bin/bash; '
        command_suffix: text placed after each command; a false value
         falls back to '; exit'

        Returns (commands, result_filepaths): the list of command strings
        and the list of result file paths reported by
        ``self._get_rename_command`` for all jobs.
        """
        # Suffixes of the files each job creates. The job prefix and job
        # index are combined with these per job, so a '%' character in
        # job_prefix is never interpreted as a format directive.
        out_suffixes = ('_aligned.fasta', '_failures.fasta', '_log.txt')

        # Initialize the command_prefix and command_suffix
        command_prefix = command_prefix or '/bin/bash; '
        command_suffix = command_suffix or '; exit'

        # If there is a value for blast_db, pass it. If not, nothing is
        # passed and the database is left to be created on-the-fly.
        if params['blast_db']:
            blast_str = '-d %s' % params['blast_db']
        else:
            blast_str = ''

        commands = []
        result_filepaths = []

        for i, fasta_fp in enumerate(fasta_fps):
            job_filenames = ['%s.%d%s' % (job_prefix, i, suffix)
                             for suffix in out_suffixes]

            # Each run ends with moving the output files from the working
            # dir to the output_dir. Build the command to perform the move.
            rename_command, current_result_filepaths = \
                self._get_rename_command(job_filenames,
                                         working_dir,
                                         output_dir)
            result_filepaths += current_result_filepaths

            command = \
                '%s %s %s -p %1.2f -e %d -m pynast -t %s -a %s -o %s -i %s %s %s' %\
                (command_prefix,
                 self._script_name,
                 blast_str,
                 params['min_percent_id'],
                 params['min_length'],
                 params['template_fp'],
                 params['pairwise_alignment_method'],
                 working_dir,
                 fasta_fp,
                 rename_command,
                 command_suffix)

            commands.append(command)

        return commands, result_filepaths

    def _write_merge_map_file(self,
                              input_file_basename,
                              job_result_filepaths,
                              params,
                              output_dir,
                              merge_map_filepath):
        """Write the merge map for the aligned, failures and log outputs.

        input_file_basename: basename used to name the merged output files
        job_result_filepaths: paths of all per-job result files
        params: unused here; part of the hook signature
        output_dir: directory the merged output files are placed in
        merge_map_filepath: path of the merge map file to write

        Three lines are written, in the order aligned, failures, log. Each
        line is tab-separated: the per-job input paths of that category
        followed by the merged output path as the last field.
        """
        out_filepaths = [
            '%s/%s_aligned.fasta' % (output_dir, input_file_basename),
            '%s/%s_failures.fasta' % (output_dir, input_file_basename),
            '%s/%s_log.txt' % (output_dir, input_file_basename)]

        aligned_fps = []
        failures_fps = []
        log_fps = []

        # Sort the per-job results by filename suffix; anything that is
        # neither an alignment nor a failures file is treated as a log.
        for fp in job_result_filepaths:
            if fp.endswith('_aligned.fasta'):
                aligned_fps.append(fp)
            elif fp.endswith('_failures.fasta'):
                failures_fps.append(fp)
            else:
                log_fps.append(fp)

        # The context manager closes the file even if a write fails.
        with open(merge_map_filepath, 'w') as f:
            for in_files, out_file in \
                    zip([aligned_fps, failures_fps, log_fps], out_filepaths):
                f.write('\t'.join(in_files + [out_file]))
                f.write('\n')
 
125
    
 
 
\ No newline at end of file