2
# File created on 07 Jul 2012
3
from __future__ import division
5
# Module metadata: author, copyright notice, credits, maintainer, contact
# e-mail and release status.
__author__ = "Greg Caporaso"
6
__copyright__ = "Copyright 2011, The QIIME project"
7
__credits__ = ["Greg Caporaso"]
10
__maintainer__ = "Greg Caporaso"
11
__email__ = "gregcaporaso@gmail.com"
12
__status__ = "Release"
14
from cogent.app.formatdb import build_blast_db_from_fasta_path
15
from qiime.align_seqs import compute_min_alignment_length
16
from qiime.parallel.util import ParallelWrapper
18
# ParallelWrapper subclass for running PyNAST alignment jobs in parallel
# (the job commands built by this class pass "-m pynast").
class ParallelAlignSeqsPyNast(ParallelWrapper):
19
# Script name associated with this wrapper -- presumably the script each
# generated job invokes; confirm against ParallelWrapper.
_script_name = "align_seqs.py"
21
# Reuse the base class's FASTA splitter -- presumably applied to the input
# file to produce the per-job inputs; confirm against ParallelWrapper.
_input_splitter = ParallelWrapper._split_fasta
23
# Fill in params['blast_db'] and params['min_length'] in place when they are
# unset (falsy blast_db / negative min_length).
#   input_fp, output_dir, working_dir -- not referenced in the lines visible
#       here.
#   params -- dict of options; reads 'blast_db', 'template_fp' and
#       'min_length', and may overwrite 'blast_db' and 'min_length'.
# NOTE(review): the final call below is cut off in this view (its arguments
# are not visible); confirm against the full file.
def _precommand_initiation(self,input_fp,output_dir,working_dir,params):
24
if not params['blast_db']:
25
# Build the blast database from params['template_fp'] -- all procs
26
# will then access one db rather than create one per proc
27
blast_db, db_files_to_remove = \
28
build_blast_db_from_fasta_path(params['template_fp'])
29
# Record the generated db files on the wrapper -- presumably so they are
# deleted during cleanup; confirm against ParallelWrapper.
self.files_to_remove += db_files_to_remove
30
params['blast_db'] = blast_db
32
# A negative min_length is replaced with a value computed by
# compute_min_alignment_length.
if params['min_length'] < 0:
33
params['min_length'] = compute_min_alignment_length(\
36
# Build one align_seqs/PyNAST shell command per input FASTA file.
#
# Visible behaviour:
#   * out_filenames holds three per-job basename templates
#     (job_prefix + '.%d_aligned.fasta' / '.%d_failures.fasta' / '.%d_log.txt');
#     '%d' is filled with the index of the input file.
#   * command_prefix / command_suffix fall back to '/bin/bash; ' and '; exit'
#     when a falsy value is passed.
#   * When params['blast_db'] is set, it is passed to each job as '-d <db>'.
#   * For every input file, self._get_rename_command() supplies the command
#     that moves the job's outputs from working_dir to output_dir, plus the
#     resulting file paths, which are accumulated in result_filepaths.
#
# Returns (commands, result_filepaths).
#
# NOTE(review): several original lines are missing from this view -- most of
# the parameter list, the end of the docstring, the initialisation of
# `commands` / `result_filepaths`, the branch taken when blast_db is unset, and
# part of the command format arguments. Confirm against the full file before
# changing any code here.
def _get_job_commands(self,
42
command_prefix='/bin/bash; ',
43
command_suffix='; exit'):
44
"""Generate PyNAST commands which should be submitted to cluster
46
# Create basenames for each of the output files. These will be filled
47
# in to create the full list of files created by all of the runs.
48
out_filenames = [job_prefix + '.%d_aligned.fasta',
49
job_prefix + '.%d_failures.fasta',
50
job_prefix + '.%d_log.txt']
52
# Initialize the command_prefix and command_suffix
53
command_prefix = command_prefix or '/bin/bash; '
54
command_suffix = command_suffix or '; exit'
56
# Create lists to store the results
60
# If there is a value for blast_db, pass it. If not, it
61
# will be created on-the-fly. Note that on-the-fly blast dbs
62
# are created with a string of random chars in the name, so this is safe.
63
# They shouldn't overwrite one another, and will be cleaned up.
64
if params['blast_db']:
65
blast_str = '-d %s' % params['blast_db']
69
# Iterate over the input files
70
for i,fasta_fp in enumerate(fasta_fps):
71
# Each run ends with moving the output file from the tmp dir to
72
# the output_dir. Build the command to perform the move here.
73
rename_command, current_result_filepaths = self._get_rename_command(\
74
[fn % i for fn in out_filenames],working_dir,output_dir)
75
result_filepaths += current_result_filepaths
78
'%s %s %s -p %1.2f -e %d -m pynast -t %s -a %s -o %s -i %s %s %s' %\
82
params['min_percent_id'],
84
params['template_fp'],\
85
params['pairwise_alignment_method'],
91
commands.append(command)
93
return commands, result_filepaths
95
# Write the merge map file at merge_map_filepath.
#
# For each output type (aligned, failures, log) one tab-joined record is
# written: the per-job result files of that type followed by the merged
# output path '<output_dir>/<input_file_basename>_<type suffix>'.
#
# NOTE(review): parts of this method are missing from this view -- most of
# the parameter list, the initialisation of aligned_fps / failures_fps /
# log_fps, and the branch that fills log_fps. No newline write or f.close()
# is visible either; confirm both exist in the full file.
def _write_merge_map_file(self,
102
f = open(merge_map_filepath,'w')
104
out_filepaths = ['%s/%s_aligned.fasta' % (output_dir,input_file_basename),
105
'%s/%s_failures.fasta' % (output_dir,input_file_basename),
106
'%s/%s_log.txt' % (output_dir,input_file_basename)]
112
# Sort each job result path into its bucket by filename suffix.
for fp in job_result_filepaths:
113
if fp.endswith('_aligned.fasta'):
114
aligned_fps.append(fp)
115
elif fp.endswith('_failures.fasta'):
116
failures_fps.append(fp)
120
# Pair each bucket with its merged output path; the order of the buckets
# matches the order of out_filepaths (aligned, failures, log).
for in_files, out_file in\
121
zip([aligned_fps,failures_fps,log_fps],out_filepaths):
122
f.write('\t'.join(in_files + [out_file]))
b'\\ No newline at end of file'