3
# Copyright (C) 2011- The University of Notre Dame
4
# This software is distributed under the GNU General Public License.
5
# See the file COPYING for details.
7
# This program generates makeflows to parallelize the
8
# popular blastall program.
19
# Print the one-line usage summary on stderr.
# NOTE(review): the message has no trailing newline, so whatever is written to
# stderr next continues on the same line -- confirm this is intended.
sys.stderr.write("Usage: makeflow_blast query_granularity character_granularity [blastall arguments]")
20
# Run blastall with no arguments and route its stdout to our stderr; presumably
# this shows blastall's own option summary -- TODO confirm. The exit status
# returned by subprocess.call is ignored.
subprocess.call(["blastall"], stdout=sys.stderr)
22
#In which global variables are used with abandon
24
# NOTE(review): the header and loop lines of this routine are not visible in this
# view; presumably this is the body of count_splits() -- confirm.
# Open the query file for reading.
FILE = open(query, "r")
29
# Lines that begin with '>' are treated as the start of a new query record.
if(re.search('^>', line)):
30
# Close the current job once either threshold is reached: for integer counters,
# "count > gran - 1" is the same test as "count >= gran".
if(num_queries > (int(query_gran) - 1) or num_chars > (int(char_gran) - 1) ):
31
num_jobs = num_jobs + 1
36
# Add the full length of the line, as read, to the running character count.
num_chars += len(line)
40
#In which global variables are used with abandon
41
# Trial run of blastall: writes a one-sequence test file, builds the blastall
# command line from `args` with the query replaced by that file, and runs it with
# all output discarded.
# NOTE(review): several lines of this function (the `try:` line, the handler body
# and the return statements) are not visible in this view.
def check_blast( args=None ):
42
# Sink for blastall's stdout and stderr during the trial run.
null = open(os.devnull, 'w')
43
# The test query is written to a file named "tmp" in the current directory.
TMPF = open("tmp", "w")
44
TMPF.write(">TestSeq\nATATATATTTCCCAGGTAGACCACACAGAGACAGATACACCACA\n")
46
# NOTE(review): with the default args=None this join cannot succeed, so callers
# are expected to always pass a list of strings.
blast_arguments = 'blastall ' + string.join(args)
47
# Point the command at the test file instead of the real query.
# NOTE(review): `query` is used here as a regular-expression pattern without
# escaping, and every match in the command string is replaced -- confirm that
# query file names never contain regex metacharacters.
blast_arguments = re.sub(query,"tmp",blast_arguments)
49
# check_call raises CalledProcessError when blastall exits with a non-zero status.
subprocess.check_call(re.split("\s+",blast_arguments), stdout=null,stderr=null)
50
except subprocess.CalledProcessError, err:
52
# Path of the test file written above, relative to the current directory.
rm_path = os.curdir + "/tmp"
58
#Check input in a bunch of ways (getopt won't finish unless it recognizes all the flags, so we put in all the blast options as well)
62
# Parse the whole of sys.argv in GNU mode, where options and positional arguments
# may be intermixed; every option letter listed takes a value (each is followed by
# ':'). Arguments that are not options end up in `args`.
# NOTE(review): the matching `try:` line and the handler body are not visible in
# this view.
(options,args) = getopt.gnu_getopt(sys.argv,'p:d:i:e:m:o:F:G:E:X:I:q:r:v:b:f:g:Q:D:a:O:J:M:W:z:K:P:Y:S:T:l:U:y:Z:R:n:L:A:w:t:B:V:C:s:')
63
except getopt.GetoptError, err:
78
#Basically, do we have the things we need, and does blast know what to do with the rest of the things
79
# The condition is true when query, output or database is unset, when fewer than
# three argv entries were given, or when the trial run check_blast() on everything
# after the two granularity arguments returns 1.
if (query == None or output == None or database == None or len(sys.argv) < 3 or check_blast(sys.argv[3:]) == 1) :
83
# Input validation passed. argv[1] and argv[2] are the two granularity settings
# (kept as strings); everything after them is forwarded to blastall as a single
# space-separated string.
query_gran, char_gran = sys.argv[1], sys.argv[2]
blast_args = " ".join(sys.argv[3:])

# One input/output/error file name per split. Each name is followed by a single
# space and appended to whatever the three lists already hold.
num_splits = count_splits()
for i in range(num_splits):
    inputlist += "input.%d " % i
    outputlist += "output.%d " % i
    errorlist += "error.%d " % i
96
# Makeflow generation starts here.
# First rule: every input.N file is produced from the query file by one local run
# of split_fasta, which receives the two granularities and the query path.
# (A single parenthesised argument prints the same text under Python 2.)
print("%s: %s split_fasta" % (inputlist, query))
print("\tLOCAL python split_fasta %s %s %s\n" % (query_gran, char_gran, query))
102
# Only the first "/"-separated component of the database path is listed as a
# dependency of each blastall rule.
db_split = database.split("/")
database = db_split[0]

# Patterns that find the "-i <query>" and "-o <output>" options inside the
# blastall argument string. The file names are escaped so that characters such as
# '.', '+' or '(' match literally instead of acting as regex operators, and the
# patterns are compiled once rather than rebuilt on every iteration.
query_flag = re.compile(r"-i\s*" + re.escape(query))
output_flag = re.compile(r"-o\s*" + re.escape(output))

# One rule per split: run blastall on input.N, writing output.N and error.N.
for i in range(num_splits):
    temp_cmd = query_flag.sub('-i input.' + str(i), blast_args)
    temp_cmd = output_flag.sub('-o output.' + str(i), temp_cmd)
    # Rule line: targets, then the files the job needs (binary, input, database).
    print("output.%d error.%d: blastall input.%d %s" % (i, i, i, database))
    # Command line: stderr of each job is captured in its own error.N file.
    print("\tblastall %s 2> error.%d\n" % (temp_cmd, i))
110
# Merge rule for results (very naive): the final output file depends on find, cat
# and every output.N; the local command concatenates all output.* files in the
# current directory into it and then deletes them.
print("%s: find cat %s" % (output, outputlist))
print("\tLOCAL find . -maxdepth 1 -name 'output.*' -exec cat {} \\\\;> %s;find . -maxdepth 1 -name 'output.*' -exec rm {} \\\\;\n" % output)

# Merge rule for errors (very naive): same scheme, collecting every error.* file
# into a single file named "error".
print("error: find cat %s" % errorlist)
print("\tLOCAL find . -maxdepth 1 -name 'error.*' -exec cat {} \\\\;> error;find . -maxdepth 1 -name 'error.*' -exec rm {} \\\\;\n")

# Cleanup rule for inputs (very naive): "clean" depends on the merged error file,
# the final output and every input.N; the command removes all input.* files and
# redirects its own stdout into the "clean" target.
print("clean: find rm error %s %s" % (output, inputlist))
print("\tLOCAL find . -maxdepth 1 -name 'input.*' -exec rm {} \\\\; > clean\n")