2
from asterias.lib.base import *
16
from stat import ST_SIZE
23
from asterias.lib.util.apperror import *
24
from asterias.lib.util.constants import *
26
#cgitb.enable() ## zz: eliminar for real work?
27
sys.stderr = sys.stdout
29
class GenesrfController(BaseController):
# Controller for the GeneSrF web front end: stores the uploaded files,
# launches the R job and answers the polling requests for its results.
# The limits below are class attributes; the methods read them as self.<name>.
31
# Compared with the number of R.* marker files in the running-procs
# directory; exceeding it raises ServerTooBusyError.
MAX_genesrf = 4 ## MAX_genesrf + 1 = Maximum number of R processes running at same time.
32
# Seconds; compared with the age (mtime) of each directory under TMP_DIR.
MAX_time = 3600 * 24 * 5 ## 5 is days until deletion of a tmp directory
33
# Seconds; compared with the age of running-proc marker files and of pid.txt.
R_MAX_time = 3600 * 4 ## 4 hours is max duration allowed for any process
34
# Upper bounds checked against the ST_SIZE of the stored uploads.
MAX_covariate_size = 363948523L ## a 500 * 40000 array of floats
35
MAX_class_size = 61897L
36
## f5 <- rep(paste(paste(letters, collapse = ""),
37
## paste(LETTERS, collapse="")), 1000)
38
## so each of 1000 labels has 48 chars.
45
return render_response('/' + GENESRF_MYT)
47
## For redirections, from Python Cookbook
49
def getQualifiedURL(self,uri = None):
    """Return a full URL starting with schema, servername and port.

    *uri* -- append this server-rooted uri (must start with a slash)

    The host part is HTTP_HOST when that environment variable is set and
    non-empty.  Otherwise it is built from SERVER_NAME (defaulting to
    'localhost') plus ':' + SERVER_PORT when the port is not the standard
    '80'.  The schema is always 'http'.  *self* is not used.

    NOTE(review): the values are read from os.environ, i.e. the CGI
    convention; confirm they are populated in the deployment this
    controller runs under.
    """
    schema, stdport = ('http', '80')
    host = os.environ.get('HTTP_HOST')
    if not host:
        # Only fall back to SERVER_NAME/SERVER_PORT when the client did
        # not tell us which host it addressed.
        host = os.environ.get('SERVER_NAME', 'localhost')
        port = os.environ.get('SERVER_PORT', stdport)
        if port != stdport:
            host = host + ":" + port

    result = "%s://%s" % (schema, host)
    if uri:
        result = result + uri
    return result
67
""" Return te scriptname part of the URL."""
68
return os.environ.get('SCRIPT_NAME', '')
71
""" Return a fully qualified URL to this script. """
72
return getQualifiedURL(getScriptname())
75
def fileUpload(self,fieldName):
76
"""Upload and get the files and do some checking. We assume there is an existing call
77
to fs = cgi.FieldStorage()"""
78
## we don't deal with OS specific "\n"intranet.cnio.es/
79
## because R does not have a problem (at least with Windows files)
80
## no problem in R either with empty carriage returns at end of file
84
if request.params.has_key(fieldName):
85
fileClient = request.params[fieldName].file
87
raise FieldNotAFileError(fieldName,GENESRF)
89
raise FieldRequiredError(fieldName,GENESRF)
91
# transferring files to final destination;
93
fileInServer = self.tmpDir + "/" + fieldName
94
srvfile = open(fileInServer, mode = 'w')
95
fileString = request.params[fieldName].value
96
srvfile.write(fileString)
99
## this is slower than reading all to memory and copying from
100
## there, but this is less taxing on memory.
101
## but with the current files, probably not worth it
103
# line = fileClient.readline()
105
# srvfile.write(line)
108
os.chmod(fileInServer, 0666)
110
if os.path.getsize(fileInServer) == 0:
111
raise FileZeroSizeError(fieldName,GENESRF)
113
def radioUpload(self,fieldName,acceptedValues):
114
"""Upload and get the values and do some checking. For radio selections
115
with text data; check those are in acceptedValues.
116
We assume there is an existing call to fs = cgi.FieldStorage()"""
119
fieldvalue = request.params[fieldName]
122
raise FieldRequiredError(fieldName,GENESRF)
126
raise FieldAFileError(fieldName,GENESRF)
130
if fieldvalue==type([]):
131
raise MultipleValueError(fieldName,GENESRF)
133
if fieldvalue not in acceptedValues:
134
raise FieldValueNotValidError(fieldName,GENESRF)
136
fileInServer = self.tmpDir + "/" + fieldName
137
srvfile = open(fileInServer, mode = 'w')
138
fileString = fieldvalue
139
srvfile.write(fileString)
141
os.chmod(fileInServer, 0666)
147
## Deleting tmp directories older than MAX_time
148
currentTime = time.time()
149
currentTmp = dircache.listdir(TMP_DIR)
150
for directory in currentTmp:
151
tmpS = TMP_DIR + "/" + directory
152
if (currentTime - os.path.getmtime(tmpS)) > self.MAX_time:
155
### Creating temporary directories
156
self.newDir = str(whrandom.randint(1, 10000)) + str(os.getpid()) + str(whrandom.randint(1, 100000)) + str(int(currentTime)) + str(whrandom.randint(1, 10000))
157
self.tmpDir = TMP_DIR + "/" + self.newDir
158
os.mkdir(self.tmpDir)
159
os.chmod(self.tmpDir, 0700)
162
idtype = self.radioUpload(RADIO_IDTYPE,acceptedIDTypes)
163
organism = self.radioUpload(RADIO_ORGANISM,acceptedOrganisms)
165
self.fileUpload(INPUT_CLASS)
166
if os.stat(self.tmpDir + '/' + INPUT_CLASS)[ST_SIZE] > self.MAX_class_size:
167
raise FileTooLargeError(self.title,INPUT_CLASS)
169
##check if file coming from preP
170
if request.params.has_key(INPUT_COVARIATE2):
171
#prep_tmpdir = fs.getfirst("covariate2")
172
shutil.copy("tmp/prep/outdata.txt", self.tmpDir + "/" + INPUT_COVARIATE2)
174
self.fileUpload(INPUT_COVARIATE)
175
if os.stat(self.tmpDir + '/' + INPUT_COVARIATE)[ST_SIZE] > self.MAX_covariate_size:
176
raise apperror.FileTooLargeError(GENESRF,INPUT_COVARIATE)
178
## Upload worked OK. We store the original names of the files in the
179
## browser for later report:
180
#fileNamesBrowser = open(self.tmpDir + '/' + FILENAMEBROW_DIR, mode = 'w')
181
#if request.params.has_key(INPUT_COVARIATE2):
182
# fileNamesBrowser.write(request.params[INPUT_COVARIATE2] + '\n')
183
#fileNamesBrowser.write(request.params[INPUT_CLASS] + '\n')
184
#fileNamesBrowser.close()
186
## current number of processes > max number of processes?
187
## and yes, we do it here, not before, so that we have the most
188
## current info about number of processes right before we launch R.
190
## Now, delete any R file left (e.g., from killing procs, etc).
191
RrunningFiles = dircache.listdir(TMP_DIR + "/" + RPROCS_DIR)
192
for Rtouchfile in RrunningFiles:
193
tmpS = TMP_DIR + "/" + RPROCS_DIR + Rtouchfile
194
if (currentTime - os.path.getmtime(tmpS)) > self.R_MAX_time:
197
## Now, verify any processes left
198
numRgenesrf = len(glob.glob(TMP_DIR + "/" + RPROCS_DIR + "/R.*@*%*"))
199
if numRgenesrf > self.MAX_genesrf:
200
raise ServerTooBusyError(self.title)
202
################ Launching R ###############
204
# prepare the arrayNames file:
206
covarInServer = self.tmpDir + "/" + INPUT_COVARIATE
207
arrayNames = self.tmpDir + "/" + ARRAYNAMES_DIR
208
srvfile = open(covarInServer, mode = 'r')
209
arrayfile = open(arrayNames, mode = 'w')
212
line = srvfile.readline()
214
if (line.find("#name") == 0) or (line.find("#NAME") == 0) or (line.find("#Name") == 0) \
215
or (line.find('"#name"') == 0) or (line.find('"#NAME"') == 0) or (line.find('"#Name"') == 0):
216
num_name_lines = num_name_lines + 1
217
if num_name_lines > 1:
218
raise NameLineDataMatrixError(GENESRF)
219
arrayfile.write(line)
220
arrayfile.write("\n\n")
224
os.chmod(arrayNames, 0600)
226
## It would be good to use spawnl or similar instead of system,
227
## but I have no luck with R. Thus, I keep using system.
228
## Its safety depends crucially on the newDir not being altered,
229
## but newDir is not passed from any other user-reachable place
230
## (it is created here).
232
## recall to include in R
233
##pid <- Sys.getpid()
234
##write.table(file = "pid.txt", pid, row.names = FALSE, col.names = FALSE)
236
## touch Rout, o.w. checkdone can try to open a non-existing file
237
touchRout = os.system("/bin/touch " + self.tmpDir + "/f1.Rout")
238
##touchRrunning = os.system("/bin/touch /http/genesrf/www/R.running.procs/R." + newDir)
239
touchRrunning = os.system(RPROCS_DIR + "/R." + self.newDir + "@" + socket.gethostname())
240
shutil.copy(LIB_DIR + "/f1.R", self.tmpDir)
241
## we add the 2> error.msg because o.w. if we kill R we get a server error as standard
242
## error is sent to the server
243
# Rcommand = "cd " + tmpDir + "; " + "/usr/bin/R CMD BATCH --no-restore --no-readline --no-save -q f1.R 2> error.msg &"
244
# Rrun = os.system(Rcommand)
245
tryrrun = os.system(MPILOG_DIR + '/tryRrun2.py ' + self.tmpDir + ' 10 ' + 'GeneSrF &')
246
createResultsFile = os.system("/bin/touch " + self.tmpDir + "/results.txt")
250
########### Creating a results.html ###############
252
resultlink = GENESRF_URL + "/" + self.tmpDir + "/" + AUTOREFRESH_MYT
253
return render_response('/' + AUTOREFRESH_MYT,title=GENESRF,newDir=self.newDir)
256
shutil.rmtree(self.tmpDir)
260
def checkdone(self, ARGS):
# Poll a previously submitted job: validate the job id passed in ARGS,
# look at the job directory for completion markers and R output, and then
# raise an application error, render the results page, or render the
# auto-refresh page again.
# NOTE(review): this view of the method has no indentation and appears to be
# missing lines (branch headers, the definitions of soFar, nf1 and listPNGS).
# The notes below describe only the statements that are visible.
264
# newDir is the caller-supplied job directory name; tmpDir is its path
# under TMP_DIR.
self.newDir = ARGS.get('newDir','')
265
self.tmpDir = TMP_DIR + "/" + self.newDir
267
## Changing to the appropriate directory
269
if type(self.newDir) is types.ListType:
271
raise NewDirListError(GENESRF)
274
raise NewDirEmptyError(GENESRF)
276
# Anything other than digits is rejected, so newDir cannot carry path
# separators or shell metacharacters into the paths/commands built below.
if re.search(r'[^0-9]', str(self.newDir)):
278
raise NewDirNotValidFormatError(GENESRF)
280
# redirectLoc = "/tmp/" + newDir
282
if not os.path.isdir(self.tmpDir):
284
raise NewDirNotDirectoryError(GENESRF)
287
## Were we already done in a previous execution?
288
## No need to reopen files or check anything else. Return url with results and bail out.
289
# Both marker names are produced further down by renaming pid.txt.
if os.path.exists(self.tmpDir + "/natural.death.pid.txt") or os.path.exists(self.tmpDir + "/killed.pid.txt"):
290
# NOTE(review): a Location header printed to stdout is CGI style; confirm it
# has any effect in an action that otherwise returns render_response(...).
print 'Location: http://genesrf.bioinfo.cnio.es/tmp/'+ self.newDir + '/results.html \n\n'
293
## No, we were not done. Need to examine R output
294
# NOTE(review): no read()/close() of Rrout is visible; soFar (used below) is
# presumably the contents of this file -- confirm.
Rrout = open(self.tmpDir + "/f1.Rout")
297
finishedOK = soFar.endswith("Normal termination\n")
298
errorRun = soFar.endswith("Execution halted\n")
300
if os.path.exists(self.tmpDir + "/pid.txt"):
301
## do we need to kill an R process?
302
# pid.txt older than R_MAX_time: halt LAM/MPI for the session named in
# lamSuffix, mark the job as killed, drop its running-procs marker and
# report RProcessKilledError.
if (time.time() - os.path.getmtime(self.tmpDir + "/pid.txt")) > self.R_MAX_time:
303
lamenv = open(self.tmpDir + "/lamSuffix", mode = "r").readline()
305
os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
306
'; lamhalt -H; lamwipe -H')
310
os.rename(self.tmpDir + '/pid.txt', self.tmpDir + '/killed.pid.txt')
311
os.remove(self.tmpDir + '/f1.R')
313
# Hard-coded running-procs path; removes every R.<newDir>* marker file.
os.system("rm /http/genesrf/www/R.running.procs/R." + self.newDir + "*")
316
Rresults = open(self.tmpDir + "/results.txt")
317
raise RProcessKilledError(GENESRF,self.R_MAX_time,Rresults)
320
# NOTE(review): from here on the code uses bare tmpDir / newDir instead of
# self.tmpDir / self.newDir.  No local assignment is visible, so these look
# like NameErrors -- confirm and switch to the self. attributes.
# Presumably this is the errorRun branch (it ends in RProcessRunError).
os.rename(tmpDir + '/pid.txt', tmpDir + '/natural.death.pid.txt')
321
os.remove(tmpDir + '/f1.R')
322
## chkmpi = os.system('/http/mpi.log/adhocCheckRmpi.py GeneSrF&')
324
lamenv = open(tmpDir + "/lamSuffix", mode = "r").readline()
328
os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
329
'; lamhalt -H; lamwipe -H')
333
os.system("rm /http/genesrf/www/R.running.procs/R." + newDir + "*")
336
# NOTE(review): no assignment to Rresults is visible on this path (it is set
# in the kill branch above and again near the end) -- confirm it is defined.
raise RProcessRunError(GENESRF,Rresults)
339
##zz: killing lam seems not to be working from here...
341
# Presumably the finishedOK branch: same LAM shutdown and marker handling as
# above, followed by packaging of the results.
lamenv = open(tmpDir + "/lamSuffix", mode = "r").readline()
345
lamkill = os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
346
'; lamhalt -H; lamwipe -H')
349
os.rename(tmpDir + '/pid.txt', tmpDir + '/natural.death.pid.txt')
350
os.remove(tmpDir + '/f1.R')
352
os.system("rm /http/genesrf/www/R.running.procs/R." + newDir + "*")
355
print 'Location: http://genesrf.bioinfo.cnio.es/tmp/'+ newDir + '/results.html \n\n'
357
## compress all the results
358
# results.txt is always added; each PNG below only if it exists.  The second
# argument of add() is the name the file gets inside the archive.
allResults = tarfile.open(tmpDir + '/all.results.tar.gz', 'w:gz')
359
allResults.add(tmpDir + '/results.txt', 'results.txt')
360
if os.path.exists(tmpDir + "/fselprobplot.png"):
361
allResults.add(tmpDir + '/fselprobplot.png', 'SelectionProbabilityPlot.png')
362
if os.path.exists(tmpDir + "/fimpspec-all.png"):
363
allResults.add(tmpDir + '/fimpspec-all.png', 'ImportanceSpectrumAllGenes.png')
364
if os.path.exists(tmpDir + "/fimpspec-200.png"):
365
allResults.add(tmpDir + '/fimpspec-200.png', 'ImportanceSpectrum200Genes.png')
366
if os.path.exists(tmpDir + "/fimpspec-30.png"):
367
allResults.add(tmpDir + '/fimpspec-30.png', 'ImportanceSpectrum30Genes.png')
# NOTE(review): listPNGS and nf1 are not defined in the visible lines;
# presumably a glob of fboot*.png and its length, by analogy with listPDFS
# below -- confirm.  The last list entry is stored as the OOB-error figure,
# the others as numbered prediction figures.
368
allResults.add(listPNGS[nf1 - 1], 'OOBErrorvsNumGenes.png')
370
for index in range(nf1 - 1):
371
allResults.add(listPNGS[index], 'OOBPredictionsFigure' + str(index + 1) + '.png')
373
listPDFS = glob.glob(tmpDir + "/fboot*.pdf")
376
# NOTE(review): unlike the PNGs, no existence check is visible for the PDFs,
# and listPDFS is indexed with nf1 rather than its own length -- confirm.
allResults.add(tmpDir + '/fselprobplot.pdf', 'SelectionProbabilityPlot.pdf')
377
allResults.add(tmpDir + '/fimpspec-all.pdf', 'ImportanceSpectrumAllGenes.pdf')
378
allResults.add(tmpDir + '/fimpspec-200.pdf', 'ImportanceSpectrum200Genes.pdf')
379
allResults.add(tmpDir + '/fimpspec-30.pdf', 'ImportanceSpectrum30Genes.pdf')
380
allResults.add(listPDFS[nf1 - 1], 'OOBErrorvsNumGenes.pdf')
382
for index in range(nf1 - 1):
383
allResults.add(listPDFS[index], 'OOBPredictionsFigure' + str(index + 1) + '.pdf')
386
# Hand the text of results.txt to the results template.
# NOTE(review): no close() of allResults or Rresults is visible.
Rresults = open(self.tmpDir + "/results.txt")
387
resultsFile = Rresults.read()
388
return render_response('/' + RESULTS_MYT,title=GENESRF,tmpDir=self.tmpDir,resultsfile=resultsFile)
391
## we only end up here if: we were not done in a previous run AND no process was overtime
392
## AND we did not just finish. So we must continue.
393
# resultlink is computed but not passed to the render call below.
resultlink = GENESRF_URL + "/" + self.tmpDir + "/" + AUTOREFRESH_MYT
394
return render_response('/' + AUTOREFRESH_MYT,title=GENESRF,newDir=self.newDir)