~rdiaz02/asterias-pylons/main01

« back to all changes in this revision

Viewing changes to asterias/controllers/genesrf.py.bak

  • Committer: RDU
  • Date: 2007-01-25 13:24:32 UTC
  • Revision ID: rdiaz@ligarto.org-20070125132432-90pymze7kb13txs0
 

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
 
 
2
from asterias.lib.base import *
 
3
 
 
4
#!/usr/bin/python
 
5
import glob
 
6
import socket
 
7
import sys
 
8
import os
 
9
import cgi 
 
10
import types
 
11
import time
 
12
import shutil
 
13
import dircache
 
14
import string
 
15
import whrandom
 
16
from stat import ST_SIZE
 
17
import signal
 
18
import re
 
19
import tarfile
 
20
#import cgitb
 
21
 
 
22
# asterias modules
 
23
from asterias.lib.util.apperror import *
 
24
from asterias.lib.util.constants import *
 
25
 
 
26
#cgitb.enable() ## zz: eliminar for real work?
 
27
# From here on, anything written to sys.stderr goes to sys.stdout instead.
# NOTE(review): this looks like a leftover from the CGI version of this code
# (see the commented-out cgitb lines) -- confirm it is still wanted.
sys.stderr = sys.stdout
 
28
 
 
29
class GenesrfController(BaseController):
 
30
    
 
31
        MAX_genesrf = 4 ## MAX_genesrf + 1 = Maximum number of R processes running at same time.
 
32
        MAX_time = 3600 * 24 * 5 ## 5 is days until deletion of a tmp directory
 
33
        R_MAX_time = 3600 * 4 ## 4 hours is max duration allowd for any process
 
34
        MAX_covariate_size = 363948523L ## a 500 * 40000 array of floats
 
35
        MAX_class_size = 61897L
 
36
        ##  f5 <- rep(paste(paste(letters, collapse = ""),
 
37
        ##                  paste(LETTERS, collapse="")), 1000)
 
38
        ## so each of 1000 labels has 48 chars.
 
39
        
 
40
        newDir = ""
 
41
        tmpDir = ""
 
42
        
 
43
        
 
44
        def index(self):
 
45
                return render_response('/' + GENESRF_MYT)
 
46
        
 
47
        ## For redirections, from Python Cookbook
 
48
 
 
49
        def getQualifiedURL(self,uri = None):
 
50
                """ Return a full URL starting with schema, servername and port.
 
51
                
 
52
                        *uri* -- append this server-rooted uri (must start with a slash)
 
53
                """
 
54
                schema, stdport = ('http', '80')
 
55
                host = os.environ.get('HTTP_HOST')
 
56
                if not host:
 
57
                        host = os.environ.get('SERVER_NAME')
 
58
                        port = os.environ.get('SERVER_PORT', '80')
 
59
                        if port != stdport: host = host + ":" + port
 
60
                self.tmpDir
 
61
                result = "%s://%s" % (schema, host)
 
62
                if uri: result = result + uri
 
63
                
 
64
                return result
 
65
        
 
66
        def getScriptname():
 
67
                """ Return te scriptname part of the URL."""
 
68
                return os.environ.get('SCRIPT_NAME', '')
 
69
        
 
70
        def getBaseURL():
 
71
                """ Return a fully qualified URL to this script. """
 
72
                return getQualifiedURL(getScriptname())
 
73
        
 
74
        
 
75
        def fileUpload(self,fieldName):
 
76
                """Upload and get the files and do some checking. We assume there is an existing call
 
77
                to fs = cgi.FieldStorage()"""
 
78
                ## we don't deal with OS specific "\n"intranet.cnio.es/
 
79
                ## because R does not have a problem (at least with Windows files)
 
80
                ## no problem in R either with empty carriage returns at end of file
 
81
                
 
82
                print fieldName
 
83
                
 
84
                if request.params.has_key(fieldName):
 
85
                        fileClient = request.params[fieldName].file
 
86
                        if not fileClient: 
 
87
                                raise FieldNotAFileError(fieldName,GENESRF)
 
88
                else:
 
89
            raise FieldRequiredError(fieldName,GENESRF)
 
90
                
 
91
                # transferring files to final destination;
 
92
                
 
93
                fileInServer = self.tmpDir + "/" + fieldName
 
94
                srvfile = open(fileInServer, mode = 'w')
 
95
                fileString = request.params[fieldName].value
 
96
                srvfile.write(fileString)
 
97
                srvfile.close()
 
98
                
 
99
                ## this is slower than reading all to memory and copying from
 
100
                ## there, but this is less taxing on memory.
 
101
                ## but with the current files, probably not worth it
 
102
                #     while 1:
 
103
                #         line = fileClient.readline()
 
104
                #         if not line: break
 
105
                #         srvfile.write(line)
 
106
                #     srvfile.close()
 
107
                
 
108
                os.chmod(fileInServer, 0666)
 
109
                        
 
110
                if os.path.getsize(fileInServer) == 0:
 
111
                        raise FileZeroSizeError(fieldName,GENESRF)
 
112
                                
 
113
        def radioUpload(self,fieldName,acceptedValues):
 
114
                """Upload and get the values and do some checking. For radio selections
 
115
                with text data; check those are in acceptedValues.
 
116
                We assume there is an existing call to fs = cgi.FieldStorage()"""
 
117
                
 
118
                print fieldName
 
119
                fieldvalue = request.params[fieldName]
 
120
                
 
121
                if not fieldvalue:
 
122
                        raise FieldRequiredError(fieldName,GENESRF)
 
123
                else:
 
124
                        try:
 
125
                                f = open(fieldvalue)
 
126
                                raise FieldAFileError(fieldName,GENESRF)
 
127
                        except (IOError):
 
128
                                pass
 
129
                        
 
130
                        if fieldvalue==type([]):
 
131
                                raise MultipleValueError(fieldName,GENESRF)
 
132
                                                        
 
133
                if fieldvalue not in acceptedValues:
 
134
                        raise FieldValueNotValidError(fieldName,GENESRF)
 
135
                
 
136
                fileInServer = self.tmpDir + "/" + fieldName
 
137
                srvfile = open(fileInServer, mode = 'w')
 
138
                fileString = fieldvalue
 
139
                srvfile.write(fileString)
 
140
                srvfile.close()
 
141
                os.chmod(fileInServer, 0666)
 
142
                
 
143
                return fieldvalue
 
144
                        
 
145
        def submit(self):
 
146
                
 
147
                ## Deleting tmp directories older than MAX_time
 
148
                currentTime = time.time()
 
149
                currentTmp = dircache.listdir(TMP_DIR)
 
150
                for directory in currentTmp:
 
151
                        tmpS = TMP_DIR + "/" + directory
 
152
                        if (currentTime - os.path.getmtime(tmpS)) > self.MAX_time:
 
153
                                shutil.rmtree(tmpS)
 
154
                                
 
155
                ### Creating temporal directoriesfs
 
156
                self.newDir = str(whrandom.randint(1, 10000)) + str(os.getpid()) + str(whrandom.randint(1, 100000)) + str(int(currentTime)) + str(whrandom.randint(1, 10000))
 
157
                self.tmpDir = TMP_DIR + "/" + self.newDir
 
158
                os.mkdir(self.tmpDir)
 
159
                os.chmod(self.tmpDir, 0700)
 
160
                
 
161
                try:
 
162
                        idtype = self.radioUpload(RADIO_IDTYPE,acceptedIDTypes)
 
163
                        organism = self.radioUpload(RADIO_ORGANISM,acceptedOrganisms)
 
164
                        
 
165
                        self.fileUpload(INPUT_CLASS)
 
166
                        if os.stat(self.tmpDir + '/' + INPUT_CLASS)[ST_SIZE] > self.MAX_class_size:
 
167
                                raise FileTooLargeError(self.title,INPUT_CLASS)
 
168
                        
 
169
                        ##check if file coming from preP
 
170
                        if request.params.has_key(INPUT_COVARIATE2):
 
171
                                #prep_tmpdir = fs.getfirst("covariate2")
 
172
                                shutil.copy("tmp/prep/outdata.txt", self.tmpDir + "/" + INPUT_COVARIATE2)
 
173
                        else:
 
174
                                self.fileUpload(INPUT_COVARIATE)
 
175
                                if os.stat(self.tmpDir + '/' + INPUT_COVARIATE)[ST_SIZE] > self.MAX_covariate_size:
 
176
                                        raise apperror.FileTooLargeError(GENESRF,INPUT_COVARIATE)
 
177
                                                
 
178
                        ## Upload worked OK. We store the original names of the files in the
 
179
                        ## browser for later report:
 
180
                        #fileNamesBrowser = open(self.tmpDir + '/' + FILENAMEBROW_DIR, mode = 'w')
 
181
                        #if request.params.has_key(INPUT_COVARIATE2):
 
182
                        #       fileNamesBrowser.write(request.params[INPUT_COVARIATE2] + '\n')
 
183
                        #fileNamesBrowser.write(request.params[INPUT_CLASS] + '\n')
 
184
                        #fileNamesBrowser.close()
 
185
                        
 
186
                        ## current number of processes > max number of processes?
 
187
                        ## and yes, we do it here, not before, so that we have the most
 
188
                        ## current info about number of pFieldRequiredErrorrocess right before we launch R.
 
189
                        
 
190
                        ## Now, delete any R file left (e.g., from killing procs, etc).
 
191
                        RrunningFiles = dircache.listdir(TMP_DIR + "/" + RPROCS_DIR)
 
192
                        for Rtouchfile in RrunningFiles:
 
193
                                tmpS = TMP_DIR + "/" + RPROCS_DIR + Rtouchfile
 
194
                                if (currentTime - os.path.getmtime(tmpS)) > self.R_MAX_time:
 
195
                                        os.remove(tmpS)
 
196
                        
 
197
                        ## Now, verify any processes left
 
198
                        numRgenesrf = len(glob.glob(TMP_DIR + "/" + RPROCS_DIR + "/R.*@*%*"))
 
199
                        if numRgenesrf > self.MAX_genesrf:
 
200
                                raise ServerTooBusyError(self.title)
 
201
                        
 
202
                        ################        Launching R   ###############
 
203
                        
 
204
                        # prepare the arrayNames file:
 
205
                        
 
206
                        covarInServer = self.tmpDir + "/" + INPUT_COVARIATE
 
207
                        arrayNames = self.tmpDir + "/" + ARRAYNAMES_DIR
 
208
                        srvfile = open(covarInServer, mode = 'r')
 
209
                        arrayfile = open(arrayNames, mode = 'w')
 
210
                        num_name_lines = 0
 
211
                        while 1:
 
212
                                line = srvfile.readline()
 
213
                                if not line: break
 
214
                                if (line.find("#name") == 0) or (line.find("#NAME") == 0) or (line.find("#Name") == 0) \
 
215
                                        or (line.find('"#name"') == 0) or (line.find('"#NAME"') == 0) or (line.find('"#Name"') == 0):
 
216
                                        num_name_lines = num_name_lines + 1
 
217
                                        if num_name_lines > 1:
 
218
                                                raise NameLineDataMatrixError(GENESRF)
 
219
                                        arrayfile.write(line)
 
220
                                        arrayfile.write("\n\n")
 
221
                        
 
222
                        srvfile.close()
 
223
                        arrayfile.close()   
 
224
                        os.chmod(arrayNames, 0600)
 
225
                        
 
226
                        ## It would be good to use spawnl or similar instead of system,
 
227
                        ## but I have no luck with R. This, I keep using system.
 
228
                        ## Its safety depends crucially on the newDir not being altered,
 
229
                        ## but newDir is not passed from any other user-reachable place
 
230
                        ## (it is created here).
 
231
                                                
 
232
                        ## recall to include in R
 
233
                        ##pid <- Sys.getpid()
 
234
                        ##write.table(file = "pid.txt", pid, row.names = FALSE, col.names = FALSE)
 
235
                        
 
236
                        ## touch Rout, o.w. checkdone can try to open a non-existing file
 
237
                        touchRout = os.system("/bin/touch " + self.tmpDir + "/f1.Rout") 
 
238
                        ##touchRrunning = os.system("/bin/touch /http/genesrf/www/R.running.procs/R." + newDir)
 
239
                        touchRrunning = os.system(RPROCS_DIR + "/R." + self.newDir + "@" + socket.gethostname())
 
240
                        shutil.copy(LIB_DIR + "/f1.R", self.tmpDir)
 
241
                        ## we add the 2> error.msg because o.w. if we kill R we get a server error as standard
 
242
                        ## error is sent to the server
 
243
                        # Rcommand = "cd " + tmpDir + "; " + "/usr/bin/R CMD BATCH --no-restore --no-readline --no-save -q f1.R 2> error.msg &"
 
244
                        # Rrun = os.system(Rcommand)
 
245
                        tryrrun = os.system(MPILOG_DIR + '/tryRrun2.py ' + self.tmpDir + ' 10 ' + 'GeneSrF &')
 
246
                        createResultsFile = os.system("/bin/touch " + self.tmpDir + "/results.txt")
 
247
                        
 
248
                        print "here 3"
 
249
                        
 
250
                        ###########   Creating a results.hmtl   ###############
 
251
                        
 
252
                        resultlink = GENESRF_URL + "/" + self.tmpDir + "/" + AUTOREFRESH_MYT
 
253
                        return render_response('/' + AUTOREFRESH_MYT,title=GENESRF,newDir=self.newDir)
 
254
                        
 
255
                except AppError, e:
 
256
                        shutil.rmtree(self.tmpDir)
 
257
                        return e.showError()
 
258
                                                
 
259
        
 
260
        def checkdone(self, ARGS):
 
261
                
 
262
                try:
 
263
                        
 
264
                        self.newDir = ARGS.get('newDir','')
 
265
                        self.tmpDir = TMP_DIR + "/" + self.newDir
 
266
                        
 
267
                        ## Changing to the appropriate directory
 
268
                        if self.newDir!="":
 
269
                                if type(self.newDir) is types.ListType:
 
270
                                        # ?????????
 
271
                                        raise NewDirListError(GENESRF)
 
272
                        else:
 
273
                                # ??????
 
274
                                raise NewDirEmptyError(GENESRF)
 
275
                        
 
276
                        if re.search(r'[^0-9]', str(self.newDir)):
 
277
                                # ??????
 
278
                                raise NewDirNotValidFormatError(GENESRF)
 
279
                        
 
280
                        # redirectLoc = "/tmp/" + newDir
 
281
                        
 
282
                        if not os.path.isdir(self.tmpDir):
 
283
                                # ??????
 
284
                                raise NewDirNotDirectoryError(GENESRF)
 
285
                        
 
286
                        
 
287
                        ## Were we already done in a previous execution?
 
288
                        ## No need to reopen files or check anything else. Return url with results and bail out.
 
289
                        if os.path.exists(self.tmpDir + "/natural.death.pid.txt") or os.path.exists(self.tmpDir + "/killed.pid.txt"):
 
290
                                print 'Location: http://genesrf.bioinfo.cnio.es/tmp/'+ self.newDir + '/results.html \n\n'
 
291
                                sys.exit()
 
292
                        
 
293
                        ## No, we were not done. Need to examine R output
 
294
                        Rrout = open(self.tmpDir + "/f1.Rout")
 
295
                        soFar = Rrout.read()
 
296
                        Rrout.close()
 
297
                        finishedOK = soFar.endswith("Normal termination\n")
 
298
                        errorRun = soFar.endswith("Execution halted\n")
 
299
                        
 
300
                        if os.path.exists(self.tmpDir + "/pid.txt"):
 
301
                                ## do we need to kill an R process?
 
302
                                if (time.time() - os.path.getmtime(self.tmpDir + "/pid.txt")) > self.R_MAX_time:
 
303
                                        lamenv = open(self.tmpDir + "/lamSuffix", mode = "r").readline()
 
304
                                        try:
 
305
                                                os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
 
306
                                                        '; lamhalt -H; lamwipe -H')
 
307
                                        except:
 
308
                                                None
 
309
                                        
 
310
                                        os.rename(self.tmpDir + '/pid.txt', self.tmpDir + '/killed.pid.txt')
 
311
                                        os.remove(self.tmpDir + '/f1.R')
 
312
                                        try:
 
313
                                                os.system("rm /http/genesrf/www/R.running.procs/R." + self.newDir + "*")
 
314
                                        except:
 
315
                                                None
 
316
                                        Rresults = open(self.tmpDir + "/results.txt")
 
317
                                        raise RProcessKilledError(GENESRF,self.R_MAX_time,Rresults)
 
318
                        
 
319
                        if errorRun > 0:
 
320
                                os.rename(tmpDir + '/pid.txt', tmpDir + '/natural.death.pid.txt')
 
321
                                os.remove(tmpDir + '/f1.R')
 
322
                                ##    chkmpi = os.system('/http/mpi.log/adhocCheckRmpi.py GeneSrF&')
 
323
                                try:
 
324
                                        lamenv = open(tmpDir + "/lamSuffix", mode = "r").readline()
 
325
                                except:
 
326
                                        None
 
327
                                try:
 
328
                                        os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
 
329
                                                '; lamhalt -H; lamwipe -H')
 
330
                                except:
 
331
                                        None
 
332
                                try:
 
333
                                        os.system("rm /http/genesrf/www/R.running.procs/R." + newDir + "*")
 
334
                                except:
 
335
                                        None
 
336
                                raise RProcessRunError(GENESRF,Rresults)
 
337
                        
 
338
                        elif finishedOK > 0:
 
339
                                ##zz: killing lam seems not to be working from here...
 
340
                                try:
 
341
                                        lamenv = open(tmpDir + "/lamSuffix", mode = "r").readline()
 
342
                                except:
 
343
                                        None
 
344
                                try:
 
345
                                        lamkill = os.system('export LAM_MPI_SESSION_SUFFIX=' + lamenv +
 
346
                                                        '; lamhalt -H; lamwipe -H')
 
347
                                except:
 
348
                                        None
 
349
                                os.rename(tmpDir + '/pid.txt', tmpDir + '/natural.death.pid.txt')
 
350
                                os.remove(tmpDir + '/f1.R')
 
351
                                try:
 
352
                                        os.system("rm /http/genesrf/www/R.running.procs/R." + newDir  + "*")
 
353
                                except:
 
354
                                        None
 
355
                                print 'Location: http://genesrf.bioinfo.cnio.es/tmp/'+ newDir + '/results.html \n\n'
 
356
                                
 
357
                                ## compress all the results
 
358
                                allResults = tarfile.open(tmpDir + '/all.results.tar.gz', 'w:gz')
 
359
                                allResults.add(tmpDir + '/results.txt', 'results.txt')
 
360
                                if os.path.exists(tmpDir + "/fselprobplot.png"): 
 
361
                                        allResults.add(tmpDir + '/fselprobplot.png', 'SelectionProbabilityPlot.png')
 
362
                                if os.path.exists(tmpDir + "/fimpspec-all.png"): 
 
363
                                        allResults.add(tmpDir + '/fimpspec-all.png', 'ImportanceSpectrumAllGenes.png')
 
364
                                if os.path.exists(tmpDir + "/fimpspec-200.png"): 
 
365
                                        allResults.add(tmpDir + '/fimpspec-200.png', 'ImportanceSpectrum200Genes.png')
 
366
                                if os.path.exists(tmpDir + "/fimpspec-30.png"): 
 
367
                                        allResults.add(tmpDir + '/fimpspec-30.png', 'ImportanceSpectrum30Genes.png')
 
368
                                allResults.add(listPNGS[nf1 - 1], 'OOBErrorvsNumGenes.png')
 
369
                                if nf1 > 1:
 
370
                                        for index in range(nf1 - 1):
 
371
                                                allResults.add(listPNGS[index], 'OOBPredictionsFigure' + str(index + 1) + '.png')
 
372
                                ## Now, the pdfs
 
373
                                listPDFS = glob.glob(tmpDir + "/fboot*.pdf")
 
374
                                if len(listPDFS):
 
375
                                        listPDFS.sort()
 
376
                                        allResults.add(tmpDir + '/fselprobplot.pdf', 'SelectionProbabilityPlot.pdf') 
 
377
                                        allResults.add(tmpDir + '/fimpspec-all.pdf', 'ImportanceSpectrumAllGenes.pdf')
 
378
                                        allResults.add(tmpDir + '/fimpspec-200.pdf', 'ImportanceSpectrum200Genes.pdf')
 
379
                                        allResults.add(tmpDir + '/fimpspec-30.pdf', 'ImportanceSpectrum30Genes.pdf')
 
380
                                        allResults.add(listPDFS[nf1 - 1], 'OOBErrorvsNumGenes.pdf')
 
381
                                        if nf1 > 1:
 
382
                                                for index in range(nf1 - 1):
 
383
                                                        allResults.add(listPDFS[index], 'OOBPredictionsFigure' + str(index + 1) + '.pdf')
 
384
                                allResults.close()
 
385
                                
 
386
                                Rresults = open(self.tmpDir + "/results.txt")
 
387
                                resultsFile = Rresults.read()
 
388
                                return render_response('/' + RESULTS_MYT,title=GENESRF,tmpDir=self.tmpDir,resultsfile=resultsFile)
 
389
                        
 
390
                        else:
 
391
                                ## we only end up here if: we were not done in a previous run AND no process was overtime 
 
392
                                ## AND we did not just finish. So we must continue.
 
393
                                resultlink = GENESRF_URL + "/" + self.tmpDir + "/" + AUTOREFRESH_MYT
 
394
                                return render_response('/' + AUTOREFRESH_MYT,title=GENESRF,newDir=self.newDir)
 
395
                        
 
396
                except AppError, e:
 
397
                        return e.showError()
 
398
        
 
399
                
 
400
 
 
401
 
 
402
 
 
403
 
 
404