~grosmoteur/grosmoteur/trunk

« back to all changes in this revision

Viewing changes to lib/groTools.py

  • Committer: kim g
  • Date: 2014-05-20 22:26:52 UTC
  • Revision ID: kmgrds@gmail.com-20140520222652-344s6vxjkheaa1yx
correct language guessing

Show diffs side-by-side

Legend: added lines

Legend: removed lines

Lines of Context:
20
20
 
21
21
#import pattern
22
22
 
23
 
verbose=False
24
 
#verbose=True
 
23
debug=False
 
24
#debug=True
25
25
 
26
26
class GroTools(QDialog, Ui_GroTools):
27
27
        """
46
46
                self.columns=parent.textcolumns
47
47
                self.columnslabel.setText("source columns: "+", ".join(self.columns))
48
48
                
49
 
#               for k in sorted(self.textcolumns):self.inputColumn.addItem(self.textcolumns[k])
50
49
                
51
50
                lemmatizeLanguages={"en":"English", "de":"German", "es":"Spanish", "nl":"Dutch","fr":"French","it":"Italian"}
52
 
#               dict((v,k) for k,v in ref.iteritems())
53
51
                for l in sorted(lemmatizeLanguages.values()):
54
52
                        self.language.addItem(l)
55
53
                self.language.setCurrentIndex(1)
179
177
                self.progressBar.setMaximum(todo)
180
178
                
181
179
                while not self.base.reqs.empty():
182
 
                        if verbose: print "not empty",self.base.reqs.qsize()
 
180
                        if debug: print "not empty",self.base.reqs.qsize()
183
181
                        self.progressBar.setValue(todo-self.base.reqs.qsize())
184
182
                        sleep(1)
185
183
                        
191
189
                
192
190
        
193
191
        def prepareColumns(self,  affix):
194
 
                if verbose:print "prepareColumns", affix
 
192
                if debug:print "prepareColumns", affix
195
193
                self.insertColumns=[c+unicode(affix.text()) for c in self.columns]
196
194
 
197
195
                
198
196
                count=len(self.allcolumns)
199
 
                if verbose:
 
197
                if debug:
200
198
                        print "insertColumns", self.insertColumns
201
199
                        print "self.allcolumns", self.allcolumns
202
200
                for ic in self.insertColumns:
215
213
                #print "self.textname2textid", self.textname2textid
216
214
                self.insertTextIds=[self.textname2textid[c]  for c in self.insertColumns]
217
215
                self.sourceTextIds=[self.textname2textid[c]  for c in self.columns]
218
 
                if verbose: print "insertTextIds", self.insertTextIds
 
216
                if debug: print "insertTextIds", self.insertTextIds
219
217
#               sys.exit()
220
218
                
221
219
        
227
225
                #(totalnum,) =self.base.select("select count(*) from sources;").next()
228
226
                self.progressBar.setMaximum(self.totalnum)
229
227
                self.progressBar.show()
230
 
                if verbose: print "self.selectstatement",self.selectstatement
 
228
                if debug: print "self.selectstatement",self.selectstatement
231
229
                sele=self.base.select(self.selectstatement)
232
230
                #go extract the part we choose
233
231
                while True:     
238
236
                        
239
237
                        i=columns[0]
240
238
                        for columncontent in columns[1:]:
241
 
                                if verbose:
 
239
                                if debug:
242
240
                                        try:print "oldcolumn",unicode(columncontent)[:100]
243
241
                                        except:pass
244
242
#                               try: (columncontent, )=columncontent
245
243
#                               except:pass
246
244
#                               break
247
245
                                newcolumn=newfunction(unicode(columncontent))
248
 
                                if verbose: 
 
246
                                if debug: 
249
247
                                        try:print "newcolumn",textids[c], newcolumn[:100] # in try to avoid encoding problems
250
248
                                        except:pass
251
249
                                nbs, nbc = self.computeStat(newcolumn)
257
255
                        
258
256
                        self.progressBar.setValue(i)
259
257
                
260
 
#               print "makeTextual", self.textualization, self.textualId
 
258
                if debug: print "makeTextual", self.textualization, self.textualId
261
259
                self.base.makeTextual(self.textualization, self.textualId)
262
260
                # self.base.execute("--commit--")
263
261
        
283
281
                        i=columns[0]
284
282
                        rowids+=[i]
285
283
                        # print self.rowidselectstatement
286
 
                        #if verbose: 
 
284
                        #if debug: 
287
285
                                #try:print self.rowidselectstatement, (str(i), )
288
286
                                #except:pass
289
287
                        #try:columns = self.base.select(self.rowidselectstatement, (str(i), )).next()
290
288
                        #except:continue
291
289
                        for columncontent in columns[1:]:
292
 
                                if verbose: 
 
290
                                if debug: 
293
291
                                        try:print i,"columncontent", columncontent[:100]
294
292
                                        except:pass
295
293
                                urheenin.write(columncontent+"\n<grocolumn>\n")
299
297
                        self.progressBar.setValue(i/3)
300
298
                urheenin.close()
301
299
                
302
 
                if verbose: print [os.path.join( os.getcwd(),"lib", "tools", self.urheenfile)] + shlex.split( self.urheenoption) 
 
300
                if debug: print [os.path.join( os.getcwd(),"lib", "tools", self.urheenfile)] + shlex.split( self.urheenoption) 
303
301
                if platform.startswith("linux"):                command=" ".join([os.path.join( os.getcwd(),"lib", "tools", self.urheenfile)] + shlex.split( self.urheenoption) )
304
302
                else:                                                                   command=[os.path.join( os.getcwd(),"lib", "tools", self.urheenfile)] + shlex.split( self.urheenoption)  
305
303
                p1 = subprocess.Popen(command , shell=True, stdout=subprocess.PIPE,  cwd=os.path.join( os.getcwd(),"lib", "tools")) 
307
305
                p1.stdout.read() # wait until finished
308
306
                # sleep(2)
309
307
                # sys.exit()
310
 
                if verbose: print "urheen has finished"
 
308
                if debug: print "urheen has finished"
311
309
                # sys.exit()
312
310
                urheenout=codecs.open(os.path.join("lib", "tools", "urheen.out.txt"), "r", "GB18030",  'replace')
313
311
                
315
313
                i=1
316
314
                c=0
317
315
                rid=rowids.pop(0)
318
 
                if verbose: print "self.insertTextIds[c]", self.insertTextIds
 
316
                if debug: print "self.insertTextIds[c]", self.insertTextIds
319
317
                
320
318
                for line in urheenout:
321
319
                        lstrip=line.strip()
329
327
                                self.progressBar.setValue(self.totalnum*2/3+i/3)
330
328
                        elif lstrip in ["<grocolumn>" , "< grocolumn >",  "</PU grocolumn/NR >/PU",  "<grocolumn>/:"]  : # bullshit analysis of tags by urheen
331
329
                                nbs, nbc = self.computeStat(column)
332
 
                                if verbose:print i, c, self.insertTextIds, self.insertTextIds[c], "____", len(rowids), column[10:]
 
330
                                if debug:print i, c, self.insertTextIds, self.insertTextIds[c], "____", len(rowids), column[10:]
333
331
                                self.base.enterUpsert( rid, self.textualId,  self.insertTextIds[c],  column, nbs, nbc)
334
332
                                column=""
335
333
                                
339
337
                                column+=line
340
338
 
341
339
                 
342
 
                if verbose: print "makeTextual", self.textualization, self.textualId
 
340
                if debug: print "makeTextual", self.textualization, self.textualId
343
341
                self.base.makeTextual(self.textualization, self.textualId)
344
342
                # self.base.execute("--commit--")
345
343
        
435
433
                """
436
434
                Slot documentation goes here.
437
435
                """
438
 
                if verbose:print "on_affix_textChanged"
 
436
                if debug:print "on_affix_textChanged"
439
437
                affix=unicode(p0)
440
438
                if len(affix)==0:
441
439
                        self.infoLabel.setText("Please enter an affix for the new columns.")
467
465
                if len(p0)==0:
468
466
                        self.findFound.setText("")
469
467
                        return
470
 
                if verbose:
 
468
                if debug:
471
469
                        print self.currentExampleText[:100]     
472
470
                        print type(self.currentExampleText)
473
471
                testmatch=retest.search(self.currentExampleText)
474
 
                if verbose: print testmatch,testmatch==None
 
472
                if debug: print testmatch,testmatch==None
475
473
                if testmatch:
476
474
                        matched = self.currentExampleText[max(0,testmatch.start()-10):testmatch.start()]
477
475
                        matched += "<span style='color:red'>"+self.currentExampleText[testmatch.start():testmatch.end()]+"</span>"
497
495
                try:res=self.currentselect.next()
498
496
                except: # ende der fahnenstange
499
497
                        self.currentselect=self.base.select( self.selectstatement)
500
 
                        res=self.currentselect.next()
 
498
                        try:res=self.currentselect.next()
 
499
                        except:res="" # empty database
501
500
                #res=self.base.select( self.rowidselectstatement, (str(self.currentTestRow),)).next()
502
501
                self.currentExampleText=" ".join([unicode(r) for r in res])
503
502
                #except:        self.currentExampleText=""