~ubuntu-branches/ubuntu/hardy/kalign/hardy

« back to all changes in this revision

Viewing changes to kalign2_profile.c

  • Committer: Bazaar Package Importer
  • Author(s): Charles Plessy
  • Date: 2007-02-21 09:07:33 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20070221090733-ttdj0bl7yes4to8f
Tags: 2.03-1
* New upstream release.
* Mended the watch file.
* Updated the manpage.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
        kalign2_profile.c 
 
3
        
 
4
        Released under GPL - see the 'COPYING' file   
 
5
        
 
6
        Copyright (C) 2006 Timo Lassmann <timolassmann@gmail.com>
 
7
        
 
8
        This program is free software; you can redistribute it and/or modify
 
9
        it under the terms of the GNU General Public License as published by
 
10
        the Free Software Foundation; either version 2 of the License, or
 
11
        any later version.
 
12
 
 
13
        This program is distributed in the hope that it will be useful,
 
14
        but WITHOUT ANY WARRANTY; without even the implied warranty of
 
15
        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
16
        GNU General Public License for more details.
 
17
 
 
18
        You should have received a copy of the GNU General Public License
 
19
        along with this program; if not, write to the Free Software
 
20
        Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
21
    
 
22
        Please send bug reports, comments etc. to:
 
23
        timolassmann@gmail.com
 
24
*/
 
25
 
 
26
#include "kalign2.h"
 
27
 
 
28
 
 
29
/*
 
30
void add_feature_information_from_alignment(int* path,int* fprof1,int* fprof2,int weight)
 
31
{
 
32
        int i = 0;
 
33
        int j = 0;
 
34
        int c = 1;
 
35
        while(path[c] != 3){            
 
36
                if (!path[c]){
 
37
                        fprof1[i] +=1;
 
38
                        fprof1[i+1] +=weight;
 
39
                        fprof2[j] +=1;
 
40
                        fprof2[j+1] +=weight;
 
41
                        i+=2;
 
42
                        j+=2;   
 
43
                }
 
44
                if (path[c] & 1){
 
45
                        j+=2;
 
46
                }
 
47
                if (path[c] & 2){
 
48
                        i+=2;           
 
49
                }
 
50
                c++;
 
51
        }
 
52
        free(path);
 
53
}*/
 
54
 
 
55
 
 
56
float* update2(const float* profa, const float* profb,float* newp,int* path,int sipa,int sipb,float internal_gap_weight)
 
57
{
 
58
        int i,c;
 
59
        int* gap_len = 0;
 
60
        int gap_cost = 0;
 
61
        
 
62
        gap_len = malloc(sizeof(int)* (path[0]+1));
 
63
        gap_len[0] = 0;
 
64
        
 
65
        //fprintf(stderr,"%d len,,,,\n",path[0]);
 
66
        for(i = 1; i <= path[0];i++){
 
67
        //      fprintf(stderr,"%d,%d   ",i,path[i]);
 
68
                gap_len[i] = (path[i] >> 16);
 
69
                path[i]  = path[i] & 0x0000ffff;
 
70
        //      fprintf(stderr,"%d      %d\n",path[i],gap_len[i]);
 
71
        }
 
72
        //gap_len[path[0]] = 0;
 
73
//      int len = 0;
 
74
        c = 1;
 
75
        
 
76
        /*while(path[c] != 3){  
 
77
                fprintf(stderr,"%d %d   %d\n",c,path[c],gap_len[c]);
 
78
                                
 
79
                c++;
 
80
        }
 
81
        exit(0);*/
 
82
 
 
83
 
 
84
        while(path[c] != 3){            
 
85
                gap_cost = 0;
 
86
                if (!path[c]){
 
87
                        while(!path[c] && path[c] != 3){
 
88
                //              fprintf(stderr,"Align   %d      %d\n",c,path[c]);
 
89
                                for (i = 64; i--;){
 
90
                                        newp[i] = profa[i] + profb[i];
 
91
                                }
 
92
                                profa += 64;
 
93
                                profb += 64;
 
94
                                newp += 64; 
 
95
                                c++;
 
96
                        }
 
97
                }else if (path[c] & 1){
 
98
                        //fprintf(stderr,"%d\n",gap_len[c]);
 
99
                        if(path[c]  & 128){//N terminal gap !!!!!!1
 
100
                                for (i = 0; i < gap_len[c]-1;i++){
 
101
                                        gap_cost += profb[29+64*i];
 
102
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
103
                                }                               
 
104
                                gap_cost += profb[27+64*i];
 
105
                        //      fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
106
                        }else if(path[c]  & 64){//c terminal gap !!!!!!1
 
107
                        //      fprintf(stderr,"c terminal gap\n");
 
108
                                gap_cost += profb[27+64];
 
109
                        //      fprintf(stderr,"i:%d    %d\n",0,gap_cost);
 
110
                                for (i = 1; i < gap_len[c];i++){
 
111
                                        gap_cost += profb[29+64*i];
 
112
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
113
                                }
 
114
                        }else{
 
115
                        //      fprintf(stderr,"middle gap\n");
 
116
                                gap_cost += profb[27+64];
 
117
                        //      fprintf(stderr,"i:%d    %d\n",0,gap_cost);
 
118
                                for (i = 1; i < gap_len[c]-1;i++){
 
119
                                        gap_cost += profb[28+64*i];
 
120
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
121
                                }
 
122
                                gap_cost += profb[27+64*i];
 
123
                        //      fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
124
                        }
 
125
                        //fprintf(stderr,"gap_A %d      %d      length:%d       cost:%d\n",c,path[c],gap_len[c],gap_cost);
 
126
                        gap_cost /=  gap_len[c];
 
127
                        gap_cost *= internal_gap_weight;
 
128
                        
 
129
                        while(path[c] & 1 && path[c] != 3){
 
130
                //              fprintf(stderr,"gap_A   %d      %d      cost:%d\n",c,path[c],gap_cost);
 
131
                                for (i = 64; i--;){
 
132
                                        newp[i] = profb[i];
 
133
                                }
 
134
                                newp[23] += gap_cost;
 
135
                                for (i = 32; i < 55;i++){
 
136
                                        newp[i] += gap_cost;
 
137
                                }
 
138
                                profb +=64;
 
139
                                newp += 64;
 
140
                                c++;
 
141
                        }               
 
142
                }else if (path[c] & 2){
 
143
                        //fprintf(stderr,"%d\n",gap_len[c]);
 
144
                        if(path[c]  & 128){//N terminal gap !!!!!!1
 
145
                                for (i = 0; i < gap_len[c]-1;i++){
 
146
                                        gap_cost += profa[29+64*i];
 
147
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
148
                                }
 
149
                                gap_cost += profa[27+64*i];
 
150
                        //      fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
151
                        }else if(path[c]  & 64){//c terminal gap !!!!!!1
 
152
                        //      fprintf(stderr,"c terminal gap\n");
 
153
                                gap_cost += profa[27+64];
 
154
                        //      fprintf(stderr,"i:%d    %d\n",c-1,gap_cost);
 
155
                                for (i = 1; i < gap_len[c];i++){
 
156
                                        gap_cost += profa[29+64*i];
 
157
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
158
                                }
 
159
                        }else{
 
160
                        //      fprintf(stderr,"middle gap\n");
 
161
                                gap_cost += profa[27+64];
 
162
                        //      fprintf(stderr,"i:%d    %d\n",c-1,gap_cost);
 
163
                                for (i = 1; i < gap_len[c]-1;i++){
 
164
                                        gap_cost += profa[28+64*i];
 
165
                        //              fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
166
                                }
 
167
                                gap_cost += profa[27+64*i];
 
168
                        //      fprintf(stderr,"i:%d    %d\n",i,gap_cost);
 
169
                        }
 
170
                        
 
171
                        gap_cost /=  gap_len[c];
 
172
                        
 
173
                        gap_cost *= internal_gap_weight;
 
174
                        
 
175
                        while(path[c] & 2 && path[c] != 3){
 
176
                //              fprintf(stderr,"gap_b   %d      %d cost:%d\n",c,path[c],gap_cost);
 
177
                                for (i = 64; i--;){
 
178
                                        newp[i] = profa[i];
 
179
                                }
 
180
                                newp[23] += gap_cost;
 
181
                                for (i = 32;i < 55;i++){
 
182
                                        newp[i] += gap_cost;
 
183
                                }
 
184
                                profa +=64;
 
185
                                newp += 64;
 
186
                                c++;
 
187
                        }                       
 
188
                }
 
189
        }
 
190
        for (i = 64; i--;){
 
191
                newp[i] =  profa[i] + profb[i];
 
192
        }       
 
193
        newp -= path[0] *64; 
 
194
        
 
195
        free(gap_len);
 
196
        
 
197
        return newp;
 
198
}
 
199
 
 
200
 
 
201
void smooth_gaps(float* prof,int len,int window,float strength)
 
202
{
 
203
        float tmp_gpo;
 
204
        float tmp_gpe;
 
205
        float tmp_tgpe;
 
206
        int i,j;
 
207
        if(!(window &1)){
 
208
                window--;
 
209
        }
 
210
        for ( i = (window/2); i < len - (window/2);i++){
 
211
                tmp_gpo = 0.0;
 
212
                tmp_gpe = 0.0;
 
213
                tmp_tgpe = 0.0;
 
214
                for (j = -(window/2); j < (window/2);j++){
 
215
                        tmp_gpo += (float)prof[27+((i+j)*64)]*strength;
 
216
                        tmp_gpe += (float) prof[28+((i+j)*64)]*strength;
 
217
                        tmp_tgpe += (float) prof[29+((i+j)*64)]*strength;
 
218
                }
 
219
                tmp_gpo /= window;
 
220
                tmp_gpe /= window;
 
221
                tmp_tgpe /= window;
 
222
                prof[27+(i*64)] =  prof[27+(i*64)]*(1.0-strength) + tmp_gpo;
 
223
                prof[28+(i*64)] =  prof[28+(i*64)]*(1.0-strength) + tmp_gpe;
 
224
                prof[29+(i*64)] =  prof[29+(i*64)]*(1.0-strength) + tmp_tgpe;
 
225
        }
 
226
}
 
227
 
 
228
 
 
229
void increase_gaps(float* prof,int len,int window,float strength)
 
230
{
 
231
        float* mod = 0;
 
232
        int i,j,c;
 
233
        int start_pos = 0;
 
234
        int end_pos = 0; 
 
235
        
 
236
        mod = malloc(sizeof(float)*window);
 
237
        for ( i = 0; i < window;i++){
 
238
                mod[i] = (strength - i*(float)strength / (float) window) - (0.5*strength);
 
239
        }
 
240
        //only gpo first....
 
241
        for ( i = 0; i < len;i++){
 
242
//      //      fprintf(stderr,"(%0.2f:%0.2f) ",prof[26],prof[23]);
 
243
                prof[26] = 0.0;
 
244
                prof+= 64;
 
245
        }
 
246
        prof -= len << 6;
 
247
        
 
248
        
 
249
        
 
250
        for ( i = 0; i < len;i++){
 
251
                
 
252
                if(prof[23]!= 0){
 
253
                        
 
254
                        start_pos = i-window;
 
255
                        if(start_pos < 0){
 
256
                                c = start_pos + window; 
 
257
                        }else{
 
258
                                c = window;
 
259
                        }
 
260
                        
 
261
                        for ( j = c;j--;){
 
262
                                prof[26 - (64*(j+1))] +=  mod[j];
 
263
                        }
 
264
                        end_pos = i+window;
 
265
                        if(end_pos > len){
 
266
                                c = len - i;    
 
267
                        }else{
 
268
                                c = window;
 
269
                        }
 
270
                        //fprintf(stderr,"%d %d\n",i,c);
 
271
                        for (j = 0;j < c;j++){
 
272
                                prof[26 +(64*(j+1))] +=  mod[j];
 
273
                        }
 
274
                }
 
275
                prof+= 64;
 
276
        }
 
277
        prof -= len << 6;
 
278
 
 
279
        for ( i = 0; i < len;i++){
 
280
//      //      fprintf(stderr,"(%0.2f:%0.2f) ",prof[26],prof[23]);
 
281
                prof[27] = prof[27] * (prof[26]+1.0);
 
282
                prof[28] = prof[28] * (prof[26]+1.0);
 
283
                prof[29] = prof[29] * (prof[26]+1.0);
 
284
                prof+= 64;
 
285
        }
 
286
        prof -= len << 6;
 
287
        
 
288
        free(mod);
 
289
}
 
290
 
 
291
 
 
292
void set_gap_penalties2(float* prof,int len,int nsip,int window,float strength)
 
293
{
 
294
        int i,j;
 
295
        float tmp_gpo;
 
296
        float tmp_gpe;
 
297
        float tmp_tgpe;
 
298
        
 
299
        prof +=  (64 *(len));
 
300
        
 
301
        prof[27] = prof[55]*nsip*-gpo;
 
302
        prof[28] = prof[55]*nsip*-gpe;
 
303
        prof[29] = prof[55]*nsip*-tgpe;
 
304
 
 
305
        i = len;
 
306
        while(i--){
 
307
                prof -= 64;
 
308
                prof[27] = prof[55]*nsip*-gpo;
 
309
                prof[28] = prof[55]*nsip*-gpe;
 
310
                
 
311
                prof[29] = prof[55]*nsip*-tgpe;
 
312
        }
 
313
        if(!(window &1)){
 
314
                window--;
 
315
        }
 
316
        
 
317
        
 
318
        for ( i = (window/2); i < len - (window/2);i++){
 
319
                tmp_gpo = 0.0;
 
320
                tmp_gpe = 0.0;
 
321
                tmp_tgpe = 0.0;
 
322
                for (j = -(window/2); j < (window/2);j++){
 
323
                        tmp_gpo += (float)prof[27+((i+j)*64)]*strength;
 
324
                        tmp_gpe += (float) prof[28+((i+j)*64)]*strength;
 
325
                        tmp_tgpe += (float) prof[29+((i+j)*64)]*strength;
 
326
                }
 
327
                tmp_gpo /= window;
 
328
                tmp_gpe /= window;
 
329
                tmp_tgpe /= window;
 
330
                prof[27+(i*64)] =  prof[27+(i*64)]*(1-strength) + tmp_gpo;
 
331
                prof[28+(i*64)] =  prof[28+(i*64)]*(1-strength) + tmp_gpe;
 
332
                prof[29+(i*64)] =  prof[29+(i*64)]*(1-strength) + tmp_tgpe;
 
333
        }
 
334
        
 
335
        /*for ( i = 2; i < len-2;i++){
 
336
                prof[27+(i*64)]  = (prof[27+((i-2)*64)] +prof[27+((i-1)*64)] + prof[27+(i*64)] + prof[27+((i+1)*64)] +prof[27+((i+2)*64)])/ 5;
 
337
        }*/
 
338
       /* for ( i = 2; i < len-2;i++){
 
339
                prof[28+(i*64)]  = (prof[28+((i-2)*64)] + prof[28+((i-1)*64)] + prof[28+(i*64)] + prof[28+((i+1)*64)] +prof[28+((i+2)*64)])/ 5;
 
340
        }
 
341
        for ( i = 2; i < len-2;i++){
 
342
                prof[29+(i*64)]  = (prof[29+((i-2)*64)] + prof[29+((i-1)*64)] + prof[29+(i*64)] + prof[29+((i+1)*64)] +prof[29+((i+2)*64)])/ 5;
 
343
        }*/
 
344
 
 
345
}
 
346
 
 
347
 
 
348
 
 
349
float* make_profile2(float* prof, int* seq,int len,float** subm)
 
350
{
 
351
        int i,j,c;      
 
352
        prof = malloc(sizeof(float)*(len+1)*64);
 
353
        prof +=  (64 *len);
 
354
        
 
355
        for (i = 0;i < 64;i++){
 
356
                prof[i] = 0;
 
357
        }
 
358
        prof[55] = 1;
 
359
 
 
360
        i = len;
 
361
        while(i--){
 
362
                prof -= 64;
 
363
 
 
364
                for (j = 0;j < 64;j++){
 
365
                        prof[j] = 0;
 
366
                }
 
367
                c = seq[i];
 
368
                
 
369
                prof[c] += 1;
 
370
                                        
 
371
                prof += 32;
 
372
                for(j = 23;j--;){
 
373
                        prof[j] = subm[c][j];
 
374
                        
 
375
                }
 
376
                prof[23] = 1;
 
377
                prof -= 32;
 
378
 
 
379
        }
 
380
        return prof;
 
381
}
 
382
 
 
383
 
 
384
float*  feature_update(const float* profa, const float* profb,float* newp,int* path,int stride)
 
385
{
 
386
        int i,c;
 
387
        c = 1;
 
388
        while(path[c] != 3){
 
389
                if (!path[c]){
 
390
                        for (i = stride; i--;){
 
391
                                newp[i] = profa[i] + profb[i];
 
392
                        }
 
393
                        profa += stride;
 
394
                        profb += stride;
 
395
                }
 
396
                if (path[c] & 1){
 
397
                        for (i = stride; i--;){
 
398
                                newp[i] = profb[i];
 
399
                        }
 
400
                        profb += stride;        
 
401
                        
 
402
                }
 
403
                if (path[c] & 2){
 
404
                        for (i = stride; i--;){
 
405
                                newp[i] = profa[i];
 
406
                        }
 
407
                        profa+=stride;                  
 
408
                }
 
409
                newp += stride;
 
410
                c++;
 
411
        }
 
412
        for (i = stride; i--;){
 
413
                newp[i] =  profa[i] + profb[i];
 
414
        }       
 
415
        newp -= path[0] *stride;
 
416
        return newp;
 
417
}
 
418
 
 
419
float* make_wu_profile(float* prof,float* wu,int len)
 
420
{
 
421
        int i;  
 
422
        
 
423
        
 
424
        prof = malloc(sizeof(float)*(len+1)*2);
 
425
        
 
426
        
 
427
        for (i = 0;i < (len+1)*2;i++){
 
428
                prof[i] = 0;
 
429
        }
 
430
        for (i = 0; i < len;i++){
 
431
                if(!wu[i]){
 
432
                        prof[i<<1] = 1;
 
433
                        prof[(i<<1)+1] = 1;
 
434
                        
 
435
                }else{
 
436
                        prof[i<<1] = wu[i]+1;
 
437
                        prof[(i<<1)+1] = wu[i]+1;
 
438
                }
 
439
        }
 
440
        return prof;
 
441
}
 
442
 
 
443
 
 
444
float* make_feature_profile(float* prof,struct feature* f,int len,struct feature_matrix* fm)
 
445
{
 
446
        int i,j;        
 
447
        
 
448
        
 
449
        prof = malloc(sizeof(int)*(len+1)*fm->stride);
 
450
        
 
451
        
 
452
        for (i = 0;i < (len+1)*fm->stride;i++){
 
453
                prof[i] = 0;
 
454
        }
 
455
        
 
456
        while(f){
 
457
                if(f->color != -1){
 
458
                        if(f->start < len && f->end < len){
 
459
                                for (i = f->start-1;i < f->end;i++){
 
460
                                        prof[i*fm->stride + f->color] += 1;
 
461
                                        for ( j =fm->mdim ;j < fm->stride;j++){
 
462
                                                prof[i*fm->stride+j] += fm->m[f->color][j-fm->mdim];
 
463
                                        }
 
464
                                }
 
465
                        }
 
466
                }
 
467
                f = f->next;
 
468
        }       
 
469
        return prof;
 
470
}
 
471
 
 
472
 
 
473
 
 
474
float* make_profile(float* prof, int* seq,int len, float** subm)
 
475
{
 
476
        int i,j,c;      
 
477
        prof = malloc(sizeof(float)*(len+2)*64);
 
478
        prof +=  (64 *(len+1));
 
479
 
 
480
        for (i = 0;i < 64;i++){
 
481
                prof[i] = 0;
 
482
        }
 
483
        prof[23+32] = -gpo;
 
484
        prof[24+32] = -gpe;
 
485
        prof[25+32] = -tgpe;
 
486
 
 
487
        
 
488
        i = len;
 
489
        while(i--){
 
490
                prof -= 64;
 
491
 
 
492
                for (j = 0;j < 64;j++){
 
493
                        prof[j] = 0;
 
494
                }
 
495
                c = seq[i];
 
496
                
 
497
                prof[c] += 1;
 
498
                
 
499
                prof += 32;
 
500
                
 
501
                for(j = 23;j--;){
 
502
                        prof[j] = subm[c][j];
 
503
                }
 
504
                prof[23] = -gpo;
 
505
                prof[24] = -gpe;
 
506
                prof[25] = -tgpe;
 
507
                
 
508
                prof -= 32;
 
509
        }
 
510
        prof -= 64;
 
511
        for (i = 0;i < 64;i++){
 
512
                prof[i] = 0;
 
513
        }
 
514
        prof[23+32] = -gpo;
 
515
        prof[24+32] = -gpe;
 
516
        prof[25+32] = -tgpe;    
 
517
        return prof;
 
518
}
 
519
 
 
520
float* dna_make_profile(float* prof, int* seq,int len,float** subm)
 
521
//int* make_profile(int* prof, int* seq,int len)
 
522
{
 
523
        int i,j,c;      
 
524
        prof = malloc(sizeof(float)*(len+2)*22);
 
525
        prof +=  (22 *(len+1));
 
526
        //fprintf(stderr,"Len:%d        %d\n",len,64*len);
 
527
        //for (i = 64;i--;){
 
528
        for (i = 0;i < 22;i++){
 
529
                prof[i] = 0;
 
530
        }
 
531
        prof[5+11] = -gpo;
 
532
        prof[6+11] = -gpe;
 
533
        prof[7+11] = -tgpe;
 
534
 
 
535
        
 
536
        i = len;
 
537
        while(i--){
 
538
                prof -= 22;
 
539
                //fprintf(stderr,"-64\n");
 
540
                //for (j = 64; j--;){
 
541
                for (j = 0;j < 22;j++){
 
542
                        prof[j] = 0;
 
543
                }
 
544
                c = seq[i];
 
545
                
 
546
                prof[c] += 1;
 
547
                
 
548
                //n = feature[i];
 
549
                //prof[n+23] = 1;
 
550
                
 
551
                prof += 11;
 
552
                for(j = 5;j--;){
 
553
                        prof[j] = subm[c][j];
 
554
                }
 
555
                prof[5] = -gpo;
 
556
                prof[6] = -gpe;
 
557
                prof[7] = -tgpe;
 
558
                prof -= 11;
 
559
        }
 
560
        prof -= 22;
 
561
        for (i = 0;i < 22;i++){
 
562
                prof[i] = 0;
 
563
        }
 
564
        prof[5+11] = -gpo;
 
565
        prof[6+11] = -gpe;
 
566
        prof[7+11] = -tgpe;
 
567
        
 
568
        return prof;
 
569
}
 
570
 
 
571
 
 
572
 
 
573
 
 
574
float* update(const float* profa, const float* profb,float* newp,int* path,int sipa,int sipb)
 
575
{
 
576
        int i,j,c;
 
577
        for (i = 64; i--;){
 
578
                newp[i] = profa[i] + profb[i];
 
579
        }
 
580
        
 
581
        profa += 64;
 
582
        profb += 64;
 
583
        newp += 64;
 
584
 
 
585
        c = 1;
 
586
        
 
587
        while(path[c] != 3){
 
588
                //Idea: limit the 'virtual' number of residues of one type to x.
 
589
                // i.e. only allow a maximum of 10 alanines to be registered in each column
 
590
                // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present.
 
591
                // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase 
 
592
                // with the number of sequences. -> see Durbin pp 140
 
593
                
 
594
                if (!path[c]){
 
595
                        //fprintf(stderr,"Align %d\n",c);
 
596
                        for (i = 64; i--;){
 
597
                                newp[i] = profa[i] + profb[i];
 
598
                        }
 
599
                                
 
600
                        
 
601
                        profa += 64;
 
602
                        profb += 64;
 
603
                }
 
604
                
 
605
                if (path[c] & 1){
 
606
                        //fprintf(stderr,"Gap_A:%d\n",c);
 
607
                        //printf("open:%d       ext:%d  %d      %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]);
 
608
                        for (i = 64; i--;){
 
609
                                newp[i] = profb[i];
 
610
                        }
 
611
                        profb += 64;
 
612
                        #ifndef SIMPLE 
 
613
                        if(!(path[c] & 20)){
 
614
                                if(path[c] & 32){
 
615
                                        newp[25] += sipa;//1;
 
616
                                        i = tgpe*sipa;
 
617
                                }else{
 
618
                                        newp[24] += sipa;//1;
 
619
                                        i = gpe*sipa;
 
620
                                }
 
621
                                
 
622
                                for (j = 32; j < 55;j++){
 
623
                                        newp[j] -=i;
 
624
                                }
 
625
                        }else{
 
626
                        if (path[c] & 16){ 
 
627
        //                      fprintf(stderr,"close_open");
 
628
                                if(path[c] & 32){
 
629
                                        newp[25] += sipa;//1;
 
630
                                        i = tgpe*sipa;
 
631
                                        newp[23] += sipa;//1;
 
632
                                        i += gpo*sipa;
 
633
                                }else{
 
634
                                        newp[23] += sipa;//1;
 
635
                                        i = gpo*sipa;
 
636
                                }
 
637
                                                                
 
638
                                for (j = 32; j < 55;j++){
 
639
                                        newp[j] -=i;
 
640
                                }
 
641
                        }
 
642
                        if (path[c] & 4){ 
 
643
        //                      fprintf(stderr,"Gap_open");
 
644
                                if(path[c] & 32){
 
645
                                        newp[25] += sipa;//1;
 
646
                                        i = tgpe*sipa;
 
647
                                        newp[23] += sipa;//1;
 
648
                                        i += gpo*sipa;
 
649
                                }else{
 
650
                                        newp[23] += sipa;//1;
 
651
                                        i = gpo*sipa;
 
652
                                }
 
653
                                for (j = 32; j < 55;j++){
 
654
                                        newp[j] -=i;
 
655
                                }
 
656
                        }
 
657
                        }
 
658
                        #endif
 
659
                        
 
660
                        
 
661
                }
 
662
                if (path[c] & 2){
 
663
                        //fprintf(stderr,"Gap_B:%d\n",c);
 
664
                        //printf("open:%d       ext:%d  %d      %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]);
 
665
                        for (i = 64; i--;){
 
666
                                newp[i] = profa[i];
 
667
                        }
 
668
                        profa+=64;
 
669
                        #ifndef SIMPLE 
 
670
                        if(!(path[c] & 20)){
 
671
                                if(path[c] & 32){
 
672
                                        newp[25] += sipb;//1;
 
673
                                        i = tgpe*sipb;
 
674
                                }else{
 
675
                                        newp[24] += sipb;//1;
 
676
                                        i = gpe*sipb;
 
677
                                }
 
678
                                for (j = 32; j < 55;j++){
 
679
                                        newp[j] -=i;
 
680
                                }
 
681
                        }else{
 
682
                        if (path[c] & 16){
 
683
        //                      fprintf(stderr,"close_open");
 
684
                                if(path[c] & 32){
 
685
                                        newp[25] += sipb;//1;
 
686
                                        i =  tgpe*sipb;
 
687
                                        newp[23] += sipb;//1;
 
688
                                        i +=  gpo*sipb;
 
689
                                }else{
 
690
                                        newp[23] += sipb;//1;
 
691
                                        i =  gpo*sipb;
 
692
                                }
 
693
                                for (j = 32; j < 55;j++){
 
694
                                        newp[j] -=i;
 
695
                                }
 
696
                        }
 
697
                        if (path[c] & 4){
 
698
        //                      fprintf(stderr,"Gap_open");
 
699
                                if(path[c] & 32){
 
700
                                        newp[25] += sipb;//1;
 
701
                                        i = tgpe*sipb;
 
702
                                        newp[23] += sipb;//1;
 
703
                                        i += gpo*sipb;
 
704
                                }else{
 
705
                                        newp[23] += sipb;//1;
 
706
                                        i = gpo*sipb;
 
707
                                }
 
708
                                
 
709
                                for (j = 32; j < 55;j++){
 
710
                                        newp[j] -=i;
 
711
                                }
 
712
                        }
 
713
                        }
 
714
                        #endif
 
715
                        
 
716
                }
 
717
                newp += 64;
 
718
                c++;
 
719
        }
 
720
        for (i = 64; i--;){
 
721
                newp[i] =  profa[i] + profb[i];
 
722
        }       
 
723
        newp -= (path[0]+1) *64;
 
724
        return newp;
 
725
}
 
726
 
 
727
float* dna_update(const float* profa, const float* profb, float* newp,int* path,int sipa,int sipb)
 
728
{
 
729
        int i,j,c;
 
730
        
 
731
        for (i = 22; i--;){
 
732
                newp[i] = profa[i] + profb[i];
 
733
        }
 
734
        
 
735
        profa += 22;
 
736
        profb += 22;
 
737
        newp += 22;
 
738
        
 
739
        
 
740
        c = 1;
 
741
        while(path[c] != 3){
 
742
                //Idea: limit the 'virtual' number of residues of one type to x.
 
743
                // i.e. only allow a maximum of 10 alanines to be registered in each column
 
744
                // the penalty for aligning a 'G' to this column will stay stable even when many (>10) alanines are present.
 
745
                // the difference in score between the 'correct' (all alanine) and incorrect (alanines + glycine) will not increase 
 
746
                // with the number of sequences. -> see Durbin pp 140
 
747
                
 
748
                if (!path[c]){
 
749
                        //fprintf(stderr,"Align %d\n",c);
 
750
                        for (i = 22; i--;){
 
751
                                newp[i] = profa[i] + profb[i];
 
752
                        }
 
753
                                
 
754
                        
 
755
                        profa += 22;
 
756
                        profb += 22;
 
757
                }
 
758
                if (path[c] & 1){
 
759
                        //fprintf(stderr,"Gap_A:%d\n",c);
 
760
                        //printf("open:%d       ext:%d  %d      %d\n",si->nsip[a] * gpo,si->nsip[a] * gpe,si->nsip[a] * profb[41],si->nsip[a] * profb[46]);
 
761
                        for (i = 22; i--;){
 
762
                                newp[i] = profb[i];
 
763
                        }
 
764
                        profb += 22;
 
765
                        if(!(path[c]&20)){
 
766
                                if(path[c]&32){
 
767
                                        newp[7] += sipa;//1;
 
768
                                        i = tgpe*sipa;
 
769
                                }else{
 
770
                                        newp[6] += sipa;//1;
 
771
                                        i = gpe*sipa;
 
772
                                }
 
773
                                
 
774
                                for (j = 11; j < 16;j++){
 
775
                                        newp[j] -=i;
 
776
                                }
 
777
                        }else{
 
778
                        if (path[c] & 16){ 
 
779
        //                      fprintf(stderr,"close_open");
 
780
                                if(path[c]&32){
 
781
                                        newp[7] += sipa;//1;
 
782
                                        i = tgpe*sipa;
 
783
                                        newp[5] += sipa;//1;
 
784
                                        i += gpo*sipa;
 
785
                                }else{
 
786
                                        newp[5] += sipa;//1;
 
787
                                        i = gpo*sipa;
 
788
                                }
 
789
                                                                
 
790
                                for (j = 11; j < 16;j++){
 
791
                                        newp[j] -=i;
 
792
                                }
 
793
                        }
 
794
                        if (path[c] & 4){ 
 
795
        //                      fprintf(stderr,"Gap_open");
 
796
                                if(path[c]&32){
 
797
                                        newp[7] += sipa;//1;
 
798
                                        i = tgpe*sipa;
 
799
                                        newp[5] += sipa;//1;
 
800
                                        i += gpo*sipa;
 
801
                                }else{
 
802
                                        newp[5] += sipa;//1;
 
803
                                        i = gpo*sipa;
 
804
                                }
 
805
                                for (j = 11; j < 16; j++){
 
806
                                        newp[j] -=i;
 
807
                                }
 
808
                        }
 
809
                        }
 
810
                        
 
811
                        
 
812
                }
 
813
                if (path[c] & 2){
 
814
                        //fprintf(stderr,"Gap_B:%d\n",c);
 
815
                        //printf("open:%d       ext:%d  %d      %d\n",si->nsip[b] * gpo,si->nsip[b] * gpe,profa[26],profa[27]);
 
816
                        for (i = 22; i--;){
 
817
                                newp[i] = profa[i];
 
818
                        }
 
819
                        profa+=22;
 
820
                        if(!(path[c]&20)){
 
821
                                if(path[c]&32){
 
822
                                        newp[7] += sipb;//1;
 
823
                                        i = tgpe*sipb;
 
824
                                }else{
 
825
                                        newp[6] += sipb;//1;
 
826
                                        i = gpe*sipb;
 
827
                                }
 
828
                                for (j = 11; j < 16;j++){
 
829
                                        newp[j] -=i;
 
830
                                }
 
831
                        }else{
 
832
                        if (path[c] & 16){
 
833
        //                      fprintf(stderr,"close_open");
 
834
                                if(path[c]&32){
 
835
                                        newp[7] += sipb;//1;
 
836
                                        i =  tgpe*sipb;
 
837
                                        newp[5] += sipb;//1;
 
838
                                        i +=  gpo*sipb;
 
839
                                }else{
 
840
                                        newp[5] += sipb;//1;
 
841
                                        i =  gpo*sipb;
 
842
                                }
 
843
                                for (j = 11; j < 16;j++){
 
844
                                        newp[j] -=i;
 
845
                                }
 
846
                        }
 
847
                        if (path[c] & 4){
 
848
        //                      fprintf(stderr,"Gap_open");
 
849
                                if(path[c]&32){
 
850
                                        newp[7] += sipb;//1;
 
851
                                        i = tgpe*sipb;
 
852
                                        newp[5] += sipb;//1;
 
853
                                        i += gpo*sipb;
 
854
                                }else{
 
855
                                        newp[5] += sipb;//1;
 
856
                                        i = gpo*sipb;
 
857
                                }
 
858
                                
 
859
                                for (j = 11; j < 16;j++){
 
860
                                        newp[j] -=i;
 
861
                                }
 
862
                        }
 
863
                        }
 
864
                        
 
865
                }
 
866
                newp += 22;
 
867
                c++;
 
868
        }
 
869
        for (i = 22; i--;){
 
870
                newp[i] =  profa[i] + profb[i];
 
871
        }       
 
872
        newp -= (path[0]+1) *22; 
 
873
        return newp;
 
874
}
 
875
 
 
876
 
 
877
void dna_set_gap_penalties(float* prof,int len,int nsip)
 
878
{
 
879
        int i;
 
880
        
 
881
        prof +=  (22 *(len+1));
 
882
        prof[8] = prof[16]*nsip;//gap open or close
 
883
        prof[9] = prof[17]*nsip;//gap extention
 
884
        
 
885
        prof[10] = prof[18]*nsip;//gap open or close
 
886
        //prof[30] = prof[58]*nsip;//gap extention
 
887
        
 
888
        
 
889
        i = len+1;
 
890
        while(i--){
 
891
                prof -= 22;
 
892
                prof[8] = prof[16]*nsip;//gap open or close
 
893
                prof[9] = prof[17]*nsip;//gap extention
 
894
                
 
895
                prof[10] = prof[18]*nsip;//gap open or close
 
896
        //      prof[30] = prof[58]*nsip;//gap extention        
 
897
        }
 
898
}
 
899
 
 
900
void set_gap_penalties(float* prof,int len,int nsip)
 
901
{
 
902
        int i;
 
903
        
 
904
        prof +=  (64 *(len+1));
 
905
        prof[27] = prof[55]*nsip;//gap open or close
 
906
        prof[28] = prof[56]*nsip;//gap extention
 
907
                
 
908
        prof[29] = prof[57]*nsip;//gap open or close
 
909
        i = len+1;
 
910
        while(i--){
 
911
                prof -= 64;
 
912
                prof[27] = prof[55]*nsip;//gap open or close
 
913
                prof[28] = prof[56]*nsip;//gap extention
 
914
                
 
915
                prof[29] = prof[57]*nsip;//gap open or close
 
916
        }
 
917
}
 
918