2
kalign2_alignment_types.c
4
Released under GPL - see the 'COPYING' file
6
Copyright (C) 2006 Timo Lassmann <timolassmann@gmail.com>
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2 of the License, or
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
Please send bug reports, comments etc. to:
23
timolassmann@gmail.com
28
int** default_alignment(struct alignment* aln,int* tree,float**submatrix, int** map)
30
struct dp_matrix *dp = 0;
38
profile = malloc(sizeof(float*)*numprofiles);
39
for ( i = 0;i< numprofiles;i++){
43
map = malloc(sizeof(int*)*numprofiles);
44
for ( i = 0;i < numprofiles;i++){
49
dp = dp_matrix_alloc(dp,511,511);
51
fprintf(stderr,"\nAlignment:\n");
54
for (i = 0; i < (numseq-1);i++){
58
fprintf(stderr,"\r%8.0f percent done",(float)(i) /(float)numseq * 100);
59
//fprintf(stderr,"Aligning:%d %d->%d %d %d\n",a,b,c,numseq,i);
62
dp = dp_matrix_realloc(dp,len_a,len_b);
64
map[c] = malloc(sizeof(int) * (len_a+len_b+2));
65
for (j = len_a+len_b+2;j--;){
69
profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix);
72
profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix);
74
profa = profile[a]+64;
75
profb = profile[b]+64;
77
set_gap_penalties(profile[a],len_a,aln->nsip[b]);
78
set_gap_penalties(profile[b],len_b,aln->nsip[a]);
79
if(aln->nsip[a] == 1){
80
if(aln->nsip[b] == 1){
81
map[c] = ss_dyn(submatrix,map[c],dp,aln->s[a],aln->s[b],len_a,len_b);
83
map[c] = ps_dyn(map[c],dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]);
84
map[c] = mirror_path(map[c]);
87
if(aln->nsip[b] == 1){
88
map[c] = ps_dyn(map[c],dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]);
91
map[c] = pp_dyn(map[c],dp,profa,profb,len_a,len_b);
93
map[c] = pp_dyn(map[c],dp,profb,profa,len_b,len_a);
94
map[c] = mirror_path(map[c]);
99
profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2));
101
profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]);
104
aln->sl[c] = map[c][0];
106
aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
107
aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
109
for (j = aln->nsip[a];j--;){
110
aln->sip[c][g] = aln->sip[a][j];
113
for (j = aln->nsip[b];j--;){
114
aln->sip[c][g] = aln->sip[b][j];
120
fprintf(stderr,"\r%8.0f percent done\n",100.0);
121
free(profile[numprofiles-1]);
133
int** aa_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int mmbonus)
135
struct dp_matrix *dp = 0;
145
profile = malloc(sizeof(int*)*numprofiles);
146
for ( i = 0;i< numprofiles;i++){
150
map = malloc(sizeof(int*)*numprofiles);
151
for ( i = 0;i < numprofiles;i++){
156
dp = dp_matrix_alloc(dp,511,511);
158
for (i = 0; i < (numseq-1);i++){
162
fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c);
165
dp = dp_matrix_realloc(dp,len_a,len_b);
167
map[c] = malloc(sizeof(int) * (len_a+len_b+2));
168
for (j = len_a+len_b+2;j--;){
172
profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix);
175
profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix);
180
set_gap_penalties(profa,len_a,aln->nsip[b]);
181
set_gap_penalties(profb,len_b,aln->nsip[a]);
183
pbonus = mmbonus * aln->nsip[a] * aln->nsip[b];
186
map[c] = aapp_dyn(map[c],dp,profa,profb,len_a,len_b,pbonus);
188
map[c] = aapp_dyn(map[c],dp,profb,profa,len_b,len_a,pbonus);
189
map[c] = mirror_path(map[c]);
192
profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2));
193
profile[c] = update(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b]);
195
aln->sl[c] = map[c][0];
197
aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
198
aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
200
for (j = aln->nsip[a];j--;){
201
aln->sip[c][g] = aln->sip[a][j];
204
for (j = aln->nsip[b];j--;){
205
aln->sip[c][g] = aln->sip[b][j];
212
free(profile[numprofiles-1]);
224
int** alter_gaps_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,int n,float range,int weight)
226
struct dp_matrix *dp = 0;
253
per =(float) range*2/(n+1);
255
gpo_step = (float)gpo * per;
256
gpe_step = (float)gpe * per;
257
tgpe_step = (float)tgpe * per;
260
profile = malloc(sizeof(int*)*numprofiles);
261
for ( i = 0;i< numprofiles;i++){
265
map = malloc(sizeof(int*)*numprofiles);
266
for ( i = 0;i < numprofiles;i++){
270
dp = dp_matrix_alloc(dp,511,511);
273
for (i = 0; i < (numseq-1);i++){
277
fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c);
280
dp = dp_matrix_realloc(dp,len_a,len_b);
282
map[c] = malloc(sizeof(int) * (len_a+len_b+2));
283
for (j = len_a+len_b+2;j--;){
287
profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix);
290
profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix);
295
fprofa = malloc(sizeof(int)*(len_a+1)*2);
296
for (j = 0;j < (len_a+1)*2;j++){
299
fprofb = malloc(sizeof(int)*(len_b+1)*2);
300
for (j = 0;j < (len_b+1)*2;j++){
304
gpo = org_gpo - ((int)gpo_step* (n/2));
305
gpe = org_gpe - ((int)gpe_step* (n/2));
306
tgpe = org_tgpe - ((int)tgpe_step* (n/2));
308
for (j = 0; j < n;j++){
309
set_gap_penalties(profa,len_a,aln->nsip[b]);
310
set_gap_penalties(profb,len_b,aln->nsip[a]);
312
path = malloc(sizeof(int) * (len_a+len_b+2));
313
for (g = len_a+len_b+2;g--;){
317
if(aln->nsip[a] == 1){
318
if(aln->nsip[b] == 1){
319
path = ss_dyn(submatrix,path,dp,aln->s[a],aln->s[b],len_a,len_b);
321
path = ps_dyn(path,dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]);
322
path = mirror_path(path);
325
if(aln->nsip[b] == 1){
326
path = ps_dyn(path,dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]);
329
path = pp_dyn(path,dp,profa,profb,len_a,len_b);
331
path = pp_dyn(path,dp,profb,profa,len_b,len_a);
332
path = mirror_path(path);
336
fprintf(stderr,"Test alignment with gpo:%d gpe:%d tgpe:%d\n",gpo,gpe,tgpe);
339
add_feature_information_from_alignment(path,fprofa,fprofb,weight/n);
342
gpo += (int)gpo_step;
343
gpe += (int)gpe_step;
344
tgpe += (int)tgpe_step;
350
set_gap_penalties(profa,len_a,aln->nsip[b]);
351
set_gap_penalties(profb,len_b,aln->nsip[a]);
356
// map[c] = f_only_pp_dyn(map[c],dp,fprofa,fprofb,len_a,len_b,1,2);
357
map[c] = fpp_dyn(map[c],dp,profa,profb,fprofa,fprofb,len_a,len_b,1,2);
359
// map[c] = f_only_pp_dyn(map[c],dp,fprofb,fprofa,len_b,len_a,1,2);
360
map[c] = fpp_dyn(map[c],dp,profb,profa,fprofb,fprofa,len_b,len_a,1,2);
361
map[c] = mirror_path(map[c]);
364
profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2));
365
profile[c] = update(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b]);
368
aln->sl[c] = map[c][0];
370
aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
371
aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
373
for (j = aln->nsip[a];j--;){
374
aln->sip[c][g] = aln->sip[a][j];
377
for (j = aln->nsip[b];j--;){
378
aln->sip[c][g] = aln->sip[b][j];
388
free(profile[numprofiles-1]);
400
int** test_alignment(struct alignment* aln,int* tree,float **submatrix, int** map,float internal_gap_weight,int window,float strength)
402
struct dp_matrix *dp = 0;
410
profile = malloc(sizeof(float*)*numprofiles);
411
for ( i = 0;i< numprofiles;i++){
415
map = malloc(sizeof(int*)*numprofiles);
416
for ( i = 0;i < numprofiles;i++){
421
dp = dp_matrix_alloc(dp,511,511);
423
for (i = 0; i < (numseq-1);i++){
427
fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c);
430
dp = dp_matrix_realloc(dp,len_a,len_b);
432
map[c] = malloc(sizeof(int) * (len_a+len_b+2));
433
for (j = len_a+len_b+2;j--;){
437
profile[a] = make_profile2(profile[a],aln->s[a],len_a,submatrix);
440
profile[b] = make_profile2(profile[b],aln->s[b],len_b,submatrix);
445
set_gap_penalties2(profa,len_a,aln->nsip[b],window,strength);
446
set_gap_penalties2(profb,len_b,aln->nsip[a],window,strength);
448
if(aln->nsip[a] == 1){
449
if(aln->nsip[b] == 1){
450
map[c] = ss_dyn2(submatrix,map[c],dp,aln->s[a],aln->s[b],len_a,len_b);
452
// map[c] = ps_dyn2(map[c],dp,profb,aln->s[a],len_b,len_a,aln->nsip[b]);
454
map[c] = pp_dyn2(map[c],dp,profb,profa,len_b,len_a);
455
map[c] = mirror_path(map[c]);
458
if(aln->nsip[b] == 1){
459
// map[c] = ps_dyn2(map[c],dp,profa,aln->s[b],len_a,len_b,aln->nsip[a]);
460
map[c] = pp_dyn2(map[c],dp,profa,profb,len_a,len_b);
463
map[c] = pp_dyn2(map[c],dp,profa,profb,len_a,len_b);
465
map[c] = pp_dyn2(map[c],dp,profb,profa,len_b,len_a);
466
map[c] = mirror_path(map[c]);
471
profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2));
472
profile[c] = update2(profa,profb,profile[c],map[c],aln->nsip[a],aln->nsip[b],internal_gap_weight);
475
aln->sl[c] = map[c][0];
477
aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
478
aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
480
for (j = aln->nsip[a];j--;){
481
aln->sip[c][g] = aln->sip[a][j];
484
for (j = aln->nsip[b];j--;){
485
aln->sip[c][g] = aln->sip[b][j];
492
free(profile[numprofiles-1]);
503
int** feature_alignment(struct alignment* aln,int* tree,int**submatrix, int** map,struct feature_matrix* fm)
505
struct dp_matrix *dp = 0;
517
profile = malloc(sizeof(int*)*numprofiles);
518
for ( i = 0;i< numprofiles;i++){
522
fprofile = malloc(sizeof(int*)*numprofiles);
523
for ( i = 0;i< numprofiles;i++){
527
map = malloc(sizeof(int*)*numprofiles);
528
for ( i = 0;i < numprofiles;i++){
532
dp = dp_matrix_alloc(dp,511,511);
535
for (i = 0; i < (numseq-1);i++){
539
fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c);
542
dp = dp_matrix_realloc(dp,len_a,len_b);
544
map[c] = malloc(sizeof(int) * (len_a+len_b+2));
545
for (j = len_a+len_b+2;j--;){
549
profile[a] = make_profile(profile[a],aln->s[a],len_a,submatrix);
550
// fprintf(stderr,"Making feature profile for %d (%s)\n",a,aln->sn[a]);
551
fprofile[a] = make_feature_profile(fprofile[a],aln->ft[a],len_a,fm);
554
profile[b] = make_profile(profile[b],aln->s[b],len_b,submatrix);
555
// fprintf(stderr,"Making feature profile for %d (%s)\n",b,aln->sn[b]);
556
fprofile[b] = make_feature_profile(fprofile[b],aln->ft[b],len_b,fm);
558
//profa = profile[a];
559
//profb = profile[b];
560
profa = profile[a]+64;
561
profb = profile[b]+64;
563
fprofa = fprofile[a];
564
fprofb = fprofile[b];
566
set_gap_penalties(profile[a],len_a,aln->nsip[b]);
567
set_gap_penalties(profile[b],len_b,aln->nsip[a]);
570
map[c] = fpp_dyn(map[c],dp,profa,profb,fprofa,fprofb,len_a,len_b,fm->mdim,fm->stride);
572
map[c] = fpp_dyn(map[c],dp,profb,profa,fprofb,fprofa,len_b,len_a,fm->mdim,fm->stride);
573
map[c] = mirror_path(map[c]);
576
profile[c] = malloc(sizeof(int)*64*(len_a+len_b+2));
577
profile[c] = update(profile[a],profile[b],profile[c],map[c],aln->nsip[a],aln->nsip[b]);
579
fprofile[c] = malloc(sizeof(int)*fm->stride*(len_a+len_b+2));
580
fprofile[c] = feature_update(fprofa,fprofb,fprofile[c],map[c],fm->stride);
582
aln->sl[c] = map[c][0];
584
aln->nsip[c] = aln->nsip[a] + aln->nsip[b];
585
aln->sip[c] = malloc(sizeof(int)*(aln->nsip[a] + aln->nsip[b]));
587
for (j = aln->nsip[a];j--;){
588
aln->sip[c][g] = aln->sip[a][j];
591
for (j = aln->nsip[b];j--;){
592
aln->sip[c][g] = aln->sip[b][j];
603
free(profile[numprofiles-1]);
606
free(fprofile[numprofiles-1]);
614
free_feature_matrix(fm);
618
struct ntree_data* ntree_sub_alignment(struct ntree_data* ntree_data,int* tree,int num)
620
struct dp_matrix *dp = 0;
621
struct alignment* aln = 0;
625
float** local_profile = 0;
634
int* which_to_alloc = 0;
636
aln = ntree_data->aln;
638
which_to_alloc = malloc(sizeof(int*)*numprofiles);
639
for ( i = 0;i< numprofiles;i++){
640
which_to_alloc[i] = 0;
643
local_profile = malloc(sizeof(float*)*numprofiles);
644
local_sl = malloc(sizeof(int)*numprofiles);
645
local_nsip = malloc(sizeof(int)*numprofiles);
646
local_sip = malloc(sizeof(int*)*numprofiles);
649
for (i = 0; i < num-1;i++){
651
if(!which_to_alloc[a]){
652
which_to_alloc[a] = 1;
655
if(!which_to_alloc[b]){
656
which_to_alloc[b] = 1;
659
if(!which_to_alloc[c]){
660
which_to_alloc[c] = 2;
663
//for ( i = 0;i< numprofiles;i++){
664
// fprintf(stderr,"alloc?:%d %d\n",i,which_to_alloc[i]);
668
for ( i = 0;i< numprofiles;i++){
669
if(which_to_alloc[i] == 1){
670
local_profile[i] = ntree_data->profile[i];
671
local_sl[i] = aln->sl[i];
672
local_nsip[i] = aln->nsip[i];
673
local_sip[i] = malloc(sizeof(int*)*aln->nsip[i]);
674
for(j = 0;j < aln->nsip[i];j++){
675
local_sip[i][j] = aln->sip[i][j];
678
local_profile[i] = 0;
685
for ( i = 0;i< numprofiles;i++){
686
local_profile[i] = ntree_data->profile[i];
687
local_sl[i] = aln->sl[i];
688
local_nsip[i] = aln->nsip[i];
690
fprintf(stderr,"Allocing..:%d\n",aln->nsip[i]);
691
local_sip[i] = malloc(sizeof(int*)*aln->nsip[i]);
692
for(j = 0;j < aln->nsip[i];j++){
693
local_sip[i][j] = aln->sip[i][j];
700
local_map = malloc(sizeof(int*)*numprofiles);
701
for ( i = 0;i < numprofiles;i++){
706
dp = dp_matrix_alloc(dp,511,511);
708
for (i = 0; i < num-1;i++){
712
// fprintf(stderr,"Aligning:%d %d->%d\n",a,b,c);
715
dp = dp_matrix_realloc(dp,len_a,len_b);
717
local_map[c] = malloc(sizeof(int) * (len_a+len_b+2));
718
for (j = len_a+len_b+2;j--;){
722
local_profile[a] = make_profile(local_profile[a],aln->s[a],len_a,ntree_data->submatrix);
725
local_profile[b] = make_profile(local_profile[b],aln->s[b],len_b,ntree_data->submatrix);
727
profa = local_profile[a];
728
profb = local_profile[b];
730
set_gap_penalties(profa,len_a,local_nsip[b]);
731
set_gap_penalties(profb,len_b,local_nsip[a]);
733
if(local_nsip[a] == 1){
734
if(local_nsip[b] == 1){
735
local_map[c] = ss_dyn(ntree_data->submatrix,local_map[c],dp,aln->s[a],aln->s[b],len_a,len_b);
737
local_map[c] = ps_dyn(local_map[c],dp,profb,aln->s[a],len_b,len_a,local_nsip[b]);
738
local_map[c] = mirror_path(local_map[c]);
741
if(local_nsip[b] == 1){
742
local_map[c] = ps_dyn(local_map[c],dp,profa,aln->s[b],len_a,len_b,local_nsip[a]);
745
local_map[c] = pp_dyn(local_map[c],dp,profa,profb,len_a,len_b);
747
local_map[c] = pp_dyn(local_map[c],dp,profb,profa,len_b,len_a);
748
local_map[c] = mirror_path(local_map[c]);
753
local_profile[c] = malloc(sizeof(float)*64*(len_a+len_b+2));
754
local_profile[c] = update(profa,profb,local_profile[c],local_map[c],local_nsip[a],local_nsip[b]);
756
local_sl[c] = local_map[c][0];
758
local_nsip[c] = local_nsip[a] + local_nsip[b];
759
local_sip[c] = malloc(sizeof(int)*(local_nsip[a] + local_nsip[b]));
761
for (j = local_nsip[a];j--;){
762
local_sip[c][g] = local_sip[a][j];
765
for (j = local_nsip[b];j--;){
766
local_sip[c][g] = local_sip[b][j];
773
if(ntree_data->profile[c]){
774
if(ntree_data->map[c][ntree_data->map[c][0]+2] < local_map[c][local_map[c][0]+2]){
775
fprintf(stderr,"%d\n",local_map[c][local_map[c][0]+2]);
776
//remove old map,profile,etc..
777
for (i = 0; i < num-1;i++){
779
free(ntree_data->map[c]);
780
free(ntree_data->profile[c]);
782
ntree_data->map[c] = malloc(sizeof(int)*(local_map[c][0]+3));
783
for (j = 0; j < local_map[c][0]+3;j++){
784
ntree_data->map[c][j] = local_map[c][j];
786
aln->sip[c] = malloc(sizeof(int)*local_nsip[c]);
787
aln->nsip[c] = local_nsip[c];
788
for (j = 0; j < local_nsip[c];j++){
789
aln->sip[c][j] = local_sip[c][j];
791
aln->sl[c] = local_sl[c];
794
ntree_data->profile[c] = malloc(sizeof(int)*64*(aln->sl[c]+1));
795
for (i = 0; i < (64*(aln->sl[c]+1));i++){
796
ntree_data->profile[c][i] = local_profile[c][i];
798
ntree_data->tree[0] -= (tree[0]-1);
799
for (j = 1; j < tree[0];j++){
800
ntree_data->tree[ntree_data->tree[0]+j-1] = tree[j];
802
ntree_data->tree[0] += (tree[0]-1);
805
fprintf(stderr,"no improvement\n");
808
fprintf(stderr,"%d\n",local_map[c][local_map[c][0]+2]);
809
for (i = 0; i < num-1;i++){
811
ntree_data->map[c] = malloc(sizeof(int)*(local_map[c][0]+3));
812
for (j = 0; j < local_map[c][0]+3;j++){
813
ntree_data->map[c][j] = local_map[c][j];
816
aln->sip[c] = malloc(sizeof(int)*local_nsip[c]);
817
aln->nsip[c] = local_nsip[c];
818
for (j = 0; j < local_nsip[c];j++){
819
aln->sip[c][j] = local_sip[c][j];
821
aln->sl[c] = local_sl[c];
823
ntree_data->profile[c] = malloc(sizeof(int)*64*(aln->sl[c]+1));
824
for (i = 0; i < (64*(aln->sl[c]+1));i++){
825
ntree_data->profile[c][i] = local_profile[c][i];
827
for (j = 1; j < tree[0];j++){
828
ntree_data->tree[ntree_data->tree[0]+j-1] = tree[j];
830
ntree_data->tree[0] += tree[0]-1;
833
for ( i = 0;i< numprofiles;i++){
834
if(which_to_alloc[i] == 1){
837
free(local_profile[i]);
840
if(which_to_alloc[i] == 2){
841
free(local_profile[i]);
848
free(which_to_alloc);
859
struct ntree_data* ntree_alignment(struct ntree_data* ntree_data)
862
ntree_data->profile = malloc(sizeof(float*)*numprofiles);
863
for ( i = 0;i< numprofiles;i++){
864
ntree_data->profile[i] = 0;
867
ntree_data->map = malloc(sizeof(int*)*numprofiles);
868
for ( i = 0;i < numprofiles;i++){
869
ntree_data->map[i] = 0;
872
ntree_data = alignntree(ntree_data,ntree_data->realtree);
874
for ( i = 0;i< numprofiles;i++){
875
if(ntree_data->profile[i]){
876
free(ntree_data->profile[i]);
879
free(ntree_data->profile);
882
free(ntree_data->submatrix[i]);
884
free(ntree_data->submatrix);
885
free_real_tree(ntree_data->realtree);