1
/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
3
/*********************************************************************
4
* Clustal Omega - Multiple sequence alignment
6
* Copyright (C) 2010 University College Dublin
8
* Clustal-Omega is free software; you can redistribute it and/or
9
* modify it under the terms of the GNU General Public License as
10
* published by the Free Software Foundation; either version 2 of the
11
* License, or (at your option) any later version.
13
* This file is part of Clustal-Omega.
15
********************************************************************/
18
* RCS $Id: hhhalfalignment-C.h 227 2011-03-28 17:03:09Z fabian $
25
#include <iostream> // cin, cout, cerr
26
#include <fstream> // ofstream, ifstream
27
#include <stdio.h> // printf
28
#include <stdlib.h> // exit
29
#include <string> // strcmp, strstr
30
#include <math.h> // sqrt, pow
31
#include <limits.h> // INT_MIN
32
#include <float.h> // FLT_MIN
33
#include <time.h> // clock
34
#include <ctype.h> // islower, isdigit etc
41
#include "util-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
42
#include "list.h" // list data structure
43
#include "hash.h" // hash data structure
44
#include "hhdecl-C.h" // constants, class
45
#include "hhutil-C.h" // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
46
#include "hhhmm.h" // class HMM
47
#include "hhalignment.h" // class Alignment
51
/////////////////////////////////////////////////////////////////////////////
52
/////////////////////////////////////////////////////////////////////////////
53
// Methods of class HalfAlignment
54
/////////////////////////////////////////////////////////////////////////////
55
/////////////////////////////////////////////////////////////////////////////
59
/////////////////////////////////////////////////////////////////////////////
61
HalfAlignment::HalfAlignment(int maxseqdis)
65
nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons= -1;
66
h = new(int[maxseqdis]); //h[k] = next position of sequence k to be written
67
s = new(char*[maxseqdis]); //s[k][h] = character in column h, sequence k of output alignment
68
l = new(int*[maxseqdis]); //counts non-gap residues: l[k][i] = index of last residue AT OR BEFORE match state i in seq k
69
m = new(int*[maxseqdis]); //counts positions: m[k][i] = position of match state i in string seq[k]
72
/////////////////////////////////////////////////////////////////////////////////////
74
HalfAlignment::~HalfAlignment()
85
/////////////////////////////////////////////////////////////////////////////////////
87
* @brief Free memory in HalfAlignment arrays s[][], l[][], and m[][]
90
HalfAlignment::Unset()
92
// Free memory for alignment characters and residue counts
93
for (int k=0; k<n; k++)
95
delete[] s[k]; s[k] = NULL;
96
delete[] l[k]; l[k] = NULL;
97
delete[] m[k]; m[k] = NULL;
101
nss_dssp = nss_pred = nss_conf = nsa_dssp = ncons= -1;
105
//////////////////////////////////////////////////////////////////////////////
107
* @brief Prepare a2m/a3m alignment:
108
* Calculate l[k][i] (residue indices) and m[k][i] (position in seq[k])
111
HalfAlignment::Set(char* name, char** seq_in, char** sname_in, int n_in, int L_in, int n1, int n2, int n3, int n4, int nc, int L_in2/*<--FS*/)
113
int i; /* counts match states in seq[k] */
114
int ll; /* counts residues LEFT from or at current position in seq[k] */
115
int mm; /* counts postions in string seq[k] */
116
int k; /* counts sequences */
120
nss_dssp=n1; nss_pred=n2; nss_conf=n3; nsa_dssp=n4; ncons=nc;
121
seq=seq_in; /* flat copy of sequences */
122
sname=sname_in; /* flat copy of sequence names */
127
/* Allocate memory for alignment characters and residue counts */
128
for (k=0; k<n; k++) {
129
s[k]=new char[LINELEN];
130
l[k]=new int[L+10+L_in2/*<--FS*/];
131
m[k]=new int[L+10+L_in2/*<--FS*/];
132
if (!s[k] || !l[k] || !m[k]) MemoryError("space for formatting HMM-HMM alignment");
133
h[k]=0; //starting positions in alignment = 0
134
} /* k <= 0 < n (= n_in) */
136
for (k=0; k<n; k++) {
137
m[k][0]=0; // 0'th match state (virtual) is begin state at 0
138
//if k is consensus sequence
140
for (i=1; i<=L; i++) m[k][i]=l[k][i]=i;
141
m[k][L+1]=l[k][L+1]=L;
145
while ((c=seq[k][mm]))
147
if (MatchChr(c)==c) //count match/delete states
153
if (WordChr(c)) ll++; //index of next residue
156
l[k][i]=ll-1; //set l[k][L+1] eq number of residues in seq k (-1 since there is no residue at L+1st match state)
157
m[k][i]=mm; //set m[k][L+1]
159
if ((i-1)!=L && !warned)
161
cerr<<"Warning: sequence "<<sname[k]<<" in HMM "<<name<<" has "<<i<<" match states but should have "<<L<<"\n";
164
} /* k <= 0 < n (= n_in) */
168
printf(" i chr m l\n");
169
for(i=0;i<=L+1;i++) printf("%3i %1c %3i %3i\n",i,seq[0][m[0][i]],m[0][i],l[0][i]);
173
} /*** end HalfAlignment::Set() ***/
176
/////////////////////////////////////////////////////////////////////////////////////
178
* @brief Fill in insert states following match state i (without inserting '.' to fill up)
181
HalfAlignment::AddInserts(int i)
183
for (int k=0; k<n; k++) // for all sequences...
184
for (int mm=m[k][i]+1; mm<m[k][i+1]; mm++) // for all inserts between match state i and i+1...
185
s[k][h[k]++]=seq[k][mm]; // fill inserts into output alignment s[k]
188
/////////////////////////////////////////////////////////////////////////////////////
190
* @brief Fill up alignment with gaps '.' to generate flush end (all h[k] equal)
193
HalfAlignment::FillUpGaps()
195
int k; //counts sequences
198
// Determine max position h[k]
199
for (k=0; k<n; k++) pos = imax(h[k],pos);
201
// Fill in gaps up to pos
204
for (int hh=h[k]; hh<pos; hh++) s[k][hh]='.';
209
/////////////////////////////////////////////////////////////////////////////////////
211
* @brief Fill in insert states following match state i and fill up gaps with '.'
214
HalfAlignment::AddInsertsAndFillUpGaps(int i)
220
/////////////////////////////////////////////////////////////////////////////////////
222
* @brief Add gap column '.'
225
HalfAlignment::AddChar(char c)
227
for (int k=0; k<n; k++) s[k][h[k]++]=c;
231
/////////////////////////////////////////////////////////////////////////////////////
233
* @brief Add match state column i as is
236
HalfAlignment::AddColumn(int i)
238
for (int k=0; k<n; k++) s[k][h[k]++]=seq[k][m[k][i]];
242
/////////////////////////////////////////////////////////////////////////////////////
244
* @brief Add match state column i as insert state
247
HalfAlignment::AddColumnAsInsert(int i)
250
for (int k=0; k<n; k++)
251
if ((c=seq[k][m[k][i]])!='-' && (c<'0' || c>'9'))
252
s[k][h[k]++]=InsertChr(c);
256
/////////////////////////////////////////////////////////////////////////////////////
258
* @brief Remove all characters c from template sequences
261
HalfAlignment::RemoveChars(char c)
266
for (h=hh=0; h<pos; h++)
267
if (s[k][h]!=c) s[k][hh++]=s[k][h];
272
/////////////////////////////////////////////////////////////////////////////////////
274
* @brief Transform alignment sequences from A3M to A2M (insert ".")
277
HalfAlignment::BuildFASTA()
281
for (int i=1; i<=L; i++)
290
/////////////////////////////////////////////////////////////////////////////////////
292
* @brief Transform alignment sequences from A3M to A2M (insert ".")
295
HalfAlignment::BuildA2M()
299
for (int i=1; i<=L; i++)
308
/////////////////////////////////////////////////////////////////////////////////////
310
* @brief Transform alignment sequences from A3M to A2M (insert ".")
313
HalfAlignment::BuildA3M()
316
for (int i=1; i<=L; i++)
324
/////////////////////////////////////////////////////////////////////////////////////
326
* @brief Transform alignment sequences from A2M to FASTA ( lowercase to uppercase and '.' to '-')
329
HalfAlignment::ToFASTA()
331
for (int k=0; k<n; k++)
338
/////////////////////////////////////////////////////////////////////////////////////
340
* @brief Align query (HalfAlignment) to template (i.e. hit) match state structure
343
HalfAlignment::AlignToTemplate(Hit& hit)
346
int step; // column of the HMM-HMM alignment (first:nstep, last:1)
349
if(0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
350
// If in global mode: Add part of alignment before first MM state
351
AddInserts(0); // Fill in insert states before first match state
352
for (i=1; i<hit.i[hit.nsteps]; i++)
354
AddColumnAsInsert(i);
356
if (par.outformat<=2) FillUpGaps();
360
// Add endgaps (First state must be an MM state!!)
361
for (j=1; j<hit.j[hit.nsteps]; j++)
366
// Add alignment between first and last MM state
367
for (step=hit.nsteps; step>=1; step--)
369
state = hit.states[step];
374
case MM: //MM pair state (both query and template in Match state)
380
AddColumnAsInsert(i);
388
if (par.outformat<=2) FillUpGaps();
392
if(0) { //par.loc==0) { //////////////////////////////////////////// STILL NEEDED??
394
// If in global mode: Add part of alignment after last MM state
395
for (i=hit.i[1]+1; i<=L; i++)
397
AddColumnAsInsert(i);
399
if (par.outformat==2) FillUpGaps();
404
for (j=hit.j[1]+1; j<=hit.L; j++)
409
// Add end-of-string character
414
/////////////////////////////////////////////////////////////////////////////////////
416
* @brief Write the a2m/a3m alignment into alnfile
419
HalfAlignment::Print(char* alnfile)
421
int k; //counts sequences
422
int omitted=0; // counts number of sequences with no residues in match states
424
if (strcmp(alnfile,"stdout"))
426
if (par.append) outf=fopen(alnfile,"a"); else outf=fopen(alnfile,"w");
427
if (!outf) OpenFileError(alnfile);
431
if (v>=3) cout<<"Writing alignment to "<<alnfile<<"\n";
435
// Print sequence only if it contains at least one residue in a match state
436
if (1) //strpbrk(s[k],"ABCDEFGHIKLMNPQRSTUVWXYZ1234567890"))
438
fprintf(outf,">%s\n",sname[k]);
439
fprintf(outf,"%s\n",s[k]);
442
if (v>=3) printf("%-14.14s contains no residue in match state. Omitting sequence\n",sname[k]);
445
if (v>=2 && omitted) printf("Omitted %i sequences in %s which contained no residue in match state\n",omitted,alnfile);
450
/** EOF hhhalfalignment-C.h **/