1
// =============================================================== //
3
// File : BI_helix.cxx //
6
// Institute of Microbiology (Technical University Munich) //
7
// http://www.arb-home.de/ //
9
// =============================================================== //
11
#include "BI_helix.hxx"
9
#include "BI_helix.hxx"
11
17
#define LEFT_HELIX "{[<("
12
18
#define RIGHT_HELIX "}]>)"
25
void BI_helix::_init(void)
31
void BI_helix::_init()
28
for (i=0;i<HELIX_MAX; i++) pairs[i] = 0;
29
for (i=0;i<HELIX_MAX; i++) char_bind[i] = 0;
34
for (i=0; i<HELIX_MAX; i++) pairs[i] = 0;
35
for (i=0; i<HELIX_MAX; i++) char_bind[i] = 0;
34
40
pairs[HELIX_NONE]=strdup("");
35
41
char_bind[HELIX_NONE] = strdup(" ");
37
pairs[HELIX_STRONG_PAIR]=strdup("CG");
38
char_bind[HELIX_STRONG_PAIR] = strdup("-");
43
pairs[HELIX_STRONG_PAIR]=strdup("CG AT AU");
44
char_bind[HELIX_STRONG_PAIR] = strdup("~");
40
pairs[HELIX_PAIR]=strdup("AU GU");
46
pairs[HELIX_PAIR]=strdup("GT GU");
41
47
char_bind[HELIX_PAIR] = strdup("-");
43
pairs[HELIX_WEAK_PAIR]=strdup("GA GT");
44
char_bind[HELIX_WEAK_PAIR] = strdup(".");
49
pairs[HELIX_WEAK_PAIR]=strdup("GA");
50
char_bind[HELIX_WEAK_PAIR] = strdup("=");
46
pairs[HELIX_NO_PAIR]=strdup("AA AC CC CT CU TU");
52
pairs[HELIX_NO_PAIR]=strdup("AA AC CC CT CU GG TU");
47
53
char_bind[HELIX_NO_PAIR] = strdup("#");
49
55
pairs[HELIX_USER0]=strdup(".A .C .G .T .U");
50
char_bind[HELIX_USER0] = strdup("?");
56
char_bind[HELIX_USER0] = strdup("*");
52
58
pairs[HELIX_USER1]=strdup("-A -C -G -T -U");
53
59
char_bind[HELIX_USER1] = strdup("#");
55
pairs[HELIX_USER2]=strdup(".. --");
56
char_bind[HELIX_USER2] = strdup(" ");
61
pairs[HELIX_USER2]=strdup("UU TT");
62
char_bind[HELIX_USER2] = strdup("+");
58
pairs[HELIX_USER3]=strdup(".-");
59
char_bind[HELIX_USER3] = strdup(" ");
64
pairs[HELIX_USER3]=strdup("");
65
char_bind[HELIX_USER3] = strdup("");
61
67
pairs[HELIX_DEFAULT]=strdup("");
62
char_bind[HELIX_DEFAULT] = strdup("?");
68
char_bind[HELIX_DEFAULT] = strdup("");
64
for (i=HELIX_NON_STANDARD0;i<=HELIX_NON_STANDARD9;i++){
70
for (i=HELIX_NON_STANDARD0; i<=HELIX_NON_STANDARD9; i++) {
65
71
pairs[i] = strdup("");
66
72
char_bind[i] = strdup("");
70
76
char_bind[HELIX_NO_MATCH] = strdup("|");
73
BI_helix::BI_helix(void) {
79
BI_helix::BI_helix() {
77
BI_helix::~BI_helix(void){
83
BI_helix::~BI_helix() {
79
for (i=0;i<HELIX_MAX; i++) free(pairs[i]);
80
for (i=0;i<HELIX_MAX; i++) free(char_bind[i]);
85
for (i=0; i<HELIX_MAX; i++) free(pairs[i]);
86
for (i=0; i<HELIX_MAX; i++) free(char_bind[i]);
83
89
for (i = 0; i<Size; ++i) {
92
long BI_helix_check_error(const char *key, long val, void *) {
98
static long BI_helix_check_error(const char *key, long val, void *) {
93
99
struct helix_stack *stack = (struct helix_stack *)val;
94
100
if (!BI_helix::get_error() && stack) { // don't overwrite existing error
95
101
BI_helix::set_error(GBS_global_string("Too many '%c' in Helix '%s' pos %li", stack->c, key, stack->pos));
101
long BI_helix_free_hash(const char *, long val, void *) {
107
static long BI_helix_free_hash(const char *, long val, void *) {
102
108
struct helix_stack *stack = (struct helix_stack *)val;
103
109
struct helix_stack *next;
104
for ( ; stack; stack = next) {
110
for (; stack; stack = next) {
105
111
next = stack->next;
119
125
GB_HASH *hash = GBS_create_hash(256, GB_IGNORE_CASE);
124
struct helix_stack *laststack = 0,*stack;
131
struct helix_stack *laststack = 0, *stack;
133
140
char *h = (char *)malloc(Size+1);
136
if (len<Size) memset(h+len,'.', Size-len);
143
if (len<Size) memset(h+len, '.', Size-len);
137
144
memcpy(h, helix_in, len);
146
153
char *h = (char *)malloc((int)Size+1);
149
if (len<Size) memset(h+len,'.',(int)(Size-len));
156
if (len<Size) memset(h+len, '.', (int)(Size-len));
150
157
memcpy(h, helix_nr_in, len);
155
162
long pos_scanned_till = -1;
157
entries = (struct BI_helix_entry *)GB_calloc(sizeof(struct BI_helix_entry),(size_t)Size);
164
entries = (BI_helix_entry *)GB_calloc(sizeof(BI_helix_entry), (size_t)Size);
160
for (pos = 0; pos < Size; pos ++ ) {
167
for (pos = 0; pos < Size; pos ++) {
162
169
if (long(pos)>pos_scanned_till && isalnum(helix_nr[pos])) {
163
170
for (int j=0; (pos+j)<Size; j++) {
177
if (strchr(LEFT_HELIX,c) || strchr(LEFT_NONS,c) ){ // push
178
laststack = (struct helix_stack *)GBS_read_hash(hash,ident);
184
if (strchr(LEFT_HELIX, c) || strchr(LEFT_NONS, c)) { // push
185
laststack = (struct helix_stack *)GBS_read_hash(hash, ident);
179
186
stack = new helix_stack;
180
187
stack->next = laststack;
181
188
stack->pos = pos;
183
GBS_write_hash(hash,ident,(long)stack);
190
GBS_write_hash(hash, ident, (long)stack);
185
else if (strchr(RIGHT_HELIX,c) || strchr(RIGHT_NONS,c) ){ // pop
186
stack = (struct helix_stack *)GBS_read_hash(hash,ident);
192
else if (strchr(RIGHT_HELIX, c) || strchr(RIGHT_NONS, c)) { // pop
193
stack = (struct helix_stack *)GBS_read_hash(hash, ident);
188
195
bi_assert(!helix_error); // already have an error
189
196
helix_error = GBS_global_string_copy("Too many '%c' in Helix '%s' pos %zu", c, ident, pos);
192
if (strchr(RIGHT_HELIX,c)) {
199
if (strchr(RIGHT_HELIX, c)) {
193
200
entries[pos].pair_type = HELIX_PAIR;
194
201
entries[stack->pos].pair_type = HELIX_PAIR;
197
205
if (stack->c != c) {
198
206
bi_assert(!helix_error); // already have an error
203
211
if (isalpha(c)) {
204
212
entries[pos].pair_type = (BI_PAIR_TYPE)(HELIX_NON_STANDARD0+c-'a');
205
213
entries[stack->pos].pair_type = (BI_PAIR_TYPE)(HELIX_NON_STANDARD0+c-'a');
207
216
entries[pos].pair_type = HELIX_NO_PAIR;
208
217
entries[stack->pos].pair_type = HELIX_NO_PAIR;
211
220
entries[pos].pair_pos = stack->pos;
212
221
entries[stack->pos].pair_pos = pos;
213
GBS_write_hash(hash,ident,(long)stack->next);
222
GBS_write_hash(hash, ident, (long)stack->next);
215
if (sident == 0 || strcmp(sident+1,ident) != 0) {
224
if (sident == 0 || strcmp(sident+1, ident) != 0) {
216
225
sident = (char*)malloc(strlen(ident)+2);
217
sprintf(sident,"-%s",ident);
226
sprintf(sident, "-%s", ident);
219
228
entries[stack->pos].allocated = true;
221
230
entries[pos].helix_nr = sident+1;
242
251
const char *BI_helix::init(GBDATA *gb_helix_nr, GBDATA *gb_helix, size_t sizei) {
245
254
if (!gb_helix) set_error("Can't find SAI:HELIX");
246
255
else if (!gb_helix_nr) set_error("Can't find SAI:HELIX_NR");
248
257
GB_transaction ta(gb_helix);
249
258
initFromData(GB_read_char_pntr(gb_helix_nr), GB_read_char_pntr(gb_helix), sizei);
252
261
return get_error();
260
269
GBDATA *gb_sai_data = GBT_get_SAI_data(gb_main);
261
long size2 = GBT_get_alignment_len(gb_main,alignment_name);
270
long size2 = GBT_get_alignment_len(gb_main, alignment_name);
263
272
if (size2<=0) set_error(GB_await_error());
267
276
GBDATA *gb_helix = 0;
268
277
GBDATA *gb_helix_nr = 0;
270
if (gb_helix_nr_con) gb_helix_nr = GBT_read_sequence(gb_helix_nr_con,alignment_name);
271
if (gb_helix_con) gb_helix = GBT_read_sequence(gb_helix_con,alignment_name);
279
if (gb_helix_nr_con) gb_helix_nr = GBT_read_sequence(gb_helix_nr_con, alignment_name);
280
if (gb_helix_con) gb_helix = GBT_read_sequence(gb_helix_con, alignment_name);
273
282
init(gb_helix_nr, gb_helix, size2);
294
303
const char *BI_helix::init(GBDATA *gb_main) {
295
304
GB_transaction ta(gb_main);
297
306
char *alignment_name = GBT_get_default_alignment(gb_main);
298
307
const char *err = init(gb_main, alignment_name);
305
314
int len = strlen(pairs[pair_type])-1;
306
315
char *pai = pairs[pair_type];
308
for (int i=0; i<len;i+=3){
317
for (int i=0; i<len; i+=3) {
309
318
if ((pai[i] == left && pai[i+1] == right) ||
310
319
(pai[i] == right && pai[i+1] == left)) return true;
322
331
left = toupper(left);
323
332
right = toupper(right);
326
if (is_pairtype(left,right,HELIX_STRONG_PAIR) ||
327
is_pairtype(left,right,HELIX_PAIR)) return 2;
328
if (is_pairtype(left,right,HELIX_WEAK_PAIR) ) return 1;
335
if (is_pairtype(left, right, HELIX_STRONG_PAIR) ||
336
is_pairtype(left, right, HELIX_PAIR)) return 2;
337
if (is_pairtype(left, right, HELIX_WEAK_PAIR)) return 1;
331
340
case HELIX_NO_PAIR:
332
if (is_pairtype(left,right,HELIX_STRONG_PAIR) ||
333
is_pairtype(left,right,HELIX_PAIR) ) return 0;
341
if (is_pairtype(left, right, HELIX_STRONG_PAIR) ||
342
is_pairtype(left, right, HELIX_PAIR)) return 0;
337
return is_pairtype(left,right,pair_type) ? 1 : 0;
346
return is_pairtype(left, right, pair_type) ? 1 : 0;
391
/***************************************************************************************
392
******* Reference to abs pos ********
393
****************************************************************************************/
394
void BI_ecoli_ref::bi_exit(void){
401
BI_ecoli_ref::BI_ecoli_ref(void) {
402
memset((char *)this,0,sizeof(BI_ecoli_ref));
405
BI_ecoli_ref::~BI_ecoli_ref(void){
409
inline bool isGap(char c) { return c == '-' || c == '.'; }
411
const char *BI_ecoli_ref::init(const char *seq, size_t size) {
414
abs2rel = new size_t[size];
415
rel2abs = new size_t[size];
416
memset((char *)rel2abs,0,(size_t)(sizeof(*rel2abs)*size));
421
size_t sl = strlen(seq);
422
for (i=0; i<size; i++) {
425
if (i<sl && !isGap(seq[i])) ++relLen;
430
const char *BI_ecoli_ref::init(GBDATA *gb_main,char *alignment_name, char *ref_name) {
431
GB_transaction ta(gb_main);
434
long size = GBT_get_alignment_len(gb_main,alignment_name);
436
if (size<=0) err = GB_await_error();
438
GBDATA *gb_ref_con = GBT_find_SAI(gb_main, ref_name);
439
if (!gb_ref_con) err = GBS_global_string("I cannot find the SAI '%s'",ref_name);
441
GBDATA *gb_ref = GBT_read_sequence(gb_ref_con,alignment_name);
442
if (!gb_ref) err = GBS_global_string("Your SAI '%s' has no sequence '%s/data'", ref_name, alignment_name);
444
err = init(GB_read_char_pntr(gb_ref), size);
451
const char *BI_ecoli_ref::init(GBDATA *gb_main) {
452
GB_transaction ta(gb_main);
454
char *ref = GBT_get_default_ref(gb_main);
455
char *use = GBT_get_default_alignment(gb_main);
456
GB_ERROR err = init(gb_main,use,ref);