~ubuntu-branches/ubuntu/precise/exonerate/precise

1 by Steffen Moeller
Import upstream version 1.4.0
1
/****************************************************************\
2
*                                                                *
3
*  C4 dynamic programming library - sub-alignment regions        *
4
*                                                                *
5
*  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
1.1.1 by Charles Plessy
Import upstream version 2.0.0
6
*  Copyright (C) 2000-2008.  All Rights Reserved.                *
1 by Steffen Moeller
Import upstream version 1.4.0
7
*                                                                *
8
*  This source code is distributed under the terms of the        *
1.1.1 by Charles Plessy
Import upstream version 2.0.0
9
*  GNU General Public License, version 3. See the file COPYING   *
10
*  or http://www.gnu.org/licenses/gpl.txt for details            *
1 by Steffen Moeller
Import upstream version 1.4.0
11
*                                                                *
12
*  If you use this code, please keep this notice intact.         *
13
*                                                                *
14
\****************************************************************/
15
16
#include "sar.h"
17
18
/**/
19
20
SAR_ArgumentSet *SAR_ArgumentSet_create(Argument *arg){
21
    register ArgumentSet *as;
22
    static SAR_ArgumentSet sas;
23
    if(arg){
24
        as = ArgumentSet_create("SAR Options");
25
        /**/
26
        ArgumentSet_add_option(as, '\0', "quality", "percent",
27
            "HSP quality threshold", "0",
28
            Argument_parse_float, &sas.hsp_quality);
29
        /**/
30
        Argument_absorb_ArgumentSet(arg, as);
31
        }
32
    return &sas;
33
    }
34
35
/**/
36
37
static gboolean SAR_region_valid_hsp_exit(Region *region, HSP *hsp){
38
#ifndef G_DISABLE_ASSERT
39
    register gint query_width = Region_query_end(region)
40
                              - hsp->query_start;
41
    register gint target_width = Region_target_end(region)
42
                               - hsp->target_start;
43
    register gint query_prefix = Region_query_end(region)
44
                               - hsp->query_start;
45
#endif /* G_DISABLE_ASSERT */
46
    /* Check region corner meets the HSP */
47
    g_assert(Region_is_valid(region));
48
    g_assert((query_width * HSP_target_advance(hsp))
49
          == (target_width * HSP_query_advance(hsp)));
50
    /* Check region corner is at HSP word boundary */
51
    g_assert(!(query_width % HSP_query_advance(hsp)));
52
    g_assert(!(target_width % HSP_target_advance(hsp)));
53
    /* Check region boundary is between cobs and HSP end */
54
    g_assert(query_prefix >= 0);
55
    g_assert(query_prefix <= HSP_query_cobs(hsp));
56
    return TRUE;
57
    }
58
59
static gboolean SAR_region_valid_hsp_entry(Region *region, HSP *hsp){
60
#ifndef G_DISABLE_ASSERT
61
    register gint query_width = HSP_query_end(hsp)
62
                              - region->query_start;
63
    register gint target_width = HSP_target_end(hsp)
64
                               - region->target_start;
65
    register gint query_suffix = HSP_query_end(hsp)
66
                               - region->query_start;
67
#endif /* G_DISABLE_ASSERT */
68
    g_assert(Region_is_valid(region));
69
    /* Check region corner meets the HSP */
70
    g_assert((query_width * HSP_target_advance(hsp))
71
          == (target_width * HSP_query_advance(hsp)));
72
    /* Check region corner is at HSP word boundary */
73
    g_assert(!(query_width % HSP_query_advance(hsp)));
74
    g_assert(!(target_width % HSP_target_advance(hsp)));
75
    /* Check region boundary is between HSP start and cobs */
76
    g_assert(query_suffix >= 0);
77
    g_assert(query_suffix <= (HSP_query_end(hsp)-HSP_query_cobs(hsp)));
78
    return TRUE;
79
    }
80
81
/**/
82
83
static gboolean SAR_Terminal_calculate_start_region(HSP *hsp,
84
            HPair *hpair, Heuristic_Range *range, C4_Scope scope,
85
            Region *region){
86
    register gint to_shrink;
87
    register gboolean at_query_edge = FALSE, at_target_edge = FALSE;
88
    Region outer_box;
89
    /* Find outer box (cobs -> corner) */
90
    outer_box.query_start = 0;
91
    outer_box.target_start = 0;
92
    outer_box.query_length = HSP_query_cobs(hsp);
93
    outer_box.target_length = HSP_target_cobs(hsp);
94
    /* Find inner_box (end point) */
95
    region->query_start = hsp->query_start;
96
    region->target_start = hsp->target_start;
97
    region->query_length = 0;
98
    region->target_length = 0;
99
    /* Grow inner_box by external range */
100
    region->query_start -= range->external_query;
101
    region->target_start -= range->external_target;
102
    region->query_length += range->external_query;
103
    region->target_length += range->external_target;
104
    /* Grow inner_box on HSPs by internal range */
105
    region->query_length += range->internal_query;
106
    region->target_length += range->internal_target;
107
    /* Trim inner_box to intersection (inner,outer) */
108
    if(region->query_start < outer_box.query_start){
109
        region->query_length -= (outer_box.query_start
110
                                 - region->query_start);
111
        region->query_start = outer_box.query_start;
112
        }
113
    if(region->target_start < outer_box.target_start){
114
        region->target_length -= (outer_box.target_start
115
                                  - region->target_start);
116
        region->target_start = outer_box.target_start;
117
        }
118
    to_shrink = Region_query_end(region)
119
              - Region_query_end(&outer_box);
120
    if(to_shrink > 0)
121
        region->query_length -= to_shrink;
122
    to_shrink = Region_target_end(region)
123
              - Region_target_end(&outer_box);
124
    if(to_shrink > 0)
125
        region->target_length -= to_shrink;
126
    /* Fail when region is empty */
127
    if(region->query_length <= 0)
128
        return FALSE;
129
    if(region->target_length <= 0)
130
        return FALSE;
131
    /* Check region is still on HSP boundary */
132
    g_assert(SAR_region_valid_hsp_exit(region, hsp));
133
    /* Check validity for non-local models */
134
    at_query_edge = (region->query_start == 0);
135
    at_target_edge = (region->target_start == 0);
136
    switch(scope){
137
        case C4_Scope_ANYWHERE:
138
            return TRUE;
139
            break;
140
        case C4_Scope_CORNER:
141
            if(at_query_edge && at_target_edge)
142
                return TRUE;
143
            break;
144
        case C4_Scope_EDGE:
145
            if(at_query_edge || at_target_edge)
146
                return TRUE;
147
            break;
148
        case C4_Scope_QUERY:
149
            if(at_query_edge)
150
                return TRUE;
151
            break;
152
        case C4_Scope_TARGET:
153
            if(at_target_edge)
154
                return TRUE;
155
            break;
156
        }
157
    return FALSE;
158
    }
159
160
static gboolean SAR_Terminal_calculate_end_region(HSP *hsp,
161
            HPair *hpair, Heuristic_Range *range, C4_Scope scope,
162
            Region *region){
163
    register gint to_shrink;
164
    register gboolean at_query_edge = FALSE, at_target_edge = FALSE;
165
    Region outer_box;
166
    /* Find outer box (cobs -> corner) */
167
    outer_box.query_start = HSP_query_cobs(hsp);
168
    outer_box.target_start = HSP_target_cobs(hsp);
169
    outer_box.query_length = hsp->hsp_set->query->len
170
                           - HSP_query_cobs(hsp);
171
    outer_box.target_length = hsp->hsp_set->target->len
172
                           - HSP_target_cobs(hsp);
173
    /**/
174
    /* Find inner_box (end point) */
175
    region->query_start = HSP_query_end(hsp);
176
    region->target_start = HSP_target_end(hsp);
177
    region->query_length = 0;
178
    region->target_length = 0;
179
    /* Grow inner_box by external range */
180
    region->query_length += range->external_query;
181
    region->target_length += range->external_target;
182
    /* Grow inner_box on HSPs by internal range */
183
    region->query_start -= range->internal_query;
184
    region->query_length += range->internal_query;
185
    region->target_start -= range->internal_target;
186
    region->target_length += range->internal_target;
187
    /* Trim inner_box to intersection (inner,outer) */
188
    if(Region_query_end(region) > Region_query_end(&outer_box)){
189
        region->query_length -= (Region_query_end(region)
190
                                - Region_query_end(&outer_box));
191
        }
192
    if(Region_target_end(region) > Region_target_end(&outer_box)){
193
        region->target_length -= (Region_target_end(region)
194
                                  - Region_target_end(&outer_box));
195
        }
196
    to_shrink = outer_box.query_start
197
              - region->query_start;
198
    if(to_shrink > 0){
199
        region->query_start += to_shrink;
200
        region->query_length -= to_shrink;
201
        }
202
    to_shrink = outer_box.target_start
203
              - region->target_start;
204
    if(to_shrink > 0){
205
        region->target_start += to_shrink;
206
        region->target_length -= to_shrink;
207
        }
208
    /* Fail when region is empty */
209
    if(region->query_length <= 0)
210
        return FALSE;
211
    if(region->target_length <= 0)
212
        return FALSE;
213
    /* Check region is still on HSP boundary */
214
    g_assert(SAR_region_valid_hsp_entry(region, hsp));
215
    /* Check validity for non-local models */
216
    at_query_edge = (Region_query_end(region)
217
                     == hsp->hsp_set->query->len);
218
    at_target_edge = (Region_target_end(region)
219
                     == hsp->hsp_set->target->len);
220
    switch(scope){
221
        case C4_Scope_ANYWHERE:
222
            return TRUE;
223
            break;
224
        case C4_Scope_CORNER:
225
            if(at_query_edge && at_target_edge)
226
                return TRUE;
227
            break;
228
        case C4_Scope_EDGE:
229
            if(at_query_edge || at_target_edge)
230
                return TRUE;
231
            break;
232
        case C4_Scope_QUERY:
233
            if(at_query_edge)
234
                return TRUE;
235
            break;
236
        case C4_Scope_TARGET:
237
            if(at_target_edge)
238
                return TRUE;
239
            break;
240
        }
241
    return FALSE;
242
    }
243
244
/**/
245
246
static C4_Score SAR_find_start_component(HPair *hpair,
247
                Region *region, HSP *hsp, C4_Calc *calc, gint *prefix){
248
    register C4_Score component = 0;
249
    register gint i, query_pos = hsp->query_start,
250
                     target_pos = hsp->target_start;
251
    register Region *calc_region;
252
    g_assert(prefix);
253
    (*prefix) = (Region_query_end(region) - hsp->query_start)
254
              / HSP_query_advance(hsp);
255
    g_assert((*prefix) >= 0);
256
    calc_region = Region_create(hsp->query_start, hsp->target_start,
257
                     (*prefix) * HSP_query_advance(hsp),
258
                     (*prefix) * HSP_target_advance(hsp));
259
    C4_Calc_init(calc, calc_region, hpair->user_data);
260
    for(i = 0; i < (*prefix); i++){
261
        component += C4_Calc_score(calc, query_pos, target_pos,
262
                                   hpair->user_data);
263
        query_pos += HSP_query_advance(hsp);
264
        target_pos += HSP_target_advance(hsp);
265
        }
266
    g_assert(component <= hsp->score);
267
    C4_Calc_exit(calc, calc_region, hpair->user_data);
268
    Region_destroy(calc_region);
269
    return component;
270
    }
271
272
static C4_Score SAR_find_end_component(HPair *hpair,
273
                Region *region, HSP *hsp, C4_Calc *calc, gint *suffix){
274
    register C4_Score component = 0;
275
    register gint i, query_pos, target_pos;
276
    register Region *calc_region;
277
    g_assert(suffix >= 0);
278
    g_assert(suffix);
279
    (*suffix) = (HSP_query_end(hsp) - region->query_start)
280
              / HSP_query_advance(hsp);
281
    calc_region = Region_create(region->query_start,
282
                                region->target_start,
283
                    (*suffix) * HSP_query_advance(hsp),
284
                    (*suffix) * HSP_target_advance(hsp));
285
    C4_Calc_init(calc, calc_region, hpair->user_data);
286
    query_pos = region->query_start;
287
    target_pos = region->target_start;
288
    for(i = 0; i < (*suffix); i++){
289
        component += C4_Calc_score(calc, query_pos, target_pos,
290
                                   hpair->user_data);
291
        query_pos += HSP_query_advance(hsp);
292
        target_pos += HSP_target_advance(hsp);
293
        }
294
    g_assert(component <= hsp->score);
295
    C4_Calc_exit(calc, calc_region, hpair->user_data);
296
    Region_destroy(calc_region);
297
    return component;
298
    }
299
300
/**/
301
302
static void SAR_HSP_quality(HPair *hpair, C4_Calc *calc,
303
                            HSP *hsp, C4_Score component,
304
                            gint start, gint length,
305
                            gint *half_score, gint *max_score){
306
    register gint i, query_pos, target_pos;
307
    g_assert(half_score);
308
    g_assert(max_score);
309
    query_pos = hsp->query_start + (start * HSP_query_advance(hsp));
310
    target_pos = hsp->target_start + (start * HSP_target_advance(hsp));
311
    (*half_score) = (*max_score) = 0;
312
    for(i = 0; i < length; i++){
313
        (*half_score) += HSP_get_score(hsp, query_pos, target_pos);
314
        (*max_score) += HSP_query_self(hsp, query_pos);
315
        query_pos += HSP_query_advance(hsp);
316
        target_pos += HSP_target_advance(hsp);
317
        }
318
    return;
319
    }
320
321
SAR_Terminal *SAR_Terminal_create(HSP *hsp, HPair *hpair,
322
                                  Heuristic_Match *match,
323
                                  gboolean is_start){
324
    register SAR_Terminal *sar_terminal;
325
    Region region;
326
    register C4_Score component;
327
    gint prefix, suffix;
328
    register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL);
329
    register gboolean is_valid = FALSE;
330
    gint half_score, max_score;
331
    register gint start, length;
332
    prefix = suffix = 0;
333
    if(is_start){
334
        is_valid = SAR_Terminal_calculate_start_region(hsp, hpair,
335
                       match->start_terminal->range,
336
                       hpair->heuristic->model->start_state->scope,
337
                       &region);
338
    } else { /* is end */
339
        is_valid = SAR_Terminal_calculate_end_region(hsp, hpair,
340
                       match->end_terminal->range,
341
                       hpair->heuristic->model->end_state->scope,
342
                       &region);
343
        }
344
    if(!is_valid)
345
        return NULL;
346
    if(is_start){
347
        component = SAR_find_start_component(hpair,
348
                                   &region, hsp, match->portal->calc,
349
                                   &prefix);
350
        start = prefix;
351
        length = hsp->cobs - prefix;
352
    } else {
353
        component = SAR_find_end_component(hpair,
354
                                   &region, hsp, match->portal->calc,
355
                                   &suffix);
356
        start = hsp->cobs;
357
        length = hsp->length - hsp->cobs - suffix;
358
        }
359
    if(length && (sas->hsp_quality > 0.0)){
360
        SAR_HSP_quality(hpair, match->portal->calc,
361
                        hsp, component, start, length,
362
                        &half_score, &max_score);
363
        if((((gdouble)half_score/(gdouble)max_score) * 100.0)
364
          < sas->hsp_quality)
365
            return NULL;
366
        }
367
    sar_terminal = g_new(SAR_Terminal, 1);
368
    sar_terminal->region = Region_copy(&region);
369
    sar_terminal->component = component;
370
    return sar_terminal;
371
    }
372
373
void SAR_Terminal_destroy(SAR_Terminal *sar_terminal){
374
    g_assert(sar_terminal);
375
    Region_destroy(sar_terminal->region);
376
    g_free(sar_terminal);
377
    return;
378
    }
379
380
C4_Score SAR_Terminal_find_bound(SAR_Terminal *sar_terminal,
381
                                 Heuristic_Bound *heuristic_bound){
382
    g_assert(sar_terminal->region->query_length >= 0);
383
    g_assert(sar_terminal->region->target_length >= 0);
384
    g_assert(sar_terminal->region->query_length
385
            <= heuristic_bound->query_range);
386
    g_assert(sar_terminal->region->target_length
387
            <= heuristic_bound->target_range);
388
    return heuristic_bound->matrix[sar_terminal->region->query_length]
389
                                  [sar_terminal->region->target_length]
390
           - sar_terminal->component;
391
    }
392
393
C4_Score SAR_Terminal_find_score(SAR_Terminal *sar_terminal,
394
                                 Optimal *optimal, HPair *hpair){
395
    return Optimal_find_score(optimal, sar_terminal->region,
396
                              hpair->user_data, hpair->subopt)
397
          - sar_terminal->component;
398
    }
399
400
/**/
401
402
static void SAR_reduce_mid_overlap(HPair *hpair, HSP *src, HSP *dst,
403
            Region *region, C4_Calc *src_calc, C4_Calc *dst_calc){
404
    register C4_Score src_total, dst_total, max_total;
405
    register gint src_query_pos, src_target_pos,
406
                  dst_query_pos, dst_target_pos;
407
    register gint max_src_query_pos, max_src_target_pos,
408
                  max_dst_query_pos, max_dst_target_pos,
409
                  max_dist_from_centre;
410
    /* Reverse up dst region, counting dst_total */
411
    if(!(region->query_length + region->target_length))
412
        return;
413
    src_total = dst_total = 0;
414
    dst_query_pos = Region_query_end(region)
415
                  - HSP_query_advance(dst);
416
    dst_target_pos = Region_target_end(region)
417
                   - HSP_target_advance(dst);
418
    while((dst_query_pos >= region->query_start)
419
       && (dst_target_pos >= region->target_start)
420
       && (dst_query_pos >= dst->query_start)
421
       && (dst_target_pos >= dst->target_start)){
422
        dst_total += C4_Calc_score(dst_calc,
423
                                   dst_query_pos, dst_target_pos,
424
                                   hpair->user_data);
425
        dst_query_pos -= HSP_query_advance(dst);
426
        dst_target_pos -= HSP_target_advance(dst);
427
        }
428
    dst_query_pos += HSP_query_advance(dst);
429
    dst_target_pos += HSP_target_advance(dst);
430
    /* Go down src region, storing max total */
431
    src_query_pos = region->query_start;
432
    src_target_pos = region->target_start;
433
    max_total = dst_total;
434
    max_src_query_pos = src_query_pos;
435
    max_src_target_pos = src_target_pos;
436
    max_dst_query_pos = dst_query_pos;
437
    max_dst_target_pos = dst_target_pos;
438
    max_dist_from_centre = Region_query_end(region)
439
                         - src_query_pos;
440
    while((src_query_pos < Region_query_end(region))
441
       && (src_target_pos < Region_target_end(region))
442
       && (src_query_pos < HSP_query_end(src))
443
       && (src_target_pos < HSP_target_end(src))){
444
        src_total += C4_Calc_score(src_calc,
445
                            src_query_pos, src_target_pos,
446
                            hpair->user_data);
447
        while((src_query_pos >= dst_query_pos)
448
           || (src_target_pos >= dst_target_pos)){
449
            dst_total -= C4_Calc_score(dst_calc,
450
                                       dst_query_pos, dst_target_pos,
451
                                       hpair->user_data);
452
            dst_query_pos += HSP_query_advance(dst);
453
            dst_target_pos += HSP_target_advance(dst);
454
            }
455
        if(max_total <= (src_total + dst_total)){
456
            /* If the score better
457
             * or the distance from the overlap centre is less
458
             */
459
            if((max_total < (src_total + dst_total))
460
            || (ABS(Region_query_end(region)-src_query_pos)
461
                < max_dist_from_centre)){
462
                max_dist_from_centre = ABS(Region_query_end(region)
463
                                           - src_query_pos);
464
                max_total = (src_total + dst_total);
465
                max_src_query_pos = src_query_pos;
466
                max_src_target_pos = src_target_pos;
467
                max_dst_query_pos = dst_query_pos;
468
                max_dst_target_pos = dst_target_pos;
469
                }
470
            }
471
        src_query_pos += HSP_query_advance(src);
472
        src_target_pos += HSP_target_advance(src);
473
        }
474
    /**/
475
    region->query_start = max_src_query_pos;
476
    region->target_start = max_src_target_pos;
477
    region->query_length = max_dst_query_pos
478
                         - max_src_query_pos;
479
    region->target_length = max_dst_target_pos
480
                          - max_src_target_pos;
481
    g_assert(Region_is_valid(region));
482
    return;
483
    }
484
485
static void SAR_find_end_box(HPair *hpair, HSP *src, HSP *dst,
486
                             Region *region, Region *cobs_box,
487
                             C4_Calc *src_calc, C4_Calc *dst_calc){
488
    register gint
489
        query_overlap = (HSP_query_end(src) - dst->query_start),
490
        target_overlap = (HSP_target_end(src) - dst->target_start);
491
    register gint src_query_move = 0, src_target_move = 0,
492
                  dst_query_move = 0, dst_target_move = 0;
493
    region->query_start = MIN(HSP_query_end(src), dst->query_start);
494
    region->target_start = MIN(HSP_target_end(src), dst->target_start);
495
    region->query_length = MAX(HSP_query_end(src), dst->query_start)
496
                         - region->query_start;
497
    region->target_length = MAX(HSP_target_end(src), dst->target_start)
498
                          - region->target_start;
499
    if((query_overlap > 0) || (target_overlap > 0)){
500
        /* Snap end_box to HSPs within cobs */
501
        src_query_move = region->query_start
502
                   - cobs_box->query_start;
503
        src_target_move = region->target_start
504
                    - cobs_box->target_start;
505
        if((src_query_move <= 0) || (src_target_move <= 0)){
506
            src_query_move = src_target_move = 0;
507
        } else {
508
            src_query_move -= (src_query_move % HSP_query_advance(src));
509
            src_target_move -= (src_target_move
510
                                % HSP_target_advance(src));
511
            if((src_query_move / HSP_query_advance(src))
512
             < (src_target_move / HSP_target_advance(src))){
513
                src_target_move = (src_query_move
514
                                   / HSP_query_advance(src))
515
                                * HSP_target_advance(src);
516
            } else {
517
                src_query_move = (src_target_move
518
                                  / HSP_target_advance(src))
519
                               * HSP_query_advance(src);
520
                }
521
            }
522
        /**/
523
        dst_query_move = Region_query_end(cobs_box)
524
                   - Region_query_end(region);
525
        dst_target_move = Region_target_end(cobs_box)
526
                    - Region_target_end(region);
527
        if((dst_query_move <= 0) || (dst_target_move <= 0)){
528
            dst_query_move = dst_target_move = 0;
529
        } else {
530
            dst_query_move -= (dst_query_move
531
                               % HSP_query_advance(dst));
532
            dst_target_move -= (dst_target_move
533
                                % HSP_target_advance(dst));
534
            if((dst_query_move / HSP_query_advance(dst))
535
             < (dst_target_move / HSP_target_advance(dst))){
536
                dst_target_move = (dst_query_move
537
                                   / HSP_query_advance(dst))
538
                            * HSP_target_advance(dst);
539
            } else {
540
                dst_query_move = (dst_target_move
541
                                  / HSP_target_advance(dst))
542
                               * HSP_query_advance(dst);
543
                }
544
            }
545
        region->query_start = cobs_box->query_start
546
                            + src_query_move;
547
        region->target_start = cobs_box->target_start
548
                             + src_target_move;
549
        region->query_length = Region_query_end(cobs_box)
550
                             - dst_query_move
551
                             - region->query_start;
552
        region->target_length = Region_target_end(cobs_box)
553
                             - dst_target_move
554
                             - region->target_start;
555
        g_assert(SAR_region_valid_hsp_entry(region, src));
556
        g_assert(SAR_region_valid_hsp_exit(region, dst));
557
        SAR_reduce_mid_overlap(hpair, src, dst, region,
558
                               src_calc, dst_calc);
559
        }
560
    g_assert(SAR_region_valid_hsp_entry(region, src));
561
    g_assert(SAR_region_valid_hsp_exit(region, dst));
562
    return;
563
    }
564
565
static gboolean SAR_find_cobs_box(HSP *src, HSP *dst, Region *region){
566
    region->query_start = HSP_query_cobs(src);
567
    region->target_start = HSP_target_cobs(src);
568
    region->query_length = HSP_query_cobs(dst)
569
                         - region->query_start;
570
    region->target_length = HSP_target_cobs(dst)
571
                          - region->target_start;
572
    if(region->query_length <= 0)
573
        return FALSE;
574
    if(region->target_length <= 0)
575
        return FALSE;
576
    return TRUE;
577
    }
578
579
static gboolean SAR_Join_calculate_region(HPair *hpair,
580
                                          HSP *src, HSP *dst,
581
                                          Region *region,
582
                                          Heuristic_Pair *pair){
583
    register gint to_shrink;
584
    Region outer_box;
585
    /* Find outer_box (cobs box) */
586
    if(!SAR_find_cobs_box(src, dst, &outer_box))
587
        return FALSE;
588
    /* Find inner_box (end_box) */
589
    SAR_find_end_box(hpair, src, dst, region, &outer_box,
590
                     pair->src->portal->calc, pair->dst->portal->calc);
591
    /* Check within external range */
592
    if(region->query_length > (pair->join->src_range->external_query
593
                             + pair->join->dst_range->external_query))
594
        return FALSE;
595
    if(region->target_length > (pair->join->src_range->external_target
596
                              + pair->join->dst_range->external_target))
597
        return FALSE;
598
    /* Grow inner_region by internal_range */
599
    region->query_start -= pair->join->src_range->internal_query;
600
    region->query_length += (pair->join->src_range->internal_query
601
                           + pair->join->dst_range->internal_query);
602
    region->target_start -= pair->join->src_range->internal_target;
603
    region->target_length += (pair->join->src_range->internal_target
604
                           + pair->join->dst_range->internal_target);
605
    /* Trim inner_box to intersection (inner,outer) */
606
    to_shrink = outer_box.query_start
607
              - region->query_start;
608
    if(to_shrink > 0){
609
        region->query_start += to_shrink;
610
        region->query_length -= to_shrink;
611
        }
612
    to_shrink = outer_box.target_start
613
              - region->target_start;
614
    if(to_shrink > 0){
615
        region->target_start += to_shrink;
616
        region->target_length -= to_shrink;
617
        }
618
    to_shrink = Region_query_end(region)
619
              - Region_query_end(&outer_box);
620
    if(to_shrink > 0)
621
        region->query_length -= to_shrink;
622
    to_shrink = Region_target_end(region)
623
              - Region_target_end(&outer_box);
624
    if(to_shrink > 0)
625
        region->target_length -= to_shrink;
626
    /* Check region not empty */
627
    if(region->query_length < 1)
628
        return FALSE;
629
    if(region->target_length < 1)
630
        return FALSE;
631
    /* Check region is still on HSP boundaries */
632
    g_assert(SAR_region_valid_hsp_entry(region, src));
633
    g_assert(SAR_region_valid_hsp_exit(region, dst));
634
    return TRUE;
635
    }
636
637
SAR_Join *SAR_Join_create(HSP *src_hsp, HSP *dst_hsp, HPair *hpair,
638
                          Heuristic_Pair *pair){
639
    register SAR_Join *sar_join;
640
    register C4_Score src_component, dst_component;
641
    register gint src_length, dst_length;
642
    gint prefix, suffix, src_half_score, dst_half_score,
643
                         src_max_score, dst_max_score;
644
    Region region;
645
    register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL);
646
    if(!SAR_Join_calculate_region(hpair, src_hsp, dst_hsp, &region,
647
                                  pair))
648
        return NULL;
649
    src_component = SAR_find_end_component(hpair,
650
              &region, src_hsp, pair->src->portal->calc, &suffix);
651
    dst_component = SAR_find_start_component(hpair,
652
              &region, dst_hsp, pair->dst->portal->calc, &prefix);
653
    src_length = src_hsp->length - src_hsp->cobs - suffix;
654
    dst_length = dst_hsp->cobs - prefix;
655
    if((src_length + dst_length) && (sas->hsp_quality > 0.0)){
656
        SAR_HSP_quality(hpair, pair->src->portal->calc,
657
                        src_hsp, src_component,
658
                        src_hsp->cobs, src_length,
659
                        &src_half_score, &src_max_score);
660
        SAR_HSP_quality(hpair, pair->dst->portal->calc,
661
                        dst_hsp, dst_component,
662
                        prefix, dst_length,
663
                        &dst_half_score, &dst_max_score);
664
        if(((((gdouble)(src_half_score+dst_half_score))
665
           / ((gdouble)(src_max_score+dst_max_score))) * 100.0)
666
          < sas->hsp_quality)
667
            return NULL;
668
        }
669
    sar_join = g_new(SAR_Join, 1);
670
    sar_join->region = Region_copy(&region);
671
    sar_join->src_component = src_component;
672
    sar_join->dst_component = dst_component;
673
    sar_join->pair = pair;
674
    return sar_join;
675
    }
676
677
void SAR_Join_destroy(SAR_Join *sar_join){
678
    g_assert(sar_join);
679
    Region_destroy(sar_join->region);
680
    g_free(sar_join);
681
    return;
682
    }
683
684
C4_Score SAR_Join_find_bound(SAR_Join *sar_join){
685
    g_assert(sar_join->region->query_length >= 0);
686
    g_assert(sar_join->region->target_length >= 0);
687
    g_assert(sar_join->region->query_length
688
            <= sar_join->pair->join->bound->query_range);
689
    g_assert(sar_join->region->target_length
690
            <= sar_join->pair->join->bound->target_range);
691
    return sar_join->pair->join->bound->matrix
692
               [sar_join->region->query_length]
693
               [sar_join->region->target_length]
694
          - (sar_join->src_component + sar_join->dst_component);
695
    }
696
697
C4_Score SAR_Join_find_score(SAR_Join *sar_join, HPair *hpair){
698
    return Optimal_find_score(sar_join->pair->join->optimal,
699
                              sar_join->region,
700
                              hpair->user_data, hpair->subopt)
701
          - (sar_join->src_component + sar_join->dst_component);
702
    }
703
704
/**/
705
706
static gboolean SAR_Span_calculate_regions(HPair *hpair,
707
                                           HSP *src, HSP *dst,
708
                                           Heuristic_Span *span,
709
                                           Region *src_region,
710
                                           Region *dst_region,
711
                                           C4_Calc *src_calc,
712
                                           C4_Calc *dst_calc){
713
    register gint to_shrink;
714
    Region outer_box, end_box;
715
    /* Find outer_box (cobs box) */
716
    if(!SAR_find_cobs_box(src, dst, &outer_box))
717
        return FALSE;
718
    /* Find inner_box (end_box) */
719
    SAR_find_end_box(hpair, src, dst, &end_box, &outer_box,
720
                     src_calc, dst_calc);
721
    /* Set src_region to entry points to end_box */
722
    src_region->query_start = end_box.query_start;
723
    src_region->target_start = end_box.target_start;
724
    src_region->query_length = 0;
725
    src_region->target_length = 0;
726
    /* Set dst_region to exit points from end_box */
727
    dst_region->query_start = Region_query_end(&end_box);
728
    dst_region->target_start = Region_target_end(&end_box);
729
    dst_region->query_length = 0;
730
    dst_region->target_length = 0;
731
    /* Grow src_region by external range */
732
    src_region->query_length += span->src_range->external_query;
733
    src_region->target_length += span->src_range->external_target;
734
    /* Grow dst_region by external range */
735
    dst_region->query_start -= span->dst_range->external_query;
736
    dst_region->target_start -= span->dst_range->external_target;
737
    dst_region->query_length += span->dst_range->external_query;
738
    dst_region->target_length += span->dst_range->external_target;
739
    /* Grow inner_box on HSPs by internal range */
740
    src_region->query_start -= span->src_range->internal_query;
741
    src_region->query_length += span->src_range->internal_query;
742
    src_region->target_start -= span->src_range->internal_target;
743
    src_region->target_length += span->src_range->internal_target;
744
    /**/
745
    /* Grow inner_box on HSPs by internal range */
746
    dst_region->query_length += span->dst_range->internal_query;
747
    dst_region->target_length += span->dst_range->internal_target;
748
    /**/
749
    /* Trim inner_box to intersection (inner,outer) */
750
    if(Region_query_end(src_region) > Region_query_end(&outer_box)){
751
        src_region->query_length -= (Region_query_end(src_region)
752
                                - Region_query_end(&outer_box));
753
        }
754
    if(Region_target_end(src_region) > Region_target_end(&outer_box)){
755
        src_region->target_length -= (Region_target_end(src_region)
756
                                  - Region_target_end(&outer_box));
757
        }
758
    to_shrink = outer_box.query_start
759
              - src_region->query_start;
760
    if(to_shrink > 0){
761
        src_region->query_start += to_shrink;
762
        src_region->query_length -= to_shrink;
763
        }
764
    to_shrink = outer_box.target_start
765
              - src_region->target_start;
766
    if(to_shrink > 0){
767
        src_region->target_start += to_shrink;
768
        src_region->target_length -= to_shrink;
769
        }
770
    /* Trim inner_box to intersection (inner,outer) */
771
    if(dst_region->query_start < outer_box.query_start){
772
        dst_region->query_length -= (outer_box.query_start
773
                                 - dst_region->query_start);
774
        dst_region->query_start = outer_box.query_start;
775
        }
776
    if(dst_region->target_start < outer_box.target_start){
777
        dst_region->target_length -= (outer_box.target_start
778
                                  - dst_region->target_start);
779
        dst_region->target_start = outer_box.target_start;
780
        }
781
    to_shrink = Region_query_end(dst_region)
782
              - Region_query_end(&outer_box);
783
    if(to_shrink > 0)
784
        dst_region->query_length -= to_shrink;
785
    to_shrink = Region_target_end(dst_region)
786
              - Region_target_end(&outer_box);
787
    if(to_shrink > 0)
788
        dst_region->target_length -= to_shrink;
789
    /* Check size not zero */
790
    if(src_region->query_length < 1)
791
        return FALSE;
792
    if(src_region->target_length < 1)
793
        return FALSE;
794
    if(dst_region->query_length < 1)
795
        return FALSE;
796
    if(dst_region->target_length < 1)
797
        return FALSE;
798
    /* Check a path is possible */
799
    if((dst_region->query_start - Region_query_end(src_region))
800
       > span->span->max_query)
801
        return FALSE;
802
    if((dst_region->target_start - Region_target_end(src_region))
803
       > span->span->max_target)
804
        return FALSE;
805
    /* Check region are still on HSP boundaries */
806
    g_assert(SAR_region_valid_hsp_entry(src_region, src));
807
    g_assert(SAR_region_valid_hsp_exit(dst_region, dst));
808
    return TRUE;
809
    }
810
811
SAR_Span *SAR_Span_create(HSP *src_hsp, HSP *dst_hsp, HPair *hpair,
812
                          Heuristic_Span *span,
813
                          C4_Portal *src_portal, C4_Portal *dst_portal){
814
    register SAR_Span *sar_span;
815
    register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL);
816
    register C4_Score src_component, dst_component;
817
    register gint src_length, dst_length;
818
    gint suffix, prefix, src_half_score, dst_half_score,
819
                         src_max_score, dst_max_score;
820
    Region src_region, dst_region;
821
    if(!SAR_Span_calculate_regions(hpair, src_hsp, dst_hsp, span,
822
                                   &src_region, &dst_region,
823
                                   src_portal->calc, dst_portal->calc))
824
        return NULL;
825
    src_component = SAR_find_end_component(hpair,
826
              &src_region, src_hsp, src_portal->calc, &suffix);
827
    dst_component = SAR_find_start_component(hpair,
828
              &dst_region, dst_hsp, dst_portal->calc, &prefix);
829
    src_length = src_hsp->length - src_hsp->cobs - suffix;
830
    dst_length = dst_hsp->cobs - prefix;
831
    if((src_length + dst_length) && (sas->hsp_quality > 0.0)){
832
        SAR_HSP_quality(hpair, src_portal->calc,
833
                        src_hsp, src_component,
834
                        src_hsp->cobs, src_length,
835
                        &src_half_score, &src_max_score);
836
        SAR_HSP_quality(hpair, dst_portal->calc,
837
                        dst_hsp, dst_component,
838
                        prefix, dst_length,
839
                        &dst_half_score, &dst_max_score);
840
        if(((((gdouble)(src_half_score+dst_half_score))
841
           / ((gdouble)(src_max_score+dst_max_score))) * 100.0)
842
          < sas->hsp_quality)
843
            return NULL;
844
        }
845
    if((100.0) < sas->hsp_quality)
846
        return NULL;
847
    sar_span = g_new(SAR_Span, 1);
848
    sar_span->src_region = Region_copy(&src_region);
849
    sar_span->dst_region = Region_copy(&dst_region);
850
    sar_span->src_component = src_component;
851
    sar_span->dst_component = dst_component;
852
    sar_span->span = span;
853
    return sar_span;
854
    }
855
856
void SAR_Span_destroy(SAR_Span *sar_span){
857
    g_assert(sar_span);
858
    Region_destroy(sar_span->src_region);
859
    Region_destroy(sar_span->dst_region);
860
    g_free(sar_span);
861
    return;
862
    }
863
864
C4_Score SAR_Span_find_bound(SAR_Span *sar_span){
865
    register C4_Score src_raw_score, dst_raw_score;
866
    register gint query_overlap, target_overlap;
867
    g_assert(sar_span->src_region->query_length
868
            <= sar_span->span->src_bound->query_range);
869
    g_assert(sar_span->src_region->target_length
870
            <= sar_span->span->src_bound->target_range);
871
    g_assert(sar_span->dst_region->query_length
872
            <= sar_span->span->dst_bound->query_range);
873
    g_assert(sar_span->dst_region->target_length
874
            <= sar_span->span->dst_bound->target_range);
875
    query_overlap = Region_query_end(sar_span->src_region)
876
                  - sar_span->dst_region->query_start;
877
    target_overlap = Region_target_end(sar_span->src_region)
878
                   - sar_span->dst_region->target_start;
879
    if(query_overlap < 0)
880
        query_overlap = 0;
881
    if(target_overlap < 0)
882
        target_overlap = 0;
883
    /**/
884
    /* Get the source bounds */
885
    src_raw_score = sar_span->span->src_bound->matrix
886
      [sar_span->src_region->query_length-(query_overlap>>1)]
887
      [sar_span->src_region->target_length-(target_overlap>>1)];
888
    dst_raw_score = sar_span->span->dst_bound->matrix
889
      [sar_span->dst_region->query_length-(query_overlap>>1)
890
                                         -(query_overlap & 1)]
891
      [sar_span->dst_region->target_length-(target_overlap>>1)
892
                                          -(target_overlap & 1)];
893
    /* Calculate the bound score */
894
    return (src_raw_score - sar_span->src_component)
895
         + (dst_raw_score - sar_span->dst_component);
896
    }
897
898
C4_Score SAR_Span_find_score(SAR_Span *sar_span, HPair *hpair){
899
    register C4_Score score;
900
    register Heuristic_Data *heuristic_data;
901
    Heuristic_Span_register(sar_span->span, sar_span->src_region,
902
                                            sar_span->dst_region);
903
    /* Calculate src optimal, reporting to the integration matrix */
904
    heuristic_data = hpair->user_data;
905
    heuristic_data->heuristic_span = sar_span->span;
906
    score = Optimal_find_score(sar_span->span->src_optimal,
907
                               sar_span->src_region, hpair->user_data,
908
                               hpair->subopt);
909
    Heuristic_Span_integrate(sar_span->span, sar_span->src_region,
910
                                             sar_span->dst_region);
911
    /* Calculate dst optimal, starting from the integration matrix */
912
    score = Optimal_find_score(sar_span->span->dst_optimal,
913
                               sar_span->dst_region, hpair->user_data,
914
                               hpair->subopt);
915
    heuristic_data->heuristic_span = NULL;
916
    return score - (sar_span->src_component + sar_span->dst_component);
917
    }
918
919
/**/
920
921
SAR_Alignment *SAR_Alignment_create(SAR_Terminal *sar_start,
922
                                    SAR_Terminal *sar_end,
923
                                    Heuristic_Match *start_match,
924
                                    Heuristic_Match *end_match,
925
                                    HPair *hpair, C4_Score score){
926
    register SAR_Alignment *sar_alignment = g_new(SAR_Alignment, 1);
927
    register Region *align_region;
928
    register Alignment *start_alignment
929
      = Optimal_find_path(start_match->start_terminal->optimal,
930
                          sar_start->region, hpair->user_data,
931
                          C4_IMPOSSIBLY_LOW_SCORE, hpair->subopt);
932
    g_assert(start_alignment);
933
    sar_alignment->end_alignment
934
      = Optimal_find_path(end_match->end_terminal->optimal,
935
                          sar_end->region, hpair->user_data,
936
                          C4_IMPOSSIBLY_LOW_SCORE, hpair->subopt);
937
    sar_alignment->end_region = Region_share(sar_end->region);
938
    sar_alignment->end_match = end_match;
939
    g_assert(sar_alignment->end_alignment);
940
    align_region = Region_create(start_alignment->region->query_start,
941
                                 start_alignment->region->target_start,
942
        Region_query_end(sar_alignment->end_alignment->region)
943
        - start_alignment->region->query_start,
944
        Region_target_end(sar_alignment->end_alignment->region)
945
        - start_alignment->region->target_start);
946
    sar_alignment->alignment = Alignment_create(hpair->heuristic->model,
947
                                                align_region, score);
948
    Alignment_import_derived(sar_alignment->alignment, start_alignment,
949
                           start_match->start_terminal->terminal_model);
950
    sar_alignment->hpair = hpair;
951
    sar_alignment->last_region = Region_share(start_alignment->region);
952
    sar_alignment->last_hsp = NULL;
953
    sar_alignment->last_match = NULL;
954
    Alignment_destroy(start_alignment);
955
    Region_destroy(align_region);
956
    return sar_alignment;
957
    }
958
959
void SAR_Alignment_destroy(SAR_Alignment *sar_alignment){
960
    g_assert(sar_alignment);
961
    g_assert(sar_alignment->alignment);
962
    Alignment_destroy(sar_alignment->alignment);
963
    if(sar_alignment->end_alignment)
964
        Alignment_destroy(sar_alignment->end_alignment);
965
    if(sar_alignment->end_region)
966
        Region_destroy(sar_alignment->end_region);
967
    if(sar_alignment->last_region)
968
        Region_destroy(sar_alignment->last_region);
969
    g_free(sar_alignment);
970
    return;
971
    }
972
973
static void SAR_Alignment_add_region(SAR_Alignment *sar_alignment,
974
                            Region *src_region, Region *dst_region){
975
    register gint suffix;
976
    g_assert(sar_alignment->last_hsp);
977
    g_assert(sar_alignment->last_match);
978
    g_assert(!sar_alignment->last_region);
979
    suffix = (HSP_query_end(sar_alignment->last_hsp)
980
              - src_region->query_start)
981
           / HSP_query_advance(sar_alignment->last_hsp);
982
    Alignment_add(sar_alignment->alignment,
983
                  sar_alignment->last_match->transition,
984
                  -suffix);
985
    sar_alignment->last_hsp = NULL;
986
    sar_alignment->last_match = NULL;
987
    sar_alignment->last_region = Region_share(dst_region);
988
    return;
989
    }
990
991
void SAR_Alignment_finalise(SAR_Alignment *sar_alignment){
992
    Region seq_region;
993
    g_assert(sar_alignment->end_alignment);
994
    SAR_Alignment_add_region(sar_alignment, sar_alignment->end_region,
995
                                            sar_alignment->end_region);
996
    Alignment_import_derived(sar_alignment->alignment,
997
              sar_alignment->end_alignment,
998
              sar_alignment->end_match->end_terminal->terminal_model);
999
    Alignment_destroy(sar_alignment->end_alignment);
1000
    sar_alignment->end_alignment = NULL;
1001
    Region_destroy(sar_alignment->end_region);
1002
    sar_alignment->end_region = NULL;
1003
    Region_init_static(&seq_region, 0, 0,
1004
                       sar_alignment->hpair->query_length,
1005
                       sar_alignment->hpair->target_length);
1006
    g_assert(Alignment_is_valid(sar_alignment->alignment, &seq_region,
1007
                                sar_alignment->hpair->user_data));
1008
    return;
1009
    }
1010
1011
void SAR_Alignment_add_SAR_Join(SAR_Alignment *sar_alignment,
1012
                                SAR_Join *sar_join){
1013
    register Alignment *edge_alignment;
1014
    edge_alignment = Optimal_find_path(
1015
            sar_join->pair->join->optimal,
1016
            sar_join->region, sar_alignment->hpair->user_data,
1017
            C4_IMPOSSIBLY_LOW_SCORE, sar_alignment->hpair->subopt);
1018
    SAR_Alignment_add_region(sar_alignment, sar_join->region,
1019
                                            sar_join->region);
1020
    Alignment_import_derived(sar_alignment->alignment, edge_alignment,
1021
            sar_join->pair->join->join_model);
1022
    Alignment_destroy(edge_alignment);
1023
    return;
1024
    }
1025
1026
void SAR_Alignment_add_SAR_Span(SAR_Alignment *sar_alignment,
1027
                                SAR_Span *sar_span){
1028
    register Alignment *src_alignment, *dst_alignment;
1029
    register gint query_span_start, target_span_start,
1030
                  query_span_end, target_span_end;
1031
    register Region *src_align_region;
1032
    register Heuristic_Data *heuristic_data
1033
                           = sar_alignment->hpair->user_data;
1034
    heuristic_data->heuristic_span = sar_span->span;
1035
    Heuristic_Span_register(sar_span->span, sar_span->src_region,
1036
                                            sar_span->dst_region);
1037
    Optimal_find_score(sar_span->span->src_optimal,
1038
        sar_span->src_region, sar_alignment->hpair->user_data,
1039
        sar_alignment->hpair->subopt);
1040
    Heuristic_Span_integrate(sar_span->span, sar_span->src_region,
1041
                                             sar_span->dst_region);
1042
    dst_alignment = Optimal_find_path(sar_span->span->dst_optimal,
1043
        sar_span->dst_region, sar_alignment->hpair->user_data,
1044
        C4_IMPOSSIBLY_LOW_SCORE, sar_alignment->hpair->subopt);
1045
    /* Merge edge traceback */
1046
    query_span_end = dst_alignment->region->query_start
1047
                   - sar_span->dst_region->query_start;
1048
    target_span_end = dst_alignment->region->target_start
1049
                    - sar_span->dst_region->target_start;
1050
    query_span_start
1051
        = sar_span->span->dst_integration_matrix
1052
          [query_span_end][target_span_end].query_pos
1053
        - sar_span->src_region->query_start;
1054
    target_span_start
1055
        = sar_span->span->dst_integration_matrix
1056
          [query_span_end][target_span_end].target_pos
1057
        - sar_span->src_region->target_start;
1058
    src_align_region = Region_create(sar_span->src_region->query_start,
1059
                                     sar_span->src_region->target_start,
1060
                                     query_span_start,
1061
                                     target_span_start);
1062
    src_alignment = Optimal_find_path(
1063
        sar_span->span->src_traceback_optimal, src_align_region,
1064
        sar_alignment->hpair->user_data, C4_IMPOSSIBLY_LOW_SCORE,
1065
        sar_alignment->hpair->subopt);
1066
    Region_destroy(src_align_region);
1067
    SAR_Alignment_add_region(sar_alignment, sar_span->src_region,
1068
                                            sar_span->dst_region);
1069
    Alignment_import_derived(sar_alignment->alignment, src_alignment,
1070
              sar_span->span->src_traceback_model);
1071
    /* Add traceback for the span */
1072
    Heuristic_Span_add_traceback(sar_span->span,
1073
               sar_alignment->alignment,
1074
               dst_alignment->region->query_start
1075
               -Region_query_end(src_alignment->region),
1076
               dst_alignment->region->target_start
1077
               -Region_target_end(src_alignment->region));
1078
    heuristic_data->heuristic_span = NULL;
1079
    Alignment_import_derived(sar_alignment->alignment, dst_alignment,
1080
                sar_span->span->dst_model);
1081
    Alignment_destroy(src_alignment);
1082
    Alignment_destroy(dst_alignment);
1083
    return;
1084
    }
1085
1086
void SAR_Alignment_add_HSP(SAR_Alignment *sar_alignment, HSP *hsp,
1087
                           Heuristic_Match *match){
1088
    register gint prefix;
1089
    g_assert(sar_alignment->last_region);
1090
    g_assert(!sar_alignment->last_hsp);
1091
    g_assert(!sar_alignment->last_match);
1092
    prefix = (Region_query_end(sar_alignment->last_region)
1093
              - hsp->query_start) / HSP_query_advance(hsp);
1094
    Alignment_add(sar_alignment->alignment, match->transition,
1095
                  hsp->length - prefix);
1096
    Region_destroy(sar_alignment->last_region);
1097
    sar_alignment->last_region = NULL;
1098
    sar_alignment->last_hsp = hsp;
1099
    sar_alignment->last_match = match;
1100
    return;
1101
    }
1102