1
by Steffen Moeller
Import upstream version 1.4.0 |
1 |
/****************************************************************\
|
2 |
* *
|
|
3 |
* C4 dynamic programming library - sub-alignment regions *
|
|
4 |
* *
|
|
5 |
* Guy St.C. Slater.. mailto:guy@ebi.ac.uk *
|
|
1.1.1
by Charles Plessy
Import upstream version 2.0.0 |
6 |
* Copyright (C) 2000-2008. All Rights Reserved. *
|
1
by Steffen Moeller
Import upstream version 1.4.0 |
7 |
* *
|
8 |
* This source code is distributed under the terms of the *
|
|
1.1.1
by Charles Plessy
Import upstream version 2.0.0 |
9 |
* GNU General Public License, version 3. See the file COPYING *
|
10 |
* or http://www.gnu.org/licenses/gpl.txt for details *
|
|
1
by Steffen Moeller
Import upstream version 1.4.0 |
11 |
* *
|
12 |
* If you use this code, please keep this notice intact. *
|
|
13 |
* *
|
|
14 |
\****************************************************************/
|
|
15 |
||
16 |
#include "sar.h" |
|
17 |
||
18 |
/**/
|
|
19 |
||
20 |
SAR_ArgumentSet *SAR_ArgumentSet_create(Argument *arg){ |
|
21 |
register ArgumentSet *as; |
|
22 |
static SAR_ArgumentSet sas; |
|
23 |
if(arg){ |
|
24 |
as = ArgumentSet_create("SAR Options"); |
|
25 |
/**/
|
|
26 |
ArgumentSet_add_option(as, '\0', "quality", "percent", |
|
27 |
"HSP quality threshold", "0", |
|
28 |
Argument_parse_float, &sas.hsp_quality); |
|
29 |
/**/
|
|
30 |
Argument_absorb_ArgumentSet(arg, as); |
|
31 |
}
|
|
32 |
return &sas; |
|
33 |
}
|
|
34 |
||
35 |
/**/
|
|
36 |
||
37 |
static gboolean SAR_region_valid_hsp_exit(Region *region, HSP *hsp){ |
|
38 |
#ifndef G_DISABLE_ASSERT
|
|
39 |
register gint query_width = Region_query_end(region) |
|
40 |
- hsp->query_start; |
|
41 |
register gint target_width = Region_target_end(region) |
|
42 |
- hsp->target_start; |
|
43 |
register gint query_prefix = Region_query_end(region) |
|
44 |
- hsp->query_start; |
|
45 |
#endif /* G_DISABLE_ASSERT */ |
|
46 |
/* Check region corner meets the HSP */
|
|
47 |
g_assert(Region_is_valid(region)); |
|
48 |
g_assert((query_width * HSP_target_advance(hsp)) |
|
49 |
== (target_width * HSP_query_advance(hsp))); |
|
50 |
/* Check region corner is at HSP word boundary */
|
|
51 |
g_assert(!(query_width % HSP_query_advance(hsp))); |
|
52 |
g_assert(!(target_width % HSP_target_advance(hsp))); |
|
53 |
/* Check region boundary is between cobs and HSP end */
|
|
54 |
g_assert(query_prefix >= 0); |
|
55 |
g_assert(query_prefix <= HSP_query_cobs(hsp)); |
|
56 |
return TRUE; |
|
57 |
}
|
|
58 |
||
59 |
static gboolean SAR_region_valid_hsp_entry(Region *region, HSP *hsp){ |
|
60 |
#ifndef G_DISABLE_ASSERT
|
|
61 |
register gint query_width = HSP_query_end(hsp) |
|
62 |
- region->query_start; |
|
63 |
register gint target_width = HSP_target_end(hsp) |
|
64 |
- region->target_start; |
|
65 |
register gint query_suffix = HSP_query_end(hsp) |
|
66 |
- region->query_start; |
|
67 |
#endif /* G_DISABLE_ASSERT */ |
|
68 |
g_assert(Region_is_valid(region)); |
|
69 |
/* Check region corner meets the HSP */
|
|
70 |
g_assert((query_width * HSP_target_advance(hsp)) |
|
71 |
== (target_width * HSP_query_advance(hsp))); |
|
72 |
/* Check region corner is at HSP word boundary */
|
|
73 |
g_assert(!(query_width % HSP_query_advance(hsp))); |
|
74 |
g_assert(!(target_width % HSP_target_advance(hsp))); |
|
75 |
/* Check region boundary is between HSP start and cobs */
|
|
76 |
g_assert(query_suffix >= 0); |
|
77 |
g_assert(query_suffix <= (HSP_query_end(hsp)-HSP_query_cobs(hsp))); |
|
78 |
return TRUE; |
|
79 |
}
|
|
80 |
||
81 |
/**/
|
|
82 |
||
83 |
static gboolean SAR_Terminal_calculate_start_region(HSP *hsp, |
|
84 |
HPair *hpair, Heuristic_Range *range, C4_Scope scope, |
|
85 |
Region *region){ |
|
86 |
register gint to_shrink; |
|
87 |
register gboolean at_query_edge = FALSE, at_target_edge = FALSE; |
|
88 |
Region outer_box; |
|
89 |
/* Find outer box (cobs -> corner) */
|
|
90 |
outer_box.query_start = 0; |
|
91 |
outer_box.target_start = 0; |
|
92 |
outer_box.query_length = HSP_query_cobs(hsp); |
|
93 |
outer_box.target_length = HSP_target_cobs(hsp); |
|
94 |
/* Find inner_box (end point) */
|
|
95 |
region->query_start = hsp->query_start; |
|
96 |
region->target_start = hsp->target_start; |
|
97 |
region->query_length = 0; |
|
98 |
region->target_length = 0; |
|
99 |
/* Grow inner_box by external range */
|
|
100 |
region->query_start -= range->external_query; |
|
101 |
region->target_start -= range->external_target; |
|
102 |
region->query_length += range->external_query; |
|
103 |
region->target_length += range->external_target; |
|
104 |
/* Grow inner_box on HSPs by internal range */
|
|
105 |
region->query_length += range->internal_query; |
|
106 |
region->target_length += range->internal_target; |
|
107 |
/* Trim inner_box to intersection (inner,outer) */
|
|
108 |
if(region->query_start < outer_box.query_start){ |
|
109 |
region->query_length -= (outer_box.query_start |
|
110 |
- region->query_start); |
|
111 |
region->query_start = outer_box.query_start; |
|
112 |
}
|
|
113 |
if(region->target_start < outer_box.target_start){ |
|
114 |
region->target_length -= (outer_box.target_start |
|
115 |
- region->target_start); |
|
116 |
region->target_start = outer_box.target_start; |
|
117 |
}
|
|
118 |
to_shrink = Region_query_end(region) |
|
119 |
- Region_query_end(&outer_box); |
|
120 |
if(to_shrink > 0) |
|
121 |
region->query_length -= to_shrink; |
|
122 |
to_shrink = Region_target_end(region) |
|
123 |
- Region_target_end(&outer_box); |
|
124 |
if(to_shrink > 0) |
|
125 |
region->target_length -= to_shrink; |
|
126 |
/* Fail when region is empty */
|
|
127 |
if(region->query_length <= 0) |
|
128 |
return FALSE; |
|
129 |
if(region->target_length <= 0) |
|
130 |
return FALSE; |
|
131 |
/* Check region is still on HSP boundary */
|
|
132 |
g_assert(SAR_region_valid_hsp_exit(region, hsp)); |
|
133 |
/* Check validity for non-local models */
|
|
134 |
at_query_edge = (region->query_start == 0); |
|
135 |
at_target_edge = (region->target_start == 0); |
|
136 |
switch(scope){ |
|
137 |
case C4_Scope_ANYWHERE: |
|
138 |
return TRUE; |
|
139 |
break; |
|
140 |
case C4_Scope_CORNER: |
|
141 |
if(at_query_edge && at_target_edge) |
|
142 |
return TRUE; |
|
143 |
break; |
|
144 |
case C4_Scope_EDGE: |
|
145 |
if(at_query_edge || at_target_edge) |
|
146 |
return TRUE; |
|
147 |
break; |
|
148 |
case C4_Scope_QUERY: |
|
149 |
if(at_query_edge) |
|
150 |
return TRUE; |
|
151 |
break; |
|
152 |
case C4_Scope_TARGET: |
|
153 |
if(at_target_edge) |
|
154 |
return TRUE; |
|
155 |
break; |
|
156 |
}
|
|
157 |
return FALSE; |
|
158 |
}
|
|
159 |
||
160 |
static gboolean SAR_Terminal_calculate_end_region(HSP *hsp, |
|
161 |
HPair *hpair, Heuristic_Range *range, C4_Scope scope, |
|
162 |
Region *region){ |
|
163 |
register gint to_shrink; |
|
164 |
register gboolean at_query_edge = FALSE, at_target_edge = FALSE; |
|
165 |
Region outer_box; |
|
166 |
/* Find outer box (cobs -> corner) */
|
|
167 |
outer_box.query_start = HSP_query_cobs(hsp); |
|
168 |
outer_box.target_start = HSP_target_cobs(hsp); |
|
169 |
outer_box.query_length = hsp->hsp_set->query->len |
|
170 |
- HSP_query_cobs(hsp); |
|
171 |
outer_box.target_length = hsp->hsp_set->target->len |
|
172 |
- HSP_target_cobs(hsp); |
|
173 |
/**/
|
|
174 |
/* Find inner_box (end point) */
|
|
175 |
region->query_start = HSP_query_end(hsp); |
|
176 |
region->target_start = HSP_target_end(hsp); |
|
177 |
region->query_length = 0; |
|
178 |
region->target_length = 0; |
|
179 |
/* Grow inner_box by external range */
|
|
180 |
region->query_length += range->external_query; |
|
181 |
region->target_length += range->external_target; |
|
182 |
/* Grow inner_box on HSPs by internal range */
|
|
183 |
region->query_start -= range->internal_query; |
|
184 |
region->query_length += range->internal_query; |
|
185 |
region->target_start -= range->internal_target; |
|
186 |
region->target_length += range->internal_target; |
|
187 |
/* Trim inner_box to intersection (inner,outer) */
|
|
188 |
if(Region_query_end(region) > Region_query_end(&outer_box)){ |
|
189 |
region->query_length -= (Region_query_end(region) |
|
190 |
- Region_query_end(&outer_box)); |
|
191 |
}
|
|
192 |
if(Region_target_end(region) > Region_target_end(&outer_box)){ |
|
193 |
region->target_length -= (Region_target_end(region) |
|
194 |
- Region_target_end(&outer_box)); |
|
195 |
}
|
|
196 |
to_shrink = outer_box.query_start |
|
197 |
- region->query_start; |
|
198 |
if(to_shrink > 0){ |
|
199 |
region->query_start += to_shrink; |
|
200 |
region->query_length -= to_shrink; |
|
201 |
}
|
|
202 |
to_shrink = outer_box.target_start |
|
203 |
- region->target_start; |
|
204 |
if(to_shrink > 0){ |
|
205 |
region->target_start += to_shrink; |
|
206 |
region->target_length -= to_shrink; |
|
207 |
}
|
|
208 |
/* Fail when region is empty */
|
|
209 |
if(region->query_length <= 0) |
|
210 |
return FALSE; |
|
211 |
if(region->target_length <= 0) |
|
212 |
return FALSE; |
|
213 |
/* Check region is still on HSP boundary */
|
|
214 |
g_assert(SAR_region_valid_hsp_entry(region, hsp)); |
|
215 |
/* Check validity for non-local models */
|
|
216 |
at_query_edge = (Region_query_end(region) |
|
217 |
== hsp->hsp_set->query->len); |
|
218 |
at_target_edge = (Region_target_end(region) |
|
219 |
== hsp->hsp_set->target->len); |
|
220 |
switch(scope){ |
|
221 |
case C4_Scope_ANYWHERE: |
|
222 |
return TRUE; |
|
223 |
break; |
|
224 |
case C4_Scope_CORNER: |
|
225 |
if(at_query_edge && at_target_edge) |
|
226 |
return TRUE; |
|
227 |
break; |
|
228 |
case C4_Scope_EDGE: |
|
229 |
if(at_query_edge || at_target_edge) |
|
230 |
return TRUE; |
|
231 |
break; |
|
232 |
case C4_Scope_QUERY: |
|
233 |
if(at_query_edge) |
|
234 |
return TRUE; |
|
235 |
break; |
|
236 |
case C4_Scope_TARGET: |
|
237 |
if(at_target_edge) |
|
238 |
return TRUE; |
|
239 |
break; |
|
240 |
}
|
|
241 |
return FALSE; |
|
242 |
}
|
|
243 |
||
244 |
/**/
|
|
245 |
||
246 |
static C4_Score SAR_find_start_component(HPair *hpair, |
|
247 |
Region *region, HSP *hsp, C4_Calc *calc, gint *prefix){ |
|
248 |
register C4_Score component = 0; |
|
249 |
register gint i, query_pos = hsp->query_start, |
|
250 |
target_pos = hsp->target_start; |
|
251 |
register Region *calc_region; |
|
252 |
g_assert(prefix); |
|
253 |
(*prefix) = (Region_query_end(region) - hsp->query_start) |
|
254 |
/ HSP_query_advance(hsp); |
|
255 |
g_assert((*prefix) >= 0); |
|
256 |
calc_region = Region_create(hsp->query_start, hsp->target_start, |
|
257 |
(*prefix) * HSP_query_advance(hsp), |
|
258 |
(*prefix) * HSP_target_advance(hsp)); |
|
259 |
C4_Calc_init(calc, calc_region, hpair->user_data); |
|
260 |
for(i = 0; i < (*prefix); i++){ |
|
261 |
component += C4_Calc_score(calc, query_pos, target_pos, |
|
262 |
hpair->user_data); |
|
263 |
query_pos += HSP_query_advance(hsp); |
|
264 |
target_pos += HSP_target_advance(hsp); |
|
265 |
}
|
|
266 |
g_assert(component <= hsp->score); |
|
267 |
C4_Calc_exit(calc, calc_region, hpair->user_data); |
|
268 |
Region_destroy(calc_region); |
|
269 |
return component; |
|
270 |
}
|
|
271 |
||
272 |
static C4_Score SAR_find_end_component(HPair *hpair, |
|
273 |
Region *region, HSP *hsp, C4_Calc *calc, gint *suffix){ |
|
274 |
register C4_Score component = 0; |
|
275 |
register gint i, query_pos, target_pos; |
|
276 |
register Region *calc_region; |
|
277 |
g_assert(suffix >= 0); |
|
278 |
g_assert(suffix); |
|
279 |
(*suffix) = (HSP_query_end(hsp) - region->query_start) |
|
280 |
/ HSP_query_advance(hsp); |
|
281 |
calc_region = Region_create(region->query_start, |
|
282 |
region->target_start, |
|
283 |
(*suffix) * HSP_query_advance(hsp), |
|
284 |
(*suffix) * HSP_target_advance(hsp)); |
|
285 |
C4_Calc_init(calc, calc_region, hpair->user_data); |
|
286 |
query_pos = region->query_start; |
|
287 |
target_pos = region->target_start; |
|
288 |
for(i = 0; i < (*suffix); i++){ |
|
289 |
component += C4_Calc_score(calc, query_pos, target_pos, |
|
290 |
hpair->user_data); |
|
291 |
query_pos += HSP_query_advance(hsp); |
|
292 |
target_pos += HSP_target_advance(hsp); |
|
293 |
}
|
|
294 |
g_assert(component <= hsp->score); |
|
295 |
C4_Calc_exit(calc, calc_region, hpair->user_data); |
|
296 |
Region_destroy(calc_region); |
|
297 |
return component; |
|
298 |
}
|
|
299 |
||
300 |
/**/
|
|
301 |
||
302 |
static void SAR_HSP_quality(HPair *hpair, C4_Calc *calc, |
|
303 |
HSP *hsp, C4_Score component, |
|
304 |
gint start, gint length, |
|
305 |
gint *half_score, gint *max_score){ |
|
306 |
register gint i, query_pos, target_pos; |
|
307 |
g_assert(half_score); |
|
308 |
g_assert(max_score); |
|
309 |
query_pos = hsp->query_start + (start * HSP_query_advance(hsp)); |
|
310 |
target_pos = hsp->target_start + (start * HSP_target_advance(hsp)); |
|
311 |
(*half_score) = (*max_score) = 0; |
|
312 |
for(i = 0; i < length; i++){ |
|
313 |
(*half_score) += HSP_get_score(hsp, query_pos, target_pos); |
|
314 |
(*max_score) += HSP_query_self(hsp, query_pos); |
|
315 |
query_pos += HSP_query_advance(hsp); |
|
316 |
target_pos += HSP_target_advance(hsp); |
|
317 |
}
|
|
318 |
return; |
|
319 |
}
|
|
320 |
||
321 |
SAR_Terminal *SAR_Terminal_create(HSP *hsp, HPair *hpair, |
|
322 |
Heuristic_Match *match, |
|
323 |
gboolean is_start){ |
|
324 |
register SAR_Terminal *sar_terminal; |
|
325 |
Region region; |
|
326 |
register C4_Score component; |
|
327 |
gint prefix, suffix; |
|
328 |
register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL); |
|
329 |
register gboolean is_valid = FALSE; |
|
330 |
gint half_score, max_score; |
|
331 |
register gint start, length; |
|
332 |
prefix = suffix = 0; |
|
333 |
if(is_start){ |
|
334 |
is_valid = SAR_Terminal_calculate_start_region(hsp, hpair, |
|
335 |
match->start_terminal->range, |
|
336 |
hpair->heuristic->model->start_state->scope, |
|
337 |
®ion); |
|
338 |
} else { /* is end */ |
|
339 |
is_valid = SAR_Terminal_calculate_end_region(hsp, hpair, |
|
340 |
match->end_terminal->range, |
|
341 |
hpair->heuristic->model->end_state->scope, |
|
342 |
®ion); |
|
343 |
}
|
|
344 |
if(!is_valid) |
|
345 |
return NULL; |
|
346 |
if(is_start){ |
|
347 |
component = SAR_find_start_component(hpair, |
|
348 |
®ion, hsp, match->portal->calc, |
|
349 |
&prefix); |
|
350 |
start = prefix; |
|
351 |
length = hsp->cobs - prefix; |
|
352 |
} else { |
|
353 |
component = SAR_find_end_component(hpair, |
|
354 |
®ion, hsp, match->portal->calc, |
|
355 |
&suffix); |
|
356 |
start = hsp->cobs; |
|
357 |
length = hsp->length - hsp->cobs - suffix; |
|
358 |
}
|
|
359 |
if(length && (sas->hsp_quality > 0.0)){ |
|
360 |
SAR_HSP_quality(hpair, match->portal->calc, |
|
361 |
hsp, component, start, length, |
|
362 |
&half_score, &max_score); |
|
363 |
if((((gdouble)half_score/(gdouble)max_score) * 100.0) |
|
364 |
< sas->hsp_quality) |
|
365 |
return NULL; |
|
366 |
}
|
|
367 |
sar_terminal = g_new(SAR_Terminal, 1); |
|
368 |
sar_terminal->region = Region_copy(®ion); |
|
369 |
sar_terminal->component = component; |
|
370 |
return sar_terminal; |
|
371 |
}
|
|
372 |
||
373 |
void SAR_Terminal_destroy(SAR_Terminal *sar_terminal){ |
|
374 |
g_assert(sar_terminal); |
|
375 |
Region_destroy(sar_terminal->region); |
|
376 |
g_free(sar_terminal); |
|
377 |
return; |
|
378 |
}
|
|
379 |
||
380 |
C4_Score SAR_Terminal_find_bound(SAR_Terminal *sar_terminal, |
|
381 |
Heuristic_Bound *heuristic_bound){ |
|
382 |
g_assert(sar_terminal->region->query_length >= 0); |
|
383 |
g_assert(sar_terminal->region->target_length >= 0); |
|
384 |
g_assert(sar_terminal->region->query_length |
|
385 |
<= heuristic_bound->query_range); |
|
386 |
g_assert(sar_terminal->region->target_length |
|
387 |
<= heuristic_bound->target_range); |
|
388 |
return heuristic_bound->matrix[sar_terminal->region->query_length] |
|
389 |
[sar_terminal->region->target_length] |
|
390 |
- sar_terminal->component; |
|
391 |
}
|
|
392 |
||
393 |
C4_Score SAR_Terminal_find_score(SAR_Terminal *sar_terminal, |
|
394 |
Optimal *optimal, HPair *hpair){ |
|
395 |
return Optimal_find_score(optimal, sar_terminal->region, |
|
396 |
hpair->user_data, hpair->subopt) |
|
397 |
- sar_terminal->component; |
|
398 |
}
|
|
399 |
||
400 |
/**/
|
|
401 |
||
402 |
static void SAR_reduce_mid_overlap(HPair *hpair, HSP *src, HSP *dst, |
|
403 |
Region *region, C4_Calc *src_calc, C4_Calc *dst_calc){ |
|
404 |
register C4_Score src_total, dst_total, max_total; |
|
405 |
register gint src_query_pos, src_target_pos, |
|
406 |
dst_query_pos, dst_target_pos; |
|
407 |
register gint max_src_query_pos, max_src_target_pos, |
|
408 |
max_dst_query_pos, max_dst_target_pos, |
|
409 |
max_dist_from_centre; |
|
410 |
/* Reverse up dst region, counting dst_total */
|
|
411 |
if(!(region->query_length + region->target_length)) |
|
412 |
return; |
|
413 |
src_total = dst_total = 0; |
|
414 |
dst_query_pos = Region_query_end(region) |
|
415 |
- HSP_query_advance(dst); |
|
416 |
dst_target_pos = Region_target_end(region) |
|
417 |
- HSP_target_advance(dst); |
|
418 |
while((dst_query_pos >= region->query_start) |
|
419 |
&& (dst_target_pos >= region->target_start) |
|
420 |
&& (dst_query_pos >= dst->query_start) |
|
421 |
&& (dst_target_pos >= dst->target_start)){ |
|
422 |
dst_total += C4_Calc_score(dst_calc, |
|
423 |
dst_query_pos, dst_target_pos, |
|
424 |
hpair->user_data); |
|
425 |
dst_query_pos -= HSP_query_advance(dst); |
|
426 |
dst_target_pos -= HSP_target_advance(dst); |
|
427 |
}
|
|
428 |
dst_query_pos += HSP_query_advance(dst); |
|
429 |
dst_target_pos += HSP_target_advance(dst); |
|
430 |
/* Go down src region, storing max total */
|
|
431 |
src_query_pos = region->query_start; |
|
432 |
src_target_pos = region->target_start; |
|
433 |
max_total = dst_total; |
|
434 |
max_src_query_pos = src_query_pos; |
|
435 |
max_src_target_pos = src_target_pos; |
|
436 |
max_dst_query_pos = dst_query_pos; |
|
437 |
max_dst_target_pos = dst_target_pos; |
|
438 |
max_dist_from_centre = Region_query_end(region) |
|
439 |
- src_query_pos; |
|
440 |
while((src_query_pos < Region_query_end(region)) |
|
441 |
&& (src_target_pos < Region_target_end(region)) |
|
442 |
&& (src_query_pos < HSP_query_end(src)) |
|
443 |
&& (src_target_pos < HSP_target_end(src))){ |
|
444 |
src_total += C4_Calc_score(src_calc, |
|
445 |
src_query_pos, src_target_pos, |
|
446 |
hpair->user_data); |
|
447 |
while((src_query_pos >= dst_query_pos) |
|
448 |
|| (src_target_pos >= dst_target_pos)){ |
|
449 |
dst_total -= C4_Calc_score(dst_calc, |
|
450 |
dst_query_pos, dst_target_pos, |
|
451 |
hpair->user_data); |
|
452 |
dst_query_pos += HSP_query_advance(dst); |
|
453 |
dst_target_pos += HSP_target_advance(dst); |
|
454 |
}
|
|
455 |
if(max_total <= (src_total + dst_total)){ |
|
456 |
/* If the score better
|
|
457 |
* or the distance from the overlap centre is less
|
|
458 |
*/
|
|
459 |
if((max_total < (src_total + dst_total)) |
|
460 |
|| (ABS(Region_query_end(region)-src_query_pos) |
|
461 |
< max_dist_from_centre)){ |
|
462 |
max_dist_from_centre = ABS(Region_query_end(region) |
|
463 |
- src_query_pos); |
|
464 |
max_total = (src_total + dst_total); |
|
465 |
max_src_query_pos = src_query_pos; |
|
466 |
max_src_target_pos = src_target_pos; |
|
467 |
max_dst_query_pos = dst_query_pos; |
|
468 |
max_dst_target_pos = dst_target_pos; |
|
469 |
}
|
|
470 |
}
|
|
471 |
src_query_pos += HSP_query_advance(src); |
|
472 |
src_target_pos += HSP_target_advance(src); |
|
473 |
}
|
|
474 |
/**/
|
|
475 |
region->query_start = max_src_query_pos; |
|
476 |
region->target_start = max_src_target_pos; |
|
477 |
region->query_length = max_dst_query_pos |
|
478 |
- max_src_query_pos; |
|
479 |
region->target_length = max_dst_target_pos |
|
480 |
- max_src_target_pos; |
|
481 |
g_assert(Region_is_valid(region)); |
|
482 |
return; |
|
483 |
}
|
|
484 |
||
485 |
static void SAR_find_end_box(HPair *hpair, HSP *src, HSP *dst, |
|
486 |
Region *region, Region *cobs_box, |
|
487 |
C4_Calc *src_calc, C4_Calc *dst_calc){ |
|
488 |
register gint |
|
489 |
query_overlap = (HSP_query_end(src) - dst->query_start), |
|
490 |
target_overlap = (HSP_target_end(src) - dst->target_start); |
|
491 |
register gint src_query_move = 0, src_target_move = 0, |
|
492 |
dst_query_move = 0, dst_target_move = 0; |
|
493 |
region->query_start = MIN(HSP_query_end(src), dst->query_start); |
|
494 |
region->target_start = MIN(HSP_target_end(src), dst->target_start); |
|
495 |
region->query_length = MAX(HSP_query_end(src), dst->query_start) |
|
496 |
- region->query_start; |
|
497 |
region->target_length = MAX(HSP_target_end(src), dst->target_start) |
|
498 |
- region->target_start; |
|
499 |
if((query_overlap > 0) || (target_overlap > 0)){ |
|
500 |
/* Snap end_box to HSPs within cobs */
|
|
501 |
src_query_move = region->query_start |
|
502 |
- cobs_box->query_start; |
|
503 |
src_target_move = region->target_start |
|
504 |
- cobs_box->target_start; |
|
505 |
if((src_query_move <= 0) || (src_target_move <= 0)){ |
|
506 |
src_query_move = src_target_move = 0; |
|
507 |
} else { |
|
508 |
src_query_move -= (src_query_move % HSP_query_advance(src)); |
|
509 |
src_target_move -= (src_target_move |
|
510 |
% HSP_target_advance(src)); |
|
511 |
if((src_query_move / HSP_query_advance(src)) |
|
512 |
< (src_target_move / HSP_target_advance(src))){ |
|
513 |
src_target_move = (src_query_move |
|
514 |
/ HSP_query_advance(src)) |
|
515 |
* HSP_target_advance(src); |
|
516 |
} else { |
|
517 |
src_query_move = (src_target_move |
|
518 |
/ HSP_target_advance(src)) |
|
519 |
* HSP_query_advance(src); |
|
520 |
}
|
|
521 |
}
|
|
522 |
/**/
|
|
523 |
dst_query_move = Region_query_end(cobs_box) |
|
524 |
- Region_query_end(region); |
|
525 |
dst_target_move = Region_target_end(cobs_box) |
|
526 |
- Region_target_end(region); |
|
527 |
if((dst_query_move <= 0) || (dst_target_move <= 0)){ |
|
528 |
dst_query_move = dst_target_move = 0; |
|
529 |
} else { |
|
530 |
dst_query_move -= (dst_query_move |
|
531 |
% HSP_query_advance(dst)); |
|
532 |
dst_target_move -= (dst_target_move |
|
533 |
% HSP_target_advance(dst)); |
|
534 |
if((dst_query_move / HSP_query_advance(dst)) |
|
535 |
< (dst_target_move / HSP_target_advance(dst))){ |
|
536 |
dst_target_move = (dst_query_move |
|
537 |
/ HSP_query_advance(dst)) |
|
538 |
* HSP_target_advance(dst); |
|
539 |
} else { |
|
540 |
dst_query_move = (dst_target_move |
|
541 |
/ HSP_target_advance(dst)) |
|
542 |
* HSP_query_advance(dst); |
|
543 |
}
|
|
544 |
}
|
|
545 |
region->query_start = cobs_box->query_start |
|
546 |
+ src_query_move; |
|
547 |
region->target_start = cobs_box->target_start |
|
548 |
+ src_target_move; |
|
549 |
region->query_length = Region_query_end(cobs_box) |
|
550 |
- dst_query_move |
|
551 |
- region->query_start; |
|
552 |
region->target_length = Region_target_end(cobs_box) |
|
553 |
- dst_target_move |
|
554 |
- region->target_start; |
|
555 |
g_assert(SAR_region_valid_hsp_entry(region, src)); |
|
556 |
g_assert(SAR_region_valid_hsp_exit(region, dst)); |
|
557 |
SAR_reduce_mid_overlap(hpair, src, dst, region, |
|
558 |
src_calc, dst_calc); |
|
559 |
}
|
|
560 |
g_assert(SAR_region_valid_hsp_entry(region, src)); |
|
561 |
g_assert(SAR_region_valid_hsp_exit(region, dst)); |
|
562 |
return; |
|
563 |
}
|
|
564 |
||
565 |
static gboolean SAR_find_cobs_box(HSP *src, HSP *dst, Region *region){ |
|
566 |
region->query_start = HSP_query_cobs(src); |
|
567 |
region->target_start = HSP_target_cobs(src); |
|
568 |
region->query_length = HSP_query_cobs(dst) |
|
569 |
- region->query_start; |
|
570 |
region->target_length = HSP_target_cobs(dst) |
|
571 |
- region->target_start; |
|
572 |
if(region->query_length <= 0) |
|
573 |
return FALSE; |
|
574 |
if(region->target_length <= 0) |
|
575 |
return FALSE; |
|
576 |
return TRUE; |
|
577 |
}
|
|
578 |
||
579 |
static gboolean SAR_Join_calculate_region(HPair *hpair, |
|
580 |
HSP *src, HSP *dst, |
|
581 |
Region *region, |
|
582 |
Heuristic_Pair *pair){ |
|
583 |
register gint to_shrink; |
|
584 |
Region outer_box; |
|
585 |
/* Find outer_box (cobs box) */
|
|
586 |
if(!SAR_find_cobs_box(src, dst, &outer_box)) |
|
587 |
return FALSE; |
|
588 |
/* Find inner_box (end_box) */
|
|
589 |
SAR_find_end_box(hpair, src, dst, region, &outer_box, |
|
590 |
pair->src->portal->calc, pair->dst->portal->calc); |
|
591 |
/* Check within external range */
|
|
592 |
if(region->query_length > (pair->join->src_range->external_query |
|
593 |
+ pair->join->dst_range->external_query)) |
|
594 |
return FALSE; |
|
595 |
if(region->target_length > (pair->join->src_range->external_target |
|
596 |
+ pair->join->dst_range->external_target)) |
|
597 |
return FALSE; |
|
598 |
/* Grow inner_region by internal_range */
|
|
599 |
region->query_start -= pair->join->src_range->internal_query; |
|
600 |
region->query_length += (pair->join->src_range->internal_query |
|
601 |
+ pair->join->dst_range->internal_query); |
|
602 |
region->target_start -= pair->join->src_range->internal_target; |
|
603 |
region->target_length += (pair->join->src_range->internal_target |
|
604 |
+ pair->join->dst_range->internal_target); |
|
605 |
/* Trim inner_box to intersection (inner,outer) */
|
|
606 |
to_shrink = outer_box.query_start |
|
607 |
- region->query_start; |
|
608 |
if(to_shrink > 0){ |
|
609 |
region->query_start += to_shrink; |
|
610 |
region->query_length -= to_shrink; |
|
611 |
}
|
|
612 |
to_shrink = outer_box.target_start |
|
613 |
- region->target_start; |
|
614 |
if(to_shrink > 0){ |
|
615 |
region->target_start += to_shrink; |
|
616 |
region->target_length -= to_shrink; |
|
617 |
}
|
|
618 |
to_shrink = Region_query_end(region) |
|
619 |
- Region_query_end(&outer_box); |
|
620 |
if(to_shrink > 0) |
|
621 |
region->query_length -= to_shrink; |
|
622 |
to_shrink = Region_target_end(region) |
|
623 |
- Region_target_end(&outer_box); |
|
624 |
if(to_shrink > 0) |
|
625 |
region->target_length -= to_shrink; |
|
626 |
/* Check region not empty */
|
|
627 |
if(region->query_length < 1) |
|
628 |
return FALSE; |
|
629 |
if(region->target_length < 1) |
|
630 |
return FALSE; |
|
631 |
/* Check region is still on HSP boundaries */
|
|
632 |
g_assert(SAR_region_valid_hsp_entry(region, src)); |
|
633 |
g_assert(SAR_region_valid_hsp_exit(region, dst)); |
|
634 |
return TRUE; |
|
635 |
}
|
|
636 |
||
637 |
SAR_Join *SAR_Join_create(HSP *src_hsp, HSP *dst_hsp, HPair *hpair, |
|
638 |
Heuristic_Pair *pair){ |
|
639 |
register SAR_Join *sar_join; |
|
640 |
register C4_Score src_component, dst_component; |
|
641 |
register gint src_length, dst_length; |
|
642 |
gint prefix, suffix, src_half_score, dst_half_score, |
|
643 |
src_max_score, dst_max_score; |
|
644 |
Region region; |
|
645 |
register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL); |
|
646 |
if(!SAR_Join_calculate_region(hpair, src_hsp, dst_hsp, ®ion, |
|
647 |
pair)) |
|
648 |
return NULL; |
|
649 |
src_component = SAR_find_end_component(hpair, |
|
650 |
®ion, src_hsp, pair->src->portal->calc, &suffix); |
|
651 |
dst_component = SAR_find_start_component(hpair, |
|
652 |
®ion, dst_hsp, pair->dst->portal->calc, &prefix); |
|
653 |
src_length = src_hsp->length - src_hsp->cobs - suffix; |
|
654 |
dst_length = dst_hsp->cobs - prefix; |
|
655 |
if((src_length + dst_length) && (sas->hsp_quality > 0.0)){ |
|
656 |
SAR_HSP_quality(hpair, pair->src->portal->calc, |
|
657 |
src_hsp, src_component, |
|
658 |
src_hsp->cobs, src_length, |
|
659 |
&src_half_score, &src_max_score); |
|
660 |
SAR_HSP_quality(hpair, pair->dst->portal->calc, |
|
661 |
dst_hsp, dst_component, |
|
662 |
prefix, dst_length, |
|
663 |
&dst_half_score, &dst_max_score); |
|
664 |
if(((((gdouble)(src_half_score+dst_half_score)) |
|
665 |
/ ((gdouble)(src_max_score+dst_max_score))) * 100.0) |
|
666 |
< sas->hsp_quality) |
|
667 |
return NULL; |
|
668 |
}
|
|
669 |
sar_join = g_new(SAR_Join, 1); |
|
670 |
sar_join->region = Region_copy(®ion); |
|
671 |
sar_join->src_component = src_component; |
|
672 |
sar_join->dst_component = dst_component; |
|
673 |
sar_join->pair = pair; |
|
674 |
return sar_join; |
|
675 |
}
|
|
676 |
||
677 |
void SAR_Join_destroy(SAR_Join *sar_join){ |
|
678 |
g_assert(sar_join); |
|
679 |
Region_destroy(sar_join->region); |
|
680 |
g_free(sar_join); |
|
681 |
return; |
|
682 |
}
|
|
683 |
||
684 |
C4_Score SAR_Join_find_bound(SAR_Join *sar_join){ |
|
685 |
g_assert(sar_join->region->query_length >= 0); |
|
686 |
g_assert(sar_join->region->target_length >= 0); |
|
687 |
g_assert(sar_join->region->query_length |
|
688 |
<= sar_join->pair->join->bound->query_range); |
|
689 |
g_assert(sar_join->region->target_length |
|
690 |
<= sar_join->pair->join->bound->target_range); |
|
691 |
return sar_join->pair->join->bound->matrix |
|
692 |
[sar_join->region->query_length] |
|
693 |
[sar_join->region->target_length] |
|
694 |
- (sar_join->src_component + sar_join->dst_component); |
|
695 |
}
|
|
696 |
||
697 |
C4_Score SAR_Join_find_score(SAR_Join *sar_join, HPair *hpair){ |
|
698 |
return Optimal_find_score(sar_join->pair->join->optimal, |
|
699 |
sar_join->region, |
|
700 |
hpair->user_data, hpair->subopt) |
|
701 |
- (sar_join->src_component + sar_join->dst_component); |
|
702 |
}
|
|
703 |
||
704 |
/**/
|
|
705 |
||
706 |
static gboolean SAR_Span_calculate_regions(HPair *hpair, |
|
707 |
HSP *src, HSP *dst, |
|
708 |
Heuristic_Span *span, |
|
709 |
Region *src_region, |
|
710 |
Region *dst_region, |
|
711 |
C4_Calc *src_calc, |
|
712 |
C4_Calc *dst_calc){ |
|
713 |
register gint to_shrink; |
|
714 |
Region outer_box, end_box; |
|
715 |
/* Find outer_box (cobs box) */
|
|
716 |
if(!SAR_find_cobs_box(src, dst, &outer_box)) |
|
717 |
return FALSE; |
|
718 |
/* Find inner_box (end_box) */
|
|
719 |
SAR_find_end_box(hpair, src, dst, &end_box, &outer_box, |
|
720 |
src_calc, dst_calc); |
|
721 |
/* Set src_region to entry points to end_box */
|
|
722 |
src_region->query_start = end_box.query_start; |
|
723 |
src_region->target_start = end_box.target_start; |
|
724 |
src_region->query_length = 0; |
|
725 |
src_region->target_length = 0; |
|
726 |
/* Set dst_region to exit points from end_box */
|
|
727 |
dst_region->query_start = Region_query_end(&end_box); |
|
728 |
dst_region->target_start = Region_target_end(&end_box); |
|
729 |
dst_region->query_length = 0; |
|
730 |
dst_region->target_length = 0; |
|
731 |
/* Grow src_region by external range */
|
|
732 |
src_region->query_length += span->src_range->external_query; |
|
733 |
src_region->target_length += span->src_range->external_target; |
|
734 |
/* Grow dst_region by external range */
|
|
735 |
dst_region->query_start -= span->dst_range->external_query; |
|
736 |
dst_region->target_start -= span->dst_range->external_target; |
|
737 |
dst_region->query_length += span->dst_range->external_query; |
|
738 |
dst_region->target_length += span->dst_range->external_target; |
|
739 |
/* Grow inner_box on HSPs by internal range */
|
|
740 |
src_region->query_start -= span->src_range->internal_query; |
|
741 |
src_region->query_length += span->src_range->internal_query; |
|
742 |
src_region->target_start -= span->src_range->internal_target; |
|
743 |
src_region->target_length += span->src_range->internal_target; |
|
744 |
/**/
|
|
745 |
/* Grow inner_box on HSPs by internal range */
|
|
746 |
dst_region->query_length += span->dst_range->internal_query; |
|
747 |
dst_region->target_length += span->dst_range->internal_target; |
|
748 |
/**/
|
|
749 |
/* Trim inner_box to intersection (inner,outer) */
|
|
750 |
if(Region_query_end(src_region) > Region_query_end(&outer_box)){ |
|
751 |
src_region->query_length -= (Region_query_end(src_region) |
|
752 |
- Region_query_end(&outer_box)); |
|
753 |
}
|
|
754 |
if(Region_target_end(src_region) > Region_target_end(&outer_box)){ |
|
755 |
src_region->target_length -= (Region_target_end(src_region) |
|
756 |
- Region_target_end(&outer_box)); |
|
757 |
}
|
|
758 |
to_shrink = outer_box.query_start |
|
759 |
- src_region->query_start; |
|
760 |
if(to_shrink > 0){ |
|
761 |
src_region->query_start += to_shrink; |
|
762 |
src_region->query_length -= to_shrink; |
|
763 |
}
|
|
764 |
to_shrink = outer_box.target_start |
|
765 |
- src_region->target_start; |
|
766 |
if(to_shrink > 0){ |
|
767 |
src_region->target_start += to_shrink; |
|
768 |
src_region->target_length -= to_shrink; |
|
769 |
}
|
|
770 |
/* Trim inner_box to intersection (inner,outer) */
|
|
771 |
if(dst_region->query_start < outer_box.query_start){ |
|
772 |
dst_region->query_length -= (outer_box.query_start |
|
773 |
- dst_region->query_start); |
|
774 |
dst_region->query_start = outer_box.query_start; |
|
775 |
}
|
|
776 |
if(dst_region->target_start < outer_box.target_start){ |
|
777 |
dst_region->target_length -= (outer_box.target_start |
|
778 |
- dst_region->target_start); |
|
779 |
dst_region->target_start = outer_box.target_start; |
|
780 |
}
|
|
781 |
to_shrink = Region_query_end(dst_region) |
|
782 |
- Region_query_end(&outer_box); |
|
783 |
if(to_shrink > 0) |
|
784 |
dst_region->query_length -= to_shrink; |
|
785 |
to_shrink = Region_target_end(dst_region) |
|
786 |
- Region_target_end(&outer_box); |
|
787 |
if(to_shrink > 0) |
|
788 |
dst_region->target_length -= to_shrink; |
|
789 |
/* Check size not zero */
|
|
790 |
if(src_region->query_length < 1) |
|
791 |
return FALSE; |
|
792 |
if(src_region->target_length < 1) |
|
793 |
return FALSE; |
|
794 |
if(dst_region->query_length < 1) |
|
795 |
return FALSE; |
|
796 |
if(dst_region->target_length < 1) |
|
797 |
return FALSE; |
|
798 |
/* Check a path is possible */
|
|
799 |
if((dst_region->query_start - Region_query_end(src_region)) |
|
800 |
> span->span->max_query) |
|
801 |
return FALSE; |
|
802 |
if((dst_region->target_start - Region_target_end(src_region)) |
|
803 |
> span->span->max_target) |
|
804 |
return FALSE; |
|
805 |
/* Check region are still on HSP boundaries */
|
|
806 |
g_assert(SAR_region_valid_hsp_entry(src_region, src)); |
|
807 |
g_assert(SAR_region_valid_hsp_exit(dst_region, dst)); |
|
808 |
return TRUE; |
|
809 |
}
|
|
810 |
||
811 |
SAR_Span *SAR_Span_create(HSP *src_hsp, HSP *dst_hsp, HPair *hpair, |
|
812 |
Heuristic_Span *span, |
|
813 |
C4_Portal *src_portal, C4_Portal *dst_portal){ |
|
814 |
register SAR_Span *sar_span; |
|
815 |
register SAR_ArgumentSet *sas = SAR_ArgumentSet_create(NULL); |
|
816 |
register C4_Score src_component, dst_component; |
|
817 |
register gint src_length, dst_length; |
|
818 |
gint suffix, prefix, src_half_score, dst_half_score, |
|
819 |
src_max_score, dst_max_score; |
|
820 |
Region src_region, dst_region; |
|
821 |
if(!SAR_Span_calculate_regions(hpair, src_hsp, dst_hsp, span, |
|
822 |
&src_region, &dst_region, |
|
823 |
src_portal->calc, dst_portal->calc)) |
|
824 |
return NULL; |
|
825 |
src_component = SAR_find_end_component(hpair, |
|
826 |
&src_region, src_hsp, src_portal->calc, &suffix); |
|
827 |
dst_component = SAR_find_start_component(hpair, |
|
828 |
&dst_region, dst_hsp, dst_portal->calc, &prefix); |
|
829 |
src_length = src_hsp->length - src_hsp->cobs - suffix; |
|
830 |
dst_length = dst_hsp->cobs - prefix; |
|
831 |
if((src_length + dst_length) && (sas->hsp_quality > 0.0)){ |
|
832 |
SAR_HSP_quality(hpair, src_portal->calc, |
|
833 |
src_hsp, src_component, |
|
834 |
src_hsp->cobs, src_length, |
|
835 |
&src_half_score, &src_max_score); |
|
836 |
SAR_HSP_quality(hpair, dst_portal->calc, |
|
837 |
dst_hsp, dst_component, |
|
838 |
prefix, dst_length, |
|
839 |
&dst_half_score, &dst_max_score); |
|
840 |
if(((((gdouble)(src_half_score+dst_half_score)) |
|
841 |
/ ((gdouble)(src_max_score+dst_max_score))) * 100.0) |
|
842 |
< sas->hsp_quality) |
|
843 |
return NULL; |
|
844 |
}
|
|
845 |
if((100.0) < sas->hsp_quality) |
|
846 |
return NULL; |
|
847 |
sar_span = g_new(SAR_Span, 1); |
|
848 |
sar_span->src_region = Region_copy(&src_region); |
|
849 |
sar_span->dst_region = Region_copy(&dst_region); |
|
850 |
sar_span->src_component = src_component; |
|
851 |
sar_span->dst_component = dst_component; |
|
852 |
sar_span->span = span; |
|
853 |
return sar_span; |
|
854 |
}
|
|
855 |
||
856 |
void SAR_Span_destroy(SAR_Span *sar_span){ |
|
857 |
g_assert(sar_span); |
|
858 |
Region_destroy(sar_span->src_region); |
|
859 |
Region_destroy(sar_span->dst_region); |
|
860 |
g_free(sar_span); |
|
861 |
return; |
|
862 |
}
|
|
863 |
||
864 |
C4_Score SAR_Span_find_bound(SAR_Span *sar_span){ |
|
865 |
register C4_Score src_raw_score, dst_raw_score; |
|
866 |
register gint query_overlap, target_overlap; |
|
867 |
g_assert(sar_span->src_region->query_length |
|
868 |
<= sar_span->span->src_bound->query_range); |
|
869 |
g_assert(sar_span->src_region->target_length |
|
870 |
<= sar_span->span->src_bound->target_range); |
|
871 |
g_assert(sar_span->dst_region->query_length |
|
872 |
<= sar_span->span->dst_bound->query_range); |
|
873 |
g_assert(sar_span->dst_region->target_length |
|
874 |
<= sar_span->span->dst_bound->target_range); |
|
875 |
query_overlap = Region_query_end(sar_span->src_region) |
|
876 |
- sar_span->dst_region->query_start; |
|
877 |
target_overlap = Region_target_end(sar_span->src_region) |
|
878 |
- sar_span->dst_region->target_start; |
|
879 |
if(query_overlap < 0) |
|
880 |
query_overlap = 0; |
|
881 |
if(target_overlap < 0) |
|
882 |
target_overlap = 0; |
|
883 |
/**/
|
|
884 |
/* Get the source bounds */
|
|
885 |
src_raw_score = sar_span->span->src_bound->matrix |
|
886 |
[sar_span->src_region->query_length-(query_overlap>>1)] |
|
887 |
[sar_span->src_region->target_length-(target_overlap>>1)]; |
|
888 |
dst_raw_score = sar_span->span->dst_bound->matrix |
|
889 |
[sar_span->dst_region->query_length-(query_overlap>>1) |
|
890 |
-(query_overlap & 1)] |
|
891 |
[sar_span->dst_region->target_length-(target_overlap>>1) |
|
892 |
-(target_overlap & 1)]; |
|
893 |
/* Calculate the bound score */
|
|
894 |
return (src_raw_score - sar_span->src_component) |
|
895 |
+ (dst_raw_score - sar_span->dst_component); |
|
896 |
}
|
|
897 |
||
898 |
C4_Score SAR_Span_find_score(SAR_Span *sar_span, HPair *hpair){ |
|
899 |
register C4_Score score; |
|
900 |
register Heuristic_Data *heuristic_data; |
|
901 |
Heuristic_Span_register(sar_span->span, sar_span->src_region, |
|
902 |
sar_span->dst_region); |
|
903 |
/* Calculate src optimal, reporting to the integration matrix */
|
|
904 |
heuristic_data = hpair->user_data; |
|
905 |
heuristic_data->heuristic_span = sar_span->span; |
|
906 |
score = Optimal_find_score(sar_span->span->src_optimal, |
|
907 |
sar_span->src_region, hpair->user_data, |
|
908 |
hpair->subopt); |
|
909 |
Heuristic_Span_integrate(sar_span->span, sar_span->src_region, |
|
910 |
sar_span->dst_region); |
|
911 |
/* Calculate dst optimal, starting from the integration matrix */
|
|
912 |
score = Optimal_find_score(sar_span->span->dst_optimal, |
|
913 |
sar_span->dst_region, hpair->user_data, |
|
914 |
hpair->subopt); |
|
915 |
heuristic_data->heuristic_span = NULL; |
|
916 |
return score - (sar_span->src_component + sar_span->dst_component); |
|
917 |
}
|
|
918 |
||
919 |
/**/
|
|
920 |
||
921 |
SAR_Alignment *SAR_Alignment_create(SAR_Terminal *sar_start, |
|
922 |
SAR_Terminal *sar_end, |
|
923 |
Heuristic_Match *start_match, |
|
924 |
Heuristic_Match *end_match, |
|
925 |
HPair *hpair, C4_Score score){ |
|
926 |
register SAR_Alignment *sar_alignment = g_new(SAR_Alignment, 1); |
|
927 |
register Region *align_region; |
|
928 |
register Alignment *start_alignment |
|
929 |
= Optimal_find_path(start_match->start_terminal->optimal, |
|
930 |
sar_start->region, hpair->user_data, |
|
931 |
C4_IMPOSSIBLY_LOW_SCORE, hpair->subopt); |
|
932 |
g_assert(start_alignment); |
|
933 |
sar_alignment->end_alignment |
|
934 |
= Optimal_find_path(end_match->end_terminal->optimal, |
|
935 |
sar_end->region, hpair->user_data, |
|
936 |
C4_IMPOSSIBLY_LOW_SCORE, hpair->subopt); |
|
937 |
sar_alignment->end_region = Region_share(sar_end->region); |
|
938 |
sar_alignment->end_match = end_match; |
|
939 |
g_assert(sar_alignment->end_alignment); |
|
940 |
align_region = Region_create(start_alignment->region->query_start, |
|
941 |
start_alignment->region->target_start, |
|
942 |
Region_query_end(sar_alignment->end_alignment->region) |
|
943 |
- start_alignment->region->query_start, |
|
944 |
Region_target_end(sar_alignment->end_alignment->region) |
|
945 |
- start_alignment->region->target_start); |
|
946 |
sar_alignment->alignment = Alignment_create(hpair->heuristic->model, |
|
947 |
align_region, score); |
|
948 |
Alignment_import_derived(sar_alignment->alignment, start_alignment, |
|
949 |
start_match->start_terminal->terminal_model); |
|
950 |
sar_alignment->hpair = hpair; |
|
951 |
sar_alignment->last_region = Region_share(start_alignment->region); |
|
952 |
sar_alignment->last_hsp = NULL; |
|
953 |
sar_alignment->last_match = NULL; |
|
954 |
Alignment_destroy(start_alignment); |
|
955 |
Region_destroy(align_region); |
|
956 |
return sar_alignment; |
|
957 |
}
|
|
958 |
||
959 |
void SAR_Alignment_destroy(SAR_Alignment *sar_alignment){ |
|
960 |
g_assert(sar_alignment); |
|
961 |
g_assert(sar_alignment->alignment); |
|
962 |
Alignment_destroy(sar_alignment->alignment); |
|
963 |
if(sar_alignment->end_alignment) |
|
964 |
Alignment_destroy(sar_alignment->end_alignment); |
|
965 |
if(sar_alignment->end_region) |
|
966 |
Region_destroy(sar_alignment->end_region); |
|
967 |
if(sar_alignment->last_region) |
|
968 |
Region_destroy(sar_alignment->last_region); |
|
969 |
g_free(sar_alignment); |
|
970 |
return; |
|
971 |
}
|
|
972 |
||
973 |
static void SAR_Alignment_add_region(SAR_Alignment *sar_alignment, |
|
974 |
Region *src_region, Region *dst_region){ |
|
975 |
register gint suffix; |
|
976 |
g_assert(sar_alignment->last_hsp); |
|
977 |
g_assert(sar_alignment->last_match); |
|
978 |
g_assert(!sar_alignment->last_region); |
|
979 |
suffix = (HSP_query_end(sar_alignment->last_hsp) |
|
980 |
- src_region->query_start) |
|
981 |
/ HSP_query_advance(sar_alignment->last_hsp); |
|
982 |
Alignment_add(sar_alignment->alignment, |
|
983 |
sar_alignment->last_match->transition, |
|
984 |
-suffix); |
|
985 |
sar_alignment->last_hsp = NULL; |
|
986 |
sar_alignment->last_match = NULL; |
|
987 |
sar_alignment->last_region = Region_share(dst_region); |
|
988 |
return; |
|
989 |
}
|
|
990 |
||
991 |
void SAR_Alignment_finalise(SAR_Alignment *sar_alignment){ |
|
992 |
Region seq_region; |
|
993 |
g_assert(sar_alignment->end_alignment); |
|
994 |
SAR_Alignment_add_region(sar_alignment, sar_alignment->end_region, |
|
995 |
sar_alignment->end_region); |
|
996 |
Alignment_import_derived(sar_alignment->alignment, |
|
997 |
sar_alignment->end_alignment, |
|
998 |
sar_alignment->end_match->end_terminal->terminal_model); |
|
999 |
Alignment_destroy(sar_alignment->end_alignment); |
|
1000 |
sar_alignment->end_alignment = NULL; |
|
1001 |
Region_destroy(sar_alignment->end_region); |
|
1002 |
sar_alignment->end_region = NULL; |
|
1003 |
Region_init_static(&seq_region, 0, 0, |
|
1004 |
sar_alignment->hpair->query_length, |
|
1005 |
sar_alignment->hpair->target_length); |
|
1006 |
g_assert(Alignment_is_valid(sar_alignment->alignment, &seq_region, |
|
1007 |
sar_alignment->hpair->user_data)); |
|
1008 |
return; |
|
1009 |
}
|
|
1010 |
||
1011 |
void SAR_Alignment_add_SAR_Join(SAR_Alignment *sar_alignment, |
|
1012 |
SAR_Join *sar_join){ |
|
1013 |
register Alignment *edge_alignment; |
|
1014 |
edge_alignment = Optimal_find_path( |
|
1015 |
sar_join->pair->join->optimal, |
|
1016 |
sar_join->region, sar_alignment->hpair->user_data, |
|
1017 |
C4_IMPOSSIBLY_LOW_SCORE, sar_alignment->hpair->subopt); |
|
1018 |
SAR_Alignment_add_region(sar_alignment, sar_join->region, |
|
1019 |
sar_join->region); |
|
1020 |
Alignment_import_derived(sar_alignment->alignment, edge_alignment, |
|
1021 |
sar_join->pair->join->join_model); |
|
1022 |
Alignment_destroy(edge_alignment); |
|
1023 |
return; |
|
1024 |
}
|
|
1025 |
||
1026 |
void SAR_Alignment_add_SAR_Span(SAR_Alignment *sar_alignment, |
|
1027 |
SAR_Span *sar_span){ |
|
1028 |
register Alignment *src_alignment, *dst_alignment; |
|
1029 |
register gint query_span_start, target_span_start, |
|
1030 |
query_span_end, target_span_end; |
|
1031 |
register Region *src_align_region; |
|
1032 |
register Heuristic_Data *heuristic_data |
|
1033 |
= sar_alignment->hpair->user_data; |
|
1034 |
heuristic_data->heuristic_span = sar_span->span; |
|
1035 |
Heuristic_Span_register(sar_span->span, sar_span->src_region, |
|
1036 |
sar_span->dst_region); |
|
1037 |
Optimal_find_score(sar_span->span->src_optimal, |
|
1038 |
sar_span->src_region, sar_alignment->hpair->user_data, |
|
1039 |
sar_alignment->hpair->subopt); |
|
1040 |
Heuristic_Span_integrate(sar_span->span, sar_span->src_region, |
|
1041 |
sar_span->dst_region); |
|
1042 |
dst_alignment = Optimal_find_path(sar_span->span->dst_optimal, |
|
1043 |
sar_span->dst_region, sar_alignment->hpair->user_data, |
|
1044 |
C4_IMPOSSIBLY_LOW_SCORE, sar_alignment->hpair->subopt); |
|
1045 |
/* Merge edge traceback */
|
|
1046 |
query_span_end = dst_alignment->region->query_start |
|
1047 |
- sar_span->dst_region->query_start; |
|
1048 |
target_span_end = dst_alignment->region->target_start |
|
1049 |
- sar_span->dst_region->target_start; |
|
1050 |
query_span_start
|
|
1051 |
= sar_span->span->dst_integration_matrix |
|
1052 |
[query_span_end][target_span_end].query_pos |
|
1053 |
- sar_span->src_region->query_start; |
|
1054 |
target_span_start
|
|
1055 |
= sar_span->span->dst_integration_matrix |
|
1056 |
[query_span_end][target_span_end].target_pos |
|
1057 |
- sar_span->src_region->target_start; |
|
1058 |
src_align_region = Region_create(sar_span->src_region->query_start, |
|
1059 |
sar_span->src_region->target_start, |
|
1060 |
query_span_start, |
|
1061 |
target_span_start); |
|
1062 |
src_alignment = Optimal_find_path( |
|
1063 |
sar_span->span->src_traceback_optimal, src_align_region, |
|
1064 |
sar_alignment->hpair->user_data, C4_IMPOSSIBLY_LOW_SCORE, |
|
1065 |
sar_alignment->hpair->subopt); |
|
1066 |
Region_destroy(src_align_region); |
|
1067 |
SAR_Alignment_add_region(sar_alignment, sar_span->src_region, |
|
1068 |
sar_span->dst_region); |
|
1069 |
Alignment_import_derived(sar_alignment->alignment, src_alignment, |
|
1070 |
sar_span->span->src_traceback_model); |
|
1071 |
/* Add traceback for the span */
|
|
1072 |
Heuristic_Span_add_traceback(sar_span->span, |
|
1073 |
sar_alignment->alignment, |
|
1074 |
dst_alignment->region->query_start |
|
1075 |
-Region_query_end(src_alignment->region), |
|
1076 |
dst_alignment->region->target_start |
|
1077 |
-Region_target_end(src_alignment->region)); |
|
1078 |
heuristic_data->heuristic_span = NULL; |
|
1079 |
Alignment_import_derived(sar_alignment->alignment, dst_alignment, |
|
1080 |
sar_span->span->dst_model); |
|
1081 |
Alignment_destroy(src_alignment); |
|
1082 |
Alignment_destroy(dst_alignment); |
|
1083 |
return; |
|
1084 |
}
|
|
1085 |
||
1086 |
void SAR_Alignment_add_HSP(SAR_Alignment *sar_alignment, HSP *hsp, |
|
1087 |
Heuristic_Match *match){ |
|
1088 |
register gint prefix; |
|
1089 |
g_assert(sar_alignment->last_region); |
|
1090 |
g_assert(!sar_alignment->last_hsp); |
|
1091 |
g_assert(!sar_alignment->last_match); |
|
1092 |
prefix = (Region_query_end(sar_alignment->last_region) |
|
1093 |
- hsp->query_start) / HSP_query_advance(hsp); |
|
1094 |
Alignment_add(sar_alignment->alignment, match->transition, |
|
1095 |
hsp->length - prefix); |
|
1096 |
Region_destroy(sar_alignment->last_region); |
|
1097 |
sar_alignment->last_region = NULL; |
|
1098 |
sar_alignment->last_hsp = hsp; |
|
1099 |
sar_alignment->last_match = match; |
|
1100 |
return; |
|
1101 |
}
|
|
1102 |