1
/*******************************************************
3
*******************************************************
4
Sep 26, 2011 xalip_sse41.c
5
*******************************************************
6
(C) 2011 Swiss Institute of Bioinformatics
7
Thierry Schuepbach (thierry.schuepbach@isb-sib.ch)
8
*******************************************************/
13
#include <smmintrin.h>
19
#define MAX(a,b) (a>b) ? a : b
20
#define MIN(a,b) (a<b) ? a : b
28
static void InitR(const int iseq, const size_t N1, const size_t N2, const size_t bseq, const size_t lseq,
29
union lScores * const restrict iop, union Positions * const restrict iom,
30
union Positions * const restrict ioi, const struct Profile * const restrict prf)
34
fprintf(stdout,"InitR called with index %i bseq %lu\n",iseq, bseq);
36
// Are we treating sequence index below given start?
37
register const ScoreTuple * restrict FirstSequenceProtein = ( iseq < bseq )
38
? &(prf->Scores.Insertion.FirstSequenceProtein[0])
39
: &(prf->Scores.Insertion.Transitions->From[EXTRA]);
40
const size_t FirstSequenceProteinAlignStep = (iseq < bseq) ? 1 : 4;
42
register __m128i * restrict pIOP = &(iop[0].xmm);
44
// Load FirstSequenceProtein
45
__m128i __FirstSequenceProtein = _mm_loadl_epi64((__m128i*) FirstSequenceProtein);
47
// Convert signed WORD into signed DWORD
48
__FirstSequenceProtein = _mm_cvtepi16_epi32(__FirstSequenceProtein);
51
*pIOP = __FirstSequenceProtein;
53
// Set KOPD ( this is SSE 4.1 )
54
KOPD = _mm_extract_epi32(__FirstSequenceProtein, DELETION);
56
register const TransitionScores * const restrict Transitions = prf->Scores.Insertion.Transitions;
57
register const short int * restrict pMatch = &prf->Scores.Match.Alphabet[_D];
58
const size_t AlignStep = prf->Scores.Match.AlignStep;
60
// Move to next profile First Sequence
61
FirstSequenceProtein += FirstSequenceProteinAlignStep;
64
for (unsigned int iprf=1; iprf<=(unsigned int) prf->Length; ++iprf) {
65
register const int KD = KOPD + (int) *pMatch;
68
// Transform KD into a vector
69
__m128i __KD = _mm_set1_epi32(KD);
71
__m128i __Transitions = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[DELETION]));
72
// Convert signed WORD into signed DWORD
73
__Transitions = _mm_cvtepi16_epi32(__Transitions);
74
// Add KD to Transitions
75
__Transitions = _mm_add_epi32(__Transitions, __KD);
77
// Load FirstSequenceProtein
78
__m128i __FirstSequenceProtein = _mm_loadl_epi64((__m128i*) FirstSequenceProtein);
79
// Convert signed WORD into signed DWORD
80
__FirstSequenceProtein = _mm_cvtepi16_epi32(__FirstSequenceProtein);
82
// Move to next profile First Sequence
83
FirstSequenceProtein += FirstSequenceProteinAlignStep;
85
// Get maximum ( this is SSE 4.1 )
86
__m128i __max = _mm_max_epi32(__Transitions, __FirstSequenceProtein);
88
// Store IOPI and IOPM
92
// Set KOPD ( this is SSE 4.1 )
93
KOPD = _mm_extract_epi32(__max, DELETION);
97
union Positions TPOS __attribute__((aligned(16)));;
98
TPOS.Element.One = lseq + 1;
100
TPOS.Element.B = iseq + 1;
101
TPOS.Element.dummy = 0;
103
for (unsigned int iprf=0; iprf<(unsigned int) (N1-1); ++iprf) {
104
_mm_store_si128(&(iom[iprf].xmm), TPOS.xmm);
105
_mm_store_si128(&(ioi[iprf].xmm), TPOS.xmm);
108
if (N1 == 0) { fputs("BUG HERE N1 is NULL\n", stderr); exit(1);}
109
ioi[N1-1].xmm = TPOS.xmm; // Warning N1 > 0 ?
110
TPOS.Element.One = TPOS.Element.B;
111
TPOS.Element.Two = TPOS.Element.B;
112
iom[N1-1].xmm = TPOS.xmm;
114
for (unsigned int iprf=N1; iprf<(unsigned int)N2; ++iprf) {
115
_mm_store_si128(&(iom[iprf].xmm), TPOS.xmm);
116
_mm_store_si128(&(ioi[iprf].xmm), TPOS.xmm);
119
TPOS.Element.One = lseq + 1;
120
TPOS.Element.Two = 0;
122
for (unsigned int iprf=(unsigned int) N2; iprf<=(unsigned int)prf->Length; ++iprf) {
123
_mm_store_si128(&(iom[iprf].xmm), TPOS.xmm);
124
_mm_store_si128(&(ioi[iprf].xmm), TPOS.xmm);
128
static void nextR(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
129
const int iseq, union lScores * const restrict iop, union Positions * const restrict iom,
130
union Positions * const restrict ioi,const int lseq, struct Alignment * const restrict alignment,
131
struct Minima * const restrict ifer, const _Bool Lock, const size_t N1, const size_t N2)
134
fprintf(stdout,"NextR called with iseq %i\n",iseq);
137
const unsigned int In = iseq + 1;
138
// WARNING: Fortran uses a 1 based index for sequence
139
const unsigned int SequenceIndex = (unsigned int) Sequence[iseq-1];
142
fputs("nextR_last should have been called\n", stderr);
146
// Disable match and insert vertices of protected region
148
iop[N1-1].Element[MATCH] = NLOW;
149
for (size_t iprf=N1; iprf<N2; ++iprf) {
150
iop[iprf].Element[MATCH] = NLOW;
151
iop[iprf].Element[INSERTION] = NLOW;
154
////////////////////////////////////////////////////////////////////////////////////////////
155
// Profile position 0
156
////////////////////////////////////////////////////////////////////////////////////////////
158
// Save previous match position
159
int Kopm = iop[0].Element[MATCH];
160
union Positions Kpos __attribute__((aligned(16)));
161
Kpos.xmm = iom[0].xmm;
163
const union Positions TEMPpos __attribute__((aligned(16))) = { lseq+1, 0, In, 0 };
165
const union Positions * restrict PTRpos[4];
166
union Positions Kiod;
167
PTRpos[0] = &TEMPpos;
171
// Get pointers to score data
172
const TransitionScores * const restrict Transitions = prf->Scores.Insertion.Transitions;
173
const short int * restrict Insertion = prf->Scores.Insertion.Alphabet;
174
const size_t AlignStep = prf->Scores.Insertion.AlignStep;
176
int Ki = iop[0].Element[INSERTION] + (int) Insertion[SequenceIndex];
179
int Ji = Ki + (int) Transitions[0].From[INSERTION].To[MATCH];
180
int itmp = (int) Transitions[0].From[EXTRA].To[MATCH];
182
iop[0].Element[MATCH] = Ji;
183
iom[0].xmm = ioi[0].xmm;
185
iop[0].Element[MATCH] = itmp;
186
iom[0].xmm = TEMPpos.xmm;
191
Ji = Ki + (int) Transitions[0].From[INSERTION].To[DELETION];
192
itmp = (int) Transitions[0].From[EXTRA].To[DELETION];
195
Kiod.xmm = ioi[0].xmm;
198
Kiod.xmm = TEMPpos.xmm;
201
// Insertion position
202
Ji = Ki + (int) Transitions[0].From[INSERTION].To[INSERTION];
203
itmp = (int) Transitions[0].From[EXTRA].To[INSERTION];
205
iop[0].Element[INSERTION] = Ji;
207
iop[0].Element[INSERTION] = itmp;
208
ioi[0].xmm = TEMPpos.xmm;
214
itmp = MIN(ioi[0].Element.B, iom[0].Element.B);
215
ifer->_c = MIN(itmp, Kiod.Element.B);
217
// Initialize alignment
218
union Positions Fpos __attribute__((aligned(16)));;
219
Fpos.Element.One = alignment->JAL1;
220
Fpos.Element.Two = alignment->JAL2;
221
Fpos.Element.B = alignment->JALB;
222
Fpos.Element.dummy = 0;
224
////////////////////////////////////////////////////////////////////////////////////////////
225
// Loop through rest of profile
226
////////////////////////////////////////////////////////////////////////////////////////////
227
const short int * restrict Match = prf->Scores.Match.Alphabet;
228
Insertion += AlignStep;
230
for (int iprf=1; iprf<=prf->Length; ++iprf) {
231
/////////////////////////////////////////////////////////////////////////////////////////
233
const register int KM = Kopm + (int) Match[SequenceIndex];
234
Kopm = iop[iprf].Element[MATCH];
236
__m128i __KM = _mm_set1_epi32(KM);
238
__m128i __TransitionsM = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[MATCH]));
239
// Convert signed WORD into signed DWORD
240
__TransitionsM = _mm_cvtepi16_epi32(__TransitionsM);
241
// Add KM to Transitions
242
__TransitionsM = _mm_add_epi32(__TransitionsM, __KM);
245
/////////////////////////////////////////////////////////////////////////////////////////
247
const register int KI = iop[iprf].Element[INSERTION] + (int) Insertion[SequenceIndex];
248
// one could move on the seq index instead
250
__m128i __KI = _mm_set1_epi32(KI);
252
__m128i __TransitionsI = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[INSERTION]));
253
// Convert signed WORD into signed DWORD
254
__TransitionsI = _mm_cvtepi16_epi32(__TransitionsI);
255
// Add KM to Transitions
256
__TransitionsI = _mm_add_epi32(__TransitionsI, __KI);
258
/////////////////////////////////////////////////////////////////////////////////////////
260
const register int KD = Kopd + (int) Match[_D];
262
__m128i __KD = _mm_set1_epi32(KD);
264
__m128i __TransitionsD = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[DELETION]));
265
// Convert signed WORD into signed DWORD
266
__TransitionsD = _mm_cvtepi16_epi32(__TransitionsD);
267
// Add KM to Transitions
268
__TransitionsD = _mm_add_epi32(__TransitionsD, __KD);
270
/////////////////////////////////////////////////////////////////////////////////////////
273
__m128i __TransitionsX = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[EXTRA]));
274
// Convert signed WORD into signed DWORD
275
__TransitionsX = _mm_cvtepi16_epi32(__TransitionsX);
277
// Insert NLOW into Extension vector -> done in io.c
278
//__TransitionsX = _mm_insert_epi32(__TransitionsX, NLOW, DUMMY);
280
// Move to next profile index
282
Insertion += AlignStep;
284
/////////////////////////////////////////////////////////////////////////////////////////
285
// Compute Maxima (Fortran Nstep function)
286
__m128i __Index = (__m128i) _mm_setzero_si128();
287
__m128i __three = _mm_set_epi32(3,3,3,3);
289
__m128i __Mask = _mm_cmpgt_epi32(__TransitionsD, __TransitionsX);
290
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsD, __Mask);
291
__Index = _mm_blendv_epi8(__Index, __three, __Mask);
293
__m128i __One = _mm_set_epi32(1,1,1,1);
294
__Mask = _mm_cmpgt_epi32(__TransitionsM, __TransitionsX);
295
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsM, __Mask);
296
__Index = _mm_blendv_epi8(__Index, __One, __Mask);
298
__m128i __Two = _mm_add_epi32(__One, __One);
299
__Mask = _mm_cmpgt_epi32(__TransitionsI, __TransitionsX);
300
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsI, __Mask);
301
__Index = _mm_blendv_epi8(__Index, __Two, __Mask);
304
iop[iprf].xmm = __TransitionsX;
305
//StoreMatchInsertion((__m64*) &iop[iprf],(__m128) __TransitionsX);
306
Kopd = _mm_extract_epi32(__TransitionsX, DELETION);
308
/////////////////////////////////////////////////////////////////////////////////////////
309
// Check for new maxima
310
const int KE = _mm_extract_epi32(__TransitionsX, DUMMY);
311
if (KE > alignment->JALS) {
312
alignment->JALS = KE;
313
alignment->JALE = iseq;
314
Fpos.xmm = ioi[iprf].xmm;
315
const unsigned int Id = (unsigned int) _mm_extract_epi32(__Index, DUMMY);
316
if (Id == 1) { // KM is max
318
} else if (Id == 3) { // KD is max
323
printf("FPOS %8i %8i %8i\n", Fpos.Element.One, Fpos.Element.Two, Fpos.Element.B);
325
/////////////////////////////////////////////////////////////////////////////////////////
326
// Update alignment positions
327
union Positions Jpos __attribute__((aligned(16)));
328
Jpos.xmm = iom[iprf].xmm;
329
PTRpos[2] = &ioi[iprf];
331
const int NewM = _mm_extract_epi32(__Index, MATCH);
332
iom[iprf].xmm = PTRpos[NewM]->xmm;
334
const int NewD = _mm_extract_epi32(__Index, DELETION);
335
Kiod.xmm = PTRpos[NewD]->xmm;
337
const int NewI = _mm_extract_epi32(__Index, INSERTION);
338
ioi[iprf].xmm = PTRpos[NewI]->xmm;
342
/////////////////////////////////////////////////////////////////////////////////////////
345
const int t1 = MIN(ioi[iprf].Element.One, iom[iprf].Element.One);
346
const int t2 = MIN(t1, Kiod.Element.One);
347
ifer->_a = MIN(ifer->_a, t2);
350
ifer->_b = MIN(ifer->_b, t2);
352
const int t3 = MIN(ioi[iprf].Element.B, iom[iprf].Element.B);
353
const int t4 = MIN(t3, Kiod.Element.B);
354
ifer->_c = MIN(ifer->_c, t4);
357
////////////////////////////////////////////////////////////////////////////////////////////
359
////////////////////////////////////////////////////////////////////////////////////////////
361
// Finish updating alignment positions
362
alignment->JAL1 = Fpos.Element.One;
363
alignment->JAL2 = Fpos.Element.Two;
364
alignment->JALB = Fpos.Element.B;
366
// Entry and exit from protected regions
367
iom[N1-1].Element.One = MIN(iom[N1-1].Element.One, In);
368
iom[N1-1].Element.Two = In;
370
for (int iprf=N1; iprf<N2; ++iprf) {
371
iom[iprf].Element.One = MIN(iom[iprf].Element.One, In);
372
iom[iprf].Element.Two = In;
374
ioi[iprf].Element.One = MIN(ioi[iprf].Element.One, In);
375
ioi[iprf].Element.Two = In;
379
for (int iprf=0; iprf<=prf->Length; ++iprf) {
380
printf("NEXTR IOP %4.4i %15i %15i %15i\n", iprf, iop[iprf].Element[MATCH], iop[iprf].Element[INSERTION],
381
iop[iprf].Element[DELETION]);
382
printf("NEXTR IOM %4.4i %15i %15i %15i\n", iprf, iom[iprf].Element.One, iom[iprf].Element.Two,
383
iom[iprf].Element.B);
384
printf("NEXTR IOI %4.4i %15i %15i %15i\n", iprf, ioi[iprf].Element.One, ioi[iprf].Element.Two,
385
ioi[iprf].Element.B);
389
printf("NEXTR ALIGN %4i %4i %4i %4i %4i\n",
390
alignment->JAL1, alignment->JAL2, alignment->JALS, alignment->JALB, alignment->JALE);
394
static void nextR_last(const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
395
const int iseq, union lScores * const restrict iop, union Positions * const restrict iom,
396
union Positions * const restrict ioi,const int lseq, struct Alignment * const restrict alignment,
397
struct Minima * const restrict ifer, const _Bool Lock, const size_t N1, const size_t N2)
400
fprintf(stdout,"NextR_last called with iseq %i\n",iseq);
403
const unsigned int In = iseq + 1;
404
// WARNING: Fortran uses a 1 based index for sequence
405
const unsigned int SequenceIndex = (unsigned int) Sequence[iseq-1];
408
fputs("nextR should have been called\n", stderr);
412
// Disable match and insert vertices of protected region
414
iop[N1-1].Element[MATCH] = NLOW;
415
for (int iprf=N1; iprf<N2; ++iprf) {
416
iop[iprf].Element[MATCH] = NLOW;
417
iop[iprf].Element[INSERTION] = NLOW;
420
////////////////////////////////////////////////////////////////////////////////////////////
421
// Profile position 0
422
////////////////////////////////////////////////////////////////////////////////////////////
424
// Save previous match position
425
int Kopm = iop[0].Element[MATCH];
426
union Positions Kpos __attribute__((aligned(16)));
427
Kpos.xmm = iom[0].xmm;
429
const union Positions TEMPpos __attribute__((aligned(16))) = { lseq+1, 0, In, 0 };
431
const union Positions * restrict PTRpos[4];
432
union Positions Kiod;
433
PTRpos[0] = &TEMPpos;
437
// Get pointers to score data
438
const TransitionScores * const restrict Transitions = prf->Scores.Insertion.Transitions;
439
const short int * restrict Insertion = prf->Scores.Insertion.Alphabet;
440
const size_t AlignStep = prf->Scores.Insertion.AlignStep;
442
int Ki = iop[0].Element[INSERTION] + (int) Insertion[SequenceIndex];
445
int Ji = Ki + (int) Transitions[0].From[INSERTION].To[MATCH];
446
int itmp = (int) Transitions[0].From[EXTRA].To[MATCH];
448
iop[0].Element[MATCH] = Ji;
449
iom[0].xmm = ioi[0].xmm;
451
iop[0].Element[MATCH] = itmp;
452
iom[0].xmm = TEMPpos.xmm;
457
Ji = Ki + (int) Transitions[0].From[INSERTION].To[DELETION];
458
itmp = (int) Transitions[0].From[EXTRA].To[DELETION];
461
Kiod.xmm = ioi[0].xmm;
464
Kiod.xmm = TEMPpos.xmm;
467
// Insertion position
468
Ji = Ki + (int) Transitions[0].From[INSERTION].To[INSERTION];
469
itmp = (int) Transitions[0].From[EXTRA].To[INSERTION];
471
iop[0].Element[INSERTION] = Ji;
473
iop[0].Element[INSERTION] = itmp;
474
ioi[0].xmm = TEMPpos.xmm;
480
itmp = MIN(ioi[0].Element.B, iom[0].Element.B);
481
ifer->_c = MIN(itmp, Kiod.Element.B);
483
// Initialize alignment
484
union Positions Fpos __attribute__((aligned(16)));
485
Fpos.Element.One = alignment->JAL1;
486
Fpos.Element.Two = alignment->JAL2;
487
Fpos.Element.B = alignment->JALB;
488
Fpos.Element.dummy = 0;
490
////////////////////////////////////////////////////////////////////////////////////////////
491
// Loop through rest of profile
492
////////////////////////////////////////////////////////////////////////////////////////////
493
const short int * restrict Match = prf->Scores.Match.Alphabet;
494
const ScoreTuple * const restrict LastProteinSequence = prf->Scores.Insertion.LastSequenceProtein;
495
Insertion += AlignStep;
497
for (int iprf=1; iprf<=prf->Length; ++iprf) {
498
/////////////////////////////////////////////////////////////////////////////////////////
500
const register int KM = Kopm + (int) Match[SequenceIndex];
501
Kopm = iop[iprf].Element[MATCH];
503
__m128i __KM = _mm_set1_epi32(KM);
505
__m128i __TransitionsM = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[MATCH]));
506
// Convert signed WORD into signed DWORD
507
__TransitionsM = _mm_cvtepi16_epi32(__TransitionsM);
508
// Insert LastProteinSequence
509
__TransitionsM = _mm_insert_epi32(__TransitionsM, (int) LastProteinSequence[iprf].From[MATCH], DUMMY);
510
// Add KM to Transitions
511
__TransitionsM = _mm_add_epi32(__TransitionsM, __KM);
514
/////////////////////////////////////////////////////////////////////////////////////////
516
const register int KI = iop[iprf].Element[INSERTION] + (int) Insertion[SequenceIndex];
517
// one could move on the seq index instead
519
__m128i __KI = _mm_set1_epi32(KI);
521
__m128i __TransitionsI = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[INSERTION]));
522
// Convert signed WORD into signed DWORD
523
__TransitionsI = _mm_cvtepi16_epi32(__TransitionsI);
524
// Insert LastProteinSequence
525
__TransitionsI = _mm_insert_epi32(__TransitionsI, (int) LastProteinSequence[iprf].From[INSERTION], DUMMY);
526
// Add KM to Transitions
527
__TransitionsI = _mm_add_epi32(__TransitionsI, __KI);
529
/////////////////////////////////////////////////////////////////////////////////////////
531
const register int KD = Kopd + (int) Match[_D];
533
__m128i __KD = _mm_set1_epi32(KD);
535
__m128i __TransitionsD = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[DELETION]));
536
// Convert signed WORD into signed DWORD
537
__TransitionsD = _mm_cvtepi16_epi32(__TransitionsD);
538
// Insert LastProteinSequence
539
__TransitionsD = _mm_insert_epi32(__TransitionsD, (int) LastProteinSequence[iprf].From[DELETION], DUMMY);
540
// Add KM to Transitions
541
__TransitionsD = _mm_add_epi32(__TransitionsD, __KD);
543
/////////////////////////////////////////////////////////////////////////////////////////
546
__m128i __TransitionsX = _mm_loadl_epi64((__m128i*) &(Transitions[iprf].From[EXTRA]));
547
// Convert signed WORD into signed DWORD
548
__TransitionsX = _mm_cvtepi16_epi32(__TransitionsX);
550
// Insert NLOW into Extension vector -> done in io.c
551
//__TransitionsX = _mm_insert_epi32(__TransitionsX, NLOW, DUMMY);
553
// Move to next profile index
555
Insertion += AlignStep;
557
/////////////////////////////////////////////////////////////////////////////////////////
558
// Compute Maxima (Fortran Nstep function)
559
__m128i __Index = (__m128i) _mm_setzero_ps();
560
__m128i __three = _mm_set_epi32(3,3,3,3);
562
__m128i __Mask = _mm_cmpgt_epi32(__TransitionsD, __TransitionsX);
563
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsD, __Mask);
564
__Index = _mm_blendv_epi8(__Index, __three, __Mask);
566
__m128i __One = _mm_set_epi32(1,1,1,1);
567
__Mask = _mm_cmpgt_epi32(__TransitionsM, __TransitionsX);
568
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsM, __Mask);
569
__Index = _mm_blendv_epi8(__Index, __One, __Mask);
571
__m128i __Two = _mm_add_epi32(__One, __One);
572
__Mask = _mm_cmpgt_epi32(__TransitionsI, __TransitionsX);
573
__TransitionsX = _mm_blendv_epi8(__TransitionsX, __TransitionsI, __Mask);
574
__Index = _mm_blendv_epi8(__Index, __Two, __Mask);
577
iop[iprf].xmm = __TransitionsX;
578
//_mm_storel_pi((__m64*) &iop[iprf],(__m128) __TransitionsX);
579
Kopd = _mm_extract_epi32(__TransitionsX, DELETION);
581
/////////////////////////////////////////////////////////////////////////////////////////
582
// Check for new maxima
583
const int KE = _mm_extract_epi32(__TransitionsX, DUMMY);
584
if (KE > alignment->JALS) {
585
alignment->JALS = KE;
586
alignment->JALE = iseq;
587
Fpos.xmm = ioi[iprf].xmm;
588
const unsigned int Id = (unsigned int) _mm_extract_epi32(__Index, DUMMY);
589
if (Id == 1) { // KM is max
591
} else if (Id == 3) { // KD is max
596
printf("FPOS %8i %8i %8i\n", Fpos.Element.One, Fpos.Element.Two, Fpos.Element.B);
598
/////////////////////////////////////////////////////////////////////////////////////////
599
// Update alignment positions
600
union Positions Jpos __attribute__((aligned(16)));
601
Jpos.xmm = iom[iprf].xmm;
602
PTRpos[2] = &ioi[iprf];
604
const int NewM = _mm_extract_epi32(__Index, MATCH);
605
iom[iprf].xmm = PTRpos[NewM]->xmm;
607
const int NewD = _mm_extract_epi32(__Index, DELETION);
608
Kiod.xmm = PTRpos[NewD]->xmm;
610
const int NewI = _mm_extract_epi32(__Index, INSERTION);
611
ioi[iprf].xmm = PTRpos[NewI]->xmm;
615
/////////////////////////////////////////////////////////////////////////////////////////
618
const int t1 = MIN(ioi[iprf].Element.One, iom[iprf].Element.One);
619
const int t2 = MIN(t1, Kiod.Element.One);
620
ifer->_a = MIN(ifer->_a, t2);
623
ifer->_b = MIN(ifer->_b, t2);
625
const int t3 = MIN(ioi[iprf].Element.B, iom[iprf].Element.B);
626
const int t4 = MIN(t3, Kiod.Element.B);
627
ifer->_c = MIN(ifer->_c, t4);
630
////////////////////////////////////////////////////////////////////////////////////////////
632
////////////////////////////////////////////////////////////////////////////////////////////
634
// Finish updating alignment positions
635
alignment->JAL1 = Fpos.Element.One;
636
alignment->JAL2 = Fpos.Element.Two;
637
alignment->JALB = Fpos.Element.B;
639
// Entry and exit from protected regions
640
iom[N1-1].Element.One = MIN(iom[N1-1].Element.One, In);
641
iom[N1-1].Element.Two = In;
643
for (int iprf=N1; iprf<N2; ++iprf) {
644
iom[iprf].Element.One = MIN(iom[iprf].Element.One, In);
645
iom[iprf].Element.Two = In;
647
ioi[iprf].Element.One = MIN(ioi[iprf].Element.One, In);
648
ioi[iprf].Element.Two = In;
652
for (int iprf=0; iprf<=prf->Length; ++iprf) {
653
printf("NEXTR IOP %4.4i %15i %15i %15i\n", iprf, iop[iprf].Element[MATCH], iop[iprf].Element[INSERTION],
654
iop[iprf].Element[DELETION]);
655
printf("NEXTR IOM %4.4i %15i %15i %15i\n", iprf, iom[iprf].Element.One, iom[iprf].Element.Two,
656
iom[iprf].Element.B);
657
printf("NEXTR IOI %4.4i %15i %15i %15i\n", iprf, ioi[iprf].Element.One, ioi[iprf].Element.Two,
658
ioi[iprf].Element.B);
662
printf("NEXTR ALIGN %4i %4i %4i %4i %4i\n",
663
alignment->JAL1, alignment->JAL2, alignment->JALS, alignment->JALB, alignment->JALE);
667
int xalip_sse41( const struct Profile * const restrict prf, const unsigned char * const restrict Sequence,
668
union lScores * const restrict iop, union Positions * const restrict iom,
669
union Positions * const restrict ioi, const size_t bseq, const size_t lseq,
670
struct Alignment * const restrict alignment,
671
_Bool * const restrict Lock, const size_t N1, const size_t N2, const _Bool Lopt,
672
const int kcut, const size_t NALI)
675
////////////////////////////////////////////////////////////////////////////////////////////
677
////////////////////////////////////////////////////////////////////////////////////////////
683
// Search control fields
685
size_t jlcp = prf->Length / 2;
689
struct Minima ifer __attribute__((aligned(16)));
691
////////////////////////////////////////////////////////////////////////////////////////////
692
// Two step forward one step backward loop
693
////////////////////////////////////////////////////////////////////////////////////////////
697
struct Alignment lAlignment;
698
lAlignment.JALS = NLOW;
704
// Initiate work array
705
InitR(iseq, N1, N2, bseq, lseq, iop, iom, ioi, prf);
707
for (size_t i=0; i<=prf->Length; ++i) {
708
printf("IOP %8i %8i %8i %8i\n",
709
iop[i].Element[MATCH], iop[i].Element[INSERTION], iop[i].Element[DELETION],
710
iop[i].Element[DUMMY]);
711
printf("IOM %8i %8i %8i %8i\n",
712
iom[i].Element.One, iom[i].Element.Two, iom[i].Element.B,
713
iom[i].Element.dummy);
714
printf("IOI %8i %8i %8i %8i\n",
715
ioi[i].Element.One, ioi[i].Element.Two, ioi[i].Element.B,
716
ioi[i].Element.dummy);
720
// Initiate search control values
723
int nlcp = ilcp + jlcp;
725
// Move one sequence position forward
728
////////////////////////////////////////////////////////////////////////////////////////////
729
// Loop over sequence positions
730
////////////////////////////////////////////////////////////////////////////////////////////
734
/////////////////////////////////////////////////////////////////////////////////////////
738
nextR(prf, Sequence, iseq, iop, iom, ioi, lseq, &lAlignment,&ifer, Lock[iseq], N1, N2);
741
nextR_last(prf, Sequence, iseq, iop, iom, ioi, lseq, &lAlignment,&ifer, Lock[iseq], N1, N2);
743
/////////////////////////////////////////////////////////////////////////////////////////
745
if (lAlignment.JALS >= kcut) {
746
// Determine firdst entry of current row
747
if ( (ifer._a > lAlignment.JAL2) || (iseq == lseq) ) {
748
// Fill in missing alignment data
749
lAlignment.JAL1 = lAlignment.JAL1 == 0 ? lAlignment.JALB : lAlignment.JAL1;
750
lAlignment.JAL2 = lAlignment.JAL2 == 0 ? iseq : lAlignment.JAL2;
753
if (lAlignment.JAL2 < lAlignment.JAL1) {
754
fprintf(stderr, "Error: Illegal alignment found in alignment %lu - no list produced.\n"
755
" Alignement should be from %i to %i\n",1+nali, lAlignment.JAL1, lAlignment.JAL2 );
760
fputs("Warning: Too many alignments found - list may be incomplete.\n", stderr);
765
struct Alignment * pAlignment = &alignment[nali];
766
pAlignment->JALS = lAlignment.JALS;
767
pAlignment->JALB = lAlignment.JALB;
768
pAlignment->JAL1 = lAlignment.JAL1;
769
pAlignment->JAL2 = lAlignment.JAL2;
770
pAlignment->JALE = lAlignment.JALE;
772
printf("XALIP ALIGN %lu %4.4i %4.4i %4.4i %4.4i %4.4i\n",nali,
773
lAlignment.JAL1, lAlignment.JAL2, lAlignment.JALS, lAlignment.JALB, lAlignment.JALE);
775
// Protect sequence region
776
for (int jseq=lAlignment.JAL1; jseq<=lAlignment.JAL2; ++jseq) {
780
// Exit if only searching for optimal alignment
786
if ( ++iseq <= lseq ) goto SeqPosLoop;
789
// Have we reached next check point ?
791
// Determine firdst entry of current row
792
if (ifer._b >= ilcp) {
798
// Calculate next check point
802
// Move one sequence position forward
803
if ( ++iseq <= lseq ) goto SeqPosLoop;