1
/*****************************************************************
2
* Unipro UGENE - Integrated Bioinformatics Suite
4
* This source code is distributed under the terms of the
5
* GNU General Public License. See the files COPYING and LICENSE
8
* Author: Kursad Albayraktaroglu, Jizhu Li, Ivan Efremov
9
*****************************************************************/
11
//Protection from automatically generated .pro files.
12
//This variable is set in the local 'spu' makefile
17
#include <spu_intrinsics.h>
18
#include <spu_mfcio.h>
22
#include "hmmer_spu.h"
24
#define SEQ_BUFFER_SIZE 10000
25
#define HMM_BUFFER_SIZE 150000
28
#define ARR_SIZE (MAX_HMM*4+12)
30
atomic_ea_t lastConsumed __attribute__ ((aligned (128)));
31
atomic_ea_t nomoreSeqs __attribute__ ((aligned (128)));
32
atomic_ea_t bufferEntries __attribute__ ((aligned (128)));
33
mutex_ea_t bufferMutex __attribute__ ((aligned (128)));
34
cond_ea_t bufferEmptyCond __attribute__ ((aligned (128)));
37
/* One element of an MFC list-transfer (GETL/PUTL) descriptor list.
 * NOTE(review): the member declarations (original lines 38-47) are missing
 * from this extraction — the struct body is truncated here. Based on the uses
 * below (`list[i].size.all32`, `list[i].ea_low`) it presumably holds a
 * size union and the low 32 bits of the effective address; confirm against
 * the upstream source. */
struct dma_list_elem {
48
/* Static DMA descriptor list: up to 16 chunks per transfer, 16-byte aligned
 * as the MFC list-element format requires. */
struct dma_list_elem list[16] __attribute__((aligned (16)));
50
/*
 * get_large_region: DMA `nbytes` of main memory starting at effective
 * address `ea_low` into SPE local store at `dst`, via a single MFC list
 * transfer (GETL) built in the file-scope `list[]` array.
 *
 * NOTE(review): this extraction is missing the original lines 54-58 and
 * 62-66 — the declarations of `sz` and `i` and the loop that walks the
 * region one chunk at a time were dropped. Recover them from the upstream
 * source before compiling; only one loop iteration's worth of list
 * population is visible below.
 */
void get_large_region(void* dst, unsigned int ea_low, unsigned int nbytes){
52
unsigned int tagid = 0; /* single tag: one tag-mask wait covers the whole list */
53
unsigned int listsize; /* byte size of the populated portion of list[] */
59
/* Clamp each list element to 16 KB — presumably the per-element MFC
 * transfer-size limit; confirm against the Cell MFC documentation. */
sz = (nbytes < 16384) ? nbytes : 16384;
60
list[i].size.all32 = sz;
61
list[i].ea_low = ea_low;
67
/* Total size of the descriptor list actually filled in (i elements). */
listsize = i*sizeof(struct dma_list_elem);
68
/* Issue the list transfer, then block until the tag group completes. */
spu_mfcdma32(dst,(unsigned int) &list[0], listsize, tagid, MFC_GETL_CMD);
69
spu_writech(MFC_WrTagMask, 1 << tagid);
70
/* 2 == tag-status "update all": wait for all selected tags to finish. */
(void) spu_mfcstat(2);
73
/*
 * SPE entry point. Pulls its init context from PPE memory, DMAs the HMM
 * into local store, then consumes sequence jobs from a PPE-managed queue
 * using double-buffered DMA, scoring each sequence with the SIMD Viterbi
 * kernel and writing results back.
 *
 * NOTE(review): this extraction is missing many original lines (e.g. 79,
 * 84, 99-100, 102, 104-106, 114, 128-130, 135, 150, 154-155, 158-159,
 * 177, 179, 181-183, 192-193, 196-198, 200-201, 203-208, 210, 221, 223,
 * 227, 229, 231, 236-240, 244, 246, 252-256, 262, 269, 273-274, 276-278).
 * In particular the declarations of `mySPEID`, `hmmD`, `hmm` and
 * `viterbiScore`, the main `while`/`for` loop header, several `else`
 * branches, and the function's closing brace are not visible here —
 * recover them from the upstream source before compiling.
 */
int main(addr64 spe_id, addr64 param) {
74
unsigned int tag_id=0; /* DMA tag id */
75
int j,k; /* General purpose counters */
76
void* hmmAddr; /* Address for PPE HMM data buffer */
77
void* jobQueueBase; /* Address for the sequence job queue */
78
int hmmDMALength; /* Length of PPE HMM data */
80
/* 128-byte alignment throughout: DMA source/target buffers must sit on
 * cache-line boundaries for efficient MFC transfers. */
volatile spe_initContext ctx __attribute__ ((aligned (128)));
81
/* Two sequence buffers — one processed while the other is being DMA'd. */
volatile unsigned char seqBuffer[2][SEQ_BUFFER_SIZE] __attribute__ ((aligned (128)));
82
volatile unsigned char hmmBuffer[HMM_BUFFER_SIZE] __attribute__ ((aligned (128))) = {0};
83
volatile spe_jobEntity jobInfos[2] __attribute__ ((aligned (128)));
85
/* These arrays assume a maximum HMM size of 512 states. (2060 = (512*4)+12) */
86
/* Current/previous Viterbi DP rows for match/insert/delete scores. */
char mscCurMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
87
char iscCurMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
88
char dscCurMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
89
char mscPrevMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
90
char iscPrevMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
91
char dscPrevMem[ARR_SIZE] __attribute__ ((aligned (128))) = {0};
93
/* Skip the 12-byte header of each row buffer; presumably aligns the score
 * vectors for SIMD access — confirm against SPEViterbiSIMD. */
int* mscCur = (int*) (mscCurMem + 0xC);
94
int* iscCur = (int*) (iscCurMem + 0xC);
95
int* dscCur = (int*) (dscCurMem + 0xC);
96
int* mscPrev = (int*) (mscPrevMem + 0xC);
97
int* iscPrev = (int*) (iscPrevMem + 0xC);
98
int* dscPrev = (int*) (dscPrevMem + 0xC);
101
hmm_offsets offsetsD;
103
/* NOTE(review): `offsets` is used below without visible initialization;
 * the missing lines presumably set it to &offsetsD — verify upstream. */
hmm_offsets* offsets;
107
/* Distinct DMA tags for job-entity get and result put-back transfers. */
int entityDMAtag = 6;
108
int entityDMAtag_back = 4;
109
int next_idx, buf_idx = 0; /* double-buffer indices (0/1) */
110
unsigned int seqLength[2];
111
unsigned int seqAddress[2];
112
unsigned int entityAddress[2];
113
unsigned int seqDMALength[2];
115
/* Zero all local-store buffers before use. */
memset( &hmmD, 0, sizeof(spe_hmm) );
116
memset( mscCurMem, 0, ARR_SIZE );
117
memset( iscCurMem, 0, ARR_SIZE );
118
memset( dscCurMem, 0, ARR_SIZE );
119
memset( mscPrevMem, 0, ARR_SIZE );
120
memset( iscPrevMem, 0, ARR_SIZE );
121
memset( dscPrevMem, 0, ARR_SIZE );
123
memset( &ctx, 0, sizeof(spe_initContext) );
124
memset( seqBuffer, 0, 2 * SEQ_BUFFER_SIZE );
125
memset( hmmBuffer, 0, HMM_BUFFER_SIZE );
126
memset( jobInfos, 0, 2 * sizeof(spe_jobEntity) );
127
/* Allocate SPE HMM and offset data structure buffers */
131
/* The first thing an SPE thread does is DMA its job context information in.
132
This data is a single 128-byte cache line and contains information about the
133
first job, the HMM to be processed and so on. */
134
mySPEID = (int) spe_id.ui[1];
136
/* Initiate DMA for context */
137
spu_mfcdma32((void *)(&ctx),(unsigned int) param.ull,sizeof(spe_initContext),tag_id,MFC_GET_CMD);
138
spu_writech(MFC_WrTagMask, 1<<tag_id);
139
(void) spu_mfcstat(2);
141
// Decode the context and populate local variables.
142
lastConsumed = (atomic_ea_t) ctx.lastConsumed_addr;
143
nomoreSeqs = (atomic_ea_t) ctx.nomoreSeqs_addr;
144
bufferEntries = (atomic_ea_t) ctx.bufferEntries_addr;
145
bufferEmptyCond = (cond_ea_t) ctx.bufferEmptyCond_addr;
146
bufferMutex = (mutex_ea_t) ctx.bufferMutex_addr;
147
jobQueueBase = (void*) ctx.jobqueue_begin_addr;
148
hmmAddr = (void*) ctx.hmm_buf_begin;
149
hmmDMALength = ctx.hmm_buf_length;
151
//Check the length of the HMM
152
if (hmmDMALength > HMM_BUFFER_SIZE){
153
/* NOTE(review): the error-exit statements (original lines 154-155)
 * following this printf are missing from this extraction. */
printf("Error: The size of this HMM(%d) exceeds the maximum buffer size(%d).\n", hmmDMALength, HMM_BUFFER_SIZE);
156
// Transfer the HMM from the main memory to SPE LS.
157
get_large_region((void*)hmmBuffer, (unsigned int)hmmAddr, (unsigned int)hmmDMALength);
160
// Set up the offsets to create SPE LS HMM profile data structure
161
offsets->M = ctx.hmm_M;
162
offsets->escmem_offset = ctx.offsets.escmem_offset;
163
offsets->bscmem_offset = ctx.offsets.bscmem_offset;
164
offsets->tscmem_offset = ctx.offsets.tscmem_offset;
165
offsets->iscmem_offset = ctx.offsets.iscmem_offset;
166
offsets->mscmem_offset = ctx.offsets.mscmem_offset;
167
offsets->xscmem_offset = ctx.offsets.xscmem_offset;
168
offsets->tsc_TMM_mem_offset = ctx.offsets.tsc_TMM_mem_offset;
169
offsets->tsc_TMI_mem_offset = ctx.offsets.tsc_TMI_mem_offset;
170
offsets->tsc_TMD_mem_offset = ctx.offsets.tsc_TMD_mem_offset;
171
offsets->tsc_TIM_mem_offset = ctx.offsets.tsc_TIM_mem_offset;
172
offsets->tsc_TII_mem_offset = ctx.offsets.tsc_TII_mem_offset;
173
offsets->tsc_TDM_mem_offset = ctx.offsets.tsc_TDM_mem_offset;
174
offsets->tsc_TDD_mem_offset = ctx.offsets.tsc_TDD_mem_offset;
176
/* Allocate and populate the SPE HMM data structure*/
178
allocateSpeHMM (hmm,(unsigned long int)hmmBuffer,offsets);
180
//Initiate the first sequence DMA
184
/* Job queue entries are 128 bytes apart; `k` is the job index.
 * NOTE(review): the code that obtains the first value of `k` (original
 * lines 181-183) is missing here. */
entityAddress[buf_idx] = (unsigned int) jobQueueBase + (128 * k);
185
spu_mfcdma32((void *)(&(jobInfos[buf_idx])),entityAddress[buf_idx], sizeof(spe_jobEntity),entityDMAtag,MFC_GET_CMD);
186
spu_writech(MFC_WrTagMask, 1 << entityDMAtag);
187
(void) spu_mfcstat(2);
188
seqLength[buf_idx] = jobInfos[buf_idx].seqLength;
189
seqAddress[buf_idx] = jobInfos[buf_idx].seqAddr;
190
seqDMALength[buf_idx] = jobInfos[buf_idx].seqDMALength;
191
/* Sequence DMA uses the buffer index itself (0/1) as the tag. */
spu_mfcdma32((void *)(seqBuffer[buf_idx]),seqAddress[buf_idx], seqDMALength[buf_idx], buf_idx, MFC_GET_CMD);
194
// MAIN PROCESSING LOOP
195
// Retrieve sequences into the buffer and process one by one.
199
/* NOTE(review): the loop header itself (original lines 196-198) and the
 * index-fetch code (200-208) are missing from this extraction. */
// Read and increment the currently available sequence index.
202
if (k == WAIT_FOR_MORE_SEQUENCES){
209
//DOUBLE-BUFFERED DMA OPERATION SEQUENCE
211
// 1. Initiate and complete entity DMA transfer for the next iteration.
212
next_idx = buf_idx ^ 1;
213
entityAddress[next_idx] = (unsigned int) jobQueueBase + (128 * k);
214
spu_mfcdma32((void *)(&(jobInfos[next_idx])),entityAddress[next_idx], sizeof(spe_jobEntity), entityDMAtag, MFC_GET_CMD);
215
spu_writech(MFC_WrTagMask, 1 << entityDMAtag);
216
(void) spu_mfcstat(2);
217
seqLength[next_idx] = jobInfos[next_idx].seqLength;
218
seqAddress[next_idx] = jobInfos[next_idx].seqAddr;
219
seqDMALength[next_idx] = jobInfos[next_idx].seqDMALength;
220
// 2. Initiate sequence DMA transfer for the next iteration.
222
spu_mfcdma32((void *)(seqBuffer[next_idx]),seqAddress[next_idx], seqDMALength[next_idx],next_idx,MFC_GET_CMD);
224
// 3. Wait for the last transfer to complete.
225
spu_writech(MFC_WrTagMask, 1 << buf_idx);
226
(void)spu_mfcstat(2);
228
// 4. Use the data from the previous transfer
230
viterbiScore = SPEViterbiSIMD(hmm,seqBuffer[buf_idx],seqLength[buf_idx],mscCur,mscPrev,iscCur,iscPrev,dscCur,dscPrev);
232
//DMA the result back to the PPE
233
jobInfos[buf_idx].seqProcessed = 1;
234
jobInfos[buf_idx].seqViterbiResult = viterbiScore;
235
spu_mfcdma32((void *)(&(jobInfos[buf_idx])),entityAddress[buf_idx], sizeof(spe_jobEntity), entityDMAtag_back, MFC_PUT_CMD);
241
/* Drain pipeline after the loop: score the last buffered sequence.
 * NOTE(review): the buffer swap / loop close (original lines 236-240)
 * is missing here. */
//Process the final DMA
242
spu_writech(MFC_WrTagMask, 1 << buf_idx);
243
(void)spu_mfcstat(2);
245
viterbiScore = SPEViterbiSIMD(hmm,seqBuffer[buf_idx],seqLength[buf_idx],mscCur,mscPrev,iscCur,iscPrev,dscCur,dscPrev);
247
//DMA the result back to the PPE
248
jobInfos[buf_idx].seqProcessed = 1;
249
jobInfos[buf_idx].seqViterbiResult = viterbiScore;
250
spu_mfcdma32((void *)(&(jobInfos[buf_idx])),entityAddress[buf_idx], sizeof(spe_jobEntity), entityDMAtag_back, MFC_PUT_CMD);
251
(void) spu_mfcstat(2);
257
/* Job-index acquisition, synchronized with the PPE producer via the
 * sync-library mutex/cond/atomic effective-address primitives.
 * NOTE(review): lines 252-256 before this are missing; this fragment
 * presumably belongs to a helper or an earlier part of the loop —
 * confirm placement against the upstream source. */
mutex_lock(bufferMutex);
258
if (atomic_read(bufferEntries)){
259
//If we're here, we can fetch a sequence from the buffer.
260
k = atomic_inc_return(lastConsumed);
261
atomic_dec(bufferEntries);
263
// There are no ready entries in the PPE buffer. This could happen in two situations:
264
// 1) The SPEs have overtaken the PPE sequence file reading process.
265
// 2) There are no sequences left to read.
266
if (atomic_read(nomoreSeqs)==1){
267
//No more sequences to read. Exit.
268
k = NO_MORE_SEQUENCES;
270
k = WAIT_FOR_MORE_SEQUENCES;
271
//Wait until the PPE replenishes the buffer.
272
cond_wait(bufferEmptyCond, bufferMutex);
275
mutex_unlock(bufferMutex);
279
#endif //UGENE_CELL_SPU