/*
 * Round `value` up to the next multiple of the system page size.
 *
 * The page size is queried once per call and handled as size_t, so the
 * mask arithmetic never mixes signed and unsigned operands.  sysconf()
 * reports failure with -1; in that case a 4096-byte page is assumed
 * instead of building a mask from the error code.  The mask trick relies
 * on the page size being a power of two.
 */
static inline size_t page_align_up(size_t value)
{
    long reported = sysconf(_SC_PAGESIZE);
    size_t page = (reported > 0) ? (size_t)reported : (size_t)4096;

    return (value + page - 1) & ~(page - 1);
}

/* PAGE_ALIGN(x): x (an address or a size) rounded up to a page boundary, as size_t. */
#define PAGE_ALIGN(x) page_align_up((size_t)(x))
/*
 * Fragment of the routine that fills in a byte-wide query profile.
 * NOTE(review): the function header, the loop headers around the bias
 * search and the loop over k are not part of this excerpt.
 */
/* Number of 16-byte vectors needed to hold queryLen bytes (rounded up). */
41 int segLen = (queryLen+15)/16;
/* loadOpt = address of profile->data advanced by 15 and masked down to a
 * multiple of 16, i.e. the first 16-byte boundary at or after data
 * (assumes data is a byte buffer - TODO confirm its declared type). */
47 profile->
loadOpt = (__m128i*) ((
size_t) (profile->
data + 15) & ~(0xf)) ;
/* Remember the query length in the profile. */
53 profile->
len = queryLen;
/* Keep the largest negated matrix entry seen so far in bias. */
57 if (bias < -matrix[ i*MATRIX_DIM+j ])
58 bias = -matrix[ i*MATRIX_DIM+j ];
/* Byte-wise write cursor over profile->profile. */
59 pprofile = (u_int8_t*)profile->
profile;
/* For every matrix column i and every segment offset j, emit one byte per
 * query position j+k*segLen that lies inside the query: the matrix entry
 * for (query symbol, i) with bias added.
 * NOTE(review): what is written for positions past queryLen is not visible
 * in this excerpt. */
61 for(i=0; i<MATRIX_DIM; i++)
62 for(j=0; j<segLen; j++)
64 if(j+k*segLen < queryLen)
65 *(pprofile++) = matrix[query[j+k*segLen]*MATRIX_DIM+i]+bias;
/* Debug output: each query symbol printed as a tab-prefixed character,
 * offset from 'A'. */
71 for(i=0; i<queryLen; ++i)
debug(
"\t%c",query[i]+
'A');
/*
 * Fragment of a 16-lane, unsigned-byte SSE2 scoring kernel that uses one
 * fixed gap penalty (vDelFixed).
 * NOTE(review): the function header, several declarations (loadOpt,
 * storeOpt, vTmp, vRD, vStoreOpt, ...), closing braces and the `shortcut`
 * label are not part of this excerpt.
 */
/* Best score found; reduced from vMaxScore at the end. */
92 unsigned char MaxScore = 0;
/* Number of 16-byte vectors covering the query (rounded up). */
93 int segLength = (query->
len+15)/16;
97 __m128i * current_profile;
/* All-zero vector, reused as the initial value of several vectors below. */
100 __m128i vMinimums = _mm_set1_epi32(0);
/* Every byte lane holds -gapOpen.
 * NOTE(review): presumably gapOpen is stored as a non-positive value so that
 * this is a positive penalty - confirm against the options definition. */
102 __m128i vDelFixed = _mm_set1_epi8(-options->
gapOpen);
/* Every byte lane holds the profile bias. */
103 __m128i vBias = _mm_set1_epi8(query->
bias);
105 __m128i vMaxScore = vMinimums;
109 __m128i vCD = vMinimums;
110 __m128i zero = vMinimums;
/* Clear both score buffers. */
118 for(i=0;
LIKELY(i<segLength); i++){
119 _mm_store_si128(loadOpt+i,zero);
120 _mm_store_si128(storeOpt+i,zero);
/* One iteration per database symbol. */
126 for(j=0;
LIKELY(j<dbLen); j++){
/* Start from the last vector of storeOpt, moved up by one byte lane
 * (lane 0 becomes zero). */
135 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
136 vStoreOpt = _mm_slli_si128(vStoreOpt, 1);
/* Profile rows for the current database symbol. */
140 current_profile = query->
profile + db[j]*segLength;
/* Main pass over all query vectors. */
150 for(i=0;
LIKELY(i<segLength); i++){
/* vRD = loadOpt[i] minus the gap penalty, saturating at 0. */
151 vTmp = _mm_load_si128(loadOpt+i);
152 vRD = _mm_subs_epu8(vTmp,vDelFixed);
/* Add the biased profile score, then remove the bias; both steps saturate,
 * so negative results clamp to 0. */
155 vStoreOpt = _mm_adds_epu8(vStoreOpt, *(current_profile+i));
156 vStoreOpt = _mm_subs_epu8(vStoreOpt, vBias);
/* Track the per-lane maximum before the gap candidates are merged in. */
159 vMaxScore = _mm_max_epu8(vMaxScore, vStoreOpt);
/* Take the best of the match score and the two gap candidates. */
162 vStoreOpt = _mm_max_epu8(vStoreOpt, vRD);
163 vStoreOpt = _mm_max_epu8(vStoreOpt, vCD);
166 _mm_store_si128(storeOpt+i, vStoreOpt);
/* Gap candidate carried to the next query vector. */
169 vCD = _mm_subs_epu8(vStoreOpt, vDelFixed);
/* Correction passes: at most 16 rounds, each moving vCD up by one byte lane
 * and merging it into storeOpt again. */
176 for(i=0;
LIKELY(i<16);++i) {
179 vCD = _mm_slli_si128(vCD,1);
181 for(k=0;
LIKELY(k<segLength);++k) {
183 vTmp = _mm_load_si128(storeOpt+k);
184 vStoreOpt = _mm_max_epu8(vTmp,vCD);
185 _mm_store_si128(storeOpt+k,vStoreOpt);
/* All 16 lanes unchanged by the merge: leave the correction loops. */
188 if(
UNLIKELY(_mm_movemask_epi8(_mm_cmpeq_epi8(vTmp,vStoreOpt)) == 0xFFFF))
goto shortcut;
191 vCD = _mm_subs_epu8(vStoreOpt,vDelFixed);
/* Debug dump of storeOpt: byte lane ii of vector jj is printed when
 * ii*segLength+jj is inside the query. */
198 for(ii=0; ii<16;++ii) {
199 for(jj=0; jj<segLength;++jj) {
200 if(ii*segLength+jj < query->len)
201 debug(
"%d\t",(
int)((
unsigned char*)storeOpt)[ii+jj*16]);
/* Horizontal maximum: after folding by 8, 4, 2 and 1 bytes, byte 0 holds
 * the maximum of all 16 lanes. */
210 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 8));
211 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 4));
212 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 2));
213 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 1));
/* Low byte of the extracted 16-bit word is the overall maximum. */
214 MaxScore = (
unsigned char)_mm_extract_epi16(vMaxScore,0);
/* Score plus bias reaches the 8-bit limit, so the byte arithmetic may have
 * saturated.
 * NOTE(review): the statement guarded by this condition is not part of this
 * excerpt. */
215 if ((
int)MaxScore + (int)query->
bias >=255)
/* Otherwise report the byte score as a double. */
217 return((
double)MaxScore);
/*
 * Fragment of a 16-lane, unsigned-byte SSE2 scoring kernel that uses
 * separate gap-open (vDelFixed) and gap-extend (vDelIncr) penalties.
 * NOTE(review): the function header, several declarations (loadOpt,
 * storeOpt, vTmp, vRD, vStoreOpt, ...), closing braces and the `shortcut`
 * label are not part of this excerpt.
 */
/* Best score found; reduced from vMaxScore at the end. */
235 unsigned char MaxScore = 0;
/* Number of 16-byte vectors covering the query (rounded up). */
236 int segLength = (query->
len+15)/16;
/* Per-vector gap scores kept across database symbols (buffer supplied by
 * the query profile). */
240 __m128i *
rD = query->
rD;
241 __m128i * current_profile;
/* All-zero vector, reused as the initial value of several vectors below. */
244 __m128i vMinimums = _mm_set1_epi32(0);
/* Every byte lane holds -gapExt / -gapOpen.
 * NOTE(review): presumably both options are stored as non-positive values so
 * that these are positive penalties - confirm against the options definition. */
246 __m128i vDelIncr = _mm_set1_epi8(-options->
gapExt);
247 __m128i vDelFixed = _mm_set1_epi8(-options->
gapOpen);
/* Every byte lane holds the profile bias. */
248 __m128i vBias = _mm_set1_epi8(query->
bias);
250 __m128i vMaxScore = vMinimums;
254 __m128i vCD = vMinimums;
255 __m128i zero = vMinimums;
/* Clear both score buffers and the rD buffer. */
267 for(i=0;
LIKELY(i<segLength); i++){
268 _mm_store_si128(loadOpt+i,zero);
269 _mm_store_si128(storeOpt+i,zero);
270 _mm_store_si128(rD+i,zero);
/* One iteration per database symbol. */
276 for(j=0;
LIKELY(j<dbLen); j++) {
/* Start from the last vector of storeOpt, moved up by one byte lane
 * (lane 0 becomes zero). */
283 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
284 vStoreOpt = _mm_slli_si128(vStoreOpt, 1);
/* Profile rows for the current database symbol. */
288 current_profile = query->
profile + db[j]*segLength;
/* Main pass over all query vectors. */
298 for(i=0;
LIKELY(i<segLength); i++){
/* rD[i] = max(rD[i] - extend, loadOpt[i] - open), saturating at 0. */
299 vRD = _mm_load_si128(rD+i);
300 vRD = _mm_subs_epu8(vRD, vDelIncr);
301 vTmp = _mm_load_si128(loadOpt+i);
302 vTmp = _mm_subs_epu8(vTmp,vDelFixed);
303 vRD = _mm_max_epu8(vRD,vTmp);
304 _mm_store_si128(rD+i, vRD);
/* Add the biased profile score, then remove the bias; both steps saturate,
 * so negative results clamp to 0. */
307 vStoreOpt = _mm_adds_epu8(vStoreOpt, *(current_profile+i));
308 vStoreOpt = _mm_subs_epu8(vStoreOpt, vBias);
/* Track the per-lane maximum before the gap candidates are merged in. */
311 vMaxScore = _mm_max_epu8(vMaxScore, vStoreOpt);
/* Take the best of the match score and the two gap candidates. */
314 vStoreOpt = _mm_max_epu8(vStoreOpt, vRD);
315 vStoreOpt = _mm_max_epu8(vStoreOpt, vCD);
318 _mm_store_si128(storeOpt+i, vStoreOpt);
/* vCD = max(vCD - extend, stored score - open) for the next query vector. */
321 vStoreOpt = _mm_subs_epu8(vStoreOpt, vDelFixed);
322 vCD = _mm_subs_epu8(vCD, vDelIncr);
323 vCD = _mm_max_epu8(vCD, vStoreOpt);
/* Reload loadOpt[i]; it is the value the next iteration adds the profile to. */
326 vStoreOpt = _mm_load_si128(loadOpt+i);
/* Correction passes: at most 16 rounds, each moving vCD up by one byte lane
 * and merging it into storeOpt again. */
330 for(i=0;
LIKELY(i<16);++i) {
333 vCD = _mm_slli_si128(vCD,1);
335 for(k=0;
LIKELY(k<segLength);++k) {
337 vStoreOpt = _mm_load_si128(storeOpt+k);
338 vStoreOpt = _mm_max_epu8(vStoreOpt,vCD);
340 _mm_store_si128(storeOpt+k,vStoreOpt);
/* Candidates for the next vector: open a gap from the stored score, or
 * extend the running gap. */
343 vStoreOpt = _mm_subs_epu8(vStoreOpt,vDelFixed);
344 vCD = _mm_subs_epu8(vCD, vDelIncr);
/* Saturating vCD - vStoreOpt is zero in every lane, i.e. the extended gap
 * does not exceed the newly opened one anywhere: leave the correction loops. */
346 if(
UNLIKELY(_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(vCD,vStoreOpt),zero)) == 0xFFFF))
goto shortcut;
/* Debug dump of storeOpt: byte lane ii of vector jj is printed when
 * ii*segLength+jj is inside the query. */
353 for(ii=0; ii<16;++ii) {
354 for(jj=0; jj<segLength;++jj) {
355 if(ii*segLength+jj < query->len)
356 debug(
"%d\t",(
int)((
unsigned char*)storeOpt)[ii+jj*16]);
/* Horizontal maximum: after folding by 8, 4, 2 and 1 bytes, byte 0 holds
 * the maximum of all 16 lanes. */
365 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 8));
366 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 4));
367 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 2));
368 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 1));
/* Low byte of the extracted 16-bit word is the overall maximum. */
369 MaxScore = (
unsigned char)_mm_extract_epi16(vMaxScore,0);
/* Score plus bias reaches the 8-bit limit, so the byte arithmetic may have
 * saturated.
 * NOTE(review): the statement guarded by this condition is not part of this
 * excerpt. */
370 if ((
int)MaxScore + (int)query->
bias >=255)
/* Otherwise report the byte score as a double. */
372 return((
double)MaxScore);