00001
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "DynProgr_sse_byte.h"
00031 #include "debug.h"
00032 #include <unistd.h>
00033 #include <stdio.h>
00034 #include <float.h>
00035
00036 #define PAGE_ALIGN(x) (((size_t)(x)+sysconf(_SC_PAGESIZE)-1)&~(sysconf(_SC_PAGESIZE)-1))
00037
00040 EXPORT ProfileByte * swps3_createProfileByteSSE( const char * query, int queryLen, SBMatrix matrix ){
00041 int segLen = (queryLen+15)/16;
00042 int i,j,k;
00043 int bias = 0;
00044 u_int8_t * pprofile;
00045 ProfileByte * profile = malloc( sizeof(ProfileByte)+segLen*(MATRIX_DIM+3)*sizeof(__m128i)+64+2*sysconf(_SC_PAGESIZE) );
00046
00047 profile->loadOpt = (__m128i*) ((size_t) (profile->data + 15) & ~(0xf)) ;
00048 profile->storeOpt = profile->loadOpt + segLen;
00049 profile->rD = profile->storeOpt + segLen;
00050 profile->profile = (__m128i*) PAGE_ALIGN(profile->rD + segLen);
00051
00052
00053 profile->len = queryLen;
00054
00055 for(i=0; i<MATRIX_DIM; i++)
00056 for(j=0; j<MATRIX_DIM; j++)
00057 if (bias < -matrix[ i*MATRIX_DIM+j ])
00058 bias = -matrix[ i*MATRIX_DIM+j ];
00059 pprofile = (u_int8_t*)profile->profile;
00060
00061 for(i=0; i<MATRIX_DIM; i++)
00062 for(j=0; j<segLen; j++)
00063 for(k=0; k<16; k++)
00064 if(j+k*segLen < queryLen)
00065 *(pprofile++) = matrix[query[j+k*segLen]*MATRIX_DIM+i]+bias;
00066 else
00067 *(pprofile++) = bias;
00068 profile->bias = bias;
00069
00070 #ifdef DEBUG
00071 for(i=0; i<queryLen; ++i) debug("\t%c",query[i]+'A');
00072 debug("\n");
00073 #endif
00074 return profile;
00075 }
00076
00077
00078 EXPORT double swps3_alignmentByteSSE( ProfileByte * query, const char * db, int dbLen, Options * options )
00079 {
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091 int i, j;
00092 unsigned char MaxScore = 0;
00093 int segLength = (query->len+15)/16;
00094
00095 __m128i * loadOpt = query->loadOpt;
00096 __m128i * storeOpt = query->storeOpt;
00097 __m128i * rD = query->rD;
00098 __m128i * current_profile;
00099 __m128i * swap;
00100
00101 __m128i vMinimums = _mm_set1_epi32(0);
00102
00103 __m128i vDelIncr = _mm_set1_epi8(-options->gapExt);
00104 __m128i vDelFixed = _mm_set1_epi8(-options->gapOpen);
00105 __m128i vBias = _mm_set1_epi8(query->bias);
00106
00107 __m128i vMaxScore = vMinimums;
00108
00109 __m128i vStoreOpt;
00110 __m128i vRD;
00111 __m128i vCD = vMinimums;
00112 __m128i zero = vMinimums;
00113 __m128i vTmp;
00114 #ifdef DEBUG
00115 int ii,jj;
00116 #endif
00117
00118
00119
00120 for(i=0; LIKELY(i<segLength); i++){
00121 _mm_store_si128(loadOpt+i,zero);
00122 _mm_store_si128(storeOpt+i,zero);
00123 _mm_store_si128(rD+i,zero);
00124 }
00125
00126
00127
00128
00129 for(j=0; LIKELY(j<dbLen); j++){
00130
00131
00132
00133
00134
00135 vCD = zero;
00136
00137
00138
00139 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
00140 vStoreOpt = _mm_slli_si128(vStoreOpt, 1);
00141
00142
00143
00144 current_profile = query->profile + db[j]*segLength;
00145
00146
00147
00148 swap = storeOpt;
00149 storeOpt = loadOpt;
00150 loadOpt = swap;
00151
00152
00153
00154 for(i=0; LIKELY(i<segLength); i++){
00155 vRD = _mm_load_si128(rD+i);
00156 vRD = _mm_subs_epu8(vRD, vDelIncr);
00157 vTmp = _mm_load_si128(loadOpt+i);
00158 vTmp = _mm_subs_epu8(vTmp,vDelFixed);
00159 vRD = _mm_max_epu8(vRD,vTmp);
00160 _mm_store_si128(rD+i, vRD);
00161
00162
00163 vStoreOpt = _mm_adds_epu8(vStoreOpt, *(current_profile+i));
00164 vStoreOpt = _mm_subs_epu8(vStoreOpt, vBias);
00165
00166
00167 vMaxScore = _mm_max_epu8(vMaxScore, vStoreOpt);
00168
00169
00170 vStoreOpt = _mm_max_epu8(vStoreOpt, vRD);
00171 vStoreOpt = _mm_max_epu8(vStoreOpt, vCD);
00172
00173
00174 _mm_store_si128(storeOpt+i, vStoreOpt);
00175
00176
00177 vStoreOpt = _mm_subs_epu8(vStoreOpt, vDelFixed);
00178 vCD = _mm_subs_epu8(vCD, vDelIncr);
00179 vCD = _mm_max_epu8(vCD, vStoreOpt);
00180
00181
00182 vStoreOpt = _mm_load_si128(loadOpt+i);
00183 }
00184
00185
00186 for(i=0;LIKELY(i<16);++i){
00187 int k;
00188
00189 vCD = _mm_slli_si128(vCD,1);
00190
00191 for(k=0;LIKELY(k<segLength);++k) {
00192
00193 vStoreOpt = _mm_load_si128(storeOpt+k);
00194 vStoreOpt = _mm_max_epu8(vStoreOpt,vCD);
00195 _mm_store_si128(storeOpt+k,vStoreOpt);
00196
00197
00198 vStoreOpt = _mm_subs_epu8(vStoreOpt,vDelFixed);
00199 vCD = _mm_subs_epu8(vCD, vDelIncr);
00200
00201 if(UNLIKELY(_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(vCD,vStoreOpt),zero)) == 0xFFFF)) goto shortcut;
00202 }
00203 }
00204 shortcut:
00205
00206 #ifdef DEBUG
00207 debug("%c\t",db[j]);
00208 for(ii=0; ii<16;++ii) {
00209 for(jj=0; jj<segLength;++jj) {
00210 if(ii*segLength+jj < query->len)
00211 debug("%d\t",(int)((unsigned char*)storeOpt)[ii+jj*16]);
00212 }
00213 }
00214 debug("\n");
00215 #endif
00216
00217
00218
00219
00220
00221 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
00222 }
00223
00224 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 8));
00225 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 4));
00226 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 2));
00227 vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 1));
00228 MaxScore = (unsigned char)_mm_extract_epi16(vMaxScore,0);
00229 if ((int)MaxScore + (int)query->bias >=255)
00230 return DBL_MAX;
00231 return((double)MaxScore);
00232 }
00233
00234
00235 EXPORT void swps3_freeProfileByteSSE( ProfileByte * profile ){
00236 free( profile );
00237 }
00238