DynProgr_sse_byte.c

Go to the documentation of this file.
00001 
00005 /*
00006  * Copyright (c) 2007-2008 ETH Zürich, Institute of Computational Science
00007  *
00008  * Permission is hereby granted, free of charge, to any person
00009  * obtaining a copy of this software and associated documentation
00010  * files (the "Software"), to deal in the Software without
00011  * restriction, including without limitation the rights to use,
00012  * copy, modify, merge, publish, distribute, sublicense, and/or sell
00013  * copies of the Software, and to permit persons to whom the
00014  * Software is furnished to do so, subject to the following
00015  * conditions:
00016  *
00017  * The above copyright notice and this permission notice shall be
00018  * included in all copies or substantial portions of the Software.
00019  *
00020  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00021  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
00022  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00023  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00024  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00025  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
00027  * OTHER DEALINGS IN THE SOFTWARE.
00028  */
00029 
00030 #include "DynProgr_sse_byte.h"
00031 #include "debug.h"
00032 #include <unistd.h>
00033 #include <stdio.h>
00034 #include <float.h>
00035 
00036 #define PAGE_ALIGN(x) (((size_t)(x)+sysconf(_SC_PAGESIZE)-1)&~(sysconf(_SC_PAGESIZE)-1))
00037 
00040 EXPORT ProfileByte * swps3_createProfileByteSSE( const char * query, int queryLen, SBMatrix matrix ){
00041         int segLen  = (queryLen+15)/16;
00042         int i,j,k;
00043         int bias = 0;
00044         u_int8_t * pprofile;
00045         ProfileByte * profile = malloc( sizeof(ProfileByte)+segLen*(MATRIX_DIM+3)*sizeof(__m128i)+64+2*sysconf(_SC_PAGESIZE) );
00046 
00047         profile->loadOpt     = (__m128i*) ((size_t) (profile->data + 15) & ~(0xf)) ;
00048         profile->storeOpt    = profile->loadOpt + segLen;
00049         profile->rD          = profile->storeOpt + segLen;
00050         profile->profile     = (__m128i*) PAGE_ALIGN(profile->rD + segLen);
00051 
00052         /* Init the profile */
00053         profile->len = queryLen;
00054         /* Init the byte profile */
00055         for(i=0; i<MATRIX_DIM; i++)
00056                 for(j=0; j<MATRIX_DIM; j++)
00057                         if (bias < -matrix[ i*MATRIX_DIM+j ])
00058                                 bias = -matrix[ i*MATRIX_DIM+j ];
00059         pprofile = (u_int8_t*)profile->profile;
00060 
00061         for(i=0; i<MATRIX_DIM; i++)
00062                 for(j=0; j<segLen; j++)
00063                         for(k=0; k<16; k++)
00064                                 if(j+k*segLen < queryLen)
00065                                         *(pprofile++) = matrix[query[j+k*segLen]*MATRIX_DIM+i]+bias;
00066                                 else
00067                                         *(pprofile++) = bias;
00068         profile->bias = bias;
00069 
00070 #ifdef DEBUG
00071         for(i=0; i<queryLen; ++i) debug("\t%c",query[i]+'A');
00072         debug("\n");
00073 #endif
00074         return profile;
00075 }
00076 
00077 
00078 EXPORT double swps3_alignmentByteSSE( ProfileByte * query, const char * db, int dbLen, Options * options )
00079 {
00080 
00081         /**********************************************************************
00082         * This version of the code implements the idea presented in
00083         *
00084         ***********************************************************************
00085         * Striped Smith-Waterman speeds database searches six times over other
00086         * SIMD implementations
00087         *
00088         * Michael Farrar, Bioinformatics, 23(2), pp. 156-161, 2007
00089         **********************************************************************/
00090 
00091         int i, j;
00092         unsigned char MaxScore = 0;
00093         int segLength = (query->len+15)/16; /* the segment length */
00094 
00095         __m128i * loadOpt  = query->loadOpt;
00096         __m128i * storeOpt = query->storeOpt;
00097         __m128i * rD       = query->rD;
00098         __m128i * current_profile;
00099         __m128i * swap;
00100 
00101         __m128i vMinimums = _mm_set1_epi32(0);
00102 
00103         __m128i vDelIncr  = _mm_set1_epi8(-options->gapExt);
00104         __m128i vDelFixed = _mm_set1_epi8(-options->gapOpen);
00105         __m128i vBias     = _mm_set1_epi8(query->bias);
00106 
00107         __m128i vMaxScore = vMinimums;  /* vMaxScore = [0,0] */
00108 
00109         __m128i vStoreOpt;                              /* the new optimal score */
00110         __m128i vRD;                                    /* the new row deletion score */
00111         __m128i vCD = vMinimums;                /* the column deletion score */
00112         __m128i zero = vMinimums;               /* the column deletion score */
00113         __m128i vTmp;
00114 #ifdef DEBUG
00115         int ii,jj;
00116 #endif
00117 
00118         /* initialize the other arrays used for the dynProg code */
00119         /*********************************************************/
00120         for(i=0; LIKELY(i<segLength); i++){
00121                 _mm_store_si128(loadOpt+i,zero);
00122                 _mm_store_si128(storeOpt+i,zero);
00123                 _mm_store_si128(rD+i,zero);
00124         }
00125 
00126         /* looping through all the columns */
00127         /***********************************/
00128 
00129         for(j=0; LIKELY(j<dbLen); j++){
00130 
00131 
00132                 /* compute the opt and cd score depending on the previous column
00133                  *******************************************************************
00134                  * set the column deletion score to zero, has to be fixed later on */
00135                 vCD = zero;
00136 
00137                 /* set the opt score to the elements computed in the previous column*/
00138                 /* set the low of storeOpt to MaxS[j]                               */
00139                 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
00140                 vStoreOpt = _mm_slli_si128(vStoreOpt, 1);
00141 
00142                 /* compute the current profile, depending on the character in s2 */
00143                 /*****************************************************************/
00144                 current_profile = query->profile + db[j]*segLength;
00145 
00146                 /* swap the old optimal score with the new one */
00147                 /***********************************************/
00148                 swap = storeOpt;
00149                 storeOpt = loadOpt;
00150                 loadOpt = swap;
00151 
00152                 /* main loop computing the max, precomputing etc. */
00153                 /**************************************************/
00154                 for(i=0; LIKELY(i<segLength); i++){
00155                         vRD = _mm_load_si128(rD+i);
00156                         vRD = _mm_subs_epu8(vRD, vDelIncr);
00157                         vTmp = _mm_load_si128(loadOpt+i);
00158                         vTmp = _mm_subs_epu8(vTmp,vDelFixed);
00159                         vRD = _mm_max_epu8(vRD,vTmp);
00160                         _mm_store_si128(rD+i, vRD);
00161 
00162                         /* add the profile the prev. opt */
00163                         vStoreOpt = _mm_adds_epu8(vStoreOpt, *(current_profile+i));
00164                         vStoreOpt = _mm_subs_epu8(vStoreOpt, vBias);
00165 
00166                         /* update the maxscore found so far */
00167                         vMaxScore = _mm_max_epu8(vMaxScore, vStoreOpt);
00168 
00169                         /* compute the correct opt score of the cell */
00170                         vStoreOpt = _mm_max_epu8(vStoreOpt, vRD);
00171                         vStoreOpt = _mm_max_epu8(vStoreOpt, vCD);
00172 
00173                         /* store the opt score of the cell */
00174                         _mm_store_si128(storeOpt+i, vStoreOpt);
00175 
00176                         /* precompute cd for next iteration */
00177                         vStoreOpt = _mm_subs_epu8(vStoreOpt, vDelFixed);
00178                         vCD = _mm_subs_epu8(vCD, vDelIncr);
00179                         vCD = _mm_max_epu8(vCD, vStoreOpt);
00180 
00181                         /* load precomputed opt for next iteration */
00182                         vStoreOpt = _mm_load_si128(loadOpt+i);
00183                 }
00184 
00185 
00186                 for(i=0;LIKELY(i<16);++i){
00187                         int k;
00188                         /* compute the gap extend penalty for the current cell */
00189                         vCD = _mm_slli_si128(vCD,1);
00190 
00191                         for(k=0;LIKELY(k<segLength);++k) {
00192                            /* compute the current optimal value of the cell */
00193                            vStoreOpt = _mm_load_si128(storeOpt+k);
00194                            vStoreOpt = _mm_max_epu8(vStoreOpt,vCD);
00195                            _mm_store_si128(storeOpt+k,vStoreOpt);
00196 
00197                            /* precompute the scores for the next cell */
00198                            vStoreOpt = _mm_subs_epu8(vStoreOpt,vDelFixed);
00199                            vCD = _mm_subs_epu8(vCD, vDelIncr);
00200 
00201                            if(UNLIKELY(_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(vCD,vStoreOpt),zero)) == 0xFFFF)) goto shortcut;
00202                         }
00203                 }
00204                 shortcut:
00205 
00206 #ifdef DEBUG
00207                 debug("%c\t",db[j]);
00208                 for(ii=0; ii<16;++ii) {
00209                    for(jj=0; jj<segLength;++jj) {
00210                       if(ii*segLength+jj < query->len)
00211                         debug("%d\t",(int)((unsigned char*)storeOpt)[ii+jj*16]);
00212                    }
00213                 }
00214                 debug("\n");
00215 #endif
00216 
00217                 /* store the new MaxScore for the next line block */
00218                 /**************************************************/
00219 
00220                 /* store the element of storeOpt in MaxS */
00221                 vStoreOpt = _mm_load_si128(storeOpt+segLength-1);
00222         }
00223 
00224         vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 8));
00225         vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 4));
00226         vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 2));
00227         vMaxScore = _mm_max_epu8(vMaxScore, _mm_srli_si128(vMaxScore, 1));
00228         MaxScore = (unsigned char)_mm_extract_epi16(vMaxScore,0);
00229         if ((int)MaxScore + (int)query->bias >=255)
00230                 return DBL_MAX;
00231         return((double)MaxScore);
00232 }
00233 
00234 
00235 EXPORT void swps3_freeProfileByteSSE( ProfileByte * profile ){
00236         free( profile );
00237 }
00238 

Generated on Thu Jun 5 12:44:37 2008 for swps3 by  doxygen 1.5.4