DynProgr_altivec.cc

Go to the documentation of this file.
00001 
00005 /*
00006  * Copyright (c) 2007-2008 ETH Zürich, Institute of Computational Science
00007  *
00008  * Permission is hereby granted, free of charge, to any person
00009  * obtaining a copy of this software and associated documentation
00010  * files (the "Software"), to deal in the Software without
00011  * restriction, including without limitation the rights to use,
00012  * copy, modify, merge, publish, distribute, sublicense, and/or sell
00013  * copies of the Software, and to permit persons to whom the
00014  * Software is furnished to do so, subject to the following
00015  * conditions:
00016  *
00017  * The above copyright notice and this permission notice shall be
00018  * included in all copies or substantial portions of the Software.
00019  *
00020  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00021  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
00022  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00023  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
00024  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
00025  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
00027  * OTHER DEALINGS IN THE SOFTWARE.
00028  */
00029 
00030 #include "matrix.h"
00031 #include "DynProgr_altivec.h"
00032 #include <cstdlib>
00033 #include <malloc.h>
00034 #include <float.h>
00035 #include <cstdio>
00036 #include <string.h>
00037 #include <altivec.h>
00038 #include <sys/types.h>
00039 
00040 #define ALIGN16(x)    (((x)+15)&(-16))
00041 #undef SHORTCUT
00042 
00043 template<typename T> static inline T min( T a, T b ){ return a<b?a:b; }
00044 template<typename T> static inline T max( T a, T b ){ return a>b?a:b; }
00045 
00046 template<typename T> struct IsInteger   { static const int value = false; };
00047 template<> struct IsInteger<u_int8_t>   { static const int value = true; };
00048 template<> struct IsInteger<u_int16_t>  { static const int value = true; };
00049 template<> struct IsInteger<u_int32_t>  { static const int value = true; };
00050 template<> struct IsInteger<int8_t>     { static const int value = true; };
00051 template<> struct IsInteger<int16_t>    { static const int value = true; };
00052 template<> struct IsInteger<int32_t>    { static const int value = true; };
00053 
00054 template<typename T> struct IsSigned    { static const int value = (T)-1 < (T)0; };
00055 
00056 template<typename T> struct MaxValue    { static const T value = IsSigned<T>::value ? -1ll ^ (1ll<<(sizeof(T)*8-1)) : (T)-1; };
00057 template<> struct MaxValue<float>       { static const float value = FLT_MAX; };
00058 template<> struct MaxValue<double>      { static const double value = DBL_MAX; };
00059 
00060 template<typename T> struct MinValue    { static const T value = IsSigned<T>::value ? 1ll<<(sizeof(T)*8-1) : (T)0; };
00061 template<> struct MinValue<float>       { static const float value = FLT_MIN; };
00062 template<> struct MinValue<double>      { static const double value = DBL_MIN; };
00063 
00064 template<typename T, typename V> struct Profile {
00065         int len;
00066         T bias;
00067         V * rD;
00068         V * storeOpt;
00069         V * loadOpt;
00070         V * profile;
00071 };
00072 
00081 template<typename T, typename V> static inline Profile<T,V>* allocateProfile(int len)
00082 {
00083         const int nSeg = sizeof(V)/sizeof(T);    // the number of segments
00084         const int segLen = ALIGN16(len)/nSeg; // the segment length
00085 
00086         Profile<T,V> *profile = (Profile<T,V>*)malloc(sizeof(*profile));
00087         profile->len = len;
00088         profile->rD = (V*)malloc(sizeof(V)*segLen);
00089         profile->loadOpt = (V*)malloc(sizeof(V)*segLen);
00090         profile->storeOpt = (V*)malloc(sizeof(V)*segLen);
00091         profile->profile = (V*)malloc(sizeof(V)*MATRIX_DIM*segLen);
00092         return profile;
00093 }
00094 
00098 template<typename T, typename V> void freeProfile(Profile<T,V> *profile)
00099 {
00100         free(profile->profile);
00101         free(profile->storeOpt);
00102         free(profile->loadOpt);
00103         free(profile->rD);
00104         free(profile);
00105 }
00106 
00110 template<typename V> static inline V vec_addx(V a, V b)
00111 {
00112         return vec_adds(a,b);
00113 }
00114 
00118 typedef vector float v_float_t;
00119 template<> static inline v_float_t vec_addx<v_float_t>(v_float_t a, v_float_t b)
00120 {
00121         return vec_add(a,b);
00122 }
00123 
00127 template<typename V> static inline V vec_subx(V a, V b)
00128 {
00129         return vec_subs(a,b);
00130 }
00131 
00135 typedef vector float v_float_t;
00136 template<> static inline v_float_t vec_subx<v_float_t>(v_float_t a, v_float_t b)
00137 {
00138         return vec_sub(a,b);
00139 }
00140 
00141 
00152 template< typename T, typename V > static inline T dynProgrLocal(
00153                 const char* db, int dbLen,
00154                 Profile<T,V> * profile,
00155                 Options *options){
00156 
00157         /**********************************************************************
00158         * This version of the code implements the idea presented in
00159         *
00160         ***********************************************************************
00161         * Striped Smith-Waterman speeds database searches six times over other
00162         * SIMD implementations
00163         *
00164         * Michael Farrar, Bioinformatics, 23(2), pp. 156-161, 2007
00165         **********************************************************************/
00166 
00167         T zero,goal;
00168         /* A vectorized template version */
00169         if (IsInteger<T>::value){
00170                 // adjust the zero and goal values...
00171                 zero = MinValue<T>::value + profile->bias;
00172                 goal = MaxValue<T>::value - profile->bias;
00173         } else {
00174                 zero = (T)0.0;
00175                 goal = MaxValue<T>::value;
00176         }
00177 
00178         V vZero = {zero};
00179         vZero = vec_splat( vZero, 0 );
00180         V vGoal = {goal};
00181         vGoal = vec_splat( vGoal, 0 );
00182         V vDelFixed = {(T)options->gapOpen};
00183         vDelFixed = vec_splat( vDelFixed, 0 );
00184         V vDelInc = {(T)options->gapExt};
00185         vDelInc = vec_splat( vDelInc, 0 );
00186         V vBias = {(T)profile->bias};
00187         vBias = vec_splat( vBias, 0 );
00188 
00189         T maxScore = zero;
00190         const int nSeg = sizeof(V)/sizeof(T);    // the number of segments
00191         const int segLen = ALIGN16(profile->len)/nSeg; // the segment length
00192 
00193         V vMaxScore = vZero;                     // The maximum score
00194         /* Initialize the other arrays */
00195         /*******************************/
00196         for(int i=0; LIKELY(i<segLen); i++)
00197                 profile->loadOpt[i] = profile->storeOpt[i] = profile->rD[i] = vZero;
00198 
00199         /* looping through all the columns */
00200         /***********************************/
00201         for( int i=0; LIKELY(i<dbLen); i++ ){
00202                 V vCD = vZero;
00203 
00204                 // set the opt score to the elements computed in the previous column
00205                 V vStoreOpt = vec_sld(vZero, profile->storeOpt[segLen-1], sizeof(V)-sizeof(T));
00206 
00207                 /* compute the current profile, depending on the character in s2 */
00208                 /*****************************************************************/
00209                 V * currentProfile = profile->profile + db[i]*segLen;
00210 
00211 #if 0
00212                 for(int ii=0; ii<nSeg; ++ii) {
00213                         for(int jj=0; jj<segLen; ++jj) {
00214                                 if(ii*segLen+jj < profile->len)
00215                                         printf("\t%d",(int)((T*)currentProfile)[ii+jj*nSeg]);
00216                         }
00217                 }
00218                 printf("\n");
00219 #endif
00220 
00221                 /* swap the old optimal score with the new one */
00222                 /***********************************************/
00223                 V * swap = profile->storeOpt;
00224                 profile->storeOpt = profile->loadOpt;
00225                 profile->loadOpt  = swap;
00226 
00227                 /* main loop computing the max, precomputing etc. */
00228                 /**************************************************/
00229                 for( int j=0; LIKELY(j<segLen); j++ ){
00230                         // Load the the rd value
00231                         V vRD          = profile->rD[j];
00232                         V vTmp         = profile->loadOpt[j];
00233                         vRD            = vec_addx(vRD,vDelInc);
00234                         vTmp           = vec_addx(vTmp,vDelFixed);
00235                         if(!IsInteger<T>::value || !IsSigned<T>::value) {
00236                                 vRD    = vec_max(vRD,vZero);
00237                         }
00238                         vRD            = vec_max(vTmp,vRD);
00239                         profile->rD[j] = vRD;
00240  
00241                         // add the profile the prev. opt
00242                         vStoreOpt = vec_addx(currentProfile[j],vStoreOpt);
00243                         if(!IsSigned<T>::value)
00244                                 vStoreOpt = vec_subx(vStoreOpt,vBias);
00245 
00246                         // update the maxscore found so far
00247                         vMaxScore = vec_max( vMaxScore, vStoreOpt );
00248                         // precompute the maximum here
00249                         vTmp = vec_max( vCD, vRD );
00250                         // compute the correct opt score of the cell
00251                         vStoreOpt = vec_max( vStoreOpt, vTmp );
00252 
00253                         // store the opt score of the cell
00254                         profile->storeOpt[j] = vStoreOpt;
00255 
00256                         // precompute rd and cd for next iteration
00257                         vStoreOpt = vec_addx(vStoreOpt,vDelFixed);
00258                         vRD       = vec_addx(vRD,vDelInc);
00259                         vCD       = vec_addx(vCD,vDelInc);
00260                         if(!IsInteger<T>::value || !IsSigned<T>::value)
00261                                 vStoreOpt = vec_max(vStoreOpt, vZero);
00262                         vRD       = vec_max( vStoreOpt, vRD );
00263                         vCD       = vec_max( vStoreOpt, vCD );
00264 
00265                         // store precomputed rd
00266                         profile->rD[j] = vRD;
00267 
00268                         // load precomputed opt for next iteration
00269                         vStoreOpt = profile->loadOpt[j];
00270                 }
00271 
00272                 /* TODO prefetch next profile into cache */
00273 
00274                 /* set totcells */
00275                 /****************/
00276 //         totcells += ls1;
00277                 /* check for a changed MaxScore */
00278                 /********************************/
00279                 for( T* tmp = (T*)&vMaxScore; tmp<(T*)(&vMaxScore+1); tmp++ )
00280                         if (UNLIKELY(maxScore < *tmp))
00281                                 maxScore = *tmp;
00282                 // if the goal was reached, exit
00283                 if ( UNLIKELY(maxScore >= goal) )
00284                         return MaxValue<T>::value;
00285 
00286                 V vStoreOptx = profile->storeOpt[0];
00287                 vStoreOptx = vec_addx(vStoreOptx,vDelFixed - vDelInc);
00288                 if(!IsInteger<T>::value)
00289                         vStoreOptx = vec_max( vStoreOptx, vZero );
00290                 V vCDx = vec_sld(vZero, vCD, sizeof(V)-sizeof(T));
00291 
00292                 if(vec_all_le(vCDx,vStoreOptx) == 0) {
00293                         for(int j=0; LIKELY(j<nSeg); ++j) {
00294                                 // set everything up for the next iteration
00295                                 vCD = vec_sld(vZero, vCD, sizeof(V)-sizeof(T));
00296 
00297                                 for(int k=0; LIKELY(k<segLen-1); ++k) {
00298                                         // compute the current optimal value of the cell
00299                                         vStoreOpt = profile->storeOpt[k];
00300                                         vStoreOpt = vec_max( vStoreOpt, vCD );
00301                                         profile->storeOpt[k] = vStoreOpt;
00302 
00303                                         // precompute the scores for the next cell
00304                                         vCD = vec_addx( vCD, vDelInc);
00305                                         vStoreOpt = vec_addx( vStoreOpt, vDelFixed);
00306                                         if(!IsInteger<T>::value) {
00307                                                 vCD = vec_max( vCD, vZero );
00308                                                 vStoreOpt = vec_max( vStoreOpt, vZero );
00309                                         }
00310 
00311                                         #ifdef SHORTCUT
00312                                         if(UNLIKELY(vec_all_le(vCD,vStoreOpt)))
00313                                                 goto shortcut;
00314                                         #endif
00315                                 }
00316 
00317                                 // compute the current optimal value of the cell
00318                                 vStoreOpt = profile->storeOpt[segLen-1];
00319                                 vStoreOpt = vec_max( vStoreOpt, vCD );
00320                                 profile->storeOpt[segLen-1] = vStoreOpt;
00321 
00322                                 // precompute the cd value for the next cell
00323                                 vCD = vec_addx( vCD, vDelInc);
00324                                 vStoreOpt = vec_addx( vStoreOpt, vDelFixed);
00325                                 if(!IsInteger<T>::value) {
00326                                         vCD = vec_max( vCD, vZero );
00327                                         vStoreOpt = vec_max( vStoreOpt, vZero );
00328                                 }
00329 
00330                                 if(UNLIKELY(vec_all_le(vCD,vStoreOpt)))
00331                                         break;
00332                         }
00333                         #ifdef SHORTCUT
00334                         shortcut:
00335                         (void)1;
00336                         #endif
00337                 }
00338 
00339 #ifdef DEBUG
00340                 printf("%c\t",db[i]);
00341                 for(int ii=0; ii<nSeg; ++ii) {
00342                         for(int jj=0; jj<segLen; ++jj) {
00343                                 if(ii*segLen+jj < profile->len)
00344                                         printf("%d\t",(int)(((T*)profile->storeOpt)[ii+jj*nSeg]-zero));
00345                         }
00346                 }
00347                 printf("\n");
00348 #endif
00349         }
00350         return maxScore;
00351 }
00352 
00365 template< typename T, typename V > static inline T dynProgrLocal2(
00366                 const char* db, int dbLen,
00367                 Profile<T,V> * profile,
00368                 Options *options){
00369         /**********************************************************************
00370         * This version of the code implements the idea presented in
00371         *
00372         ***********************************************************************
00373         * Striped Smith-Waterman speeds database searches six times over other
00374         * SIMD implementations
00375         *
00376         * Michael Farrar, Bioinformatics, 23(2), pp. 156-161, 2007
00377         **********************************************************************/
00378 
00379         T zero,goal;
00380         /* A vectorized template version */
00381         if (IsInteger<T>::value){
00382                 // adjust the zero and goal values...
00383                 zero = MinValue<T>::value + profile->bias;
00384                 goal = MaxValue<T>::value - profile->bias;
00385         } else {
00386                 zero = (T)0.0;
00387                 goal = MaxValue<T>::value;
00388         }
00389 
00390         V vZero = {zero};
00391         vZero = vec_splat( vZero, 0 );
00392         V vGoal = {goal};
00393         vGoal = vec_splat( vGoal, 0 );
00394         V vDelFixed = {(T)options->gapOpen};
00395         vDelFixed = vec_splat( vDelFixed, 0 );
00396         V vDelInc = {(T)options->gapExt};
00397         vDelInc = vec_splat( vDelInc, 0 );
00398         V vBias = {(T)profile->bias};
00399         vBias = vec_splat( vBias, 0 );
00400 
00401         T maxScore = zero;
00402         const int nSeg = sizeof(V)/sizeof(T);    // the number of segments
00403         const int segLen = (ALIGN16(profile->len)/nSeg + 1) & ~1; // the segment length
00404         const int subSegLen = segLen / 2;                 // the sub segment length
00405 
00406         V vMaxScore1 = vZero, vMaxScore2 = vZero; // The maximum score
00407 
00408         /* Initialize the other arrays */
00409         /*******************************/
00410         for(int i=0; LIKELY(i<segLen); i++)
00411                 profile->loadOpt[i] = profile->storeOpt[i] = profile->rD[i] = vZero;
00412 
00413         /* looping through all the columns */
00414         /***********************************/
00415         for( int i=0; LIKELY(i<dbLen); i++ ){
00416                 V vCD1 = vZero, vCD2 = vZero;
00417 
00418                 // set the opt score to the elements computed in the previous column
00419                 V vStoreOpt1 = vec_sld(vZero, profile->storeOpt[segLen-1], sizeof(V)-sizeof(T));
00420                 V vStoreOpt2 = profile->storeOpt[subSegLen-1];
00421 
00422                 /* compute the current profile, depending on the character in s2 */
00423                 /*****************************************************************/
00424                 V * currentProfile = profile->profile + db[i]*segLen;
00425 
00426 
00427 #if 0
00428                 for(int ii=0; ii<nSeg; ++ii) {
00429                         for(int jj=0; jj<segLen; ++jj) {
00430                                 if(ii*segLen+jj < profile->len)
00431                                         printf("\t%d",(int)((T*)currentProfile)[ii+jj*nSeg]);
00432                         }
00433                 }
00434                 printf("\n");
00435 #endif
00436 
00437                 /* swap the old optimal score with the new one */
00438                 /***********************************************/
00439                 V * swap = profile->storeOpt;
00440                 profile->storeOpt = profile->loadOpt;
00441                 profile->loadOpt  = swap;
00442 
00443                 /* main loop computing the max, precomputing etc. */
00444                 /**************************************************/
00445                 for( int j=0; LIKELY(j<subSegLen); j++ ){
00446                         // Load the the rd value
00447                         V vRD1          = profile->rD[j          ];
00448                         V vRD2          = profile->rD[j+subSegLen];
00449                         V vTmp1         = profile->loadOpt[j          ];
00450                         V vTmp2         = profile->loadOpt[j+subSegLen];
00451                         vRD1            = vec_addx(vRD1,vDelInc);
00452                         vRD2            = vec_addx(vRD2,vDelInc);
00453                         vTmp1           = vec_addx(vTmp1,vDelFixed);
00454                         vTmp2           = vec_addx(vTmp2,vDelFixed);
00455                         if(!IsInteger<T>::value || !IsSigned<T>::value) {
00456                                 vRD1    = vec_max(vRD1,vZero);
00457                                 vRD2    = vec_max(vRD2,vZero);
00458                         }
00459                         vRD1            = vec_max(vTmp1,vRD1);
00460                         vRD2            = vec_max(vTmp2,vRD2);
00461                         profile->rD[j          ] = vRD1;
00462                         profile->rD[j+subSegLen] = vRD2;
00463 
00464                         // add the profile the prev. opt
00465                         vStoreOpt1 = vec_addx(currentProfile[j          ],vStoreOpt1);
00466                         vStoreOpt2 = vec_addx(currentProfile[j+subSegLen],vStoreOpt2);
00467                         if(!IsSigned<T>::value) {
00468                                 vStoreOpt1 = vec_subx(vStoreOpt1,vBias);
00469                                 vStoreOpt2 = vec_subx(vStoreOpt2,vBias);
00470                         }
00471 
00472                         // update the maxscore found so far
00473                         vMaxScore1 = vec_max( vMaxScore1, vStoreOpt1 );
00474                         vMaxScore2 = vec_max( vMaxScore2, vStoreOpt2 );
00475 
00476                         // precompute the maximum here
00477                         vTmp1 = vec_max( vCD1, vRD1 );
00478                         vTmp2 = vec_max( vCD2, vRD2 );
00479 
00480                         // compute the correct opt score of the cell
00481                         vStoreOpt1 = vec_max( vStoreOpt1, vTmp1 );
00482                         vStoreOpt2 = vec_max( vStoreOpt2, vTmp2 );
00483 
00484                         // store the opt score of the cell
00485                         profile->storeOpt[j          ] = vStoreOpt1;
00486                         profile->storeOpt[j+subSegLen] = vStoreOpt2;
00487 
00488                         // precompute rd and cd for next iteration
00489                         vStoreOpt1 = vec_addx(vStoreOpt1,vDelFixed);
00490                         vStoreOpt2 = vec_addx(vStoreOpt2,vDelFixed);
00491                         vRD1       = vec_addx(vRD1,vDelInc);
00492                         vRD2       = vec_addx(vRD2,vDelInc);
00493                         vCD1       = vec_addx(vCD1,vDelInc);
00494                         vCD2       = vec_addx(vCD2,vDelInc);
00495                         if(!IsInteger<T>::value || !IsSigned<T>::value) {
00496                                 vStoreOpt1 = vec_max(vStoreOpt1, vZero);
00497                                 vStoreOpt2 = vec_max(vStoreOpt2, vZero);
00498                         }
00499                         vRD1       = vec_max( vStoreOpt1, vRD1 );
00500                         vRD2       = vec_max( vStoreOpt2, vRD2 );
00501                         vCD1       = vec_max( vStoreOpt1, vCD1 );
00502                         vCD2       = vec_max( vStoreOpt2, vCD2 );
00503 
00504                         // store precomputed rd
00505                         profile->rD[j          ] = vRD1;
00506                         profile->rD[j+subSegLen] = vRD2;
00507 
00508                         // load precomputed opt for next iteration
00509                         vStoreOpt1 = profile->loadOpt[j          ];
00510                         vStoreOpt2 = profile->loadOpt[j+subSegLen];
00511                 }
00512 
00513                 /* TODO prefetch next profile into cache */
00514 
00515                 /* set totcells */
00516                 /****************/
00517 //         totcells += ls1;
00518                 /* check for a changed MaxScore */
00519                 /********************************/
00520                 V vMaxScore = vec_max( vMaxScore1, vMaxScore2 );
00521                 for( T* tmp = (T*)&vMaxScore; tmp<(T*)(&vMaxScore+1); tmp++ )
00522                         if (UNLIKELY(maxScore < *tmp))
00523                                 maxScore = *tmp;
00524                 // if the goal was reached, exit
00525                 if ( UNLIKELY(maxScore >= goal) )
00526                         return MaxValue<T>::value;
00527 
00528                 V vStoreOptx1 = profile->storeOpt[0        ];
00529                 V vStoreOptx2 = profile->storeOpt[subSegLen];
00530                 vStoreOptx1 = vec_addx(vStoreOptx1,vDelFixed - vDelInc);
00531                 vStoreOptx2 = vec_addx(vStoreOptx2,vDelFixed - vDelInc);
00532                 if(!IsInteger<T>::value) {
00533                         vStoreOptx1 = vec_max( vStoreOptx1, vZero );
00534                         vStoreOptx2 = vec_max( vStoreOptx2, vZero );
00535                 }
00536                 V vCDx1 = vec_sld(vZero, vCD2, sizeof(V)-sizeof(T));
00537                 V vCDx2 = vCD1;
00538 
00539                 if(UNLIKELY((vec_all_le(vCDx1,vStoreOptx1) == 0) || (vec_all_le(vCDx2,vStoreOptx2) == 0))) {
00540                         for(int j=0; LIKELY(j<nSeg+1); ++j) {
00541                                 // set everything up for the next iteration
00542                                 V vRotate = vCD2;
00543                                 vCD2 = vCD1;
00544                                 vCD1 = vec_sld(vZero, vRotate, sizeof(V)-sizeof(T));
00545 
00546                                 for(int k=0; LIKELY(k<subSegLen-1); ++k) {
00547                                         // compute the current optimal value of the cell
00548                                         vStoreOpt1 = profile->storeOpt[k            ];
00549                                         vStoreOpt2 = profile->storeOpt[k + subSegLen];
00550                                         vStoreOpt1 = vec_max( vStoreOpt1, vCD1 );
00551                                         vStoreOpt2 = vec_max( vStoreOpt2, vCD2 );
00552                                         profile->storeOpt[k            ] = vStoreOpt1;
00553                                         profile->storeOpt[k + subSegLen] = vStoreOpt2;
00554 
00555                                         // precompute the scores for the next cell
00556                                         vCD1 = vec_addx( vCD1, vDelInc);
00557                                         vCD2 = vec_addx( vCD2, vDelInc);
00558                                         vStoreOpt1 = vec_addx( vStoreOpt1, vDelFixed);
00559                                         vStoreOpt2 = vec_addx( vStoreOpt2, vDelFixed);
00560                                         if(!IsInteger<T>::value) {
00561                                                 vCD1 = vec_max( vCD1, vZero );
00562                                                 vCD2 = vec_max( vCD2, vZero );
00563                                                 vStoreOpt1 = vec_max( vStoreOpt1, vZero );
00564                                                 vStoreOpt2 = vec_max( vStoreOpt2, vZero );
00565                                         }
00566 
00567                                         #ifdef SHORTCUT
00568                                         if(UNLIKELY(vec_all_le(vCD1,vStoreOpt1) != 0 && vec_all_le(vCD2,vStoreOpt2) != 0))
00569                                                 goto shortcut;
00570                                         #endif
00571                                 }
00572 
00573                                 // compute the current optimal value of the cell
00574                                 vStoreOpt1 = profile->storeOpt[subSegLen - 1];
00575                                 vStoreOpt2 = profile->storeOpt[segLen    - 1];
00576                                 vStoreOpt1 = vec_max( vStoreOpt1, vCD1 );
00577                                 vStoreOpt2 = vec_max( vStoreOpt2, vCD2 );
00578                                 profile->storeOpt[subSegLen - 1] = vStoreOpt1;
00579                                 profile->storeOpt[segLen    - 1] = vStoreOpt2;
00580 
00581                                 // precompute the scores for the next cell
00582                                 vCD1 = vec_addx( vCD1, vDelInc);
00583                                 vCD2 = vec_addx( vCD2, vDelInc);
00584                                 vStoreOpt1 = vec_addx( vStoreOpt1, vDelFixed);
00585                                 vStoreOpt2 = vec_addx( vStoreOpt2, vDelFixed);
00586                                 if(!IsInteger<T>::value) {
00587                                         vCD1 = vec_max( vCD1, vZero );
00588                                         vCD2 = vec_max( vCD2, vZero );
00589                                         vStoreOpt1 = vec_max( vStoreOpt1, vZero );
00590                                         vStoreOpt2 = vec_max( vStoreOpt2, vZero );
00591                                 }
00592 
00593                                 if(UNLIKELY(vec_all_le(vCD1,vStoreOpt1) != 0 && vec_all_le(vCD2,vStoreOpt2) != 0))
00594                                         break;
00595                         }
00596                         #ifdef SHORTCUT
00597                         shortcut:
00598                         (void)1;
00599                         #endif
00600                 }
00601 #ifdef DEBUG
00602                 printf("%c\t",db[i]);
00603                 for(int ii=0; ii<nSeg; ++ii) {
00604                         for(int jj=0; jj<segLen; ++jj) {
00605                                 if(ii*segLen+jj < profile->len)
00606                                         printf("%d\t",(int)(((T*)profile->storeOpt)[ii+jj*nSeg]-zero));
00607                         }
00608                 }
00609                 printf("\n");
00610 #endif
00611         }
00612         return maxScore;
00613 }
00614 
00628 template< typename T, typename V, typename X >
00629 EXPORT Profile<T,V>* swps3_createProfileAltivec( const char *query, int queryLen, X* simi ){
00630         const int alignedLen = ALIGN16(queryLen);
00631         const int nSeg = sizeof(V)/sizeof(T);    // the number of segments
00632         const int segLen = alignedLen/nSeg; // the segment length
00633 
00634         Profile<T,V>* profile = allocateProfile<T,V>(queryLen);
00635 
00636         for( int i=0; i<MATRIX_DIM; i++ ){
00637                 T *currentProfile = ((T*)profile->profile)+i*alignedLen;
00638                 for( int j=0; j<segLen; j++ ){
00639                         T *tmp = currentProfile + j*nSeg;
00640                         for( int k=0; k<nSeg; k++ )
00641                                 if( j + k*segLen < queryLen )
00642                                         tmp[k] = (T)simi[ query[j + k*segLen ] * MATRIX_DIM + i ];
00643                                 else
00644                                         tmp[k] = 0;
00645                 }
00646         }
00647 
00648         return profile;
00649 }
00650 
00668 template< typename T, typename V >
00669 EXPORT double swps3_dynProgrAltivec(const char *db, int dbLen, Profile<T,V> *profile, Options *options){
00670         T zero, goal;
00671         /* A vectorized template version */
00672         if (IsInteger<T>::value){
00673                 // adjust the zero and goal values...
00674                 zero = MinValue<T>::value;
00675                 goal = MaxValue<T>::value;
00676         } else {
00677                 zero = (T)0.0;
00678                 goal = MaxValue<T>::value;
00679         }
00680 
00681         T maxScore=zero;
00682 
00683 #ifdef UNROLL
00684         T currentScore;
00685         if (sizeof(T) < 2)
00686                 currentScore = dynProgrLocal<T,V> ( db, dbLen, profile, options );
00687         else
00688                 currentScore = dynProgrLocal2<T,V> ( db, dbLen, profile, options );
00689 #else
00690         T currentScore = dynProgrLocal<T,V> ( db, dbLen, profile, options );
00691 #endif
00692         if( maxScore < currentScore)
00693                 maxScore = currentScore;
00694 
00695         if(maxScore >= goal)
00696                 return DBL_MAX;
00697 
00698         /* Finally free all the memory we allocated */
00699         /********************************************/
00700         return (double)(maxScore-zero);
00701 }
00702 
00718 EXPORT double swps3_dynProgrByteAltivec(const char *db, int dbLen, void* profile, Options *options)
00719 {
00720         return swps3_dynProgrAltivec<int8_t,vector int8_t>(db,dbLen,(Profile<int8_t,vector int8_t>*)profile,options);
00721 }
00722 
00738 EXPORT double swps3_dynProgrShortAltivec(const char *db, int dbLen, void* profile, Options *options)
00739 {
00740         return swps3_dynProgrAltivec<int16_t,vector int16_t>(db,dbLen,(Profile<int16_t,vector int16_t>*)profile,options);
00741 }
00742 
00759 EXPORT double swps3_dynProgrFloatAltivec(const char *db, int dbLen, void* profile, Options *options)
00760 {
00761         return swps3_dynProgrAltivec<float,vector float>(db,dbLen,(Profile<float,vector float>*)profile,options);
00762 }
00763 
00774 EXPORT void *swps3_createProfileByteAltivec(const char *query, int queryLen, SBMatrix matrix)
00775 {
00776         return swps3_createProfileAltivec<int8_t, vector int8_t>(query, queryLen, matrix);
00777 }
00778 
00789 EXPORT void *swps3_createProfileShortAltivec(const char *query, int queryLen, SBMatrix matrix)
00790 {
00791         return swps3_createProfileAltivec<int16_t, vector int16_t>(query, queryLen, matrix);
00792 }
00793 
00804 EXPORT void *swps3_createProfileFloatAltivec(const char *query, int queryLen, SBMatrix matrix)
00805 {
00806         return swps3_createProfileAltivec<float, vector float>(query, queryLen, matrix);
00807 }
00808 

Generated on Thu Jun 5 12:44:37 2008 for swps3 by  doxygen 1.5.4