swps3
DynProgr_altivec.cc
Go to the documentation of this file.
1 
5 /*
6  * Copyright (c) 2007-2008 ETH Zürich, Institute of Computational Science
7  *
8  * Permission is hereby granted, free of charge, to any person
9  * obtaining a copy of this software and associated documentation
10  * files (the "Software"), to deal in the Software without
11  * restriction, including without limitation the rights to use,
12  * copy, modify, merge, publish, distribute, sublicense, and/or sell
13  * copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following
15  * conditions:
16  *
17  * The above copyright notice and this permission notice shall be
18  * included in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27  * OTHER DEALINGS IN THE SOFTWARE.
28  */
29 
30 #include "matrix.h"
31 #include "DynProgr_altivec.h"
32 #include <cstdlib>
33 #include <malloc.h>
34 #include <float.h>
35 #include <cstdio>
36 #include <string.h>
37 #include <altivec.h>
38 #include <sys/types.h>
39 
// Rounds x up to the next multiple of 16: adds 15, then clears the low four bits.
40 #define ALIGN16(x) (((x)+15)&(-16))
// SHORTCUT is explicitly undefined here, so every "#ifdef SHORTCUT" section in this file is compiled out.
41 #undef SHORTCUT
42 
/** Returns the smaller of the two arguments; when neither compares below the other, b is returned. */
template<typename T> static inline T min( T a, T b )
{
    if( a < b )
        return a;
    return b;
}
/** Returns the larger of the two arguments; when neither compares above the other, b is returned. */
template<typename T> static inline T max( T a, T b )
{
    if( a > b )
        return a;
    return b;
}
45 
/**
 * Compile-time flag telling whether T is one of the fixed-width integer
 * types listed below. Any type without a specialization reports false.
 */
template<typename T>
struct IsInteger {
    static const int value = false;
};

// Signed fixed-width integers.
template<>
struct IsInteger<int8_t> {
    static const int value = true;
};
template<>
struct IsInteger<int16_t> {
    static const int value = true;
};
template<>
struct IsInteger<int32_t> {
    static const int value = true;
};

// Unsigned fixed-width integers (BSD-style names from <sys/types.h>).
template<>
struct IsInteger<u_int8_t> {
    static const int value = true;
};
template<>
struct IsInteger<u_int16_t> {
    static const int value = true;
};
template<>
struct IsInteger<u_int32_t> {
    static const int value = true;
};
53 
/**
 * Compile-time flag: value is nonzero exactly when -1 converted to T
 * compares below zero converted to T, i.e. when T can hold negative values.
 */
template<typename T>
struct IsSigned {
    static const int value = ( (T)0 > (T)-1 );
};
55 
56 template<typename T> struct MaxValue { static const T value = IsSigned<T>::value ? -1ll ^ (1ll<<(sizeof(T)*8-1)) : (T)-1; };
57 template<> struct MaxValue<float> { static const float value = FLT_MAX; };
58 template<> struct MaxValue<double> { static const double value = DBL_MAX; };
59 
60 template<typename T> struct MinValue { static const T value = IsSigned<T>::value ? 1ll<<(sizeof(T)*8-1) : (T)0; };
61 template<> struct MinValue<float> { static const float value = FLT_MIN; };
62 template<> struct MinValue<double> { static const double value = DBL_MIN; };
63 
// Per-query working storage shared by the alignment kernels in this file.
// T is the scalar score type and V the vector type; the code derives the
// number of T lanes per vector as sizeof(V)/sizeof(T).
64 template<typename T, typename V> struct Profile {
65  int len; // query length as handed to allocateProfile()
// NOTE(review): the kernels in this file also read profile->bias, but no such
// member is visible in this listing (the embedded numbering skips from 65 to 67).
// Confirm the member declaration against the real source.
67  V * rD; // one vector per segment position; read, updated and written back for every database column
68  V * storeOpt; // receives the cell scores of the column currently being computed
69  V * loadOpt; // holds the cell scores of the previous column (the kernels swap it with storeOpt per column)
70  V * profile; // MATRIX_DIM rows of segment vectors; the kernels pick a row with db[i]
71 };
72 
81 template<typename T, typename V> static inline Profile<T,V>* allocateProfile(int len)
82 {
83  const int nSeg = sizeof(V)/sizeof(T); // the number of segments
84  const int segLen = ALIGN16(len)/nSeg; // the segment length
85 
86  Profile<T,V> *profile = (Profile<T,V>*)malloc(sizeof(*profile));
87  profile->len = len;
88  profile->rD = (V*)malloc(sizeof(V)*segLen);
89  profile->loadOpt = (V*)malloc(sizeof(V)*segLen);
90  profile->storeOpt = (V*)malloc(sizeof(V)*segLen);
91  profile->profile = (V*)malloc(sizeof(V)*MATRIX_DIM*segLen);
92  return profile;
93 }
94 
98 template<typename T, typename V> void freeProfile(Profile<T,V> *profile)
99 {
100  free(profile->profile);
101  free(profile->storeOpt);
102  free(profile->loadOpt);
103  free(profile->rD);
104  free(profile);
105 }
106 
110 template<typename V> static inline V vec_addx(V a, V b)
111 {
112  return vec_adds(a,b);
113 }
114 
118 typedef vector float v_float_t;
119 template<> static inline v_float_t vec_addx<v_float_t>(v_float_t a, v_float_t b)
120 {
121  return vec_add(a,b);
122 }
123 
127 template<typename V> static inline V vec_subx(V a, V b)
128 {
129  return vec_subs(a,b);
130 }
131 
135 typedef vector float v_float_t;
136 template<> static inline v_float_t vec_subx<v_float_t>(v_float_t a, v_float_t b)
137 {
138  return vec_sub(a,b);
139 }
140 
141 
// Striped local-alignment kernel, one vector per inner-loop step.
// Processes the dbLen characters of db column by column against the score rows
// held in the profile and returns the largest cell value seen (still offset by
// the local `zero`), or MaxValue<T>::value as soon as that value reaches `goal`.
// Overwrites profile->rD, profile->loadOpt and profile->storeOpt.
// NOTE(review): the body uses a variable named `profile` and the caller in this
// file passes one as third argument, but that parameter is not visible in this
// listing (embedded numbering skips 154) - confirm against the real source.
152 template< typename T, typename V > static inline T dynProgrLocal(
153  const char* db, int dbLen,
155  Options *options){
156 
157  /**********************************************************************
158  * This version of the code implements the idea presented in
159  *
160  ***********************************************************************
161  * Striped Smith-Waterman speeds database searches six times over other
162  * SIMD implementations
163  *
164  * Michael Farrar, Bioinformatics, 23(2), pp. 156-161, 2007
165  **********************************************************************/
166 
167  T zero,goal;
168  /* A vectorized template version */
  // Integer types: the usable score range runs from (lowest value + bias) up to
  // (highest value - bias). Other types: scores start at 0.
169  if (IsInteger<T>::value){
170  // adjust the zero and goal values...
171  zero = MinValue<T>::value + profile->bias;
172  goal = MaxValue<T>::value - profile->bias;
173  } else {
174  zero = (T)0.0;
175  goal = MaxValue<T>::value;
176  }
177 
  // Build constant vectors: the brace initialiser sets element 0 and
  // vec_splat(..., 0) then copies that element into every lane.
178  V vZero = {zero};
179  vZero = vec_splat( vZero, 0 );
  // NOTE(review): vGoal is built but not referenced again in the visible code.
180  V vGoal = {goal};
181  vGoal = vec_splat( vGoal, 0 );
  // gapOpen / gapExt are added to scores as-is below.
  // NOTE(review): presumably they are stored as non-positive penalties - confirm with Options.
182  V vDelFixed = {(T)options->gapOpen};
183  vDelFixed = vec_splat( vDelFixed, 0 );
184  V vDelInc = {(T)options->gapExt};
185  vDelInc = vec_splat( vDelInc, 0 );
186  V vBias = {(T)profile->bias};
187  vBias = vec_splat( vBias, 0 );
188 
189  T maxScore = zero;
190  const int nSeg = sizeof(V)/sizeof(T); // the number of segments
191  const int segLen = ALIGN16(profile->len)/nSeg; // the segment length
192 
193  V vMaxScore = vZero; // The maximum score
194  /* Initialize the other arrays */
195  /*******************************/
196  for(int i=0; LIKELY(i<segLen); i++)
197  profile->loadOpt[i] = profile->storeOpt[i] = profile->rD[i] = vZero;
198 
199  /* looping through all the columns */
200  /***********************************/
201  for( int i=0; LIKELY(i<dbLen); i++ ){
202  V vCD = vZero;
203 
204  // set the opt score to the elements computed in the previous column
  // vec_sld takes 16 bytes out of the concatenation (vZero, last vector) starting
  // sizeof(V)-sizeof(T) bytes in: every lane of the last vector moves up by one
  // element and lane 0 is filled from vZero.
205  V vStoreOpt = vec_sld(vZero, profile->storeOpt[segLen-1], sizeof(V)-sizeof(T));
206 
207  /* compute the current profile, depending on the character in s2 */
208  /*****************************************************************/
  // db[i] is used directly as a row index into the score table.
  // NOTE(review): assumes db is already encoded as indices in [0, MATRIX_DIM) - confirm with callers.
209  V * currentProfile = profile->profile + db[i]*segLen;
210 
211 #if 0
212  for(int ii=0; ii<nSeg; ++ii) {
213  for(int jj=0; jj<segLen; ++jj) {
214  if(ii*segLen+jj < profile->len)
215  printf("\t%d",(int)((T*)currentProfile)[ii+jj*nSeg]);
216  }
217  }
218  printf("\n");
219 #endif
220 
221  /* swap the old optimal score with the new one */
222  /***********************************************/
223  V * swap = profile->storeOpt;
224  profile->storeOpt = profile->loadOpt;
225  profile->loadOpt = swap;
226 
227  /* main loop computing the max, precomputing etc. */
228  /**************************************************/
229  for( int j=0; LIKELY(j<segLen); j++ ){
230  // Load the rd value
231  V vRD = profile->rD[j];
232  V vTmp = profile->loadOpt[j];
233  vRD = vec_addx(vRD,vDelInc);
234  vTmp = vec_addx(vTmp,vDelFixed);
  // NOTE(review): the closing brace two lines below has no visible opener; the
  // listing skips embedded line 235 here - confirm against the real source.
236  vRD = vec_max(vRD,vZero);
237  }
238  vRD = vec_max(vTmp,vRD);
239  profile->rD[j] = vRD;
240 
241  // add the profile the prev. opt
242  vStoreOpt = vec_addx(currentProfile[j],vStoreOpt);
  // For unsigned score types the bias is subtracted again after the add.
243  if(!IsSigned<T>::value)
244  vStoreOpt = vec_subx(vStoreOpt,vBias);
245 
246  // update the maxscore found so far
247  vMaxScore = vec_max( vMaxScore, vStoreOpt );
248  // precompute the maximum here
249  vTmp = vec_max( vCD, vRD );
250  // compute the correct opt score of the cell
251  vStoreOpt = vec_max( vStoreOpt, vTmp );
252 
253  // store the opt score of the cell
254  profile->storeOpt[j] = vStoreOpt;
255 
256  // precompute rd and cd for next iteration
257  vStoreOpt = vec_addx(vStoreOpt,vDelFixed);
258  vRD = vec_addx(vRD,vDelInc);
259  vCD = vec_addx(vCD,vDelInc);
  // NOTE(review): embedded numbering skips 260 here; a line may be missing from this listing.
261  vStoreOpt = vec_max(vStoreOpt, vZero);
262  vRD = vec_max( vStoreOpt, vRD );
263  vCD = vec_max( vStoreOpt, vCD );
264 
265  // store precomputed rd
266  profile->rD[j] = vRD;
267 
268  // load precomputed opt for next iteration
269  vStoreOpt = profile->loadOpt[j];
270  }
271 
272  /* TODO prefetch next profile into cache */
273 
274  /* set totcells */
275  /****************/
276 // totcells += ls1;
277  /* check for a changed MaxScore */
278  /********************************/
  // Horizontal maximum: walk the lanes of vMaxScore through a T pointer.
279  for( T* tmp = (T*)&vMaxScore; tmp<(T*)(&vMaxScore+1); tmp++ )
280  if (UNLIKELY(maxScore < *tmp))
281  maxScore = *tmp;
282  // if the goal was reached, exit
283  if ( UNLIKELY(maxScore >= goal) )
284  return MaxValue<T>::value;
285 
  // Decide whether the column needs a correction sweep: compare the carried vCD
  // (shifted up by one lane) with the first stored vector plus (gapOpen - gapExt).
286  V vStoreOptx = profile->storeOpt[0];
287  vStoreOptx = vec_addx(vStoreOptx,vDelFixed - vDelInc);
  // NOTE(review): embedded numbering skips 288 here; a line may be missing from this listing.
289  vStoreOptx = vec_max( vStoreOptx, vZero );
290  V vCDx = vec_sld(vZero, vCD, sizeof(V)-sizeof(T));
291 
  // vec_all_le(...) == 0 means at least one lane of vCDx exceeds vStoreOptx.
292  if(vec_all_le(vCDx,vStoreOptx) == 0) {
  // At most nSeg sweeps over the column; each sweep first moves vCD up one lane.
293  for(int j=0; LIKELY(j<nSeg); ++j) {
294  // set everything up for the next iteration
295  vCD = vec_sld(vZero, vCD, sizeof(V)-sizeof(T));
296 
297  for(int k=0; LIKELY(k<segLen-1); ++k) {
298  // compute the current optimal value of the cell
299  vStoreOpt = profile->storeOpt[k];
300  vStoreOpt = vec_max( vStoreOpt, vCD );
301  profile->storeOpt[k] = vStoreOpt;
302 
303  // precompute the scores for the next cell
304  vCD = vec_addx( vCD, vDelInc);
305  vStoreOpt = vec_addx( vStoreOpt, vDelFixed);
306  if(!IsInteger<T>::value) {
307  vCD = vec_max( vCD, vZero );
308  vStoreOpt = vec_max( vStoreOpt, vZero );
309  }
310 
  // Compiled out: SHORTCUT is #undef'd near the top of this file.
311  #ifdef SHORTCUT
312  if(UNLIKELY(vec_all_le(vCD,vStoreOpt)))
313  goto shortcut;
314  #endif
315  }
316 
  // Last segment position is handled outside the loop so the stop test below runs once per sweep.
317  // compute the current optimal value of the cell
318  vStoreOpt = profile->storeOpt[segLen-1];
319  vStoreOpt = vec_max( vStoreOpt, vCD );
320  profile->storeOpt[segLen-1] = vStoreOpt;
321 
322  // precompute the cd value for the next cell
323  vCD = vec_addx( vCD, vDelInc);
324  vStoreOpt = vec_addx( vStoreOpt, vDelFixed);
325  if(!IsInteger<T>::value) {
326  vCD = vec_max( vCD, vZero );
327  vStoreOpt = vec_max( vStoreOpt, vZero );
328  }
329 
  // Stop sweeping once no lane of vCD exceeds the freshly opened value.
330  if(UNLIKELY(vec_all_le(vCD,vStoreOpt)))
331  break;
332  }
333  #ifdef SHORTCUT
334  shortcut:
335  (void)1;
336  #endif
337  }
338 
339 #ifdef DEBUG
340  printf("%c\t",db[i]);
341  for(int ii=0; ii<nSeg; ++ii) {
342  for(int jj=0; jj<segLen; ++jj) {
343  if(ii*segLen+jj < profile->len)
344  printf("%d\t",(int)(((T*)profile->storeOpt)[ii+jj*nSeg]-zero));
345  }
346  }
347  printf("\n");
348 #endif
349  }
350  return maxScore;
351 }
352 
// Striped local-alignment kernel, two-way unrolled: the segment range is split
// into two halves of subSegLen vectors that are processed side by side
// (suffix 1 = first half, suffix 2 = second half).
// Returns the largest cell value seen (still offset by the local `zero`), or
// MaxValue<T>::value as soon as that value reaches `goal`.
// Overwrites profile->rD, profile->loadOpt and profile->storeOpt.
// NOTE(review): the body uses a variable named `profile` and the caller in this
// file passes one as third argument, but that parameter is not visible in this
// listing (embedded numbering skips 367) - confirm against the real source.
365 template< typename T, typename V > static inline T dynProgrLocal2(
366  const char* db, int dbLen,
368  Options *options){
369  /**********************************************************************
370  * This version of the code implements the idea presented in
371  *
372  ***********************************************************************
373  * Striped Smith-Waterman speeds database searches six times over other
374  * SIMD implementations
375  *
376  * Michael Farrar, Bioinformatics, 23(2), pp. 156-161, 2007
377  **********************************************************************/
378 
379  T zero,goal;
380  /* A vectorized template version */
  // Integer types: the usable score range runs from (lowest value + bias) up to
  // (highest value - bias). Other types: scores start at 0.
381  if (IsInteger<T>::value){
382  // adjust the zero and goal values...
383  zero = MinValue<T>::value + profile->bias;
384  goal = MaxValue<T>::value - profile->bias;
385  } else {
386  zero = (T)0.0;
387  goal = MaxValue<T>::value;
388  }
389 
  // Build constant vectors: the brace initialiser sets element 0 and
  // vec_splat(..., 0) then copies that element into every lane.
390  V vZero = {zero};
391  vZero = vec_splat( vZero, 0 );
  // NOTE(review): vGoal is built but not referenced again in the visible code.
392  V vGoal = {goal};
393  vGoal = vec_splat( vGoal, 0 );
394  V vDelFixed = {(T)options->gapOpen};
395  vDelFixed = vec_splat( vDelFixed, 0 );
396  V vDelInc = {(T)options->gapExt};
397  vDelInc = vec_splat( vDelInc, 0 );
398  V vBias = {(T)profile->bias};
399  vBias = vec_splat( vBias, 0 );
400 
401  T maxScore = zero;
402  const int nSeg = sizeof(V)/sizeof(T); // the number of segments
  // Rounded up to an even count so the range splits into two equal halves.
  // NOTE(review): allocateProfile() and swps3_createProfileAltivec() size and lay
  // out the buffers with ALIGN16(len)/nSeg without this rounding - confirm that
  // quotient is always even for the types this kernel is used with.
403  const int segLen = (ALIGN16(profile->len)/nSeg + 1) & ~1; // the segment length
404  const int subSegLen = segLen / 2; // the sub segment length
405 
406  V vMaxScore1 = vZero, vMaxScore2 = vZero; // The maximum score
407 
408  /* Initialize the other arrays */
409  /*******************************/
410  for(int i=0; LIKELY(i<segLen); i++)
411  profile->loadOpt[i] = profile->storeOpt[i] = profile->rD[i] = vZero;
412 
413  /* looping through all the columns */
414  /***********************************/
415  for( int i=0; LIKELY(i<dbLen); i++ ){
416  V vCD1 = vZero, vCD2 = vZero;
417 
418  // set the opt score to the elements computed in the previous column
  // First half starts from the last vector moved up by one lane (lane 0 filled
  // from vZero); second half starts from the last vector of the first half.
419  V vStoreOpt1 = vec_sld(vZero, profile->storeOpt[segLen-1], sizeof(V)-sizeof(T));
420  V vStoreOpt2 = profile->storeOpt[subSegLen-1];
421 
422  /* compute the current profile, depending on the character in s2 */
423  /*****************************************************************/
  // db[i] is used directly as a row index into the score table.
  // NOTE(review): assumes db is already encoded as indices in [0, MATRIX_DIM) - confirm with callers.
424  V * currentProfile = profile->profile + db[i]*segLen;
425 
426 
427 #if 0
428  for(int ii=0; ii<nSeg; ++ii) {
429  for(int jj=0; jj<segLen; ++jj) {
430  if(ii*segLen+jj < profile->len)
431  printf("\t%d",(int)((T*)currentProfile)[ii+jj*nSeg]);
432  }
433  }
434  printf("\n");
435 #endif
436 
437  /* swap the old optimal score with the new one */
438  /***********************************************/
439  V * swap = profile->storeOpt;
440  profile->storeOpt = profile->loadOpt;
441  profile->loadOpt = swap;
442 
443  /* main loop computing the max, precomputing etc. */
444  /**************************************************/
445  for( int j=0; LIKELY(j<subSegLen); j++ ){
446  // Load the rd value
447  V vRD1 = profile->rD[j ];
448  V vRD2 = profile->rD[j+subSegLen];
449  V vTmp1 = profile->loadOpt[j ];
450  V vTmp2 = profile->loadOpt[j+subSegLen];
451  vRD1 = vec_addx(vRD1,vDelInc);
452  vRD2 = vec_addx(vRD2,vDelInc);
453  vTmp1 = vec_addx(vTmp1,vDelFixed);
454  vTmp2 = vec_addx(vTmp2,vDelFixed);
  // NOTE(review): the closing brace three lines below has no visible opener; the
  // listing skips embedded line 455 here - confirm against the real source.
456  vRD1 = vec_max(vRD1,vZero);
457  vRD2 = vec_max(vRD2,vZero);
458  }
459  vRD1 = vec_max(vTmp1,vRD1);
460  vRD2 = vec_max(vTmp2,vRD2);
461  profile->rD[j ] = vRD1;
462  profile->rD[j+subSegLen] = vRD2;
463 
464  // add the profile the prev. opt
465  vStoreOpt1 = vec_addx(currentProfile[j ],vStoreOpt1);
466  vStoreOpt2 = vec_addx(currentProfile[j+subSegLen],vStoreOpt2);
  // For unsigned score types the bias is subtracted again after the add.
467  if(!IsSigned<T>::value) {
468  vStoreOpt1 = vec_subx(vStoreOpt1,vBias);
469  vStoreOpt2 = vec_subx(vStoreOpt2,vBias);
470  }
471 
472  // update the maxscore found so far
473  vMaxScore1 = vec_max( vMaxScore1, vStoreOpt1 );
474  vMaxScore2 = vec_max( vMaxScore2, vStoreOpt2 );
475 
476  // precompute the maximum here
477  vTmp1 = vec_max( vCD1, vRD1 );
478  vTmp2 = vec_max( vCD2, vRD2 );
479 
480  // compute the correct opt score of the cell
481  vStoreOpt1 = vec_max( vStoreOpt1, vTmp1 );
482  vStoreOpt2 = vec_max( vStoreOpt2, vTmp2 );
483 
484  // store the opt score of the cell
485  profile->storeOpt[j ] = vStoreOpt1;
486  profile->storeOpt[j+subSegLen] = vStoreOpt2;
487 
488  // precompute rd and cd for next iteration
489  vStoreOpt1 = vec_addx(vStoreOpt1,vDelFixed);
490  vStoreOpt2 = vec_addx(vStoreOpt2,vDelFixed);
491  vRD1 = vec_addx(vRD1,vDelInc);
492  vRD2 = vec_addx(vRD2,vDelInc);
493  vCD1 = vec_addx(vCD1,vDelInc);
494  vCD2 = vec_addx(vCD2,vDelInc);
  // NOTE(review): the closing brace three lines below has no visible opener; the
  // listing skips embedded line 495 here - confirm against the real source.
496  vStoreOpt1 = vec_max(vStoreOpt1, vZero);
497  vStoreOpt2 = vec_max(vStoreOpt2, vZero);
498  }
499  vRD1 = vec_max( vStoreOpt1, vRD1 );
500  vRD2 = vec_max( vStoreOpt2, vRD2 );
501  vCD1 = vec_max( vStoreOpt1, vCD1 );
502  vCD2 = vec_max( vStoreOpt2, vCD2 );
503 
504  // store precomputed rd
505  profile->rD[j ] = vRD1;
506  profile->rD[j+subSegLen] = vRD2;
507 
508  // load precomputed opt for next iteration
509  vStoreOpt1 = profile->loadOpt[j ];
510  vStoreOpt2 = profile->loadOpt[j+subSegLen];
511  }
512 
513  /* TODO prefetch next profile into cache */
514 
515  /* set totcells */
516  /****************/
517 // totcells += ls1;
518  /* check for a changed MaxScore */
519  /********************************/
  // Merge the two running maxima, then take the horizontal maximum by walking
  // the lanes through a T pointer.
520  V vMaxScore = vec_max( vMaxScore1, vMaxScore2 );
521  for( T* tmp = (T*)&vMaxScore; tmp<(T*)(&vMaxScore+1); tmp++ )
522  if (UNLIKELY(maxScore < *tmp))
523  maxScore = *tmp;
524  // if the goal was reached, exit
525  if ( UNLIKELY(maxScore >= goal) )
526  return MaxValue<T>::value;
527 
  // Decide whether the column needs a correction sweep: each half's first
  // stored vector plus (gapOpen - gapExt) is compared with the value carried
  // into that half.
528  V vStoreOptx1 = profile->storeOpt[0 ];
529  V vStoreOptx2 = profile->storeOpt[subSegLen];
530  vStoreOptx1 = vec_addx(vStoreOptx1,vDelFixed - vDelInc);
531  vStoreOptx2 = vec_addx(vStoreOptx2,vDelFixed - vDelInc);
532  if(!IsInteger<T>::value) {
533  vStoreOptx1 = vec_max( vStoreOptx1, vZero );
534  vStoreOptx2 = vec_max( vStoreOptx2, vZero );
535  }
  // The first half is fed by the end of the second half moved up one lane;
  // the second half is fed directly by the end of the first half.
536  V vCDx1 = vec_sld(vZero, vCD2, sizeof(V)-sizeof(T));
537  V vCDx2 = vCD1;
538 
  // vec_all_le(...) == 0 means at least one lane of the carried value exceeds the threshold.
539  if(UNLIKELY((vec_all_le(vCDx1,vStoreOptx1) == 0) || (vec_all_le(vCDx2,vStoreOptx2) == 0))) {
  // At most nSeg+1 sweeps over the column.
540  for(int j=0; LIKELY(j<nSeg+1); ++j) {
541  // set everything up for the next iteration
  // Rotate the carries: old vCD1 feeds the second half, old vCD2 (moved up one lane) feeds the first.
542  V vRotate = vCD2;
543  vCD2 = vCD1;
544  vCD1 = vec_sld(vZero, vRotate, sizeof(V)-sizeof(T));
545 
546  for(int k=0; LIKELY(k<subSegLen-1); ++k) {
547  // compute the current optimal value of the cell
548  vStoreOpt1 = profile->storeOpt[k ];
549  vStoreOpt2 = profile->storeOpt[k + subSegLen];
550  vStoreOpt1 = vec_max( vStoreOpt1, vCD1 );
551  vStoreOpt2 = vec_max( vStoreOpt2, vCD2 );
552  profile->storeOpt[k ] = vStoreOpt1;
553  profile->storeOpt[k + subSegLen] = vStoreOpt2;
554 
555  // precompute the scores for the next cell
556  vCD1 = vec_addx( vCD1, vDelInc);
557  vCD2 = vec_addx( vCD2, vDelInc);
558  vStoreOpt1 = vec_addx( vStoreOpt1, vDelFixed);
559  vStoreOpt2 = vec_addx( vStoreOpt2, vDelFixed);
560  if(!IsInteger<T>::value) {
561  vCD1 = vec_max( vCD1, vZero );
562  vCD2 = vec_max( vCD2, vZero );
563  vStoreOpt1 = vec_max( vStoreOpt1, vZero );
564  vStoreOpt2 = vec_max( vStoreOpt2, vZero );
565  }
566 
  // Compiled out: SHORTCUT is #undef'd near the top of this file.
567  #ifdef SHORTCUT
568  if(UNLIKELY(vec_all_le(vCD1,vStoreOpt1) != 0 && vec_all_le(vCD2,vStoreOpt2) != 0))
569  goto shortcut;
570  #endif
571  }
572 
  // Last position of each half is handled outside the loop so the stop test below runs once per sweep.
573  // compute the current optimal value of the cell
574  vStoreOpt1 = profile->storeOpt[subSegLen - 1];
575  vStoreOpt2 = profile->storeOpt[segLen - 1];
576  vStoreOpt1 = vec_max( vStoreOpt1, vCD1 );
577  vStoreOpt2 = vec_max( vStoreOpt2, vCD2 );
578  profile->storeOpt[subSegLen - 1] = vStoreOpt1;
579  profile->storeOpt[segLen - 1] = vStoreOpt2;
580 
581  // precompute the scores for the next cell
582  vCD1 = vec_addx( vCD1, vDelInc);
583  vCD2 = vec_addx( vCD2, vDelInc);
584  vStoreOpt1 = vec_addx( vStoreOpt1, vDelFixed);
585  vStoreOpt2 = vec_addx( vStoreOpt2, vDelFixed);
586  if(!IsInteger<T>::value) {
587  vCD1 = vec_max( vCD1, vZero );
588  vCD2 = vec_max( vCD2, vZero );
589  vStoreOpt1 = vec_max( vStoreOpt1, vZero );
590  vStoreOpt2 = vec_max( vStoreOpt2, vZero );
591  }
592 
  // Stop sweeping once neither half has a lane whose carry exceeds the freshly opened value.
593  if(UNLIKELY(vec_all_le(vCD1,vStoreOpt1) != 0 && vec_all_le(vCD2,vStoreOpt2) != 0))
594  break;
595  }
596  #ifdef SHORTCUT
597  shortcut:
598  (void)1;
599  #endif
600  }
601 #ifdef DEBUG
602  printf("%c\t",db[i]);
603  for(int ii=0; ii<nSeg; ++ii) {
604  for(int jj=0; jj<segLen; ++jj) {
605  if(ii*segLen+jj < profile->len)
606  printf("%d\t",(int)(((T*)profile->storeOpt)[ii+jj*nSeg]-zero));
607  }
608  }
609  printf("\n");
610 #endif
611  }
612  return maxScore;
613 }
614 
628 template< typename T, typename V, typename X >
629 EXPORT Profile<T,V>* swps3_createProfileAltivec( const char *query, int queryLen, X* simi ){
630  const int alignedLen = ALIGN16(queryLen);
631  const int nSeg = sizeof(V)/sizeof(T); // the number of segments
632  const int segLen = alignedLen/nSeg; // the segment length
633 
634  Profile<T,V>* profile = allocateProfile<T,V>(queryLen);
635 
636  for( int i=0; i<MATRIX_DIM; i++ ){
637  T *currentProfile = ((T*)profile->profile)+i*alignedLen;
638  for( int j=0; j<segLen; j++ ){
639  T *tmp = currentProfile + j*nSeg;
640  for( int k=0; k<nSeg; k++ )
641  if( j + k*segLen < queryLen )
642  tmp[k] = (T)simi[ query[j + k*segLen ] * MATRIX_DIM + i ];
643  else
644  tmp[k] = 0;
645  }
646  }
647 
648  return profile;
649 }
650 
668 template< typename T, typename V >
669 EXPORT double swps3_dynProgrAltivec(const char *db, int dbLen, Profile<T,V> *profile, Options *options){
670  T zero, goal;
671  /* A vectorized template version */
672  if (IsInteger<T>::value){
673  // adjust the zero and goal values...
674  zero = MinValue<T>::value;
675  goal = MaxValue<T>::value;
676  } else {
677  zero = (T)0.0;
678  goal = MaxValue<T>::value;
679  }
680 
681  T maxScore=zero;
682 
683 #ifdef UNROLL
684  T currentScore;
685  if (sizeof(T) < 2)
686  currentScore = dynProgrLocal<T,V> ( db, dbLen, profile, options );
687  else
688  currentScore = dynProgrLocal2<T,V> ( db, dbLen, profile, options );
689 #else
690  T currentScore = dynProgrLocal<T,V> ( db, dbLen, profile, options );
691 #endif
692  if( maxScore < currentScore)
693  maxScore = currentScore;
694 
695  if(maxScore >= goal)
696  return DBL_MAX;
697 
698  /* Finally free all the memory we allocated */
699  /********************************************/
700  return (double)(maxScore-zero);
701 }
702 
718 EXPORT double swps3_dynProgrByteAltivec(const char *db, int dbLen, void* profile, Options *options)
719 {
720  return swps3_dynProgrAltivec<int8_t,vector int8_t>(db,dbLen,(Profile<int8_t,vector int8_t>*)profile,options);
721 }
722 
738 EXPORT double swps3_dynProgrShortAltivec(const char *db, int dbLen, void* profile, Options *options)
739 {
740  return swps3_dynProgrAltivec<int16_t,vector int16_t>(db,dbLen,(Profile<int16_t,vector int16_t>*)profile,options);
741 }
742 
759 EXPORT double swps3_dynProgrFloatAltivec(const char *db, int dbLen, void* profile, Options *options)
760 {
761  return swps3_dynProgrAltivec<float,vector float>(db,dbLen,(Profile<float,vector float>*)profile,options);
762 }
763 
774 EXPORT void *swps3_createProfileByteAltivec(const char *query, int queryLen, SBMatrix matrix)
775 {
776  return swps3_createProfileAltivec<int8_t, vector int8_t>(query, queryLen, matrix);
777 }
778 
789 EXPORT void *swps3_createProfileShortAltivec(const char *query, int queryLen, SBMatrix matrix)
790 {
791  return swps3_createProfileAltivec<int16_t, vector int16_t>(query, queryLen, matrix);
792 }
793 
804 EXPORT void *swps3_createProfileFloatAltivec(const char *query, int queryLen, SBMatrix matrix)
805 {
806  return swps3_createProfileAltivec<float, vector float>(query, queryLen, matrix);
807 }
808