00001
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include <spu_mfcio.h>
00031 #include "DynProgr_SPE.h"
00032 #include "DynProgr_SPE_functions.h"
00033 #include <stdio.h>
00034 #include "matrix.h"
00035
00036
/*
 * Two-operand minimum / maximum.
 * These are plain macros: each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define min(a,b) ( ((a) < (b)) ? (a) : (b) )
#define max(a,b) ( ((a) > (b)) ? (a) : (b) )
00039
00040 static int8_t mainMemory[ TOTAL_MEMORY ] __attribute__((__aligned__(16)));
00041 static int allocated = 0;
00042 static int datatype = -1;
00043 void * alloc( int size ){
00044 void * result = mainMemory + allocated;
00045 allocated += ALIGN16(size);
00046 if(allocated>TOTAL_MEMORY) return (void*)-1;
00047 return result;
00048 }
00049 int memRemaining(){
00050 return TOTAL_MEMORY-allocated;
00051 }
00052 void reset(){
00053 allocated = 0;
00054 datatype = -1;
00055 }
00056
00057 int handleCommand( ppu_addr_t program_data_ea ){
00058 SPECommand cmd __ALIGNED__;
00059 int i;
00060
00061 mfc_get(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
00062 mfc_write_tag_mask(1<<0);
00063 mfc_read_tag_status_all();
00064
00065 switch(cmd.command) {
00066 case SPE_CMD_INIT:
00067 reset();
00068 datatype = cmd.data.INIT.datatype;
00069 if(datatype < 0 || datatype > 4) {
00070 datatype = -1;
00071 return -1;
00072 }
00073 fixedDel = cmd.data.INIT.fixedDel;
00074 incDel = cmd.data.INIT.incDel;
00075 maxDbLen = cmd.data.INIT.dbMaxLen;
00076
00077
00078 profile = NULL;
00079 remote_profile = 0;
00080 blockStart = 0;
00081 blockSize = 0;
00082 s1 = NULL;
00083 ls1 = 0;
00084 simi = NULL;
00085
00086
00087
00088 s2 = (char *)alloc( maxDbLen*sizeof(char) );
00089 maxS = alloc( maxDbLen*dataSize[datatype] );
00090 delS = alloc( maxDbLen*dataSize[datatype] );
00091 break;
00092
00093 case SPE_CMD_CREATE_PROFILE:
00094 if(profile != NULL || datatype == -1) return -1;
00095
00096 mn = min(cmd.data.CREATE_PROFILE.matrix.min,min(fixedDel,incDel));
00097 mx = max(cmd.data.CREATE_PROFILE.matrix.max,max(fixedDel,incDel));
00098 ls1 = cmd.data.CREATE_PROFILE.query.len;
00099
00100
00101 s1 = alloc( ls1*sizeof(char) );
00102 for( i=0; i<ls1; i+=MAX_TRANSFER )
00103 mfc_get( s1+i, cmd.data.CREATE_PROFILE.query.addr+i, ALIGN16(min(ls1-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 );
00104
00105
00106 simi = alloc( MATRIX_DIM*MATRIX_DIM*dataSize[datatype] );
00107 mfc_get( simi, cmd.data.CREATE_PROFILE.matrix.addr, ALIGN16(MATRIX_DIM*MATRIX_DIM*dataSize[datatype]), 1, 0, 0 );
00108
00109
00110 mfc_write_tag_mask((1<<0)|(1<<1));
00111 mfc_read_tag_status_all();
00112
00113
00114 if(memRemaining() <= 0) return -1;
00115 blockSize=(memRemaining() / ((MATRIX_DIM+3)*dataSize[datatype])) & -16;
00116 if (blockSize < 50) return -1;
00117 blockSize = ALIGN16(min(blockSize,ls1));
00118
00119
00120 profile = alloc( blockSize * MATRIX_DIM * dataSize[datatype] );
00121 loadOpt = alloc( blockSize * dataSize[datatype] );
00122 storeOpt = alloc( blockSize * dataSize[datatype] );
00123 rD = alloc( blockSize * dataSize[datatype] );
00124
00125 blockStart = 0;
00126 #ifdef DEBUG_FETCH
00127 printf(">>>> creating profile\n");
00128 #endif
00129 createProfile[datatype]();
00130 break;
00131
00132 case SPE_CMD_PUT_PROFILE:
00133 if(profile == NULL || s1 == NULL) return -1;
00134
00135
00136
00137 if(blockStart != 0) {
00138 blockStart = 0;
00139 createProfile[datatype]();
00140 }
00141 cmd.data.PUT_PROFILE.blockSize = blockSize;
00142
00143
00144 for(blockStart=0; blockStart<ls1; blockStart+=blockSize ) {
00145 int64_t bs;
00146 int currentBlockSize = ALIGN16(min(ls1-blockStart,blockSize));
00147 if(blockStart != 0) createProfile[datatype]();
00148
00149 for( bs=0; bs<currentBlockSize * MATRIX_DIM * dataSize[datatype]; bs+=MAX_TRANSFER ) {
00150 mfc_put( ((char*)profile)+bs, cmd.data.PUT_PROFILE.addr+blockStart*MATRIX_DIM*dataSize[datatype]+bs, ALIGN16(min(currentBlockSize*MATRIX_DIM*dataSize[datatype]-bs, (int64_t)MAX_TRANSFER)), 0, 0, 0 );
00151
00152
00153 mfc_write_tag_mask(1<<0);
00154 mfc_read_tag_status_all();
00155 }
00156 }
00157
00158
00159 mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
00160 mfc_write_tag_mask(1<<0);
00161 mfc_read_tag_status_all();
00162 break;
00163
00164 case SPE_CMD_GET_PROFILE:
00165 if(datatype == -1 || profile != NULL) return -1;
00166 remote_profile = cmd.data.GET_PROFILE.profile.addr;
00167
00168 mn = min(cmd.data.GET_PROFILE.profile.min,min(fixedDel,incDel));
00169 mx = max(cmd.data.GET_PROFILE.profile.max,max(fixedDel,incDel));
00170 ls1 = cmd.data.GET_PROFILE.profile.len;
00171 blockSize = cmd.data.GET_PROFILE.profile.blockSize;
00172
00173 profile = alloc( blockSize * MATRIX_DIM * dataSize[datatype] );
00174 loadOpt = alloc( blockSize * dataSize[datatype] );
00175 storeOpt = alloc( blockSize * dataSize[datatype] );
00176 rD = alloc( blockSize * dataSize[datatype] );
00177 if(memRemaining() < 0) return -1;
00178
00179 blockStart = 0;
00180 #ifdef DEBUG_FETCH
00181 printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]));
00182 #endif
00183 for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) {
00184 mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 );
00185
00186
00187 mfc_write_tag_mask(1<<0);
00188 mfc_read_tag_status_all();
00189 }
00190 break;
00191
00192 case SPE_CMD_ALIGN:
00193 if(profile == NULL) return -1;
00194
00195 ls2 = cmd.data.ALIGN.db.len;
00196
00197
00198 for( i=0; i<ls2; i+=MAX_TRANSFER )
00199 mfc_get( s2+i, cmd.data.ALIGN.db.addr+i, ALIGN16(min(ls2-i, MAX_TRANSFER)*sizeof(char)), 0, 0, 0 );
00200 mfc_write_tag_mask(1<<0);
00201 mfc_read_tag_status_all();
00202
00203
00204 if(blockStart != 0) {
00205 if(remote_profile == 0) {
00206 blockStart = 0;
00207 #ifdef DEBUG_FETCH
00208 printf(">>>> creating profile\n");
00209 #endif
00210 createProfile[datatype]();
00211 } else {
00212 blockStart = 0;
00213 #ifdef DEBUG_FETCH
00214 printf(">>>> fetching profile (%d bytes)\n",ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]));
00215 #endif
00216 for( i=0; i<ALIGN16(blockSize * MATRIX_DIM * dataSize[datatype]); i+=MAX_TRANSFER ) {
00217 mfc_get( ((char*)profile)+i, remote_profile+i, ALIGN16(min(blockSize*MATRIX_DIM*dataSize[datatype]-i, (int64_t)MAX_TRANSFER)), 0, 0, 0 );
00218
00219
00220 mfc_write_tag_mask(1<<0);
00221 mfc_read_tag_status_all();
00222 }
00223 }
00224 }
00225
00226 cmd.data.ALIGN.result = dynProgLocal[datatype]();
00227
00228
00229 mfc_put(&cmd, program_data_ea, sizeof(cmd), 0, 0, 0);
00230 mfc_write_tag_mask(1<<0);
00231 mfc_read_tag_status_all();
00232 break;
00233
00234 default:
00235 return -1;
00236 }
00237 return 0;
00238 }
00239 #ifdef MAIL
00240 int main() {
00241 while (1){
00242 int res;
00243 ppu_addr_t program_data_ea = spu_read_in_mbox();
00244 program_data_ea += ((ppu_addr_t)spu_read_in_mbox())<<32;
00245 res = handleCommand( program_data_ea );
00246
00247 spu_write_out_intr_mbox( res );
00248 }
00249 return 0;
00250 }
00251
00252 #else
00253 int main(uint64_t spe_id, ppu_addr_t program_data_ea, ppu_addr_t env) {
00254 (void)spe_id;
00255 (void)env;
00256 return handleCommand( program_data_ea );
00257 }
00258 #endif