|
|
|
@ -50,7 +50,7 @@ extern int key_eval(void); |
|
|
|
|
#define DRAMDATASIZE2 DRAMDATASIZE1 |
|
|
|
|
|
|
|
|
|
static uint32_t fpgastate, fpustates; |
|
|
|
|
static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, int16_t calclen, int bReload1, int bReload2) |
|
|
|
|
static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, uint16_t calclen, int bReload1, int bReload2) |
|
|
|
|
{ |
|
|
|
|
uint32_t *sentinel1 = (uint32_t *)(DRAMDATABASE1 + ((DRAMDATASIZE1-1) * sizeof(uint32_t))); |
|
|
|
|
uint32_t *sentinel2 = (uint32_t *)(DRAMDATABASE2 + ((DRAMDATASIZE2-1) * sizeof(uint32_t)));
|
|
|
|
@ -62,7 +62,7 @@ static int fpgaload(uint32_t *mempt1, uint32_t *mempt2, int16_t calclen, int bRe |
|
|
|
|
bfloat16nn_b32Sentinel1_write(*sentinel1); |
|
|
|
|
bfloat16nn_b32Sentinel2_write(*sentinel2); |
|
|
|
|
flush_l2_cache(); // Strictly nec. for longer transfers
|
|
|
|
|
bfloat16nn_b10ArrayWordLen_write(calclen); // Indicate array length for calc.
|
|
|
|
|
bfloat16nn_b10ArrayWordLen_write(calclen>>1); // Indicate array length for calc. but honour split!
|
|
|
|
|
bfloat16nn_b32DRAMLoadAddress1_write((uint32_t)mempt1); // Indicate memory to load from
|
|
|
|
|
bfloat16nn_b32DRAMLoadAddress2_write((uint32_t)mempt2); // Indicate memory to load from
|
|
|
|
|
bfloat16nn_bReload1_write(bReload1 ? 1 : 0); // Reload mem#1
|
|
|
|
@ -113,14 +113,14 @@ void dumpfloat(float f) |
|
|
|
|
int key_eval(void) |
|
|
|
|
{ |
|
|
|
|
extern void printf1(const char *fmt, float f1);
|
|
|
|
|
uint32_t *ui32ptr1, *ui32ptr2;
|
|
|
|
|
//uint32_t *ui32ptr1, *ui32ptr2;
|
|
|
|
|
uint16_t *ui16ptr1, *ui16ptr2;
|
|
|
|
|
float fpResult1, fpResult2;
|
|
|
|
|
uint32_t starttime; |
|
|
|
|
uint32_t deltatime; |
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
#define MAXCALCLEN 4 //784 //16 OK
|
|
|
|
|
#define MAXCALCLEN 784 //784 //16 OK
|
|
|
|
|
|
|
|
|
|
switch(kbhit()) { |
|
|
|
|
case 'r': // Reload
|
|
|
|
@ -156,32 +156,22 @@ int key_eval(void) |
|
|
|
|
printf("S/W Delta t: %dms ", deltatime);
|
|
|
|
|
printf1("\t\t\tS/W SUM=%8.4f\n", sum);
|
|
|
|
|
|
|
|
|
|
// Calculation fails if the memory is not cleared first - reason unknown???
|
|
|
|
|
for(i=0, ui32ptr1 = (uint32_t *)DRAMDATABASE1;i<DRAMDATASIZE1;i++) // Setup test data
|
|
|
|
|
*ui32ptr1++ = 0; // Clear all memory ...
|
|
|
|
|
for(i=0, ui32ptr2 = (uint32_t *)DRAMDATABASE2;i<DRAMDATASIZE2;i++) // Setup test data
|
|
|
|
|
*ui32ptr2++ = 0; // Clear all memory ...
|
|
|
|
|
|
|
|
|
|
ui16ptr1 = (uint16_t *)DRAMDATABASE1; // Absolute: bytes! Matrice/row
|
|
|
|
|
for(i=1;i<=MAXCALCLEN+1;i++) |
|
|
|
|
for(i=1;i<=MAXCALCLEN;i++) |
|
|
|
|
*ui16ptr1++ = f2ui16(1.0 * (float)i );
|
|
|
|
|
ui16ptr2 = (uint16_t *)DRAMDATABASE2; // Absolute: bytes! Vector
|
|
|
|
|
for(i=1;i<=MAXCALCLEN+1;i++) |
|
|
|
|
for(i=1;i<=MAXCALCLEN;i++) |
|
|
|
|
*ui16ptr2++ = f2ui16(1.0 * (float)i );
|
|
|
|
|
|
|
|
|
|
// BOTH: 1*1+2*2+3*3+4*4 (+5*5) = 1+4+9+16 (+25) = 30 (+25) = 55 WRONG!
|
|
|
|
|
// FPU#1: 1*1 +3*3 = 1+9 = 10 OK
|
|
|
|
|
// FPU#2: 2*2 +4*4 (+5*5)= 4+16(+5*5) = 20(+25) = 45 WRONG!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
starttime = systime(0); |
|
|
|
|
if(fpgaload((uint32_t *)DRAMDATABASE1, (uint32_t *)DRAMDATABASE2, MAXCALCLEN, 1, 1)) { // 800*32-bit=3200 bytes, 400 Words/FPU to calc.
|
|
|
|
|
if(fpgaload((uint32_t *)DRAMDATABASE1, (uint32_t *)DRAMDATABASE2, MAXCALCLEN, 1, 1)) { |
|
|
|
|
deltatime = systime(0)-starttime; |
|
|
|
|
printf("H/W Delta t: %dms ", deltatime);
|
|
|
|
|
fpResult1 = fpResult1_read(); |
|
|
|
|
fpResult2 = fpResult2_read(); |
|
|
|
|
printf("(S=%04Xh: FS=%04Xh)", fpgastate, fpustates); |
|
|
|
|
printf1("\tH/W SUM=%8.4f", fpResult1); |
|
|
|
|
printf1("\t(FPU#2=%8.4f)\n", fpResult2); |
|
|
|
|
printf1("\tH/W SUM=%8.4f\n", fpResult1); |
|
|
|
|
//printf1("\t(FPU#2=%8.4f)\n", fpResult2);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
printf("CURRENT TIMEOUT: S=%04Xh: FS=%04Xh ", fpgastate, fpustates);
|
|
|
|
|