|
|
|
@ -72,73 +72,58 @@ static float fp1_read(void) |
|
|
|
|
uint32_t v __attribute__((aligned(16))) = 0;
|
|
|
|
|
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value1_read(); // Low-endian, high half word required
|
|
|
|
|
float *fpt = (float *)&v;
|
|
|
|
|
if(*fpt != 0.0) |
|
|
|
|
return *fpt; |
|
|
|
|
return(-12.34); |
|
|
|
|
return *fpt;
|
|
|
|
|
}
|
|
|
|
|
/*
 * Read the b16Value2 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fp2_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Value2_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Value3 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fp3_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Value3_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Value4 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fp4_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Value4_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Value5 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fp5_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Value5_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Value6 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fp6_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Value6_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Result1 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fpResult1_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Result1_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
/*
 * Read the b16Result2 register and return it widened to a float.
 *
 * The 16-bit register value becomes the upper half of a 32-bit word whose
 * lower half is zero; that word is then reinterpreted as a float.
 *
 * Returns the reinterpreted value, or the sentinel -12.34 when that value
 * compares equal to zero.
 * NOTE(review): the sentinel presumably works around the "printf1 fails for
 * 0.0 output" FIXME in key_eval -- confirm before removing it.
 */
static float fpResult2_read(void)
{
    /* Pun through a union: casting &uint32_t to uint16_t* / float* breaks
     * strict aliasing and hard-codes little-endian half-word order. */
    union {
        uint32_t bits;
        float    value;
    } word;
    /* Assumes the accessor yields a 16-bit quantity; the original also
     * truncated it to uint16_t on store. */
    uint16_t raw = bfloat16nn_b16Result2_read();

    word.bits = (uint32_t)raw << 16;    /* high half word, low half zero */

    if (word.value != 0.0)
        return word.value;
    return (-12.34);
}
|
|
|
|
|
|
|
|
|
static uint16_t f2ui16(float f) |
|
|
|
|
{ |
|
|
|
|
return *(((uint16_t *)&f)+1); // High half word needed (low-endian), hence ...
|
|
|
|
@ -159,7 +144,7 @@ int key_eval(void) |
|
|
|
|
uint16_t *ui16ptr; |
|
|
|
|
int i;
|
|
|
|
|
float fp1, fp2, fp3, fpResult1; |
|
|
|
|
float fp4, fp5, fp6, fpResult2; |
|
|
|
|
float fp4, fp5, fp6, fpResult2;
|
|
|
|
|
|
|
|
|
|
switch(kbhit()) { |
|
|
|
|
case 'r': // Reload
|
|
|
|
@ -167,12 +152,12 @@ int key_eval(void) |
|
|
|
|
for(i=0, ui32ptr = (uint32_t *)DRAMDATABASE;i<DRAMDATASIZE;i++) // Setup test data
|
|
|
|
|
*ui32ptr++ = i+1;
|
|
|
|
|
ui16ptr = (uint16_t *)(DRAMDATABASE + 0 * sizeof(uint16_t)); // Absolute: bytes!
|
|
|
|
|
*ui16ptr++ = f2ui16(0.0); |
|
|
|
|
*ui16ptr++ = f2ui16(1.0);
|
|
|
|
|
*ui16ptr++ = f2ui16(2.0); |
|
|
|
|
*ui16ptr++ = f2ui16(0.0); // fnmsub: -(0*3 - 4) = +4!
|
|
|
|
|
*ui16ptr++ = f2ui16(3.0);
|
|
|
|
|
*ui16ptr++ = f2ui16(4.0); // FIXME: Fused 0+X returns 0!
|
|
|
|
|
*ui16ptr++ = f2ui16(1.0); |
|
|
|
|
*ui16ptr++ = f2ui16(2.0);
|
|
|
|
|
*ui16ptr++ = f2ui16(3.0); |
|
|
|
|
*ui16ptr++ = f2ui16(4.0); |
|
|
|
|
*ui16ptr++ = f2ui16(5.0);
|
|
|
|
|
*ui16ptr++ = f2ui16(6.0); |
|
|
|
|
if(fpgaload((uint32_t *)DRAMDATABASE, 512)) {
|
|
|
|
|
fp1 = fp1_read(); |
|
|
|
|
fp2 = fp2_read(); |
|
|
|
@ -186,11 +171,11 @@ int key_eval(void) |
|
|
|
|
printf1("V1=%5.3f ", fp1); // FIXME: printf1 fails for 0.0 output !
|
|
|
|
|
printf1("V2=%5.3f ", fp2);
|
|
|
|
|
printf1("V3=%5.3f ", fp3);
|
|
|
|
|
printf1("RESULT=%5.3f\n", fpResult1);
|
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult1);
|
|
|
|
|
printf1("V1=%5.3f ", fp4);
|
|
|
|
|
printf1("V2=%5.3f ", fp5);
|
|
|
|
|
printf1("V3=%5.3f ", fp6);
|
|
|
|
|
printf1("RESULT=%5.3f\n", fpResult2);
|
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult2);
|
|
|
|
|
/*
|
|
|
|
|
for(i=0;i<DRAMDATASIZE;i+=32) { |
|
|
|
|
dram2fpga_b9Offset_write(i); |
|
|
|
@ -209,11 +194,11 @@ int key_eval(void) |
|
|
|
|
printf1("V1=%5.3f ", fp1_read());
|
|
|
|
|
printf1("V2=%5.3f ", fp2_read());
|
|
|
|
|
printf1("V3=%5.3f ", fp3_read());
|
|
|
|
|
printf1("RESULT=%5.3f\n", fpResult1_read()); |
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult1_read()); |
|
|
|
|
printf1("V1=%5.3f ", fp4_read());
|
|
|
|
|
printf1("V2=%5.3f ", fp5_read());
|
|
|
|
|
printf1("V3=%5.3f ", fp6_read());
|
|
|
|
|
printf1("RESULT=%5.3f\n", fpResult2_read());
|
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult2_read());
|
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
printf("INVALIDATED: Timeout!");
|
|
|
|
@ -223,11 +208,11 @@ int key_eval(void) |
|
|
|
|
printf1("V1=%4.2f ", fp1_read());
|
|
|
|
|
printf1("V2=%4.2f ", fp2_read());
|
|
|
|
|
printf1("V3=%4.2f ", fp3_read());
|
|
|
|
|
printf1("RESULT=%4.2f\n", fpResult1_read()); |
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult1_read()); |
|
|
|
|
printf1("V1=%4.2f ", fp4_read());
|
|
|
|
|
printf1("V2=%4.2f ", fp5_read());
|
|
|
|
|
printf1("V3=%4.2f ", fp6_read());
|
|
|
|
|
printf1("RESULT=%4.2f\n", fpResult2_read()); |
|
|
|
|
printf1("RESULT=%6.4f\n", fpResult2_read()); |
|
|
|
|
break; |
|
|
|
|
case 'x': return 1; // Abort indication
|
|
|
|
|
default: ; |
|
|
|
|