Zero operand problems fixed (hopefully!)

master
kaqu 1 year ago
parent 7091db2a27
commit 87b041a371
  1. 2
      bfloat16nn.py
  2. 23
      libmodules/bfloat16processor.py
  3. 71
      software/source/bfloat16nnlib.c
  4. 14
      software/source/my_vsnprintf.c

@ -216,7 +216,7 @@ class BaseSoC(SoCCore):
LoadUnit=dram2fpga,
StoreUnit=None, # *** Not used currently: fpga2dram,
)
self.add_csr("bfloat16nn")
self.add_csr("bfloat16nn")
# USERLED blink (on-board LED)
# only w/ uart-name=crossover option:

@ -119,15 +119,16 @@ class bfloat16Processor(Module):
NextValue(self.fresult, self.fs2), # Return infinity
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Elif(self.fs1[0:31] == 0, # 0+x: Nothing to add? (w/o sign!)
# TODO: VERIFY -> risq5!
).Elif(self.fs1[0:31] == 0, # 0+x: Nothing to add? (w/o sign!)
If(self.fsub, # Subtract yields negative result!
NextValue(self.fresult, self.fs2 ^ 0x80000000), # Invert sign
).Elif(self.fmsub | self.fnmadd, # 0*x=>0! 0-fs3 or -(0+fs3) = +fs3!
NextValue(self.fresult, self.fs3 ^ 0x80000000), # Invert sign
).Elif(self.fmadd, # 0*x=>0! 0+fs3 = fs3!
NextValue(self.fresult, self.fs3), # Ready!
).Else( # Straight add
NextValue(self.fresult, self.fs2), # Ready!
).Else( # Straight add (& fnmsub: -*- = +!)
NextValue(self.fresult, self.fs2), # Ready!
),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
@ -286,10 +287,18 @@ class bfloat16Processor(Module):
NextValue(self.fresult, self.fs2), # Return infinity
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Elif((self.fs1[0:31] == 0) | (self.fs2[0:31] == 0), # Nothing to multiply? (w/o sign!)
NextValue(self.fresult, 0), # Result will be zero ...
NextValue(self.fready, 1),
NextState("FPU_IDLE")
# FIXME: Verify -> risq5!
).Elif((self.fs1[0:31] == 0) | (self.fs2[0:31] == 0), # Nothing to multiply? (w/o sign!)
If(self.fmul, # Single instruction? Straight return.
NextValue(self.fresult, 0), # Result will be zero ...
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Else( # Fused instructions? Continue w/ signed zero
NextValue(self.sign3, self.sign1 ^ self.sign2),
NextValue(self.e3, -127), # Not 0! Will be adjusted in FMADD1
NextValue(self.m3, 0),
NextState("FMADD1")
)
).Else( # Ok, valid floats supplied ...
NextValue(self.sign3, self.sign1 ^ self.sign2), # 1. Calculate result sign
NextValue(self.e3, self.e1 + self.e2), # 2. Calculate resulting exponent (add!)

@ -72,73 +72,58 @@ static float fp1_read(void)
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value1_read(); // Low-endian, high half word required
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
return *fpt;
}
static float fp2_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value2_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static float fp3_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value3_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static float fp4_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value4_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
return *fpt;
}
static float fp5_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value5_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static float fp6_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Value6_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static float fpResult1_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Result1_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static float fpResult2_read(void)
{
uint32_t v __attribute__((aligned(16))) = 0;
*(((uint16_t *)&v) + 1) = bfloat16nn_b16Result2_read();
float *fpt = (float *)&v;
if(*fpt != 0.0)
return *fpt;
return(-12.34);
float *fpt = (float *)&v;
return *fpt;
}
static uint16_t f2ui16(float f)
{
return *(((uint16_t *)&f)+1); // High half word needed (low-endian), hence ...
@ -159,7 +144,7 @@ int key_eval(void)
uint16_t *ui16ptr;
int i;
float fp1, fp2, fp3, fpResult1;
float fp4, fp5, fp6, fpResult2;
float fp4, fp5, fp6, fpResult2;
switch(kbhit()) {
case 'r': // Reload
@ -167,12 +152,12 @@ int key_eval(void)
for(i=0, ui32ptr = (uint32_t *)DRAMDATABASE;i<DRAMDATASIZE;i++) // Setup test data
*ui32ptr++ = i+1;
ui16ptr = (uint16_t *)(DRAMDATABASE + 0 * sizeof(uint16_t)); // Absolute: bytes!
*ui16ptr++ = f2ui16(0.0);
*ui16ptr++ = f2ui16(1.0);
*ui16ptr++ = f2ui16(2.0);
*ui16ptr++ = f2ui16(0.0); // fnmsub: -(0*3 - 4) = +4!
*ui16ptr++ = f2ui16(3.0);
*ui16ptr++ = f2ui16(4.0); // FIXME: Fused 0+X returns 0!
*ui16ptr++ = f2ui16(1.0);
*ui16ptr++ = f2ui16(2.0);
*ui16ptr++ = f2ui16(3.0);
*ui16ptr++ = f2ui16(4.0);
*ui16ptr++ = f2ui16(5.0);
*ui16ptr++ = f2ui16(6.0);
if(fpgaload((uint32_t *)DRAMDATABASE, 512)) {
fp1 = fp1_read();
fp2 = fp2_read();
@ -186,11 +171,11 @@ int key_eval(void)
printf1("V1=%5.3f ", fp1); // FIXME: printf1 fails for 0.0 output !
printf1("V2=%5.3f ", fp2);
printf1("V3=%5.3f ", fp3);
printf1("RESULT=%5.3f\n", fpResult1);
printf1("RESULT=%6.4f\n", fpResult1);
printf1("V1=%5.3f ", fp4);
printf1("V2=%5.3f ", fp5);
printf1("V3=%5.3f ", fp6);
printf1("RESULT=%5.3f\n", fpResult2);
printf1("RESULT=%6.4f\n", fpResult2);
/*
for(i=0;i<DRAMDATASIZE;i+=32) {
dram2fpga_b9Offset_write(i);
@ -209,11 +194,11 @@ int key_eval(void)
printf1("V1=%5.3f ", fp1_read());
printf1("V2=%5.3f ", fp2_read());
printf1("V3=%5.3f ", fp3_read());
printf1("RESULT=%5.3f\n", fpResult1_read());
printf1("RESULT=%6.4f\n", fpResult1_read());
printf1("V1=%5.3f ", fp4_read());
printf1("V2=%5.3f ", fp5_read());
printf1("V3=%5.3f ", fp6_read());
printf1("RESULT=%5.3f\n", fpResult2_read());
printf1("RESULT=%6.4f\n", fpResult2_read());
}
else
printf("INVALIDATED: Timeout!");
@ -223,11 +208,11 @@ int key_eval(void)
printf1("V1=%4.2f ", fp1_read());
printf1("V2=%4.2f ", fp2_read());
printf1("V3=%4.2f ", fp3_read());
printf1("RESULT=%4.2f\n", fpResult1_read());
printf1("RESULT=%6.4f\n", fpResult1_read());
printf1("V1=%4.2f ", fp4_read());
printf1("V2=%4.2f ", fp5_read());
printf1("V3=%4.2f ", fp6_read());
printf1("RESULT=%4.2f\n", fpResult2_read());
printf1("RESULT=%6.4f\n", fpResult2_read());
break;
case 'x': return 1; // Abort indication
default: ;

@ -418,7 +418,19 @@ void printf1(const char *fmt, float f1)
}
}
strncpy(fstr, fr, 5); // Copy format only
my_sprintf(outputstr, fstr, f1); // Eval separately ...
if(f1 != 0.0) // Fails w/ zero ?!
my_sprintf(outputstr, fstr, f1); // Eval separately ...
else { // Zero special treatment
int i;
for(i=0;i < (width-remainder)-1;i++)
outputstr[i] = ' ';
outputstr[i++] = '0';
outputstr[i++] = '.';
for(;i < width;i++)
outputstr[i] = '0';
outputstr[width] = '\0'; // Respect field width
}
outputstr[width] = '\0'; // Respect field width
char *rest = strchr(outputstr,'.');
if(rest != NULL)

Loading…
Cancel
Save