diff --git a/libmodules/bfloat16nncore.py b/libmodules/bfloat16nncore.py index 677308f..467c2d5 100644 --- a/libmodules/bfloat16nncore.py +++ b/libmodules/bfloat16nncore.py @@ -201,21 +201,20 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc): NextValue(self.Loader_Delay, self.Loader_Delay + 1), # Increment ) ) - """ - fpu.fmsub + """ fpu.fnmadd fpu.fnmsub """ Loader_fsm.act("Loader_EXEC1", NextValue(self.b16Status.storage[5], True), # Current status added - NextValue(fpu.fmadd, True), # This command requested + NextValue(fpu.fmsub, True), # This command requested NextValue(fpu.fready, False), # Engage trigger NextState("Loader_EXEC2") ) Loader_fsm.act("Loader_EXEC2", NextValue(self.b16Status.storage[6], True), # Current status added If(fpu.fready, - NextValue(fpu.fmadd, False), # Clear command request + NextValue(fpu.fmsub, False), # Clear command request NextValue(self.b16Result.storage, fpu.fresult[16:32]), # Pick result (little endian, high word!) NextValue(self.b16Status.storage[15], True), # Indicate readyness ... NextValue(self.bReady, True), # Indicate readyness (LED on!) diff --git a/libmodules/bfloat16processor.py b/libmodules/bfloat16processor.py index 6cb6fa3..49ad95a 100644 --- a/libmodules/bfloat16processor.py +++ b/libmodules/bfloat16processor.py @@ -8,7 +8,6 @@ # History: # -------- # 22.04.21/KQ Initial version -# 03.05.21/KQ FMADD1 state bugs fixed (fs1/fs2 not prepared?) # from migen import * @@ -77,11 +76,7 @@ class bfloat16Processor(Module): NextValue(self.sign2, self.fs2[31] ^ self.fsub), # Invert sign for subtraction! NextValue(self.e1, self.fs1[23:31] - 127), NextValue(self.e2, self.fs2[23:31] - 127), - #NextValue(self.m1, Cat(0,0,0, self.fs1[0:23], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m1, Cat(0,0,0, self.fs1[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME! NextValue(self.m1, Cat(0,0,0, self.fs1[16:23], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m2, Cat(0,0,0, self.fs2[0:23], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m2, Cat(0,0,0, self.fs2[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME! NextValue(self.m2, Cat(0,0,0, self.fs2[16:23], 1, 0)), # | 0x00800000 + R/G/S bits NextState("FADD1") ).Elif((self.fmin | self.fmax | self.fmadd | self.fmsub | self.fnmadd | self.fnmsub | self.fmul | self.fdiv) & ~self.fready, # Triggers set & ready flag reset externally! @@ -89,11 +84,7 @@ class bfloat16Processor(Module): NextValue(self.sign2, self.fs2[31]), NextValue(self.e1, self.fs1[23:31] - 127), NextValue(self.e2, self.fs2[23:31] - 127), - #NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000 - #NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME! NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000 - #NextValue(self.m2, Cat(self.fs2[0:23], 1, 0)), # | 0x00800000 - #NextValue(self.m2, Cat(self.fs2[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME! NextValue(self.m2, Cat(self.fs2[16:23], 1, 0, 0,0,0)), # | 0x00800000 If(self.fdiv, # Division NextState("FDIV1"), @@ -107,8 +98,6 @@ class bfloat16Processor(Module): ).Elif(self.fsqrt & ~self.fready, # Trigger set & ready flag reset externally! NextValue(self.sign1, self.fs1[31]), NextValue(self.e1, self.fs1[23:31] - 127), - #NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000 - #NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME! NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000 NextState("FSQRT1"), ) @@ -341,24 +330,15 @@ class bfloat16Processor(Module): ) ) # End of fmul.s processing FPU_fsm.act("FMADD1", - # sign3/e3/m3 -> sign1/e1/m1, fs3 -> sign2/e2/m2 + # Result->fs1: sign3/e3/m3 -> sign1/e1/m1 & fs1, fs3->fs2: fs3 -> sign2/e2/m2 & fs2 NextValue(self.sign1, self.sign3), # Negate mult. result w/ fxxx NextValue(self.sign2, self.fs3[31] ^ (self.fmsub | self.fnmsub)), # Invert sign for subtraction! NextValue(self.e1, self.e3), NextValue(self.e2, self.fs3[23:31] - 127), - #NextValue(self.m1, Cat(0,0,0, self.m3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m1, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits NextValue(self.m1, Cat(0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m2, Cat(0,0,0, self.fs3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits - #NextValue(self.m2, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits - NextValue(self.m2, Cat(0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits - - # sign3/e3/m3 -> fs1 (reconstruction, nec. for compares, s.a.!) + NextValue(self.m2, Cat(0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits NextValue(self.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], (self.e3+127)[0:8], self.sign3)), - - # fs3 -> fs2 NextValue(self.fs2, self.fs3), - NextState("FADD1") # Add fs1 & fs2! ) diff --git a/libmodules/dramtransfer.py b/libmodules/dramtransfer.py index bb98400..cb5c258 100644 --- a/libmodules/dramtransfer.py +++ b/libmodules/dramtransfer.py @@ -9,7 +9,7 @@ # -------- # 21.12.20/KQ Initial test # 30.12.20/KQ Working (renamed) version -# 22.04.21/KQ In trasfer renamed +# 22.04.21/KQ In transfer renamed # from migen import * diff --git a/software/source/bfloat16nnlib.c b/software/source/bfloat16nnlib.c index ed3e965..73b383a 100644 --- a/software/source/bfloat16nnlib.c +++ b/software/source/bfloat16nnlib.c @@ -100,7 +100,7 @@ static uint16_t f2ui16(float f) return *(((uint16_t *)&f)+1); // High half word needed (low-endian), hence ... } -void dumpfloat(float f) +static void dumpfloat(float f) { printf("%08Xh -> %04Xh\n", *(uint32_t *)&f, f2ui16(f)); }