master
kaqu 1 year ago
parent 69b4557926
commit f7c5c95ffa
  1. 7
      libmodules/bfloat16nncore.py
  2. 24
      libmodules/bfloat16processor.py
  3. 2
      libmodules/dramtransfer.py
  4. 2
      software/source/bfloat16nnlib.c

@ -201,21 +201,20 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
NextValue(self.Loader_Delay, self.Loader_Delay + 1), # Increment
)
)
"""
fpu.fmsub
"""
fpu.fnmadd
fpu.fnmsub
"""
Loader_fsm.act("Loader_EXEC1",
NextValue(self.b16Status.storage[5], True), # Current status added
NextValue(fpu.fmadd, True), # This command requested
NextValue(fpu.fmsub, True), # This command requested
NextValue(fpu.fready, False), # Engage trigger
NextState("Loader_EXEC2")
)
Loader_fsm.act("Loader_EXEC2",
NextValue(self.b16Status.storage[6], True), # Current status added
If(fpu.fready,
NextValue(fpu.fmadd, False), # Clear command request
NextValue(fpu.fmsub, False), # Clear command request
NextValue(self.b16Result.storage, fpu.fresult[16:32]), # Pick result (little endian, high word!)
NextValue(self.b16Status.storage[15], True), # Indicate readiness ...
NextValue(self.bReady, True), # Indicate readiness (LED on!)

@ -8,7 +8,6 @@
# History:
# --------
# 22.04.21/KQ Initial version
# 03.05.21/KQ FMADD1 state bugs fixed (fs1/fs2 not prepared?)
#
from migen import *
@ -77,11 +76,7 @@ class bfloat16Processor(Module):
NextValue(self.sign2, self.fs2[31] ^ self.fsub), # Invert sign for subtraction!
NextValue(self.e1, self.fs1[23:31] - 127),
NextValue(self.e2, self.fs2[23:31] - 127),
#NextValue(self.m1, Cat(0,0,0, self.fs1[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m1, Cat(0,0,0, self.fs1[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME!
NextValue(self.m1, Cat(0,0,0, self.fs1[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m2, Cat(0,0,0, self.fs2[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m2, Cat(0,0,0, self.fs2[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME!
NextValue(self.m2, Cat(0,0,0, self.fs2[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextState("FADD1")
).Elif((self.fmin | self.fmax | self.fmadd | self.fmsub | self.fnmadd | self.fnmsub | self.fmul | self.fdiv) & ~self.fready, # Triggers set & ready flag reset externally!
@ -89,11 +84,7 @@ class bfloat16Processor(Module):
NextValue(self.sign2, self.fs2[31]),
NextValue(self.e1, self.fs1[23:31] - 127),
NextValue(self.e2, self.fs2[23:31] - 127),
#NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000
#NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000
#NextValue(self.m2, Cat(self.fs2[0:23], 1, 0)), # | 0x00800000
#NextValue(self.m2, Cat(self.fs2[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
NextValue(self.m2, Cat(self.fs2[16:23], 1, 0, 0,0,0)), # | 0x00800000
If(self.fdiv, # Division
NextState("FDIV1"),
@ -107,8 +98,6 @@ class bfloat16Processor(Module):
).Elif(self.fsqrt & ~self.fready, # Trigger set & ready flag reset externally!
NextValue(self.sign1, self.fs1[31]),
NextValue(self.e1, self.fs1[23:31] - 127),
#NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000
#NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000
NextState("FSQRT1"),
)
@ -341,24 +330,15 @@ class bfloat16Processor(Module):
)
) # End of fmul.s processing
FPU_fsm.act("FMADD1",
# sign3/e3/m3 -> sign1/e1/m1, fs3 -> sign2/e2/m2
# Result->fs1: sign3/e3/m3 -> sign1/e1/m1 & fs1, fs3->fs2: fs3 -> sign2/e2/m2 & fs2
NextValue(self.sign1, self.sign3), # Negate mult. result w/ f<n>xxx
NextValue(self.sign2, self.fs3[31] ^ (self.fmsub | self.fnmsub)), # Invert sign for subtraction!
NextValue(self.e1, self.e3),
NextValue(self.e2, self.fs3[23:31] - 127),
#NextValue(self.m1, Cat(0,0,0, self.m3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m1, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.m1, Cat(0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m2, Cat(0,0,0, self.fs3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
#NextValue(self.m2, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.m2, Cat(0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
# sign3/e3/m3 -> fs1 (reconstruction, nec. for compares, s.a.!)
NextValue(self.m2, Cat(0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], (self.e3+127)[0:8], self.sign3)),
# fs3 -> fs2
NextValue(self.fs2, self.fs3),
NextState("FADD1") # Add fs1 & fs2!
)

@ -9,7 +9,7 @@
# --------
# 21.12.20/KQ Initial test
# 30.12.20/KQ Working (renamed) version
# 22.04.21/KQ In trasfer renamed
# 22.04.21/KQ In transfer renamed
#
from migen import *

@ -100,7 +100,7 @@ static uint16_t f2ui16(float f)
return *(((uint16_t *)&f)+1); // High half word needed (little-endian), hence ...
}
void dumpfloat(float f)
static void dumpfloat(float f)
{
printf("%08Xh -> %04Xh\n", *(uint32_t *)&f, f2ui16(f));
}

Loading…
Cancel
Save