bugs fixed, fmadd ok
parent
ea794dfa9c
commit
69b4557926
|
@ -201,22 +201,21 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
|
|||
NextValue(self.Loader_Delay, self.Loader_Delay + 1), # Increment
|
||||
)
|
||||
)
|
||||
"""
|
||||
fpu.fmadd
|
||||
"""
|
||||
fpu.fmsub
|
||||
fpu.fnmadd
|
||||
fpu.fnmsub
|
||||
"""
|
||||
Loader_fsm.act("Loader_EXEC1",
|
||||
NextValue(self.b16Status.storage[5], True), # Current status added
|
||||
NextValue(fpu.fmax, True), # This command requested
|
||||
NextValue(fpu.fmadd, True), # This command requested
|
||||
NextValue(fpu.fready, False), # Engage trigger
|
||||
NextState("Loader_EXEC2")
|
||||
)
|
||||
Loader_fsm.act("Loader_EXEC2",
|
||||
NextValue(self.b16Status.storage[6], True), # Current status added
|
||||
If(fpu.fready,
|
||||
NextValue(fpu.fmax, False), # Clear command request
|
||||
NextValue(fpu.fmadd, False), # Clear command request
|
||||
NextValue(self.b16Result.storage, fpu.fresult[16:32]), # Pick result (little endian, high word!)
|
||||
NextValue(self.b16Status.storage[15], True), # Indicate readiness ...
|
||||
NextValue(self.bReady, True), # Indicate readiness (LED on!)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
# History:
|
||||
# --------
|
||||
# 22.04.21/KQ Initial version
|
||||
# 03.05.21/KQ FMADD1 state bugs fixed (fs1/fs2 not prepared?)
|
||||
#
|
||||
|
||||
from migen import *
|
||||
|
@ -77,9 +78,11 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.e1, self.fs1[23:31] - 127),
|
||||
NextValue(self.e2, self.fs2[23:31] - 127),
|
||||
#NextValue(self.m1, Cat(0,0,0, self.fs1[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m1, Cat(0,0,0, self.fs1[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
#NextValue(self.m1, Cat(0,0,0, self.fs1[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME!
|
||||
NextValue(self.m1, Cat(0,0,0, self.fs1[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
#NextValue(self.m2, Cat(0,0,0, self.fs2[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m2, Cat(0,0,0, self.fs2[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
#NextValue(self.m2, Cat(0,0,0, self.fs2[0:7], 1, 0)), # | 0x00800000 + R/G/S bits FIXME!
|
||||
NextValue(self.m2, Cat(0,0,0, self.fs2[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextState("FADD1")
|
||||
).Elif((self.fmin | self.fmax | self.fmadd | self.fmsub | self.fnmadd | self.fnmsub | self.fmul | self.fdiv) & ~self.fready, # Triggers set & ready flag reset externally!
|
||||
NextValue(self.sign1, self.fs1[31]),
|
||||
|
@ -87,9 +90,11 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.e1, self.fs1[23:31] - 127),
|
||||
NextValue(self.e2, self.fs2[23:31] - 127),
|
||||
#NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000
|
||||
NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000
|
||||
#NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
|
||||
NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000
|
||||
#NextValue(self.m2, Cat(self.fs2[0:23], 1, 0)), # | 0x00800000
|
||||
NextValue(self.m2, Cat(self.fs2[0:7], 1, 0, 0,0,0)), # | 0x00800000
|
||||
#NextValue(self.m2, Cat(self.fs2[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
|
||||
NextValue(self.m2, Cat(self.fs2[16:23], 1, 0, 0,0,0)), # | 0x00800000
|
||||
If(self.fdiv, # Division
|
||||
NextState("FDIV1"),
|
||||
).Elif(self.fmin, # Minimum
|
||||
|
@ -103,7 +108,8 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.sign1, self.fs1[31]),
|
||||
NextValue(self.e1, self.fs1[23:31] - 127),
|
||||
#NextValue(self.m1, Cat(self.fs1[0:23], 1, 0)), # | 0x00800000
|
||||
NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000
|
||||
#NextValue(self.m1, Cat(self.fs1[0:7], 1, 0, 0,0,0)), # | 0x00800000 FIXME!
|
||||
NextValue(self.m1, Cat(self.fs1[16:23], 1, 0, 0,0,0)), # | 0x00800000
|
||||
NextState("FSQRT1"),
|
||||
)
|
||||
)
|
||||
|
@ -141,7 +147,7 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.fready, 1),
|
||||
NextState("FPU_IDLE")
|
||||
).Elif((self.fmadd | self.fmsub | self.fnmadd | self.fnmsub) & ((self.e2 == 0) & (self.m2 == 0)), # Nothing to add (w/o sign!)
|
||||
NextState("self.fresult") # Just supply (normalized finally!) result from multiplication!
|
||||
NextState("FRESULT") # Just supply (normalized finally!) result from multiplication!
|
||||
).Else( # Ok, valid floats supplied ...
|
||||
NextValue(self.s_bit, 0),
|
||||
NextValue(self.branch1, 0), # Reset helpers
|
||||
|
@ -236,7 +242,7 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.m3, self.m3 + self.s_bit),
|
||||
NextState("FADD8") # Adjust possible overflow ...
|
||||
).Else( # Nope, all ready
|
||||
NextState("self.fresult")
|
||||
NextState("FRESULT")
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -247,10 +253,10 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.m3, self.m3 >> 1), # Adjust mantissa & increment exponent
|
||||
NextValue(self.e3, self.e3 + 1)
|
||||
),
|
||||
NextState("self.fresult")
|
||||
NextState("FRESULT")
|
||||
) # End of fadd.s processing
|
||||
|
||||
FPU_fsm.act("self.fresult", # Result contruction & possible rounding
|
||||
FPU_fsm.act("FRESULT", # Result construction & possible rounding
|
||||
NextValue(self.FPU_state, 9),
|
||||
# 6. Build the actual resulting float
|
||||
#NextValue(self.fresult, Cat(self.m3[0:23], self.e3+127, self.sign3)),
|
||||
|
@ -327,23 +333,33 @@ class bfloat16Processor(Module):
|
|||
# 6. Construction of result
|
||||
NextValue(self.m3, (self.lm3 >> 23) & 0x7FFFFF),
|
||||
# TODO: e3=se3 omitted ok?
|
||||
If(self.fmul, # Simple multiplication
|
||||
NextState("self.fresult")
|
||||
If(self.fmul, # Simple multiplication
|
||||
NextState("FRESULT")
|
||||
).Else( # Fused multiply-add?
|
||||
NextValue(self.sign3, self.sign3 ^ (self.fnmadd | self.fnmsub)), # Negate mult. result w/ f<n>xxx
|
||||
NextState("FMADD1")
|
||||
)
|
||||
) # End of fmul.s processing
|
||||
FPU_fsm.act("FMADD1",
|
||||
FPU_fsm.act("FMADD1",
|
||||
# sign3/e3/m3 -> sign1/e1/m1, fs3 -> sign2/e2/m2
|
||||
NextValue(self.sign1, self.sign3), # Negate mult. result w/ f<n>xxx
|
||||
NextValue(self.sign2, self.fs3[31] ^ (self.fmsub | self.fnmsub)), # Invert sign for subtraction!
|
||||
NextValue(self.e1, self.e3),
|
||||
NextValue(self.e2, self.fs3[23:31] - 127),
|
||||
#NextValue(self.m1, Cat(0,0,0, self.m3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m1, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
#NextValue(self.m1, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m1, Cat(0,0,0, self.m3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
#NextValue(self.m2, Cat(0,0,0, self.fs3[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m2, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.fs3[0:7], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextState("FADD1")
|
||||
#NextValue(self.m2, Cat(0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
NextValue(self.m2, Cat(0,0,0, self.fs3[16:23], 1, 0)), # | 0x00800000 + R/G/S bits
|
||||
|
||||
# sign3/e3/m3 -> fs1 (reconstruction, necessary for the compares, see above!)
|
||||
NextValue(self.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, self.m3[0:7], (self.e3+127)[0:8], self.sign3)),
|
||||
|
||||
# fs3 -> fs2
|
||||
NextValue(self.fs2, self.fs3),
|
||||
|
||||
NextState("FADD1") # Add fs1 & fs2!
|
||||
)
|
||||
|
||||
FPU_fsm.act("FDIV1",
|
||||
|
@ -397,7 +413,7 @@ class bfloat16Processor(Module):
|
|||
NextValue(self.m3, self.m3 << 1), # Subtraction normalization
|
||||
NextValue(self.e3, self.e3 - 1),
|
||||
).Else(
|
||||
NextState("self.fresult")
|
||||
NextState("FRESULT")
|
||||
)
|
||||
)
|
||||
) # End of fdiv.s processing
|
||||
|
|
Loading…
Reference in New Issue