|
|
|
@ -142,8 +142,8 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc): |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# NOTE(review): this span reads like a rendered diff (hunk headers, trailing "|", no +/- markers); the "FPU" and "FPUs" sections below may be the old and new revision of the same lines -- confirm against the real file. |
#---------------- bfloat16 FPU ------------------------------------------------------------- |
|
|
|
|
self.submodules.fpu = fpu = bfloat16Processor() # Instantiate a bfloat16Processor and register it as submodule `fpu` (also bound to local `fpu`) |
|
|
|
|
#---------------- bfloat16 FPUs ------------------------------------------------------------- |
|
|
|
|
self.submodules.fpu1 = fpu1 = bfloat16Processor() # Instantiate a bfloat16Processor and register it as submodule `fpu1` (also bound to local `fpu1`) |
|
|
|
|
|
|
|
|
|
#---------------- Loaded data testing -------------------------------------------------- |
|
|
|
|
Loader_fsm = FSM(reset_state="Loader_IDLE") # Loader state machine; its reset state is "Loader_IDLE" |
|
|
|
@ -182,8 +182,8 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc): |
|
|
|
|
If(self.Loader_Delay > RAMWaitTime, |
|
|
|
|
NextValue(self.b16Value1.storage, LoadUnit.b32Data.storage & 0xFFFF), # Pick 1st date |
|
|
|
|
NextValue(self.b16Value2.storage, LoadUnit.b32Data.storage >> 16), # Pick 2nd date |
|
|
|
|
NextValue(fpu.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[0:16])), |
|
|
|
|
NextValue(fpu.fs2, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[16:32])), |
|
|
|
|
NextValue(fpu1.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[0:16])), |
|
|
|
|
NextValue(fpu1.fs2, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[16:32])), |
|
|
|
|
NextValue(LoadUnit.b9Offset.storage, 1), # 2nd value offset preparation |
|
|
|
|
NextValue(self.Loader_Delay, 0), # Reset delay |
|
|
|
|
NextState("Loader_LOAD3") |
|
|
|
@ -195,29 +195,25 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc): |
|
|
|
|
NextValue(self.b16Status.storage[3], True), # Current status added |
|
|
|
|
If(self.Loader_Delay > RAMWaitTime, |
|
|
|
|
NextValue(self.b16Value3.storage, LoadUnit.b32Data.storage & 0xFFFF), # Pick 3rd date |
|
|
|
|
NextValue(fpu.fs3, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[0:16])), |
|
|
|
|
NextValue(fpu1.fs3, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data.storage[0:16])), |
|
|
|
|
NextState("Loader_EXEC1") |
|
|
|
|
).Else( # MEM wait cycles |
|
|
|
|
NextValue(self.Loader_Delay, self.Loader_Delay + 1), # Increment |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
""" |
|
|
|
|
fpu.fnmadd |
|
|
|
|
fpu.fnmsub |
|
|
|
|
""" |
|
|
|
|
) |
|
|
|
|
# State Loader_EXEC1: flag progress in b16Status bit 5, raise the FPU command request(s), clear fready and move on to Loader_EXEC2. |
# NOTE(review): both `fpu.fmsub` and `fpu1.fnmsub` lines appear here; this may be old/new diff lines interleaved -- confirm which are live. |
Loader_fsm.act("Loader_EXEC1", |
|
|
|
|
NextValue(self.b16Status.storage[5], True), # Set status bit 5 to mark that this state was entered |
|
|
|
|
NextValue(fpu.fmsub, True), # Request the fmsub command on fpu |
|
|
|
|
NextValue(fpu.fready, False), # Clear fpu.fready; Loader_EXEC2 waits for it to be set again (presumably by the FPU on completion) |
|
|
|
|
NextValue(fpu1.fnmsub, True), # Request the fnmsub command on fpu1 |
|
|
|
|
NextValue(fpu1.fready, False), # Clear fpu1.fready; Loader_EXEC2 waits for it to be set again (presumably by the FPU on completion) |
|
|
|
|
NextState("Loader_EXEC2") |
|
|
|
|
) |
|
|
|
|
# State Loader_EXEC2: flag progress in b16Status bit 6, wait for fready, then clear the command request, latch the result, signal readiness and return to Loader_IDLE. |
# NOTE(review): two `If(` openers but only two closing `)` for act + If + If -- the fpu/fpu1 branches are probably old/new diff lines interleaved; confirm against the real file. |
Loader_fsm.act("Loader_EXEC2", |
|
|
|
|
NextValue(self.b16Status.storage[6], True), # Set status bit 6 to mark that this state was entered |
|
|
|
|
If(fpu.fready, |
|
|
|
|
NextValue(fpu.fmsub, False), # Clear command request |
|
|
|
|
NextValue(self.b16Result.storage, fpu.fresult[16:32]), # Pick result: bits 16..31 of fresult (little endian, high word!) |
|
|
|
|
If(fpu1.fready, |
|
|
|
|
NextValue(fpu1.fnmsub, False), # Clear command request |
|
|
|
|
NextValue(self.b16Result.storage, fpu1.fresult[16:32]), # Pick result: bits 16..31 of fresult (little endian, high word!); NOTE(review): b16Result is also written from fpu.fresult above -- confirm which write is intended |
|
|
|
|
NextValue(self.b16Status.storage[15], True), # Indicate readiness via status bit 15 |
|
|
|
|
NextValue(self.bReady, True), # Indicate readiness (LED on!) |
|
|
|
|
NextValue(self.bReady, True), # Indicate readiness (LED on!) (TODO: Remove!) -- same assignment as the line above |
|
|
|
|
NextState("Loader_IDLE") |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|