After cleanup, before increase ...

Branch: master
Author: kaqu, 1 year ago
Parent: 24ef44e4d5
Commit: 8ae7085df7
1 changed file: libmodules/bfloat16nncore.py (48 changed lines)

@@ -95,26 +95,28 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
description="""
FPU states: Low FPU#1, High FPU#2
""")
""" TODO: Remove!
self.b16Value1_1 = CSRStorage(16, reset_less=False,
fields=[CSRField("Value", size=16, description="*Field*: 16-Bit value")],
description="""
description="
FPU#1 Float register 1
""")
")
self.b16Value1_2 = CSRStorage(16, reset_less=False,
fields=[CSRField("Value", size=16, description="*Field*: 16-Bit value")],
description="""
FPU#1 Float register 2
""")
description="
#FPU#1 Float register 2
")
self.b16Value2_1 = CSRStorage(16, reset_less=False,
fields=[CSRField("Value", size=16, description="*Field*: 16-Bit value")],
description="""
FPU#2 Float register 1
""")
description="
#FPU#2 Float register 1
")
self.b16Value2_2 = CSRStorage(16, reset_less=False,
fields=[CSRField("Value", size=16, description="*Field*: 16-Bit value")],
description="""
FPU#2 Float register 2
""")
description=" " "
#FPU#2 Float register 2
" " ")
"""
self.b16Result1 = CSRStorage(16, reset_less=False,
fields=[CSRField("Result1", size=16, description="*Field*: 16-Bit value")],
description="""
@@ -183,10 +185,10 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
NextValue(LoadUnit.b9Offset2.storage, LUCacheSize >> 1), # Adjust offset to start of 2nd array
NextValue(self.b16Result1.storage, 0), # Indicate # delays
NextValue(self.b16Result2.storage, 0), # Indicate # delays
- NextValue(self.b16Value1_1.storage, 0), # Nothing loaded so far ...
- NextValue(self.b16Value1_2.storage, 0),
- NextValue(self.b16Value2_1.storage, 0),
- NextValue(self.b16Value2_2.storage, 0),
+ #NextValue(self.b16Value1_1.storage, 0), # TODO: Remove! Nothing loaded so far ...
+ #NextValue(self.b16Value1_2.storage, 0),
+ #NextValue(self.b16Value2_1.storage, 0),
+ #NextValue(self.b16Value2_2.storage, 0),
NextValue(self.bReady, False), # LED off!
NextState("Loader_LOAD1")
).Elif(~self.bEnable.storage, # Externally aborted?
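Aside, for readers not familiar with Migen: `Loader_fsm.act(...)`, `NextValue(...)` and `NextState(...)` describe registered updates that take effect on the next clock edge while the FSM sits in the given state. A stripped-down, hypothetical model of the idle/init pattern seen above (signal names are stand-ins, not the core's real interface):

```python
# Hypothetical miniature of the Loader FSM idiom (Migen); not the actual core.
from migen import Module, Signal, If
from migen.genlib.fsm import FSM, NextValue, NextState

class LoaderSketch(Module):
    def __init__(self):
        self.enable  = Signal()     # stand-in for self.bEnable.storage
        self.offset1 = Signal(9)    # stand-in for LoadUnit.b9Offset1.storage
        self.result1 = Signal(16)   # stand-in for self.b16Result1.storage

        fsm = FSM(reset_state="Loader_IDLE")
        self.submodules += fsm
        fsm.act("Loader_IDLE",
            If(self.enable,
                NextValue(self.offset1, 0),   # clear pointers/results on start,
                NextValue(self.result1, 0),   # as the hunk above does
                NextState("Loader_LOAD1")
            )
        )
        fsm.act("Loader_LOAD1",
            NextState("Loader_IDLE")          # placeholder: the real core fetches from RAM here
        )
```
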
@@ -209,14 +211,14 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
NextValue(self.b16Status.storage[1], True), # Current status added
If(self.Loader_Delay > RAMWaitTime, # Required only for 1st entry ...
# FPU#1
- NextValue(self.b16Value1_1.storage, LoadUnit.b32Data1.storage & 0xFFFF), # Pick 1st date
- NextValue(self.b16Value1_2.storage, LoadUnit.b32Data1.storage >> 16), # Pick 2nd date
+ #NextValue(self.b16Value1_1.storage, LoadUnit.b32Data1.storage & 0xFFFF), # TODO: Remove! Pick 1st date
+ #NextValue(self.b16Value1_2.storage, LoadUnit.b32Data1.storage >> 16), # TODO: Remove! Pick 2nd date
NextValue(fpu1.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data1.storage[0:16])),
NextValue(fpu1.fs2, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data1.storage[16:32])),
NextValue(LoadUnit.b9Offset1.storage, LoadUnit.b9Offset1.storage + 1), # Move on to next entry
# FPU#2
- NextValue(self.b16Value2_1.storage, LoadUnit.b32Data2.storage & 0xFFFF), # Pick 1st date
- NextValue(self.b16Value2_2.storage, LoadUnit.b32Data2.storage >> 16), # Pick 2nd date
+ #NextValue(self.b16Value2_1.storage, LoadUnit.b32Data2.storage & 0xFFFF), # TODO: Remove! Pick 1st date
+ #NextValue(self.b16Value2_2.storage, LoadUnit.b32Data2.storage >> 16), # TODO: Remove! Pick 2nd date
NextValue(fpu2.fs1, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data2.storage[0:16])),
NextValue(fpu2.fs2, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, LoadUnit.b32Data2.storage[16:32])),
NextValue(LoadUnit.b9Offset2.storage, LoadUnit.b9Offset2.storage + 1), # Move on to next entry
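
The `Cat(0,0,...,0, storage[0:16])` expressions above pack sixteen zero bits below the 16-bit value; since Migen's `Cat` concatenates LSB-first, the bfloat16 word lands in bits [16:32] of the 32-bit operand, which is exactly how bfloat16 maps onto IEEE-754 binary32 (same sign and exponent, truncated mantissa). A plain-Python sketch of the same bit layout (the example word is illustrative only):

```python
# Sketch of the bit placement only, not of the FPU itself.
import struct

def bf16_to_f32(bf16_bits: int) -> float:
    """Zero-fill the low 16 bits: bfloat16 is the top half of a float32."""
    return struct.unpack(">f", struct.pack(">I", (bf16_bits & 0xFFFF) << 16))[0]

word = 0x40483F80                   # one 32-bit cache word holding two bf16 values
print(bf16_to_f32(word & 0xFFFF))   # low  half 0x3F80 -> 1.0   (goes to fs1)
print(bf16_to_f32(word >> 16))      # high half 0x4048 -> 3.125 (goes to fs2)
```
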
@@ -228,7 +230,6 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
)
Loader_fsm.act("Loader_EXEC1",
NextValue(self.b16Status.storage[2], True), # Current status added
- #If(LoadUnit.b9Offset1.storage < self.b9ArrayWordLen.storage, #(LUCacheSize >> 1),
If(LoadUnit.b9Offset1.storage == 1, # As pointer already moved ahead 1!
NextValue(fpu1.fmul, True), # 1st ADD requested
NextValue(fpu2.fmul, True),
@@ -237,8 +238,7 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
NextValue(fpu2.fmadd, True),
),
NextValue(fpu1.fready, False), # Engage trigger FPU#1
NextValue(fpu2.fready, False), # Engage trigger FPU#2
- #),
NextValue(fpu2.fready, False), # Engage trigger FPU#2
NextState("Loader_EXEC2")
)
Loader_fsm.act("Loader_EXEC2",
@@ -249,13 +249,13 @@ class bfloat16NeuralNetworkCore(Module, AutoCSR, AutoDoc, ModuleDoc):
If(LoadUnit.b9Offset1.storage == 1, # As pointer already moved ahead 1! (Actually: Entry #0)
NextValue(fpu1.fmul, False), # Clear command request FPU#1
NextValue(fpu2.fmul, False), # Clear command request FPU#2
- ).Else( # Entries 1..len
+ ).Else( # Entries 1 .. (maxlen-1)
NextValue(fpu1.fmadd, False), # Clear command request FPU#1
NextValue(fpu2.fmadd, False), # Clear command request FPU#2
),
NextValue(fpu1.fs3, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, fpu1.fresult[16:32])), # Sum will be used for fmadd.s
NextValue(fpu2.fs3, Cat(0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, fpu2.fresult[16:32])), # Sum will be used for fmadd.s
- If(LoadUnit.b9Offset1.storage < self.b9ArrayWordLen.storage, #(LUCacheSize >> 1), # Words 0 .. 255
+ If(LoadUnit.b9Offset1.storage < self.b9ArrayWordLen.storage, # Words 0 .. 255
NextState("Loader_LOAD2")
).Else( # Finally prepare ADD both result sums (on FPU#1 only!)
NextValue(fpu1.fs1, fpu1.fresult),
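
Taken together, the EXEC states implement a two-lane dot-product accumulation: the first word of each lane is handled with `fmul`, every later word with `fmadd` (product plus the running sum fed back through `fs3`), and once `b9Offset1` reaches `b9ArrayWordLen` the two partial sums are added on FPU#1 alone. A behavioural sketch in plain Python that models only the arithmetic schedule, not timing or bfloat16 rounding:

```python
# Behavioural sketch of the schedule the FSM drives; not the hardware itself.
def two_lane_dot(pairs):
    """pairs: list of (x, y) operands; the core packs one pair per 32-bit cache word."""
    half = len(pairs) // 2
    def lane(chunk):                       # one FPU working through its half of the cache
        (x0, y0), rest = chunk[0], chunk[1:]
        acc = x0 * y0                      # entry 0: fmul
        for x, y in rest:
            acc = x * y + acc              # entries 1..n-1: fmadd via the fs3 feedback
        return acc
    return lane(pairs[:half]) + lane(pairs[half:])   # final ADD on FPU#1 only

print(two_lane_dot([(1.0, 0.5), (2.0, 0.5), (3.0, 0.5), (4.0, 0.5)]))  # 5.0
```
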
