Half-precision floats handling
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
bfloat16nn/libmodules/dramtransfer.py

345 lines
15 KiB

#!/usr/bin/env python3
#
# dramtransfer.py
#
# DRAM access
#
# History:
# --------
# 21.12.20/KQ Initial test
# 30.12.20/KQ Working (renamed) version
# 22.04.21/KQ Inbound transfer renamed
# 06.05.21/KQ Support for 2 read ports added (for now ...)
#
from migen import *
from migen.fhdl.specials import Memory
from litex.soc.interconnect.csr import AutoCSR, CSRStatus, CSRStorage, CSRField, CSRAccess
from litex.soc.integration.doc import AutoDoc, ModuleDoc
from litex.soc.interconnect.csr import *
#from litedram.common import LiteDRAMNativePort
from litedram.core.crossbar import LiteDRAMCrossbar
from litedram.frontend import dma
class DRAM2FPGA(Module, AutoCSR, AutoDoc, ModuleDoc):
    """
    DRAM2FPGA class provides the protocol logic to access DRAM values via LiteDRAM

    Usage:
    ######

    #. Make sure ``bEnable`` is reset (0); while it is reset, ``bValid`` is cleared
    #. Load ``b32Address`` with base address of range to read from (DRAM: >= 0x40000000)
    #. Finally, enable processing by setting ``bEnable`` to true (1).
    #. Once ``bValid`` becomes true (1), FPGA local memory is loaded, deactivate ``bEnable``
    #. To retrieve, load ``b9Offset1`` / ``b9Offset2`` with the offset (from base address)
       to read from; ``b32Data1`` / ``b32Data2`` will contain the 32-bit value
       (from local FPGA memory @offset). The local memory is only addressed for
       offsets below ``maxwords``.

    Inputs:
    #######

    :b32Address:    Base DRAM Address to load from
    :bEnable:       To enable running (after initialization)
    :b9Offset1:     Offset #1 (0..511) into local FPGA memory to read from
    :b9Offset2:     Offset #2 (0..511) into local FPGA memory to read from

    Output:
    #######

    :bValid:        Indicate validity of local FPGA memory, i.e. 'loaded'
    :b32Data1:      Local FPGA memory at b9Offset1
    :b32Data2:      Local FPGA memory at b9Offset2
    """

    def __init__(self, maxwords=8, dma_reader=None, sync_fifo=None):
        """
        Build the CSRs, the local memory and (optionally) the DMA fill logic.

        :param maxwords:   Depth of the local FPGA memory in 32-bit words; also fixes the
                           DMA chunk length to ``maxwords*4`` bytes.
        :param dma_reader: DMA reader whose ``_base``, ``_length``, ``_done``, ``_localstart``
                           and ``_localready`` members are driven/observed here.
        :param sync_fifo:  FIFO whose ``source`` endpoint delivers the words fetched by the DMA.

        The fill state machine is only instantiated when both ``dma_reader`` and
        ``sync_fifo`` are given; the two read ports are always instantiated.
        """
        # Inputs
        self.b32Address = CSRStorage(32, reset_less=True,
            fields=[CSRField("Address", size=32, description="*Field*: 32-Bit value")],
            description="\nBase DRAM address, to load from\n")
        self.bEnable = CSRStorage(1, reset_less=True,
            fields=[CSRField("Enable", size=1, description="*Field*: bit", values=[
                    # Fixed: the two descriptions were swapped relative to value/name
                    ("0", "DISABLED", "Loading disabled"),
                    ("1", "ENABLED", "Loading enabled"),
                ])
            ],
            description="\nEnable/disabling DRAM access\n")
        self.b9Offset1 = CSRStorage(9, reset_less=True,
            fields=[CSRField("Offset1", size=9, description="*Field*: 9-Bit value (0..511)")],
            description="\nOffset added to base address, port #1\n")
        self.b9Offset2 = CSRStorage(9, reset_less=True,
            fields=[CSRField("Offset2", size=9, description="*Field*: 9-Bit value (0..511)")],
            description="\nOffset added to base address, port #2\n")

        # Outputs
        self.bValid = CSRStorage(1, reset_less=True,
            fields=[CSRField("Valid", size=1, description="*Field*: bit", values=[
                    ("0", "INVALID", "Data output not available"),
                    ("1", "VALID", "Data valid"),
                ])
            ],
            description="\nData valid indication\n")
        self.b32Data1 = CSRStorage(32, reset_less=True,
            fields=[CSRField("Data1", size=32, description="*Field*: 32-Bit value")],
            description="\nActual value read #1\n")
        self.b32Data2 = CSRStorage(32, reset_less=True,
            fields=[CSRField("Data2", size=32, description="*Field*: 32-Bit value")],
            description="\nActual value read #2\n")
        self.b32RCount = CSRStorage(32, reset_less=True,
            fields=[CSRField("RCount", size=32, description="*Field*: 32-Bit value")],
            description="\nNo. of FIFO entries read so far (only for testing purposes)\n")

        # Local 'wire' data
        self.b32MemPt = Signal(32)      # WRITE: Local FPGA memory offset pointer
        self.b2Address1 = Signal(3)     # READ: Address conversion helper #1 (holds offset bits 0..1)
        self.b2Address2 = Signal(3)     # READ: Address conversion helper #2 (holds offset bits 0..1)
        self.bData1 = Signal(32)        # READ: Helper output data #1
        self.bData2 = Signal(32)        # READ: Helper output data #2

        storage = Memory(32, maxwords)  # Local FPGA memory
        self.specials += storage

        # ---------------------- Local (FPGA) memory DMA filling from DRAM ---------------------------------------------
        if (dma_reader is not None) and (sync_fifo is not None):
            # FPGA local memory write port (driven by FSM, s.b.)
            wrport = storage.get_port(write_capable=True)
            self.specials += wrport

            fsm = FSM(reset_state="IDLE")   # FSM starts idling ...
            self.submodules += fsm

            # Prepare DRAM address to load from ...
            self.sync += dma_reader._base.storage.eq(self.b32Address.storage)  # Base DRAM address to load from
            self.sync += dma_reader._length.storage.eq(maxwords*4)             # Fixed byte length of chunk to load from DRAM

            fsm.act("IDLE",
                If(self.bEnable.storage & ~self.bValid.storage,    # Enabled & not busy already
                    NextValue(dma_reader._localstart, 1),          # FPGA DMA self starter (special in litedram/frontend/dma.py)
                    NextValue(self.b32MemPt, 0),                   # Reset queue offset (counter)
                    NextState("DMAWAIT")
                ).Elif(~self.bEnable.storage,                      # Reset from external world first
                    NextValue(self.bValid.storage, 0)              # to reset ... before possible re-enable
                )
            )
            fsm.act("DMAWAIT",
                NextValue(dma_reader._localstart, 0),              # TODO: May be removed?! Reset FPGA DMA self starter
                # TODO: Probably wrong?! NextValue(self.bValid.storage, dma_reader._done.status), # Indicate DMA transfer finished to external ...
                If(dma_reader._done.status,                        # Wait 'til DMA transfer finishes ...
                    NextState("FIFOREAD")
                )
            )
            fsm.act("FIFOREAD",
                If(dma_reader._localready,                         # Transfer finished (really?)
                    If(sync_fifo.source.valid,                     # fifo.readable, # Data in queue waiting?
                        NextValue(self.b32RCount.storage, self.b32RCount.storage + 1),  # Increment
                        If(self.b32MemPt < maxwords,               # Legal address?
                            NextValue(wrport.adr, self.b32MemPt),                       # Local offset into memory
                            NextValue(wrport.dat_w, sync_fifo.source.payload.data),     # Store current value -> memory
                            NextValue(wrport.we, 1)                                     # Write enable
                        ),
                        NextValue(sync_fifo.source.ready, 1),      # fifo.re, 1), # ACK readable, request next FIFO entry
                        NextState("TESTFIFO")
                    )
                )
            )
            fsm.act("TESTFIFO",
                NextValue(wrport.we, 0),                           # Stop transfer to memory
                If(self.b32MemPt >= (maxwords-1),                  # Last word of the chunk just handled?
                    NextValue(self.b32MemPt, 0),                   # Reset counter
                    NextValue(self.bValid.storage, 1),             # Indicate validity of results
                    NextState("IDLE")
                ).Else(
                    NextValue(self.b32MemPt, self.b32MemPt + 1),   # Increment
                    NextState("FIFOREAD")
                ),
                NextValue(sync_fifo.source.ready, 0)               # fifo.re, 0), # Reset ACK
            )

        # --------------------------- Local (FPGA) memory retrieval access -----------------------------------------------
        # FPGA local memory read ports
        rdport1 = storage.get_port()
        self.specials += rdport1
        rdport2 = storage.get_port()
        self.specials += rdport2

        # The two low offset bits are mirrored (0<->3, 1<->2), i.e. the word order inside
        # every group of four words is reversed on read.
        # NOTE(review): presumably this compensates the word order delivered by the DRAM port - confirm.
        self.comb += [  # Read from (FPGA local) memory
            self.b2Address1.eq(self.b9Offset1.storage[0:2]),       # Filter bits 0..1 (range 0-3)
            If(self.b9Offset1.storage < maxwords,                  # Only address memory for legal offsets
                #rdport.adr.eq(self.b9Offset1.storage), # w/ translation!
                If(self.b2Address1 == 0,
                    rdport1.adr.eq(self.b9Offset1.storage | 3)              # 0->3
                ).Elif(self.b2Address1 == 1,
                    rdport1.adr.eq((self.b9Offset1.storage & 0x1FC) | 2)    # 1->2
                ).Elif(self.b2Address1 == 2,
                    rdport1.adr.eq((self.b9Offset1.storage & 0x1FC) | 1)    # 2->1
                ).Elif(self.b2Address1 == 3,
                    rdport1.adr.eq(self.b9Offset1.storage & 0x1FC)          # 3->0
                ),
                self.bData1.eq(rdport1.dat_r)                      # Assign to external var. ...
            ),
            self.b2Address2.eq(self.b9Offset2.storage[0:2]),       # Filter bits 0..1 (range 0-3)
            If(self.b9Offset2.storage < maxwords,                  # Only address memory for legal offsets
                #rdport.adr.eq(self.b9Offset2.storage), # w/ translation!
                If(self.b2Address2 == 0,
                    rdport2.adr.eq(self.b9Offset2.storage | 3)              # 0->3
                ).Elif(self.b2Address2 == 1,
                    rdport2.adr.eq((self.b9Offset2.storage & 0x1FC) | 2)    # 1->2
                ).Elif(self.b2Address2 == 2,
                    rdport2.adr.eq((self.b9Offset2.storage & 0x1FC) | 1)    # 2->1
                ).Elif(self.b2Address2 == 3,
                    rdport2.adr.eq(self.b9Offset2.storage & 0x1FC)          # 3->0
                ),
                self.bData2.eq(rdport2.dat_r)                      # Assign to external var. ...
            ),
        ]
        self.sync += self.b32Data1.storage.eq(self.bData1)         # Register read value #1 into its CSR
        self.sync += self.b32Data2.storage.eq(self.bData2)         # Register read value #2 into its CSR
class FPGA2DRAM(Module, AutoCSR, AutoDoc, ModuleDoc):
    """
    FPGA2DRAM class provides the protocol logic to write single DRAM values via LiteDRAM

    Usage:
    ######

    #. Make sure ``bEnable`` is reset (0)
    #. ``bData`` must contain the actual value to store to DRAM
    #. Load ``b32Address`` with base address to write to (DRAM: >= 0x40000000)
    #. Finally, enable processing by setting ``bEnable`` to true (1).
    #. Once ``bValid`` becomes true (1), the DMA transfer to DRAM has finished, deactivate ``bEnable``

    Inputs:
    #######

    :bData:         Data (32-bit) to store to DRAM
    :b32Address:    Base DRAM Address to write to
    :bEnable:       To enable running (after initialization)

    Output:
    #######

    :bValid:        Indicate validity of DRAM Memory, i.e. 'written'
    """

    def __init__(self, dma_writer=None, sync_fifo=None):
        """
        Build the CSRs, the address mixer and (optionally) the DMA write logic.

        :param dma_writer: DMA writer whose ``_base``, ``_length``, ``_done`` and
                           ``_localstart`` members are driven/observed here.
        :param sync_fifo:  FIFO whose ``sink`` endpoint takes the word to be written.

        The write state machine is only instantiated when both ``dma_writer`` and
        ``sync_fifo`` are given (same convention as ``DRAM2FPGA``); previously the
        default ``None`` arguments raised an ``AttributeError``.
        """
        # Inputs
        self.b32Address = CSRStorage(32, reset_less=True,
            fields=[CSRField("Address", size=32, description="*Field*: 32-Bit value")],
            description="\nBase DRAM address, to write to\n")
        self.bEnable = CSRStorage(1, reset_less=True,
            fields=[CSRField("Enable", size=1, description="*Field*: bit", values=[
                    # Fixed: the two descriptions were swapped relative to value/name
                    ("0", "DISABLED", "Writing disabled"),
                    ("1", "ENABLED", "Writing enabled"),
                ])
            ],
            description="\nEnable/disabling DRAM access\n")

        # Outputs
        self.bValid = CSRStorage(1, reset_less=True,
            fields=[CSRField("Valid", size=1, description="*Field*: bit", values=[
                    ("0", "INVALID", "Data output not available"),
                    ("1", "VALID", "Data valid"),
                ])
            ],
            description="\nDRAM data valid indication\n")
        #self.b32Data = CSRStorage(32, reset_less=True,
        #    fields=[CSRField("Data", size=32, description="*Field*: 32-Bit value")],
        #    description="""
        #    Actual value to be written
        #    """)

        # Local wiring: Address mixer
        self.bData = Signal(32)         # Actual value to store (not on wishbone-bus!)
        self.bAddress = Signal(32)      # Calculated actual address to write to ...
        self.b4Address = Signal(4)      # WRITE: Address conversion helper

        # The word slot inside each 16-byte group is mirrored (0<->12, 4<->8).
        # Only 4-byte aligned addresses match a branch; for any other low nibble
        # no branch assigns bAddress.
        # NOTE(review): presumably callers always pass word-aligned addresses - confirm.
        self.comb += [  # Calculate adjusted address
            self.b4Address.eq(self.b32Address.storage[0:4]),       # Filter bits 0..3 (range 0-F, 4-byte steps)
            If(self.b4Address == 0,
                self.bAddress.eq(self.b32Address.storage | 0xC)                     # 0->12
            ).Elif(self.b4Address == 4,
                self.bAddress.eq((self.b32Address.storage & 0xFFFFFFF0) | 8)        # 4->8
            ).Elif(self.b4Address == 8,
                self.bAddress.eq((self.b32Address.storage & 0xFFFFFFF0) | 4)        # 8->4
            ).Elif(self.b4Address == 12,
                self.bAddress.eq(self.b32Address.storage & 0xFFFFFFF0)              # 12->0
            ),
        ]

        # ---------------------- DMA write of a single word to DRAM ----------------------------------------------------
        if (dma_writer is not None) and (sync_fifo is not None):
            # Prepare DRAM address to write to ...
            self.sync += dma_writer._base.storage.eq(self.bAddress)    # Base DRAM address to write to
            self.sync += dma_writer._length.storage.eq(4)              # Fixed byte length of chunk to store to DRAM (1 word=4 bytes)

            fsm = FSM(reset_state="IDLE")   # FSM starts idling ...
            self.submodules += fsm

            fsm.act("IDLE",                                            # Store data in FIFO
                If(self.bEnable.storage & ~self.bValid.storage,        # Enabled & not busy already
                    If(sync_fifo.sink.ready,                           # fifo.writable!
                        NextValue(sync_fifo.sink.payload.data, self.bData),  # Actual 32-bit word to store
                        NextValue(sync_fifo.sink.valid, 1),            # fifo.we! ACK writable, request next FIFO entry
                        NextState("FIFOWRITEPULSE")
                    )
                ).Elif(~self.bEnable.storage,                          # Reset from external world first
                    NextValue(self.bValid.storage, 0)                  # to reset ... before possible re-enable
                )
            )
            fsm.act("FIFOWRITEPULSE",                                  # Reset FIFO write pulse
                NextValue(sync_fifo.sink.valid, 0),                    # End FIFO write pulse (value now in FIFO)
                NextState("STARTDMA")
            )
            fsm.act("STARTDMA",
                NextValue(dma_writer._localstart, 1),                  # FPGA DMA self starter (special in litedram/frontend/dma.py)
                NextState("DMAWAIT")
            )
            fsm.act("DMAWAIT",
                NextValue(dma_writer._localstart, 0),                  # Reset FPGA DMA self starter
                NextValue(self.bValid.storage, dma_writer._done.status),  # Indicate DMA transfer finished to external ...
                If(dma_writer._done.status,                            # Wait 'til DMA transfer finishes ...
                    NextState("WAITCYCLES"),
                )
            )
            fsm.act("WAITCYCLES",                                      # One extra cycle before returning to IDLE
                NextState("IDLE"),
            )
def _announce_module_only():
    """Print the notice shown when this file is run directly instead of imported."""
    print("*** This is a module only! ***")


# This file only provides hardware-description classes; running it directly
# just prints a notice.
if __name__ == "__main__":
    _announce_module_only()