An RV32IMF implementation w/ migen/LiteX
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
Risq5/libmodules/core.py

405 lines
21 KiB

#!/usr/bin/env python3
#
# core.py
#
# Risq5 core cpu
#
# History:
# --------
# 04.01.21/KQ Initial version
#
from migen import *
from migen.fhdl.specials import Memory
from litex.soc.interconnect.csr import AutoCSR, CSRStatus, CSRStorage, CSRField, CSRAccess
from litex.soc.integration.doc import AutoDoc, ModuleDoc
from litex.soc.interconnect.csr import *
from libmodules.dramtransfer import DRAMTransfer
from libmodules.register_file import Risq5RegisterFile
from libmodules.instruction_decode import Risq5Decoder
import libmodules.risq5defs as risq5defs
class Risq5Core(Module, AutoCSR, AutoDoc, ModuleDoc):
    """
    Risq5 cpu core class provides:

    #. Program counter
    #. Register file w/ registers x0..x31 (32-bit)
    #. Instruction fetch/decode (L1 cache & opcode decoder)
    #. Load/store unit (memory access)
    #. Datapath
    #. Arithmetics & logic unit (ALU)
    #. Debugging support (single step logic)

    Usage:
    ######

    #. Not yet defined (apart from RISC-V spec.!)

    Inputs:
    #######

    :b32mode: Mode control word (run/stop, single step, breakpoint, new PC, interrupts)
    :b32_breakpoint: Breakpoint address
    :b32_next_pc: New PC address
    :b5_wb_reg_no: Index of register to read or write remotely
    :b32_wb_reg_value_w: Register value to write
    :b1_wb_reg_we: Write enable

    Outputs:
    ########

    :b32status: Status word
    :b32_PC: Program counter
    :b32_opcode: Next opcode
    :b32_wb_reg_value_r: Register value to be read
    :b32_FSMs: FSM states
    :b32_Counters: Debugging counters
    """

    def __init__(self, RAMWaitTime=128, LU_CacheWait=24, L1CacheSize=8, L1Cache=None, LUCacheSize=4, LUCache=None, SU_Unit=None, clint=None, isa_extensions=0x40000100):
        """
        Build the core: CSRs, register file, decoder and the L1/LU/ALU state machines.

        :param RAMWaitTime: Number of cycles the ALU FSM waits (``ALU_DELAY``/``ALU_EVAL2``)
            before it samples the opcode from the L1 cache.
        :param LU_CacheWait: Forwarded unchanged to the instruction decoder.
        :param L1CacheSize: L1 cache size in 32-bit words; reaching this offset forces a reload.
        :param L1Cache: Instruction cache transfer unit (required). Must expose the
            ``b9Offset``, ``b32Address``, ``bEnable``, ``bValid`` and ``b32Data`` registers.
        :param LUCacheSize: Not used within this class.
        :param LUCache: Load unit cache transfer unit (required), same register set as ``L1Cache``
            (``b32Data`` is not accessed here).
        :param SU_Unit: Forwarded unchanged to the instruction decoder.
        :param clint: Optional object providing a ``time_int_p`` timer pulse. If ``None``,
            no timer interrupt request is generated by the core itself.
        :param isa_extensions: Value written to the ``misa`` CSR in state ``ALU_INIT``.
        :raises ValueError: If ``L1Cache`` or ``LUCache`` is missing.
        """
        # Both caches are dereferenced unconditionally below, fail early with a clear message
        if L1Cache is None or LUCache is None:
            raise ValueError("Risq5Core requires L1Cache and LUCache instances")

        # Inputs
        self.b32mode = CSRStorage(32, reset_less=True,
            fields=[
                CSRField("RunEnable", size=1, description="Bit[0]: Run enable"),
                CSRField("NoSingleStep", size=1, description="Bit[1]: No single stepping"),
                CSRField("SingleStepPulse", size=1, description="Bit[2]: Single step pulse"),
                CSRField("BreakpointValid", size=1, description="Bit[3]: Breakpoint address is valid"),
                CSRField("SetNewPC", size=1, description="Bit[4]: Set new PC"),
                CSRField("MMExternalInterrupt", size=1, description="Bit[5]: Machine external interrupt"),
                CSRField("MMTimerInterrupt", size=1, description="Bit[6]: Machine timer interrupt"),
                CSRField("NotUsed", size=24, description="*Reserved*"),
                CSRField("UserLED", size=1, description="Bit[31]: User LED access"),
            ],
            description="""
            Mode control word
            """)
        self.b32status = CSRStorage(32, reset_less=True,
            fields=[
                CSRField("L1CacheValid", size=1, description="Bit[0]: L1 cache valid flag"),
                CSRField("Breakpoint", size=1, description="Bit[1]: Breakpoint reached flag"),
                CSRField("Illegal", size=1, description="Bit[2]: Illegal instruction flag"),
                CSRField("BelowCache", size=1, description="Bit[3]: Branch below L1 cache flag"),
                CSRField("BeyondCache", size=1, description="Bit[4]: Branch beyond L1 cache flag"),
                CSRField("LUCacheValid", size=1, description="Bit[5]: LU cache valid flag"),
                CSRField("BelowCacheJ", size=1, description="Bit[6]: Jump below L1 cache flag"),
                CSRField("BeyondCacheJ", size=1, description="Bit[7]: Jump beyond L1 cache flag"),
                CSRField("BelowCacheI", size=1, description="Bit[8]: Jump below L1 cache flag"),
                CSRField("BeyondCacheI", size=1, description="Bit[9]: Jump beyond L1 cache flag"),
                CSRField("MMExternalInterrupt", size=1, description="Bit[10]: Machine external interrupt"),
                CSRField("MMTimerInterrupt", size=1, description="Bit[11]: Machine timer interrupt"),
                CSRField("NotYetUsed", size=20, description="*Reserved*"),
            ],
            description="""
            Status word
            """)
        self.b32_opcode = CSRStorage(32, reset_less=True,
            fields=[CSRField("Opcode", size=32, description="*Field*: 32-Bit value")],
            description="""
            Next opcode (not yet evaluated)
            """)
        self.b32_breakpoint = CSRStorage(32, reset_less=True,
            fields=[CSRField("Breakpoint", size=32, description="*Field*: 32-Bit value")],
            description="""
            Breakpoint address (if ``b32mode[3]`` is set)
            """)
        self.b32_next_pc = CSRStorage(32, reset_less=True,
            fields=[CSRField("NextPC", size=32, description="*Field*: 32-Bit value")],
            description="""
            New PC address (if ``b32mode[4]`` is set)
            """)
        self.b5_wb_reg_no = CSRStorage(5, reset_less=True,
            fields=[CSRField("wb_reg_no", size=5, description="*Field*: 5-Bit value")],
            description="""
            Register index to read or write remotely
            """)
        self.b32_wb_reg_value_w = CSRStorage(32, reset_less=True,
            fields=[CSRField("wb_reg_value_w", size=32, description="*Field*: 32-Bit value")],
            description="""
            New value to write to register index ``wb_reg_no``
            """)
        self.b1_wb_reg_we = CSRStorage(1, reset_less=True,
            fields=[CSRField("wb_reg_we", size=1, description="*Field*: bit", values=[
                    ("0", "DISABLED", "-"),
                    ("1", "ENABLED", "WRITE"),
                ])
            ],
            description="""
            Write ``b32_wb_reg_value_w`` to register offset ``wb_reg_no``
            """)

        # Outputs (driven by the core logic below, read by the debugger)
        self.b32_PC = CSRStorage(32, reset_less=True,
            fields=[CSRField("PC", size=32, description="*Field*: 32-Bit value")],
            description="""
            CPU program counter (PC)
            """)
        self.b32_wb_reg_value_r = CSRStorage(32, reset_less=True,
            fields=[CSRField("wb_reg_value_r", size=32, description="*Field*: 32-Bit value")],
            description="""
            Value of register index ``wb_reg_no``
            """)
        # The field list defines the register width. It has to cover the complete
        # L1/ALU/LU/DECODE packing done at the end of this constructor (4/4/4/9 bits),
        # otherwise the upper DECODE bits are cut off.
        self.b32_FSMs = CSRStorage(32, reset_less=True,
            fields=[
                CSRField("L1", size=4, description="*L1 FSM*: 4-Bit value"),
                CSRField("ALU", size=4, description="*ALU FSM*: 4-Bit value"),
                CSRField("LU", size=4, description="*LU FSM*: 4-Bit value"),
                CSRField("DECODE", size=9, description="*DECODE FSM*: 9-Bit value"),
                CSRField("reserved", size=32-21, description="*reserved*"),
            ],
            description="""
            Diverse FSM states for debugging ...
            """)
        # C3 mirrors the 9-bit L1 cache offset (bits 16..24), which leaves 7 bits for C4
        self.b32_Counters = CSRStorage(32, reset_less=False,
            fields=[
                CSRField("C1", size=8, description="*C1*: 8-Bit value"),
                CSRField("C2", size=8, description="*C2*: 8-Bit value"),
                CSRField("C3", size=9, description="*C3*: 9-Bit value"),
                CSRField("C4", size=7, description="*C4*: 7-Bit value"),
            ],
            description="""
            Several counters for debugging
            """)

        # Local vars.
        mode = self.b32mode.storage         # Shorthand: mode control word bits
        status = self.b32status.storage     # Shorthand: status word bits

        # Register file ----------------------------------------------------------------------------
        self.submodules.regs = regs = Risq5RegisterFile()
        self.sync += [ # External register file write port (debugger)
            regs.write_ext_index.eq(self.b5_wb_reg_no.storage),         # Let write index follow
            regs.ext_wrport.dat_w.eq(self.b32_wb_reg_value_w.storage),  # & value as well ...
            regs.ext_wrport.we.eq(self.b1_wb_reg_we.storage)            # Write request (pulse) forwarder
        ]

        # Instruction fetch ------------------------------------------------------------------------
        self.L1CacheOffset = Signal(9)      # 0..511, cache reading offset (in 32-bit words)
        self.sync += L1Cache.b9Offset.storage.eq(self.L1CacheOffset)    # Index cache for reading (forwarded)

        # Load Unit cache
        self.LUCacheOffset = Signal(2)      # 0..3
        self.sync += LUCache.b9Offset.storage.eq(self.LUCacheOffset)    # Index cache for reading (forwarded)

        # Cache loader stati
        self.L1_CacheValid = Signal()       # Indicate loaded L1 cache
        self.sync += status[0].eq(self.L1_CacheValid)   # Indicate cache status to external world
        self.LU_CacheValid = Signal()       # Indicate loaded LU cache
        self.sync += status[5].eq(self.LU_CacheValid)   # Indicate cache status to external world

        # Integrate an instruction decoder
        self.submodules.decoder = decoder = Risq5Decoder(
            regs=regs,
            modereg=self.b32mode,
            statusreg=self.b32status,
            L1CacheOffset=self.L1CacheOffset,
            L1CacheSize=L1CacheSize,
            LU_CacheWait=LU_CacheWait,      # May be as short as 24?!
            LU_CacheValid=self.LU_CacheValid,
            LUCache=LUCache,
            SU_Unit=SU_Unit,
            isa_extensions=isa_extensions
        )

        #---------------- L1 cache -------------------------------------------------------------
        L1_fsm = FSM(reset_state="L1_IDLE") # FSM starts idling ...
        self.submodules += L1_fsm
        self.L1_state = Signal(4, reset_less=True)  # Debugging support

        L1_fsm.act("L1_IDLE",               # If cache not valid fill it!
            NextValue(self.L1_state, 0),
            If(~self.L1_CacheValid & mode[0],   # Invalid cache & run enabled ...
                NextValue(L1Cache.b32Address.storage, regs.pc & 0xFFFFFFF0),    # Current PC (16-byte aligned) dictates loading ...
                NextValue(self.L1CacheOffset, ((regs.pc & 0x0F) >> 2)),         # Adjust pointer (local reader), 4-byte width=32-bit
                NextState("L1_LOAD1")
            )
        )
        L1_fsm.act("L1_LOAD1",              # Engage!
            NextValue(self.L1_state, 1),
            NextValue(L1Cache.bEnable.storage, 1),  # Trigger DRAM transfer to cache
            NextState("L1_LOAD2")
        )
        L1_fsm.act("L1_LOAD2",              # Wait for termination of transfer ...
            NextValue(self.L1_state, 2),
            If(L1Cache.bValid.storage,      # Data avail.?
                NextValue(L1Cache.bEnable.storage, 0),  # Reset/ACK to DRAM transfer (sort of ...)
                NextValue(self.L1_CacheValid, 1),       # Declare cache valid
                NextValue(self.b32_Counters.storage[0:8], self.b32_Counters.storage[0:8] + 1), # C#1 Count L1 cache (re-)loads
                NextState("L1_IDLE")        # Yap!
            )
            # TODO: Permit timeout indication ...
        )

        #---------------- LU cache -------------------------------------------------------------
        LU_fsm = FSM(reset_state="LU_IDLE") # FSM starts idling ...
        self.submodules += LU_fsm
        self.LU_state = Signal(4, reset_less=True)  # Debugging support

        LU_fsm.act("LU_IDLE",               # If cache not valid fill it!
            NextValue(self.LU_state, 0),
            If(~self.LU_CacheValid & mode[0],   # Invalid cache & run enabled ...
                NextValue(LUCache.b32Address.storage, regs.LUAddress & 0xFFFFFFF0), # Current memory address (16-byte aligned) dictates loading ...
                NextValue(self.LUCacheOffset, ((regs.LUAddress & 0x0F) >> 2)),      # Adjust pointer (local reader), 4-byte width=32-bit
                NextState("LU_LOAD1")
            )
        )
        LU_fsm.act("LU_LOAD1",              # Engage!
            NextValue(self.LU_state, 1),
            NextValue(LUCache.bEnable.storage, 1),  # Trigger DRAM transfer to cache
            NextState("LU_LOAD2")
        )
        LU_fsm.act("LU_LOAD2",              # Wait for termination of transfer ...
            NextValue(self.LU_state, 2),
            If(LUCache.bValid.storage,      # Data avail.?
                NextValue(LUCache.bEnable.storage, 0),  # Reset/ACK to DRAM transfer (sort of ...)
                NextValue(self.LU_CacheValid, 1),       # Declare cache valid
                NextValue(self.b32_Counters.storage[25:32], self.b32_Counters.storage[25:32] + 1), # C#4 7-bit Count LU cache (re-)loads
                NextState("LU_IDLE")        # Yap!
            )
        )

        # ALU -------------------------------------------------------------------------------------
        ALU_fsm = FSM(reset_state="ALU_INIT")   # FSM starts w/ init.
        self.submodules += ALU_fsm
        self.ALU_state = Signal(4, reset_less=True)         # Debugging support
        self.ALU_loaddelay = Signal(32, reset_less=True)    # 1st opcode load delay (TODO: shorter!)

        # Shorthands for the machine mode CSRs used by the interrupt entry logic
        mstatus = regs.csr[risq5defs.CSR_mstatus]
        mie = regs.csr[risq5defs.CSR_mie]
        # A request is taken only if enabled in mie, requested by the mode word and not
        # yet latched in the status word (edge store)
        ext_irq = mie[risq5defs.MIE_MEIE] & mode[5] & ~status[10]
        tmr_irq = mie[risq5defs.MIE_MTIE] & mode[6] & ~status[11]
        # Run enabled and (not single stepping or step pulse given).
        # mode[1] | (~mode[1] & mode[2]) reduces to mode[1] | mode[2].
        run_step = mode[0] & (mode[1] | mode[2])

        ALU_fsm.act("ALU_INIT",             # Initialize static registers (TODO: Run after reset?!)
            NextValue(regs.csr[risq5defs.CSR_misa], isa_extensions),    # misa: 32 bit RISC-V isa extensions list
            # mvendorid, marchid, mimpid, mhartid not specified (zero, acc. to spec.)
            NextValue(status[10], 0),       # Edge store reset
            NextValue(status[11], 0),       # Edge store reset
            NextState("ALU_IDLE")
        )
        #----- LOOP ENTRY CACHE RELOAD --------------------------------
        ALU_fsm.act("ALU_IDLE",
            NextValue(self.ALU_state, 0),
            NextValue(self.ALU_loaddelay, 0),
            If(self.L1_CacheValid,          # Opcode data now avail.
                NextState("ALU_DELAY")
            )
        )
        ALU_fsm.act("ALU_DELAY",
            # ------------------------------ MODIFIED LAST -------------------------------------------
            If(self.ALU_loaddelay > RAMWaitTime,    # Interrupt calls execute w/ full performance (safely) (TODO: Try to lower ...)
                NextState("ALU_EVAL1")      # Load delay for opcode cache is next
            ).Else(                         # Time not yet elapsed ...
                NextValue(self.ALU_loaddelay, self.ALU_loaddelay + 1),
            )
        )
        #----- LOOP ENTRY CACHED DATA ---------------------------------
        ALU_fsm.act("ALU_EVAL1",
            NextValue(self.ALU_loaddelay, 0),
            NextState("ALU_EVAL2")          # Load delay nec. for opcode indirect loading
        )
        ALU_fsm.act("ALU_EVAL2",
            If(self.ALU_loaddelay > RAMWaitTime,    # Interrupt calls execute w/ full performance (safely) (TODO: Try to lower ...)
                NextState("ALU_EVAL3")      # Load delay nec. for opcode indirect loading
            ).Else(                         # Time not yet elapsed ...
                NextValue(self.ALU_loaddelay, self.ALU_loaddelay + 1),
            )
        )
        ALU_fsm.act("ALU_EVAL3",
            NextValue(regs.opcode, L1Cache.b32Data.storage),    # Load opcode (depends upon L1Cache.b9Offset)
            NextState("ALU_EVAL4")
        )
        ALU_fsm.act("ALU_EVAL4",
            NextValue(self.ALU_state, 1),
            If(mode[4],                     # Set new PC requested?
                NextValue(mode[4], 0),      # Yap! Reset PC adjust request
                NextValue(regs.pc, self.b32_next_pc.storage),   # Adjust PC to debugger set value
                NextValue(self.L1_CacheValid, 0),               # & enforce cache reload
                NextState("ALU_IDLE")       # Wait for valid cache ...
            ).Else(                         # No 'new pc' command ...
                If(run_step,
                    If(mode[3] & status[1], # Breakpoint valid & reached?
                        # Inhibits interrupt breakpoints! Hence not included: NextValue(regs.csr[risq5defs.CSR_mcause], risq5defs.MCAUSE_BREAKPOINT),
                        NextValue(mode[1], 0),  # Switch to (~ no) 'single stepping'
                        NextValue(mode[3], 0),  # Clear active breakpoint
                    ).Elif(mstatus[risq5defs.MSTATUS_MIE] & (ext_irq | tmr_irq),   # External H/W interrupt detected!
                        # Select the cause by the request that actually qualified. Testing
                        # the raw mode[5] bit would report an external interrupt even when
                        # only the timer request is enabled & pending.
                        If(ext_irq,         # Machine mode: External interrupt
                            NextValue(mode[5], 0),      # Reset external command
                            NextValue(status[10], 1),   # Edge store
                            NextValue(regs.csr[risq5defs.CSR_mcause], risq5defs.MCAUSE_MACHINE_EXTERNAL_INTERRUPT),
                        ).Else(             # Machine mode: Timer interrupt
                            NextValue(mode[6], 0),      # Reset external command
                            NextValue(status[11], 1),   # Edge store
                            NextValue(regs.csr[risq5defs.CSR_mcause], risq5defs.MCAUSE_MACHINE_TIMER_INTERRUPT),   # 0x80000007
                        ),
                        NextValue(mstatus[risq5defs.MSTATUS_MPIE], mstatus[risq5defs.MSTATUS_MIE]),    # Store pending interrupt indication
                        NextValue(regs.csr[risq5defs.CSR_mip], mie),                    # Store current interrupt flags
                        NextValue(regs.csr[risq5defs.CSR_mepc], regs.pc),               # Store current PC value
                        NextValue(regs.pc, regs.csr[risq5defs.CSR_mtvec]),              # Adjust PC to trap vector
                        NextValue(self.L1_CacheValid, 0),                               # & enforce cache reload
                        NextState("ALU_IDLE")   # Wait for valid cache ...
                    ).Else(                 # No breakpoint set (any more ...)
                        NextValue(mode[2], 0),  # Reset single step pulse
                        NextValue(self.b32_Counters.storage[8:16], self.b32_Counters.storage[8:16] + 1),   # C#2 Count instructions!
                        NextValue(decoder.next, 0),     # Clear finished flag
                        NextValue(decoder.L1Reload, 0), # Cache normally stays valid ...
                        NextValue(decoder.LUReload, 0), # Cache normally stays valid ...
                        NextValue(decoder.start, 1),    # Trigger decoder
                        NextState("ALU_EVAL5")
                    )
                )
            )
        )
        ALU_fsm.act("ALU_EVAL5",
            NextValue(self.ALU_state, 2),
            If(decoder.next,                # Wait for decoder
                # Attention: Do not increment pc forever -> out of DRAM space!
                If(decoder.L1Reload,        # Reload cache request pending?
                    NextValue(self.L1CacheOffset, L1CacheSize)              # Enforce reload next
                ).Else(
                    NextValue(self.L1CacheOffset, self.L1CacheOffset + 1)   # Next instruction to load
                ),
                NextState("ALU_NEXT")       # Load next instruction (pc+4)
            )
        )
        ALU_fsm.act("ALU_NEXT",
            NextValue(self.ALU_state, 4),
            If(self.L1CacheOffset >= L1CacheSize,   # If cache invalid, reload
                NextValue(regs.pc, regs.pc + 4),    # Increment PC (the memory pointer)
                NextValue(self.L1_CacheValid, 0),   # & enforce cache reload
                NextState("ALU_IDLE")       # Wait for valid cache ...
            ).Else(                         # Cache valid, pick next instruction
                NextValue(regs.pc, regs.pc + 4),    # Increment PC
                NextState("ALU_EVAL1")      # Continue w/ next instruction evaluation
            )
        )

        self.comb += [
            regs.read_ext_index.eq(self.b5_wb_reg_no.storage)   # Relay external register read access
        ]

        debug_sync = [ # Debugging support, indicate states & regs to outside world ...
            self.b32_PC.storage.eq(regs.pc),                        # PC
            self.b32_wb_reg_value_r.storage.eq(regs.ext_x),         # Reg[index]
            self.b32_opcode.storage.eq(regs.opcode),                # Next opcode to execute
            self.b32_Counters.storage[16:25].eq(self.L1CacheOffset),    # C#3 L1 cache offset (9-bit!)
            status[1].eq(self.b32_breakpoint.storage == regs.pc),   # Breakpoint reached flag
            self.b32_FSMs.storage.eq((decoder.DECODE_state << 12) | (self.LU_state << 8) | (self.ALU_state << 4) | self.L1_state), # FSMs: 4/4/4/9 bits
        ]
        if clint is not None:               # Timer hook is optional (clint defaults to None)
            debug_sync.append(
                If(clint.time_int_p,        # Timer pulse
                    mode[6].eq(1),          # Generate timer interrupt (pulse, set)
                    mode[31].eq(~mode[31])  # Invert LED (TODO: Remove!)
                )
            )
        self.sync += debug_sync
def Risq5Core_testbench(regs):
    """
    Minimal simulation stimulus for the Risq5 core.

    Prints a banner and then yields exactly once, i.e. hands a single
    simulation step back to the caller.

    :param regs: Device under test; not accessed yet.
    """
    banner = "----- RISQ5 core testbench -----"
    print(banner)
    # Placeholder: presetting the 32 registers (x0..x31) is not implemented yet,
    # hence there is nothing to drive before the first step.
    yield
if __name__ == "__main__":
    # Stand-alone simulation entry: build the core, attach the testbench generator
    # and dump the waveform to a VCD file.
    # NOTE(review): the core is created with default arguments only, i.e. L1Cache
    # and LUCache are None although the constructor needs real cache instances.
    # Supply them before relying on this entry point.
    core = Risq5Core()
    stimulus = Risq5Core_testbench(core)
    run_simulation(core, stimulus, vcd_name="risq5_core.vcd", clocks={"sys": 16})