fsub/fmul ready

master
kaqu 2021-04-01 12:03:40 +02:00
parent 475617f9f7
commit 3c0d732020
5 changed files with 117 additions and 21 deletions

View File

@ -228,6 +228,10 @@ def disassemble(opcode, pc):
elif op == 0x53: # R-Type (F-Extension)
if f7 == 0x00: # fadd
msg = "fadd.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x04: # fsub
msg = "fsub.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x08: # fmul
msg = "fmul.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])
elif f7 == 0x10: # sign injection/inverted/xor'ed
if f3 == 0x00:
msg = "fsgnj.s {0},{1},{2}".format(f_description[rd], f_description[rs1], f_description[rs2])

View File

@ -25,7 +25,10 @@ class Risq5FPUDecoder(Module):
def __init__(self, regs=None):
assert isinstance(regs, Risq5RegisterFile)
self.fadd = Signal() # F-Extension: fadd job trigger
# F-Extension: Job triggers
self.fadd = Signal()
self.fsub = Signal()
self.fmul = Signal()
self.fready = Signal() # Indicate ready
self.fwrite = Signal() # F-Extension: Do a write to a float register
@ -39,6 +42,7 @@ class Risq5FPUDecoder(Module):
self.m1 = Signal((23+1+3,False), reset_less=True) # Unsigned mantissas! TODO: Verify sign!
self.m2 = Signal((24+1+3,False), reset_less=True) # 23 bits + 1bit (1.xx = 0x800000)
self.m3 = Signal((25+1+3,True), reset_less=True) # + Sign + R(0)/Guard & Sticky bits
self.lm3 = Signal((64,True), reset_less=True) # MUL long result
self.s_bit = Signal() # Sticky bit (for rounding control)
self.branch1 = Signal() # Branch helpers
self.branch2 = Signal()
@ -49,14 +53,22 @@ class Risq5FPUDecoder(Module):
FPU_fsm.act("FPU_IDLE",
NextValue(self.FPU_state, 0),
If(self.fadd & ~self.fready, # Triggers set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.sign2, regs.fs2[31]),
If((self.fadd | self.fsub) & ~self.fready, # Triggers set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.sign2, regs.fs2[31] ^ self.fsub), # Invert sign for subtraction!
NextValue(self.e1, regs.fs1[23:31] - 127),
NextValue(self.e2, regs.fs2[23:31] - 127),
NextValue(self.m1, Cat(0,0,0, regs.fs1[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextValue(self.m2, Cat(0,0,0, regs.fs2[0:23], 1, 0)), # | 0x00800000 + R/G/S bits
NextState("FADD1")
).Elif(self.fmul & ~self.fready, # Triggers set & ready flag reset externally!
NextValue(self.sign1, regs.fs1[31]),
NextValue(self.sign2, regs.fs2[31]),
NextValue(self.e1, regs.fs1[23:31] - 127),
NextValue(self.e2, regs.fs2[23:31] - 127),
NextValue(self.m1, Cat(regs.fs1[0:23], 1, 0)), # | 0x00800000
NextValue(self.m2, Cat(regs.fs2[0:23], 1, 0)), # | 0x00800000
NextState("FMUL1")
)
)
@ -170,7 +182,7 @@ class Risq5FPUDecoder(Module):
NextValue(self.m3, self.m3 + self.s_bit),
NextState("FADD8") # Adjust possible overflow ...
).Else( # Nope, all ready
NextState("FADD9")
NextState("FRESULT")
)
)
)
@ -180,9 +192,10 @@ class Risq5FPUDecoder(Module):
NextValue(self.m3, self.m3 >> 1), # Adjust mantissa & increment exponent
NextValue(self.e3, self.e3 + 1)
),
NextState("FADD9")
)
FPU_fsm.act("FADD9",
NextState("FRESULT")
) # End of fadd.s processing
FPU_fsm.act("FRESULT", # Result construction & possible rounding
NextValue(self.FPU_state, 9),
# 6. Build the actual resulting float
NextValue(regs.frd_wrport.dat_w, Cat(self.m3[0:23], self.e3+127, self.sign3)),
@ -197,7 +210,80 @@ class Risq5FPUDecoder(Module):
NextValue(self.fwrite, 1), # Write required
NextValue(self.fready, 1), # Indicate ready to main decoder
NextState("FPU_IDLE")
) # End of fadd.s processing
)
FPU_fsm.act("FMUL1",
NextValue(self.FPU_state, 1),
# 0. Verify valid ranges 1st!
If(((regs.fs1 & 0x7FFFFFFF) == 0x7FFFFFFF) | ((regs.fs2 & 0x7FFFFFFF) == 0x7FFFFFFF),
NextValue(regs.fcs, regs.fcs | 0x10), # NV: Invalid operation
NextValue(regs.frd_wrport.dat_w, 0x7FFFFFFF), # NAN
NextValue(self.fwrite, 1),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Elif(self.e1 == -1, # Infinity
NextValue(regs.fcs, regs.fcs | 0x04), # OF: Overflow
NextValue(regs.frd_wrport.dat_w, regs.fs1), # Return infinity
NextValue(self.fwrite, 1),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Elif(self.e2 == -1, # Infinity
NextValue(regs.fcs, regs.fcs | 0x04), # OF: Overflow
NextValue(regs.frd_wrport.dat_w, regs.fs2), # Return infinity
NextValue(self.fwrite, 1),
NextValue(self.fready, 1),
NextState("FPU_IDLE")
).Else( # Ok, valid floats supplied ...
NextValue(self.sign3, self.sign1 ^ self.sign2), # 1. Calculate result sign
NextValue(self.e3, self.e1 + self.e2), # 2. Calculate resulting exponent (add!)
NextValue(self.lm3, self.m1 * self.m2), # 3. Significands multiplication (result size: 2x (sizeof(mantissa)+1) !)
NextState("FMUL2")
)
)
FPU_fsm.act("FMUL2",
NextValue(self.FPU_state, 2),
# 4. MSB set in significands (i.e. bit[45])?
# Bitoffset: 48 32 16 0
If(self.lm3[47], # & 0x0000800000000000, TODO: Verify bit# (45 or 47?)!
NextValue(self.lm3, self.lm3 >> 1), # Normalize result: Overflow
NextValue(self.e3, self.e3 + 1),
),
If(self.fmul, # Regular multiplication
NextState("FMUL3") # Do the rounding!
).Else( # Fused multiply/add? W/O rounding!
NextState("FMUL5")
)
)
FPU_fsm.act("FMUL3",
# 5. Rounding to nearest/even (FCS_FRM=0x00)
If(self.lm3[22] & self.lm3[23], # & 0xC00000) == 0xC00000 Remainder (to be skipped): RESULTBIT(0) + REMAINDERBIT(MSB) set?
If(self.lm3[0:22] != 0, # Sticky-Bit S (ORed rest) set?
#Bit:48 32 16 0 (>>23)
# 0000 2000 0000 0000 (Overflow 1.x)
NextValue(self.lm3, (self.lm3 & 0x00007FFFFF800000) + 0x800000), # Add remainder
NextState("FMUL4")
).Else(
NextState("FMUL5")
)
).Else(
NextState("FMUL5")
)
)
FPU_fsm.act("FMUL4",
# Overflow normalization
# Bit:48 32 16 0
If(self.lm3[47], # & 0x0000800000000000
NextValue(self.lm3, self.lm3 >> 1), # Normalize result: Overflow
NextValue(self.e3, self.e3 + 1)
),
NextState("FMUL5")
)
FPU_fsm.act("FMUL5",
# 6. Construction of result
NextValue(self.m3, (self.lm3 >> 23) & 0x7FFFFF),
# TODO: e3=se3 omitted ok?
NextState("FRESULT")
) # End of fmul.s processing
if __name__ == "__main__":

View File

@ -91,9 +91,11 @@ class Risq5Decoder(Module):
#NextValue(self.next, 0), # Indicate not yet there ... -> Reset @ top level!
NextValue(self.start, 0), # Once!
NextValue(self.write, 0), # Trigger write rd reset
NextValue(fpu_decoder.fadd, 0), # F-Extension: fadd job trigger
NextValue(fpu_decoder.fready, 0), # F-Extension: Clear ready indication
NextValue(fpu_decoder.fwrite, 0), # F-Extension: Trigger write frd reset
# F-Extension
NextValue(fpu_decoder.fready, 0), # Clear ready indication
NextValue(fpu_decoder.fwrite, 0), # Trigger write frd reset
NextValue(self.SUStore, 0), # Trigger store rs2 reset
NextValue(self.opcode_invalid, 0), # Reset invalid opcode indication
NextValue(self.div_instruction, 0), # M extension divide instruction special
@ -327,11 +329,11 @@ class Risq5Decoder(Module):
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.op == 0x53, # R-Type
If(regs.f7 == 0x00, # fadd.s frd, frs1, frs2
NextValue(fpu_decoder.fadd, 1), # Indicate eval. queue!
NextValue(fpu_decoder.fadd, 1), # Post fadd.s indication
).Elif(regs.f7 == 0x04, # fsub.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
NextValue(fpu_decoder.fsub, 1), # Post fsub.s indication
).Elif(regs.f7 == 0x08, # fmul.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
NextValue(fpu_decoder.fmul, 1), # Post fmul.s indication
).Elif(regs.f7 == 0x0C, # fdiv.s
NextValue(self.DECODE_state, 0x0F), # Dummy action
).Elif(regs.f7 == 0x2C, # fsqrt.s
@ -403,8 +405,8 @@ class Risq5Decoder(Module):
NextValue(self.next, 1), # Indicate ready state to ALU
NextState("DECODE_IDLE") # No write!
).Else(
If(fpu_decoder.fadd, # Branch helper: fadd logic engage (req. reset within FPU_WAIT)
NextState("FPU_WAIT")
If(fpu_decoder.fadd | fpu_decoder.fsub | fpu_decoder.fmul, # FPU logic engaging?
NextState("FPU_WAIT") # Reset request within FPU_WAIT!
).Elif(self.div_instruction != 0, # M extension divide instruction?
If(regs.xs2s == 0x0, # RISC-V: Doesn't raise exception div/0!
NextValue(statusreg.storage[12], 1), # Indicate division by zero (TODO: Remove later!)
@ -622,7 +624,9 @@ class Risq5Decoder(Module):
# F-Extension: Wait for FPU ready signals
DECODE_fsm.act("FPU_WAIT",
If(fpu_decoder.fready, # Wait for FPU decoder ready flag
NextValue(fpu_decoder.fadd, 0), # Reset fadd job queue trigger (because of NFA reentry later!)
NextValue(fpu_decoder.fadd, 0), # Reset job queue triggers (because of NFA reentry later!)
NextValue(fpu_decoder.fsub, 0),
NextValue(fpu_decoder.fmul, 0),
NextState("DECODE_WRITE")
)
)

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
#
# risq5.py
# risq5_imf.py
#
# This file has been derived from LiteX-Boards/colorlight_5b_75x.py
# Copyright (c) 2020 Florent Kermarrec <florent@enjoy-digital.fr>
@ -13,7 +13,7 @@
#
# History:
# --------
# 22.12.20/KQ Inital version
# 22.12.20/KQ Initially derived version
#
# Build/Use ----------------------------------------------------------------------------------------
# - 'python3 risq5.py --build --revision=7.0 --uart-name=crossover --with-etherbone --ip-address=192.168.1.20 --csr-csv=build/csr.csv'

View File

@ -5,7 +5,9 @@ static void start(void)
auipc ra,0 # Store current pc \n\
lui sp,%hi(0x40192000) # Setup stack pointer \n\
addi sp,sp,%lo(0x40192000) # s.a. \n\
repeat: fadd.s f2,f0,f1 # f2 = f0 + f1 (x1=Select/x2=rc) \n\
repeat: fadd.s f2,f0,f1 # f2 = f0 + f1 \n\
fsub.s f2,f0,f1 # f2 = f0 - f1 \n\
fmul.s f2,f0,f1 # f2 = f0 * f1 \n\
j repeat # Loop ... \n\
lw x3,0(sp) # Load float from stack (test) \n\
lw x3,4(sp) # Load float from stack (test) \n\